1 ///////////////////////////////////////////////////////////////////////////// 
   3 // Purpose:     Unicode conversion classes 
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik 
   8 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik 
   9 //              (c) 2000-2003 Vadim Zeitlin 
  10 // Licence:     wxWindows licence 
  11 ///////////////////////////////////////////////////////////////////////////// 
  13 // ============================================================================ 
  15 // ============================================================================ 
  17 // ---------------------------------------------------------------------------- 
  19 // ---------------------------------------------------------------------------- 
  21 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA) 
  22   #pragma implementation "strconv.h" 
  25 // For compilers that support precompilation, includes "wx.h". 
  26 #include "wx/wxprec.h" 
  37 #include "wx/strconv.h" 
  42     #include "wx/msw/private.h" 
  46     #include "wx/msw/missing.h" 
  57 #if defined(__WIN32__) && !defined(__WXMICROWIN__) 
  58     #define wxHAVE_WIN32_MB2WC 
  59 #endif // __WIN32__ but !__WXMICROWIN__ 
  61 // ---------------------------------------------------------------------------- 
  63 // ---------------------------------------------------------------------------- 
  73 #include "wx/encconv.h" 
  74 #include "wx/fontmap.h" 
  78 #include <ATSUnicode.h> 
  79 #include <TextCommon.h> 
  80 #include <TextEncodingConverter.h> 
  82 #include  "wx/mac/private.h"  // includes mac headers 
  84 // ---------------------------------------------------------------------------- 
  86 // ---------------------------------------------------------------------------- 
  88 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); } 
  89 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); } 
  91 #if SIZEOF_WCHAR_T == 4 
  92     #define WC_NAME         "UCS4" 
  93     #define WC_BSWAP         BSWAP_UCS4 
  94     #ifdef WORDS_BIGENDIAN 
  95       #define WC_NAME_BEST  "UCS-4BE" 
  97       #define WC_NAME_BEST  "UCS-4LE" 
  99 #elif SIZEOF_WCHAR_T == 2 
 100     #define WC_NAME         "UTF16" 
 101     #define WC_BSWAP         BSWAP_UTF16 
 103     #ifdef WORDS_BIGENDIAN 
 104       #define WC_NAME_BEST  "UTF-16BE" 
 106       #define WC_NAME_BEST  "UTF-16LE" 
 108 #else // sizeof(wchar_t) != 2 nor 4 
 109     // does this ever happen? 
 110     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org" 
 113 // ============================================================================ 
 115 // ============================================================================ 
 117 // ---------------------------------------------------------------------------- 
 118 // UTF-16 en/decoding to/from UCS-4 
 119 // ---------------------------------------------------------------------------- 
     // Encode the value `input` as UTF-16 into `output`.
     // From the statements visible here: one path stores `input` unchanged as
     // a single 16-bit unit; values >= 0x110000 are singled out by the
     // "else if"; the remaining path stores two units,
     // (input >> 10) + 0xd7c0 followed by (input & 0x3ff) + 0xdc00.
     // Callers in this file compare the result with (size_t)-1 and otherwise
     // use it as a count of 16-bit units.
     // NOTE(review): wxMBConvUTF8::MB2WC passes its `buf` here while also
     // looping when buf is null, so `output` can presumably be null -- confirm
     // that the stores below are guarded on lines not shown in this excerpt.
 122 static size_t encode_utf16(wxUint32 input
, wxUint16 
*output
) 
 127             *output 
= (wxUint16
) input
; 
 130     else if (input
>=0x110000) 
 138             *output
++ = (wxUint16
) ((input 
>> 10)+0xd7c0); 
 139             *output 
= (wxUint16
) ((input
&0x3ff)+0xdc00); 
     // Decode the UTF-16 unit(s) starting at `input` into a single value
     // stored in `output`.  A first unit outside 0xd800..0xdfff is tested for
     // first; otherwise input[1] is examined as the second unit and the pair
     // is combined as ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00).
     // Callers in this file compare the result with (size_t)-1 and otherwise
     // advance their input pointer by it.
 145 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
) 
 147     if ((*input
<0xd800) || (*input
>0xdfff)) 
     // NOTE(review): the upper bound below is ">=0xdfff", so a second unit of
     // exactly 0xdfff takes this branch although 0xdc00..0xdfff are all valid
     // trailing surrogates -- this looks like an off-by-one for ">0xdfff".
     // NOTE(review): a first unit in 0xdc00..0xdfff (a lone trailing
     // surrogate) is not filtered out by the test above either -- confirm
     // whether that is intended.
 152     else if ((input
[1]<0xdc00) || (input
[1]>=0xdfff)) 
 159         output 
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00); 
 165 // ---------------------------------------------------------------------------- 
 167 // ---------------------------------------------------------------------------- 
 169 wxMBConv::~wxMBConv() 
 171     // nothing to do here 
 174 const wxWCharBuffer 
wxMBConv::cMB2WC(const char *psz
) const 
 178         // calculate the length of the buffer needed first 
 179         size_t nLen 
= MB2WC(NULL
, psz
, 0); 
 180         if ( nLen 
!= (size_t)-1 ) 
 182             // now do the actual conversion 
 183             wxWCharBuffer 
buf(nLen
); 
 184             nLen 
= MB2WC(buf
.data(), psz
, nLen 
+ 1); // with the trailing NULL 
 185             if ( nLen 
!= (size_t)-1 ) 
 192     wxWCharBuffer 
buf((wchar_t *)NULL
); 
 197 const wxCharBuffer 
wxMBConv::cWC2MB(const wchar_t *pwz
) const 
 201         size_t nLen 
= WC2MB(NULL
, pwz
, 0); 
 202         if ( nLen 
!= (size_t)-1 ) 
 204             wxCharBuffer 
buf(nLen
+3);       // space for a wxUint32 trailing zero 
 205             nLen 
= WC2MB(buf
.data(), pwz
, nLen 
+ 4); 
 206             if ( nLen 
!= (size_t)-1 ) 
 213     wxCharBuffer 
buf((char *)NULL
); 
 218 // ---------------------------------------------------------------------------- 
 220 // ---------------------------------------------------------------------------- 
 222 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 224     return wxMB2WC(buf
, psz
, n
); 
 227 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 229     return wxWC2MB(buf
, psz
, n
); 
 232 // ---------------------------------------------------------------------------- 
 234 // ---------------------------------------------------------------------------- 
     // Character classes for UTF-7.  The contents match "Set D" (directly
     // encoded characters) and "Set O" (optional direct characters) as
     // defined in RFC 2152.
     // NOTE(review): no use of these tables is visible in this excerpt; the
     // UTF-7 converters that follow are still marked TODO.
 237 static char utf7_setD
[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ" 
 238                         "abcdefghijklmnopqrstuvwxyz" 
 239                         "0123456789'(),-./:?"; 
 240 static char utf7_setO
[]="!\"#$%&*;<=>@[]^_`{|}"; 
 241 static char utf7_setB
[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ" 
 242                         "abcdefghijklmnopqrstuvwxyz" 
 246 // TODO: write actual implementations of UTF-7 here 
 247 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf
), 
 248                            const char * WXUNUSED(psz
), 
 249                            size_t WXUNUSED(n
)) const 
 254 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf
), 
 255                            const wchar_t * WXUNUSED(psz
), 
 256                            size_t WXUNUSED(n
)) const 
 261 // ---------------------------------------------------------------------------- 
 263 // ---------------------------------------------------------------------------- 
     // utf8_max[k] is the largest value that fits in a UTF-8 sequence with k
     // continuation bytes (k = 0..5).  The final 0xffffffff entry guarantees
     // that an upward scan of the form "cc > utf8_max[cnt]" terminates for
     // any 32-bit value.  wxMBConvUTF8::WC2MB scans the table that way, and
     // wxMBConvUTF8::MB2WC treats "res <= utf8_max[ocnt]" as an illegal
     // UTF-8 encoding.
 265 static wxUint32 utf8_max
[]= 
 266     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff }; 
 268 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 272     while (*psz 
&& ((!buf
) || (len 
< n
))) 
 274         unsigned char cc 
= *psz
++, fc 
= cc
; 
 276         for (cnt 
= 0; fc 
& 0x80; cnt
++) 
 290                 // invalid UTF-8 sequence 
 295                 unsigned ocnt 
= cnt 
- 1; 
 296                 wxUint32 res 
= cc 
& (0x3f >> cnt
); 
 300                     if ((cc 
& 0xC0) != 0x80) 
 302                         // invalid UTF-8 sequence 
 305                     res 
= (res 
<< 6) | (cc 
& 0x3f); 
 307                 if (res 
<= utf8_max
[ocnt
]) 
 309                     // illegal UTF-8 encoding 
 313                 // cast is ok because wchar_t == wxUint16 if WC_UTF16 
 314                 size_t pa 
= encode_utf16(res
, (wxUint16 
*)buf
); 
 315                 if (pa 
== (size_t)-1) 
 324 #endif // WC_UTF16/!WC_UTF16 
 328     if (buf 
&& (len 
< n
)) 
 333 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 337     while (*psz 
&& ((!buf
) || (len 
< n
))) 
 341         // cast is ok for WC_UTF16 
 342         size_t pa 
= decode_utf16((const wxUint16 
*)psz
, cc
); 
 343         psz 
+= (pa 
== (size_t)-1) ? 1 : pa
; 
 345         cc
=(*psz
++) & 0x7fffffff; 
 348         for (cnt 
= 0; cc 
> utf8_max
[cnt
]; cnt
++) {} 
 362                 *buf
++ = (char) ((-128 >> cnt
) | ((cc 
>> (cnt 
* 6)) & (0x3f >> cnt
))); 
 364                     *buf
++ = (char) (0x80 | ((cc 
>> (cnt 
* 6)) & 0x3f)); 
 369     if (buf 
&& (len
<n
)) *buf 
= 0; 
 377 // ---------------------------------------------------------------------------- 
 379 // ---------------------------------------------------------------------------- 
 381 #ifdef WORDS_BIGENDIAN 
 382     #define wxMBConvUTF16straight wxMBConvUTF16BE 
 383     #define wxMBConvUTF16swap     wxMBConvUTF16LE 
 385     #define wxMBConvUTF16swap     wxMBConvUTF16BE 
 386     #define wxMBConvUTF16straight wxMBConvUTF16LE 
 392 // copy 16bit MB to 16bit String 
 393 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 397     while (*(wxUint16
*)psz 
&& (!buf 
|| len 
< n
)) 
 400             *buf
++ = *(wxUint16
*)psz
; 
 403         psz 
+= sizeof(wxUint16
); 
 405     if (buf 
&& len
<n
)   *buf
=0; 
 411 // copy 16bit String to 16bit MB 
 412 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 416     while (*psz 
&& (!buf 
|| len 
< n
)) 
 420             *(wxUint16
*)buf 
= *psz
; 
 421             buf 
+= sizeof(wxUint16
); 
 423         len 
+= sizeof(wxUint16
); 
 426     if (buf 
&& len
<=n
-sizeof(wxUint16
))   *(wxUint16
*)buf
=0; 
 432 // swap 16bit MB to 16bit String 
 433 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 437     while (*(wxUint16
*)psz 
&& (!buf 
|| len 
< n
)) 
 441             ((char *)buf
)[0] = psz
[1]; 
 442             ((char *)buf
)[1] = psz
[0]; 
 446         psz 
+= sizeof(wxUint16
); 
 448     if (buf 
&& len
<n
)   *buf
=0; 
 454 // swap 16bit String to 16bit MB 
 455 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 459     while (*psz 
&& (!buf 
|| len 
< n
)) 
 463             *buf
++ = ((char*)psz
)[1]; 
 464             *buf
++ = ((char*)psz
)[0]; 
 466         len 
+= sizeof(wxUint16
); 
 469     if (buf 
&& len
<=n
-sizeof(wxUint16
))   *(wxUint16
*)buf
=0; 
 478 // copy 16bit MB to 32bit String 
 479 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 483     while (*(wxUint16
*)psz 
&& (!buf 
|| len 
< n
)) 
 486         size_t pa
=decode_utf16((wxUint16
*)psz
, cc
); 
 487         if (pa 
== (size_t)-1) 
 493         psz 
+= pa 
* sizeof(wxUint16
); 
 495     if (buf 
&& len
<n
)   *buf
=0; 
 501 // copy 32bit String to 16bit MB 
 502 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 506     while (*psz 
&& (!buf 
|| len 
< n
)) 
 509         size_t pa
=encode_utf16(*psz
, cc
); 
 511         if (pa 
== (size_t)-1) 
 516             *(wxUint16
*)buf 
= cc
[0]; 
 517             buf 
+= sizeof(wxUint16
); 
 520                 *(wxUint16
*)buf 
= cc
[1]; 
 521                 buf 
+= sizeof(wxUint16
); 
 525         len 
+= pa
*sizeof(wxUint16
); 
 528     if (buf 
&& len
<=n
-sizeof(wxUint16
))   *(wxUint16
*)buf
=0; 
 534 // swap 16bit MB to 32bit String 
 535 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 539     while (*(wxUint16
*)psz 
&& (!buf 
|| len 
< n
)) 
 543         tmp
[0]=psz
[1];  tmp
[1]=psz
[0]; 
 544         tmp
[2]=psz
[3];  tmp
[3]=psz
[2]; 
 546         size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
); 
 547         if (pa 
== (size_t)-1) 
 554         psz 
+= pa 
* sizeof(wxUint16
); 
 556     if (buf 
&& len
<n
)   *buf
=0; 
 562 // swap 32bit String to 16bit MB 
 563 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 567     while (*psz 
&& (!buf 
|| len 
< n
)) 
 570         size_t pa
=encode_utf16(*psz
, cc
); 
 572         if (pa 
== (size_t)-1) 
 577             *buf
++ = ((char*)cc
)[1]; 
 578             *buf
++ = ((char*)cc
)[0]; 
 581                 *buf
++ = ((char*)cc
)[3]; 
 582                 *buf
++ = ((char*)cc
)[2]; 
 586         len 
+= pa
*sizeof(wxUint16
); 
 589     if (buf 
&& len
<=n
-sizeof(wxUint16
))   *(wxUint16
*)buf
=0; 
 597 // ---------------------------------------------------------------------------- 
 599 // ---------------------------------------------------------------------------- 
 601 #ifdef WORDS_BIGENDIAN 
 602 #define wxMBConvUTF32straight  wxMBConvUTF32BE 
 603 #define wxMBConvUTF32swap      wxMBConvUTF32LE 
 605 #define wxMBConvUTF32swap      wxMBConvUTF32BE 
 606 #define wxMBConvUTF32straight  wxMBConvUTF32LE 
 610 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
; 
 611 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
; 
 616 // copy 32bit MB to 16bit String 
 617 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 621     while (*(wxUint32
*)psz 
&& (!buf 
|| len 
< n
)) 
 625         size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
); 
 626         if (pa 
== (size_t)-1) 
 636         psz 
+= sizeof(wxUint32
); 
 638     if (buf 
&& len
<n
)   *buf
=0; 
 644 // copy 16bit String to 32bit MB 
 645 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 649     while (*psz 
&& (!buf 
|| len 
< n
)) 
 653         // cast is ok for WC_UTF16 
 654         size_t pa 
= decode_utf16((const wxUint16 
*)psz
, cc
); 
 655         if (pa 
== (size_t)-1) 
 660             *(wxUint32
*)buf 
= cc
; 
 661             buf 
+= sizeof(wxUint32
); 
 663         len 
+= sizeof(wxUint32
); 
 667     if (buf 
&& len
<=n
-sizeof(wxUint32
)) 
 675 // swap 32bit MB to 16bit String 
 676 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 680     while (*(wxUint32
*)psz 
&& (!buf 
|| len 
< n
)) 
 683         tmp
[0] = psz
[3];   tmp
[1] = psz
[2]; 
 684         tmp
[2] = psz
[1];   tmp
[3] = psz
[0]; 
 689         size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
); 
 690         if (pa 
== (size_t)-1) 
 700         psz 
+= sizeof(wxUint32
); 
 710 // swap 16bit String to 32bit MB 
 711 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 715     while (*psz 
&& (!buf 
|| len 
< n
)) 
 719         // cast is ok for WC_UTF16 
 720         size_t pa
=decode_utf16((const wxUint16 
*)psz
, *(wxUint32
*)cc
); 
 721         if (pa 
== (size_t)-1) 
 731         len 
+= sizeof(wxUint32
); 
 735     if (buf 
&& len
<=n
-sizeof(wxUint32
)) 
 744 // copy 32bit MB to 32bit String 
 745 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 749     while (*(wxUint32
*)psz 
&& (!buf 
|| len 
< n
)) 
 752             *buf
++ = *(wxUint32
*)psz
; 
 754         psz 
+= sizeof(wxUint32
); 
 764 // copy 32bit String to 32bit MB 
 765 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 769     while (*psz 
&& (!buf 
|| len 
< n
)) 
 773             *(wxUint32
*)buf 
= *psz
; 
 774             buf 
+= sizeof(wxUint32
); 
 777         len 
+= sizeof(wxUint32
); 
 781     if (buf 
&& len
<=n
-sizeof(wxUint32
)) 
 788 // swap 32bit MB to 32bit String 
 789 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 793     while (*(wxUint32
*)psz 
&& (!buf 
|| len 
< n
)) 
 797             ((char *)buf
)[0] = psz
[3]; 
 798             ((char *)buf
)[1] = psz
[2]; 
 799             ((char *)buf
)[2] = psz
[1]; 
 800             ((char *)buf
)[3] = psz
[0]; 
 804         psz 
+= sizeof(wxUint32
); 
 814 // swap 32bit String to 32bit MB 
 815 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 819     while (*psz 
&& (!buf 
|| len 
< n
)) 
 823             *buf
++ = ((char *)psz
)[3]; 
 824             *buf
++ = ((char *)psz
)[2]; 
 825             *buf
++ = ((char *)psz
)[1]; 
 826             *buf
++ = ((char *)psz
)[0]; 
 828         len 
+= sizeof(wxUint32
); 
 832     if (buf 
&& len
<=n
-sizeof(wxUint32
)) 
 842 // ============================================================================ 
 843 // The classes doing conversion using the iconv_xxx() functions 
 844 // ============================================================================ 
 848 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG 
 849 //     if output buffer is _exactly_ as big as needed. Such case is (unless there's 
 850 //     yet another bug in glibc) the only case when iconv() returns with (size_t)-1 
 851 //     (which means error) and says there are 0 bytes left in the input buffer -- 
 852 //     when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence, 
 853 //     this alternative test for iconv() failure. 
 854 //     [This bug does not appear in glibc 2.2.] 
 855 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1 
 856 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \ 
 857                                      (errno != E2BIG || bufLeft != 0)) 
 859 #define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1) 
 862 #define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x)) 
 864 // ---------------------------------------------------------------------------- 
 865 // wxMBConv_iconv: encapsulates an iconv character set 
 866 // ---------------------------------------------------------------------------- 
 868 class wxMBConv_iconv 
: public wxMBConv
 
 871     wxMBConv_iconv(const wxChar 
*name
); 
 872     virtual ~wxMBConv_iconv(); 
 874     virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const; 
 875     virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const; 
 878         { return (m2w 
!= (iconv_t
)-1) && (w2m 
!= (iconv_t
)-1); } 
 881     // the iconv handlers used to translate from multibyte to wide char and in 
 882     // the other direction 
 887     // the name (for iconv_open()) of a wide char charset -- if none is 
 888     // available on this machine, it will remain NULL 
 889     static const char *ms_wcCharsetName
; 
 891     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has 
 892     // different endian-ness than the native one 
 893     static bool ms_wcNeedsSwap
; 
 896 const char *wxMBConv_iconv::ms_wcCharsetName 
= NULL
; 
 897 bool wxMBConv_iconv::ms_wcNeedsSwap 
= false; 
 899 wxMBConv_iconv::wxMBConv_iconv(const wxChar 
*name
) 
 901     // Do it the hard way 
 903     for (size_t i 
= 0; i 
< wxStrlen(name
)+1; i
++) 
 904         cname
[i
] = (char) name
[i
]; 
 906     // check for charset that represents wchar_t: 
 907     if (ms_wcCharsetName 
== NULL
) 
 909         ms_wcNeedsSwap 
= false; 
 911         // try charset with explicit bytesex info (e.g. "UCS-4LE"): 
 912         ms_wcCharsetName 
= WC_NAME_BEST
; 
 913         m2w 
= iconv_open(ms_wcCharsetName
, cname
); 
 915         if (m2w 
== (iconv_t
)-1) 
 917             // try charset w/o bytesex info (e.g. "UCS4") 
 918             // and check for bytesex ourselves: 
 919             ms_wcCharsetName 
= WC_NAME
; 
 920             m2w 
= iconv_open(ms_wcCharsetName
, cname
); 
 922             // last bet, try if it knows WCHAR_T pseudo-charset 
 923             if (m2w 
== (iconv_t
)-1) 
 925                 ms_wcCharsetName 
= "WCHAR_T"; 
 926                 m2w 
= iconv_open(ms_wcCharsetName
, cname
); 
 929             if (m2w 
!= (iconv_t
)-1) 
 931                 char    buf
[2], *bufPtr
; 
 932                 wchar_t wbuf
[2], *wbufPtr
; 
 940                 outsz 
= SIZEOF_WCHAR_T 
* 2; 
 944                 res 
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
, 
 945                             (char**)&wbufPtr
, &outsz
); 
 947                 if (ICONV_FAILED(res
, insz
)) 
 949                     ms_wcCharsetName 
= NULL
; 
 950                     wxLogLastError(wxT("iconv")); 
 951                     wxLogError(_("Conversion to charset '%s' doesn't work."), name
); 
 955                     ms_wcNeedsSwap 
= wbuf
[0] != (wchar_t)buf
[0]; 
 960                 ms_wcCharsetName 
= NULL
; 
 962                 // VS: we must not output an error here, since wxWidgets will safely 
 963                 //     fall back to using wxEncodingConverter. 
 964                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name
); 
 968         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName
, ms_wcNeedsSwap
); 
 970     else // we already have ms_wcCharsetName 
 972         m2w 
= iconv_open(ms_wcCharsetName
, cname
); 
 975     // NB: don't ever pass NULL to iconv_open(), it may crash! 
 976     if ( ms_wcCharsetName 
) 
 978         w2m 
= iconv_open( cname
, ms_wcCharsetName
); 
 986 wxMBConv_iconv::~wxMBConv_iconv() 
 988     if ( m2w 
!= (iconv_t
)-1 ) 
 990     if ( w2m 
!= (iconv_t
)-1 ) 
 994 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 996     size_t inbuf 
= strlen(psz
); 
 997     size_t outbuf 
= n 
* SIZEOF_WCHAR_T
; 
 999     // VS: Use these instead of psz, buf because iconv() modifies its arguments: 
1000     wchar_t *bufPtr 
= buf
; 
1001     const char *pszPtr 
= psz
; 
1005         // have destination buffer, convert there 
1007                      ICONV_CHAR_CAST(&pszPtr
), &inbuf
, 
1008                      (char**)&bufPtr
, &outbuf
); 
1009         res 
= n 
- (outbuf 
/ SIZEOF_WCHAR_T
); 
1013             // convert to native endianness 
1014             WC_BSWAP(buf 
/* _not_ bufPtr */, res
) 
1017         // NB: iconv was given only strlen(psz) characters on input, and so 
1018         //     it couldn't convert the trailing zero. Let's do it ourselves 
1019         //     if there's some room left for it in the output buffer. 
1025         // no destination buffer... convert using temp buffer 
1026         // to calculate destination buffer requirement 
1031             outbuf 
= 8*SIZEOF_WCHAR_T
; 
1034                          ICONV_CHAR_CAST(&pszPtr
), &inbuf
, 
1035                          (char**)&bufPtr
, &outbuf 
); 
1037             res 
+= 8-(outbuf
/SIZEOF_WCHAR_T
); 
1038         } while ((cres
==(size_t)-1) && (errno
==E2BIG
)); 
1041     if (ICONV_FAILED(cres
, inbuf
)) 
1043         //VS: it is ok if iconv fails, hence trace only 
1044         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); 
     // Convert the wide string psz to the multibyte charset through the w2m
     // iconv handle.  With a destination buffer the conversion is a single
     // iconv() call; without one, iconv() is called in a loop with buf reset
     // to tbuf and outbuf reset to 16 for as long as it fails with E2BIG, in
     // order to work out the required size (see the comments below).
1051 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1053     size_t inbuf 
= wxWcslen(psz
) * SIZEOF_WCHAR_T
; 
1057     wchar_t *tmpbuf 
= 0; 
1061         // need to copy to temp buffer to switch endianness 
1062         // this absolutely doesn't rock! 
1063         // (no, doing WC_BSWAP twice on the original buffer won't help, as it 
1064         //  could be in read-only memory, or be accessed in some other thread) 
     // NOTE(review): inbuf is already a byte count at this point
     // (wxWcslen(psz) * SIZEOF_WCHAR_T above), unless it is reassigned on a
     // line missing from this excerpt.  If so, the memcpy below reads
     // (inbuf+1)*SIZEOF_WCHAR_T bytes from psz, which is more than the string
     // occupies, and WC_BSWAP receives a byte count where its loop expects an
     // element count.  The intended sizes look like inbuf + SIZEOF_WCHAR_T
     // bytes and inbuf / SIZEOF_WCHAR_T elements -- confirm and fix.
     // NOTE(review): the malloc() result is passed straight to memcpy()
     // without a null check.
1065         tmpbuf
=(wchar_t*)malloc((inbuf
+1)*SIZEOF_WCHAR_T
); 
1066         memcpy(tmpbuf
,psz
,(inbuf
+1)*SIZEOF_WCHAR_T
); 
1067         WC_BSWAP(tmpbuf
, inbuf
) 
1073         // have destination buffer, convert there 
1074         cres 
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf 
); 
1078         // NB: iconv was given only wcslen(psz) characters on input, and so 
1079         //     it couldn't convert the trailing zero. Let's do it ourselves 
1080         //     if there's some room left for it in the output buffer. 
1086         // no destination buffer... convert using temp buffer 
1087         // to calculate destination buffer requirement 
1091             buf 
= tbuf
; outbuf 
= 16; 
1093             cres 
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf 
); 
1096         } while ((cres
==(size_t)-1) && (errno
==E2BIG
)); 
1104     if (ICONV_FAILED(cres
, inbuf
)) 
1106         //VS: it is ok if iconv fails, hence trace only 
1107         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); 
1114 #endif // HAVE_ICONV 
1117 // ============================================================================ 
1118 // Win32 conversion classes 
1119 // ============================================================================ 
1121 #ifdef wxHAVE_WIN32_MB2WC 
1125 extern WXDLLIMPEXP_BASE 
long wxCharsetToCodepage(const wxChar 
*charset
); 
1126 extern WXDLLIMPEXP_BASE 
long wxEncodingToCodepage(wxFontEncoding encoding
); 
1129 class wxMBConv_win32 
: public wxMBConv
 
1134         m_CodePage 
= CP_ACP
; 
1138     wxMBConv_win32(const wxChar
* name
) 
1140         m_CodePage 
= wxCharsetToCodepage(name
); 
1143     wxMBConv_win32(wxFontEncoding encoding
) 
1145         m_CodePage 
= wxEncodingToCodepage(encoding
); 
1149     size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1151         // note that we have to use MB_ERR_INVALID_CHARS flag as without it 
1152         // the behaviour is not compatible with the Unix version (using iconv) 
1153         // and breaks the library itself, e.g. wxTextInputStream::NextChar() 
1154         // wouldn't work if reading an incomplete MB char didn't result in an 
1156         const size_t len 
= ::MultiByteToWideChar
 
1158                                 m_CodePage
,     // code page 
1159                                 MB_ERR_INVALID_CHARS
, // flags: fall on error 
1160                                 psz
,            // input string 
1161                                 -1,             // its length (NUL-terminated) 
1162                                 buf
,            // output string 
1163                                 buf 
? n 
: 0     // size of output buffer 
1166         // note that it returns count of written chars for buf != NULL and size 
1167         // of the needed buffer for buf == NULL so in either case the length of 
1168         // the string (which never includes the terminating NUL) is one less 
1169         return len 
? len 
- 1 : (size_t)-1; 
1172     size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const 
1175             we have a problem here: by default, WideCharToMultiByte() may 
1176             replace characters unrepresentable in the target code page with bad 
1177             quality approximations such as turning "1/2" symbol (U+00BD) into 
1178             "1" for the code pages which don't have it and we, obviously, want 
1179             to avoid this at any price 
1181             the trouble is that this function does it _silently_, i.e. it won't 
1182             even tell us whether it did or not... Win98/2000 and higher provide 
1183             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and 
1184             we have to resort to a round trip, i.e. check that converting back 
1185             results in the same string -- this is, of course, expensive but 
1186             otherwise we simply can't be sure to not garble the data. 
1189         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN 
1190         // it doesn't work with CJK encodings (which we test for rather roughly 
1191         // here...) nor with UTF-7/8 nor, of course, with Windows versions not 
1193         BOOL usedDef 
wxDUMMY_INITIALIZE(false), 
1196         if ( CanUseNoBestFit() && m_CodePage 
< 50000 ) 
1198             // it's our lucky day 
1199             flags 
= WC_NO_BEST_FIT_CHARS
; 
1200             pUsedDef 
= &usedDef
; 
1202         else // old system or unsupported encoding 
1208         const size_t len 
= ::WideCharToMultiByte
 
1210                                 m_CodePage
,     // code page 
1211                                 flags
,          // either none or no best fit 
1212                                 pwz
,            // input string 
1213                                 -1,             // it is (wide) NUL-terminated 
1214                                 buf
,            // output buffer 
1215                                 buf 
? n 
: 0,    // and its size 
1216                                 NULL
,           // default "replacement" char 
1217                                 pUsedDef        
// [out] was it used? 
1222             // function totally failed 
1226         // if we were really converting, check if we succeeded 
1231                 // check if the conversion failed, i.e. if any replacements 
1236             else // we must resort to double tripping... 
1238                 wxWCharBuffer 
wcBuf(n
); 
1239                 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 || 
1240                         wcscmp(wcBuf
, pwz
) != 0 ) 
1242                     // we didn't obtain the same thing we started from, hence 
1243                     // the conversion was lossy and we consider that it failed 
1249         // see the comment above for the reason of "len - 1" 
// Reports whether this converter can be used: true unless m_CodePage holds -1.
// NOTE(review): -1 presumably marks a code page that could not be resolved --
// confirm against the constructors, which are outside this excerpt.
1253     bool IsOk() const { return m_CodePage 
!= -1; } 
// Tells the caller whether WC_NO_BEST_FIT_CHARS may be relied upon on the
// running system. The verdict is derived once from wxGetOsVersion() and then
// cached in a function-local static: -1 means "not determined yet", and only
// a cached value of 1 makes this function return true.
1256     static bool CanUseNoBestFit() 
1258         static int s_isWin98Or2k 
= -1; 
1260         if ( s_isWin98Or2k 
== -1 ) 
1263             switch ( wxGetOsVersion(&verMaj
, &verMin
) ) 
// this branch requires both major >= 4 and minor >= 10
1266                     s_isWin98Or2k 
= verMaj 
>= 4 && verMin 
>= 10; 
// this branch only looks at the major version, which must be >= 5
1270                     s_isWin98Or2k 
= verMaj 
>= 5; 
1274                     // unknown, be conservative by default 
1278             wxASSERT_MSG( s_isWin98Or2k 
!= -1, _T("should be set above") ); 
1281         return s_isWin98Or2k 
== 1; 
1287 #endif // wxHAVE_WIN32_MB2WC 
1289 // ============================================================================ 
1290 // Mac conversion classes 
1291 // ============================================================================ 
1293 #if defined(__WXMAC__) && defined(TARGET_CARBON) 
// wxMBConv implementation for Carbon builds that hands the actual work to the
// Text Encoding Converter API: one TECObjectRef is kept per direction
// (m_MB2WC_converter and m_WC2MB_converter).
1295 class wxMBConv_mac 
: public wxMBConv
 
// NOTE(review): the header of the enclosing constructor is not visible in
// this excerpt; the call below passes CFStringGetSystemEncoding() to Init().
1300         Init(CFStringGetSystemEncoding()) ; 
// Builds the converter from a charset name: the name is mapped by
// wxFontMapper::CharsetToEncoding() and the result is translated by
// wxMacGetSystemEncFromFontEnc() before being handed to Init().
1303     wxMBConv_mac(const wxChar
* name
) 
1305         Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name
, FALSE
) ) ) ; 
// Builds the converter from a wxFontEncoding, translated by
// wxMacGetSystemEncFromFontEnc() before being handed to Init().
1308     wxMBConv_mac(wxFontEncoding encoding
) 
1310         Init( wxMacGetSystemEncFromFontEnc(encoding
) ); 
// Both converters are released with TECDisposeConverter(); the status of the
// first call is overwritten by the second one.
// NOTE(review): presumably the destructor body -- its header is not visible.
1315             OSStatus status 
= noErr 
; 
1316             status 
= TECDisposeConverter(m_MB2WC_converter
); 
1317             status 
= TECDisposeConverter(m_WC2MB_converter
);             
// Remembers the multibyte encoding, builds the Unicode encoding with
// CreateTextEncoding(kTextEncodingUnicodeDefault, 0, kUnicode16BitFormat) and
// creates the two converter objects with TECCreateConverter().
1321         void Init( TextEncodingBase encoding
) 
1323             OSStatus status 
= noErr 
; 
1324                 m_char_encoding 
= encoding 
; 
1325                 m_unicode_encoding 
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ; 
1327             status 
= TECCreateConverter(&m_MB2WC_converter
, 
1329                                         m_unicode_encoding
); 
1330             status 
= TECCreateConverter(&m_WC2MB_converter
, 
// Converts the NUL-terminated multibyte string psz (its length is taken with
// strlen) through m_MB2WC_converter. The destination is "buf ? buf : tbuf",
// i.e. the scratch buffer tbuf stands in when buf is NULL; tbuf is malloc'ed
// under a condition that is not visible in this excerpt.
// With a 4-byte wchar_t the converter writes into a separately malloc'ed
// UniChar buffer, which is NUL-terminated and then passed through
// wxMBConvUTF16BE::MB2WC(). The other visible assignment of ubuf aliases the
// destination directly and the result is byteOutLen / sizeof(UniChar)
// (presumably the #else branch -- the directive itself is not shown).
// NOTE(review): no free() for tbuf/ubuf and no return statement are visible
// here -- confirm both against the full file.
1335     size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1337             OSStatus status 
= noErr 
; 
1338             ByteCount byteOutLen 
; 
1339             ByteCount byteInLen 
= strlen(psz
) ; 
1340                 wchar_t *tbuf 
= NULL 
; 
1341                 UniChar
* ubuf 
= NULL 
; 
1347                         tbuf 
= (wchar_t*) malloc( n 
* SIZEOF_WCHAR_T
) ; 
1349             ByteCount byteBufferLen 
= n 
* sizeof( UniChar 
) ;  
1350 #if SIZEOF_WCHAR_T == 4 
1351                 ubuf 
= (UniChar
*) malloc( byteBufferLen 
+ 2 ) ; 
1353                 ubuf 
= (UniChar
*) (buf 
? buf 
: tbuf
) ; 
1355             status 
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz 
, byteInLen
, &byteInLen
, 
1356               (TextPtr
) ubuf 
, byteBufferLen
, &byteOutLen
); 
1357 #if SIZEOF_WCHAR_T == 4 
1358         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar 
1359         // is not properly terminated we get random characters at the end 
1360         ubuf
[byteOutLen 
/ sizeof( UniChar 
) ] = 0 ; 
1361                 wxMBConvUTF16BE converter 
; 
1362                 res 
= converter
.MB2WC( (buf 
? buf 
: tbuf
) , (const char*)ubuf 
, n 
) ; 
1365                 res 
= byteOutLen 
/ sizeof( UniChar 
) ; 
// NOTE(review): the statement guarded by this test is not visible; presumably
// it NUL-terminates buf when there is room left -- confirm.
1370         if ( buf  
&& res 
< n
) 
// Converts the NUL-terminated wide string psz through m_WC2MB_converter into
// "buf ? buf : tbuf"; tbuf is a malloc'ed scratch buffer of n bytes whose
// guarding condition is not visible here.
// With a 4-byte wchar_t the input is first turned into a malloc'ed UniChar
// buffer by wxMBConvUTF16BE::WC2MB(): one call with a NULL buffer obtains the
// length, a second call fills the buffer. The other visible assignment of
// ubuf simply aliases psz (presumably the #else branch).
// The visible result value is byteOutLen as reported by TECConvertText().
// NOTE(review): no free() for tbuf/ubuf and no return statement are visible
// here -- confirm both against the full file.
1376     size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1378             OSStatus status 
= noErr 
; 
1379             ByteCount byteOutLen 
; 
1380             ByteCount byteInLen 
= wxWcslen(psz
) * SIZEOF_WCHAR_T 
; 
1388                         tbuf 
= (char*) malloc( n 
) ; 
1391             ByteCount byteBufferLen 
= n 
; 
1392                 UniChar
* ubuf 
= NULL 
; 
1393 #if SIZEOF_WCHAR_T == 4 
1394                 wxMBConvUTF16BE converter 
; 
1395                 size_t unicharlen 
= converter
.WC2MB( NULL 
, psz 
, 0 ) ; 
1396                 byteInLen 
= unicharlen 
; 
1397                 ubuf 
= (UniChar
*) malloc( byteInLen 
+ 2 ) ; 
1398                 converter
.WC2MB( (char*) ubuf 
, psz
, unicharlen 
+ 2 ) ; 
1400                 ubuf 
= (UniChar
*) psz 
; 
1402             status 
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf 
, byteInLen
, &byteInLen
, 
1403                (TextPtr
) (buf 
? buf 
: tbuf
) , byteBufferLen
, &byteOutLen
); 
1404 #if SIZEOF_WCHAR_T == 4 
1410                 size_t res 
= byteOutLen 
; 
1411         if ( buf  
&& res 
< n
) 
// The object is usable only when both converter references are non-NULL.
1418         { return m_MB2WC_converter 
!=  NULL 
&& m_WC2MB_converter 
!= NULL  
; } 
// converter objects, one per conversion direction
1421         TECObjectRef m_MB2WC_converter 
; 
1422         TECObjectRef m_WC2MB_converter 
; 
// the multibyte encoding passed to Init() and the Unicode encoding built there
1424         TextEncodingBase m_char_encoding 
; 
1425         TextEncodingBase m_unicode_encoding 
; 
1428 #endif // defined(__WXMAC__) && defined(TARGET_CARBON) 
1430 // ============================================================================ 
1431 // wxEncodingConverter based conversion classes 
1432 // ============================================================================ 
// wxMBConv implementation layered on wxEncodingConverter: m2w is initialised
// to convert from m_enc to wxFONTENCODING_UNICODE and w2m to convert from
// wxFONTENCODING_UNICODE back to m_enc.
1436 class wxMBConv_wxwin 
: public wxMBConv
 
// m_ok ends up true only if both converters initialise successfully
// NOTE(review): the header of the enclosing helper is not visible here.
1441         m_ok 
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) && 
1442                w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
); 
1446     // temporarily just use wxEncodingConverter stuff, 
1447     // so that it works while a better implementation is built 
// Constructor taking a charset name: one visible assignment resolves the name
// with wxFontMapper::CharsetToEncoding(name, false), the other falls back to
// wxFONTENCODING_SYSTEM; the condition choosing between them is not visible.
1448     wxMBConv_wxwin(const wxChar
* name
) 
1451             m_enc 
= wxFontMapper::Get()->CharsetToEncoding(name
, false); 
1453             m_enc 
= wxFONTENCODING_SYSTEM
; 
// Constructor taking the encoding directly (body not visible in this excerpt).
1458     wxMBConv_wxwin(wxFontEncoding enc
) 
// Multibyte to wide conversion: the input length is strlen(psz) and the
// conversion itself is m2w.Convert(psz, buf). The size argument is marked
// WXUNUSED, so it does not limit the output in the visible code.
1465     size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const 
1467         size_t inbuf 
= strlen(psz
); 
1469             m2w
.Convert(psz
,buf
); 
// Wide to multibyte conversion: the input length is wxWcslen(psz) and the
// conversion itself is w2m.Convert(psz, buf). The size argument is marked
// WXUNUSED, so it does not limit the output in the visible code.
1473     size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const 
1475         const size_t inbuf 
= wxWcslen(psz
); 
1477             w2m
.Convert(psz
,buf
); 
// Returns m_ok, i.e. whether both converters were initialised successfully.
1482     bool IsOk() const { return m_ok
; } 
// the multibyte encoding this object converts to and from
1485     wxFontEncoding m_enc
; 
// m2w: m_enc -> Unicode, w2m: Unicode -> m_enc
1486     wxEncodingConverter m2w
, w2m
; 
1488     // were we initialized successfully? 
1491     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
) 
1494 #endif // wxUSE_FONTMAP 
1496 // ============================================================================ 
1497 // wxCSConv implementation 
1498 // ============================================================================ 
1500 void wxCSConv::Init() 
// Constructs the converter from a charset name. The only statement visible in
// this excerpt sets m_encoding to wxFONTENCODING_SYSTEM.
// NOTE(review): how charset itself is stored is not visible here -- confirm
// against the full file.
1507 wxCSConv::wxCSConv(const wxChar 
*charset
) 
1516     m_encoding 
= wxFONTENCODING_SYSTEM
; 
// Constructs the converter from a font encoding. wxFONTENCODING_MAX and
// wxFONTENCODING_DEFAULT are rejected: they trigger wxFAIL_MSG and are
// replaced by wxFONTENCODING_SYSTEM. The resulting value is stored in
// m_encoding.
1519 wxCSConv::wxCSConv(wxFontEncoding encoding
) 
1521     if ( encoding 
== wxFONTENCODING_MAX 
|| encoding 
== wxFONTENCODING_DEFAULT 
) 
1523         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") ); 
1525         encoding 
= wxFONTENCODING_SYSTEM
; 
1530     m_encoding 
= encoding
; 
1533 wxCSConv::~wxCSConv() 
// Copy constructor: takes over the other object's charset name through
// SetName() and copies its m_encoding.
// NOTE(review): any other initialisation done here is not visible in this
// excerpt.
1538 wxCSConv::wxCSConv(const wxCSConv
& conv
) 
1543     SetName(conv
.m_name
); 
1544     m_encoding 
= conv
.m_encoding
; 
// Assignment: takes over the other object's charset name through SetName()
// and copies its m_encoding.
// NOTE(review): whether the previous state is cleared first and what is
// returned are not visible in this excerpt.
1547 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
) 
1551     SetName(conv
.m_name
); 
1552     m_encoding 
= conv
.m_encoding
; 
1557 void wxCSConv::Clear() 
// Stores the charset name in m_name as a duplicate made with wxStrdup().
// NOTE(review): the condition guarding the assignment (presumably a NULL
// check on charset) is not visible in this excerpt.
1566 void wxCSConv::SetName(const wxChar 
*charset
) 
1570         m_name 
= wxStrdup(charset
); 
// Creates the concrete wxMBConv object for this charset. Candidates are tried
// in the order spelled out in the comment inside: an OS-level converter
// (wxMBConv_iconv, wxMBConv_win32 or wxMBConv_mac, depending on the build),
// then the built-in UTF-7/8/16/32 classes, then wxMBConv_wxwin. When nothing
// could be created an error is logged, guarded by a static flag so that the
// logging cannot re-enter itself.
// NOTE(review): the tests deciding whether a candidate is accepted and the
// final return value are not visible in this excerpt.
1575 wxMBConv 
*wxCSConv::DoCreate() const 
1577     // check for the special case of ASCII or ISO8859-1 charset: as we have 
1578     // special knowledge of it anyhow, we don't need to create a special 
1579     // conversion object 
1580     if ( m_encoding 
== wxFONTENCODING_ISO8859_1 
) 
1582         // don't convert at all 
1586     // we trust OS to do conversion better than we can so try external 
1587     // conversion methods first 
1589     // the full order is: 
1590     //      1. OS conversion (iconv() under Unix or Win32 API) 
1591     //      2. hard coded conversions for UTF 
1592     //      3. wxEncodingConverter as fall back 
1598 #endif // !wxUSE_FONTMAP 
// iconv is addressed by name: m_name, or the name wxFontMapper reports for
// m_encoding
1600         wxString 
name(m_name
); 
1604             name 
= wxFontMapper::Get()->GetEncodingName(m_encoding
); 
1605 #endif // wxUSE_FONTMAP 
1607         wxMBConv_iconv 
*conv 
= new wxMBConv_iconv(name
); 
1613 #endif // HAVE_ICONV 
1615 #ifdef wxHAVE_WIN32_MB2WC 
// the charset name, when set, takes precedence over the encoding value
1618         wxMBConv_win32 
*conv 
= m_name 
? new wxMBConv_win32(m_name
) 
1619                                       : new wxMBConv_win32(m_encoding
); 
1628 #endif // wxHAVE_WIN32_MB2WC 
1629 #if defined(__WXMAC__)  
// the Mac converter is only tried for a named charset or for encodings
// ordered before wxFONTENCODING_UTF16BE
1631         if ( m_name 
|| ( m_encoding 
< wxFONTENCODING_UTF16BE 
) ) 
1634                 wxMBConv_mac 
*conv 
= m_name 
? new wxMBConv_mac(m_name
) 
1635                                             : new wxMBConv_mac(m_encoding
); 
1644     wxFontEncoding enc 
= m_encoding
; 
1646     if ( enc 
== wxFONTENCODING_SYSTEM 
&& m_name 
) 
1648         // use "false" to suppress interactive dialogs -- we can be called from 
1649         // anywhere and popping up a dialog from here is the last thing we want to do 
1651         enc 
= wxFontMapper::Get()->CharsetToEncoding(m_name
, false); 
1653 #endif // wxUSE_FONTMAP 
// built-in converters for the UTF encodings
1657         case wxFONTENCODING_UTF7
: 
1658              return new wxMBConvUTF7
; 
1660         case wxFONTENCODING_UTF8
: 
1661              return new wxMBConvUTF8
; 
1663         case wxFONTENCODING_UTF16BE
: 
1664              return new wxMBConvUTF16BE
; 
1666         case wxFONTENCODING_UTF16LE
: 
1667              return new wxMBConvUTF16LE
; 
1669         case wxFONTENCODING_UTF32BE
: 
1670              return new wxMBConvUTF32BE
; 
1672         case wxFONTENCODING_UTF32LE
: 
1673              return new wxMBConvUTF32LE
; 
1676              // nothing to do but put here to suppress gcc warnings 
1683         wxMBConv_wxwin 
*conv 
= m_name 
? new wxMBConv_wxwin(m_name
) 
1684                                       : new wxMBConv_wxwin(m_encoding
); 
1690 #endif // wxUSE_FONTMAP 
1692     // NB: This is a hack to prevent deadlock. What could otherwise happen 
1693     //     in Unicode build: wxConvLocal creation ends up being here 
1694     //     because of some failure and logs the error. But wxLog will try to 
1695     //     attach timestamp, for which it will need wxConvLocal (to convert 
1696     //     time to char* and then wchar_t*), but that fails, tries to log 
1697     //     error, but wxLog has an (already locked) critical section that 
1698     //     guards static buffer. 
1699     static bool alreadyLoggingError 
= false; 
1700     if (!alreadyLoggingError
) 
1702         alreadyLoggingError 
= true; 
1703         wxLogError(_("Cannot convert from the charset '%s'!"), 
1707                          wxFontMapper::GetEncodingDescription(m_encoding
).c_str() 
1708 #else // !wxUSE_FONTMAP 
// NOTE(review): m_encoding is compared against wxFONTENCODING_* values above,
// i.e. it is an enum, yet it is formatted with %s here -- this looks like a
// format/argument mismatch (%d would be expected); confirm and fix.
1709                          wxString::Format(_("encoding %s"), m_encoding
).c_str() 
1710 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP 
1712         alreadyLoggingError 
= false; 
// Creates the real converter on demand. The method is const, so constness is
// cast away to let it fill in m_name, m_convReal and m_deferred.
// NOTE(review): the test that makes this run only once (presumably on
// m_deferred, which is reset to false at the end) is not visible here.
1718 void wxCSConv::CreateConvIfNeeded() const 
1722         wxCSConv 
*self 
= (wxCSConv 
*)this; // const_cast 
1725         // if we have neither the name nor the encoding, use the default 
1726         // encoding for this system 
1727         if ( !m_name 
&& m_encoding 
== wxFONTENCODING_SYSTEM 
) 
1729             self
->m_name 
= wxStrdup(wxLocale::GetSystemEncodingName()); 
1731 #endif // wxUSE_INTL 
1733         self
->m_convReal 
= DoCreate(); 
1734         self
->m_deferred 
= false; 
// Multibyte to wide conversion. After making sure the real converter has been
// created, the call is forwarded to m_convReal->MB2WC(). The remaining code
// is a direct fallback that widens every byte of psz, read as unsigned char,
// into buf; the loop runs up to and including index len, so the terminating
// NUL is copied too.
// NOTE(review): none of the visible lines of the fallback bound the copy by
// n -- confirm that callers always pass a buffer of at least strlen(psz) + 1
// elements, or that a bound exists on a line not shown here.
1738 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1740     CreateConvIfNeeded(); 
1743         return m_convReal
->MB2WC(buf
, psz
, n
); 
1746     size_t len 
= strlen(psz
); 
1750         for (size_t c 
= 0; c 
<= len
; c
++) 
1751             buf
[c
] = (unsigned char)(psz
[c
]); 
// Wide to multibyte conversion. After making sure the real converter has been
// created, the call is forwarded to m_convReal->WC2MB(). The remaining code
// is a fallback made of two loops over the indices 0..len inclusive, where
// len is wxWcslen(psz).
// NOTE(review): the bodies of both loops, the conditions selecting them and
// the return value are not visible in this excerpt.
1757 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1759     CreateConvIfNeeded(); 
1762         return m_convReal
->WC2MB(buf
, psz
, n
); 
1765     const size_t len 
= wxWcslen(psz
); 
1768         for (size_t c 
= 0; c 
<= len
; c
++) 
1777         for (size_t c 
= 0; c 
<= len
; c
++) 
1787 // ---------------------------------------------------------------------------- 
1789 // ---------------------------------------------------------------------------- 
// File-static converter instances and the exported references bound to them.
// The concrete type of wxConvLibcObj is selected by the preprocessor:
// wxMBConv_win32, wxMBConv_mac or wxMBConvLibc.
1792     static wxMBConv_win32 wxConvLibcObj
; 
1793 #elif defined(__WXMAC__) && !defined(__MACH__) 
1794     static wxMBConv_mac wxConvLibcObj 
; 
1796     static wxMBConvLibc wxConvLibcObj
; 
1799 static wxCSConv 
wxConvLocalObj(wxFONTENCODING_SYSTEM
); 
1800 static wxCSConv 
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
); 
1801 static wxMBConvUTF7 wxConvUTF7Obj
; 
1802 static wxMBConvUTF8 wxConvUTF8Obj
; 
// exported names: each one is a reference to the matching static object above
1805 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc 
= wxConvLibcObj
; 
1806 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal 
= wxConvLocalObj
; 
1807 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1 
= wxConvISO8859_1Obj
; 
1808 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7 
= wxConvUTF7Obj
; 
1809 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8 
= wxConvUTF8Obj
; 
// wxConvCurrent is a pointer and starts out pointing at wxConvLibcObj
1810 WXDLLIMPEXP_DATA_BASE(wxMBConv 
*) wxConvCurrent 
= &wxConvLibcObj
; 
1812 #else // !wxUSE_WCHAR_T 
1814 // stand-ins in absence of wchar_t 
1815 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
, 
1820 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T