1 ///////////////////////////////////////////////////////////////////////////// 
   3 // Purpose:     Unicode conversion classes 
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik, 
   5 //              Ryan Norton, Fredrik Roubert (UTF7) 
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik 
  10 //              (c) 2000-2003 Vadim Zeitlin 
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert 
  12 // Licence:     wxWindows licence 
  13 ///////////////////////////////////////////////////////////////////////////// 
  15 // ============================================================================ 
  17 // ============================================================================ 
  19 // ---------------------------------------------------------------------------- 
  21 // ---------------------------------------------------------------------------- 
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA) 
  24   #pragma implementation "strconv.h" 
  27 // For compilers that support precompilation, includes "wx.h". 
  28 #include "wx/wxprec.h" 
  39 #include "wx/strconv.h" 
  44     #include "wx/msw/private.h" 
  48     #include "wx/msw/missing.h" 
  59 #if defined(__WIN32__) && !defined(__WXMICROWIN__) 
  60     #define wxHAVE_WIN32_MB2WC 
  61 #endif // __WIN32__ but !__WXMICROWIN__ 
  63 // ---------------------------------------------------------------------------- 
  65 // ---------------------------------------------------------------------------- 
  73     #include "wx/thread.h" 
  76 #include "wx/encconv.h" 
  77 #include "wx/fontmap.h" 
  81 #include <ATSUnicode.h> 
  82 #include <TextCommon.h> 
  83 #include <TextEncodingConverter.h> 
  85 #include  "wx/mac/private.h"  // includes mac headers 
  87 // ---------------------------------------------------------------------------- 
  89 // ---------------------------------------------------------------------------- 
  91 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); } 
  92 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); } 
  94 #if SIZEOF_WCHAR_T == 4 
  95     #define WC_NAME         "UCS4" 
  96     #define WC_BSWAP         BSWAP_UCS4 
  97     #ifdef WORDS_BIGENDIAN 
  98       #define WC_NAME_BEST  "UCS-4BE" 
 100       #define WC_NAME_BEST  "UCS-4LE" 
 102 #elif SIZEOF_WCHAR_T == 2 
 103     #define WC_NAME         "UTF16" 
 104     #define WC_BSWAP         BSWAP_UTF16 
 106     #ifdef WORDS_BIGENDIAN 
 107       #define WC_NAME_BEST  "UTF-16BE" 
 109       #define WC_NAME_BEST  "UTF-16LE" 
 111 #else // sizeof(wchar_t) != 2 nor 4 
 112     // does this ever happen? 
 113     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org" 
 116 // ============================================================================ 
 118 // ============================================================================ 
 120 // ---------------------------------------------------------------------------- 
 121 // UTF-16 en/decoding to/from UCS-4 
 122 // ---------------------------------------------------------------------------- 
 125 static size_t encode_utf16(wxUint32 input
, wxUint16 
*output
) 
 130             *output 
= (wxUint16
) input
; 
 133     else if (input
>=0x110000) 
 141             *output
++ = (wxUint16
) ((input 
>> 10)+0xd7c0); 
 142             *output 
= (wxUint16
) ((input
&0x3ff)+0xdc00); 
 148 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
) 
 150     if ((*input
<0xd800) || (*input
>0xdfff)) 
 155     else if ((input
[1]<0xdc00) || (input
[1]>0xdfff)) 
 162         output 
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00); 
 168 // ---------------------------------------------------------------------------- 
 170 // ---------------------------------------------------------------------------- 
 172 wxMBConv::~wxMBConv() 
 174     // nothing to do here (necessary for Darwin linking probably) 
 177 const wxWCharBuffer 
wxMBConv::cMB2WC(const char *psz
) const 
 181         // calculate the length of the buffer needed first 
 182         size_t nLen 
= MB2WC(NULL
, psz
, 0); 
 183         if ( nLen 
!= (size_t)-1 ) 
 185             // now do the actual conversion 
 186             wxWCharBuffer 
buf(nLen
); 
 187             nLen 
= MB2WC(buf
.data(), psz
, nLen 
+ 1); // with the trailing NULL 
 188             if ( nLen 
!= (size_t)-1 ) 
 195     wxWCharBuffer 
buf((wchar_t *)NULL
); 
 200 const wxCharBuffer 
wxMBConv::cWC2MB(const wchar_t *pwz
) const 
 204         size_t nLen 
= WC2MB(NULL
, pwz
, 0); 
 205         if ( nLen 
!= (size_t)-1 ) 
 207             wxCharBuffer 
buf(nLen
+3);       // space for a wxUint32 trailing zero 
 208             nLen 
= WC2MB(buf
.data(), pwz
, nLen 
+ 4); 
 209             if ( nLen 
!= (size_t)-1 ) 
 216     wxCharBuffer 
buf((char *)NULL
); 
 221 const wxWCharBuffer 
wxMBConv::cMB2WC(const char *szString
, size_t nStringLen
, size_t* pOutSize
) const 
 223     wxASSERT(pOutSize 
!= NULL
); 
 225     const char* szEnd 
= szString 
+ nStringLen 
+ 1; 
 226     const char* szPos 
= szString
; 
 227     const char* szStart 
= szPos
; 
 229     size_t nActualLength 
= 0; 
 230     size_t nCurrentSize 
= nStringLen
; //try normal size first (should never resize?) 
 232     wxWCharBuffer 
theBuffer(nCurrentSize
); 
 234     //Convert the string until the length() is reached, continuing the 
 235     //loop every time a null character is reached 
 236     while(szPos 
!= szEnd
) 
 238         wxASSERT(szPos 
< szEnd
); //something is _really_ screwed up if this rings true 
 240         //Get the length of the current (sub)string 
 241         size_t nLen 
= MB2WC(NULL
, szPos
, 0); 
 243         //Invalid conversion? 
 244         if( nLen 
== (size_t)-1 ) 
 247             theBuffer
.data()[0u] = wxT('\0'); 
 252         //Increase the actual length (+1 for current null character) 
 253         nActualLength 
+= nLen 
+ 1; 
 255         //if buffer too big, realloc the buffer 
 256         if (nActualLength 
> (nCurrentSize
+1)) 
 258             wxWCharBuffer 
theNewBuffer(nCurrentSize 
<< 1); 
 259             memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize 
* sizeof(wchar_t)); 
 260             theBuffer 
= theNewBuffer
; 
 264         //Convert the current (sub)string 
 265         if ( MB2WC(&theBuffer
.data()[szPos 
- szStart
], szPos
, nLen 
+ 1) == (size_t)-1 ) 
 268             theBuffer
.data()[0u] = wxT('\0'); 
 272         //Increment to next (sub)string 
 273         //Note that we have to use strlen instead of nLen here 
 274         //because XX2XX gives us the size of the output buffer, 
 275         //which is not necessarily the length of the string 
 276         szPos 
+= strlen(szPos
) + 1; 
 279     //success - return actual length and the buffer 
 280     *pOutSize 
= nActualLength
; 
 284 const wxCharBuffer 
wxMBConv::cWC2MB(const wchar_t *szString
, size_t nStringLen
, size_t* pOutSize
) const 
 286     wxASSERT(pOutSize 
!= NULL
); 
 288     const wchar_t* szEnd 
= szString 
+ nStringLen 
+ 1; 
 289     const wchar_t* szPos 
= szString
; 
 290     const wchar_t* szStart 
= szPos
; 
 292     size_t nActualLength 
= 0; 
 293     size_t nCurrentSize 
= nStringLen 
<< 2; //try * 4 first 
 295     wxCharBuffer 
theBuffer(nCurrentSize
); 
 297     //Convert the string until the length() is reached, continuing the 
 298     //loop every time a null character is reached 
 299     while(szPos 
!= szEnd
) 
 301         wxASSERT(szPos 
< szEnd
); //something is _really_ screwed up if this rings true 
 303         //Get the length of the current (sub)string 
 304         size_t nLen 
= WC2MB(NULL
, szPos
, 0); 
 306         //Invalid conversion? 
 307         if( nLen 
== (size_t)-1 ) 
 310             theBuffer
.data()[0u] = wxT('\0'); 
 314         //Increase the actual length (+1 for current null character) 
 315         nActualLength 
+= nLen 
+ 1; 
 317         //if buffer too big, realloc the buffer 
 318         if (nActualLength 
> (nCurrentSize
+1)) 
 320             wxCharBuffer 
theNewBuffer(nCurrentSize 
<< 1); 
 321             memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
); 
 322             theBuffer 
= theNewBuffer
; 
 326         //Convert the current (sub)string 
 327         if(WC2MB(&theBuffer
.data()[szPos 
- szStart
], szPos
, nLen 
+ 1) == (size_t)-1 ) 
 330             theBuffer
.data()[0u] = wxT('\0'); 
 334         //Increment to next (sub)string 
 335         //Note that we have to use wxWcslen instead of nLen here 
 336         //because XX2XX gives us the size of the output buffer, 
 337         //which is not necessarily the length of the string 
 338         szPos 
+= wxWcslen(szPos
) + 1; 
 341     //success - return actual length and the buffer 
 342     *pOutSize 
= nActualLength
; 
 346 // ---------------------------------------------------------------------------- 
 348 // ---------------------------------------------------------------------------- 
 350 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 352     return wxMB2WC(buf
, psz
, n
); 
 355 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 357     return wxWC2MB(buf
, psz
, n
); 
 362 // ---------------------------------------------------------------------------- 
 363 // wxConvBrokenFileNames  
 364 // ---------------------------------------------------------------------------- 
 366 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar 
*charset
) 
 368     if ( !charset 
|| wxStricmp(charset
, _T("UTF-8")) == 0 
 369                   || wxStricmp(charset
, _T("UTF8")) == 0  ) 
 370         m_conv 
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
); 
 372         m_conv 
= new wxCSConv(charset
); 
 376 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf
, 
 378                              size_t outputSize
) const 
 380     return m_conv
->MB2WC( outputBuf
, psz
, outputSize 
); 
 384 wxConvBrokenFileNames::WC2MB(char *outputBuf
, 
 386                              size_t outputSize
) const 
 388     return m_conv
->WC2MB( outputBuf
, psz
, outputSize 
); 
 393 // ---------------------------------------------------------------------------- 
 395 // ---------------------------------------------------------------------------- 
 397 // Implementation (C) 2004 Fredrik Roubert 
 400 // BASE64 decoding table 
 402 static const unsigned char utf7unb64
[] = 
 404     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 405     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 406     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 407     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 408     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 409     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f, 
 410     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 
 411     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 412     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 
 413     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 
 414     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 
 415     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff, 
 416     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 
 417     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 
 418     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 
 419     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff, 
 420     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 421     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 422     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 423     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 424     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 425     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 426     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 427     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 428     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 429     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 430     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 431     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 432     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 433     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 434     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 435     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 
 438 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 442     while (*psz 
&& ((!buf
) || (len 
< n
))) 
 444         unsigned char cc 
= *psz
++; 
 452         else if (*psz 
== '-') 
 462             // BASE64 encoded string 
 466             for (lsb 
= false, d 
= 0, l 
= 0; 
 467                 (cc 
= utf7unb64
[(unsigned char)*psz
]) != 0xff; psz
++) 
 471                 for (l 
+= 6; l 
>= 8; lsb 
= !lsb
) 
 473                     c 
= (unsigned char)((d 
>> (l 
-= 8)) % 256); 
 482                             *buf 
= (wchar_t)(c 
<< 8); 
 489     if (buf 
&& (len 
< n
)) 
 495 // BASE64 encoding table 
 497 static const unsigned char utf7enb64
[] = 
 499     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 
 500     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 
 501     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 
 502     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 
 503     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 
 504     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 
 505     'w', 'x', 'y', 'z', '0', '1', '2', '3', 
 506     '4', '5', '6', '7', '8', '9', '+', '/' 
 510 // UTF-7 encoding table 
 512 // 0 - Set D (directly encoded characters) 
 513 // 1 - Set O (optional direct characters) 
 514 // 2 - whitespace characters (optional) 
 515 // 3 - special characters 
 517 static const unsigned char utf7encode
[128] = 
 519     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, 
 520     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
 521     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3, 
 522     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 
 523     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 524     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1, 
 525     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 526     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3 
 529 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 535     while (*psz 
&& ((!buf
) || (len 
< n
))) 
 538         if (cc 
< 0x80 && utf7encode
[cc
] < 1) 
 546         else if (((wxUint32
)cc
) > 0xffff) 
 548             // no surrogate pair generation (yet?) 
 559                 // BASE64 encode string 
 560                 unsigned int lsb
, d
, l
; 
 561                 for (d 
= 0, l 
= 0;; psz
++) 
 563                     for (lsb 
= 0; lsb 
< 2; lsb 
++) 
 566                         d 
+= lsb 
? cc 
& 0xff : (cc 
& 0xff00) >> 8; 
 568                         for (l 
+= 8; l 
>= 6; ) 
 572                                 *buf
++ = utf7enb64
[(d 
>> l
) % 64]; 
 577                     if (!(cc
) || (cc 
< 0x80 && utf7encode
[cc
] < 1)) 
 583                         *buf
++ = utf7enb64
[((d 
% 16) << (6 - l
)) % 64]; 
 592     if (buf 
&& (len 
< n
)) 
 597 // ---------------------------------------------------------------------------- 
 599 // ---------------------------------------------------------------------------- 
 601 static wxUint32 utf8_max
[]= 
 602     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff }; 
 604 // boundaries of the private use area we use to (temporarily) remap
 605 // characters that are invalid in a UTF-8 encoded string
 606 const wxUint32 wxUnicodePUA 
= 0x100000; 
 607 const wxUint32 wxUnicodePUAEnd 
= wxUnicodePUA 
+ 256; 
 609 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 613     while (*psz 
&& ((!buf
) || (len 
< n
))) 
 615         const char *opsz 
= psz
; 
 616         bool invalid 
= false; 
 617         unsigned char cc 
= *psz
++, fc 
= cc
; 
 619         for (cnt 
= 0; fc 
& 0x80; cnt
++) 
 628             // escape the escape character for octal escapes 
 629             if ((m_options 
& MAP_INVALID_UTF8_TO_OCTAL
) 
 630                     && cc 
== '\\' && (!buf 
|| len 
< n
)) 
 642                 // invalid UTF-8 sequence 
 647                 unsigned ocnt 
= cnt 
- 1; 
 648                 wxUint32 res 
= cc 
& (0x3f >> cnt
); 
 652                     if ((cc 
& 0xC0) != 0x80) 
 654                         // invalid UTF-8 sequence 
 659                     res 
= (res 
<< 6) | (cc 
& 0x3f); 
 661                 if (invalid 
|| res 
<= utf8_max
[ocnt
]) 
 663                     // illegal UTF-8 encoding 
 666                 else if ((m_options 
& MAP_INVALID_UTF8_TO_PUA
) && 
 667                         res 
>= wxUnicodePUA 
&& res 
< wxUnicodePUAEnd
) 
 669                     // if one of our PUA characters turns up externally 
 670                     // it must also be treated as an illegal sequence 
 671                     // (a bit like you have to escape an escape character) 
 677                     // cast is ok because wchar_t == wxUint16 if WC_UTF16
 678                     size_t pa 
= encode_utf16(res
, (wxUint16 
*)buf
); 
 679                     if (pa 
== (size_t)-1) 
 693 #endif // WC_UTF16/!WC_UTF16 
 698                 if (m_options 
& MAP_INVALID_UTF8_TO_PUA
) 
 700                     while (opsz 
< psz 
&& (!buf 
|| len 
< n
)) 
 703                         // cast is ok because wchar_t == wxUint16 if WC_UTF16
 704                         size_t pa 
= encode_utf16((unsigned char)*opsz 
+ wxUnicodePUA
, (wxUint16 
*)buf
); 
 705                         wxASSERT(pa 
!= (size_t)-1); 
 712                             *buf
++ = wxUnicodePUA 
+ (unsigned char)*opsz
; 
 718                 else if (m_options 
& MAP_INVALID_UTF8_TO_OCTAL
) 
 720                     while (opsz 
< psz 
&& (!buf 
|| len 
< n
)) 
 722                         if ( buf 
&& len 
+ 3 < n 
) 
 724                             unsigned char n 
= *opsz
; 
 726                             *buf
++ = (wchar_t)( L
'0' + n 
/ 0100 ); 
 727                             *buf
++ = (wchar_t)( L
'0' + (n 
% 0100) / 010 ); 
 728                             *buf
++ = (wchar_t)( L
'0' + n 
% 010 ); 
 734                 else // MAP_INVALID_UTF8_NOT 
 741     if (buf 
&& (len 
< n
)) 
 746 static inline bool isoctal(wchar_t wch
) 
 748     return L
'0' <= wch 
&& wch 
<= L
'7'; 
 751 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 755     while (*psz 
&& ((!buf
) || (len 
< n
))) 
 759         // cast is ok for WC_UTF16 
 760         size_t pa 
= decode_utf16((const wxUint16 
*)psz
, cc
); 
 761         psz 
+= (pa 
== (size_t)-1) ? 1 : pa
; 
 763         cc
=(*psz
++) & 0x7fffffff; 
 766         if ( (m_options 
& MAP_INVALID_UTF8_TO_PUA
) 
 767                 && cc 
>= wxUnicodePUA 
&& cc 
< wxUnicodePUAEnd 
) 
 770                 *buf
++ = (char)(cc 
- wxUnicodePUA
); 
 773         else if ( (m_options 
& MAP_INVALID_UTF8_TO_OCTAL
) 
 774                     && cc 
== L
'\\' && psz
[0] == L
'\\' ) 
 781         else if ( (m_options 
& MAP_INVALID_UTF8_TO_OCTAL
) && 
 783                         isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) ) 
 787                 *buf
++ = (char) ((psz
[0] - L
'0')*0100 + 
 788                                  (psz
[1] - L
'0')*010 + 
 798             for (cnt 
= 0; cc 
> utf8_max
[cnt
]; cnt
++) {} 
 812                     *buf
++ = (char) ((-128 >> cnt
) | ((cc 
>> (cnt 
* 6)) & (0x3f >> cnt
))); 
 814                         *buf
++ = (char) (0x80 | ((cc 
>> (cnt 
* 6)) & 0x3f)); 
 826 // ---------------------------------------------------------------------------- 
 828 // ---------------------------------------------------------------------------- 
 830 #ifdef WORDS_BIGENDIAN 
 831     #define wxMBConvUTF16straight wxMBConvUTF16BE 
 832     #define wxMBConvUTF16swap     wxMBConvUTF16LE 
 834     #define wxMBConvUTF16swap     wxMBConvUTF16BE 
 835     #define wxMBConvUTF16straight wxMBConvUTF16LE 
 841 // copy 16bit MB to 16bit String 
 842 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 846     while (*(wxUint16
*)psz 
&& (!buf 
|| len 
< n
)) 
 849             *buf
++ = *(wxUint16
*)psz
; 
 852         psz 
+= sizeof(wxUint16
); 
 854     if (buf 
&& len
<n
)   *buf
=0; 
 860 // copy 16bit String to 16bit MB 
 861 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 865     while (*psz 
&& (!buf 
|| len 
< n
)) 
 869             *(wxUint16
*)buf 
= *psz
; 
 870             buf 
+= sizeof(wxUint16
); 
 872         len 
+= sizeof(wxUint16
); 
 875     if (buf 
&& len
<=n
-sizeof(wxUint16
))   *(wxUint16
*)buf
=0; 
 881 // swap 16bit MB to 16bit String 
 882 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 886     while (*(wxUint16
*)psz 
&& (!buf 
|| len 
< n
)) 
 890             ((char *)buf
)[0] = psz
[1]; 
 891             ((char *)buf
)[1] = psz
[0]; 
 895         psz 
+= sizeof(wxUint16
); 
 897     if (buf 
&& len
<n
)   *buf
=0; 
 903 // swap 16bit String to 16bit MB
 904 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 908     while (*psz 
&& (!buf 
|| len 
< n
)) 
 912             *buf
++ = ((char*)psz
)[1]; 
 913             *buf
++ = ((char*)psz
)[0]; 
 915         len 
+= sizeof(wxUint16
); 
 918     if (buf 
&& len
<=n
-sizeof(wxUint16
))   *(wxUint16
*)buf
=0; 
 927 // copy 16bit MB to 32bit String 
 928 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 932     while (*(wxUint16
*)psz 
&& (!buf 
|| len 
< n
)) 
 935         size_t pa
=decode_utf16((wxUint16
*)psz
, cc
); 
 936         if (pa 
== (size_t)-1) 
 942         psz 
+= pa 
* sizeof(wxUint16
); 
 944     if (buf 
&& len
<n
)   *buf
=0; 
 950 // copy 32bit String to 16bit MB 
 951 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 955     while (*psz 
&& (!buf 
|| len 
< n
)) 
 958         size_t pa
=encode_utf16(*psz
, cc
); 
 960         if (pa 
== (size_t)-1) 
 965             *(wxUint16
*)buf 
= cc
[0]; 
 966             buf 
+= sizeof(wxUint16
); 
 969                 *(wxUint16
*)buf 
= cc
[1]; 
 970                 buf 
+= sizeof(wxUint16
); 
 974         len 
+= pa
*sizeof(wxUint16
); 
 977     if (buf 
&& len
<=n
-sizeof(wxUint16
))   *(wxUint16
*)buf
=0; 
 983 // swap 16bit MB to 32bit String 
 984 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 988     while (*(wxUint16
*)psz 
&& (!buf 
|| len 
< n
)) 
 992         tmp
[0]=psz
[1];  tmp
[1]=psz
[0]; 
 993         tmp
[2]=psz
[3];  tmp
[3]=psz
[2]; 
 995         size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
); 
 996         if (pa 
== (size_t)-1) 
1003         psz 
+= pa 
* sizeof(wxUint16
); 
1005     if (buf 
&& len
<n
)   *buf
=0; 
1011 // swap 32bit String to 16bit MB 
1012 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1016     while (*psz 
&& (!buf 
|| len 
< n
)) 
1019         size_t pa
=encode_utf16(*psz
, cc
); 
1021         if (pa 
== (size_t)-1) 
1026             *buf
++ = ((char*)cc
)[1]; 
1027             *buf
++ = ((char*)cc
)[0]; 
1030                 *buf
++ = ((char*)cc
)[3]; 
1031                 *buf
++ = ((char*)cc
)[2]; 
1035         len 
+= pa
*sizeof(wxUint16
); 
1038     if (buf 
&& len
<=n
-sizeof(wxUint16
))   *(wxUint16
*)buf
=0; 
1046 // ---------------------------------------------------------------------------- 
1048 // ---------------------------------------------------------------------------- 
1050 #ifdef WORDS_BIGENDIAN 
1051 #define wxMBConvUTF32straight  wxMBConvUTF32BE 
1052 #define wxMBConvUTF32swap      wxMBConvUTF32LE 
1054 #define wxMBConvUTF32swap      wxMBConvUTF32BE 
1055 #define wxMBConvUTF32straight  wxMBConvUTF32LE 
1059 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
; 
1060 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
; 
1065 // copy 32bit MB to 16bit String 
1066 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1070     while (*(wxUint32
*)psz 
&& (!buf 
|| len 
< n
)) 
1074         size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
); 
1075         if (pa 
== (size_t)-1) 
1085         psz 
+= sizeof(wxUint32
); 
1087     if (buf 
&& len
<n
)   *buf
=0; 
1093 // copy 16bit String to 32bit MB 
1094 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1098     while (*psz 
&& (!buf 
|| len 
< n
)) 
1102         // cast is ok for WC_UTF16 
1103         size_t pa 
= decode_utf16((const wxUint16 
*)psz
, cc
); 
1104         if (pa 
== (size_t)-1) 
1109             *(wxUint32
*)buf 
= cc
; 
1110             buf 
+= sizeof(wxUint32
); 
1112         len 
+= sizeof(wxUint32
); 
1116     if (buf 
&& len
<=n
-sizeof(wxUint32
)) 
1124 // swap 32bit MB to 16bit String 
1125 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1129     while (*(wxUint32
*)psz 
&& (!buf 
|| len 
< n
)) 
1132         tmp
[0] = psz
[3];   tmp
[1] = psz
[2]; 
1133         tmp
[2] = psz
[1];   tmp
[3] = psz
[0]; 
1138         size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
); 
1139         if (pa 
== (size_t)-1) 
1149         psz 
+= sizeof(wxUint32
); 
1159 // swap 16bit String to 32bit MB 
1160 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1164     while (*psz 
&& (!buf 
|| len 
< n
)) 
1168         // cast is ok for WC_UTF16 
1169         size_t pa
=decode_utf16((const wxUint16 
*)psz
, *(wxUint32
*)cc
); 
1170         if (pa 
== (size_t)-1) 
1180         len 
+= sizeof(wxUint32
); 
1184     if (buf 
&& len
<=n
-sizeof(wxUint32
)) 
1193 // copy 32bit MB to 32bit String 
1194 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1198     while (*(wxUint32
*)psz 
&& (!buf 
|| len 
< n
)) 
1201             *buf
++ = *(wxUint32
*)psz
; 
1203         psz 
+= sizeof(wxUint32
); 
1213 // copy 32bit String to 32bit MB 
1214 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1218     while (*psz 
&& (!buf 
|| len 
< n
)) 
1222             *(wxUint32
*)buf 
= *psz
; 
1223             buf 
+= sizeof(wxUint32
); 
1226         len 
+= sizeof(wxUint32
); 
1230     if (buf 
&& len
<=n
-sizeof(wxUint32
)) 
1237 // swap 32bit MB to 32bit String 
1238 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1242     while (*(wxUint32
*)psz 
&& (!buf 
|| len 
< n
)) 
1246             ((char *)buf
)[0] = psz
[3]; 
1247             ((char *)buf
)[1] = psz
[2]; 
1248             ((char *)buf
)[2] = psz
[1]; 
1249             ((char *)buf
)[3] = psz
[0]; 
1253         psz 
+= sizeof(wxUint32
); 
1263 // swap 32bit String to 32bit MB 
1264 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1268     while (*psz 
&& (!buf 
|| len 
< n
)) 
1272             *buf
++ = ((char *)psz
)[3]; 
1273             *buf
++ = ((char *)psz
)[2]; 
1274             *buf
++ = ((char *)psz
)[1]; 
1275             *buf
++ = ((char *)psz
)[0]; 
1277         len 
+= sizeof(wxUint32
); 
1281     if (buf 
&& len
<=n
-sizeof(wxUint32
)) 
1291 // ============================================================================ 
1292 // The classes doing conversion using the iconv_xxx() functions 
1293 // ============================================================================ 
1297 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with 
1298 //     E2BIG if output buffer is _exactly_ as big as needed. Such case is 
1299 //     (unless there's yet another bug in glibc) the only case when iconv() 
1300 //     returns with (size_t)-1 (which means error) and says there are 0 bytes 
1301 //     left in the input buffer -- when _real_ error occurs, 
1302 //     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1303 //     iconv() failure.
1304 //     [This bug does not appear in glibc 2.2.] 
1305 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1 
1306 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \ 
1307                                      (errno != E2BIG || bufLeft != 0)) 
1309 #define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1) 
1312 #define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x)) 
1314 // ---------------------------------------------------------------------------- 
1315 // wxMBConv_iconv: encapsulates an iconv character set 
1316 // ---------------------------------------------------------------------------- 
1318 class wxMBConv_iconv 
: public wxMBConv
 
1321     wxMBConv_iconv(const wxChar 
*name
); 
1322     virtual ~wxMBConv_iconv(); 
1324     virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const; 
1325     virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const; 
1328         { return (m2w 
!= (iconv_t
)-1) && (w2m 
!= (iconv_t
)-1); } 
1331     // the iconv handlers used to translate from multibyte to wide char and in 
1332     // the other direction 
1336     // guards access to m2w and w2m objects 
1337     wxMutex m_iconvMutex
; 
1341     // the name (for iconv_open()) of a wide char charset -- if none is 
1342     // available on this machine, it will remain NULL 
1343     static const char *ms_wcCharsetName
; 
1345     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has 
1346     // different endian-ness than the native one 
1347     static bool ms_wcNeedsSwap
; 
1350 const char *wxMBConv_iconv::ms_wcCharsetName 
= NULL
; 
1351 bool wxMBConv_iconv::ms_wcNeedsSwap 
= false; 
1353 wxMBConv_iconv::wxMBConv_iconv(const wxChar 
*name
) 
1355     // Do it the hard way 
1357     for (size_t i 
= 0; i 
< wxStrlen(name
)+1; i
++) 
1358         cname
[i
] = (char) name
[i
]; 
1360     // check for charset that represents wchar_t: 
1361     if (ms_wcCharsetName 
== NULL
) 
1363         ms_wcNeedsSwap 
= false; 
1365         // try charset with explicit bytesex info (e.g. "UCS-4LE"): 
1366         ms_wcCharsetName 
= WC_NAME_BEST
; 
1367         m2w 
= iconv_open(ms_wcCharsetName
, cname
); 
1369         if (m2w 
== (iconv_t
)-1) 
1371             // try charset w/o bytesex info (e.g. "UCS4") 
1372             // and check for bytesex ourselves: 
1373             ms_wcCharsetName 
= WC_NAME
; 
1374             m2w 
= iconv_open(ms_wcCharsetName
, cname
); 
1376             // last bet, try if it knows WCHAR_T pseudo-charset 
1377             if (m2w 
== (iconv_t
)-1) 
1379                 ms_wcCharsetName 
= "WCHAR_T"; 
1380                 m2w 
= iconv_open(ms_wcCharsetName
, cname
); 
1383             if (m2w 
!= (iconv_t
)-1) 
1385                 char    buf
[2], *bufPtr
; 
1386                 wchar_t wbuf
[2], *wbufPtr
; 
1394                 outsz 
= SIZEOF_WCHAR_T 
* 2; 
1398                 res 
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
, 
1399                             (char**)&wbufPtr
, &outsz
); 
1401                 if (ICONV_FAILED(res
, insz
)) 
1403                     ms_wcCharsetName 
= NULL
; 
1404                     wxLogLastError(wxT("iconv")); 
1405                     wxLogError(_("Conversion to charset '%s' doesn't work."), name
); 
1409                     ms_wcNeedsSwap 
= wbuf
[0] != (wchar_t)buf
[0]; 
1414                 ms_wcCharsetName 
= NULL
; 
1416                 // VS: we must not output an error here, since wxWidgets will safely 
1417                 //     fall back to using wxEncodingConverter. 
1418                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name
); 
1422         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName
, ms_wcNeedsSwap
); 
1424     else // we already have ms_wcCharsetName 
1426         m2w 
= iconv_open(ms_wcCharsetName
, cname
); 
1429     // NB: don't ever pass NULL to iconv_open(), it may crash! 
1430     if ( ms_wcCharsetName 
) 
1432         w2m 
= iconv_open( cname
, ms_wcCharsetName
); 
1440 wxMBConv_iconv::~wxMBConv_iconv() 
1442     if ( m2w 
!= (iconv_t
)-1 ) 
1444     if ( w2m 
!= (iconv_t
)-1 ) 
1448 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1451     // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle. 
1452     //     Unfortunately there are a couple of global wxCSConv objects such as 
1453     //     wxConvLocal that are used all over wx code, so we have to make sure 
1454     //     the handle is used by at most one thread at a time. Otherwise 
1455     //     only a few wx classes would be safe to use from non-main threads 
1456     //     as MB<->WC conversion would fail "randomly". 
1457     wxMutexLocker 
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
); 
1460     size_t inbuf 
= strlen(psz
); 
1461     size_t outbuf 
= n 
* SIZEOF_WCHAR_T
; 
1463     // VS: Use these instead of psz, buf because iconv() modifies its arguments: 
1464     wchar_t *bufPtr 
= buf
; 
1465     const char *pszPtr 
= psz
; 
1469         // have destination buffer, convert there 
1471                      ICONV_CHAR_CAST(&pszPtr
), &inbuf
, 
1472                      (char**)&bufPtr
, &outbuf
); 
1473         res 
= n 
- (outbuf 
/ SIZEOF_WCHAR_T
); 
1477             // convert to native endianness 
1478             WC_BSWAP(buf 
/* _not_ bufPtr */, res
) 
1481         // NB: iconv was given only strlen(psz) characters on input, and so 
1482         //     it couldn't convert the trailing zero. Let's do it ourselves 
1483         //     if there's some room left for it in the output buffer. 
1489         // no destination buffer... convert using temp buffer 
1490         // to calculate destination buffer requirement 
1495             outbuf 
= 8*SIZEOF_WCHAR_T
; 
1498                          ICONV_CHAR_CAST(&pszPtr
), &inbuf
, 
1499                          (char**)&bufPtr
, &outbuf 
); 
1501             res 
+= 8-(outbuf
/SIZEOF_WCHAR_T
); 
1502         } while ((cres
==(size_t)-1) && (errno
==E2BIG
)); 
1505     if (ICONV_FAILED(cres
, inbuf
)) 
1507         //VS: it is ok if iconv fails, hence trace only 
1508         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); 
1515 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1518     // NB: explained in MB2WC 
1519     wxMutexLocker 
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
); 
1522     size_t inbuf 
= wxWcslen(psz
) * SIZEOF_WCHAR_T
; 
1526     wchar_t *tmpbuf 
= 0; 
1530         // need to copy to temp buffer to switch endianness 
1531         // this absolutely doesn't rock! 
1532         // (no, doing WC_BSWAP twice on the original buffer won't help, as it 
1533         //  could be in read-only memory, or be accessed in some other thread) 
1534         tmpbuf
=(wchar_t*)malloc((inbuf
+1)*SIZEOF_WCHAR_T
); 
1535         memcpy(tmpbuf
,psz
,(inbuf
+1)*SIZEOF_WCHAR_T
); 
1536         WC_BSWAP(tmpbuf
, inbuf
) 
1542         // have destination buffer, convert there 
1543         cres 
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf 
); 
1547         // NB: iconv was given only wcslen(psz) characters on input, and so 
1548         //     it couldn't convert the trailing zero. Let's do it ourselves 
1549         //     if there's some room left for it in the output buffer. 
1555         // no destination buffer... convert using temp buffer 
1556         // to calculate destination buffer requirement 
1560             buf 
= tbuf
; outbuf 
= 16; 
1562             cres 
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf 
); 
1565         } while ((cres
==(size_t)-1) && (errno
==E2BIG
)); 
1573     if (ICONV_FAILED(cres
, inbuf
)) 
1575         //VS: it is ok if iconv fails, hence trace only 
1576         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); 
1583 #endif // HAVE_ICONV 
1586 // ============================================================================ 
1587 // Win32 conversion classes 
1588 // ============================================================================ 
1590 #ifdef wxHAVE_WIN32_MB2WC 
1594 extern WXDLLIMPEXP_BASE 
long wxCharsetToCodepage(const wxChar 
*charset
); 
1595 extern WXDLLIMPEXP_BASE 
long wxEncodingToCodepage(wxFontEncoding encoding
); 
1598 class wxMBConv_win32 
: public wxMBConv
 
1603         m_CodePage 
= CP_ACP
; 
1607     wxMBConv_win32(const wxChar
* name
) 
1609         m_CodePage 
= wxCharsetToCodepage(name
); 
1612     wxMBConv_win32(wxFontEncoding encoding
) 
1614         m_CodePage 
= wxEncodingToCodepage(encoding
); 
1618     size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1620         // note that we have to use MB_ERR_INVALID_CHARS flag as without it 
1621         // the behaviour is not compatible with the Unix version (using iconv) 
1622         // and breaks the library itself, e.g. wxTextInputStream::NextChar() 
1623         // wouldn't work if reading an incomplete MB char didn't result in an 
1626         // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in 
1627         // an error (tested under Windows Server 2003) and apparently it is 
1628         // done on purpose, i.e. the function accepts any input in this case 
1629         // and although I'd prefer to return error on ill-formed output, our 
1630         // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is 
1631         // explicitly ill-formed according to RFC 2152) either, so we don't 
1632         // even have any fallback here... 
1633         int flags 
= m_CodePage 
== CP_UTF7 
? 0 : MB_ERR_INVALID_CHARS
; 
1635         const size_t len 
= ::MultiByteToWideChar
 
1637                                 m_CodePage
,     // code page 
1638                                 flags
,          // flags: fall on error 
1639                                 psz
,            // input string 
1640                                 -1,             // its length (NUL-terminated) 
1641                                 buf
,            // output string 
1642                                 buf 
? n 
: 0     // size of output buffer 
1645         // note that it returns count of written chars for buf != NULL and size 
1646         // of the needed buffer for buf == NULL so in either case the length of 
1647         // the string (which never includes the terminating NUL) is one less 
1648         return len 
? len 
- 1 : (size_t)-1; 
1651     size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const 
1654             we have a problem here: by default, WideCharToMultiByte() may 
1655             replace characters unrepresentable in the target code page with bad 
1656             quality approximations such as turning "1/2" symbol (U+00BD) into 
1657             "1" for the code pages which don't have it and we, obviously, want 
1658             to avoid this at any price 
1660             the trouble is that this function does it _silently_, i.e. it won't 
1661             even tell us whether it did or not... Win98/2000 and higher provide 
1662             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and 
1663             we have to resort to a round trip, i.e. check that converting back 
1664             results in the same string -- this is, of course, expensive but 
1665             otherwise we simply can't be sure to not garble the data. 
1668         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN 
1669         // it doesn't work with CJK encodings (which we test for rather roughly 
1670         // here...) nor with UTF-7/8 nor, of course, with Windows versions not 
1672         BOOL usedDef 
wxDUMMY_INITIALIZE(false); 
1675         if ( CanUseNoBestFit() && m_CodePage 
< 50000 ) 
1677             // it's our lucky day 
1678             flags 
= WC_NO_BEST_FIT_CHARS
; 
1679             pUsedDef 
= &usedDef
; 
1681         else // old system or unsupported encoding 
1687         const size_t len 
= ::WideCharToMultiByte
 
1689                                 m_CodePage
,     // code page 
1690                                 flags
,          // either none or no best fit 
1691                                 pwz
,            // input string 
1692                                 -1,             // it is (wide) NUL-terminated 
1693                                 buf
,            // output buffer 
1694                                 buf 
? n 
: 0,    // and its size 
1695                                 NULL
,           // default "replacement" char 
1696                                 pUsedDef        
// [out] was it used? 
1701             // function totally failed 
1705         // if we were really converting, check if we succeeded 
1710                 // check if the conversion failed, i.e. if any replacements 
1715             else // we must resort to double tripping... 
1717                 wxWCharBuffer 
wcBuf(n
); 
1718                 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 || 
1719                         wcscmp(wcBuf
, pwz
) != 0 ) 
1721                     // we didn't obtain the same thing we started from, hence 
1722                     // the conversion was lossy and we consider that it failed 
1728         // see the comment above for the reason of "len - 1" 
1732     bool IsOk() const { return m_CodePage 
!= -1; } 
1735     static bool CanUseNoBestFit() 
1737         static int s_isWin98Or2k 
= -1; 
1739         if ( s_isWin98Or2k 
== -1 ) 
1742             switch ( wxGetOsVersion(&verMaj
, &verMin
) ) 
1745                     s_isWin98Or2k 
= verMaj 
>= 4 && verMin 
>= 10; 
1749                     s_isWin98Or2k 
= verMaj 
>= 5; 
1753                     // unknown, be conservative by default 
1757             wxASSERT_MSG( s_isWin98Or2k 
!= -1, _T("should be set above") ); 
1760         return s_isWin98Or2k 
== 1; 
1766 #endif // wxHAVE_WIN32_MB2WC 
1768 // ============================================================================ 
1769 // Cocoa conversion classes 
1770 // ============================================================================ 
1772 #if defined(__WXCOCOA__) 
1774 // RN:  There is no UTF-32 support in either Core Foundation or 
1775 // Cocoa.  Strangely enough, Core Foundation uses 
1776 // UTF-32 internally quite a bit - it's just not public (yet). 
1778 #include <CoreFoundation/CFString.h> 
1779 #include <CoreFoundation/CFStringEncodingExt.h> 
1781 CFStringEncoding 
wxCFStringEncFromFontEnc(wxFontEncoding encoding
) 
1783     CFStringEncoding enc 
= kCFStringEncodingInvalidId 
; 
1784     if ( encoding 
== wxFONTENCODING_DEFAULT 
) 
1786         enc 
= CFStringGetSystemEncoding(); 
1788     else switch( encoding
) 
1790         case wxFONTENCODING_ISO8859_1 
: 
1791             enc 
= kCFStringEncodingISOLatin1 
; 
1793         case wxFONTENCODING_ISO8859_2 
: 
1794             enc 
= kCFStringEncodingISOLatin2
; 
1796         case wxFONTENCODING_ISO8859_3 
: 
1797             enc 
= kCFStringEncodingISOLatin3 
; 
1799         case wxFONTENCODING_ISO8859_4 
: 
1800             enc 
= kCFStringEncodingISOLatin4
; 
1802         case wxFONTENCODING_ISO8859_5 
: 
1803             enc 
= kCFStringEncodingISOLatinCyrillic
; 
1805         case wxFONTENCODING_ISO8859_6 
: 
1806             enc 
= kCFStringEncodingISOLatinArabic
; 
1808         case wxFONTENCODING_ISO8859_7 
: 
1809             enc 
= kCFStringEncodingISOLatinGreek
; 
1811         case wxFONTENCODING_ISO8859_8 
: 
1812             enc 
= kCFStringEncodingISOLatinHebrew
; 
1814         case wxFONTENCODING_ISO8859_9 
: 
1815             enc 
= kCFStringEncodingISOLatin5
; 
1817         case wxFONTENCODING_ISO8859_10 
: 
1818             enc 
= kCFStringEncodingISOLatin6
; 
1820         case wxFONTENCODING_ISO8859_11 
: 
1821             enc 
= kCFStringEncodingISOLatinThai
; 
1823         case wxFONTENCODING_ISO8859_13 
: 
1824             enc 
= kCFStringEncodingISOLatin7
; 
1826         case wxFONTENCODING_ISO8859_14 
: 
1827             enc 
= kCFStringEncodingISOLatin8
; 
1829         case wxFONTENCODING_ISO8859_15 
: 
1830             enc 
= kCFStringEncodingISOLatin9
; 
1833         case wxFONTENCODING_KOI8 
: 
1834             enc 
= kCFStringEncodingKOI8_R
; 
1836         case wxFONTENCODING_ALTERNATIVE 
: // MS-DOS CP866 
1837             enc 
= kCFStringEncodingDOSRussian
; 
1840 //      case wxFONTENCODING_BULGARIAN : 
1844         case wxFONTENCODING_CP437 
: 
1845             enc 
=kCFStringEncodingDOSLatinUS 
; 
1847         case wxFONTENCODING_CP850 
: 
1848             enc 
= kCFStringEncodingDOSLatin1
; 
1850         case wxFONTENCODING_CP852 
: 
1851             enc 
= kCFStringEncodingDOSLatin2
; 
1853         case wxFONTENCODING_CP855 
: 
1854             enc 
= kCFStringEncodingDOSCyrillic
; 
1856         case wxFONTENCODING_CP866 
: 
1857             enc 
=kCFStringEncodingDOSRussian 
; 
1859         case wxFONTENCODING_CP874 
: 
1860             enc 
= kCFStringEncodingDOSThai
; 
1862         case wxFONTENCODING_CP932 
: 
1863             enc 
= kCFStringEncodingDOSJapanese
; 
1865         case wxFONTENCODING_CP936 
: 
1866             enc 
=kCFStringEncodingDOSChineseSimplif 
; 
1868         case wxFONTENCODING_CP949 
: 
1869             enc 
= kCFStringEncodingDOSKorean
; 
1871         case wxFONTENCODING_CP950 
: 
1872             enc 
= kCFStringEncodingDOSChineseTrad
; 
1874         case wxFONTENCODING_CP1250 
: 
1875             enc 
= kCFStringEncodingWindowsLatin2
; 
1877         case wxFONTENCODING_CP1251 
: 
1878             enc 
=kCFStringEncodingWindowsCyrillic 
; 
1880         case wxFONTENCODING_CP1252 
: 
1881             enc 
=kCFStringEncodingWindowsLatin1 
; 
1883         case wxFONTENCODING_CP1253 
: 
1884             enc 
= kCFStringEncodingWindowsGreek
; 
1886         case wxFONTENCODING_CP1254 
: 
1887             enc 
= kCFStringEncodingWindowsLatin5
; 
1889         case wxFONTENCODING_CP1255 
: 
1890             enc 
=kCFStringEncodingWindowsHebrew 
; 
1892         case wxFONTENCODING_CP1256 
: 
1893             enc 
=kCFStringEncodingWindowsArabic 
; 
1895         case wxFONTENCODING_CP1257 
: 
1896             enc 
= kCFStringEncodingWindowsBalticRim
; 
1898 //   This only really encodes to UTF7 (if that) evidently 
1899 //        case wxFONTENCODING_UTF7 : 
1900 //            enc = kCFStringEncodingNonLossyASCII ; 
1902         case wxFONTENCODING_UTF8 
: 
1903             enc 
= kCFStringEncodingUTF8 
; 
1905         case wxFONTENCODING_EUC_JP 
: 
1906             enc 
= kCFStringEncodingEUC_JP
; 
1908         case wxFONTENCODING_UTF16 
: 
1909             enc 
= kCFStringEncodingUnicode 
; 
1911         case wxFONTENCODING_MACROMAN 
: 
1912             enc 
= kCFStringEncodingMacRoman 
; 
1914         case wxFONTENCODING_MACJAPANESE 
: 
1915             enc 
= kCFStringEncodingMacJapanese 
; 
1917         case wxFONTENCODING_MACCHINESETRAD 
: 
1918             enc 
= kCFStringEncodingMacChineseTrad 
; 
1920         case wxFONTENCODING_MACKOREAN 
: 
1921             enc 
= kCFStringEncodingMacKorean 
; 
1923         case wxFONTENCODING_MACARABIC 
: 
1924             enc 
= kCFStringEncodingMacArabic 
; 
1926         case wxFONTENCODING_MACHEBREW 
: 
1927             enc 
= kCFStringEncodingMacHebrew 
; 
1929         case wxFONTENCODING_MACGREEK 
: 
1930             enc 
= kCFStringEncodingMacGreek 
; 
1932         case wxFONTENCODING_MACCYRILLIC 
: 
1933             enc 
= kCFStringEncodingMacCyrillic 
; 
1935         case wxFONTENCODING_MACDEVANAGARI 
: 
1936             enc 
= kCFStringEncodingMacDevanagari 
; 
1938         case wxFONTENCODING_MACGURMUKHI 
: 
1939             enc 
= kCFStringEncodingMacGurmukhi 
; 
1941         case wxFONTENCODING_MACGUJARATI 
: 
1942             enc 
= kCFStringEncodingMacGujarati 
; 
1944         case wxFONTENCODING_MACORIYA 
: 
1945             enc 
= kCFStringEncodingMacOriya 
; 
1947         case wxFONTENCODING_MACBENGALI 
: 
1948             enc 
= kCFStringEncodingMacBengali 
; 
1950         case wxFONTENCODING_MACTAMIL 
: 
1951             enc 
= kCFStringEncodingMacTamil 
; 
1953         case wxFONTENCODING_MACTELUGU 
: 
1954             enc 
= kCFStringEncodingMacTelugu 
; 
1956         case wxFONTENCODING_MACKANNADA 
: 
1957             enc 
= kCFStringEncodingMacKannada 
; 
1959         case wxFONTENCODING_MACMALAJALAM 
: 
1960             enc 
= kCFStringEncodingMacMalayalam 
; 
1962         case wxFONTENCODING_MACSINHALESE 
: 
1963             enc 
= kCFStringEncodingMacSinhalese 
; 
1965         case wxFONTENCODING_MACBURMESE 
: 
1966             enc 
= kCFStringEncodingMacBurmese 
; 
1968         case wxFONTENCODING_MACKHMER 
: 
1969             enc 
= kCFStringEncodingMacKhmer 
; 
1971         case wxFONTENCODING_MACTHAI 
: 
1972             enc 
= kCFStringEncodingMacThai 
; 
1974         case wxFONTENCODING_MACLAOTIAN 
: 
1975             enc 
= kCFStringEncodingMacLaotian 
; 
1977         case wxFONTENCODING_MACGEORGIAN 
: 
1978             enc 
= kCFStringEncodingMacGeorgian 
; 
1980         case wxFONTENCODING_MACARMENIAN 
: 
1981             enc 
= kCFStringEncodingMacArmenian 
; 
1983         case wxFONTENCODING_MACCHINESESIMP 
: 
1984             enc 
= kCFStringEncodingMacChineseSimp 
; 
1986         case wxFONTENCODING_MACTIBETAN 
: 
1987             enc 
= kCFStringEncodingMacTibetan 
; 
1989         case wxFONTENCODING_MACMONGOLIAN 
: 
1990             enc 
= kCFStringEncodingMacMongolian 
; 
1992         case wxFONTENCODING_MACETHIOPIC 
: 
1993             enc 
= kCFStringEncodingMacEthiopic 
; 
1995         case wxFONTENCODING_MACCENTRALEUR 
: 
1996             enc 
= kCFStringEncodingMacCentralEurRoman 
; 
1998         case wxFONTENCODING_MACVIATNAMESE 
: 
1999             enc 
= kCFStringEncodingMacVietnamese 
; 
2001         case wxFONTENCODING_MACARABICEXT 
: 
2002             enc 
= kCFStringEncodingMacExtArabic 
; 
2004         case wxFONTENCODING_MACSYMBOL 
: 
2005             enc 
= kCFStringEncodingMacSymbol 
; 
2007         case wxFONTENCODING_MACDINGBATS 
: 
2008             enc 
= kCFStringEncodingMacDingbats 
; 
2010         case wxFONTENCODING_MACTURKISH 
: 
2011             enc 
= kCFStringEncodingMacTurkish 
; 
2013         case wxFONTENCODING_MACCROATIAN 
: 
2014             enc 
= kCFStringEncodingMacCroatian 
; 
2016         case wxFONTENCODING_MACICELANDIC 
: 
2017             enc 
= kCFStringEncodingMacIcelandic 
; 
2019         case wxFONTENCODING_MACROMANIAN 
: 
2020             enc 
= kCFStringEncodingMacRomanian 
; 
2022         case wxFONTENCODING_MACCELTIC 
: 
2023             enc 
= kCFStringEncodingMacCeltic 
; 
2025         case wxFONTENCODING_MACGAELIC 
: 
2026             enc 
= kCFStringEncodingMacGaelic 
; 
2028 //      case wxFONTENCODING_MACKEYBOARD : 
2029 //          enc = kCFStringEncodingMacKeyboardGlyphs ; 
2032             // because gcc is picky 
2038 class wxMBConv_cocoa 
: public wxMBConv
 
2043         Init(CFStringGetSystemEncoding()) ; 
2047     wxMBConv_cocoa(const wxChar
* name
) 
2049         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ; 
2053     wxMBConv_cocoa(wxFontEncoding encoding
) 
2055         Init( wxCFStringEncFromFontEnc(encoding
) ); 
2062     void Init( CFStringEncoding encoding
) 
2064         m_encoding 
= encoding 
; 
2067     size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const 
2071         CFStringRef theString 
= CFStringCreateWithBytes ( 
2072                                                 NULL
, //the allocator 
2073                                                 (const UInt8
*)szUnConv
, 
2076                                                 false //no BOM/external representation 
2079         wxASSERT(theString
); 
2081         size_t nOutLength 
= CFStringGetLength(theString
); 
2085             CFRelease(theString
); 
2089         CFRange theRange 
= { 0, nOutSize 
}; 
2091 #if SIZEOF_WCHAR_T == 4 
2092         UniChar
* szUniCharBuffer 
= new UniChar
[nOutSize
]; 
2095         CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
); 
2097         CFRelease(theString
); 
2099         szUniCharBuffer
[nOutLength
] = '\0' ; 
2101 #if SIZEOF_WCHAR_T == 4 
2102         wxMBConvUTF16 converter 
; 
2103         converter
.MB2WC(szOut
, (const char*)szUniCharBuffer 
, nOutSize 
) ; 
2104         delete[] szUniCharBuffer
; 
2110     size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const 
2114         size_t nRealOutSize
; 
2115         size_t nBufSize 
= wxWcslen(szUnConv
); 
2116         UniChar
* szUniBuffer 
= (UniChar
*) szUnConv
; 
2118 #if SIZEOF_WCHAR_T == 4 
2119         wxMBConvUTF16 converter 
; 
2120         nBufSize 
= converter
.WC2MB( NULL 
, szUnConv 
, 0 ); 
2121         szUniBuffer 
= new UniChar
[ (nBufSize 
/ sizeof(UniChar
)) + 1] ; 
2122         converter
.WC2MB( (char*) szUniBuffer 
, szUnConv
, nBufSize 
+ sizeof(UniChar
)) ; 
2123         nBufSize 
/= sizeof(UniChar
); 
2126         CFStringRef theString 
= CFStringCreateWithCharactersNoCopy( 
2130                                 kCFAllocatorNull 
//deallocator - we want to deallocate it ourselves 
2133         wxASSERT(theString
); 
2135         //Note that CER puts a BOM when converting to unicode 
2136         //so we  check and use getchars instead in that case 
2137         if (m_encoding 
== kCFStringEncodingUnicode
) 
2140                 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize 
- 1), (UniChar
*) szOut
); 
2142             nRealOutSize 
= CFStringGetLength(theString
) + 1; 
2148                 CFRangeMake(0, CFStringGetLength(theString
)), 
2150                 0, //what to put in characters that can't be converted - 
2151                     //0 tells CFString to return NULL if it meets such a character 
2152                 false, //not an external representation 
2155                 (CFIndex
*) &nRealOutSize
 
2159         CFRelease(theString
); 
2161 #if SIZEOF_WCHAR_T == 4 
2162         delete[] szUniBuffer
; 
2165         return  nRealOutSize 
- 1; 
2170         return m_encoding 
!= kCFStringEncodingInvalidId 
&& 
2171               CFStringIsEncodingAvailable(m_encoding
); 
2175     CFStringEncoding m_encoding 
; 
2178 #endif // defined(__WXCOCOA__) 
2180 // ============================================================================ 
2181 // Mac conversion classes 
2182 // ============================================================================ 
2184 #if defined(__WXMAC__) && defined(TARGET_CARBON) 
2186 class wxMBConv_mac 
: public wxMBConv
 
2191         Init(CFStringGetSystemEncoding()) ; 
2195     wxMBConv_mac(const wxChar
* name
) 
2197         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ; 
2201     wxMBConv_mac(wxFontEncoding encoding
) 
2203         Init( wxMacGetSystemEncFromFontEnc(encoding
) ); 
2208         OSStatus status 
= noErr 
; 
2209         status 
= TECDisposeConverter(m_MB2WC_converter
); 
2210         status 
= TECDisposeConverter(m_WC2MB_converter
); 
2214     void Init( TextEncodingBase encoding
) 
2216         OSStatus status 
= noErr 
; 
2217         m_char_encoding 
= encoding 
; 
2218         m_unicode_encoding 
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ; 
2220         status 
= TECCreateConverter(&m_MB2WC_converter
, 
2222                                     m_unicode_encoding
); 
2223         status 
= TECCreateConverter(&m_WC2MB_converter
, 
2228     size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
2230         OSStatus status 
= noErr 
; 
2231         ByteCount byteOutLen 
; 
2232         ByteCount byteInLen 
= strlen(psz
) ; 
2233         wchar_t *tbuf 
= NULL 
; 
2234         UniChar
* ubuf 
= NULL 
; 
2239             //apple specs say at least 32 
2240             n 
= wxMax( 32 , byteInLen 
) ; 
2241             tbuf 
= (wchar_t*) malloc( n 
* SIZEOF_WCHAR_T
) ; 
2243         ByteCount byteBufferLen 
= n 
* sizeof( UniChar 
) ; 
2244 #if SIZEOF_WCHAR_T == 4 
2245         ubuf 
= (UniChar
*) malloc( byteBufferLen 
+ 2 ) ; 
2247         ubuf 
= (UniChar
*) (buf 
? buf 
: tbuf
) ; 
2249         status 
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz 
, byteInLen
, &byteInLen
, 
2250           (TextPtr
) ubuf 
, byteBufferLen
, &byteOutLen
); 
2251 #if SIZEOF_WCHAR_T == 4 
2252         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar 
2253         // is not properly terminated we get random characters at the end 
2254         ubuf
[byteOutLen 
/ sizeof( UniChar 
) ] = 0 ; 
2255         wxMBConvUTF16 converter 
; 
2256         res 
= converter
.MB2WC( (buf 
? buf 
: tbuf
) , (const char*)ubuf 
, n 
) ; 
2259         res 
= byteOutLen 
/ sizeof( UniChar 
) ; 
2264         if ( buf  
&& res 
< n
) 
2270     size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
2272         OSStatus status 
= noErr 
; 
2273         ByteCount byteOutLen 
; 
2274         ByteCount byteInLen 
= wxWcslen(psz
) * SIZEOF_WCHAR_T 
; 
2280             //apple specs say at least 32 
2281             n 
= wxMax( 32 , ((byteInLen 
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T 
); 
2282             tbuf 
= (char*) malloc( n 
) ; 
2285         ByteCount byteBufferLen 
= n 
; 
2286         UniChar
* ubuf 
= NULL 
; 
2287 #if SIZEOF_WCHAR_T == 4 
2288         wxMBConvUTF16 converter 
; 
2289         size_t unicharlen 
= converter
.WC2MB( NULL 
, psz 
, 0 ) ; 
2290         byteInLen 
= unicharlen 
; 
2291         ubuf 
= (UniChar
*) malloc( byteInLen 
+ 2 ) ; 
2292         converter
.WC2MB( (char*) ubuf 
, psz
, unicharlen 
+ 2 ) ; 
2294         ubuf 
= (UniChar
*) psz 
; 
2296         status 
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf 
, byteInLen
, &byteInLen
, 
2297             (TextPtr
) (buf 
? buf 
: tbuf
) , byteBufferLen
, &byteOutLen
); 
2298 #if SIZEOF_WCHAR_T == 4 
2304         size_t res 
= byteOutLen 
; 
2305         if ( buf  
&& res 
< n
) 
2309             //we need to double-trip to verify it didn't insert any ? in place 
2310             //of bogus characters 
2311             wxWCharBuffer 
wcBuf(n
); 
2312             size_t pszlen 
= wxWcslen(psz
); 
2313             if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 || 
2314                         wxWcslen(wcBuf
) != pszlen 
|| 
2315                         memcmp(wcBuf
, psz
, pszlen 
* sizeof(wchar_t)) != 0 ) 
2317                 // we didn't obtain the same thing we started from, hence 
2318                 // the conversion was lossy and we consider that it failed 
2327         { return m_MB2WC_converter 
!=  NULL 
&& m_WC2MB_converter 
!= NULL  
; } 
2330     TECObjectRef m_MB2WC_converter 
; 
2331     TECObjectRef m_WC2MB_converter 
; 
2333     TextEncodingBase m_char_encoding 
; 
2334     TextEncodingBase m_unicode_encoding 
; 
2337 #endif // defined(__WXMAC__) && defined(TARGET_CARBON) 
2339 // ============================================================================ 
2340 // wxEncodingConverter based conversion classes 
2341 // ============================================================================ 
2345 class wxMBConv_wxwin 
: public wxMBConv
 
2350         m_ok 
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) && 
2351                w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
); 
2355     // temporarily just use wxEncodingConverter stuff, 
2356     // so that it works while a better implementation is built 
2357     wxMBConv_wxwin(const wxChar
* name
) 
2360             m_enc 
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false); 
2362             m_enc 
= wxFONTENCODING_SYSTEM
; 
2367     wxMBConv_wxwin(wxFontEncoding enc
) 
2374     size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const 
2376         size_t inbuf 
= strlen(psz
); 
2379             if (!m2w
.Convert(psz
,buf
)) 
2385     size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const 
2387         const size_t inbuf 
= wxWcslen(psz
); 
2390             if (!w2m
.Convert(psz
,buf
)) 
2397     bool IsOk() const { return m_ok
; } 
2400     wxFontEncoding m_enc
; 
2401     wxEncodingConverter m2w
, w2m
; 
2403     // were we initialized successfully? 
2406     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
) 
2409 #endif // wxUSE_FONTMAP 
2411 // ============================================================================ 
2412 // wxCSConv implementation 
2413 // ============================================================================ 
2415 void wxCSConv::Init() 
2422 wxCSConv::wxCSConv(const wxChar 
*charset
) 
2431     m_encoding 
= wxFONTENCODING_SYSTEM
; 
2434 wxCSConv::wxCSConv(wxFontEncoding encoding
) 
2436     if ( encoding 
== wxFONTENCODING_MAX 
|| encoding 
== wxFONTENCODING_DEFAULT 
) 
2438         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") ); 
2440         encoding 
= wxFONTENCODING_SYSTEM
; 
2445     m_encoding 
= encoding
; 
2448 wxCSConv::~wxCSConv() 
2453 wxCSConv::wxCSConv(const wxCSConv
& conv
) 
2458     SetName(conv
.m_name
); 
2459     m_encoding 
= conv
.m_encoding
; 
2462 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
) 
2466     SetName(conv
.m_name
); 
2467     m_encoding 
= conv
.m_encoding
; 
2472 void wxCSConv::Clear() 
2481 void wxCSConv::SetName(const wxChar 
*charset
) 
2485         m_name 
= wxStrdup(charset
); 
2490 wxMBConv 
*wxCSConv::DoCreate() const 
2492     // check for the special case of ASCII or ISO8859-1 charset: as we have 
2493     // special knowledge of it anyhow, we don't need to create a special 
2494     // conversion object 
2495     if ( m_encoding 
== wxFONTENCODING_ISO8859_1 
) 
2497         // don't convert at all 
2501     // we trust OS to do conversion better than we can so try external 
2502     // conversion methods first 
2504     // the full order is: 
2505     //      1. OS conversion (iconv() under Unix or Win32 API) 
2506     //      2. hard coded conversions for UTF 
2507     //      3. wxEncodingConverter as fall back 
2513 #endif // !wxUSE_FONTMAP 
2515         wxString 
name(m_name
); 
2519             name 
= wxFontMapperBase::Get()->GetEncodingName(m_encoding
); 
2520 #endif // wxUSE_FONTMAP 
2522         wxMBConv_iconv 
*conv 
= new wxMBConv_iconv(name
); 
2528 #endif // HAVE_ICONV 
2530 #ifdef wxHAVE_WIN32_MB2WC 
2533         wxMBConv_win32 
*conv 
= m_name 
? new wxMBConv_win32(m_name
) 
2534                                       : new wxMBConv_win32(m_encoding
); 
2543 #endif // wxHAVE_WIN32_MB2WC 
2544 #if defined(__WXMAC__) 
2546         // leave UTF16 and UTF32 to the built-ins of wx 
2547         if ( m_name 
|| ( m_encoding 
< wxFONTENCODING_UTF16BE 
|| 
2548             ( m_encoding 
>= wxFONTENCODING_MACMIN 
&& m_encoding 
<= wxFONTENCODING_MACMAX 
) ) ) 
2552             wxMBConv_mac 
*conv 
= m_name 
? new wxMBConv_mac(m_name
) 
2553                                         : new wxMBConv_mac(m_encoding
); 
2555             wxMBConv_mac 
*conv 
= new wxMBConv_mac(m_encoding
); 
2564 #if defined(__WXCOCOA__) 
2566         if ( m_name 
|| ( m_encoding 
<= wxFONTENCODING_UTF16 
) ) 
2570             wxMBConv_cocoa 
*conv 
= m_name 
? new wxMBConv_cocoa(m_name
) 
2571                                           : new wxMBConv_cocoa(m_encoding
); 
2573             wxMBConv_cocoa 
*conv 
= new wxMBConv_cocoa(m_encoding
); 
2583     wxFontEncoding enc 
= m_encoding
; 
2585     if ( enc 
== wxFONTENCODING_SYSTEM 
&& m_name 
) 
2587         // use "false" to suppress interactive dialogs -- we can be called from 
2588         // anywhere and popping up a dialog from here is the last thing we want to 
2590         enc 
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false); 
2592 #endif // wxUSE_FONTMAP 
2596         case wxFONTENCODING_UTF7
: 
2597              return new wxMBConvUTF7
; 
2599         case wxFONTENCODING_UTF8
: 
2600              return new wxMBConvUTF8
; 
2602         case wxFONTENCODING_UTF16BE
: 
2603              return new wxMBConvUTF16BE
; 
2605         case wxFONTENCODING_UTF16LE
: 
2606              return new wxMBConvUTF16LE
; 
2608         case wxFONTENCODING_UTF32BE
: 
2609              return new wxMBConvUTF32BE
; 
2611         case wxFONTENCODING_UTF32LE
: 
2612              return new wxMBConvUTF32LE
; 
2615              // nothing to do but put here to suppress gcc warnings 
2622         wxMBConv_wxwin 
*conv 
= m_name 
? new wxMBConv_wxwin(m_name
) 
2623                                       : new wxMBConv_wxwin(m_encoding
); 
2629 #endif // wxUSE_FONTMAP 
2631     // NB: This is a hack to prevent deadlock. What could otherwise happen 
2632     //     in Unicode build: wxConvLocal creation ends up being here 
2633     //     because of some failure and logs the error. But wxLog will try to 
2634     //     attach timestamp, for which it will need wxConvLocal (to convert 
2635     //     time to char* and then wchar_t*), but that fails, tries to log 
2636     //     error, but wxLog has a (already locked) critical section that 
2637     //     guards static buffer. 
2638     static bool alreadyLoggingError 
= false; 
2639     if (!alreadyLoggingError
) 
2641         alreadyLoggingError 
= true; 
2642         wxLogError(_("Cannot convert from the charset '%s'!"), 
2646                          wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str() 
2647 #else // !wxUSE_FONTMAP 
2648                          wxString::Format(_("encoding %s"), m_encoding
).c_str() 
2649 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP 
2651         alreadyLoggingError 
= false; 
2657 void wxCSConv::CreateConvIfNeeded() const 
2661         wxCSConv 
*self 
= (wxCSConv 
*)this; // const_cast 
2664         // if we don't have neither the name nor the encoding, use the default 
2665         // encoding for this system 
2666         if ( !m_name 
&& m_encoding 
== wxFONTENCODING_SYSTEM 
) 
2668             self
->m_name 
= wxStrdup(wxLocale::GetSystemEncodingName()); 
2670 #endif // wxUSE_INTL 
2672         self
->m_convReal 
= DoCreate(); 
2673         self
->m_deferred 
= false; 
2677 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
2679     CreateConvIfNeeded(); 
2682         return m_convReal
->MB2WC(buf
, psz
, n
); 
2685     size_t len 
= strlen(psz
); 
2689         for (size_t c 
= 0; c 
<= len
; c
++) 
2690             buf
[c
] = (unsigned char)(psz
[c
]); 
2696 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
2698     CreateConvIfNeeded(); 
2701         return m_convReal
->WC2MB(buf
, psz
, n
); 
2704     const size_t len 
= wxWcslen(psz
); 
2707         for (size_t c 
= 0; c 
<= len
; c
++) 
2711             buf
[c
] = (char)psz
[c
]; 
2716         for (size_t c 
= 0; c 
<= len
; c
++) 
2726 // ---------------------------------------------------------------------------- 
2728 // ---------------------------------------------------------------------------- 
// Converter objects private to this file; the exported wxConv* names defined
// further down are bound to them. The concrete class of wxConvLibcObj is
// selected by the preprocessor: wxMBConv_win32, wxMBConv_mac or wxMBConvLibc.
2731     static wxMBConv_win32 wxConvLibcObj
; 
2732 #elif defined(__WXMAC__) && !defined(__MACH__) 
2733     static wxMBConv_mac wxConvLibcObj 
; 
2735     static wxMBConvLibc wxConvLibcObj
; 
// wxConvLocalObj is constructed for wxFONTENCODING_SYSTEM and
// wxConvISO8859_1Obj for wxFONTENCODING_ISO8859_1; the UTF-7 and UTF-8
// converters take no constructor arguments.
2738 static wxCSConv 
wxConvLocalObj(wxFONTENCODING_SYSTEM
); 
2739 static wxCSConv 
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
); 
2740 static wxMBConvUTF7 wxConvUTF7Obj
; 
2741 static wxMBConvUTF8 wxConvUTF8Obj
; 
// Exported references, each bound to the corresponding static object above.
2743 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc 
= wxConvLibcObj
; 
2744 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal 
= wxConvLocalObj
; 
2745 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1 
= wxConvISO8859_1Obj
; 
2746 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7 
= wxConvUTF7Obj
; 
2747 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8 
= wxConvUTF8Obj
; 
// Exported pointers (not references); wxConvCurrent initially points at
// wxConvLibcObj.
2748 WXDLLIMPEXP_DATA_BASE(wxMBConv 
*) wxConvCurrent 
= &wxConvLibcObj
; 
2749 WXDLLIMPEXP_DATA_BASE(wxMBConv 
*) wxConvFileName 
= & 
2757 #else // !wxUSE_WCHAR_T 
2759 // stand-ins in absence of wchar_t 
2760 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
, 
2765 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T