1 ///////////////////////////////////////////////////////////////////////////// 
   3 // Purpose:     Unicode conversion classes 
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik, 
   5 //              Ryan Norton, Fredrik Roubert (UTF7) 
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik 
  10 //              (c) 2000-2003 Vadim Zeitlin 
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert 
  12 // Licence:     wxWindows licence 
  13 ///////////////////////////////////////////////////////////////////////////// 
  15 // ============================================================================ 
  17 // ============================================================================ 
  19 // ---------------------------------------------------------------------------- 
  21 // ---------------------------------------------------------------------------- 
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA) 
  24   #pragma implementation "strconv.h" 
  27 // For compilers that support precompilation, includes "wx.h". 
  28 #include "wx/wxprec.h" 
  39 #include "wx/strconv.h" 
  44     #include "wx/msw/private.h" 
  48     #include "wx/msw/missing.h" 
  59 #if defined(__WIN32__) && !defined(__WXMICROWIN__) 
  60     #define wxHAVE_WIN32_MB2WC 
  61 #endif // __WIN32__ but !__WXMICROWIN__ 
  63 // ---------------------------------------------------------------------------- 
  65 // ---------------------------------------------------------------------------- 
  73     #include "wx/thread.h" 
  76 #include "wx/encconv.h" 
  77 #include "wx/fontmap.h" 
  81 #include <ATSUnicode.h> 
  82 #include <TextCommon.h> 
  83 #include <TextEncodingConverter.h> 
  85 #include  "wx/mac/private.h"  // includes mac headers 
  87 // ---------------------------------------------------------------------------- 
  89 // ---------------------------------------------------------------------------- 
  91 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); } 
  92 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); } 
  94 #if SIZEOF_WCHAR_T == 4 
  95     #define WC_NAME         "UCS4" 
  96     #define WC_BSWAP         BSWAP_UCS4 
  97     #ifdef WORDS_BIGENDIAN 
  98       #define WC_NAME_BEST  "UCS-4BE" 
 100       #define WC_NAME_BEST  "UCS-4LE" 
 102 #elif SIZEOF_WCHAR_T == 2 
 103     #define WC_NAME         "UTF16" 
 104     #define WC_BSWAP         BSWAP_UTF16 
 106     #ifdef WORDS_BIGENDIAN 
 107       #define WC_NAME_BEST  "UTF-16BE" 
 109       #define WC_NAME_BEST  "UTF-16LE" 
 111 #else // sizeof(wchar_t) != 2 nor 4 
 112     // does this ever happen? 
 113     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org" 
 116 // ============================================================================ 
 118 // ============================================================================ 
 120 // ---------------------------------------------------------------------------- 
 121 // UTF-16 en/decoding to/from UCS-4 
 122 // ---------------------------------------------------------------------------- 
 125 static size_t encode_utf16(wxUint32 input
, wxUint16 
*output
) 
 130             *output 
= (wxUint16
) input
; 
 133     else if (input
>=0x110000) 
 141             *output
++ = (wxUint16
) ((input 
>> 10)+0xd7c0); 
 142             *output 
= (wxUint16
) ((input
&0x3ff)+0xdc00); 
 148 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
) 
 150     if ((*input
<0xd800) || (*input
>0xdfff)) 
 155     else if ((input
[1]<0xdc00) || (input
[1]>0xdfff)) 
 162         output 
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00); 
 168 // ---------------------------------------------------------------------------- 
 170 // ---------------------------------------------------------------------------- 
 172 wxMBConv::~wxMBConv() 
 174     // nothing to do here (necessary for Darwin linking probably) 
 177 const wxWCharBuffer 
wxMBConv::cMB2WC(const char *psz
) const 
 181         // calculate the length of the buffer needed first 
 182         size_t nLen 
= MB2WC(NULL
, psz
, 0); 
 183         if ( nLen 
!= (size_t)-1 ) 
 185             // now do the actual conversion 
 186             wxWCharBuffer 
buf(nLen
); 
 187             nLen 
= MB2WC(buf
.data(), psz
, nLen 
+ 1); // with the trailing NULL 
 188             if ( nLen 
!= (size_t)-1 ) 
 195     wxWCharBuffer 
buf((wchar_t *)NULL
); 
 200 const wxCharBuffer 
wxMBConv::cWC2MB(const wchar_t *pwz
) const 
 204         size_t nLen 
= WC2MB(NULL
, pwz
, 0); 
 205         if ( nLen 
!= (size_t)-1 ) 
 207             wxCharBuffer 
buf(nLen
+3);       // space for a wxUint32 trailing zero 
 208             nLen 
= WC2MB(buf
.data(), pwz
, nLen 
+ 4); 
 209             if ( nLen 
!= (size_t)-1 ) 
 216     wxCharBuffer 
buf((char *)NULL
); 
 221 const wxWCharBuffer 
wxMBConv::cMB2WC(const char *szString
, size_t nStringLen
, size_t* pOutSize
) const 
 223     wxASSERT(pOutSize 
!= NULL
); 
 225     const char* szEnd 
= szString 
+ nStringLen 
+ 1; 
 226     const char* szPos 
= szString
; 
 227     const char* szStart 
= szPos
; 
 229     size_t nActualLength 
= 0; 
 230     size_t nCurrentSize 
= nStringLen
; //try normal size first (should never resize?) 
 232     wxWCharBuffer 
theBuffer(nCurrentSize
); 
 234     //Convert the string until the length() is reached, continuing the 
 235     //loop every time a null character is reached 
 236     while(szPos 
!= szEnd
) 
 238         wxASSERT(szPos 
< szEnd
); //something is _really_ screwed up if this rings true 
 240         //Get the length of the current (sub)string 
 241         size_t nLen 
= MB2WC(NULL
, szPos
, 0); 
 243         //Invalid conversion? 
 244         if( nLen 
== (size_t)-1 ) 
 247             theBuffer
.data()[0u] = wxT('\0'); 
 252         //Increase the actual length (+1 for current null character) 
 253         nActualLength 
+= nLen 
+ 1; 
 255         //if the buffer is too small, reallocate it 
 256         if (nActualLength 
> (nCurrentSize
+1)) 
 258             wxWCharBuffer 
theNewBuffer(nCurrentSize 
<< 1); 
 259             memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize 
* sizeof(wchar_t)); 
 260             theBuffer 
= theNewBuffer
; 
 264         //Convert the current (sub)string 
 265         if ( MB2WC(&theBuffer
.data()[szPos 
- szStart
], szPos
, nLen 
+ 1) == (size_t)-1 ) 
 268             theBuffer
.data()[0u] = wxT('\0'); 
 272         //Increment to next (sub)string 
 273         //Note that we have to use strlen here instead of nLen 
 274         //because XX2XX gives us the size of the output buffer, 
 275         //not necessarily the length of the string 
 276         szPos 
+= strlen(szPos
) + 1; 
 279     //success - return actual length and the buffer 
 280     *pOutSize 
= nActualLength
; 
 284 const wxCharBuffer 
wxMBConv::cWC2MB(const wchar_t *szString
, size_t nStringLen
, size_t* pOutSize
) const 
 286     wxASSERT(pOutSize 
!= NULL
); 
 288     const wchar_t* szEnd 
= szString 
+ nStringLen 
+ 1; 
 289     const wchar_t* szPos 
= szString
; 
 290     const wchar_t* szStart 
= szPos
; 
 292     size_t nActualLength 
= 0; 
 293     size_t nCurrentSize 
= nStringLen 
<< 2; //try * 4 first 
 295     wxCharBuffer 
theBuffer(nCurrentSize
); 
 297     //Convert the string until the length() is reached, continuing the 
 298     //loop every time a null character is reached 
 299     while(szPos 
!= szEnd
) 
 301         wxASSERT(szPos 
< szEnd
); //something is _really_ screwed up if this rings true 
 303         //Get the length of the current (sub)string 
 304         size_t nLen 
= WC2MB(NULL
, szPos
, 0); 
 306         //Invalid conversion? 
 307         if( nLen 
== (size_t)-1 ) 
 310             theBuffer
.data()[0u] = wxT('\0'); 
 314         //Increase the actual length (+1 for current null character) 
 315         nActualLength 
+= nLen 
+ 1; 
 317         //if the buffer is too small, reallocate it 
 318         if (nActualLength 
> (nCurrentSize
+1)) 
 320             wxCharBuffer 
theNewBuffer(nCurrentSize 
<< 1); 
 321             memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
); 
 322             theBuffer 
= theNewBuffer
; 
 326         //Convert the current (sub)string 
 327         if(WC2MB(&theBuffer
.data()[szPos 
- szStart
], szPos
, nLen 
+ 1) == (size_t)-1 ) 
 330             theBuffer
.data()[0u] = wxT('\0'); 
 334         //Increment to next (sub)string 
 335         //Note that we have to use wxWcslen here instead of nLen 
 336         //because XX2XX gives us the size of the output buffer, 
 337         //not necessarily the length of the string 
 338         szPos 
+= wxWcslen(szPos
) + 1; 
 341     //success - return actual length and the buffer 
 342     *pOutSize 
= nActualLength
; 
 346 // ---------------------------------------------------------------------------- 
 348 // ---------------------------------------------------------------------------- 
 350 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 352     return wxMB2WC(buf
, psz
, n
); 
 355 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 357     return wxWC2MB(buf
, psz
, n
); 
 362 // ---------------------------------------------------------------------------- 
 363 // wxConvBrokenFileNames  
 364 // ---------------------------------------------------------------------------- 
 366 wxConvBrokenFileNames::wxConvBrokenFileNames() 
 368     // decide which conversion to use for the file names 
 370     // (1) this variable exists for the sole purpose of specifying the encoding 
 371     //     of the filenames for GTK+ programs, so use it if it is set 
 372     wxString 
encName(wxGetenv(_T("G_FILENAME_ENCODING"))); 
 374     if ( !encName
.empty() && encName 
!= _T("UTF-8") && encName 
!= _T("UTF8") ) 
 376         m_conv 
= new wxCSConv(encName
); 
 378     else // no G_FILENAME_ENCODING 
 380         if ( encName
.empty() ) 
 381             encName 
= wxLocale::GetSystemEncodingName().Upper(); 
 383         // (2) if a non default locale is set, assume that the user wants his 
 384         //     filenames in this locale too 
 385         if ( !encName
.empty() && encName 
!= _T("UTF-8") && encName 
!= _T("UTF8") ) 
 387             wxSetEnv(_T("G_FILENAME_ENCODING"), encName
); 
 388             m_conv 
= new wxMBConvLibc
; 
 392             // (3) finally use UTF-8 by default 
 393             m_conv 
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
); 
 399 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf
, 
 401                              size_t outputSize
) const 
 403     return m_conv
->MB2WC( outputBuf
, psz
, outputSize 
); 
 407 wxConvBrokenFileNames::WC2MB(char *outputBuf
, 
 409                              size_t outputSize
) const 
 411     return m_conv
->WC2MB( outputBuf
, psz
, outputSize 
); 
 416 // ---------------------------------------------------------------------------- 
 418 // ---------------------------------------------------------------------------- 
 420 // Implementation (C) 2004 Fredrik Roubert 
 423 // BASE64 decoding table 
 425 static const unsigned char utf7unb64
[] = 
 427     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 428     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 429     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 430     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 431     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 432     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f, 
 433     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 
 434     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 435     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 
 436     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 
 437     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 
 438     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff, 
 439     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 
 440     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 
 441     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 
 442     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff, 
 443     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 444     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 445     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 446     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 447     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 448     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 449     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 450     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 451     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 452     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 453     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 454     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 455     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 456     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 457     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 458     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 
 461 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 465     while (*psz 
&& ((!buf
) || (len 
< n
))) 
 467         unsigned char cc 
= *psz
++; 
 475         else if (*psz 
== '-') 
 485             // BASE64 encoded string 
 489             for (lsb 
= false, d 
= 0, l 
= 0; 
 490                 (cc 
= utf7unb64
[(unsigned char)*psz
]) != 0xff; psz
++) 
 494                 for (l 
+= 6; l 
>= 8; lsb 
= !lsb
) 
 496                     c 
= (unsigned char)((d 
>> (l 
-= 8)) % 256); 
 505                             *buf 
= (wchar_t)(c 
<< 8); 
 512     if (buf 
&& (len 
< n
)) 
 518 // BASE64 encoding table 
 520 static const unsigned char utf7enb64
[] = 
 522     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 
 523     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 
 524     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 
 525     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 
 526     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 
 527     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 
 528     'w', 'x', 'y', 'z', '0', '1', '2', '3', 
 529     '4', '5', '6', '7', '8', '9', '+', '/' 
 533 // UTF-7 encoding table 
 535 // 0 - Set D (directly encoded characters) 
 536 // 1 - Set O (optional direct characters) 
 537 // 2 - whitespace characters (optional) 
 538 // 3 - special characters 
 540 static const unsigned char utf7encode
[128] = 
 542     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, 
 543     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
 544     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3, 
 545     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 
 546     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 547     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1, 
 548     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 549     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3 
 552 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 558     while (*psz 
&& ((!buf
) || (len 
< n
))) 
 561         if (cc 
< 0x80 && utf7encode
[cc
] < 1) 
 569         else if (((wxUint32
)cc
) > 0xffff) 
 571             // no surrogate pair generation (yet?) 
 582                 // BASE64 encode string 
 583                 unsigned int lsb
, d
, l
; 
 584                 for (d 
= 0, l 
= 0;; psz
++) 
 586                     for (lsb 
= 0; lsb 
< 2; lsb 
++) 
 589                         d 
+= lsb 
? cc 
& 0xff : (cc 
& 0xff00) >> 8; 
 591                         for (l 
+= 8; l 
>= 6; ) 
 595                                 *buf
++ = utf7enb64
[(d 
>> l
) % 64]; 
 600                     if (!(cc
) || (cc 
< 0x80 && utf7encode
[cc
] < 1)) 
 606                         *buf
++ = utf7enb64
[((d 
% 16) << (6 - l
)) % 64]; 
 615     if (buf 
&& (len 
< n
)) 
 620 // ---------------------------------------------------------------------------- 
 622 // ---------------------------------------------------------------------------- 
 624 static wxUint32 utf8_max
[]= 
 625     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff }; 
 627 // boundaries of the private use area we use to (temporarily) remap 
 628 // characters that are invalid in a UTF-8 encoded string 
 629 const wxUint32 wxUnicodePUA 
= 0x100000; 
 630 const wxUint32 wxUnicodePUAEnd 
= wxUnicodePUA 
+ 256; 
 632 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 636     while (*psz 
&& ((!buf
) || (len 
< n
))) 
 638         const char *opsz 
= psz
; 
 639         bool invalid 
= false; 
 640         unsigned char cc 
= *psz
++, fc 
= cc
; 
 642         for (cnt 
= 0; fc 
& 0x80; cnt
++) 
 651             // escape the escape character for octal escapes 
 652             if ((m_options 
& MAP_INVALID_UTF8_TO_OCTAL
) 
 653                     && cc 
== '\\' && (!buf 
|| len 
< n
)) 
 665                 // invalid UTF-8 sequence 
 670                 unsigned ocnt 
= cnt 
- 1; 
 671                 wxUint32 res 
= cc 
& (0x3f >> cnt
); 
 675                     if ((cc 
& 0xC0) != 0x80) 
 677                         // invalid UTF-8 sequence 
 682                     res 
= (res 
<< 6) | (cc 
& 0x3f); 
 684                 if (invalid 
|| res 
<= utf8_max
[ocnt
]) 
 686                     // illegal UTF-8 encoding 
 689                 else if ((m_options 
& MAP_INVALID_UTF8_TO_PUA
) && 
 690                         res 
>= wxUnicodePUA 
&& res 
< wxUnicodePUAEnd
) 
 692                     // if one of our PUA characters turns up externally 
 693                     // it must also be treated as an illegal sequence 
 694                     // (a bit like you have to escape an escape character) 
 700                     // cast is ok because wchar_t == wxUint16 if WC_UTF16 
 701                     size_t pa 
= encode_utf16(res
, (wxUint16 
*)buf
); 
 702                     if (pa 
== (size_t)-1) 
 716 #endif // WC_UTF16/!WC_UTF16 
 721                 if (m_options 
& MAP_INVALID_UTF8_TO_PUA
) 
 723                     while (opsz 
< psz 
&& (!buf 
|| len 
< n
)) 
 726                         // cast is ok because wchar_t == wxUint16 if WC_UTF16 
 727                         size_t pa 
= encode_utf16((unsigned char)*opsz 
+ wxUnicodePUA
, (wxUint16 
*)buf
); 
 728                         wxASSERT(pa 
!= (size_t)-1); 
 735                             *buf
++ = wxUnicodePUA 
+ (unsigned char)*opsz
; 
 741                 else if (m_options 
& MAP_INVALID_UTF8_TO_OCTAL
) 
 743                     while (opsz 
< psz 
&& (!buf 
|| len 
< n
)) 
 745                         if ( buf 
&& len 
+ 3 < n 
) 
 747                             unsigned char n 
= *opsz
; 
 749                             *buf
++ = (wchar_t)( L
'0' + n 
/ 0100 ); 
 750                             *buf
++ = (wchar_t)( L
'0' + (n 
% 0100) / 010 ); 
 751                             *buf
++ = (wchar_t)( L
'0' + n 
% 010 ); 
 757                 else // MAP_INVALID_UTF8_NOT 
 764     if (buf 
&& (len 
< n
)) 
 769 static inline bool isoctal(wchar_t wch
) 
 771     return L
'0' <= wch 
&& wch 
<= L
'7'; 
 774 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 778     while (*psz 
&& ((!buf
) || (len 
< n
))) 
 782         // cast is ok for WC_UTF16 
 783         size_t pa 
= decode_utf16((const wxUint16 
*)psz
, cc
); 
 784         psz 
+= (pa 
== (size_t)-1) ? 1 : pa
; 
 786         cc
=(*psz
++) & 0x7fffffff; 
 789         if ( (m_options 
& MAP_INVALID_UTF8_TO_PUA
) 
 790                 && cc 
>= wxUnicodePUA 
&& cc 
< wxUnicodePUAEnd 
) 
 793                 *buf
++ = (char)(cc 
- wxUnicodePUA
); 
 796         else if ( (m_options 
& MAP_INVALID_UTF8_TO_OCTAL
) 
 797                     && cc 
== L
'\\' && psz
[0] == L
'\\' ) 
 804         else if ( (m_options 
& MAP_INVALID_UTF8_TO_OCTAL
) && 
 806                         isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) ) 
 810                 *buf
++ = (char) ((psz
[0] - L
'0')*0100 + 
 811                                  (psz
[1] - L
'0')*010 + 
 821             for (cnt 
= 0; cc 
> utf8_max
[cnt
]; cnt
++) {} 
 835                     *buf
++ = (char) ((-128 >> cnt
) | ((cc 
>> (cnt 
* 6)) & (0x3f >> cnt
))); 
 837                         *buf
++ = (char) (0x80 | ((cc 
>> (cnt 
* 6)) & 0x3f)); 
 849 // ---------------------------------------------------------------------------- 
 851 // ---------------------------------------------------------------------------- 
 853 #ifdef WORDS_BIGENDIAN 
 854     #define wxMBConvUTF16straight wxMBConvUTF16BE 
 855     #define wxMBConvUTF16swap     wxMBConvUTF16LE 
 857     #define wxMBConvUTF16swap     wxMBConvUTF16BE 
 858     #define wxMBConvUTF16straight wxMBConvUTF16LE 
 864 // copy 16bit MB to 16bit String 
 865 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 869     while (*(wxUint16
*)psz 
&& (!buf 
|| len 
< n
)) 
 872             *buf
++ = *(wxUint16
*)psz
; 
 875         psz 
+= sizeof(wxUint16
); 
 877     if (buf 
&& len
<n
)   *buf
=0; 
 883 // copy 16bit String to 16bit MB 
 884 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 888     while (*psz 
&& (!buf 
|| len 
< n
)) 
 892             *(wxUint16
*)buf 
= *psz
; 
 893             buf 
+= sizeof(wxUint16
); 
 895         len 
+= sizeof(wxUint16
); 
 898     if (buf 
&& len
<=n
-sizeof(wxUint16
))   *(wxUint16
*)buf
=0; 
 904 // swap 16bit MB to 16bit String 
 905 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 909     while (*(wxUint16
*)psz 
&& (!buf 
|| len 
< n
)) 
 913             ((char *)buf
)[0] = psz
[1]; 
 914             ((char *)buf
)[1] = psz
[0]; 
 918         psz 
+= sizeof(wxUint16
); 
 920     if (buf 
&& len
<n
)   *buf
=0; 
 926 // swap 16bit String to 16bit MB 
 927 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 931     while (*psz 
&& (!buf 
|| len 
< n
)) 
 935             *buf
++ = ((char*)psz
)[1]; 
 936             *buf
++ = ((char*)psz
)[0]; 
 938         len 
+= sizeof(wxUint16
); 
 941     if (buf 
&& len
<=n
-sizeof(wxUint16
))   *(wxUint16
*)buf
=0; 
 950 // copy 16bit MB to 32bit String 
 951 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 955     while (*(wxUint16
*)psz 
&& (!buf 
|| len 
< n
)) 
 958         size_t pa
=decode_utf16((wxUint16
*)psz
, cc
); 
 959         if (pa 
== (size_t)-1) 
 965         psz 
+= pa 
* sizeof(wxUint16
); 
 967     if (buf 
&& len
<n
)   *buf
=0; 
 973 // copy 32bit String to 16bit MB 
 974 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 978     while (*psz 
&& (!buf 
|| len 
< n
)) 
 981         size_t pa
=encode_utf16(*psz
, cc
); 
 983         if (pa 
== (size_t)-1) 
 988             *(wxUint16
*)buf 
= cc
[0]; 
 989             buf 
+= sizeof(wxUint16
); 
 992                 *(wxUint16
*)buf 
= cc
[1]; 
 993                 buf 
+= sizeof(wxUint16
); 
 997         len 
+= pa
*sizeof(wxUint16
); 
1000     if (buf 
&& len
<=n
-sizeof(wxUint16
))   *(wxUint16
*)buf
=0; 
1006 // swap 16bit MB to 32bit String 
1007 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1011     while (*(wxUint16
*)psz 
&& (!buf 
|| len 
< n
)) 
1015         tmp
[0]=psz
[1];  tmp
[1]=psz
[0]; 
1016         tmp
[2]=psz
[3];  tmp
[3]=psz
[2]; 
1018         size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
); 
1019         if (pa 
== (size_t)-1) 
1026         psz 
+= pa 
* sizeof(wxUint16
); 
1028     if (buf 
&& len
<n
)   *buf
=0; 
1034 // swap 32bit String to 16bit MB 
1035 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1039     while (*psz 
&& (!buf 
|| len 
< n
)) 
1042         size_t pa
=encode_utf16(*psz
, cc
); 
1044         if (pa 
== (size_t)-1) 
1049             *buf
++ = ((char*)cc
)[1]; 
1050             *buf
++ = ((char*)cc
)[0]; 
1053                 *buf
++ = ((char*)cc
)[3]; 
1054                 *buf
++ = ((char*)cc
)[2]; 
1058         len 
+= pa
*sizeof(wxUint16
); 
1061     if (buf 
&& len
<=n
-sizeof(wxUint16
))   *(wxUint16
*)buf
=0; 
1069 // ---------------------------------------------------------------------------- 
1071 // ---------------------------------------------------------------------------- 
1073 #ifdef WORDS_BIGENDIAN 
1074 #define wxMBConvUTF32straight  wxMBConvUTF32BE 
1075 #define wxMBConvUTF32swap      wxMBConvUTF32LE 
1077 #define wxMBConvUTF32swap      wxMBConvUTF32BE 
1078 #define wxMBConvUTF32straight  wxMBConvUTF32LE 
1082 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
; 
1083 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
; 
1088 // copy 32bit MB to 16bit String 
1089 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1093     while (*(wxUint32
*)psz 
&& (!buf 
|| len 
< n
)) 
1097         size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
); 
1098         if (pa 
== (size_t)-1) 
1108         psz 
+= sizeof(wxUint32
); 
1110     if (buf 
&& len
<n
)   *buf
=0; 
1116 // copy 16bit String to 32bit MB 
1117 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1121     while (*psz 
&& (!buf 
|| len 
< n
)) 
1125         // cast is ok for WC_UTF16 
1126         size_t pa 
= decode_utf16((const wxUint16 
*)psz
, cc
); 
1127         if (pa 
== (size_t)-1) 
1132             *(wxUint32
*)buf 
= cc
; 
1133             buf 
+= sizeof(wxUint32
); 
1135         len 
+= sizeof(wxUint32
); 
1139     if (buf 
&& len
<=n
-sizeof(wxUint32
)) 
1147 // swap 32bit MB to 16bit String 
1148 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1152     while (*(wxUint32
*)psz 
&& (!buf 
|| len 
< n
)) 
1155         tmp
[0] = psz
[3];   tmp
[1] = psz
[2]; 
1156         tmp
[2] = psz
[1];   tmp
[3] = psz
[0]; 
1161         size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
); 
1162         if (pa 
== (size_t)-1) 
1172         psz 
+= sizeof(wxUint32
); 
1182 // swap 16bit String to 32bit MB 
1183 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1187     while (*psz 
&& (!buf 
|| len 
< n
)) 
1191         // cast is ok for WC_UTF16 
1192         size_t pa
=decode_utf16((const wxUint16 
*)psz
, *(wxUint32
*)cc
); 
1193         if (pa 
== (size_t)-1) 
1203         len 
+= sizeof(wxUint32
); 
1207     if (buf 
&& len
<=n
-sizeof(wxUint32
)) 
1216 // copy 32bit MB to 32bit String 
1217 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1221     while (*(wxUint32
*)psz 
&& (!buf 
|| len 
< n
)) 
1224             *buf
++ = *(wxUint32
*)psz
; 
1226         psz 
+= sizeof(wxUint32
); 
1236 // copy 32bit String to 32bit MB 
1237 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1241     while (*psz 
&& (!buf 
|| len 
< n
)) 
1245             *(wxUint32
*)buf 
= *psz
; 
1246             buf 
+= sizeof(wxUint32
); 
1249         len 
+= sizeof(wxUint32
); 
1253     if (buf 
&& len
<=n
-sizeof(wxUint32
)) 
1260 // swap 32bit MB to 32bit String 
1261 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1265     while (*(wxUint32
*)psz 
&& (!buf 
|| len 
< n
)) 
1269             ((char *)buf
)[0] = psz
[3]; 
1270             ((char *)buf
)[1] = psz
[2]; 
1271             ((char *)buf
)[2] = psz
[1]; 
1272             ((char *)buf
)[3] = psz
[0]; 
1276         psz 
+= sizeof(wxUint32
); 
1286 // swap 32bit String to 32bit MB 
1287 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1291     while (*psz 
&& (!buf 
|| len 
< n
)) 
1295             *buf
++ = ((char *)psz
)[3]; 
1296             *buf
++ = ((char *)psz
)[2]; 
1297             *buf
++ = ((char *)psz
)[1]; 
1298             *buf
++ = ((char *)psz
)[0]; 
1300         len 
+= sizeof(wxUint32
); 
1304     if (buf 
&& len
<=n
-sizeof(wxUint32
)) 
1314 // ============================================================================ 
1315 // The classes doing conversion using the iconv_xxx() functions 
1316 // ============================================================================ 
1320 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with 
1321 //     E2BIG if output buffer is _exactly_ as big as needed. Such case is 
1322 //     (unless there's yet another bug in glibc) the only case when iconv() 
1323 //     returns with (size_t)-1 (which means error) and says there are 0 bytes 
1324 //     left in the input buffer -- when _real_ error occurs, 
1325 //     bytes-left-in-input buffer is non-zero. Hence, this alternative test for 
1327 //     [This bug does not appear in glibc 2.2.] 
1328 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1 
1329 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \ 
1330                                      (errno != E2BIG || bufLeft != 0)) 
1332 #define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1) 
1335 #define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x)) 
1337 // ---------------------------------------------------------------------------- 
1338 // wxMBConv_iconv: encapsulates an iconv character set 
1339 // ---------------------------------------------------------------------------- 
1341 class wxMBConv_iconv 
: public wxMBConv
 
1344     wxMBConv_iconv(const wxChar 
*name
); 
1345     virtual ~wxMBConv_iconv(); 
1347     virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const; 
1348     virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const; 
1351         { return (m2w 
!= (iconv_t
)-1) && (w2m 
!= (iconv_t
)-1); } 
1354     // the iconv handlers used to translate from multibyte to wide char and in 
1355     // the other direction 
1359     // guards access to m2w and w2m objects 
1360     wxMutex m_iconvMutex
; 
1364     // the name (for iconv_open()) of a wide char charset -- if none is 
1365     // available on this machine, it will remain NULL 
1366     static const char *ms_wcCharsetName
; 
1368     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has 
1369     // different endian-ness than the native one 
1370     static bool ms_wcNeedsSwap
; 
1373 const char *wxMBConv_iconv::ms_wcCharsetName 
= NULL
; 
1374 bool wxMBConv_iconv::ms_wcNeedsSwap 
= false; 
1376 wxMBConv_iconv::wxMBConv_iconv(const wxChar 
*name
) 
1378     // Do it the hard way 
1380     for (size_t i 
= 0; i 
< wxStrlen(name
)+1; i
++) 
1381         cname
[i
] = (char) name
[i
]; 
1383     // check for charset that represents wchar_t: 
1384     if (ms_wcCharsetName 
== NULL
) 
1386         ms_wcNeedsSwap 
= false; 
1388         // try charset with explicit bytesex info (e.g. "UCS-4LE"): 
1389         ms_wcCharsetName 
= WC_NAME_BEST
; 
1390         m2w 
= iconv_open(ms_wcCharsetName
, cname
); 
1392         if (m2w 
== (iconv_t
)-1) 
1394             // try charset w/o bytesex info (e.g. "UCS4") 
1395             // and check for bytesex ourselves: 
1396             ms_wcCharsetName 
= WC_NAME
; 
1397             m2w 
= iconv_open(ms_wcCharsetName
, cname
); 
1399             // last bet, try if it knows WCHAR_T pseudo-charset 
1400             if (m2w 
== (iconv_t
)-1) 
1402                 ms_wcCharsetName 
= "WCHAR_T"; 
1403                 m2w 
= iconv_open(ms_wcCharsetName
, cname
); 
1406             if (m2w 
!= (iconv_t
)-1) 
1408                 char    buf
[2], *bufPtr
; 
1409                 wchar_t wbuf
[2], *wbufPtr
; 
1417                 outsz 
= SIZEOF_WCHAR_T 
* 2; 
1421                 res 
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
, 
1422                             (char**)&wbufPtr
, &outsz
); 
1424                 if (ICONV_FAILED(res
, insz
)) 
1426                     ms_wcCharsetName 
= NULL
; 
1427                     wxLogLastError(wxT("iconv")); 
1428                     wxLogError(_("Conversion to charset '%s' doesn't work."), name
); 
1432                     ms_wcNeedsSwap 
= wbuf
[0] != (wchar_t)buf
[0]; 
1437                 ms_wcCharsetName 
= NULL
; 
1439                 // VS: we must not output an error here, since wxWidgets will safely 
1440                 //     fall back to using wxEncodingConverter. 
1441                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name
); 
1445         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName
, ms_wcNeedsSwap
); 
1447     else // we already have ms_wcCharsetName 
1449         m2w 
= iconv_open(ms_wcCharsetName
, cname
); 
1452     // NB: don't ever pass NULL to iconv_open(), it may crash! 
1453     if ( ms_wcCharsetName 
) 
1455         w2m 
= iconv_open( cname
, ms_wcCharsetName
); 
1463 wxMBConv_iconv::~wxMBConv_iconv() 
1465     if ( m2w 
!= (iconv_t
)-1 ) 
1467     if ( w2m 
!= (iconv_t
)-1 ) 
1471 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1474     // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle. 
1475     //     Unfortunately there are a couple of global wxCSConv objects such as 
1476     //     wxConvLocal that are used all over wx code, so we have to make sure 
1477     //     the handle is used by at most one thread at a time. Otherwise 
1478     //     only a few wx classes would be safe to use from non-main threads 
1479     //     as MB<->WC conversion would fail "randomly". 
1480     wxMutexLocker 
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
); 
1483     size_t inbuf 
= strlen(psz
); 
1484     size_t outbuf 
= n 
* SIZEOF_WCHAR_T
; 
1486     // VS: Use these instead of psz, buf because iconv() modifies its arguments: 
1487     wchar_t *bufPtr 
= buf
; 
1488     const char *pszPtr 
= psz
; 
1492         // have destination buffer, convert there 
1494                      ICONV_CHAR_CAST(&pszPtr
), &inbuf
, 
1495                      (char**)&bufPtr
, &outbuf
); 
1496         res 
= n 
- (outbuf 
/ SIZEOF_WCHAR_T
); 
1500             // convert to native endianness 
1501             WC_BSWAP(buf 
/* _not_ bufPtr */, res
) 
1504         // NB: iconv was given only strlen(psz) characters on input, and so 
1505         //     it couldn't convert the trailing zero. Let's do it ourselves 
1506         //     if there's some room left for it in the output buffer. 
1512         // no destination buffer... convert using temp buffer 
1513         // to calculate destination buffer requirement 
1518             outbuf 
= 8*SIZEOF_WCHAR_T
; 
1521                          ICONV_CHAR_CAST(&pszPtr
), &inbuf
, 
1522                          (char**)&bufPtr
, &outbuf 
); 
1524             res 
+= 8-(outbuf
/SIZEOF_WCHAR_T
); 
1525         } while ((cres
==(size_t)-1) && (errno
==E2BIG
)); 
1528     if (ICONV_FAILED(cres
, inbuf
)) 
1530         //VS: it is ok if iconv fails, hence trace only 
1531         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); 
1538 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1541     // NB: explained in MB2WC 
1542     wxMutexLocker 
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
); 
1545     size_t inbuf 
= wxWcslen(psz
) * SIZEOF_WCHAR_T
; 
1549     wchar_t *tmpbuf 
= 0; 
1553         // need to copy to temp buffer to switch endianness 
1554         // this absolutely doesn't rock! 
1555         // (no, doing WC_BSWAP twice on the original buffer won't help, as it 
1556         //  could be in read-only memory, or be accessed in some other thread) 
1557         tmpbuf
=(wchar_t*)malloc((inbuf
+1)*SIZEOF_WCHAR_T
); 
1558         memcpy(tmpbuf
,psz
,(inbuf
+1)*SIZEOF_WCHAR_T
); 
1559         WC_BSWAP(tmpbuf
, inbuf
) 
1565         // have destination buffer, convert there 
1566         cres 
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf 
); 
1570         // NB: iconv was given only wcslen(psz) characters on input, and so 
1571         //     it couldn't convert the trailing zero. Let's do it ourselves 
1572         //     if there's some room left for it in the output buffer. 
1578         // no destination buffer... convert using temp buffer 
1579         // to calculate destination buffer requirement 
1583             buf 
= tbuf
; outbuf 
= 16; 
1585             cres 
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf 
); 
1588         } while ((cres
==(size_t)-1) && (errno
==E2BIG
)); 
1596     if (ICONV_FAILED(cres
, inbuf
)) 
1598         //VS: it is ok if iconv fails, hence trace only 
1599         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); 
1606 #endif // HAVE_ICONV 
1609 // ============================================================================ 
1610 // Win32 conversion classes 
1611 // ============================================================================ 
1613 #ifdef wxHAVE_WIN32_MB2WC 
1617 extern WXDLLIMPEXP_BASE 
long wxCharsetToCodepage(const wxChar 
*charset
); 
1618 extern WXDLLIMPEXP_BASE 
long wxEncodingToCodepage(wxFontEncoding encoding
); 
1621 class wxMBConv_win32 
: public wxMBConv
 
1626         m_CodePage 
= CP_ACP
; 
1630     wxMBConv_win32(const wxChar
* name
) 
1632         m_CodePage 
= wxCharsetToCodepage(name
); 
1635     wxMBConv_win32(wxFontEncoding encoding
) 
1637         m_CodePage 
= wxEncodingToCodepage(encoding
); 
1641     size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1643         // note that we have to use MB_ERR_INVALID_CHARS flag as without it 
1644         // the behaviour is not compatible with the Unix version (using iconv) 
1645         // and breaks the library itself, e.g. wxTextInputStream::NextChar() 
1646         // wouldn't work if reading an incomplete MB char didn't result in an 
1649         // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in 
1650         // an error (tested under Windows Server 2003) and apparently it is 
1651         // done on purpose, i.e. the function accepts any input in this case 
1652         // and although I'd prefer to return error on ill-formed output, our 
1653         // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is 
1654         // explicitly ill-formed according to RFC 2152) either so we don't 
1655         // even have any fallback here... 
1656         int flags 
= m_CodePage 
== CP_UTF7 
? 0 : MB_ERR_INVALID_CHARS
; 
1658         const size_t len 
= ::MultiByteToWideChar
 
1660                                 m_CodePage
,     // code page 
1661                                 flags
,          // flags: fall on error 
1662                                 psz
,            // input string 
1663                                 -1,             // its length (NUL-terminated) 
1664                                 buf
,            // output string 
1665                                 buf 
? n 
: 0     // size of output buffer 
1668         // note that it returns count of written chars for buf != NULL and size 
1669         // of the needed buffer for buf == NULL so in either case the length of 
1670         // the string (which never includes the terminating NUL) is one less 
1671         return len 
? len 
- 1 : (size_t)-1; 
1674     size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const 
1677             we have a problem here: by default, WideCharToMultiByte() may 
1678             replace characters unrepresentable in the target code page with bad 
1679             quality approximations such as turning "1/2" symbol (U+00BD) into 
1680             "1" for the code pages which don't have it and we, obviously, want 
1681             to avoid this at any price 
1683             the trouble is that this function does it _silently_, i.e. it won't 
1684             even tell us whether it did or not... Win98/2000 and higher provide 
1685             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and 
1686             we have to resort to a round trip, i.e. check that converting back 
1687             results in the same string -- this is, of course, expensive but 
1688             otherwise we simply can't be sure to not garble the data. 
1691         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN 
1692         // it doesn't work with CJK encodings (which we test for rather roughly 
1693         // here...) nor with UTF-7/8 nor, of course, with Windows versions not 
1695         BOOL usedDef 
wxDUMMY_INITIALIZE(false); 
1698         if ( CanUseNoBestFit() && m_CodePage 
< 50000 ) 
1700             // it's our lucky day 
1701             flags 
= WC_NO_BEST_FIT_CHARS
; 
1702             pUsedDef 
= &usedDef
; 
1704         else // old system or unsupported encoding 
1710         const size_t len 
= ::WideCharToMultiByte
 
1712                                 m_CodePage
,     // code page 
1713                                 flags
,          // either none or no best fit 
1714                                 pwz
,            // input string 
1715                                 -1,             // it is (wide) NUL-terminated 
1716                                 buf
,            // output buffer 
1717                                 buf 
? n 
: 0,    // and its size 
1718                                 NULL
,           // default "replacement" char 
1719                                 pUsedDef        
// [out] was it used? 
1724             // function totally failed 
1728         // if we were really converting, check if we succeeded 
1733                 // check if the conversion failed, i.e. if any replacements 
1738             else // we must resort to double tripping... 
1740                 wxWCharBuffer 
wcBuf(n
); 
1741                 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 || 
1742                         wcscmp(wcBuf
, pwz
) != 0 ) 
1744                     // we didn't obtain the same thing we started from, hence 
1745                     // the conversion was lossy and we consider that it failed 
1751         // see the comment above for the reason of "len - 1" 
1755     bool IsOk() const { return m_CodePage 
!= -1; } 
1758     static bool CanUseNoBestFit() 
1760         static int s_isWin98Or2k 
= -1; 
1762         if ( s_isWin98Or2k 
== -1 ) 
1765             switch ( wxGetOsVersion(&verMaj
, &verMin
) ) 
1768                     s_isWin98Or2k 
= verMaj 
>= 4 && verMin 
>= 10; 
1772                     s_isWin98Or2k 
= verMaj 
>= 5; 
1776                     // unknown, be conservative by default 
1780             wxASSERT_MSG( s_isWin98Or2k 
!= -1, _T("should be set above") ); 
1783         return s_isWin98Or2k 
== 1; 
1789 #endif // wxHAVE_WIN32_MB2WC 
1791 // ============================================================================ 
1792 // Cocoa conversion classes 
1793 // ============================================================================ 
1795 #if defined(__WXCOCOA__) 
1797 // RN:  There is no UTF-32 support in either Core Foundation or 
1798 // Cocoa.  Strangely enough, Core Foundation uses UTF-32 
1799 // internally quite a bit - it's just not public (yet). 
1801 #include <CoreFoundation/CFString.h> 
1802 #include <CoreFoundation/CFStringEncodingExt.h> 
1804 CFStringEncoding 
wxCFStringEncFromFontEnc(wxFontEncoding encoding
) 
1806     CFStringEncoding enc 
= kCFStringEncodingInvalidId 
; 
1807     if ( encoding 
== wxFONTENCODING_DEFAULT 
) 
1809         enc 
= CFStringGetSystemEncoding(); 
1811     else switch( encoding
) 
1813         case wxFONTENCODING_ISO8859_1 
: 
1814             enc 
= kCFStringEncodingISOLatin1 
; 
1816         case wxFONTENCODING_ISO8859_2 
: 
1817             enc 
= kCFStringEncodingISOLatin2
; 
1819         case wxFONTENCODING_ISO8859_3 
: 
1820             enc 
= kCFStringEncodingISOLatin3 
; 
1822         case wxFONTENCODING_ISO8859_4 
: 
1823             enc 
= kCFStringEncodingISOLatin4
; 
1825         case wxFONTENCODING_ISO8859_5 
: 
1826             enc 
= kCFStringEncodingISOLatinCyrillic
; 
1828         case wxFONTENCODING_ISO8859_6 
: 
1829             enc 
= kCFStringEncodingISOLatinArabic
; 
1831         case wxFONTENCODING_ISO8859_7 
: 
1832             enc 
= kCFStringEncodingISOLatinGreek
; 
1834         case wxFONTENCODING_ISO8859_8 
: 
1835             enc 
= kCFStringEncodingISOLatinHebrew
; 
1837         case wxFONTENCODING_ISO8859_9 
: 
1838             enc 
= kCFStringEncodingISOLatin5
; 
1840         case wxFONTENCODING_ISO8859_10 
: 
1841             enc 
= kCFStringEncodingISOLatin6
; 
1843         case wxFONTENCODING_ISO8859_11 
: 
1844             enc 
= kCFStringEncodingISOLatinThai
; 
1846         case wxFONTENCODING_ISO8859_13 
: 
1847             enc 
= kCFStringEncodingISOLatin7
; 
1849         case wxFONTENCODING_ISO8859_14 
: 
1850             enc 
= kCFStringEncodingISOLatin8
; 
1852         case wxFONTENCODING_ISO8859_15 
: 
1853             enc 
= kCFStringEncodingISOLatin9
; 
1856         case wxFONTENCODING_KOI8 
: 
1857             enc 
= kCFStringEncodingKOI8_R
; 
1859         case wxFONTENCODING_ALTERNATIVE 
: // MS-DOS CP866 
1860             enc 
= kCFStringEncodingDOSRussian
; 
1863 //      case wxFONTENCODING_BULGARIAN : 
1867         case wxFONTENCODING_CP437 
: 
1868             enc 
=kCFStringEncodingDOSLatinUS 
; 
1870         case wxFONTENCODING_CP850 
: 
1871             enc 
= kCFStringEncodingDOSLatin1
; 
1873         case wxFONTENCODING_CP852 
: 
1874             enc 
= kCFStringEncodingDOSLatin2
; 
1876         case wxFONTENCODING_CP855 
: 
1877             enc 
= kCFStringEncodingDOSCyrillic
; 
1879         case wxFONTENCODING_CP866 
: 
1880             enc 
=kCFStringEncodingDOSRussian 
; 
1882         case wxFONTENCODING_CP874 
: 
1883             enc 
= kCFStringEncodingDOSThai
; 
1885         case wxFONTENCODING_CP932 
: 
1886             enc 
= kCFStringEncodingDOSJapanese
; 
1888         case wxFONTENCODING_CP936 
: 
1889             enc 
=kCFStringEncodingDOSChineseSimplif 
; 
1891         case wxFONTENCODING_CP949 
: 
1892             enc 
= kCFStringEncodingDOSKorean
; 
1894         case wxFONTENCODING_CP950 
: 
1895             enc 
= kCFStringEncodingDOSChineseTrad
; 
1897         case wxFONTENCODING_CP1250 
: 
1898             enc 
= kCFStringEncodingWindowsLatin2
; 
1900         case wxFONTENCODING_CP1251 
: 
1901             enc 
=kCFStringEncodingWindowsCyrillic 
; 
1903         case wxFONTENCODING_CP1252 
: 
1904             enc 
=kCFStringEncodingWindowsLatin1 
; 
1906         case wxFONTENCODING_CP1253 
: 
1907             enc 
= kCFStringEncodingWindowsGreek
; 
1909         case wxFONTENCODING_CP1254 
: 
1910             enc 
= kCFStringEncodingWindowsLatin5
; 
1912         case wxFONTENCODING_CP1255 
: 
1913             enc 
=kCFStringEncodingWindowsHebrew 
; 
1915         case wxFONTENCODING_CP1256 
: 
1916             enc 
=kCFStringEncodingWindowsArabic 
; 
1918         case wxFONTENCODING_CP1257 
: 
1919             enc 
= kCFStringEncodingWindowsBalticRim
; 
1921 //   This only really encodes to UTF7 (if that) evidently 
1922 //        case wxFONTENCODING_UTF7 : 
1923 //            enc = kCFStringEncodingNonLossyASCII ; 
1925         case wxFONTENCODING_UTF8 
: 
1926             enc 
= kCFStringEncodingUTF8 
; 
1928         case wxFONTENCODING_EUC_JP 
: 
1929             enc 
= kCFStringEncodingEUC_JP
; 
1931         case wxFONTENCODING_UTF16 
: 
1932             enc 
= kCFStringEncodingUnicode 
; 
1934         case wxFONTENCODING_MACROMAN 
: 
1935             enc 
= kCFStringEncodingMacRoman 
; 
1937         case wxFONTENCODING_MACJAPANESE 
: 
1938             enc 
= kCFStringEncodingMacJapanese 
; 
1940         case wxFONTENCODING_MACCHINESETRAD 
: 
1941             enc 
= kCFStringEncodingMacChineseTrad 
; 
1943         case wxFONTENCODING_MACKOREAN 
: 
1944             enc 
= kCFStringEncodingMacKorean 
; 
1946         case wxFONTENCODING_MACARABIC 
: 
1947             enc 
= kCFStringEncodingMacArabic 
; 
1949         case wxFONTENCODING_MACHEBREW 
: 
1950             enc 
= kCFStringEncodingMacHebrew 
; 
1952         case wxFONTENCODING_MACGREEK 
: 
1953             enc 
= kCFStringEncodingMacGreek 
; 
1955         case wxFONTENCODING_MACCYRILLIC 
: 
1956             enc 
= kCFStringEncodingMacCyrillic 
; 
1958         case wxFONTENCODING_MACDEVANAGARI 
: 
1959             enc 
= kCFStringEncodingMacDevanagari 
; 
1961         case wxFONTENCODING_MACGURMUKHI 
: 
1962             enc 
= kCFStringEncodingMacGurmukhi 
; 
1964         case wxFONTENCODING_MACGUJARATI 
: 
1965             enc 
= kCFStringEncodingMacGujarati 
; 
1967         case wxFONTENCODING_MACORIYA 
: 
1968             enc 
= kCFStringEncodingMacOriya 
; 
1970         case wxFONTENCODING_MACBENGALI 
: 
1971             enc 
= kCFStringEncodingMacBengali 
; 
1973         case wxFONTENCODING_MACTAMIL 
: 
1974             enc 
= kCFStringEncodingMacTamil 
; 
1976         case wxFONTENCODING_MACTELUGU 
: 
1977             enc 
= kCFStringEncodingMacTelugu 
; 
1979         case wxFONTENCODING_MACKANNADA 
: 
1980             enc 
= kCFStringEncodingMacKannada 
; 
1982         case wxFONTENCODING_MACMALAJALAM 
: 
1983             enc 
= kCFStringEncodingMacMalayalam 
; 
1985         case wxFONTENCODING_MACSINHALESE 
: 
1986             enc 
= kCFStringEncodingMacSinhalese 
; 
1988         case wxFONTENCODING_MACBURMESE 
: 
1989             enc 
= kCFStringEncodingMacBurmese 
; 
1991         case wxFONTENCODING_MACKHMER 
: 
1992             enc 
= kCFStringEncodingMacKhmer 
; 
1994         case wxFONTENCODING_MACTHAI 
: 
1995             enc 
= kCFStringEncodingMacThai 
; 
1997         case wxFONTENCODING_MACLAOTIAN 
: 
1998             enc 
= kCFStringEncodingMacLaotian 
; 
2000         case wxFONTENCODING_MACGEORGIAN 
: 
2001             enc 
= kCFStringEncodingMacGeorgian 
; 
2003         case wxFONTENCODING_MACARMENIAN 
: 
2004             enc 
= kCFStringEncodingMacArmenian 
; 
2006         case wxFONTENCODING_MACCHINESESIMP 
: 
2007             enc 
= kCFStringEncodingMacChineseSimp 
; 
2009         case wxFONTENCODING_MACTIBETAN 
: 
2010             enc 
= kCFStringEncodingMacTibetan 
; 
2012         case wxFONTENCODING_MACMONGOLIAN 
: 
2013             enc 
= kCFStringEncodingMacMongolian 
; 
2015         case wxFONTENCODING_MACETHIOPIC 
: 
2016             enc 
= kCFStringEncodingMacEthiopic 
; 
2018         case wxFONTENCODING_MACCENTRALEUR 
: 
2019             enc 
= kCFStringEncodingMacCentralEurRoman 
; 
2021         case wxFONTENCODING_MACVIATNAMESE 
: 
2022             enc 
= kCFStringEncodingMacVietnamese 
; 
2024         case wxFONTENCODING_MACARABICEXT 
: 
2025             enc 
= kCFStringEncodingMacExtArabic 
; 
2027         case wxFONTENCODING_MACSYMBOL 
: 
2028             enc 
= kCFStringEncodingMacSymbol 
; 
2030         case wxFONTENCODING_MACDINGBATS 
: 
2031             enc 
= kCFStringEncodingMacDingbats 
; 
2033         case wxFONTENCODING_MACTURKISH 
: 
2034             enc 
= kCFStringEncodingMacTurkish 
; 
2036         case wxFONTENCODING_MACCROATIAN 
: 
2037             enc 
= kCFStringEncodingMacCroatian 
; 
2039         case wxFONTENCODING_MACICELANDIC 
: 
2040             enc 
= kCFStringEncodingMacIcelandic 
; 
2042         case wxFONTENCODING_MACROMANIAN 
: 
2043             enc 
= kCFStringEncodingMacRomanian 
; 
2045         case wxFONTENCODING_MACCELTIC 
: 
2046             enc 
= kCFStringEncodingMacCeltic 
; 
2048         case wxFONTENCODING_MACGAELIC 
: 
2049             enc 
= kCFStringEncodingMacGaelic 
; 
2051 //      case wxFONTENCODING_MACKEYBOARD : 
2052 //          enc = kCFStringEncodingMacKeyboardGlyphs ; 
2055             // because gcc is picky 
2061 class wxMBConv_cocoa 
: public wxMBConv
 
2066         Init(CFStringGetSystemEncoding()) ; 
2070     wxMBConv_cocoa(const wxChar
* name
) 
2072         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ; 
2076     wxMBConv_cocoa(wxFontEncoding encoding
) 
2078         Init( wxCFStringEncFromFontEnc(encoding
) ); 
2085     void Init( CFStringEncoding encoding
) 
2087         m_encoding 
= encoding 
; 
2090     size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const 
2094         CFStringRef theString 
= CFStringCreateWithBytes ( 
2095                                                 NULL
, //the allocator 
2096                                                 (const UInt8
*)szUnConv
, 
2099                                                 false //no BOM/external representation 
2102         wxASSERT(theString
); 
2104         size_t nOutLength 
= CFStringGetLength(theString
); 
2108             CFRelease(theString
); 
2112         CFRange theRange 
= { 0, nOutSize 
}; 
2114 #if SIZEOF_WCHAR_T == 4 
2115         UniChar
* szUniCharBuffer 
= new UniChar
[nOutSize
]; 
2118         CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
); 
2120         CFRelease(theString
); 
2122         szUniCharBuffer
[nOutLength
] = '\0' ; 
2124 #if SIZEOF_WCHAR_T == 4 
2125         wxMBConvUTF16 converter 
; 
2126         converter
.MB2WC(szOut
, (const char*)szUniCharBuffer 
, nOutSize 
) ; 
2127         delete[] szUniCharBuffer
; 
2133     size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const 
2137         size_t nRealOutSize
; 
2138         size_t nBufSize 
= wxWcslen(szUnConv
); 
2139         UniChar
* szUniBuffer 
= (UniChar
*) szUnConv
; 
2141 #if SIZEOF_WCHAR_T == 4 
2142         wxMBConvUTF16BE converter 
; 
2143         nBufSize 
= converter
.WC2MB( NULL 
, szUnConv 
, 0 ); 
2144         szUniBuffer 
= new UniChar
[ (nBufSize 
/ sizeof(UniChar
)) + 1] ; 
2145         converter
.WC2MB( (char*) szUniBuffer 
, szUnConv
, nBufSize 
+ sizeof(UniChar
)) ; 
2146         nBufSize 
/= sizeof(UniChar
); 
2149         CFStringRef theString 
= CFStringCreateWithCharactersNoCopy( 
2153                                 kCFAllocatorNull 
//deallocator - we want to deallocate it ourselves 
2156         wxASSERT(theString
); 
2158         //Note that CER puts a BOM when converting to unicode 
2159         //so we  check and use getchars instead in that case 
2160         if (m_encoding 
== kCFStringEncodingUnicode
) 
2163                 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize 
- 1), (UniChar
*) szOut
); 
2165             nRealOutSize 
= CFStringGetLength(theString
) + 1; 
2171                 CFRangeMake(0, CFStringGetLength(theString
)), 
2173                 0, //what to put in characters that can't be converted - 
2174                     //0 tells CFString to return NULL if it meets such a character 
2175                 false, //not an external representation 
2178                 (CFIndex
*) &nRealOutSize
 
2182         CFRelease(theString
); 
2184 #if SIZEOF_WCHAR_T == 4 
2185         delete[] szUniBuffer
; 
2188         return  nRealOutSize 
- 1; 
2193         return m_encoding 
!= kCFStringEncodingInvalidId 
&& 
2194               CFStringIsEncodingAvailable(m_encoding
); 
2198     CFStringEncoding m_encoding 
; 
2201 #endif // defined(__WXCOCOA__) 
2203 // ============================================================================ 
2204 // Mac conversion classes 
2205 // ============================================================================ 
2207 #if defined(__WXMAC__) && defined(TARGET_CARBON) 
2209 class wxMBConv_mac 
: public wxMBConv
 
2214         Init(CFStringGetSystemEncoding()) ; 
2218     wxMBConv_mac(const wxChar
* name
) 
2220         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ; 
2224     wxMBConv_mac(wxFontEncoding encoding
) 
2226         Init( wxMacGetSystemEncFromFontEnc(encoding
) ); 
2231         OSStatus status 
= noErr 
; 
2232         status 
= TECDisposeConverter(m_MB2WC_converter
); 
2233         status 
= TECDisposeConverter(m_WC2MB_converter
); 
2237     void Init( TextEncodingBase encoding
) 
2239         OSStatus status 
= noErr 
; 
2240         m_char_encoding 
= encoding 
; 
2241         m_unicode_encoding 
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ; 
2243         status 
= TECCreateConverter(&m_MB2WC_converter
, 
2245                                     m_unicode_encoding
); 
2246         status 
= TECCreateConverter(&m_WC2MB_converter
, 
2251     size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
2253         OSStatus status 
= noErr 
; 
2254         ByteCount byteOutLen 
; 
2255         ByteCount byteInLen 
= strlen(psz
) ; 
2256         wchar_t *tbuf 
= NULL 
; 
2257         UniChar
* ubuf 
= NULL 
; 
2262             //apple specs say at least 32 
2263             n 
= wxMax( 32 , byteInLen 
) ; 
2264             tbuf 
= (wchar_t*) malloc( n 
* SIZEOF_WCHAR_T
) ; 
2266         ByteCount byteBufferLen 
= n 
* sizeof( UniChar 
) ; 
2267 #if SIZEOF_WCHAR_T == 4 
2268         ubuf 
= (UniChar
*) malloc( byteBufferLen 
+ 2 ) ; 
2270         ubuf 
= (UniChar
*) (buf 
? buf 
: tbuf
) ; 
2272         status 
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz 
, byteInLen
, &byteInLen
, 
2273           (TextPtr
) ubuf 
, byteBufferLen
, &byteOutLen
); 
2274 #if SIZEOF_WCHAR_T == 4 
2275         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar 
2276         // is not properly terminated we get random characters at the end 
2277         ubuf
[byteOutLen 
/ sizeof( UniChar 
) ] = 0 ; 
2278         wxMBConvUTF16BE converter 
; 
2279         res 
= converter
.MB2WC( (buf 
? buf 
: tbuf
) , (const char*)ubuf 
, n 
) ; 
2282         res 
= byteOutLen 
/ sizeof( UniChar 
) ; 
2287         if ( buf  
&& res 
< n
) 
2293     size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
2295         OSStatus status 
= noErr 
; 
2296         ByteCount byteOutLen 
; 
2297         ByteCount byteInLen 
= wxWcslen(psz
) * SIZEOF_WCHAR_T 
; 
2303             //apple specs say at least 32 
2304             n 
= wxMax( 32 , ((byteInLen 
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T 
); 
2305             tbuf 
= (char*) malloc( n 
) ; 
2308         ByteCount byteBufferLen 
= n 
; 
2309         UniChar
* ubuf 
= NULL 
; 
2310 #if SIZEOF_WCHAR_T == 4 
2311         wxMBConvUTF16BE converter 
; 
2312         size_t unicharlen 
= converter
.WC2MB( NULL 
, psz 
, 0 ) ; 
2313         byteInLen 
= unicharlen 
; 
2314         ubuf 
= (UniChar
*) malloc( byteInLen 
+ 2 ) ; 
2315         converter
.WC2MB( (char*) ubuf 
, psz
, unicharlen 
+ 2 ) ; 
2317         ubuf 
= (UniChar
*) psz 
; 
2319         status 
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf 
, byteInLen
, &byteInLen
, 
2320             (TextPtr
) (buf 
? buf 
: tbuf
) , byteBufferLen
, &byteOutLen
); 
2321 #if SIZEOF_WCHAR_T == 4 
2327         size_t res 
= byteOutLen 
; 
2328         if ( buf  
&& res 
< n
) 
2332             //we need to double-trip to verify it didn't insert any ? in place 
2333             //of bogus characters 
2334             wxWCharBuffer 
wcBuf(n
); 
2335             size_t pszlen 
= wxWcslen(psz
); 
2336             if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 || 
2337                         wxWcslen(wcBuf
) != pszlen 
|| 
2338                         memcmp(wcBuf
, psz
, pszlen 
* sizeof(wchar_t)) != 0 ) 
2340                 // we didn't obtain the same thing we started from, hence 
2341                 // the conversion was lossy and we consider that it failed 
2350         { return m_MB2WC_converter 
!=  NULL 
&& m_WC2MB_converter 
!= NULL  
; } 
2353     TECObjectRef m_MB2WC_converter 
; 
2354     TECObjectRef m_WC2MB_converter 
; 
2356     TextEncodingBase m_char_encoding 
; 
2357     TextEncodingBase m_unicode_encoding 
; 
2360 #endif // defined(__WXMAC__) && defined(TARGET_CARBON) 
2362 // ============================================================================ 
2363 // wxEncodingConverter based conversion classes 
2364 // ============================================================================ 
2368 class wxMBConv_wxwin 
: public wxMBConv
 
2373         m_ok 
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) && 
2374                w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
); 
2378     // temporarily just use wxEncodingConverter stuff, 
2379     // so that it works while a better implementation is built 
2380     wxMBConv_wxwin(const wxChar
* name
) 
2383             m_enc 
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false); 
2385             m_enc 
= wxFONTENCODING_SYSTEM
; 
2390     wxMBConv_wxwin(wxFontEncoding enc
) 
2397     size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const 
2399         size_t inbuf 
= strlen(psz
); 
2402             if (!m2w
.Convert(psz
,buf
)) 
2408     size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const 
2410         const size_t inbuf 
= wxWcslen(psz
); 
2413             if (!w2m
.Convert(psz
,buf
)) 
2420     bool IsOk() const { return m_ok
; } 
2423     wxFontEncoding m_enc
; 
2424     wxEncodingConverter m2w
, w2m
; 
2426     // were we initialized successfully? 
2429     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
) 
2432 #endif // wxUSE_FONTMAP 
2434 // ============================================================================ 
2435 // wxCSConv implementation 
2436 // ============================================================================ 
2438 void wxCSConv::Init() 
2445 wxCSConv::wxCSConv(const wxChar 
*charset
) 
2454     m_encoding 
= wxFONTENCODING_SYSTEM
; 
2457 wxCSConv::wxCSConv(wxFontEncoding encoding
) 
2459     if ( encoding 
== wxFONTENCODING_MAX 
|| encoding 
== wxFONTENCODING_DEFAULT 
) 
2461         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") ); 
2463         encoding 
= wxFONTENCODING_SYSTEM
; 
2468     m_encoding 
= encoding
; 
2471 wxCSConv::~wxCSConv() 
2476 wxCSConv::wxCSConv(const wxCSConv
& conv
) 
2481     SetName(conv
.m_name
); 
2482     m_encoding 
= conv
.m_encoding
; 
2485 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
) 
2489     SetName(conv
.m_name
); 
2490     m_encoding 
= conv
.m_encoding
; 
2495 void wxCSConv::Clear() 
2504 void wxCSConv::SetName(const wxChar 
*charset
) 
2508         m_name 
= wxStrdup(charset
); 
// Builds the converter object that will really perform the conversions for
// the current m_name / m_encoding pair.
//
// The candidates visible in this listing are considered in this order:
//   - wxFONTENCODING_ISO8859_1 is special-cased up front (no conversion
//     object is needed for it);
//   - wxMBConv_iconv, in the HAVE_ICONV section;
//   - wxMBConv_win32, in the wxHAVE_WIN32_MB2WC section;
//   - wxMBConv_mac / wxMBConv_cocoa, in the __WXMAC__ / __WXCOCOA__ sections;
//   - the built-in wxMBConvUTF7/UTF8/UTF16BE/UTF16LE/UTF32BE/UTF32LE classes;
//   - wxMBConv_wxwin as the last candidate.
// An error is logged at the very end, behind a re-entrancy guard.
//
// Every converter returned from here is allocated with new.
// NOTE(review): presumably the caller owns the result (CreateConvIfNeeded()
// stores it in m_convReal) -- confirm where it is deleted.
2513 wxMBConv 
*wxCSConv::DoCreate() const 
2515     // check for the special case of ASCII or ISO8859-1 charset: as we have 
2516     // special knowledge of it anyhow, we don't need to create a special 
2517     // conversion object 
2518     if ( m_encoding 
== wxFONTENCODING_ISO8859_1 
) 
2520         // don't convert at all 
2524     // we trust OS to do conversion better than we can so try external 
2525     // conversion methods first 
2527     // the full order is: 
2528     //      1. OS conversion (iconv() under Unix or Win32 API) 
2529     //      2. hard coded conversions for UTF 
2530     //      3. wxEncodingConverter as fall back 
2536 #endif // !wxUSE_FONTMAP 
2538         wxString 
name(m_name
); 
2542             name 
= wxFontMapperBase::Get()->GetEncodingName(m_encoding
); 
2543 #endif // wxUSE_FONTMAP 
2545         wxMBConv_iconv 
*conv 
= new wxMBConv_iconv(name
); 
2551 #endif // HAVE_ICONV 
2553 #ifdef wxHAVE_WIN32_MB2WC 
// an explicit charset name takes precedence over the encoding value
2556         wxMBConv_win32 
*conv 
= m_name 
? new wxMBConv_win32(m_name
) 
2557                                       : new wxMBConv_win32(m_encoding
); 
2566 #endif // wxHAVE_WIN32_MB2WC 
2567 #if defined(__WXMAC__) 
2569         // leave UTF16 and UTF32 to the built-ins of wx 
2570         if ( m_name 
|| ( m_encoding 
< wxFONTENCODING_UTF16BE 
|| 
2571             ( m_encoding 
>= wxFONTENCODING_MACMIN 
&& m_encoding 
<= wxFONTENCODING_MACMAX 
) ) ) 
2575             wxMBConv_mac 
*conv 
= m_name 
? new wxMBConv_mac(m_name
) 
2576                                         : new wxMBConv_mac(m_encoding
); 
2578             wxMBConv_mac 
*conv 
= new wxMBConv_mac(m_encoding
); 
2587 #if defined(__WXCOCOA__) 
2589         if ( m_name 
|| ( m_encoding 
<= wxFONTENCODING_UTF16 
) ) 
2593             wxMBConv_cocoa 
*conv 
= m_name 
? new wxMBConv_cocoa(m_name
) 
2594                                           : new wxMBConv_cocoa(m_encoding
); 
2596             wxMBConv_cocoa 
*conv 
= new wxMBConv_cocoa(m_encoding
); 
// work on a local copy of the encoding: it may be replaced below by the one
// the font mapper associates with the charset name
2606     wxFontEncoding enc 
= m_encoding
; 
2608     if ( enc 
== wxFONTENCODING_SYSTEM 
&& m_name 
) 
2610         // use "false" to suppress interactive dialogs -- we can be called from 
2611         // anywhere and popping up a dialog from here is the last thing we want to 
2613         enc 
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false); 
2615 #endif // wxUSE_FONTMAP 
// the UTF encodings are handled by the converters implemented in wx itself
2619         case wxFONTENCODING_UTF7
: 
2620              return new wxMBConvUTF7
; 
2622         case wxFONTENCODING_UTF8
: 
2623              return new wxMBConvUTF8
; 
2625         case wxFONTENCODING_UTF16BE
: 
2626              return new wxMBConvUTF16BE
; 
2628         case wxFONTENCODING_UTF16LE
: 
2629              return new wxMBConvUTF16LE
; 
2631         case wxFONTENCODING_UTF32BE
: 
2632              return new wxMBConvUTF32BE
; 
2634         case wxFONTENCODING_UTF32LE
: 
2635              return new wxMBConvUTF32LE
; 
2638              // nothing to do but put here to suppress gcc warnings 
2645         wxMBConv_wxwin 
*conv 
= m_name 
? new wxMBConv_wxwin(m_name
) 
2646                                       : new wxMBConv_wxwin(m_encoding
); 
2652 #endif // wxUSE_FONTMAP 
2654     // NB: This is a hack to prevent deadlock. What could otherwise happen 
2655     //     in Unicode build: wxConvLocal creation ends up being here 
2656     //     because of some failure and logs the error. But wxLog will try to 
2657     //     attach timestamp, for which it will need wxConvLocal (to convert 
2658     //     time to char* and then wchar_t*), but that fails, tries to log 
2659     //     error, but wxLog has a (already locked) critical section that 
2660     //     guards static buffer. 
2661     static bool alreadyLoggingError 
= false; 
2662     if (!alreadyLoggingError
) 
2664         alreadyLoggingError 
= true; 
2665         wxLogError(_("Cannot convert from the charset '%s'!"), 
2669                          wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str() 
2670 #else // !wxUSE_FONTMAP 
// NOTE(review): m_encoding is a wxFontEncoding value (see its use as `enc`
// above) yet it is formatted with "%s" here -- this looks like a format /
// argument mismatch in the !wxUSE_FONTMAP build; confirm and consider "%d".
2671                          wxString::Format(_("encoding %s"), m_encoding
).c_str() 
2672 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP 
2674         alreadyLoggingError 
= false; 
// Creates the real converter with DoCreate(), stores it in m_convReal and
// resets m_deferred. The method is const, so the members are modified through
// a non-const alias of this object.
// NOTE(review): the condition deciding whether creation is still pending is
// not visible in this listing (presumably a test of m_deferred) -- confirm.
2680 void wxCSConv::CreateConvIfNeeded() const 
2684         wxCSConv 
*self 
= (wxCSConv 
*)this; // const_cast 
2687         // if we have neither the name nor the encoding, use the default 
2688         // encoding for this system 
2689         if ( !m_name 
&& m_encoding 
== wxFONTENCODING_SYSTEM 
) 
// m_name owns a wxStrdup()'d copy of the system encoding name
2691             self
->m_name 
= wxStrdup(wxLocale::GetSystemEncodingName()); 
2693 #endif // wxUSE_INTL 
2695         self
->m_convReal 
= DoCreate(); 
2696         self
->m_deferred 
= false; 
// Converts the multibyte string `psz` to wide characters in `buf`.
//
// The real converter is created first if necessary; the conversion is then
// forwarded to m_convReal->MB2WC() with the same arguments. The code after
// that is a built-in fallback which widens every byte, read as unsigned char,
// to the wchar_t of the same value.
// NOTE(review): the checks selecting between the two paths, and the value
// returned by the fallback, are not visible in this listing -- confirm.
2700 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
2702     CreateConvIfNeeded(); 
2705         return m_convReal
->MB2WC(buf
, psz
, n
); 
2708     size_t len 
= strlen(psz
); 
// `c <= len` copies len + 1 elements, i.e. the terminating NUL as well.
// NOTE(review): the visible loop never consults `n`, so `buf` apparently must
// have room for len + 1 wide characters -- confirm the callers guarantee it.
2712         for (size_t c 
= 0; c 
<= len
; c
++) 
2713             buf
[c
] = (unsigned char)(psz
[c
]); 
// Converts the wide character string `psz` to multibyte characters in `buf`.
//
// The real converter is created first if necessary; the conversion is then
// forwarded to m_convReal->WC2MB() with the same arguments. The code after
// that is a built-in fallback which narrows every wide character to char.
// NOTE(review): the checks selecting between the two paths, the body of the
// second loop and the returned value are not visible in this listing --
// confirm.
2719 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
2721     CreateConvIfNeeded(); 
2724         return m_convReal
->WC2MB(buf
, psz
, n
); 
2727     const size_t len 
= wxWcslen(psz
); 
// `c <= len` also processes the terminating NUL.
// NOTE(review): the visible loop never consults `n`, and the (char) cast
// truncates values that do not fit in a char; any range check guarding it is
// not visible here -- confirm.
2730         for (size_t c 
= 0; c 
<= len
; c
++) 
2734             buf
[c
] = (char)psz
[c
]; 
2739         for (size_t c 
= 0; c 
<= len
; c
++) 
2749 // ---------------------------------------------------------------------------- 
2751 // ---------------------------------------------------------------------------- 
// File-local converter instances backing the exported conversion objects
// below. Exactly one of the wxConvLibcObj definitions is meant to be compiled;
// the choice is made by the platform checks surrounding them.
2754     static wxMBConv_win32 wxConvLibcObj
; 
2755 #elif defined(__WXMAC__) && !defined(__MACH__) 
2756     static wxMBConv_mac wxConvLibcObj 
; 
2758     static wxMBConvLibc wxConvLibcObj
; 
// converter for wxFONTENCODING_SYSTEM and for ISO8859-1 respectively
2761 static wxCSConv 
wxConvLocalObj(wxFONTENCODING_SYSTEM
); 
2762 static wxCSConv 
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
); 
2763 static wxMBConvUTF7 wxConvUTF7Obj
; 
2764 static wxMBConvUTF8 wxConvUTF8Obj
; 
// Exported names: references (and, for wxConvCurrent / wxConvFileName,
// pointers) bound to the static instances defined above.
2766 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc 
= wxConvLibcObj
; 
2767 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal 
= wxConvLocalObj
; 
2768 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1 
= wxConvISO8859_1Obj
; 
2769 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7 
= wxConvUTF7Obj
; 
2770 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8 
= wxConvUTF8Obj
; 
// wxConvCurrent initially points at the libc converter
2771 WXDLLIMPEXP_DATA_BASE(wxMBConv 
*) wxConvCurrent 
= &wxConvLibcObj
; 
// NOTE(review): the object wxConvFileName points at is selected on lines that
// are not visible in this listing.
2772 WXDLLIMPEXP_DATA_BASE(wxMBConv 
*) wxConvFileName 
= & 
2780 #else // !wxUSE_WCHAR_T 
2782 // stand-ins in absence of wchar_t 
2783 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
, 
2788 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T