1 ///////////////////////////////////////////////////////////////////////////// 
   2 // Name:        src/common/strconv.cpp 
   3 // Purpose:     Unicode conversion classes 
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik, 
   5 //              Ryan Norton, Fredrik Roubert (UTF7) 
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik 
  10 //              (c) 2000-2003 Vadim Zeitlin 
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert 
  12 // Licence:     wxWindows licence 
  13 ///////////////////////////////////////////////////////////////////////////// 
  15 // For compilers that support precompilation, includes "wx.h". 
  16 #include "wx/wxprec.h" 
  26     #include "wx/hashmap.h" 
  29 #include "wx/strconv.h" 
  41 #if defined(__WIN32__) && !defined(__WXMICROWIN__) 
  42     #include "wx/msw/private.h" 
  43     #include "wx/msw/missing.h" 
  44     #define wxHAVE_WIN32_MB2WC 
  53     #include "wx/thread.h" 
  56 #include "wx/encconv.h" 
  57 #include "wx/fontmap.h" 
  61 #include <ATSUnicode.h> 
  62 #include <TextCommon.h> 
  63 #include <TextEncodingConverter.h> 
  66 // includes Mac headers 
  67 #include "wx/mac/private.h" 
  71 #define TRACE_STRCONV _T("strconv") 
  73 // WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to 
  75 #if SIZEOF_WCHAR_T == 2 
  80 // ============================================================================ 
  82 // ============================================================================ 
  84 // helper function of ToWChar() and cWC2MB(): returns true if the n bytes at this location are not all NUL 
  85 static bool NotAllNULs(const char *p
, size_t n
) 
  87     while ( n 
&& *p
++ == '\0' ) 
  93 // ---------------------------------------------------------------------------- 
  94 // UTF-16 en/decoding to/from UCS-4 with surrogates handling 
  95 // ---------------------------------------------------------------------------- 
  97 static size_t encode_utf16(wxUint32 input
, wxUint16 
*output
) 
 102             *output 
= (wxUint16
) input
; 
 106     else if (input 
>= 0x110000) 
 108         return wxCONV_FAILED
; 
 114             *output
++ = (wxUint16
) ((input 
>> 10) + 0xd7c0); 
 115             *output 
= (wxUint16
) ((input 
& 0x3ff) + 0xdc00); 
 122 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
) 
 124     if ((*input 
< 0xd800) || (*input 
> 0xdfff)) 
 129     else if ((input
[1] < 0xdc00) || (input
[1] > 0xdfff)) 
 132         return wxCONV_FAILED
; 
 136         output 
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00); 
 142     typedef wchar_t wxDecodeSurrogate_t
; 
 144     typedef wxUint16 wxDecodeSurrogate_t
; 
 145 #endif // WC_UTF16/!WC_UTF16 
 147 // returns the next UTF-32 character from the wchar_t buffer and advances the 
 148 // pointer to the character after this one 
 150 // if an invalid character is found, *pSrc is set to NULL, the caller must 
 152 static wxUint32 
wxDecodeSurrogate(const wxDecodeSurrogate_t 
**pSrc
) 
 156         n 
= decode_utf16(wx_reinterpret_cast(const wxUint16 
*, *pSrc
), out
); 
 157     if ( n 
== wxCONV_FAILED 
) 
 165 // ---------------------------------------------------------------------------- 
 167 // ---------------------------------------------------------------------------- 
 170 wxMBConv::ToWChar(wchar_t *dst
, size_t dstLen
, 
 171                   const char *src
, size_t srcLen
) const 
 173     // although new conversion classes are supposed to implement this function 
 174     // directly, the existing ones only implement the old MB2WC() and so, to 
 175     // avoid having to rewrite all conversion classes at once, we provide a 
 176     // default (but not efficient) implementation of this one in terms of the 
 177     // old function by copying the input to ensure that it's NUL-terminated and 
 178     // then using MB2WC() to convert it 
 180     // the number of chars [which would be] written to dst [if it were not NULL] 
 181     size_t dstWritten 
= 0; 
 183     // the number of NULs terminating this string 
 184     size_t nulLen 
= 0;  // not really needed, but just to avoid warnings 
 186     // if we were not given the input size we just have to assume that the 
 187     // string is properly terminated as we have no way of knowing how long it 
 188     // is anyhow, but if we do have the size check whether there are enough 
 192     if ( srcLen 
!= wxNO_LEN 
) 
 194         // we need to know how to find the end of this string 
 195         nulLen 
= GetMBNulLen(); 
 196         if ( nulLen 
== wxCONV_FAILED 
) 
 197             return wxCONV_FAILED
; 
 199         // if there are enough NULs we can avoid the copy 
 200         if ( srcLen 
< nulLen 
|| NotAllNULs(src 
+ srcLen 
- nulLen
, nulLen
) ) 
 202             // make a copy in order to properly NUL-terminate the string 
 203             bufTmp 
= wxCharBuffer(srcLen 
+ nulLen 
- 1 /* 1 will be added */); 
 204             char * const p 
= bufTmp
.data(); 
 205             memcpy(p
, src
, srcLen
); 
 206             for ( char *s 
= p 
+ srcLen
; s 
< p 
+ srcLen 
+ nulLen
; s
++ ) 
 212         srcEnd 
= src 
+ srcLen
; 
 214     else // quit after the first loop iteration 
 221         // try to convert the current chunk 
 222         size_t lenChunk 
= MB2WC(NULL
, src
, 0); 
 223         if ( lenChunk 
== wxCONV_FAILED 
) 
 224             return wxCONV_FAILED
; 
 226         lenChunk
++; // for the L'\0' at the end of this chunk 
 228         dstWritten 
+= lenChunk
; 
 232             // nothing left in the input string, conversion succeeded 
 238             if ( dstWritten 
> dstLen 
) 
 239                 return wxCONV_FAILED
; 
 241             if ( MB2WC(dst
, src
, lenChunk
) == wxCONV_FAILED 
) 
 242                 return wxCONV_FAILED
; 
 249             // we convert just one chunk in this case as this is the entire 
 254         // advance the input pointer past the end of this chunk 
 255         while ( NotAllNULs(src
, nulLen
) ) 
 257             // notice that we must skip over multiple bytes here as we suppose 
 258             // that if NUL takes 2 or 4 bytes, then all the other characters do 
 259             // too and so if we advanced by a single byte we might erroneously 
 260             // detect sequences of NUL bytes in the middle of the input 
 264         src 
+= nulLen
; // skipping over its terminator as well 
 266         // note that ">=" (and not just "==") is needed here as the terminator 
 267         // we skipped just above could be inside or just after the buffer 
 268         // delimited by srcEnd 
 277 wxMBConv::FromWChar(char *dst
, size_t dstLen
, 
 278                     const wchar_t *src
, size_t srcLen
) const 
 280     // the number of chars [which would be] written to dst [if it were not NULL] 
 281     size_t dstWritten 
= 0; 
 283     // make a copy of the input string unless it is already properly 
 286     // if we don't know its length we have no choice but to assume that it is, 
 287     // indeed, properly terminated 
 288     wxWCharBuffer bufTmp
; 
 289     if ( srcLen 
== wxNO_LEN 
) 
 291         srcLen 
= wxWcslen(src
) + 1; 
 293     else if ( srcLen 
!= 0 && src
[srcLen 
- 1] != L
'\0' ) 
 295         // make a copy in order to properly NUL-terminate the string 
 296         bufTmp 
= wxWCharBuffer(srcLen
); 
 297         memcpy(bufTmp
.data(), src
, srcLen 
* sizeof(wchar_t)); 
 301     const size_t lenNul 
= GetMBNulLen(); 
 302     for ( const wchar_t * const srcEnd 
= src 
+ srcLen
; 
 304           src 
+= wxWcslen(src
) + 1 /* skip L'\0' too */ ) 
 306         // try to convert the current chunk 
 307         size_t lenChunk 
= WC2MB(NULL
, src
, 0); 
 309         if ( lenChunk 
== wxCONV_FAILED 
) 
 310             return wxCONV_FAILED
; 
 313         dstWritten 
+= lenChunk
; 
 317             if ( dstWritten 
> dstLen 
) 
 318                 return wxCONV_FAILED
; 
 320             if ( WC2MB(dst
, src
, lenChunk
) == wxCONV_FAILED 
) 
 321                 return wxCONV_FAILED
; 
 330 size_t wxMBConv::MB2WC(wchar_t *outBuff
, const char *inBuff
, size_t outLen
) const 
 332     size_t rc 
= ToWChar(outBuff
, outLen
, inBuff
); 
 333     if ( rc 
!= wxCONV_FAILED 
) 
 335         // ToWChar() returns the buffer length, i.e. including the trailing 
 336         // NUL, while this method doesn't take it into account 
 343 size_t wxMBConv::WC2MB(char *outBuff
, const wchar_t *inBuff
, size_t outLen
) const 
 345     size_t rc 
= FromWChar(outBuff
, outLen
, inBuff
); 
 346     if ( rc 
!= wxCONV_FAILED 
) 
 354 wxMBConv::~wxMBConv() 
 356     // nothing to do here (necessary for Darwin linking probably) 
 359 const wxWCharBuffer 
wxMBConv::cMB2WC(const char *psz
) const 
 363         // calculate the length of the buffer needed first 
 364         const size_t nLen 
= MB2WC(NULL
, psz
, 0); 
 365         if ( nLen 
!= wxCONV_FAILED 
) 
 367             // now do the actual conversion 
 368             wxWCharBuffer 
buf(nLen 
/* +1 added implicitly */); 
 370             // +1 for the trailing NUL 
 371             if ( MB2WC(buf
.data(), psz
, nLen 
+ 1) != wxCONV_FAILED 
) 
 376     return wxWCharBuffer(); 
 379 const wxCharBuffer 
wxMBConv::cWC2MB(const wchar_t *pwz
) const 
 383         const size_t nLen 
= WC2MB(NULL
, pwz
, 0); 
 384         if ( nLen 
!= wxCONV_FAILED 
) 
 386             // extra space for trailing NUL(s) 
 387             static const size_t extraLen 
= GetMaxMBNulLen(); 
 389             wxCharBuffer 
buf(nLen 
+ extraLen 
- 1); 
 390             if ( WC2MB(buf
.data(), pwz
, nLen 
+ extraLen
) != wxCONV_FAILED 
) 
 395     return wxCharBuffer(); 
 399 wxMBConv::cMB2WC(const char *inBuff
, size_t inLen
, size_t *outLen
) const 
 401     const size_t dstLen 
= ToWChar(NULL
, 0, inBuff
, inLen
); 
 402     if ( dstLen 
!= wxCONV_FAILED 
) 
 404         wxWCharBuffer 
wbuf(dstLen 
- 1); 
 405         if ( ToWChar(wbuf
.data(), dstLen
, inBuff
, inLen
) != wxCONV_FAILED 
) 
 410                 if ( wbuf
[dstLen 
- 1] == L
'\0' ) 
 421     return wxWCharBuffer(); 
 425 wxMBConv::cWC2MB(const wchar_t *inBuff
, size_t inLen
, size_t *outLen
) const 
 427     size_t dstLen 
= FromWChar(NULL
, 0, inBuff
, inLen
); 
 428     if ( dstLen 
!= wxCONV_FAILED 
) 
 430         // special case of empty input: can't allocate 0 size buffer below as 
 431         // wxCharBuffer insists on NUL-terminating it 
 432         wxCharBuffer 
buf(dstLen 
? dstLen 
- 1 : 1); 
 433         if ( FromWChar(buf
.data(), dstLen
, inBuff
, inLen
) != wxCONV_FAILED 
) 
 439                 const size_t nulLen 
= GetMBNulLen(); 
 440                 if ( dstLen 
>= nulLen 
&& 
 441                         !NotAllNULs(buf
.data() + dstLen 
- nulLen
, nulLen
) ) 
 443                     // in this case the output is NUL-terminated and we're not 
 444                     // supposed to count NUL 
 456     return wxCharBuffer(); 
 459 // ---------------------------------------------------------------------------- 
 461 // ---------------------------------------------------------------------------- 
 463 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 465     return wxMB2WC(buf
, psz
, n
); 
 468 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 470     return wxWC2MB(buf
, psz
, n
); 
 473 // ---------------------------------------------------------------------------- 
 474 // wxConvBrokenFileNames 
 475 // ---------------------------------------------------------------------------- 
 479 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar 
*charset
) 
 481     if ( !charset 
|| wxStricmp(charset
, _T("UTF-8")) == 0 
 482                   || wxStricmp(charset
, _T("UTF8")) == 0  ) 
 483         m_conv 
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA
); 
 485         m_conv 
= new wxCSConv(charset
); 
 490 // ---------------------------------------------------------------------------- 
 492 // ---------------------------------------------------------------------------- 
 494 // Implementation (C) 2004 Fredrik Roubert 
 497 // BASE64 decoding table 
 499 static const unsigned char utf7unb64
[] = 
 501     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 502     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 503     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 504     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 505     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 506     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f, 
 507     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 
 508     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 509     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 
 510     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 
 511     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 
 512     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff, 
 513     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 
 514     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 
 515     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 
 516     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff, 
 517     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 518     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 519     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 520     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 521     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 522     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 523     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 524     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 525     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 526     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 527     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 528     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 529     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 530     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 531     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 532     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 
 535 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 539     while ( *psz 
&& (!buf 
|| (len 
< n
)) ) 
 541         unsigned char cc 
= *psz
++; 
 549         else if (*psz 
== '-') 
 557         else // start of BASE64 encoded string 
 561             for ( ok 
= lsb 
= false, d 
= 0, l 
= 0; 
 562                   (cc 
= utf7unb64
[(unsigned char)*psz
]) != 0xff; 
 567                 for (l 
+= 6; l 
>= 8; lsb 
= !lsb
) 
 569                     unsigned char c 
= (unsigned char)((d 
>> (l 
-= 8)) % 256); 
 579                             *buf 
= (wchar_t)(c 
<< 8); 
 588                 // in valid UTF7 we should have valid characters after '+' 
 589                 return wxCONV_FAILED
; 
 597     if ( buf 
&& (len 
< n
) ) 
 604 // BASE64 encoding table 
 606 static const unsigned char utf7enb64
[] = 
 608     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 
 609     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 
 610     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 
 611     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 
 612     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 
 613     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 
 614     'w', 'x', 'y', 'z', '0', '1', '2', '3', 
 615     '4', '5', '6', '7', '8', '9', '+', '/' 
 619 // UTF-7 encoding table 
 621 // 0 - Set D (directly encoded characters) 
 622 // 1 - Set O (optional direct characters) 
 623 // 2 - whitespace characters (optional) 
 624 // 3 - special characters 
 626 static const unsigned char utf7encode
[128] = 
 628     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, 
 629     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
 630     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3, 
 631     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 
 632     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 633     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1, 
 634     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 635     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3 
 638 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 642     while (*psz 
&& ((!buf
) || (len 
< n
))) 
 645         if (cc 
< 0x80 && utf7encode
[cc
] < 1) 
 654         else if (((wxUint32
)cc
) > 0xffff) 
 656             // no surrogate pair generation (yet?) 
 657             return wxCONV_FAILED
; 
 668                 // BASE64 encode string 
 669                 unsigned int lsb
, d
, l
; 
 670                 for (d 
= 0, l 
= 0; /*nothing*/; psz
++) 
 672                     for (lsb 
= 0; lsb 
< 2; lsb 
++) 
 675                         d 
+= lsb 
? cc 
& 0xff : (cc 
& 0xff00) >> 8; 
 677                         for (l 
+= 8; l 
>= 6; ) 
 681                                 *buf
++ = utf7enb64
[(d 
>> l
) % 64]; 
 687                     if (!(cc
) || (cc 
< 0x80 && utf7encode
[cc
] < 1)) 
 694                         *buf
++ = utf7enb64
[((d 
% 16) << (6 - l
)) % 64]; 
 706     if (buf 
&& (len 
< n
)) 
 712 // ---------------------------------------------------------------------------- 
 714 // ---------------------------------------------------------------------------- 
 716 static wxUint32 utf8_max
[]= 
 717     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff }; 
 719 // boundaries of the private use area we use to (temporarily) remap the 
 720 // bytes of sequences that are invalid in a UTF-8 encoded string 
 721 const wxUint32 wxUnicodePUA 
= 0x100000; 
 722 const wxUint32 wxUnicodePUAEnd 
= wxUnicodePUA 
+ 256; 
 724 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 728     while (*psz 
&& ((!buf
) || (len 
< n
))) 
 730         const char *opsz 
= psz
; 
 731         bool invalid 
= false; 
 732         unsigned char cc 
= *psz
++, fc 
= cc
; 
 734         for (cnt 
= 0; fc 
& 0x80; cnt
++) 
 744             // escape the escape character for octal escapes 
 745             if ((m_options 
& MAP_INVALID_UTF8_TO_OCTAL
) 
 746                     && cc 
== '\\' && (!buf 
|| len 
< n
)) 
 758                 // invalid UTF-8 sequence 
 763                 unsigned ocnt 
= cnt 
- 1; 
 764                 wxUint32 res 
= cc 
& (0x3f >> cnt
); 
 768                     if ((cc 
& 0xC0) != 0x80) 
 770                         // invalid UTF-8 sequence 
 776                     res 
= (res 
<< 6) | (cc 
& 0x3f); 
 779                 if (invalid 
|| res 
<= utf8_max
[ocnt
]) 
 781                     // illegal UTF-8 encoding 
 784                 else if ((m_options 
& MAP_INVALID_UTF8_TO_PUA
) && 
 785                         res 
>= wxUnicodePUA 
&& res 
< wxUnicodePUAEnd
) 
 787                     // if one of our PUA characters turns up externally 
 788                     // it must also be treated as an illegal sequence 
 789                     // (a bit like you have to escape an escape character) 
 795                     // cast is ok because wchar_t == wxUint16 if WC_UTF16 
 796                     size_t pa 
= encode_utf16(res
, (wxUint16 
*)buf
); 
 797                     if (pa 
== wxCONV_FAILED
) 
 809                         *buf
++ = (wchar_t)res
; 
 811 #endif // WC_UTF16/!WC_UTF16 
 817                 if (m_options 
& MAP_INVALID_UTF8_TO_PUA
) 
 819                     while (opsz 
< psz 
&& (!buf 
|| len 
< n
)) 
 822                         // cast is ok because wchar_t == wxUint16 if WC_UTF16 
 823                         size_t pa 
= encode_utf16((unsigned char)*opsz 
+ wxUnicodePUA
, (wxUint16 
*)buf
); 
 824                         wxASSERT(pa 
!= wxCONV_FAILED
); 
 831                             *buf
++ = (wchar_t)(wxUnicodePUA 
+ (unsigned char)*opsz
); 
 837                 else if (m_options 
& MAP_INVALID_UTF8_TO_OCTAL
) 
 839                     while (opsz 
< psz 
&& (!buf 
|| len 
< n
)) 
 841                         if ( buf 
&& len 
+ 3 < n 
) 
 843                             unsigned char on 
= *opsz
; 
 845                             *buf
++ = (wchar_t)( L
'0' + on 
/ 0100 ); 
 846                             *buf
++ = (wchar_t)( L
'0' + (on 
% 0100) / 010 ); 
 847                             *buf
++ = (wchar_t)( L
'0' + on 
% 010 ); 
 854                 else // MAP_INVALID_UTF8_NOT 
 856                     return wxCONV_FAILED
; 
 862     if (buf 
&& (len 
< n
)) 
 868 static inline bool isoctal(wchar_t wch
) 
 870     return L
'0' <= wch 
&& wch 
<= L
'7'; 
 873 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 877     while (*psz 
&& ((!buf
) || (len 
< n
))) 
 882         // cast is ok for WC_UTF16 
 883         size_t pa 
= decode_utf16((const wxUint16 
*)psz
, cc
); 
 884         psz 
+= (pa 
== wxCONV_FAILED
) ? 1 : pa
; 
 886         cc 
= (*psz
++) & 0x7fffffff; 
 889         if ( (m_options 
& MAP_INVALID_UTF8_TO_PUA
) 
 890                 && cc 
>= wxUnicodePUA 
&& cc 
< wxUnicodePUAEnd 
) 
 893                 *buf
++ = (char)(cc 
- wxUnicodePUA
); 
 896         else if ( (m_options 
& MAP_INVALID_UTF8_TO_OCTAL
) 
 897                     && cc 
== L
'\\' && psz
[0] == L
'\\' ) 
 904         else if ( (m_options 
& MAP_INVALID_UTF8_TO_OCTAL
) && 
 906                         isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) ) 
 910                 *buf
++ = (char) ((psz
[0] - L
'0') * 0100 + 
 911                                  (psz
[1] - L
'0') * 010 + 
 921             for (cnt 
= 0; cc 
> utf8_max
[cnt
]; cnt
++) 
 937                     *buf
++ = (char) ((-128 >> cnt
) | ((cc 
>> (cnt 
* 6)) & (0x3f >> cnt
))); 
 939                         *buf
++ = (char) (0x80 | ((cc 
>> (cnt 
* 6)) & 0x3f)); 
 945     if (buf 
&& (len 
< n
)) 
 951 // ============================================================================ 
 953 // ============================================================================ 
 955 #ifdef WORDS_BIGENDIAN 
 956     #define wxMBConvUTF16straight wxMBConvUTF16BE 
 957     #define wxMBConvUTF16swap     wxMBConvUTF16LE 
 959     #define wxMBConvUTF16swap     wxMBConvUTF16BE 
 960     #define wxMBConvUTF16straight wxMBConvUTF16LE 
 964 size_t wxMBConvUTF16Base::GetLength(const char *src
, size_t srcLen
) 
 966     if ( srcLen 
== wxNO_LEN 
) 
 968         // count the number of bytes in input, including the trailing NULs 
 969         const wxUint16 
*inBuff 
= wx_reinterpret_cast(const wxUint16 
*, src
); 
 970         for ( srcLen 
= 1; *inBuff
++; srcLen
++ ) 
 973         srcLen 
*= BYTES_PER_CHAR
; 
 975     else // we already have the length 
 977         // we can only convert an entire number of UTF-16 characters 
 978         if ( srcLen 
% BYTES_PER_CHAR 
) 
 979             return wxCONV_FAILED
; 
 985 // case when in-memory representation is UTF-16 too 
 988 // ---------------------------------------------------------------------------- 
 989 // conversions without endianness change 
 990 // ---------------------------------------------------------------------------- 
 993 wxMBConvUTF16straight::ToWChar(wchar_t *dst
, size_t dstLen
, 
 994                                const char *src
, size_t srcLen
) const 
 996     // set up the scene for using memcpy() (which is presumably more efficient 
 997     // than copying the bytes one by one) 
 998     srcLen 
= GetLength(src
, srcLen
); 
 999     if ( srcLen 
== wxNO_LEN 
) 
1000         return wxCONV_FAILED
; 
1002     const size_t inLen 
= srcLen 
/ BYTES_PER_CHAR
; 
1005         if ( dstLen 
< inLen 
) 
1006             return wxCONV_FAILED
; 
1008         memcpy(dst
, src
, srcLen
); 
1015 wxMBConvUTF16straight::FromWChar(char *dst
, size_t dstLen
, 
1016                                  const wchar_t *src
, size_t srcLen
) const 
1018     if ( srcLen 
== wxNO_LEN 
) 
1019         srcLen 
= wxWcslen(src
) + 1; 
1021     srcLen 
*= BYTES_PER_CHAR
; 
1025         if ( dstLen 
< srcLen 
) 
1026             return wxCONV_FAILED
; 
1028         memcpy(dst
, src
, srcLen
); 
1034 // ---------------------------------------------------------------------------- 
1035 // endian-reversing conversions 
1036 // ---------------------------------------------------------------------------- 
1039 wxMBConvUTF16swap::ToWChar(wchar_t *dst
, size_t dstLen
, 
1040                            const char *src
, size_t srcLen
) const 
1042     srcLen 
= GetLength(src
, srcLen
); 
1043     if ( srcLen 
== wxNO_LEN 
) 
1044         return wxCONV_FAILED
; 
1046     srcLen 
/= BYTES_PER_CHAR
; 
1050         if ( dstLen 
< srcLen 
) 
1051             return wxCONV_FAILED
; 
1053         const wxUint16 
*inBuff 
= wx_reinterpret_cast(const wxUint16 
*, src
); 
1054         for ( size_t n 
= 0; n 
< srcLen
; n
++, inBuff
++ ) 
1056             *dst
++ = wxUINT16_SWAP_ALWAYS(*inBuff
); 
1064 wxMBConvUTF16swap::FromWChar(char *dst
, size_t dstLen
, 
1065                              const wchar_t *src
, size_t srcLen
) const 
1067     if ( srcLen 
== wxNO_LEN 
) 
1068         srcLen 
= wxWcslen(src
) + 1; 
1070     srcLen 
*= BYTES_PER_CHAR
; 
1074         if ( dstLen 
< srcLen 
) 
1075             return wxCONV_FAILED
; 
1077         wxUint16 
*outBuff 
= wx_reinterpret_cast(wxUint16 
*, dst
); 
1078         for ( size_t n 
= 0; n 
< srcLen
; n 
+= BYTES_PER_CHAR
, src
++ ) 
1080             *outBuff
++ = wxUINT16_SWAP_ALWAYS(*src
); 
1087 #else // !WC_UTF16: wchar_t is UTF-32 
1089 // ---------------------------------------------------------------------------- 
1090 // conversions without endianness change 
1091 // ---------------------------------------------------------------------------- 
1094 wxMBConvUTF16straight::ToWChar(wchar_t *dst
, size_t dstLen
, 
1095                                const char *src
, size_t srcLen
) const 
1097     srcLen 
= GetLength(src
, srcLen
); 
1098     if ( srcLen 
== wxNO_LEN 
) 
1099         return wxCONV_FAILED
; 
1101     const size_t inLen 
= srcLen 
/ BYTES_PER_CHAR
; 
1104         // optimization: return maximal space which could be needed for this 
1105         // string even if the real size could be smaller if the buffer contains 
1111     const wxUint16 
*inBuff 
= wx_reinterpret_cast(const wxUint16 
*, src
); 
1112     for ( const wxUint16 
* const inEnd 
= inBuff 
+ inLen
; inBuff 
< inEnd
; ) 
1114         const wxUint32 ch 
= wxDecodeSurrogate(&inBuff
); 
1116             return wxCONV_FAILED
; 
1118         if ( ++outLen 
> dstLen 
) 
1119             return wxCONV_FAILED
; 
1129 wxMBConvUTF16straight::FromWChar(char *dst
, size_t dstLen
, 
1130                                  const wchar_t *src
, size_t srcLen
) const 
1132     if ( srcLen 
== wxNO_LEN 
) 
1133         srcLen 
= wxWcslen(src
) + 1; 
1136     wxUint16 
*outBuff 
= wx_reinterpret_cast(wxUint16 
*, dst
); 
1137     for ( size_t n 
= 0; n 
< srcLen
; n
++ ) 
1140         const size_t numChars 
= encode_utf16(*src
++, cc
); 
1141         if ( numChars 
== wxCONV_FAILED 
) 
1142             return wxCONV_FAILED
; 
1144         outLen 
+= numChars 
* BYTES_PER_CHAR
; 
1147             if ( outLen 
> dstLen 
) 
1148                 return wxCONV_FAILED
; 
1151             if ( numChars 
== 2 ) 
1153                 // second character of a surrogate 
1162 // ---------------------------------------------------------------------------- 
1163 // endian-reversing conversions 
1164 // ---------------------------------------------------------------------------- 
1167 wxMBConvUTF16swap::ToWChar(wchar_t *dst
, size_t dstLen
, 
1168                            const char *src
, size_t srcLen
) const 
1170     srcLen 
= GetLength(src
, srcLen
); 
1171     if ( srcLen 
== wxNO_LEN 
) 
1172         return wxCONV_FAILED
; 
1174     const size_t inLen 
= srcLen 
/ BYTES_PER_CHAR
; 
1177         // optimization: return maximal space which could be needed for this 
1178         // string even if the real size could be smaller if the buffer contains 
1184     const wxUint16 
*inBuff 
= wx_reinterpret_cast(const wxUint16 
*, src
); 
1185     for ( const wxUint16 
* const inEnd 
= inBuff 
+ inLen
; inBuff 
< inEnd
; ) 
1190         tmp
[0] = wxUINT16_SWAP_ALWAYS(*inBuff
); 
1192         tmp
[1] = wxUINT16_SWAP_ALWAYS(*inBuff
); 
1194         const size_t numChars 
= decode_utf16(tmp
, ch
); 
1195         if ( numChars 
== wxCONV_FAILED 
) 
1196             return wxCONV_FAILED
; 
1198         if ( numChars 
== 2 ) 
1201         if ( ++outLen 
> dstLen 
) 
1202             return wxCONV_FAILED
; 
1212 wxMBConvUTF16swap::FromWChar(char *dst
, size_t dstLen
, 
1213                              const wchar_t *src
, size_t srcLen
) const 
1215     if ( srcLen 
== wxNO_LEN 
) 
1216         srcLen 
= wxWcslen(src
) + 1; 
1219     wxUint16 
*outBuff 
= wx_reinterpret_cast(wxUint16 
*, dst
); 
1220     for ( const wchar_t *srcEnd 
= src 
+ srcLen
; src 
< srcEnd
; src
++ ) 
1223         const size_t numChars 
= encode_utf16(*src
, cc
); 
1224         if ( numChars 
== wxCONV_FAILED 
) 
1225             return wxCONV_FAILED
; 
1227         outLen 
+= numChars 
* BYTES_PER_CHAR
; 
1230             if ( outLen 
> dstLen 
) 
1231                 return wxCONV_FAILED
; 
1233             *outBuff
++ = wxUINT16_SWAP_ALWAYS(cc
[0]); 
1234             if ( numChars 
== 2 ) 
1236                 // second character of a surrogate 
1237                 *outBuff
++ = wxUINT16_SWAP_ALWAYS(cc
[1]); 
1245 #endif // WC_UTF16/!WC_UTF16 
1248 // ============================================================================ 
1250 // ============================================================================ 
1252 #ifdef WORDS_BIGENDIAN 
1253     #define wxMBConvUTF32straight  wxMBConvUTF32BE 
1254     #define wxMBConvUTF32swap      wxMBConvUTF32LE 
1256     #define wxMBConvUTF32swap      wxMBConvUTF32BE 
1257     #define wxMBConvUTF32straight  wxMBConvUTF32LE 
1261 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
; 
1262 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
; 
1265 size_t wxMBConvUTF32Base::GetLength(const char *src
, size_t srcLen
) 
1267     if ( srcLen 
== wxNO_LEN 
) 
1269         // count the number of bytes in input, including the trailing NULs 
1270         const wxUint32 
*inBuff 
= wx_reinterpret_cast(const wxUint32 
*, src
); 
1271         for ( srcLen 
= 1; *inBuff
++; srcLen
++ ) 
1274         srcLen 
*= BYTES_PER_CHAR
; 
1276     else // we already have the length 
1278         // we can only convert an entire number of UTF-32 characters 
1279         if ( srcLen 
% BYTES_PER_CHAR 
) 
1280             return wxCONV_FAILED
; 
1286 // case when in-memory representation is UTF-16 
1289 // ---------------------------------------------------------------------------- 
1290 // conversions without endianness change 
1291 // ---------------------------------------------------------------------------- 
1294 wxMBConvUTF32straight::ToWChar(wchar_t *dst
, size_t dstLen
, 
1295                                const char *src
, size_t srcLen
) const 
1297     srcLen 
= GetLength(src
, srcLen
); 
1298     if ( srcLen 
== wxNO_LEN 
) 
1299         return wxCONV_FAILED
; 
1301     const wxUint32 
*inBuff 
= wx_reinterpret_cast(const wxUint32 
*, src
); 
1302     const size_t inLen 
= srcLen 
/ BYTES_PER_CHAR
; 
1304     for ( size_t n 
= 0; n 
< inLen
; n
++ ) 
1307         const size_t numChars 
= encode_utf16(*inBuff
++, cc
); 
1308         if ( numChars 
== wxCONV_FAILED 
) 
1309             return wxCONV_FAILED
; 
1314             if ( outLen 
> dstLen 
) 
1315                 return wxCONV_FAILED
; 
1318             if ( numChars 
== 2 ) 
1320                 // second character of a surrogate 
1330 wxMBConvUTF32straight::FromWChar(char *dst
, size_t dstLen
, 
1331                                  const wchar_t *src
, size_t srcLen
) const 
1333     if ( srcLen 
== wxNO_LEN 
) 
1334         srcLen 
= wxWcslen(src
) + 1; 
1338         // optimization: return maximal space which could be needed for this 
1339         // string instead of the exact amount which could be less if there are 
1340         // any surrogates in the input 
1342         // we consider that surrogates are rare enough to make it worthwhile to 
1343         // avoid running the loop below at the cost of slightly extra memory 
1345         return srcLen 
* BYTES_PER_CHAR
; 
1348     wxUint32 
*outBuff 
= wx_reinterpret_cast(wxUint32 
*, dst
); 
1350     for ( const wchar_t * const srcEnd 
= src 
+ srcLen
; src 
< srcEnd
; ) 
1352         const wxUint32 ch 
= wxDecodeSurrogate(&src
); 
1354             return wxCONV_FAILED
; 
1356         outLen 
+= BYTES_PER_CHAR
; 
1358         if ( outLen 
> dstLen 
) 
1359             return wxCONV_FAILED
; 
1367 // ---------------------------------------------------------------------------- 
1368 // endian-reversing conversions 
1369 // ---------------------------------------------------------------------------- 
1372 wxMBConvUTF32swap::ToWChar(wchar_t *dst
, size_t dstLen
, 
1373                            const char *src
, size_t srcLen
) const 
1375     srcLen 
= GetLength(src
, srcLen
); 
1376     if ( srcLen 
== wxNO_LEN 
) 
1377         return wxCONV_FAILED
; 
1379     const wxUint32 
*inBuff 
= wx_reinterpret_cast(const wxUint32 
*, src
); 
1380     const size_t inLen 
= srcLen 
/ BYTES_PER_CHAR
; 
1382     for ( size_t n 
= 0; n 
< inLen
; n
++, inBuff
++ ) 
1385         const size_t numChars 
= encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff
), cc
); 
1386         if ( numChars 
== wxCONV_FAILED 
) 
1387             return wxCONV_FAILED
; 
1392             if ( outLen 
> dstLen 
) 
1393                 return wxCONV_FAILED
; 
1396             if ( numChars 
== 2 ) 
1398                 // second character of a surrogate 
1408 wxMBConvUTF32swap::FromWChar(char *dst
, size_t dstLen
, 
1409                              const wchar_t *src
, size_t srcLen
) const 
1411     if ( srcLen 
== wxNO_LEN 
) 
1412         srcLen 
= wxWcslen(src
) + 1; 
1416         // optimization: return maximal space which could be needed for this 
1417         // string instead of the exact amount which could be less if there are 
1418         // any surrogates in the input 
1420         // we consider that surrogates are rare enough to make it worthwhile to 
1421         // avoid running the loop below at the cost of slightly extra memory 
1423         return srcLen
*BYTES_PER_CHAR
; 
1426     wxUint32 
*outBuff 
= wx_reinterpret_cast(wxUint32 
*, dst
); 
1428     for ( const wchar_t * const srcEnd 
= src 
+ srcLen
; src 
< srcEnd
; ) 
1430         const wxUint32 ch 
= wxDecodeSurrogate(&src
); 
1432             return wxCONV_FAILED
; 
1434         outLen 
+= BYTES_PER_CHAR
; 
1436         if ( outLen 
> dstLen 
) 
1437             return wxCONV_FAILED
; 
1439         *outBuff
++ = wxUINT32_SWAP_ALWAYS(ch
); 
1445 #else // !WC_UTF16: wchar_t is UTF-32 
1447 // ---------------------------------------------------------------------------- 
1448 // conversions without endianness change 
1449 // ---------------------------------------------------------------------------- 
1452 wxMBConvUTF32straight::ToWChar(wchar_t *dst
, size_t dstLen
, 
1453                                const char *src
, size_t srcLen
) const 
1455     // use memcpy() as it should be much faster than hand-written loop 
1456     srcLen 
= GetLength(src
, srcLen
); 
1457     if ( srcLen 
== wxNO_LEN 
) 
1458         return wxCONV_FAILED
; 
1460     const size_t inLen 
= srcLen
/BYTES_PER_CHAR
; 
1463         if ( dstLen 
< inLen 
) 
1464             return wxCONV_FAILED
; 
1466         memcpy(dst
, src
, srcLen
); 
1473 wxMBConvUTF32straight::FromWChar(char *dst
, size_t dstLen
, 
1474                                  const wchar_t *src
, size_t srcLen
) const 
1476     if ( srcLen 
== wxNO_LEN 
) 
1477         srcLen 
= wxWcslen(src
) + 1; 
1479     srcLen 
*= BYTES_PER_CHAR
; 
1483         if ( dstLen 
< srcLen 
) 
1484             return wxCONV_FAILED
; 
1486         memcpy(dst
, src
, srcLen
); 
1492 // ---------------------------------------------------------------------------- 
1493 // endian-reversing conversions 
1494 // ---------------------------------------------------------------------------- 
1497 wxMBConvUTF32swap::ToWChar(wchar_t *dst
, size_t dstLen
, 
1498                            const char *src
, size_t srcLen
) const 
1500     srcLen 
= GetLength(src
, srcLen
); 
1501     if ( srcLen 
== wxNO_LEN 
) 
1502         return wxCONV_FAILED
; 
1504     srcLen 
/= BYTES_PER_CHAR
; 
1508         if ( dstLen 
< srcLen 
) 
1509             return wxCONV_FAILED
; 
1511         const wxUint32 
*inBuff 
= wx_reinterpret_cast(const wxUint32 
*, src
); 
1512         for ( size_t n 
= 0; n 
< srcLen
; n
++, inBuff
++ ) 
1514             *dst
++ = wxUINT32_SWAP_ALWAYS(*inBuff
); 
1522 wxMBConvUTF32swap::FromWChar(char *dst
, size_t dstLen
, 
1523                              const wchar_t *src
, size_t srcLen
) const 
1525     if ( srcLen 
== wxNO_LEN 
) 
1526         srcLen 
= wxWcslen(src
) + 1; 
1528     srcLen 
*= BYTES_PER_CHAR
; 
1532         if ( dstLen 
< srcLen 
) 
1533             return wxCONV_FAILED
; 
1535         wxUint32 
*outBuff 
= wx_reinterpret_cast(wxUint32 
*, dst
); 
1536         for ( size_t n 
= 0; n 
< srcLen
; n 
+= BYTES_PER_CHAR
, src
++ ) 
1538             *outBuff
++ = wxUINT32_SWAP_ALWAYS(*src
); 
1545 #endif // WC_UTF16/!WC_UTF16 
1548 // ============================================================================ 
1549 // The classes doing conversion using the iconv_xxx() functions 
1550 // ============================================================================ 
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if output buffer is _exactly_ as big as needed. Such case is
//     (unless there's yet another bug in glibc) the only case when iconv()
//     returns with (size_t)-1 (which means error) and says there are 0 bytes
//     left in the input buffer -- when _real_ error occurs,
//     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
//     iconv() failure.
//     [This bug does not appear in glibc 2.2.]
1562 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1 
1563 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \ 
1564                                      (errno != E2BIG || bufLeft != 0)) 
1566 #define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1) 
1569 #define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x)) 
1571 #define ICONV_T_INVALID ((iconv_t)-1) 
1573 #if SIZEOF_WCHAR_T == 4 
1574     #define WC_BSWAP    wxUINT32_SWAP_ALWAYS 
1575     #define WC_ENC      wxFONTENCODING_UTF32 
1576 #elif SIZEOF_WCHAR_T == 2 
1577     #define WC_BSWAP    wxUINT16_SWAP_ALWAYS 
1578     #define WC_ENC      wxFONTENCODING_UTF16 
1579 #else // sizeof(wchar_t) != 2 nor 4 
1580     // does this ever happen? 
1581     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org" 
1584 // ---------------------------------------------------------------------------- 
1585 // wxMBConv_iconv: encapsulates an iconv character set 
1586 // ---------------------------------------------------------------------------- 
1588 class wxMBConv_iconv 
: public wxMBConv
 
1591     wxMBConv_iconv(const wxChar 
*name
); 
1592     virtual ~wxMBConv_iconv(); 
1594     virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const; 
1595     virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const; 
1597     // classify this encoding as explained in wxMBConv::GetMBNulLen() comment 
1598     virtual size_t GetMBNulLen() const; 
1600     virtual wxMBConv 
*Clone() const 
1602         wxMBConv_iconv 
*p 
= new wxMBConv_iconv(m_name
); 
1603         p
->m_minMBCharWidth 
= m_minMBCharWidth
; 
1608         { return (m2w 
!= ICONV_T_INVALID
) && (w2m 
!= ICONV_T_INVALID
); } 
1611     // the iconv handlers used to translate from multibyte 
1612     // to wide char and in the other direction 
1617     // guards access to m2w and w2m objects 
1618     wxMutex m_iconvMutex
; 
1622     // the name (for iconv_open()) of a wide char charset -- if none is 
1623     // available on this machine, it will remain NULL 
1624     static wxString ms_wcCharsetName
; 
1626     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has 
1627     // different endian-ness than the native one 
1628     static bool ms_wcNeedsSwap
; 
1631     // name of the encoding handled by this conversion 
1634     // cached result of GetMBNulLen(); set to 0 meaning "unknown" 
1636     size_t m_minMBCharWidth
; 
1639 // make the constructor available for unit testing 
1640 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_iconv( const wxChar
* name 
) 
1642     wxMBConv_iconv
* result 
= new wxMBConv_iconv( name 
); 
1643     if ( !result
->IsOk() ) 
1652 wxString 
wxMBConv_iconv::ms_wcCharsetName
; 
1653 bool wxMBConv_iconv::ms_wcNeedsSwap 
= false; 
1655 wxMBConv_iconv::wxMBConv_iconv(const wxChar 
*name
) 
1658     m_minMBCharWidth 
= 0; 
1660     // iconv operates with chars, not wxChars, but luckily it uses only ASCII 
1661     // names for the charsets 
1662     const wxCharBuffer 
cname(wxString(name
).ToAscii()); 
1664     // check for charset that represents wchar_t: 
1665     if ( ms_wcCharsetName
.empty() ) 
1667         wxLogTrace(TRACE_STRCONV
, _T("Looking for wide char codeset:")); 
1670         const wxChar 
**names 
= wxFontMapperBase::GetAllEncodingNames(WC_ENC
); 
1671 #else // !wxUSE_FONTMAP 
1672         static const wxChar 
*names_static
[] = 
1674 #if SIZEOF_WCHAR_T == 4 
1676 #elif SIZEOF_WCHAR_T = 2 
1681         const wxChar 
**names 
= names_static
; 
1682 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP 
1684         for ( ; *names 
&& ms_wcCharsetName
.empty(); ++names 
) 
1686             const wxString 
nameCS(*names
); 
1688             // first try charset with explicit bytesex info (e.g. "UCS-4LE"): 
1689             wxString 
nameXE(nameCS
); 
1691 #ifdef WORDS_BIGENDIAN 
1693 #else // little endian 
1697             wxLogTrace(TRACE_STRCONV
, _T("  trying charset \"%s\""), 
1700             m2w 
= iconv_open(nameXE
.ToAscii(), cname
); 
1701             if ( m2w 
== ICONV_T_INVALID 
) 
1703                 // try charset w/o bytesex info (e.g. "UCS4") 
1704                 wxLogTrace(TRACE_STRCONV
, _T("  trying charset \"%s\""), 
1706                 m2w 
= iconv_open(nameCS
.ToAscii(), cname
); 
1708                 // and check for bytesex ourselves: 
1709                 if ( m2w 
!= ICONV_T_INVALID 
) 
1711                     char    buf
[2], *bufPtr
; 
1712                     wchar_t wbuf
[2], *wbufPtr
; 
1720                     outsz 
= SIZEOF_WCHAR_T 
* 2; 
1725                         m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
, 
1726                         (char**)&wbufPtr
, &outsz
); 
1728                     if (ICONV_FAILED(res
, insz
)) 
1730                         wxLogLastError(wxT("iconv")); 
1731                         wxLogError(_("Conversion to charset '%s' doesn't work."), 
1734                     else // ok, can convert to this encoding, remember it 
1736                         ms_wcCharsetName 
= nameCS
; 
1737                         ms_wcNeedsSwap 
= wbuf
[0] != (wchar_t)buf
[0]; 
1741             else // use charset not requiring byte swapping 
1743                 ms_wcCharsetName 
= nameXE
; 
1747         wxLogTrace(TRACE_STRCONV
, 
1748                    wxT("iconv wchar_t charset is \"%s\"%s"), 
1749                    ms_wcCharsetName
.empty() ? _T("<none>") 
1750                                             : ms_wcCharsetName
.c_str(), 
1751                    ms_wcNeedsSwap 
? _T(" (needs swap)") 
1754     else // we already have ms_wcCharsetName 
1756         m2w 
= iconv_open(ms_wcCharsetName
.ToAscii(), cname
); 
1759     if ( ms_wcCharsetName
.empty() ) 
1761         w2m 
= ICONV_T_INVALID
; 
1765         w2m 
= iconv_open(cname
, ms_wcCharsetName
.ToAscii()); 
1766         if ( w2m 
== ICONV_T_INVALID 
) 
1768             wxLogTrace(TRACE_STRCONV
, 
1769                        wxT("\"%s\" -> \"%s\" works but not the converse!?"), 
1770                        ms_wcCharsetName
.c_str(), cname
.data()); 
1775 wxMBConv_iconv::~wxMBConv_iconv() 
1777     if ( m2w 
!= ICONV_T_INVALID 
) 
1779     if ( w2m 
!= ICONV_T_INVALID 
) 
1783 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1785     // find the string length: notice that must be done differently for 
1786     // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs 
1788     const size_t nulLen 
= GetMBNulLen(); 
1792             return wxCONV_FAILED
; 
1795             inbuf 
= strlen(psz
); // arguably more optimized than our version 
1800             // for UTF-16/32 not only we need to have 2/4 consecutive NULs but 
1801             // they also have to start at character boundary and not span two 
1802             // adjacent characters 
1804             for ( p 
= psz
; NotAllNULs(p
, nulLen
); p 
+= nulLen 
) 
1811     // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle. 
1812     //     Unfortunately there are a couple of global wxCSConv objects such as 
1813     //     wxConvLocal that are used all over wx code, so we have to make sure 
1814     //     the handle is used by at most one thread at the time. Otherwise 
1815     //     only a few wx classes would be safe to use from non-main threads 
1816     //     as MB<->WC conversion would fail "randomly". 
1817     wxMutexLocker 
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
); 
1818 #endif // wxUSE_THREADS 
1820     size_t outbuf 
= n 
* SIZEOF_WCHAR_T
; 
1822     // VS: Use these instead of psz, buf because iconv() modifies its arguments: 
1823     wchar_t *bufPtr 
= buf
; 
1824     const char *pszPtr 
= psz
; 
1828         // have destination buffer, convert there 
1830                      ICONV_CHAR_CAST(&pszPtr
), &inbuf
, 
1831                      (char**)&bufPtr
, &outbuf
); 
1832         res 
= n 
- (outbuf 
/ SIZEOF_WCHAR_T
); 
1836             // convert to native endianness 
1837             for ( unsigned i 
= 0; i 
< res
; i
++ ) 
1838                 buf
[n
] = WC_BSWAP(buf
[i
]); 
1841         // NUL-terminate the string if there is any space left 
1847         // no destination buffer... convert using temp buffer 
1848         // to calculate destination buffer requirement 
1855             outbuf 
= 8 * SIZEOF_WCHAR_T
; 
1858                          ICONV_CHAR_CAST(&pszPtr
), &inbuf
, 
1859                          (char**)&bufPtr
, &outbuf 
); 
1861             res 
+= 8 - (outbuf 
/ SIZEOF_WCHAR_T
); 
1863         while ((cres 
== (size_t)-1) && (errno 
== E2BIG
)); 
1866     if (ICONV_FAILED(cres
, inbuf
)) 
1868         //VS: it is ok if iconv fails, hence trace only 
1869         wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); 
1870         return wxCONV_FAILED
; 
1876 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1879     // NB: explained in MB2WC 
1880     wxMutexLocker 
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
); 
1883     size_t inlen 
= wxWcslen(psz
); 
1884     size_t inbuf 
= inlen 
* SIZEOF_WCHAR_T
; 
1888     wchar_t *tmpbuf 
= 0; 
1892         // need to copy to temp buffer to switch endianness 
1893         // (doing WC_BSWAP twice on the original buffer won't help, as it 
1894         //  could be in read-only memory, or be accessed in some other thread) 
1895         tmpbuf 
= (wchar_t *)malloc(inbuf 
+ SIZEOF_WCHAR_T
); 
1896         for ( size_t i 
= 0; i 
< inlen
; i
++ ) 
1897             tmpbuf
[n
] = WC_BSWAP(psz
[i
]); 
1899         tmpbuf
[inlen
] = L
'\0'; 
1905         // have destination buffer, convert there 
1906         cres 
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf 
); 
1910         // NB: iconv was given only wcslen(psz) characters on input, and so 
1911         //     it couldn't convert the trailing zero. Let's do it ourselves 
1912         //     if there's some room left for it in the output buffer. 
1918         // no destination buffer: convert using temp buffer 
1919         // to calculate destination buffer requirement 
1927             cres 
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf 
); 
1931         while ((cres 
== (size_t)-1) && (errno 
== E2BIG
)); 
1939     if (ICONV_FAILED(cres
, inbuf
)) 
1941         wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); 
1942         return wxCONV_FAILED
; 
1948 size_t wxMBConv_iconv::GetMBNulLen() const 
1950     if ( m_minMBCharWidth 
== 0 ) 
1952         wxMBConv_iconv 
* const self 
= wxConstCast(this, wxMBConv_iconv
); 
1955         // NB: explained in MB2WC 
1956         wxMutexLocker 
lock(self
->m_iconvMutex
); 
1959         wchar_t *wnul 
= L
""; 
1960         char buf
[8]; // should be enough for NUL in any encoding 
1961         size_t inLen 
= sizeof(wchar_t), 
1962                outLen 
= WXSIZEOF(buf
); 
1963         char *inBuff 
= (char *)wnul
; 
1964         char *outBuff 
= buf
; 
1965         if ( iconv(w2m
, ICONV_CHAR_CAST(&inBuff
), &inLen
, &outBuff
, &outLen
) == (size_t)-1 ) 
1967             self
->m_minMBCharWidth 
= (size_t)-1; 
1971             self
->m_minMBCharWidth 
= outBuff 
- buf
; 
1975     return m_minMBCharWidth
; 
1978 #endif // HAVE_ICONV 
1981 // ============================================================================ 
1982 // Win32 conversion classes 
1983 // ============================================================================ 
1985 #ifdef wxHAVE_WIN32_MB2WC 
1989 extern WXDLLIMPEXP_BASE 
long wxCharsetToCodepage(const wxChar 
*charset
); 
1990 extern WXDLLIMPEXP_BASE 
long wxEncodingToCodepage(wxFontEncoding encoding
); 
1993 class wxMBConv_win32 
: public wxMBConv
 
1998         m_CodePage 
= CP_ACP
; 
1999         m_minMBCharWidth 
= 0; 
2002     wxMBConv_win32(const wxMBConv_win32
& conv
) 
2005         m_CodePage 
= conv
.m_CodePage
; 
2006         m_minMBCharWidth 
= conv
.m_minMBCharWidth
; 
2010     wxMBConv_win32(const wxChar
* name
) 
2012         m_CodePage 
= wxCharsetToCodepage(name
); 
2013         m_minMBCharWidth 
= 0; 
2016     wxMBConv_win32(wxFontEncoding encoding
) 
2018         m_CodePage 
= wxEncodingToCodepage(encoding
); 
2019         m_minMBCharWidth 
= 0; 
2021 #endif // wxUSE_FONTMAP 
2023     virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
        // note that we have to use the MB_ERR_INVALID_CHARS flag as without it
        // the behaviour is not compatible with the Unix version (using iconv)
        // and breaks the library itself, e.g. wxTextInputStream::NextChar()
        // wouldn't work if reading an incomplete MB char didn't result in an
        // error
        //
        // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
        // Win XP or newer and it is not supported for UTF-[78] so we always
        // use our own conversions in this case. See
        //     http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
        //     http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
2036         if ( m_CodePage 
== CP_UTF8 
) 
2038             return wxMBConvUTF8().MB2WC(buf
, psz
, n
); 
2041         if ( m_CodePage 
== CP_UTF7 
) 
2043             return wxMBConvUTF7().MB2WC(buf
, psz
, n
); 
2047         if ( (m_CodePage 
< 50000 && m_CodePage 
!= CP_SYMBOL
) && 
2048                 IsAtLeastWin2kSP4() ) 
2050             flags 
= MB_ERR_INVALID_CHARS
; 
2053         const size_t len 
= ::MultiByteToWideChar
 
2055                                 m_CodePage
,     // code page 
2056                                 flags
,          // flags: fall on error 
2057                                 psz
,            // input string 
2058                                 -1,             // its length (NUL-terminated) 
2059                                 buf
,            // output string 
2060                                 buf 
? n 
: 0     // size of output buffer 
2064             // function totally failed 
2065             return wxCONV_FAILED
; 
2068         // if we were really converting and didn't use MB_ERR_INVALID_CHARS, 
2069         // check if we succeeded, by doing a double trip: 
2070         if ( !flags 
&& buf 
) 
2072             const size_t mbLen 
= strlen(psz
); 
2073             wxCharBuffer 
mbBuf(mbLen
); 
2074             if ( ::WideCharToMultiByte
 
2081                       mbLen 
+ 1,        // size in bytes, not length 
2085                   strcmp(mbBuf
, psz
) != 0 ) 
2087                 // we didn't obtain the same thing we started from, hence 
2088                 // the conversion was lossy and we consider that it failed 
2089                 return wxCONV_FAILED
; 
2093         // note that it returns count of written chars for buf != NULL and size 
2094         // of the needed buffer for buf == NULL so in either case the length of 
2095         // the string (which never includes the terminating NUL) is one less 
2099     virtual size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const 
2102             we have a problem here: by default, WideCharToMultiByte() may 
2103             replace characters unrepresentable in the target code page with bad 
2104             quality approximations such as turning "1/2" symbol (U+00BD) into 
2105             "1" for the code pages which don't have it and we, obviously, want 
2106             to avoid this at any price 
2108             the trouble is that this function does it _silently_, i.e. it won't 
2109             even tell us whether it did or not... Win98/2000 and higher provide 
2110             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and 
2111             we have to resort to a round trip, i.e. check that converting back 
2112             results in the same string -- this is, of course, expensive but 
2113             otherwise we simply can't be sure to not garble the data. 
        // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
        // it doesn't work with CJK encodings (which we test for rather roughly
        // here...) nor with UTF-7/8 nor, of course, with Windows versions not
        // supporting it
2120         BOOL usedDef 
wxDUMMY_INITIALIZE(false); 
2123         if ( CanUseNoBestFit() && m_CodePage 
< 50000 ) 
2125             // it's our lucky day 
2126             flags 
= WC_NO_BEST_FIT_CHARS
; 
2127             pUsedDef 
= &usedDef
; 
2129         else // old system or unsupported encoding 
2135         const size_t len 
= ::WideCharToMultiByte
 
2137                                 m_CodePage
,     // code page 
2138                                 flags
,          // either none or no best fit 
2139                                 pwz
,            // input string 
2140                                 -1,             // it is (wide) NUL-terminated 
2141                                 buf
,            // output buffer 
2142                                 buf 
? n 
: 0,    // and its size 
2143                                 NULL
,           // default "replacement" char 
2144                                 pUsedDef        
// [out] was it used? 
2149             // function totally failed 
2150             return wxCONV_FAILED
; 
2153         // if we were really converting, check if we succeeded 
2158                 // check if the conversion failed, i.e. if any replacements 
2161                     return wxCONV_FAILED
; 
2163             else // we must resort to double tripping... 
2165                 wxWCharBuffer 
wcBuf(n
); 
2166                 if ( MB2WC(wcBuf
.data(), buf
, n
) == wxCONV_FAILED 
|| 
2167                         wcscmp(wcBuf
, pwz
) != 0 ) 
2169                     // we didn't obtain the same thing we started from, hence 
2170                     // the conversion was lossy and we consider that it failed 
2171                     return wxCONV_FAILED
; 
2176         // see the comment above for the reason of "len - 1" 
2180     virtual size_t GetMBNulLen() const 
2182         if ( m_minMBCharWidth 
== 0 ) 
2184             int len 
= ::WideCharToMultiByte
 
2186                             m_CodePage
,     // code page 
2188                             L
"",            // input string 
2189                             1,              // translate just the NUL 
2190                             NULL
,           // output buffer 
2192                             NULL
,           // no replacement char 
2193                             NULL            
// [out] don't care if it was used 
2196             wxMBConv_win32 
* const self 
= wxConstCast(this, wxMBConv_win32
); 
2200                     wxLogDebug(_T("Unexpected NUL length %d"), len
); 
2201                     self
->m_minMBCharWidth 
= (size_t)-1; 
2205                     self
->m_minMBCharWidth 
= (size_t)-1; 
2211                     self
->m_minMBCharWidth 
= len
; 
2216         return m_minMBCharWidth
; 
2219     virtual wxMBConv 
*Clone() const { return new wxMBConv_win32(*this); } 
2221     bool IsOk() const { return m_CodePage 
!= -1; } 
2224     static bool CanUseNoBestFit() 
2226         static int s_isWin98Or2k 
= -1; 
2228         if ( s_isWin98Or2k 
== -1 ) 
2231             switch ( wxGetOsVersion(&verMaj
, &verMin
) ) 
2233                 case wxOS_WINDOWS_9X
: 
2234                     s_isWin98Or2k 
= verMaj 
>= 4 && verMin 
>= 10; 
2237                 case wxOS_WINDOWS_NT
: 
2238                     s_isWin98Or2k 
= verMaj 
>= 5; 
2242                     // unknown: be conservative by default 
2247             wxASSERT_MSG( s_isWin98Or2k 
!= -1, _T("should be set above") ); 
2250         return s_isWin98Or2k 
== 1; 
2253     static bool IsAtLeastWin2kSP4() 
2258         static int s_isAtLeastWin2kSP4 
= -1; 
2260         if ( s_isAtLeastWin2kSP4 
== -1 ) 
2262             OSVERSIONINFOEX ver
; 
2264             memset(&ver
, 0, sizeof(ver
)); 
2265             ver
.dwOSVersionInfoSize 
= sizeof(ver
); 
2266             GetVersionEx((OSVERSIONINFO
*)&ver
); 
2268             s_isAtLeastWin2kSP4 
= 
2269               ((ver
.dwMajorVersion 
> 5) || // Vista+ 
2270                (ver
.dwMajorVersion 
== 5 && ver
.dwMinorVersion 
> 0) || // XP/2003 
2271                (ver
.dwMajorVersion 
== 5 && ver
.dwMinorVersion 
== 0 && 
2272                ver
.wServicePackMajor 
>= 4)) // 2000 SP4+ 
2276         return s_isAtLeastWin2kSP4 
== 1; 
2281     // the code page we're working with 
    // cached result of GetMBNulLen(), set to 0 initially meaning "unknown"
2286     size_t m_minMBCharWidth
; 
2289 #endif // wxHAVE_WIN32_MB2WC 
2291 // ============================================================================ 
2292 // Cocoa conversion classes 
2293 // ============================================================================ 
2295 #if defined(__WXCOCOA__) 
2297 // RN: There is no UTF-32 support in either Core Foundation or Cocoa. 
2298 // Strangely enough, internally Core Foundation uses 
2299 // UTF-32 internally quite a bit - its just not public (yet). 
2301 #include <CoreFoundation/CFString.h> 
2302 #include <CoreFoundation/CFStringEncodingExt.h> 
2304 CFStringEncoding 
wxCFStringEncFromFontEnc(wxFontEncoding encoding
) 
2306     CFStringEncoding enc 
= kCFStringEncodingInvalidId 
; 
2310         case wxFONTENCODING_DEFAULT 
: 
2311             enc 
= CFStringGetSystemEncoding(); 
2314         case wxFONTENCODING_ISO8859_1 
: 
2315             enc 
= kCFStringEncodingISOLatin1 
; 
2317         case wxFONTENCODING_ISO8859_2 
: 
2318             enc 
= kCFStringEncodingISOLatin2
; 
2320         case wxFONTENCODING_ISO8859_3 
: 
2321             enc 
= kCFStringEncodingISOLatin3 
; 
2323         case wxFONTENCODING_ISO8859_4 
: 
2324             enc 
= kCFStringEncodingISOLatin4
; 
2326         case wxFONTENCODING_ISO8859_5 
: 
2327             enc 
= kCFStringEncodingISOLatinCyrillic
; 
2329         case wxFONTENCODING_ISO8859_6 
: 
2330             enc 
= kCFStringEncodingISOLatinArabic
; 
2332         case wxFONTENCODING_ISO8859_7 
: 
2333             enc 
= kCFStringEncodingISOLatinGreek
; 
2335         case wxFONTENCODING_ISO8859_8 
: 
2336             enc 
= kCFStringEncodingISOLatinHebrew
; 
2338         case wxFONTENCODING_ISO8859_9 
: 
2339             enc 
= kCFStringEncodingISOLatin5
; 
2341         case wxFONTENCODING_ISO8859_10 
: 
2342             enc 
= kCFStringEncodingISOLatin6
; 
2344         case wxFONTENCODING_ISO8859_11 
: 
2345             enc 
= kCFStringEncodingISOLatinThai
; 
2347         case wxFONTENCODING_ISO8859_13 
: 
2348             enc 
= kCFStringEncodingISOLatin7
; 
2350         case wxFONTENCODING_ISO8859_14 
: 
2351             enc 
= kCFStringEncodingISOLatin8
; 
2353         case wxFONTENCODING_ISO8859_15 
: 
2354             enc 
= kCFStringEncodingISOLatin9
; 
2357         case wxFONTENCODING_KOI8 
: 
2358             enc 
= kCFStringEncodingKOI8_R
; 
2360         case wxFONTENCODING_ALTERNATIVE 
: // MS-DOS CP866 
2361             enc 
= kCFStringEncodingDOSRussian
; 
2364 //      case wxFONTENCODING_BULGARIAN : 
2368         case wxFONTENCODING_CP437 
: 
2369             enc 
= kCFStringEncodingDOSLatinUS 
; 
2371         case wxFONTENCODING_CP850 
: 
2372             enc 
= kCFStringEncodingDOSLatin1
; 
2374         case wxFONTENCODING_CP852 
: 
2375             enc 
= kCFStringEncodingDOSLatin2
; 
2377         case wxFONTENCODING_CP855 
: 
2378             enc 
= kCFStringEncodingDOSCyrillic
; 
2380         case wxFONTENCODING_CP866 
: 
2381             enc 
= kCFStringEncodingDOSRussian 
; 
2383         case wxFONTENCODING_CP874 
: 
2384             enc 
= kCFStringEncodingDOSThai
; 
2386         case wxFONTENCODING_CP932 
: 
2387             enc 
= kCFStringEncodingDOSJapanese
; 
2389         case wxFONTENCODING_CP936 
: 
2390             enc 
= kCFStringEncodingDOSChineseSimplif 
; 
2392         case wxFONTENCODING_CP949 
: 
2393             enc 
= kCFStringEncodingDOSKorean
; 
2395         case wxFONTENCODING_CP950 
: 
2396             enc 
= kCFStringEncodingDOSChineseTrad
; 
2398         case wxFONTENCODING_CP1250 
: 
2399             enc 
= kCFStringEncodingWindowsLatin2
; 
2401         case wxFONTENCODING_CP1251 
: 
2402             enc 
= kCFStringEncodingWindowsCyrillic 
; 
2404         case wxFONTENCODING_CP1252 
: 
2405             enc 
= kCFStringEncodingWindowsLatin1 
; 
2407         case wxFONTENCODING_CP1253 
: 
2408             enc 
= kCFStringEncodingWindowsGreek
; 
2410         case wxFONTENCODING_CP1254 
: 
2411             enc 
= kCFStringEncodingWindowsLatin5
; 
2413         case wxFONTENCODING_CP1255 
: 
2414             enc 
= kCFStringEncodingWindowsHebrew 
; 
2416         case wxFONTENCODING_CP1256 
: 
2417             enc 
= kCFStringEncodingWindowsArabic 
; 
2419         case wxFONTENCODING_CP1257 
: 
2420             enc 
= kCFStringEncodingWindowsBalticRim
; 
2422 //   This only really encodes to UTF7 (if that) evidently 
2423 //        case wxFONTENCODING_UTF7 : 
2424 //            enc = kCFStringEncodingNonLossyASCII ; 
2426         case wxFONTENCODING_UTF8 
: 
2427             enc 
= kCFStringEncodingUTF8 
; 
2429         case wxFONTENCODING_EUC_JP 
: 
2430             enc 
= kCFStringEncodingEUC_JP
; 
2432         case wxFONTENCODING_UTF16 
: 
2433             enc 
= kCFStringEncodingUnicode 
; 
2435         case wxFONTENCODING_MACROMAN 
: 
2436             enc 
= kCFStringEncodingMacRoman 
; 
2438         case wxFONTENCODING_MACJAPANESE 
: 
2439             enc 
= kCFStringEncodingMacJapanese 
; 
2441         case wxFONTENCODING_MACCHINESETRAD 
: 
2442             enc 
= kCFStringEncodingMacChineseTrad 
; 
2444         case wxFONTENCODING_MACKOREAN 
: 
2445             enc 
= kCFStringEncodingMacKorean 
; 
2447         case wxFONTENCODING_MACARABIC 
: 
2448             enc 
= kCFStringEncodingMacArabic 
; 
2450         case wxFONTENCODING_MACHEBREW 
: 
2451             enc 
= kCFStringEncodingMacHebrew 
; 
2453         case wxFONTENCODING_MACGREEK 
: 
2454             enc 
= kCFStringEncodingMacGreek 
; 
2456         case wxFONTENCODING_MACCYRILLIC 
: 
2457             enc 
= kCFStringEncodingMacCyrillic 
; 
2459         case wxFONTENCODING_MACDEVANAGARI 
: 
2460             enc 
= kCFStringEncodingMacDevanagari 
; 
2462         case wxFONTENCODING_MACGURMUKHI 
: 
2463             enc 
= kCFStringEncodingMacGurmukhi 
; 
2465         case wxFONTENCODING_MACGUJARATI 
: 
2466             enc 
= kCFStringEncodingMacGujarati 
; 
2468         case wxFONTENCODING_MACORIYA 
: 
2469             enc 
= kCFStringEncodingMacOriya 
; 
2471         case wxFONTENCODING_MACBENGALI 
: 
2472             enc 
= kCFStringEncodingMacBengali 
; 
2474         case wxFONTENCODING_MACTAMIL 
: 
2475             enc 
= kCFStringEncodingMacTamil 
; 
2477         case wxFONTENCODING_MACTELUGU 
: 
2478             enc 
= kCFStringEncodingMacTelugu 
; 
2480         case wxFONTENCODING_MACKANNADA 
: 
2481             enc 
= kCFStringEncodingMacKannada 
; 
2483         case wxFONTENCODING_MACMALAJALAM 
: 
2484             enc 
= kCFStringEncodingMacMalayalam 
; 
2486         case wxFONTENCODING_MACSINHALESE 
: 
2487             enc 
= kCFStringEncodingMacSinhalese 
; 
2489         case wxFONTENCODING_MACBURMESE 
: 
2490             enc 
= kCFStringEncodingMacBurmese 
; 
2492         case wxFONTENCODING_MACKHMER 
: 
2493             enc 
= kCFStringEncodingMacKhmer 
; 
2495         case wxFONTENCODING_MACTHAI 
: 
2496             enc 
= kCFStringEncodingMacThai 
; 
2498         case wxFONTENCODING_MACLAOTIAN 
: 
2499             enc 
= kCFStringEncodingMacLaotian 
; 
2501         case wxFONTENCODING_MACGEORGIAN 
: 
2502             enc 
= kCFStringEncodingMacGeorgian 
; 
2504         case wxFONTENCODING_MACARMENIAN 
: 
2505             enc 
= kCFStringEncodingMacArmenian 
; 
2507         case wxFONTENCODING_MACCHINESESIMP 
: 
2508             enc 
= kCFStringEncodingMacChineseSimp 
; 
2510         case wxFONTENCODING_MACTIBETAN 
: 
2511             enc 
= kCFStringEncodingMacTibetan 
; 
2513         case wxFONTENCODING_MACMONGOLIAN 
: 
2514             enc 
= kCFStringEncodingMacMongolian 
; 
2516         case wxFONTENCODING_MACETHIOPIC 
: 
2517             enc 
= kCFStringEncodingMacEthiopic 
; 
2519         case wxFONTENCODING_MACCENTRALEUR 
: 
2520             enc 
= kCFStringEncodingMacCentralEurRoman 
; 
2522         case wxFONTENCODING_MACVIATNAMESE 
: 
2523             enc 
= kCFStringEncodingMacVietnamese 
; 
2525         case wxFONTENCODING_MACARABICEXT 
: 
2526             enc 
= kCFStringEncodingMacExtArabic 
; 
2528         case wxFONTENCODING_MACSYMBOL 
: 
2529             enc 
= kCFStringEncodingMacSymbol 
; 
2531         case wxFONTENCODING_MACDINGBATS 
: 
2532             enc 
= kCFStringEncodingMacDingbats 
; 
2534         case wxFONTENCODING_MACTURKISH 
: 
2535             enc 
= kCFStringEncodingMacTurkish 
; 
2537         case wxFONTENCODING_MACCROATIAN 
: 
2538             enc 
= kCFStringEncodingMacCroatian 
; 
2540         case wxFONTENCODING_MACICELANDIC 
: 
2541             enc 
= kCFStringEncodingMacIcelandic 
; 
2543         case wxFONTENCODING_MACROMANIAN 
: 
2544             enc 
= kCFStringEncodingMacRomanian 
; 
2546         case wxFONTENCODING_MACCELTIC 
: 
2547             enc 
= kCFStringEncodingMacCeltic 
; 
2549         case wxFONTENCODING_MACGAELIC 
: 
2550             enc 
= kCFStringEncodingMacGaelic 
; 
2552 //      case wxFONTENCODING_MACKEYBOARD : 
2553 //          enc = kCFStringEncodingMacKeyboardGlyphs ; 
2557             // because gcc is picky 
2564 class wxMBConv_cocoa 
: public wxMBConv
 
2569         Init(CFStringGetSystemEncoding()) ; 
2572     wxMBConv_cocoa(const wxMBConv_cocoa
& conv
) 
2574         m_encoding 
= conv
.m_encoding
; 
2578     wxMBConv_cocoa(const wxChar
* name
) 
2580         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ; 
2584     wxMBConv_cocoa(wxFontEncoding encoding
) 
2586         Init( wxCFStringEncFromFontEnc(encoding
) ); 
2589     virtual ~wxMBConv_cocoa() 
2593     void Init( CFStringEncoding encoding
) 
2595         m_encoding 
= encoding 
; 
2598     size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const 
2602         CFStringRef theString 
= CFStringCreateWithBytes ( 
2603                                                 NULL
, //the allocator 
2604                                                 (const UInt8
*)szUnConv
, 
2607                                                 false //no BOM/external representation 
2610         wxASSERT(theString
); 
2612         size_t nOutLength 
= CFStringGetLength(theString
); 
2616             CFRelease(theString
); 
2620         CFRange theRange 
= { 0, nOutSize 
}; 
2622 #if SIZEOF_WCHAR_T == 4 
2623         UniChar
* szUniCharBuffer 
= new UniChar
[nOutSize
]; 
2626         CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
); 
2628         CFRelease(theString
); 
2630         szUniCharBuffer
[nOutLength
] = '\0'; 
2632 #if SIZEOF_WCHAR_T == 4 
2633         wxMBConvUTF16 converter
; 
2634         converter
.MB2WC( szOut
, (const char*)szUniCharBuffer
, nOutSize 
); 
2635         delete [] szUniCharBuffer
; 
2641     size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const 
2645         size_t nRealOutSize
; 
2646         size_t nBufSize 
= wxWcslen(szUnConv
); 
2647         UniChar
* szUniBuffer 
= (UniChar
*) szUnConv
; 
2649 #if SIZEOF_WCHAR_T == 4 
2650         wxMBConvUTF16 converter 
; 
2651         nBufSize 
= converter
.WC2MB( NULL
, szUnConv
, 0 ); 
2652         szUniBuffer 
= new UniChar
[ (nBufSize 
/ sizeof(UniChar
)) + 1]; 
2653         converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize 
+ sizeof(UniChar
)); 
2654         nBufSize 
/= sizeof(UniChar
); 
2657         CFStringRef theString 
= CFStringCreateWithCharactersNoCopy( 
2661                                 kCFAllocatorNull 
//deallocator - we want to deallocate it ourselves 
2664         wxASSERT(theString
); 
2666         //Note that CER puts a BOM when converting to Unicode, 
2667         //so we check for that encoding and use CFStringGetCharacters instead 
2668         if (m_encoding 
== kCFStringEncodingUnicode
) 
2671                 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize 
- 1), (UniChar
*) szOut
); 
2673             nRealOutSize 
= CFStringGetLength(theString
) + 1; 
2679                 CFRangeMake(0, CFStringGetLength(theString
)), 
2681                 0, //what to put in characters that can't be converted - 
2682                     //0 tells CFString to return NULL if it meets such a character 
2683                 false, //not an external representation 
2686                 (CFIndex
*) &nRealOutSize
 
2690         CFRelease(theString
); 
2692 #if SIZEOF_WCHAR_T == 4 
2693         delete[] szUniBuffer
; 
2696         return  nRealOutSize 
- 1; 
// Returns a newly allocated copy of this converter, made with the copy
// constructor (which copies m_encoding).
2699     virtual wxMBConv 
*Clone() const { return new wxMBConv_cocoa(*this); } 
2703         return m_encoding 
!= kCFStringEncodingInvalidId 
&& 
2704               CFStringIsEncodingAvailable(m_encoding
); 
2708     CFStringEncoding m_encoding 
; 
2711 #endif // defined(__WXCOCOA__) 
2713 // ============================================================================ 
2714 // Mac conversion classes 
2715 // ============================================================================ 
2717 #if defined(__WXMAC__) && defined(TARGET_CARBON) 
2719 class wxMBConv_mac 
: public wxMBConv
 
2724         Init(CFStringGetSystemEncoding()) ; 
2727     wxMBConv_mac(const wxMBConv_mac
& conv
) 
2729         Init(conv
.m_char_encoding
); 
2733     wxMBConv_mac(const wxChar
* name
) 
2735         Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ); 
2739     wxMBConv_mac(wxFontEncoding encoding
) 
2741         Init( wxMacGetSystemEncFromFontEnc(encoding
) ); 
2744     virtual ~wxMBConv_mac() 
2746         OSStatus status 
= noErr 
; 
2747         if (m_MB2WC_converter
) 
2748             status 
= TECDisposeConverter(m_MB2WC_converter
); 
2749         if (m_WC2MB_converter
) 
2750             status 
= TECDisposeConverter(m_WC2MB_converter
); 
2753     void Init( TextEncodingBase encoding
,TextEncodingVariant encodingVariant 
= kTextEncodingDefaultVariant 
, 
2754             TextEncodingFormat encodingFormat 
= kTextEncodingDefaultFormat
) 
2756         m_MB2WC_converter 
= NULL 
; 
2757         m_WC2MB_converter 
= NULL 
; 
2758         m_char_encoding 
= CreateTextEncoding(encoding
, encodingVariant
, encodingFormat
) ; 
2759         m_unicode_encoding 
= CreateTextEncoding(kTextEncodingUnicodeDefault
, 0, kUnicode16BitFormat
) ; 
2762     virtual void CreateIfNeeded() const 
2764         if ( m_MB2WC_converter 
== NULL 
&& m_WC2MB_converter 
== NULL 
) 
2766             OSStatus status 
= noErr 
; 
2767             status 
= TECCreateConverter(&m_MB2WC_converter
, 
2769                                     m_unicode_encoding
); 
2770             wxASSERT_MSG( status 
== noErr 
, _("Unable to create TextEncodingConverter")) ; 
2771             status 
= TECCreateConverter(&m_WC2MB_converter
, 
2774             wxASSERT_MSG( status 
== noErr 
, _("Unable to create TextEncodingConverter")) ; 
2778     size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
2781         OSStatus status 
= noErr 
; 
2782         ByteCount byteOutLen 
; 
2783         ByteCount byteInLen 
= strlen(psz
) + 1; 
2784         wchar_t *tbuf 
= NULL 
; 
2785         UniChar
* ubuf 
= NULL 
; 
2790             // Apple specs say at least 32 
2791             n 
= wxMax( 32, byteInLen 
) ; 
2792             tbuf 
= (wchar_t*) malloc( n 
* SIZEOF_WCHAR_T 
) ; 
2795         ByteCount byteBufferLen 
= n 
* sizeof( UniChar 
) ; 
2797 #if SIZEOF_WCHAR_T == 4 
2798         ubuf 
= (UniChar
*) malloc( byteBufferLen 
+ 2 ) ; 
2800         ubuf 
= (UniChar
*) (buf 
? buf 
: tbuf
) ; 
2803         status 
= TECConvertText( 
2804             m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
, 
2805             (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
); 
2807 #if SIZEOF_WCHAR_T == 4 
2808         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar 
2809         // is not properly terminated we get random characters at the end 
2810         ubuf
[byteOutLen 
/ sizeof( UniChar 
) ] = 0 ; 
2811         wxMBConvUTF16 converter 
; 
2812         res 
= converter
.MB2WC( (buf 
? buf 
: tbuf
), (const char*)ubuf
, n 
) ; 
2815         res 
= byteOutLen 
/ sizeof( UniChar 
) ; 
2821         if ( buf  
&& res 
< n
) 
2827     size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
2830         OSStatus status 
= noErr 
; 
2831         ByteCount byteOutLen 
; 
2832         ByteCount byteInLen 
= wxWcslen(psz
) * SIZEOF_WCHAR_T 
; 
2838             // Apple specs say at least 32 
2839             n 
= wxMax( 32, ((byteInLen 
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T 
); 
2840             tbuf 
= (char*) malloc( n 
) ; 
2843         ByteCount byteBufferLen 
= n 
; 
2844         UniChar
* ubuf 
= NULL 
; 
2846 #if SIZEOF_WCHAR_T == 4 
2847         wxMBConvUTF16 converter 
; 
2848         size_t unicharlen 
= converter
.WC2MB( NULL
, psz
, 0 ) ; 
2849         byteInLen 
= unicharlen 
; 
2850         ubuf 
= (UniChar
*) malloc( byteInLen 
+ 2 ) ; 
2851         converter
.WC2MB( (char*) ubuf
, psz
, unicharlen 
+ 2 ) ; 
2853         ubuf 
= (UniChar
*) psz 
; 
2856         status 
= TECConvertText( 
2857             m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
, 
2858             (TextPtr
) (buf 
? buf 
: tbuf
), byteBufferLen
, &byteOutLen
); 
2860 #if SIZEOF_WCHAR_T == 4 
2867         size_t res 
= byteOutLen 
; 
2868         if ( buf  
&& res 
< n
) 
2872             //we need to round-trip the result to verify that the conversion 
2873             //didn't insert any '?' in place of bogus characters 
2874             wxWCharBuffer 
wcBuf(n
); 
2875             size_t pszlen 
= wxWcslen(psz
); 
2876             if ( MB2WC(wcBuf
.data(), buf
, n
) == wxCONV_FAILED 
|| 
2877                         wxWcslen(wcBuf
) != pszlen 
|| 
2878                         memcmp(wcBuf
, psz
, pszlen 
* sizeof(wchar_t)) != 0 ) 
2880                 // we didn't obtain the same thing we started from, hence 
2881                 // the conversion was lossy and we consider that it failed 
2882                 return wxCONV_FAILED
; 
// Returns a newly allocated copy of this converter, made with the copy
// constructor (which re-runs Init() with the source's m_char_encoding).
2889     virtual wxMBConv 
*Clone() const { return new wxMBConv_mac(*this); } 
2894         return m_MB2WC_converter 
!= NULL 
&& m_WC2MB_converter 
!= NULL
; 
2898     mutable TECObjectRef m_MB2WC_converter
; 
2899     mutable TECObjectRef m_WC2MB_converter
; 
2901     TextEncodingBase m_char_encoding
; 
2902     TextEncodingBase m_unicode_encoding
; 
2905 // MB is decomposed (D) normalized UTF8 
2907 class wxMBConv_macUTF8D 
: public wxMBConv_mac
 
2912         Init( kTextEncodingUnicodeDefault 
, kUnicodeNoSubset 
, kUnicodeUTF8Format 
) ; 
2917     virtual ~wxMBConv_macUTF8D() 
2920             DisposeUnicodeToTextInfo(&m_uni
); 
2921         if (m_uniBack
!=NULL
) 
2922             DisposeUnicodeToTextInfo(&m_uniBack
); 
2925     size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
2928         OSStatus status 
= noErr 
; 
2929         ByteCount byteOutLen 
; 
2930         ByteCount byteInLen 
= wxWcslen(psz
) * SIZEOF_WCHAR_T 
; 
2936             // Apple specs say at least 32 
2937             n 
= wxMax( 32, ((byteInLen 
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T 
); 
2938             tbuf 
= (char*) malloc( n 
) ; 
2941         ByteCount byteBufferLen 
= n 
; 
2942         UniChar
* ubuf 
= NULL 
; 
2944 #if SIZEOF_WCHAR_T == 4 
2945         wxMBConvUTF16 converter 
; 
2946         size_t unicharlen 
= converter
.WC2MB( NULL
, psz
, 0 ) ; 
2947         byteInLen 
= unicharlen 
; 
2948         ubuf 
= (UniChar
*) malloc( byteInLen 
+ 2 ) ; 
2949         converter
.WC2MB( (char*) ubuf
, psz
, unicharlen 
+ 2 ) ; 
2951         ubuf 
= (UniChar
*) psz 
; 
2954         // ubuf is a non-decomposed UniChar buffer 
2956         ByteCount dcubuflen 
= byteInLen 
* 2 + 2 ; 
2957         ByteCount dcubufread 
, dcubufwritten 
; 
2958         UniChar 
*dcubuf 
= (UniChar
*) malloc( dcubuflen 
) ; 
2960         ConvertFromUnicodeToText( m_uni 
, byteInLen 
, ubuf 
, 
2961             kUnicodeDefaultDirectionMask
, 0, NULL
, NULL
, NULL
, dcubuflen  
, &dcubufread 
, &dcubufwritten 
, dcubuf 
) ; 
2963         // we now convert that decomposed buffer into UTF8 
2965         status 
= TECConvertText( 
2966             m_WC2MB_converter
, (ConstTextPtr
) dcubuf
, dcubufwritten
, &dcubufread
, 
2967             (TextPtr
) (buf 
? buf 
: tbuf
), byteBufferLen
, &byteOutLen
); 
2971 #if SIZEOF_WCHAR_T == 4 
2978         size_t res 
= byteOutLen 
; 
2979         if ( buf  
&& res 
< n
) 
2982             // don't test for round-trip fidelity yet, we cannot guarantee it yet 
2988     size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
2991         OSStatus status 
= noErr 
; 
2992         ByteCount byteOutLen 
; 
2993         ByteCount byteInLen 
= strlen(psz
) + 1; 
2994         wchar_t *tbuf 
= NULL 
; 
2995         UniChar
* ubuf 
= NULL 
; 
3000             // Apple specs say at least 32 
3001             n 
= wxMax( 32, byteInLen 
) ; 
3002             tbuf 
= (wchar_t*) malloc( n 
* SIZEOF_WCHAR_T 
) ; 
3005         ByteCount byteBufferLen 
= n 
* sizeof( UniChar 
) ; 
3007 #if SIZEOF_WCHAR_T == 4 
3008         ubuf 
= (UniChar
*) malloc( byteBufferLen 
+ 2 ) ; 
3010         ubuf 
= (UniChar
*) (buf 
? buf 
: tbuf
) ; 
3013         ByteCount dcubuflen 
= byteBufferLen 
* 2 + 2 ; 
3014         ByteCount dcubufread 
, dcubufwritten 
; 
3015         UniChar 
*dcubuf 
= (UniChar
*) malloc( dcubuflen 
) ; 
3017         status 
= TECConvertText( 
3018                                 m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
, 
3019                                 (TextPtr
) dcubuf
, dcubuflen
, &byteOutLen
); 
3020         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar 
3021         // is not properly terminated we get random characters at the end 
3022         dcubuf
[byteOutLen 
/ sizeof( UniChar 
) ] = 0 ; 
3024         // now from the decomposed UniChar to properly composed uniChar 
3025         ConvertFromUnicodeToText( m_uniBack 
, byteOutLen 
, dcubuf 
, 
3026                                   kUnicodeDefaultDirectionMask
, 0, NULL
, NULL
, NULL
, dcubuflen  
, &dcubufread 
, &dcubufwritten 
, ubuf 
) ; 
3029         byteOutLen 
= dcubufwritten 
; 
3030         ubuf
[byteOutLen 
/ sizeof( UniChar 
) ] = 0 ; 
3033 #if SIZEOF_WCHAR_T == 4 
3034         wxMBConvUTF16 converter 
; 
3035         res 
= converter
.MB2WC( (buf 
? buf 
: tbuf
), (const char*)ubuf
, n 
) ; 
3038         res 
= byteOutLen 
/ sizeof( UniChar 
) ; 
3044         if ( buf  
&& res 
< n
) 
3050     virtual void CreateIfNeeded() const 
3052         wxMBConv_mac::CreateIfNeeded() ; 
3053         if ( m_uni 
== NULL 
) 
3055             m_map
.unicodeEncoding 
= CreateTextEncoding(kTextEncodingUnicodeDefault
, 
3056                 kUnicodeNoSubset
, kTextEncodingDefaultFormat
); 
3057             m_map
.otherEncoding 
= CreateTextEncoding(kTextEncodingUnicodeDefault
, 
3058                 kUnicodeCanonicalDecompVariant
, kTextEncodingDefaultFormat
); 
3059             m_map
.mappingVersion 
= kUnicodeUseLatestMapping
; 
3061             OSStatus err 
= CreateUnicodeToTextInfo(&m_map
, &m_uni
); 
3062             wxASSERT_MSG( err 
== noErr 
, _(" Couldn't create the UnicodeConverter")) ; 
3064             m_map
.unicodeEncoding 
= CreateTextEncoding(kTextEncodingUnicodeDefault
, 
3065                                                        kUnicodeNoSubset
, kTextEncodingDefaultFormat
); 
3066             m_map
.otherEncoding 
= CreateTextEncoding(kTextEncodingUnicodeDefault
, 
3067                                                      kUnicodeCanonicalCompVariant
, kTextEncodingDefaultFormat
); 
3068             m_map
.mappingVersion 
= kUnicodeUseLatestMapping
; 
3069             err 
= CreateUnicodeToTextInfo(&m_map
, &m_uniBack
); 
3070             wxASSERT_MSG( err 
== noErr 
, _(" Couldn't create the UnicodeConverter")) ; 
3074     mutable UnicodeToTextInfo   m_uni
; 
3075     mutable UnicodeToTextInfo   m_uniBack
; 
3076     mutable UnicodeMapping      m_map
; 
3078 #endif // defined(__WXMAC__) && defined(TARGET_CARBON) 
3080 // ============================================================================ 
3081 // wxEncodingConverter based conversion classes 
3082 // ============================================================================ 
3086 class wxMBConv_wxwin 
: public wxMBConv
 
3091         m_ok 
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) && 
3092                w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
); 
3096     // temporarily just use wxEncodingConverter stuff, 
3097     // so that it works while a better implementation is built 
3098     wxMBConv_wxwin(const wxChar
* name
) 
3101             m_enc 
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false); 
3103             m_enc 
= wxFONTENCODING_SYSTEM
; 
3108     wxMBConv_wxwin(wxFontEncoding enc
) 
3115     size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const 
3117         size_t inbuf 
= strlen(psz
); 
3120             if (!m2w
.Convert(psz
, buf
)) 
3121                 return wxCONV_FAILED
; 
3126     size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const 
3128         const size_t inbuf 
= wxWcslen(psz
); 
3131             if (!w2m
.Convert(psz
, buf
)) 
3132                 return wxCONV_FAILED
; 
3138     virtual size_t GetMBNulLen() const 
3142             case wxFONTENCODING_UTF16BE
: 
3143             case wxFONTENCODING_UTF16LE
: 
3146             case wxFONTENCODING_UTF32BE
: 
3147             case wxFONTENCODING_UTF32LE
: 
// Returns a newly allocated converter for the same encoding. The clone is
// rebuilt from m_enc rather than copy-constructed; the class is marked
// with DECLARE_NO_COPY_CLASS.
3155     virtual wxMBConv 
*Clone() const { return new wxMBConv_wxwin(m_enc
); } 
// Returns m_ok, which is set from the result of initialising both the m2w
// (m_enc -> Unicode) and w2m (Unicode -> m_enc) encoding converters.
3157     bool IsOk() const { return m_ok
; } 
3160     wxFontEncoding m_enc
; 
3161     wxEncodingConverter m2w
, w2m
; 
3164     // were we initialized successfully? 
3167     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
) 
3170 // make the constructors available for unit testing 
3171 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_wxwin( const wxChar
* name 
) 
3173     wxMBConv_wxwin
* result 
= new wxMBConv_wxwin( name 
); 
3174     if ( !result
->IsOk() ) 
3183 #endif // wxUSE_FONTMAP 
3185 // ============================================================================ 
3186 // wxCSConv implementation 
3187 // ============================================================================ 
3189 void wxCSConv::Init() 
3196 wxCSConv::wxCSConv(const wxChar 
*charset
) 
3206     m_encoding 
= wxFontMapperBase::GetEncodingFromName(charset
); 
3208     m_encoding 
= wxFONTENCODING_SYSTEM
; 
3212 wxCSConv::wxCSConv(wxFontEncoding encoding
) 
3214     if ( encoding 
== wxFONTENCODING_MAX 
|| encoding 
== wxFONTENCODING_DEFAULT 
) 
3216         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") ); 
3218         encoding 
= wxFONTENCODING_SYSTEM
; 
3223     m_encoding 
= encoding
; 
3226 wxCSConv::~wxCSConv() 
3231 wxCSConv::wxCSConv(const wxCSConv
& conv
) 
3236     SetName(conv
.m_name
); 
3237     m_encoding 
= conv
.m_encoding
; 
3240 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
) 
3244     SetName(conv
.m_name
); 
3245     m_encoding 
= conv
.m_encoding
; 
3250 void wxCSConv::Clear() 
3259 void wxCSConv::SetName(const wxChar 
*charset
) 
3263         m_name 
= wxStrdup(charset
); 
3270 WX_DECLARE_HASH_MAP( wxFontEncoding
, wxString
, wxIntegerHash
, wxIntegerEqual
, 
3271                      wxEncodingNameCache 
); 
3273 static wxEncodingNameCache gs_nameCache
; 
3276 wxMBConv 
*wxCSConv::DoCreate() const 
3279     wxLogTrace(TRACE_STRCONV
, 
3280                wxT("creating conversion for %s"), 
3282                        : (const wxChar
*)wxFontMapperBase::GetEncodingName(m_encoding
).c_str())); 
3283 #endif // wxUSE_FONTMAP 
3285     // check for the special case of ASCII or ISO8859-1 charset: as we have 
3286     // special knowledge of it anyhow, we don't need to create a special 
3287     // conversion object 
3288     if ( m_encoding 
== wxFONTENCODING_ISO8859_1 
|| 
3289             m_encoding 
== wxFONTENCODING_DEFAULT 
) 
3291         // don't convert at all 
3295     // we trust OS to do conversion better than we can so try external 
3296     // conversion methods first 
3298     // the full order is: 
3299     //      1. OS conversion (iconv() under Unix or Win32 API) 
3300     //      2. hard coded conversions for UTF 
3301     //      3. wxEncodingConverter as fall back 
3307 #endif // !wxUSE_FONTMAP 
3309         wxString 
name(m_name
); 
3311         wxFontEncoding 
encoding(m_encoding
); 
3314         if ( !name
.empty() ) 
3316             wxMBConv_iconv 
*conv 
= new wxMBConv_iconv(name
); 
3324                 wxFontMapperBase::Get()->CharsetToEncoding(name
, false); 
3325 #endif // wxUSE_FONTMAP 
3329             const wxEncodingNameCache::iterator it 
= gs_nameCache
.find(encoding
); 
3330             if ( it 
!= gs_nameCache
.end() ) 
3332                 if ( it
->second
.empty() ) 
3335                 wxMBConv_iconv 
*conv 
= new wxMBConv_iconv(it
->second
); 
3342             const wxChar
** names 
= wxFontMapperBase::GetAllEncodingNames(encoding
); 
3343             // CS: if this returns no valid names (e.g. for MacRoman), the encoding 
3344             // used to get a 'failure' entry in the cache all the same, although it 
3345             // just has to be created using a different method; so only cache failed 
3346             // iconv creation attempts (or perhaps we shouldn't do this at all?) 
3347             if ( names
[0] != NULL 
) 
3349                 for ( ; *names
; ++names 
) 
3351                     wxMBConv_iconv 
*conv 
= new wxMBConv_iconv(*names
); 
3354                         gs_nameCache
[encoding
] = *names
; 
3361                 gs_nameCache
[encoding
] = _T(""); // cache the failure 
3364 #endif // wxUSE_FONTMAP 
3366 #endif // HAVE_ICONV 
3368 #ifdef wxHAVE_WIN32_MB2WC 
3371         wxMBConv_win32 
*conv 
= m_name 
? new wxMBConv_win32(m_name
) 
3372                                       : new wxMBConv_win32(m_encoding
); 
3381 #endif // wxHAVE_WIN32_MB2WC 
3383 #if defined(__WXMAC__) 
3385         // leave UTF16 and UTF32 to the built-ins of wx 
3386         if ( m_name 
|| ( m_encoding 
< wxFONTENCODING_UTF16BE 
|| 
3387             ( m_encoding 
>= wxFONTENCODING_MACMIN 
&& m_encoding 
<= wxFONTENCODING_MACMAX 
) ) ) 
3390             wxMBConv_mac 
*conv 
= m_name 
? new wxMBConv_mac(m_name
) 
3391                                         : new wxMBConv_mac(m_encoding
); 
3393             wxMBConv_mac 
*conv 
= new wxMBConv_mac(m_encoding
); 
3403 #if defined(__WXCOCOA__) 
3405         if ( m_name 
|| ( m_encoding 
<= wxFONTENCODING_UTF16 
) ) 
3408             wxMBConv_cocoa 
*conv 
= m_name 
? new wxMBConv_cocoa(m_name
) 
3409                                           : new wxMBConv_cocoa(m_encoding
); 
3411             wxMBConv_cocoa 
*conv 
= new wxMBConv_cocoa(m_encoding
); 
3422     wxFontEncoding enc 
= m_encoding
; 
3424     if ( enc 
== wxFONTENCODING_SYSTEM 
&& m_name 
) 
3426         // use "false" to suppress interactive dialogs -- we can be called from 
3427         // anywhere and popping up a dialog from here is the last thing we want to 
3429         enc 
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false); 
3431 #endif // wxUSE_FONTMAP 
3435         case wxFONTENCODING_UTF7
: 
3436              return new wxMBConvUTF7
; 
3438         case wxFONTENCODING_UTF8
: 
3439              return new wxMBConvUTF8
; 
3441         case wxFONTENCODING_UTF16BE
: 
3442              return new wxMBConvUTF16BE
; 
3444         case wxFONTENCODING_UTF16LE
: 
3445              return new wxMBConvUTF16LE
; 
3447         case wxFONTENCODING_UTF32BE
: 
3448              return new wxMBConvUTF32BE
; 
3450         case wxFONTENCODING_UTF32LE
: 
3451              return new wxMBConvUTF32LE
; 
3454              // nothing to do but put here to suppress gcc warnings 
3461         wxMBConv_wxwin 
*conv 
= m_name 
? new wxMBConv_wxwin(m_name
) 
3462                                       : new wxMBConv_wxwin(m_encoding
); 
3468 #endif // wxUSE_FONTMAP 
3470     // NB: This is a hack to prevent deadlock. What could otherwise happen 
3471     //     in Unicode build: wxConvLocal creation ends up being here 
3472     //     because of some failure and logs the error. But wxLog will try to 
3473     //     attach a timestamp, for which it will need wxConvLocal (to convert 
3474     //     time to char* and then wchar_t*), but that fails, tries to log the 
3475     //     error, but wxLog has an (already locked) critical section that 
3476     //     guards the static buffer. 
3477     static bool alreadyLoggingError 
= false; 
3478     if (!alreadyLoggingError
) 
3480         alreadyLoggingError 
= true; 
3481         wxLogError(_("Cannot convert from the charset '%s'!"), 
3485                          (const wxChar
*)wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str() 
3486 #else // !wxUSE_FONTMAP 
3487                          (const wxChar
*)wxString::Format(_("encoding %i"), m_encoding
).c_str() 
3488 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP 
3491         alreadyLoggingError 
= false; 
3497 void wxCSConv::CreateConvIfNeeded() const 
3501         wxCSConv 
*self 
= (wxCSConv 
*)this; // const_cast 
3503         // if we have neither the name nor the encoding, use the default 
3504         // encoding for this system 
3505         if ( !m_name 
&& m_encoding 
== wxFONTENCODING_SYSTEM 
) 
3508             self
->m_encoding 
= wxLocale::GetSystemEncoding(); 
3510             // fallback to some reasonable default: 
3511             self
->m_encoding 
= wxFONTENCODING_ISO8859_1
; 
3512 #endif // wxUSE_INTL 
3515         self
->m_convReal 
= DoCreate(); 
3516         self
->m_deferred 
= false; 
3520 bool wxCSConv::IsOk() const 
3522     CreateConvIfNeeded(); 
3524     // special case: no convReal created for wxFONTENCODING_ISO8859_1 
3525     if ( m_encoding 
== wxFONTENCODING_ISO8859_1 
) 
3526         return true; // always ok as we do it ourselves 
3528     // m_convReal->IsOk() is called at its own creation, so we know it must 
3529     // be ok if m_convReal is non-NULL 
3530     return m_convReal 
!= NULL
; 
3533 size_t wxCSConv::ToWChar(wchar_t *dst
, size_t dstLen
, 
3534                          const char *src
, size_t srcLen
) const 
3536     CreateConvIfNeeded(); 
3539         return m_convReal
->ToWChar(dst
, dstLen
, src
, srcLen
); 
3542     return wxMBConv::ToWChar(dst
, dstLen
, src
, srcLen
); 
3545 size_t wxCSConv::FromWChar(char *dst
, size_t dstLen
, 
3546                            const wchar_t *src
, size_t srcLen
) const 
3548     CreateConvIfNeeded(); 
3551         return m_convReal
->FromWChar(dst
, dstLen
, src
, srcLen
); 
3554     return wxMBConv::FromWChar(dst
, dstLen
, src
, srcLen
); 
3557 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
3559     CreateConvIfNeeded(); 
3562         return m_convReal
->MB2WC(buf
, psz
, n
); 
3565     size_t len 
= strlen(psz
); 
3569         for (size_t c 
= 0; c 
<= len
; c
++) 
3570             buf
[c
] = (unsigned char)(psz
[c
]); 
3576 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
3578     CreateConvIfNeeded(); 
3581         return m_convReal
->WC2MB(buf
, psz
, n
); 
3584     const size_t len 
= wxWcslen(psz
); 
3587         for (size_t c 
= 0; c 
<= len
; c
++) 
3590                 return wxCONV_FAILED
; 
3592             buf
[c
] = (char)psz
[c
]; 
3597         for (size_t c 
= 0; c 
<= len
; c
++) 
3600                 return wxCONV_FAILED
; 
3607 size_t wxCSConv::GetMBNulLen() const 
3609     CreateConvIfNeeded(); 
3613         return m_convReal
->GetMBNulLen(); 
3622 wxWCharBuffer 
wxSafeConvertMB2WX(const char *s
) 
3625         return wxWCharBuffer(); 
3627     wxWCharBuffer 
wbuf(wxConvLibc
.cMB2WX(s
)); 
3629         wbuf 
= wxMBConvUTF8().cMB2WX(s
); 
3631         wbuf 
= wxConvISO8859_1
.cMB2WX(s
); 
3636 wxCharBuffer 
wxSafeConvertWX2MB(const wchar_t *ws
) 
3639         return wxCharBuffer(); 
3641     wxCharBuffer 
buf(wxConvLibc
.cWX2MB(ws
)); 
3643         buf 
= wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
).cWX2MB(ws
); 
3648 #endif // wxUSE_UNICODE 
3650 // ---------------------------------------------------------------------------- 
3652 // ---------------------------------------------------------------------------- 
3654 // NB: The reason why we create converter objects in this convoluted way, 
3655 //     using a factory function instead of global variable, is that they 
3656 //     may be used at static initialization time (some of them are used by 
3657 //     wxString ctors and there may be a global wxString object). In other 
3658 //     words, possibly _before_ the converter global object would be 
3665 #undef wxConvISO8859_1 
3667 #define WX_DEFINE_GLOBAL_CONV2(klass, impl_klass, name, ctor_args)      \ 
3668     WXDLLIMPEXP_DATA_BASE(klass*) name##Ptr = NULL;                     \ 
3669     WXDLLIMPEXP_BASE klass* wxGet_##name##Ptr()                         \ 
3671         static impl_klass name##Obj ctor_args;                          \ 
3672         return &name##Obj;                                              \ 
3674     /* this ensures that all global converter objects are created */    \ 
3675     /* by the time static initialization is done, i.e. before any */    \ 
3676     /* thread is launched: */                                           \ 
3677     static klass* gs_##name##instance = wxGet_##name##Ptr() 
// Shorthand for WX_DEFINE_GLOBAL_CONV2 for the common case where the
// implementation class is the same as the declared class: klass is passed
// for both the klass and impl_klass arguments.
3679 #define WX_DEFINE_GLOBAL_CONV(klass, name, ctor_args) \ 
3680     WX_DEFINE_GLOBAL_CONV2(klass, klass, name, ctor_args) 
3683     WX_DEFINE_GLOBAL_CONV2(wxMBConv
, wxMBConv_win32
, wxConvLibc
, wxEMPTY_PARAMETER_VALUE
); 
3684 #elif defined(__WXMAC__) && !defined(__MACH__) 
3685     WX_DEFINE_GLOBAL_CONV2(wxMBConv
, wxMBConv_mac
, wxConvLibc
, wxEMPTY_PARAMETER_VALUE
); 
3687     WX_DEFINE_GLOBAL_CONV2(wxMBConv
, wxMBConvLibc
, wxConvLibc
, wxEMPTY_PARAMETER_VALUE
); 
3690 WX_DEFINE_GLOBAL_CONV(wxMBConvUTF8
, wxConvUTF8
, wxEMPTY_PARAMETER_VALUE
); 
3691 WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7
, wxConvUTF7
, wxEMPTY_PARAMETER_VALUE
); 
3693 WX_DEFINE_GLOBAL_CONV(wxCSConv
, wxConvLocal
, (wxFONTENCODING_SYSTEM
)); 
3694 WX_DEFINE_GLOBAL_CONV(wxCSConv
, wxConvISO8859_1
, (wxFONTENCODING_ISO8859_1
)); 
3696 WXDLLIMPEXP_DATA_BASE(wxMBConv 
*) wxConvCurrent 
= wxGet_wxConvLibcPtr(); 
3697 WXDLLIMPEXP_DATA_BASE(wxMBConv 
*) wxConvUI 
= wxGet_wxConvLocalPtr(); 
3699 #if defined(__WXMAC__) && defined(TARGET_CARBON) 
3700 static wxMBConv_macUTF8D wxConvMacUTF8DObj
; 
3702 WXDLLIMPEXP_DATA_BASE(wxMBConv 
*) wxConvFileName 
= 
3704 #if defined(__WXMAC__) && defined(TARGET_CARBON) 
3707                                     wxGet_wxConvUTF8Ptr(); 
3710                                     wxGet_wxConvLibcPtr(); 
3711 #endif // __WXOSX__/!__WXOSX__ 
3713 #else // !wxUSE_WCHAR_T 
3715 // FIXME-UTF8: remove this, wxUSE_WCHAR_T is required now 
3716 // stand-ins in absence of wchar_t 
3717 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
, 
3722 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T