1 ///////////////////////////////////////////////////////////////////////////// 
   2 // Name:        src/common/strconv.cpp 
   3 // Purpose:     Unicode conversion classes 
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik, 
   5 //              Ryan Norton, Fredrik Roubert (UTF7) 
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik 
  10 //              (c) 2000-2003 Vadim Zeitlin 
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert 
  12 // Licence:     wxWindows licence 
  13 ///////////////////////////////////////////////////////////////////////////// 
  15 // ============================================================================ 
  17 // ============================================================================ 
  19 // ---------------------------------------------------------------------------- 
  21 // ---------------------------------------------------------------------------- 
  23 // For compilers that support precompilation, includes "wx.h". 
  24 #include "wx/wxprec.h" 
  35 #include "wx/strconv.h" 
  40     #include "wx/msw/private.h" 
  41     #include "wx/msw/missing.h" 
  52 #if defined(__WIN32__) && !defined(__WXMICROWIN__) 
  53     #define wxHAVE_WIN32_MB2WC 
  54 #endif // __WIN32__ but !__WXMICROWIN__ 
  62     #include "wx/thread.h" 
  65 #include "wx/encconv.h" 
  66 #include "wx/fontmap.h" 
  71 #include <ATSUnicode.h> 
  72 #include <TextCommon.h> 
  73 #include <TextEncodingConverter.h> 
  76 #include  "wx/mac/private.h"  // includes mac headers 
  79 #define TRACE_STRCONV _T("strconv") 
  81 #if SIZEOF_WCHAR_T == 2 
  85 // ============================================================================ 
  87 // ============================================================================ 
  89 // helper function of cMB2WC(): check if n bytes at this location are all NUL 
  90 static bool NotAllNULs(const char *p
, size_t n
) 
  92     while ( n 
&& *p
++ == '\0' ) 
  98 // ---------------------------------------------------------------------------- 
  99 // UTF-16 en/decoding to/from UCS-4 
 100 // ---------------------------------------------------------------------------- 
 103 static size_t encode_utf16(wxUint32 input
, wxUint16 
*output
) 
 108             *output 
= (wxUint16
) input
; 
 111     else if (input
>=0x110000) 
 119             *output
++ = (wxUint16
) ((input 
>> 10)+0xd7c0); 
 120             *output 
= (wxUint16
) ((input
&0x3ff)+0xdc00); 
 126 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
) 
 128     if ((*input
<0xd800) || (*input
>0xdfff)) 
 133     else if ((input
[1]<0xdc00) || (input
[1]>0xdfff)) 
 140         output 
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00); 
 146 // ---------------------------------------------------------------------------- 
 148 // ---------------------------------------------------------------------------- 
 151 wxMBConv::ToWChar(wchar_t *dst
, size_t dstLen
, 
 152                   const char *src
, size_t srcLen
) const 
 154     // although new conversion classes are supposed to implement this function 
 155     // directly, the existing ones only implement the old MB2WC() and so, to 
 156     // avoid having to rewrite all conversion classes at once, we provide a 
 157     // default (but not efficient) implementation of this one in terms of the 
 158     // old function by copying the input to ensure that it's NUL-terminated and 
 159     // then using MB2WC() to convert it 
 161     // the number of chars [which would be] written to dst [if it were not NULL] 
 162     size_t dstWritten 
= 0; 
 164     // the number of NULs terminating this string 
 165     size_t nulLen 
wxDUMMY_INITIALIZE(0); 
 167     // if we were not given the input size we just have to assume that the 
 168     // string is properly terminated as we have no way of knowing how long it 
 169     // is anyhow, but if we do have the size check whether there are enough 
 173     if ( srcLen 
!= (size_t)-1 ) 
 175         // we need to know how to find the end of this string 
 176         nulLen 
= GetMBNulLen(); 
 177         if ( nulLen 
== wxCONV_FAILED 
) 
 178             return wxCONV_FAILED
; 
 180         // if there are enough NULs we can avoid the copy 
 181         if ( srcLen 
< nulLen 
|| NotAllNULs(src 
+ srcLen 
- nulLen
, nulLen
) ) 
 183             // make a copy in order to properly NUL-terminate the string 
 184             bufTmp 
= wxCharBuffer(srcLen 
+ nulLen 
- 1 /* 1 will be added */); 
 185             char * const p 
= bufTmp
.data(); 
 186             memcpy(p
, src
, srcLen
); 
 187             for ( char *s 
= p 
+ srcLen
; s 
< p 
+ srcLen 
+ nulLen
; s
++ ) 
 193         srcEnd 
= src 
+ srcLen
; 
 195     else // quit after the first loop iteration 
 202         // try to convert the current chunk 
 203         size_t lenChunk 
= MB2WC(NULL
, src
, 0); 
 206             // nothing left in the input string, conversion succeeded; but 
 207             // still account for the trailing NUL 
 212         if ( lenChunk 
== wxCONV_FAILED 
) 
 213             return wxCONV_FAILED
; 
 215         lenChunk
++; // for trailing NUL 
 217         dstWritten 
+= lenChunk
; 
 221             if ( dstWritten 
> dstLen 
) 
 222                 return wxCONV_FAILED
; 
 224             if ( MB2WC(dst
, src
, lenChunk
) == wxCONV_FAILED 
) 
 225                 return wxCONV_FAILED
; 
 232             // we convert the entire string in this case, as we suppose that the 
 233             // string is NUL-terminated and so srcEnd is not used at all 
 237         // advance the input pointer past the end of this chunk 
 238         while ( NotAllNULs(src
, nulLen
) ) 
 240             // notice that we must skip over multiple bytes here as we suppose 
 241             // that if NUL takes 2 or 4 bytes, then all the other characters do 
 242             // too and so if advanced by a single byte we might erroneously 
 243             // detect sequences of NUL bytes in the middle of the input 
 247         src 
+= nulLen
; // skipping over its terminator as well 
 249         // note that ">=" (and not just "==") is needed here as the terminator 
 250         // we skipped just above could be inside or just after the buffer 
 251         // delimited by srcEnd 
 260 wxMBConv::FromWChar(char *dst
, size_t dstLen
, 
 261                     const wchar_t *src
, size_t srcLen
) const 
 263     // the number of chars [which would be] written to dst [if it were not NULL] 
 264     size_t dstWritten 
= 0; 
 266     // make a copy of the input string unless it is already properly 
 269     // if we don't know its length we have no choice but to assume that it is, 
 270     // indeed, properly terminated 
 271     wxWCharBuffer bufTmp
; 
 272     if ( srcLen 
== (size_t)-1 ) 
 274         srcLen 
= wxWcslen(src
) + 1; 
 276     else if ( srcLen 
!= 0 && src
[srcLen 
- 1] != L
'\0' ) 
 278         // make a copy in order to properly NUL-terminate the string 
 279         bufTmp 
= wxWCharBuffer(srcLen
); 
 280         memcpy(bufTmp
.data(), src
, srcLen
*sizeof(wchar_t)); 
 284     const size_t lenNul 
= GetMBNulLen(); 
 285     for ( const wchar_t * const srcEnd 
= src 
+ srcLen
; 
 287           src 
+= wxWcslen(src
) + 1 /* skip L'\0' too */ ) 
 289         // try to convert the current chunk 
 290         size_t lenChunk 
= WC2MB(NULL
, src
, 0); 
 292         if ( lenChunk 
== wxCONV_FAILED 
) 
 293             return wxCONV_FAILED
; 
 296         dstWritten 
+= lenChunk
; 
 300             if ( dstWritten 
> dstLen 
) 
 301                 return wxCONV_FAILED
; 
 303             if ( WC2MB(dst
, src
, lenChunk
) == wxCONV_FAILED 
) 
 304                 return wxCONV_FAILED
; 
 313 size_t wxMBConv::MB2WC(wchar_t *out
, const char *in
, size_t outLen
) const 
 315     size_t rc 
= ToWChar(out
, outLen
, in
); 
 316     if ( rc 
!= wxCONV_FAILED 
) 
 318         // ToWChar() returns the buffer length, i.e. including the trailing 
 319         // NUL, while this method doesn't take it into account 
 326 size_t wxMBConv::WC2MB(char *out
, const wchar_t *in
, size_t outLen
) const 
 328     size_t rc 
= FromWChar(out
, outLen
, in
); 
 329     if ( rc 
!= wxCONV_FAILED 
) 
 337 wxMBConv::~wxMBConv() 
 339     // nothing to do here (necessary for Darwin linking probably) 
 342 const wxWCharBuffer 
wxMBConv::cMB2WC(const char *psz
) const 
 346         // calculate the length of the buffer needed first 
 347         const size_t nLen 
= MB2WC(NULL
, psz
, 0); 
 348         if ( nLen 
!= wxCONV_FAILED 
) 
 350             // now do the actual conversion 
 351             wxWCharBuffer 
buf(nLen 
/* +1 added implicitly */); 
 353             // +1 for the trailing NUL 
 354             if ( MB2WC(buf
.data(), psz
, nLen 
+ 1) != wxCONV_FAILED 
) 
 359     return wxWCharBuffer(); 
 362 const wxCharBuffer 
wxMBConv::cWC2MB(const wchar_t *pwz
) const 
 366         const size_t nLen 
= WC2MB(NULL
, pwz
, 0); 
 367         if ( nLen 
!= wxCONV_FAILED 
) 
 369             // extra space for trailing NUL(s) 
 370             static const size_t extraLen 
= GetMaxMBNulLen(); 
 372             wxCharBuffer 
buf(nLen 
+ extraLen 
- 1); 
 373             if ( WC2MB(buf
.data(), pwz
, nLen 
+ extraLen
) != wxCONV_FAILED 
) 
 378     return wxCharBuffer(); 
 382 wxMBConv::cMB2WC(const char *in
, size_t inLen
, size_t *outLen
) const 
 384     const size_t dstLen 
= ToWChar(NULL
, 0, in
, inLen
); 
 385     if ( dstLen 
!= wxCONV_FAILED 
) 
 387         wxWCharBuffer 
wbuf(dstLen 
- 1); 
 388         if ( ToWChar(wbuf
.data(), dstLen
, in
, inLen
) ) 
 391                 *outLen 
= dstLen 
- 1; 
 399     return wxWCharBuffer(); 
 403 wxMBConv::cWC2MB(const wchar_t *in
, size_t inLen
, size_t *outLen
) const 
 405     const size_t dstLen 
= FromWChar(NULL
, 0, in
, inLen
); 
 406     if ( dstLen 
!= wxCONV_FAILED 
) 
 408         wxCharBuffer 
buf(dstLen 
- 1); 
 409         if ( FromWChar(buf
.data(), dstLen
, in
, inLen
) ) 
 412                 *outLen 
= dstLen 
- 1; 
 420     return wxCharBuffer(); 
 423 // ---------------------------------------------------------------------------- 
 425 // ---------------------------------------------------------------------------- 
 427 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 429     return wxMB2WC(buf
, psz
, n
); 
 432 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 434     return wxWC2MB(buf
, psz
, n
); 
 437 // ---------------------------------------------------------------------------- 
 438 // wxConvBrokenFileNames 
 439 // ---------------------------------------------------------------------------- 
 443 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar 
*charset
) 
 445     if ( !charset 
|| wxStricmp(charset
, _T("UTF-8")) == 0 
 446                   || wxStricmp(charset
, _T("UTF8")) == 0  ) 
 447         m_conv 
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
); 
 449         m_conv 
= new wxCSConv(charset
); 
 454 // ---------------------------------------------------------------------------- 
 456 // ---------------------------------------------------------------------------- 
 458 // Implementation (C) 2004 Fredrik Roubert 
 461 // BASE64 decoding table 
 463 static const unsigned char utf7unb64
[] = 
 465     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 466     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 467     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 468     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 469     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 470     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f, 
 471     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 
 472     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 473     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 
 474     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 
 475     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 
 476     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff, 
 477     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 
 478     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 
 479     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 
 480     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff, 
 481     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 482     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 483     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 484     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 485     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 486     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 487     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 488     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 489     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 490     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 491     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 492     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 493     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 494     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 495     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 496     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 
 499 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 503     while ( *psz 
&& (!buf 
|| (len 
< n
)) ) 
 505         unsigned char cc 
= *psz
++; 
 513         else if (*psz 
== '-') 
 521         else // start of BASE64 encoded string 
 525             for ( ok 
= lsb 
= false, d 
= 0, l 
= 0; 
 526                   (cc 
= utf7unb64
[(unsigned char)*psz
]) != 0xff; 
 531                 for (l 
+= 6; l 
>= 8; lsb 
= !lsb
) 
 533                     unsigned char c 
= (unsigned char)((d 
>> (l 
-= 8)) % 256); 
 543                             *buf 
= (wchar_t)(c 
<< 8); 
 552                 // in valid UTF7 we should have valid characters after '+' 
 561     if ( buf 
&& (len 
< n
) ) 
 568 // BASE64 encoding table 
 570 static const unsigned char utf7enb64
[] = 
 572     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 
 573     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 
 574     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 
 575     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 
 576     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 
 577     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 
 578     'w', 'x', 'y', 'z', '0', '1', '2', '3', 
 579     '4', '5', '6', '7', '8', '9', '+', '/' 
 583 // UTF-7 encoding table 
 585 // 0 - Set D (directly encoded characters) 
 586 // 1 - Set O (optional direct characters) 
 587 // 2 - whitespace characters (optional) 
 588 // 3 - special characters 
 590 static const unsigned char utf7encode
[128] = 
 592     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, 
 593     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
 594     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3, 
 595     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 
 596     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 597     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1, 
 598     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 599     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3 
 602 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 606     while (*psz 
&& ((!buf
) || (len 
< n
))) 
 609         if (cc 
< 0x80 && utf7encode
[cc
] < 1) 
 617         else if (((wxUint32
)cc
) > 0xffff) 
 619             // no surrogate pair generation (yet?) 
 630                 // BASE64 encode string 
 631                 unsigned int lsb
, d
, l
; 
 632                 for (d 
= 0, l 
= 0; /*nothing*/; psz
++) 
 634                     for (lsb 
= 0; lsb 
< 2; lsb 
++) 
 637                         d 
+= lsb 
? cc 
& 0xff : (cc 
& 0xff00) >> 8; 
 639                         for (l 
+= 8; l 
>= 6; ) 
 643                                 *buf
++ = utf7enb64
[(d 
>> l
) % 64]; 
 648                     if (!(cc
) || (cc 
< 0x80 && utf7encode
[cc
] < 1)) 
 654                         *buf
++ = utf7enb64
[((d 
% 16) << (6 - l
)) % 64]; 
 663     if (buf 
&& (len 
< n
)) 
 668 // ---------------------------------------------------------------------------- 
 670 // ---------------------------------------------------------------------------- 
 672 static wxUint32 utf8_max
[]= 
 673     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff }; 
 675 // boundaries of the private use area we use to (temporarily) remap 
 676 // characters that are invalid in a UTF-8 encoded string 
 677 const wxUint32 wxUnicodePUA 
= 0x100000; 
 678 const wxUint32 wxUnicodePUAEnd 
= wxUnicodePUA 
+ 256; 
 680 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 684     while (*psz 
&& ((!buf
) || (len 
< n
))) 
 686         const char *opsz 
= psz
; 
 687         bool invalid 
= false; 
 688         unsigned char cc 
= *psz
++, fc 
= cc
; 
 690         for (cnt 
= 0; fc 
& 0x80; cnt
++) 
 699             // escape the escape character for octal escapes 
 700             if ((m_options 
& MAP_INVALID_UTF8_TO_OCTAL
) 
 701                     && cc 
== '\\' && (!buf 
|| len 
< n
)) 
 713                 // invalid UTF-8 sequence 
 718                 unsigned ocnt 
= cnt 
- 1; 
 719                 wxUint32 res 
= cc 
& (0x3f >> cnt
); 
 723                     if ((cc 
& 0xC0) != 0x80) 
 725                         // invalid UTF-8 sequence 
 730                     res 
= (res 
<< 6) | (cc 
& 0x3f); 
 732                 if (invalid 
|| res 
<= utf8_max
[ocnt
]) 
 734                     // illegal UTF-8 encoding 
 737                 else if ((m_options 
& MAP_INVALID_UTF8_TO_PUA
) && 
 738                         res 
>= wxUnicodePUA 
&& res 
< wxUnicodePUAEnd
) 
 740                     // if one of our PUA characters turns up externally 
 741                     // it must also be treated as an illegal sequence 
 742                     // (a bit like you have to escape an escape character) 
 748                     // cast is ok because wchar_t == wxUint16 if WC_UTF16 
 749                     size_t pa 
= encode_utf16(res
, (wxUint16 
*)buf
); 
 750                     if (pa 
== (size_t)-1) 
 762                         *buf
++ = (wchar_t)res
; 
 764 #endif // WC_UTF16/!WC_UTF16 
 769                 if (m_options 
& MAP_INVALID_UTF8_TO_PUA
) 
 771                     while (opsz 
< psz 
&& (!buf 
|| len 
< n
)) 
 774                         // cast is ok because wchar_t == wxUint16 if WC_UTF16 
 775                         size_t pa 
= encode_utf16((unsigned char)*opsz 
+ wxUnicodePUA
, (wxUint16 
*)buf
); 
 776                         wxASSERT(pa 
!= (size_t)-1); 
 783                             *buf
++ = (wchar_t)(wxUnicodePUA 
+ (unsigned char)*opsz
); 
 789                 else if (m_options 
& MAP_INVALID_UTF8_TO_OCTAL
) 
 791                     while (opsz 
< psz 
&& (!buf 
|| len 
< n
)) 
 793                         if ( buf 
&& len 
+ 3 < n 
) 
 795                             unsigned char on 
= *opsz
; 
 797                             *buf
++ = (wchar_t)( L
'0' + on 
/ 0100 ); 
 798                             *buf
++ = (wchar_t)( L
'0' + (on 
% 0100) / 010 ); 
 799                             *buf
++ = (wchar_t)( L
'0' + on 
% 010 ); 
 805                 else // MAP_INVALID_UTF8_NOT 
 812     if (buf 
&& (len 
< n
)) 
 817 static inline bool isoctal(wchar_t wch
) 
 819     return L
'0' <= wch 
&& wch 
<= L
'7'; 
 822 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 826     while (*psz 
&& ((!buf
) || (len 
< n
))) 
 830         // cast is ok for WC_UTF16 
 831         size_t pa 
= decode_utf16((const wxUint16 
*)psz
, cc
); 
 832         psz 
+= (pa 
== (size_t)-1) ? 1 : pa
; 
 834         cc
=(*psz
++) & 0x7fffffff; 
 837         if ( (m_options 
& MAP_INVALID_UTF8_TO_PUA
) 
 838                 && cc 
>= wxUnicodePUA 
&& cc 
< wxUnicodePUAEnd 
) 
 841                 *buf
++ = (char)(cc 
- wxUnicodePUA
); 
 844         else if ( (m_options 
& MAP_INVALID_UTF8_TO_OCTAL
) 
 845                     && cc 
== L
'\\' && psz
[0] == L
'\\' ) 
 852         else if ( (m_options 
& MAP_INVALID_UTF8_TO_OCTAL
) && 
 854                         isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) ) 
 858                 *buf
++ = (char) ((psz
[0] - L
'0')*0100 + 
 859                                  (psz
[1] - L
'0')*010 + 
 869             for (cnt 
= 0; cc 
> utf8_max
[cnt
]; cnt
++) {} 
 883                     *buf
++ = (char) ((-128 >> cnt
) | ((cc 
>> (cnt 
* 6)) & (0x3f >> cnt
))); 
 885                         *buf
++ = (char) (0x80 | ((cc 
>> (cnt 
* 6)) & 0x3f)); 
 897 // ---------------------------------------------------------------------------- 
 899 // ---------------------------------------------------------------------------- 
 901 #ifdef WORDS_BIGENDIAN 
 902     #define wxMBConvUTF16straight wxMBConvUTF16BE 
 903     #define wxMBConvUTF16swap     wxMBConvUTF16LE 
 905     #define wxMBConvUTF16swap     wxMBConvUTF16BE 
 906     #define wxMBConvUTF16straight wxMBConvUTF16LE 
 912 // copy 16bit MB to 16bit String 
 913 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 917     while (*(wxUint16
*)psz 
&& (!buf 
|| len 
< n
)) 
 920             *buf
++ = *(wxUint16
*)psz
; 
 923         psz 
+= sizeof(wxUint16
); 
 925     if (buf 
&& len
<n
)   *buf
=0; 
 931 // copy 16bit String to 16bit MB 
 932 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 936     while (*psz 
&& (!buf 
|| len 
< n
)) 
 940             *(wxUint16
*)buf 
= *psz
; 
 941             buf 
+= sizeof(wxUint16
); 
 943         len 
+= sizeof(wxUint16
); 
 946     if (buf 
&& len
<=n
-sizeof(wxUint16
))   *(wxUint16
*)buf
=0; 
 952 // swap 16bit MB to 16bit String 
 953 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 957     // UTF16 string must be terminated by 2 NULs as single NULs may occur 
 959     while ( (psz
[0] || psz
[1]) && (!buf 
|| len 
< n
) ) 
 963             ((char *)buf
)[0] = psz
[1]; 
 964             ((char *)buf
)[1] = psz
[0]; 
 971     if ( buf 
&& len 
< n 
) 
 978 // swap 16bit String to 16bit MB 
 979 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 983     while ( *psz 
&& (!buf 
|| len 
< n
) ) 
 987             *buf
++ = ((char*)psz
)[1]; 
 988             *buf
++ = ((char*)psz
)[0]; 
 994     if ( buf 
&& len 
< n 
- 1 ) 
1007 // copy 16bit MB to 32bit String 
1008 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1012     while (*(wxUint16
*)psz 
&& (!buf 
|| len 
< n
)) 
1015         size_t pa
=decode_utf16((wxUint16
*)psz
, cc
); 
1016         if (pa 
== (size_t)-1) 
1020             *buf
++ = (wchar_t)cc
; 
1022         psz 
+= pa 
* sizeof(wxUint16
); 
1024     if (buf 
&& len
<n
)   *buf
=0; 
1030 // copy 32bit String to 16bit MB 
1031 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1035     while (*psz 
&& (!buf 
|| len 
< n
)) 
1038         size_t pa
=encode_utf16(*psz
, cc
); 
1040         if (pa 
== (size_t)-1) 
1045             *(wxUint16
*)buf 
= cc
[0]; 
1046             buf 
+= sizeof(wxUint16
); 
1049                 *(wxUint16
*)buf 
= cc
[1]; 
1050                 buf 
+= sizeof(wxUint16
); 
1054         len 
+= pa
*sizeof(wxUint16
); 
1057     if (buf 
&& len
<=n
-sizeof(wxUint16
))   *(wxUint16
*)buf
=0; 
1063 // swap 16bit MB to 32bit String 
1064 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1068     while (*(wxUint16
*)psz 
&& (!buf 
|| len 
< n
)) 
1072         tmp
[0]=psz
[1];  tmp
[1]=psz
[0]; 
1073         tmp
[2]=psz
[3];  tmp
[3]=psz
[2]; 
1075         size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
); 
1076         if (pa 
== (size_t)-1) 
1080             *buf
++ = (wchar_t)cc
; 
1083         psz 
+= pa 
* sizeof(wxUint16
); 
1085     if (buf 
&& len
<n
)   *buf
=0; 
1091 // swap 32bit String to 16bit MB 
1092 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1096     while (*psz 
&& (!buf 
|| len 
< n
)) 
1099         size_t pa
=encode_utf16(*psz
, cc
); 
1101         if (pa 
== (size_t)-1) 
1106             *buf
++ = ((char*)cc
)[1]; 
1107             *buf
++ = ((char*)cc
)[0]; 
1110                 *buf
++ = ((char*)cc
)[3]; 
1111                 *buf
++ = ((char*)cc
)[2]; 
1115         len 
+= pa
*sizeof(wxUint16
); 
1118     if (buf 
&& len
<=n
-sizeof(wxUint16
))   *(wxUint16
*)buf
=0; 
1126 // ---------------------------------------------------------------------------- 
1128 // ---------------------------------------------------------------------------- 
1130 #ifdef WORDS_BIGENDIAN 
1131 #define wxMBConvUTF32straight  wxMBConvUTF32BE 
1132 #define wxMBConvUTF32swap      wxMBConvUTF32LE 
1134 #define wxMBConvUTF32swap      wxMBConvUTF32BE 
1135 #define wxMBConvUTF32straight  wxMBConvUTF32LE 
1139 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
; 
1140 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
; 
1145 // copy 32bit MB to 16bit String 
1146 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1150     while (*(wxUint32
*)psz 
&& (!buf 
|| len 
< n
)) 
1154         size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
); 
1155         if (pa 
== (size_t)-1) 
1165         psz 
+= sizeof(wxUint32
); 
1167     if (buf 
&& len
<n
)   *buf
=0; 
1173 // copy 16bit String to 32bit MB 
1174 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1178     while (*psz 
&& (!buf 
|| len 
< n
)) 
1182         // cast is ok for WC_UTF16 
1183         size_t pa 
= decode_utf16((const wxUint16 
*)psz
, cc
); 
1184         if (pa 
== (size_t)-1) 
1189             *(wxUint32
*)buf 
= cc
; 
1190             buf 
+= sizeof(wxUint32
); 
1192         len 
+= sizeof(wxUint32
); 
1196     if (buf 
&& len
<=n
-sizeof(wxUint32
)) 
1204 // swap 32bit MB to 16bit String 
1205 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1209     while (*(wxUint32
*)psz 
&& (!buf 
|| len 
< n
)) 
1212         tmp
[0] = psz
[3];   tmp
[1] = psz
[2]; 
1213         tmp
[2] = psz
[1];   tmp
[3] = psz
[0]; 
1218         size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
); 
1219         if (pa 
== (size_t)-1) 
1229         psz 
+= sizeof(wxUint32
); 
1239 // swap 16bit String to 32bit MB 
1240 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1244     while (*psz 
&& (!buf 
|| len 
< n
)) 
1248         // cast is ok for WC_UTF16 
1249         size_t pa
=decode_utf16((const wxUint16 
*)psz
, *(wxUint32
*)cc
); 
1250         if (pa 
== (size_t)-1) 
1260         len 
+= sizeof(wxUint32
); 
1264     if (buf 
&& len
<=n
-sizeof(wxUint32
)) 
1273 // copy 32bit MB to 32bit String 
1274 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1278     while (*(wxUint32
*)psz 
&& (!buf 
|| len 
< n
)) 
1281             *buf
++ = (wchar_t)(*(wxUint32
*)psz
); 
1283         psz 
+= sizeof(wxUint32
); 
1293 // copy 32bit String to 32bit MB 
1294 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1298     while (*psz 
&& (!buf 
|| len 
< n
)) 
1302             *(wxUint32
*)buf 
= *psz
; 
1303             buf 
+= sizeof(wxUint32
); 
1306         len 
+= sizeof(wxUint32
); 
1310     if (buf 
&& len
<=n
-sizeof(wxUint32
)) 
1317 // swap 32bit MB to 32bit String 
1318 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1322     while (*(wxUint32
*)psz 
&& (!buf 
|| len 
< n
)) 
1326             ((char *)buf
)[0] = psz
[3]; 
1327             ((char *)buf
)[1] = psz
[2]; 
1328             ((char *)buf
)[2] = psz
[1]; 
1329             ((char *)buf
)[3] = psz
[0]; 
1333         psz 
+= sizeof(wxUint32
); 
1343 // swap 32bit String to 32bit MB 
1344 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1348     while (*psz 
&& (!buf 
|| len 
< n
)) 
1352             *buf
++ = ((char *)psz
)[3]; 
1353             *buf
++ = ((char *)psz
)[2]; 
1354             *buf
++ = ((char *)psz
)[1]; 
1355             *buf
++ = ((char *)psz
)[0]; 
1357         len 
+= sizeof(wxUint32
); 
1361     if (buf 
&& len
<=n
-sizeof(wxUint32
)) 
1371 // ============================================================================ 
1372 // The classes doing conversion using the iconv_xxx() functions 
1373 // ============================================================================ 
1377 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with 
1378 //     E2BIG if output buffer is _exactly_ as big as needed. Such case is 
1379 //     (unless there's yet another bug in glibc) the only case when iconv() 
1380 //     returns with (size_t)-1 (which means error) and says there are 0 bytes 
1381 //     left in the input buffer -- when _real_ error occurs, 
1382 //     bytes-left-in-input buffer is non-zero. Hence, this alternative test for 
1384 //     [This bug does not appear in glibc 2.2.] 
1385 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1 
1386 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \ 
1387                                      (errno != E2BIG || bufLeft != 0)) 
1389 #define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1) 
1392 #define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x)) 
1394 #define ICONV_T_INVALID ((iconv_t)-1) 
1396 #if SIZEOF_WCHAR_T == 4 
1397     #define WC_BSWAP    wxUINT32_SWAP_ALWAYS 
1398     #define WC_ENC      wxFONTENCODING_UTF32 
1399 #elif SIZEOF_WCHAR_T == 2 
1400     #define WC_BSWAP    wxUINT16_SWAP_ALWAYS 
1401     #define WC_ENC      wxFONTENCODING_UTF16 
1402 #else // sizeof(wchar_t) != 2 nor 4 
1403     // does this ever happen? 
1404     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org" 
1407 // ---------------------------------------------------------------------------- 
1408 // wxMBConv_iconv: encapsulates an iconv character set 
1409 // ---------------------------------------------------------------------------- 
1411 class wxMBConv_iconv 
: public wxMBConv
 
1414     wxMBConv_iconv(const wxChar 
*name
); 
1415     virtual ~wxMBConv_iconv(); 
1417     virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const; 
1418     virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const; 
1420     // classify this encoding as explained in wxMBConv::GetMBNulLen() comment 
1421     virtual size_t GetMBNulLen() const; 
1423     virtual wxMBConv 
*Clone() const 
1425         wxMBConv_iconv 
*p 
= new wxMBConv_iconv(m_name
); 
1426         p
->m_minMBCharWidth 
= m_minMBCharWidth
; 
1431         { return (m2w 
!= ICONV_T_INVALID
) && (w2m 
!= ICONV_T_INVALID
); } 
1434     // the iconv handlers used to translate from multibyte to wide char and in 
1435     // the other direction 
1439     // guards access to m2w and w2m objects 
1440     wxMutex m_iconvMutex
; 
1444     // the name (for iconv_open()) of a wide char charset -- if none is 
1445     // available on this machine, it will remain empty 
1446     static wxString ms_wcCharsetName
; 
1448     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has 
1449     // different endian-ness than the native one 
1450     static bool ms_wcNeedsSwap
; 
1453     // name of the encoding handled by this conversion 
1456     // cached result of GetMBNulLen(); set to 0 meaning "unknown" 
1458     size_t m_minMBCharWidth
; 
1461 // make the constructor available for unit testing 
1462 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_iconv( const wxChar
* name 
) 
1464     wxMBConv_iconv
* result 
= new wxMBConv_iconv( name 
); 
1465     if ( !result
->IsOk() ) 
1473 wxString 
wxMBConv_iconv::ms_wcCharsetName
; 
1474 bool wxMBConv_iconv::ms_wcNeedsSwap 
= false; 
1476 wxMBConv_iconv::wxMBConv_iconv(const wxChar 
*name
) 
1479     m_minMBCharWidth 
= 0; 
1481     // iconv operates with chars, not wxChars, but luckily it uses only ASCII 
1482     // names for the charsets 
1483     const wxCharBuffer 
cname(wxString(name
).ToAscii()); 
1485     // check for charset that represents wchar_t: 
1486     if ( ms_wcCharsetName
.empty() ) 
1488         wxLogTrace(TRACE_STRCONV
, _T("Looking for wide char codeset:")); 
1491         const wxChar 
**names 
= wxFontMapperBase::GetAllEncodingNames(WC_ENC
); 
1492 #else // !wxUSE_FONTMAP 
1493         static const wxChar 
*names
[] = 
1495 #if SIZEOF_WCHAR_T == 4 
1497 #elif SIZEOF_WCHAR_T = 2 
1502 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP 
1504         for ( ; *names 
&& ms_wcCharsetName
.empty(); ++names 
) 
1506             const wxString 
nameCS(*names
); 
1508             // first try charset with explicit bytesex info (e.g. "UCS-4LE"): 
1509             wxString 
nameXE(nameCS
); 
1510             #ifdef WORDS_BIGENDIAN 
1512             #else // little endian 
1516             wxLogTrace(TRACE_STRCONV
, _T("  trying charset \"%s\""), 
1519             m2w 
= iconv_open(nameXE
.ToAscii(), cname
); 
1520             if ( m2w 
== ICONV_T_INVALID 
) 
1522                 // try charset w/o bytesex info (e.g. "UCS4") 
1523                 wxLogTrace(TRACE_STRCONV
, _T("  trying charset \"%s\""), 
1525                 m2w 
= iconv_open(nameCS
.ToAscii(), cname
); 
1527                 // and check for bytesex ourselves: 
1528                 if ( m2w 
!= ICONV_T_INVALID 
) 
1530                     char    buf
[2], *bufPtr
; 
1531                     wchar_t wbuf
[2], *wbufPtr
; 
1539                     outsz 
= SIZEOF_WCHAR_T 
* 2; 
1543                     res 
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
, 
1544                                 (char**)&wbufPtr
, &outsz
); 
1546                     if (ICONV_FAILED(res
, insz
)) 
1548                         wxLogLastError(wxT("iconv")); 
1549                         wxLogError(_("Conversion to charset '%s' doesn't work."), 
1552                     else // ok, can convert to this encoding, remember it 
1554                         ms_wcCharsetName 
= nameCS
; 
1555                         ms_wcNeedsSwap 
= wbuf
[0] != (wchar_t)buf
[0]; 
1559             else // use charset not requiring byte swapping 
1561                 ms_wcCharsetName 
= nameXE
; 
1565         wxLogTrace(TRACE_STRCONV
, 
1566                    wxT("iconv wchar_t charset is \"%s\"%s"), 
1567                    ms_wcCharsetName
.empty() ? _T("<none>") 
1568                                             : ms_wcCharsetName
.c_str(), 
1569                    ms_wcNeedsSwap 
? _T(" (needs swap)") 
1572     else // we already have ms_wcCharsetName 
1574         m2w 
= iconv_open(ms_wcCharsetName
.ToAscii(), cname
); 
1577     if ( ms_wcCharsetName
.empty() ) 
1579         w2m 
= ICONV_T_INVALID
; 
1583         w2m 
= iconv_open(cname
, ms_wcCharsetName
.ToAscii()); 
1584         if ( w2m 
== ICONV_T_INVALID 
) 
1586             wxLogTrace(TRACE_STRCONV
, 
1587                        wxT("\"%s\" -> \"%s\" works but not the converse!?"), 
1588                        ms_wcCharsetName
.c_str(), cname
.data()); 
1593 wxMBConv_iconv::~wxMBConv_iconv() 
1595     if ( m2w 
!= ICONV_T_INVALID 
) 
1597     if ( w2m 
!= ICONV_T_INVALID 
) 
1601 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1603     // find the string length: notice that must be done differently for 
1604     // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs 
1606     const size_t nulLen 
= GetMBNulLen(); 
1613             inbuf 
= strlen(psz
); // arguably more optimized than our version 
1618             // for UTF-16/32 not only we need to have 2/4 consecutive NULs but 
1619             // they also have to start at character boundary and not span two 
1620             // adjacent characters 
1622             for ( p 
= psz
; NotAllNULs(p
, nulLen
); p 
+= nulLen 
) 
1629     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle. 
1630     //     Unfortunately there is a couple of global wxCSConv objects such as 
1631     //     wxConvLocal that are used all over wx code, so we have to make sure 
1632     //     the handle is used by at most one thread at the time. Otherwise 
1633     //     only a few wx classes would be safe to use from non-main threads 
1634     //     as MB<->WC conversion would fail "randomly". 
1635     wxMutexLocker 
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
); 
1636 #endif // wxUSE_THREADS 
1639     size_t outbuf 
= n 
* SIZEOF_WCHAR_T
; 
1641     // VS: Use these instead of psz, buf because iconv() modifies its arguments: 
1642     wchar_t *bufPtr 
= buf
; 
1643     const char *pszPtr 
= psz
; 
1647         // have destination buffer, convert there 
1649                      ICONV_CHAR_CAST(&pszPtr
), &inbuf
, 
1650                      (char**)&bufPtr
, &outbuf
); 
1651         res 
= n 
- (outbuf 
/ SIZEOF_WCHAR_T
); 
1655             // convert to native endianness 
1656             for ( unsigned i 
= 0; i 
< res
; i
++ ) 
1657                 buf
[n
] = WC_BSWAP(buf
[i
]); 
1660         // NUL-terminate the string if there is any space left 
1666         // no destination buffer... convert using temp buffer 
1667         // to calculate destination buffer requirement 
1672             outbuf 
= 8*SIZEOF_WCHAR_T
; 
1675                          ICONV_CHAR_CAST(&pszPtr
), &inbuf
, 
1676                          (char**)&bufPtr
, &outbuf 
); 
1678             res 
+= 8-(outbuf
/SIZEOF_WCHAR_T
); 
1679         } while ((cres
==(size_t)-1) && (errno
==E2BIG
)); 
1682     if (ICONV_FAILED(cres
, inbuf
)) 
1684         //VS: it is ok if iconv fails, hence trace only 
1685         wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); 
1692 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1695     // NB: explained in MB2WC 
1696     wxMutexLocker 
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
); 
1699     size_t inlen 
= wxWcslen(psz
); 
1700     size_t inbuf 
= inlen 
* SIZEOF_WCHAR_T
; 
1704     wchar_t *tmpbuf 
= 0; 
1708         // need to copy to temp buffer to switch endianness 
1709         // (doing WC_BSWAP twice on the original buffer won't help, as it 
1710         //  could be in read-only memory, or be accessed in some other thread) 
1711         tmpbuf 
= (wchar_t *)malloc(inbuf 
+ SIZEOF_WCHAR_T
); 
1712         for ( size_t i 
= 0; i 
< inlen
; i
++ ) 
1713             tmpbuf
[n
] = WC_BSWAP(psz
[i
]); 
1714         tmpbuf
[inlen
] = L
'\0'; 
1720         // have destination buffer, convert there 
1721         cres 
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf 
); 
1725         // NB: iconv was given only wcslen(psz) characters on input, and so 
1726         //     it couldn't convert the trailing zero. Let's do it ourselves 
1727         //     if there's some room left for it in the output buffer. 
1733         // no destination buffer... convert using temp buffer 
1734         // to calculate destination buffer requirement 
1738             buf 
= tbuf
; outbuf 
= 16; 
1740             cres 
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf 
); 
1743         } while ((cres
==(size_t)-1) && (errno
==E2BIG
)); 
1751     if (ICONV_FAILED(cres
, inbuf
)) 
1753         wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); 
1760 size_t wxMBConv_iconv::GetMBNulLen() const 
1762     if ( m_minMBCharWidth 
== 0 ) 
1764         wxMBConv_iconv 
* const self 
= wxConstCast(this, wxMBConv_iconv
); 
1767         // NB: explained in MB2WC 
1768         wxMutexLocker 
lock(self
->m_iconvMutex
); 
1771         wchar_t *wnul 
= L
""; 
1772         char buf
[8]; // should be enough for NUL in any encoding 
1773         size_t inLen 
= sizeof(wchar_t), 
1774                outLen 
= WXSIZEOF(buf
); 
1775         char *in 
= (char *)wnul
; 
1777         if ( iconv(w2m
, ICONV_CHAR_CAST(&in
), &inLen
, &out
, &outLen
) == (size_t)-1 ) 
1779             self
->m_minMBCharWidth 
= (size_t)-1; 
1783             self
->m_minMBCharWidth 
= out 
- buf
; 
1787     return m_minMBCharWidth
; 
1790 #endif // HAVE_ICONV 
1793 // ============================================================================ 
1794 // Win32 conversion classes 
1795 // ============================================================================ 
1797 #ifdef wxHAVE_WIN32_MB2WC 
1801 extern WXDLLIMPEXP_BASE 
long wxCharsetToCodepage(const wxChar 
*charset
); 
1802 extern WXDLLIMPEXP_BASE 
long wxEncodingToCodepage(wxFontEncoding encoding
); 
1805 class wxMBConv_win32 
: public wxMBConv
 
1810         m_CodePage 
= CP_ACP
; 
1811         m_minMBCharWidth 
= 0; 
1814     wxMBConv_win32(const wxMBConv_win32
& conv
) 
1816         m_CodePage 
= conv
.m_CodePage
; 
1817         m_minMBCharWidth 
= conv
.m_minMBCharWidth
; 
1821     wxMBConv_win32(const wxChar
* name
) 
1823         m_CodePage 
= wxCharsetToCodepage(name
); 
1824         m_minMBCharWidth 
= 0; 
1827     wxMBConv_win32(wxFontEncoding encoding
) 
1829         m_CodePage 
= wxEncodingToCodepage(encoding
); 
1830         m_minMBCharWidth 
= 0; 
1832 #endif // wxUSE_FONTMAP 
1834     virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1836         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it 
1837         // the behaviour is not compatible with the Unix version (using iconv) 
1838         // and break the library itself, e.g. wxTextInputStream::NextChar() 
1839         // wouldn't work if reading an incomplete MB char didn't result in an 
1842         // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or 
1843         // Win XP or newer and it is not supported for UTF-[78] so we always 
1844         // use our own conversions in this case. See 
1845         //     http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx 
1846         //     http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp 
1847         if ( m_CodePage 
== CP_UTF8 
) 
1849             return wxConvUTF8
.MB2WC(buf
, psz
, n
); 
1852         if ( m_CodePage 
== CP_UTF7 
) 
1854             return wxConvUTF7
.MB2WC(buf
, psz
, n
); 
1858         if ( (m_CodePage 
< 50000 && m_CodePage 
!= CP_SYMBOL
) && 
1859                 IsAtLeastWin2kSP4() ) 
1861             flags 
= MB_ERR_INVALID_CHARS
; 
1864         const size_t len 
= ::MultiByteToWideChar
 
1866                                 m_CodePage
,     // code page 
1867                                 flags
,          // flags: fall on error 
1868                                 psz
,            // input string 
1869                                 -1,             // its length (NUL-terminated) 
1870                                 buf
,            // output string 
1871                                 buf 
? n 
: 0     // size of output buffer 
1875             // function totally failed 
1879         // if we were really converting and didn't use MB_ERR_INVALID_CHARS, 
1880         // check if we succeeded, by doing a double trip: 
1881         if ( !flags 
&& buf 
) 
1883             const size_t mbLen 
= strlen(psz
); 
1884             wxCharBuffer 
mbBuf(mbLen
); 
1885             if ( ::WideCharToMultiByte
 
1892                       mbLen 
+ 1,        // size in bytes, not length 
1896                   strcmp(mbBuf
, psz
) != 0 ) 
1898                 // we didn't obtain the same thing we started from, hence 
1899                 // the conversion was lossy and we consider that it failed 
1904         // note that it returns count of written chars for buf != NULL and size 
1905         // of the needed buffer for buf == NULL so in either case the length of 
1906         // the string (which never includes the terminating NUL) is one less 
1910     virtual size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const 
1913             we have a problem here: by default, WideCharToMultiByte() may 
1914             replace characters unrepresentable in the target code page with bad 
1915             quality approximations such as turning "1/2" symbol (U+00BD) into 
1916             "1" for the code pages which don't have it and we, obviously, want 
1917             to avoid this at any price 
1919             the trouble is that this function does it _silently_, i.e. it won't 
1920             even tell us whether it did or not... Win98/2000 and higher provide 
1921             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and 
1922             we have to resort to a round trip, i.e. check that converting back 
1923             results in the same string -- this is, of course, expensive but 
1924             otherwise we simply can't be sure to not garble the data. 
1927         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN 
1928         // it doesn't work with CJK encodings (which we test for rather roughly 
1929         // here...) nor with UTF-7/8 nor, of course, with Windows versions not 
1931         BOOL usedDef 
wxDUMMY_INITIALIZE(false); 
1934         if ( CanUseNoBestFit() && m_CodePage 
< 50000 ) 
1936             // it's our lucky day 
1937             flags 
= WC_NO_BEST_FIT_CHARS
; 
1938             pUsedDef 
= &usedDef
; 
1940         else // old system or unsupported encoding 
1946         const size_t len 
= ::WideCharToMultiByte
 
1948                                 m_CodePage
,     // code page 
1949                                 flags
,          // either none or no best fit 
1950                                 pwz
,            // input string 
1951                                 -1,             // it is (wide) NUL-terminated 
1952                                 buf
,            // output buffer 
1953                                 buf 
? n 
: 0,    // and its size 
1954                                 NULL
,           // default "replacement" char 
1955                                 pUsedDef        
// [out] was it used? 
1960             // function totally failed 
1964         // if we were really converting, check if we succeeded 
1969                 // check if the conversion failed, i.e. if any replacements 
1974             else // we must resort to double tripping... 
1976                 wxWCharBuffer 
wcBuf(n
); 
1977                 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 || 
1978                         wcscmp(wcBuf
, pwz
) != 0 ) 
1980                     // we didn't obtain the same thing we started from, hence 
1981                     // the conversion was lossy and we consider that it failed 
1987         // see the comment above for the reason of "len - 1" 
1991     virtual size_t GetMBNulLen() const 
1993         if ( m_minMBCharWidth 
== 0 ) 
1995             int len 
= ::WideCharToMultiByte
 
1997                             m_CodePage
,     // code page 
1999                             L
"",            // input string 
2000                             1,              // translate just the NUL 
2001                             NULL
,           // output buffer 
2003                             NULL
,           // no replacement char 
2004                             NULL            
// [out] don't care if it was used 
2007             wxMBConv_win32 
* const self 
= wxConstCast(this, wxMBConv_win32
); 
2011                     wxLogDebug(_T("Unexpected NUL length %d"), len
); 
2015                     self
->m_minMBCharWidth 
= (size_t)-1; 
2021                     self
->m_minMBCharWidth 
= len
; 
2026         return m_minMBCharWidth
; 
2029     virtual wxMBConv 
*Clone() const { return new wxMBConv_win32(*this); } 
2031     bool IsOk() const { return m_CodePage 
!= -1; } 
2034     static bool CanUseNoBestFit() 
2036         static int s_isWin98Or2k 
= -1; 
2038         if ( s_isWin98Or2k 
== -1 ) 
2041             switch ( wxGetOsVersion(&verMaj
, &verMin
) ) 
2044                     s_isWin98Or2k 
= verMaj 
>= 4 && verMin 
>= 10; 
2048                     s_isWin98Or2k 
= verMaj 
>= 5; 
2052                     // unknown, be conseravtive by default 
2056             wxASSERT_MSG( s_isWin98Or2k 
!= -1, _T("should be set above") ); 
2059         return s_isWin98Or2k 
== 1; 
2062     static bool IsAtLeastWin2kSP4() 
2067         static int s_isAtLeastWin2kSP4 
= -1; 
2069         if ( s_isAtLeastWin2kSP4 
== -1 ) 
2071             OSVERSIONINFOEX ver
; 
2073             memset(&ver
, 0, sizeof(ver
)); 
2074             ver
.dwOSVersionInfoSize 
= sizeof(ver
); 
2075             GetVersionEx((OSVERSIONINFO
*)&ver
); 
2077             s_isAtLeastWin2kSP4 
= 
2078               ((ver
.dwMajorVersion 
> 5) || // Vista+ 
2079                (ver
.dwMajorVersion 
== 5 && ver
.dwMinorVersion 
> 0) || // XP/2003 
2080                (ver
.dwMajorVersion 
== 5 && ver
.dwMinorVersion 
== 0 && 
2081                ver
.wServicePackMajor 
>= 4)) // 2000 SP4+ 
2085         return s_isAtLeastWin2kSP4 
== 1; 
2090     // the code page we're working with 
2093     // cached result of GetMBNulLen(), set to 0 initially meaning 
2095     size_t m_minMBCharWidth
; 
2098 #endif // wxHAVE_WIN32_MB2WC 
2100 // ============================================================================ 
2101 // Cocoa conversion classes 
2102 // ============================================================================ 
2104 #if defined(__WXCOCOA__) 
2106 // RN:  There is no UTF-32 support in either Core Foundation or 
2107 // Cocoa.  Strangely enough, internally Core Foundation uses 
2108 // UTF 32 internally quite a bit - its just not public (yet). 
2110 #include <CoreFoundation/CFString.h> 
2111 #include <CoreFoundation/CFStringEncodingExt.h> 
2113 CFStringEncoding 
wxCFStringEncFromFontEnc(wxFontEncoding encoding
) 
2115     CFStringEncoding enc 
= kCFStringEncodingInvalidId 
; 
2116     if ( encoding 
== wxFONTENCODING_DEFAULT 
) 
2118         enc 
= CFStringGetSystemEncoding(); 
2120     else switch( encoding
) 
2122         case wxFONTENCODING_ISO8859_1 
: 
2123             enc 
= kCFStringEncodingISOLatin1 
; 
2125         case wxFONTENCODING_ISO8859_2 
: 
2126             enc 
= kCFStringEncodingISOLatin2
; 
2128         case wxFONTENCODING_ISO8859_3 
: 
2129             enc 
= kCFStringEncodingISOLatin3 
; 
2131         case wxFONTENCODING_ISO8859_4 
: 
2132             enc 
= kCFStringEncodingISOLatin4
; 
2134         case wxFONTENCODING_ISO8859_5 
: 
2135             enc 
= kCFStringEncodingISOLatinCyrillic
; 
2137         case wxFONTENCODING_ISO8859_6 
: 
2138             enc 
= kCFStringEncodingISOLatinArabic
; 
2140         case wxFONTENCODING_ISO8859_7 
: 
2141             enc 
= kCFStringEncodingISOLatinGreek
; 
2143         case wxFONTENCODING_ISO8859_8 
: 
2144             enc 
= kCFStringEncodingISOLatinHebrew
; 
2146         case wxFONTENCODING_ISO8859_9 
: 
2147             enc 
= kCFStringEncodingISOLatin5
; 
2149         case wxFONTENCODING_ISO8859_10 
: 
2150             enc 
= kCFStringEncodingISOLatin6
; 
2152         case wxFONTENCODING_ISO8859_11 
: 
2153             enc 
= kCFStringEncodingISOLatinThai
; 
2155         case wxFONTENCODING_ISO8859_13 
: 
2156             enc 
= kCFStringEncodingISOLatin7
; 
2158         case wxFONTENCODING_ISO8859_14 
: 
2159             enc 
= kCFStringEncodingISOLatin8
; 
2161         case wxFONTENCODING_ISO8859_15 
: 
2162             enc 
= kCFStringEncodingISOLatin9
; 
2165         case wxFONTENCODING_KOI8 
: 
2166             enc 
= kCFStringEncodingKOI8_R
; 
2168         case wxFONTENCODING_ALTERNATIVE 
: // MS-DOS CP866 
2169             enc 
= kCFStringEncodingDOSRussian
; 
2172 //      case wxFONTENCODING_BULGARIAN : 
2176         case wxFONTENCODING_CP437 
: 
2177             enc 
=kCFStringEncodingDOSLatinUS 
; 
2179         case wxFONTENCODING_CP850 
: 
2180             enc 
= kCFStringEncodingDOSLatin1
; 
2182         case wxFONTENCODING_CP852 
: 
2183             enc 
= kCFStringEncodingDOSLatin2
; 
2185         case wxFONTENCODING_CP855 
: 
2186             enc 
= kCFStringEncodingDOSCyrillic
; 
2188         case wxFONTENCODING_CP866 
: 
2189             enc 
=kCFStringEncodingDOSRussian 
; 
2191         case wxFONTENCODING_CP874 
: 
2192             enc 
= kCFStringEncodingDOSThai
; 
2194         case wxFONTENCODING_CP932 
: 
2195             enc 
= kCFStringEncodingDOSJapanese
; 
2197         case wxFONTENCODING_CP936 
: 
2198             enc 
=kCFStringEncodingDOSChineseSimplif 
; 
2200         case wxFONTENCODING_CP949 
: 
2201             enc 
= kCFStringEncodingDOSKorean
; 
2203         case wxFONTENCODING_CP950 
: 
2204             enc 
= kCFStringEncodingDOSChineseTrad
; 
2206         case wxFONTENCODING_CP1250 
: 
2207             enc 
= kCFStringEncodingWindowsLatin2
; 
2209         case wxFONTENCODING_CP1251 
: 
2210             enc 
=kCFStringEncodingWindowsCyrillic 
; 
2212         case wxFONTENCODING_CP1252 
: 
2213             enc 
=kCFStringEncodingWindowsLatin1 
; 
2215         case wxFONTENCODING_CP1253 
: 
2216             enc 
= kCFStringEncodingWindowsGreek
; 
2218         case wxFONTENCODING_CP1254 
: 
2219             enc 
= kCFStringEncodingWindowsLatin5
; 
2221         case wxFONTENCODING_CP1255 
: 
2222             enc 
=kCFStringEncodingWindowsHebrew 
; 
2224         case wxFONTENCODING_CP1256 
: 
2225             enc 
=kCFStringEncodingWindowsArabic 
; 
2227         case wxFONTENCODING_CP1257 
: 
2228             enc 
= kCFStringEncodingWindowsBalticRim
; 
2230 //   This only really encodes to UTF7 (if that) evidently 
2231 //        case wxFONTENCODING_UTF7 : 
2232 //            enc = kCFStringEncodingNonLossyASCII ; 
2234         case wxFONTENCODING_UTF8 
: 
2235             enc 
= kCFStringEncodingUTF8 
; 
2237         case wxFONTENCODING_EUC_JP 
: 
2238             enc 
= kCFStringEncodingEUC_JP
; 
2240         case wxFONTENCODING_UTF16 
: 
2241             enc 
= kCFStringEncodingUnicode 
; 
2243         case wxFONTENCODING_MACROMAN 
: 
2244             enc 
= kCFStringEncodingMacRoman 
; 
2246         case wxFONTENCODING_MACJAPANESE 
: 
2247             enc 
= kCFStringEncodingMacJapanese 
; 
2249         case wxFONTENCODING_MACCHINESETRAD 
: 
2250             enc 
= kCFStringEncodingMacChineseTrad 
; 
2252         case wxFONTENCODING_MACKOREAN 
: 
2253             enc 
= kCFStringEncodingMacKorean 
; 
2255         case wxFONTENCODING_MACARABIC 
: 
2256             enc 
= kCFStringEncodingMacArabic 
; 
2258         case wxFONTENCODING_MACHEBREW 
: 
2259             enc 
= kCFStringEncodingMacHebrew 
; 
2261         case wxFONTENCODING_MACGREEK 
: 
2262             enc 
= kCFStringEncodingMacGreek 
; 
2264         case wxFONTENCODING_MACCYRILLIC 
: 
2265             enc 
= kCFStringEncodingMacCyrillic 
; 
2267         case wxFONTENCODING_MACDEVANAGARI 
: 
2268             enc 
= kCFStringEncodingMacDevanagari 
; 
2270         case wxFONTENCODING_MACGURMUKHI 
: 
2271             enc 
= kCFStringEncodingMacGurmukhi 
; 
2273         case wxFONTENCODING_MACGUJARATI 
: 
2274             enc 
= kCFStringEncodingMacGujarati 
; 
2276         case wxFONTENCODING_MACORIYA 
: 
2277             enc 
= kCFStringEncodingMacOriya 
; 
2279         case wxFONTENCODING_MACBENGALI 
: 
2280             enc 
= kCFStringEncodingMacBengali 
; 
2282         case wxFONTENCODING_MACTAMIL 
: 
2283             enc 
= kCFStringEncodingMacTamil 
; 
2285         case wxFONTENCODING_MACTELUGU 
: 
2286             enc 
= kCFStringEncodingMacTelugu 
; 
2288         case wxFONTENCODING_MACKANNADA 
: 
2289             enc 
= kCFStringEncodingMacKannada 
; 
2291         case wxFONTENCODING_MACMALAJALAM 
: 
2292             enc 
= kCFStringEncodingMacMalayalam 
; 
2294         case wxFONTENCODING_MACSINHALESE 
: 
2295             enc 
= kCFStringEncodingMacSinhalese 
; 
2297         case wxFONTENCODING_MACBURMESE 
: 
2298             enc 
= kCFStringEncodingMacBurmese 
; 
2300         case wxFONTENCODING_MACKHMER 
: 
2301             enc 
= kCFStringEncodingMacKhmer 
; 
2303         case wxFONTENCODING_MACTHAI 
: 
2304             enc 
= kCFStringEncodingMacThai 
; 
2306         case wxFONTENCODING_MACLAOTIAN 
: 
2307             enc 
= kCFStringEncodingMacLaotian 
; 
2309         case wxFONTENCODING_MACGEORGIAN 
: 
2310             enc 
= kCFStringEncodingMacGeorgian 
; 
2312         case wxFONTENCODING_MACARMENIAN 
: 
2313             enc 
= kCFStringEncodingMacArmenian 
; 
2315         case wxFONTENCODING_MACCHINESESIMP 
: 
2316             enc 
= kCFStringEncodingMacChineseSimp 
; 
2318         case wxFONTENCODING_MACTIBETAN 
: 
2319             enc 
= kCFStringEncodingMacTibetan 
; 
2321         case wxFONTENCODING_MACMONGOLIAN 
: 
2322             enc 
= kCFStringEncodingMacMongolian 
; 
2324         case wxFONTENCODING_MACETHIOPIC 
: 
2325             enc 
= kCFStringEncodingMacEthiopic 
; 
2327         case wxFONTENCODING_MACCENTRALEUR 
: 
2328             enc 
= kCFStringEncodingMacCentralEurRoman 
; 
2330         case wxFONTENCODING_MACVIATNAMESE 
: 
2331             enc 
= kCFStringEncodingMacVietnamese 
; 
2333         case wxFONTENCODING_MACARABICEXT 
: 
2334             enc 
= kCFStringEncodingMacExtArabic 
; 
2336         case wxFONTENCODING_MACSYMBOL 
: 
2337             enc 
= kCFStringEncodingMacSymbol 
; 
2339         case wxFONTENCODING_MACDINGBATS 
: 
2340             enc 
= kCFStringEncodingMacDingbats 
; 
2342         case wxFONTENCODING_MACTURKISH 
: 
2343             enc 
= kCFStringEncodingMacTurkish 
; 
2345         case wxFONTENCODING_MACCROATIAN 
: 
2346             enc 
= kCFStringEncodingMacCroatian 
; 
2348         case wxFONTENCODING_MACICELANDIC 
: 
2349             enc 
= kCFStringEncodingMacIcelandic 
; 
2351         case wxFONTENCODING_MACROMANIAN 
: 
2352             enc 
= kCFStringEncodingMacRomanian 
; 
2354         case wxFONTENCODING_MACCELTIC 
: 
2355             enc 
= kCFStringEncodingMacCeltic 
; 
2357         case wxFONTENCODING_MACGAELIC 
: 
2358             enc 
= kCFStringEncodingMacGaelic 
; 
2360 //      case wxFONTENCODING_MACKEYBOARD : 
2361 //          enc = kCFStringEncodingMacKeyboardGlyphs ; 
2364             // because gcc is picky 
2370 class wxMBConv_cocoa 
: public wxMBConv
 
2375         Init(CFStringGetSystemEncoding()) ; 
2378     wxMBConv_cocoa(const wxMBConv_cocoa
& conv
) 
2380         m_encoding 
= conv
.m_encoding
; 
2384     wxMBConv_cocoa(const wxChar
* name
) 
2386         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ; 
2390     wxMBConv_cocoa(wxFontEncoding encoding
) 
2392         Init( wxCFStringEncFromFontEnc(encoding
) ); 
2399     void Init( CFStringEncoding encoding
) 
2401         m_encoding 
= encoding 
; 
2404     size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const 
2408         CFStringRef theString 
= CFStringCreateWithBytes ( 
2409                                                 NULL
, //the allocator 
2410                                                 (const UInt8
*)szUnConv
, 
2413                                                 false //no BOM/external representation 
2416         wxASSERT(theString
); 
2418         size_t nOutLength 
= CFStringGetLength(theString
); 
2422             CFRelease(theString
); 
2426         CFRange theRange 
= { 0, nOutSize 
}; 
2428 #if SIZEOF_WCHAR_T == 4 
2429         UniChar
* szUniCharBuffer 
= new UniChar
[nOutSize
]; 
2432         CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
); 
2434         CFRelease(theString
); 
2436         szUniCharBuffer
[nOutLength
] = '\0' ; 
2438 #if SIZEOF_WCHAR_T == 4 
2439         wxMBConvUTF16 converter 
; 
2440         converter
.MB2WC(szOut
, (const char*)szUniCharBuffer 
, nOutSize 
) ; 
2441         delete[] szUniCharBuffer
; 
// Convert the wide string szUnConv to the multibyte encoding m_encoding,
// writing into szOut (capacity nOutSize).  Returns nRealOutSize - 1.
// NOTE(review): this extract omits brace-only lines and some statements;
// the comments below describe only the fragments that are visible.
2447     size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const 
2451         size_t nRealOutSize
; 
2452         size_t nBufSize 
= wxWcslen(szUnConv
); 
// by default the input is reinterpreted in place as UniChar data
2453         UniChar
* szUniBuffer 
= (UniChar
*) szUnConv
; 
// With a 4 byte wchar_t the input is first converted to UTF-16: a sizing
// call with a NULL buffer yields the byte count, a buffer with one spare
// UniChar is allocated, and nBufSize is finally turned back into a count
// of UniChars.
2455 #if SIZEOF_WCHAR_T == 4 
2456         wxMBConvUTF16 converter 
; 
2457         nBufSize 
= converter
.WC2MB( NULL 
, szUnConv 
, 0 ); 
2458         szUniBuffer 
= new UniChar
[ (nBufSize 
/ sizeof(UniChar
)) + 1] ; 
2459         converter
.WC2MB( (char*) szUniBuffer 
, szUnConv
, nBufSize 
+ sizeof(UniChar
)) ; 
2460         nBufSize 
/= sizeof(UniChar
); 
// wrap the UTF-16 data in a CFString without copying it
2463         CFStringRef theString 
= CFStringCreateWithCharactersNoCopy( 
2467                                 kCFAllocatorNull 
//deallocator - we want to deallocate it ourselves 
2470         wxASSERT(theString
); 
2472         //Note that CER puts a BOM when converting to unicode 
2473         //so we  check and use getchars instead in that case 
2474         if (m_encoding 
== kCFStringEncodingUnicode
) 
// NOTE(review): the range below is (0, nOutSize - 1) and is not clamped to
// the length of theString in the visible code -- confirm it cannot exceed
// the string length.
2477                 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize 
- 1), (UniChar
*) szOut
); 
2479             nRealOutSize 
= CFStringGetLength(theString
) + 1; 
// Arguments of the byte conversion call used for all other encodings; the
// call itself is not visible here (presumably CFStringGetBytes -- confirm).
// The used length is stored through a CFIndex* cast of &nRealOutSize.
2485                 CFRangeMake(0, CFStringGetLength(theString
)), 
2487                 0, //what to put in characters that can't be converted - 
2488                     //0 tells CFString to return NULL if it meets such a character 
2489                 false, //not an external representation 
2492                 (CFIndex
*) &nRealOutSize
 
2496         CFRelease(theString
); 
// the temporary UTF-16 buffer only exists with a 4 byte wchar_t
2498 #if SIZEOF_WCHAR_T == 4 
2499         delete[] szUniBuffer
; 
2502         return  nRealOutSize 
- 1; 
2505     virtual wxMBConv 
*Clone() const { return new wxMBConv_cocoa(*this); } 
// Validity check (the enclosing function signature is not visible in this
// extract): true only if m_encoding is not kCFStringEncodingInvalidId and
// CFStringIsEncodingAvailable() accepts it.
2511         return m_encoding 
!= kCFStringEncodingInvalidId 
&& 
2512               CFStringIsEncodingAvailable(m_encoding
); 
// the CoreFoundation encoding this object converts to and from
2516     CFStringEncoding m_encoding 
; 
2519 #endif // defined(__WXCOCOA__) 
2521 // ============================================================================ 
2522 // Mac conversion classes 
2523 // ============================================================================ 
2525 #if defined(__WXMAC__) && defined(TARGET_CARBON) 
2527 class wxMBConv_mac 
: public wxMBConv
 
2532         Init(CFStringGetSystemEncoding()) ; 
2535     wxMBConv_mac(const wxMBConv_mac
& conv
) 
2537         Init(conv
.m_char_encoding
); 
2541     wxMBConv_mac(const wxChar
* name
) 
2543         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ; 
2547     wxMBConv_mac(wxFontEncoding encoding
) 
2549         Init( wxMacGetSystemEncFromFontEnc(encoding
) ); 
2554         OSStatus status 
= noErr 
; 
2555         status 
= TECDisposeConverter(m_MB2WC_converter
); 
2556         status 
= TECDisposeConverter(m_WC2MB_converter
); 
2560     void Init( TextEncodingBase encoding
) 
2562         OSStatus status 
= noErr 
; 
2563         m_char_encoding 
= encoding 
; 
2564         m_unicode_encoding 
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ; 
2566         status 
= TECCreateConverter(&m_MB2WC_converter
, 
2568                                     m_unicode_encoding
); 
2569         status 
= TECCreateConverter(&m_WC2MB_converter
, 
2574     size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
2576         OSStatus status 
= noErr 
; 
2577         ByteCount byteOutLen 
; 
2578         ByteCount byteInLen 
= strlen(psz
) ; 
2579         wchar_t *tbuf 
= NULL 
; 
2580         UniChar
* ubuf 
= NULL 
; 
2585             //apple specs say at least 32 
2586             n 
= wxMax( 32 , byteInLen 
) ; 
2587             tbuf 
= (wchar_t*) malloc( n 
* SIZEOF_WCHAR_T
) ; 
2589         ByteCount byteBufferLen 
= n 
* sizeof( UniChar 
) ; 
2590 #if SIZEOF_WCHAR_T == 4 
2591         ubuf 
= (UniChar
*) malloc( byteBufferLen 
+ 2 ) ; 
2593         ubuf 
= (UniChar
*) (buf 
? buf 
: tbuf
) ; 
2595         status 
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz 
, byteInLen
, &byteInLen
, 
2596           (TextPtr
) ubuf 
, byteBufferLen
, &byteOutLen
); 
2597 #if SIZEOF_WCHAR_T == 4 
2598         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar 
2599         // is not properly terminated we get random characters at the end 
2600         ubuf
[byteOutLen 
/ sizeof( UniChar 
) ] = 0 ; 
2601         wxMBConvUTF16 converter 
; 
2602         res 
= converter
.MB2WC( (buf 
? buf 
: tbuf
) , (const char*)ubuf 
, n 
) ; 
2605         res 
= byteOutLen 
/ sizeof( UniChar 
) ; 
2610         if ( buf  
&& res 
< n
) 
2616     size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
2618         OSStatus status 
= noErr 
; 
2619         ByteCount byteOutLen 
; 
2620         ByteCount byteInLen 
= wxWcslen(psz
) * SIZEOF_WCHAR_T 
; 
2626             //apple specs say at least 32 
2627             n 
= wxMax( 32 , ((byteInLen 
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T 
); 
2628             tbuf 
= (char*) malloc( n 
) ; 
2631         ByteCount byteBufferLen 
= n 
; 
2632         UniChar
* ubuf 
= NULL 
; 
2633 #if SIZEOF_WCHAR_T == 4 
2634         wxMBConvUTF16 converter 
; 
2635         size_t unicharlen 
= converter
.WC2MB( NULL 
, psz 
, 0 ) ; 
2636         byteInLen 
= unicharlen 
; 
2637         ubuf 
= (UniChar
*) malloc( byteInLen 
+ 2 ) ; 
2638         converter
.WC2MB( (char*) ubuf 
, psz
, unicharlen 
+ 2 ) ; 
2640         ubuf 
= (UniChar
*) psz 
; 
2642         status 
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf 
, byteInLen
, &byteInLen
, 
2643             (TextPtr
) (buf 
? buf 
: tbuf
) , byteBufferLen
, &byteOutLen
); 
2644 #if SIZEOF_WCHAR_T == 4 
2650         size_t res 
= byteOutLen 
; 
2651         if ( buf  
&& res 
< n
) 
2655             //we need to double-trip to verify it didn't insert any ? in place 
2656             //of bogus characters 
2657             wxWCharBuffer 
wcBuf(n
); 
2658             size_t pszlen 
= wxWcslen(psz
); 
2659             if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 || 
2660                         wxWcslen(wcBuf
) != pszlen 
|| 
2661                         memcmp(wcBuf
, psz
, pszlen 
* sizeof(wchar_t)) != 0 ) 
2663                 // we didn't obtain the same thing we started from, hence 
2664                 // the conversion was lossy and we consider that it failed 
2672     virtual wxMBConv 
*Clone() const { return wxMBConv_mac(*this); } 
2676         { return m_MB2WC_converter 
!=  NULL 
&& m_WC2MB_converter 
!= NULL  
; } 
2679     TECObjectRef m_MB2WC_converter 
; 
2680     TECObjectRef m_WC2MB_converter 
; 
2682     TextEncodingBase m_char_encoding 
; 
2683     TextEncodingBase m_unicode_encoding 
; 
2686 #endif // defined(__WXMAC__) && defined(TARGET_CARBON) 
2688 // ============================================================================ 
2689 // wxEncodingConverter based conversion classes 
2690 // ============================================================================ 
// Conversion class built on a pair of wxEncodingConverter objects: m2w
// converts from m_enc to wxFONTENCODING_UNICODE and w2m the other way.
// NOTE(review): this extract omits brace-only lines and several
// statements; the comments describe only the visible fragments.
2694 class wxMBConv_wxwin 
: public wxMBConv
 
// initialisation: m_ok is true only if both converters initialise
2699         m_ok 
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) && 
2700                w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
); 
2704     // temporarily just use wxEncodingConverter stuff, 
2705     // so that it works while a better implementation is built 
2706     wxMBConv_wxwin(const wxChar
* name
) 
// m_enc is either looked up from the charset name (the "false" argument
// presumably suppresses interactive dialogs, as noted elsewhere in this
// file) or set to wxFONTENCODING_SYSTEM; the condition choosing between
// the two is not visible here.
2709             m_enc 
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false); 
2711             m_enc 
= wxFONTENCODING_SYSTEM
; 
2716     wxMBConv_wxwin(wxFontEncoding enc
) 
// NOTE(review): the output size parameter is WXUNUSED in both conversion
// functions, so the visible code never bounds writes into buf -- confirm
// that callers size the buffer from the input length.
2723     size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const 
2725         size_t inbuf 
= strlen(psz
); 
2728             if (!m2w
.Convert(psz
,buf
)) 
2734     size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const 
2736         const size_t inbuf 
= wxWcslen(psz
); 
2739             if (!w2m
.Convert(psz
,buf
)) 
// the UTF-16 and UTF-32 encodings are handled as special cases here; the
// values returned for them are not visible in this extract
2746     virtual size_t GetMBNulLen() const 
2750             case wxFONTENCODING_UTF16BE
: 
2751             case wxFONTENCODING_UTF16LE
: 
2754             case wxFONTENCODING_UTF32BE
: 
2755             case wxFONTENCODING_UTF32LE
: 
// a clone is a new object constructed from the same encoding
2763     virtual wxMBConv 
*Clone() const { return new wxMBConv_wxwin(m_enc
); } 
2765     bool IsOk() const { return m_ok
; } 
// the encoding converted to/from and the converters for both directions
2768     wxFontEncoding m_enc
; 
2769     wxEncodingConverter m2w
, w2m
; 
2772     // were we initialized successfully? 
2775     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
) 
2778 // make the constructors available for unit testing 
// Creates a wxMBConv_wxwin for the given charset name and checks IsOk() on
// it; what happens when the check fails and the final return are not
// visible in this extract.
2779 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_wxwin( const wxChar
* name 
) 
2781     wxMBConv_wxwin
* result 
= new wxMBConv_wxwin( name 
); 
2782     if ( !result
->IsOk() ) 
2790 #endif // wxUSE_FONTMAP 
2792 // ============================================================================ 
2793 // wxCSConv implementation 
2794 // ============================================================================ 
// Initialisation helper of wxCSConv.
// NOTE(review): the body is not visible in this extract.
2796 void wxCSConv::Init() 
// Construct from a charset name.  m_encoding is either derived from the
// name with wxFontMapperBase::GetEncodingFromName() or set to
// wxFONTENCODING_SYSTEM; the condition (or preprocessor test) selecting
// between the two assignments is not visible in this extract.
2803 wxCSConv::wxCSConv(const wxChar 
*charset
) 
2813     m_encoding 
= wxFontMapperBase::GetEncodingFromName(charset
); 
2815     m_encoding 
= wxFONTENCODING_SYSTEM
; 
// Construct from an encoding value.  wxFONTENCODING_MAX and
// wxFONTENCODING_DEFAULT are rejected: a debug failure message is raised
// and wxFONTENCODING_SYSTEM is used instead.
2819 wxCSConv::wxCSConv(wxFontEncoding encoding
) 
2821     if ( encoding 
== wxFONTENCODING_MAX 
|| encoding 
== wxFONTENCODING_DEFAULT 
) 
2823         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") ); 
2825         encoding 
= wxFONTENCODING_SYSTEM
; 
// store the (possibly corrected) encoding
2830     m_encoding 
= encoding
; 
// Destructor.
// NOTE(review): the body is not visible in this extract.
2833 wxCSConv::~wxCSConv() 
// Copy constructor: takes over the charset name (through SetName()) and
// the encoding of the other object.
2838 wxCSConv::wxCSConv(const wxCSConv
& conv
) 
2843     SetName(conv
.m_name
); 
2844     m_encoding 
= conv
.m_encoding
; 
// Assignment: takes over the charset name (through SetName()) and the
// encoding of the other object.
// NOTE(review): if the lines omitted from this extract release m_name
// before SetName(conv.m_name) runs, self-assignment would read a freed
// string -- confirm and guard against "this == &conv" if so.
2847 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
) 
2851     SetName(conv
.m_name
); 
2852     m_encoding 
= conv
.m_encoding
; 
// Reset helper of wxCSConv.
// NOTE(review): the body is not visible in this extract.
2857 void wxCSConv::Clear() 
// Store a private copy (made with wxStrdup()) of the charset name in
// m_name.  Any condition guarding the copy is not visible in this extract.
2866 void wxCSConv::SetName(const wxChar 
*charset
) 
2870         m_name 
= wxStrdup(charset
); 
2876 #include "wx/hashmap.h" 
// Hash map from wxFontEncoding to a charset name, used by
// wxCSConv::DoCreate() to remember which name worked with iconv for an
// encoding (an empty string records that none did).
2878 WX_DECLARE_HASH_MAP( wxFontEncoding
, wxString
, wxIntegerHash
, wxIntegerEqual
, 
2879                      wxEncodingNameCache 
); 
2881 static wxEncodingNameCache gs_nameCache
; 
// Create the real conversion object for the name/encoding of this
// wxCSConv, trying the available back ends in the order documented in the
// body.
// NOTE(review): this extract omits brace-only lines, most preprocessor
// guards and the lines testing/returning the created objects; the comments
// describe only the visible fragments.
2884 wxMBConv 
*wxCSConv::DoCreate() const 
2887     wxLogTrace(TRACE_STRCONV
, 
2888                wxT("creating conversion for %s"), 
2890                        : wxFontMapperBase::GetEncodingName(m_encoding
).c_str())); 
2891 #endif // wxUSE_FONTMAP 
2893     // check for the special case of ASCII or ISO8859-1 charset: as we have 
2894     // special knowledge of it anyhow, we don't need to create a special 
2895     // conversion object 
2896     if ( m_encoding 
== wxFONTENCODING_ISO8859_1 
|| 
2897             m_encoding 
== wxFONTENCODING_DEFAULT 
) 
2899         // don't convert at all 
2903     // we trust OS to do conversion better than we can so try external 
2904     // conversion methods first 
2906     // the full order is: 
2907     //      1. OS conversion (iconv() under Unix or Win32 API) 
2908     //      2. hard coded conversions for UTF 
2909     //      3. wxEncodingConverter as fall back 
2915 #endif // !wxUSE_FONTMAP 
// iconv back end: work on local copies of the name and the encoding
2917         wxString 
name(m_name
); 
2918         wxFontEncoding 
encoding(m_encoding
); 
// a non-empty name is tried with iconv first
2920         if ( !name
.empty() ) 
2922             wxMBConv_iconv 
*conv 
= new wxMBConv_iconv(name
); 
2930                 wxFontMapperBase::Get()->CharsetToEncoding(name
, false); 
2931 #endif // wxUSE_FONTMAP 
// next consult the cache of names known to work (or known to fail, which
// is recorded as an empty string) for this encoding
2935             const wxEncodingNameCache::iterator it 
= gs_nameCache
.find(encoding
); 
2936             if ( it 
!= gs_nameCache
.end() ) 
2938                 if ( it
->second
.empty() ) 
2941                 wxMBConv_iconv 
*conv 
= new wxMBConv_iconv(it
->second
); 
// nothing cached: try every known name of the encoding in turn and
// remember the one used
2948             const wxChar
** names 
= wxFontMapperBase::GetAllEncodingNames(encoding
); 
2950             for ( ; *names
; ++names 
) 
2952                 wxMBConv_iconv 
*conv 
= new wxMBConv_iconv(*names
); 
2955                     gs_nameCache
[encoding
] = *names
; 
2962             gs_nameCache
[encoding
] = _T(""); // cache the failure 
2964 #endif // wxUSE_FONTMAP 
2966 #endif // HAVE_ICONV 
// Win32 back end: created from the name if there is one, otherwise from
// the encoding
2968 #ifdef wxHAVE_WIN32_MB2WC 
2971         wxMBConv_win32 
*conv 
= m_name 
? new wxMBConv_win32(m_name
) 
2972                                       : new wxMBConv_win32(m_encoding
); 
2981 #endif // wxHAVE_WIN32_MB2WC 
2982 #if defined(__WXMAC__) 
2984         // leave UTF16 and UTF32 to the built-ins of wx 
2985         if ( m_name 
|| ( m_encoding 
< wxFONTENCODING_UTF16BE 
|| 
2986             ( m_encoding 
>= wxFONTENCODING_MACMIN 
&& m_encoding 
<= wxFONTENCODING_MACMAX 
) ) ) 
// two alternative definitions of conv follow; the second one does not use
// the name (the preprocessor test selecting between them is not visible)
2990             wxMBConv_mac 
*conv 
= m_name 
? new wxMBConv_mac(m_name
) 
2991                                         : new wxMBConv_mac(m_encoding
); 
2993             wxMBConv_mac 
*conv 
= new wxMBConv_mac(m_encoding
); 
// Cocoa back end, used if there is a name or the encoding is not above
// wxFONTENCODING_UTF16
3002 #if defined(__WXCOCOA__) 
3004         if ( m_name 
|| ( m_encoding 
<= wxFONTENCODING_UTF16 
) ) 
3008             wxMBConv_cocoa 
*conv 
= m_name 
? new wxMBConv_cocoa(m_name
) 
3009                                           : new wxMBConv_cocoa(m_encoding
); 
3011             wxMBConv_cocoa 
*conv 
= new wxMBConv_cocoa(m_encoding
); 
// built-in UTF conversions: resolve the name to an encoding if only the
// name is known
3021     wxFontEncoding enc 
= m_encoding
; 
3023     if ( enc 
== wxFONTENCODING_SYSTEM 
&& m_name 
) 
3025         // use "false" to suppress interactive dialogs -- we can be called from 
3026         // anywhere and popping up a dialog from here is the last thing we want to 
3028         enc 
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false); 
3030 #endif // wxUSE_FONTMAP 
3034         case wxFONTENCODING_UTF7
: 
3035              return new wxMBConvUTF7
; 
3037         case wxFONTENCODING_UTF8
: 
3038              return new wxMBConvUTF8
; 
3040         case wxFONTENCODING_UTF16BE
: 
3041              return new wxMBConvUTF16BE
; 
3043         case wxFONTENCODING_UTF16LE
: 
3044              return new wxMBConvUTF16LE
; 
3046         case wxFONTENCODING_UTF32BE
: 
3047              return new wxMBConvUTF32BE
; 
3049         case wxFONTENCODING_UTF32LE
: 
3050              return new wxMBConvUTF32LE
; 
3053              // nothing to do but put here to suppress gcc warnings 
// last resort: the wxEncodingConverter based class
3060         wxMBConv_wxwin 
*conv 
= m_name 
? new wxMBConv_wxwin(m_name
) 
3061                                       : new wxMBConv_wxwin(m_encoding
); 
3067 #endif // wxUSE_FONTMAP 
3069     // NB: This is a hack to prevent deadlock. What could otherwise happen 
3070     //     in Unicode build: wxConvLocal creation ends up being here 
3071     //     because of some failure and logs the error. But wxLog will try to 
3072     //     attach timestamp, for which it will need wxConvLocal (to convert 
3073     //     time to char* and then wchar_t*), but that fails, tries to log 
3074     //     error, but wxLog has a (already locked) critical section that 
3075     //     guards static buffer. 
// the flag is set while the error is being logged and cleared afterwards,
// so a nested call arriving here meanwhile skips the logging
3076     static bool alreadyLoggingError 
= false; 
3077     if (!alreadyLoggingError
) 
3079         alreadyLoggingError 
= true; 
3080         wxLogError(_("Cannot convert from the charset '%s'!"), 
3084                          wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str() 
3085 #else // !wxUSE_FONTMAP 
// NOTE(review): m_encoding holds a wxFontEncoding value but is formatted
// with "%s" below; a numeric format specifier looks intended -- confirm.
3086                          wxString::Format(_("encoding %s"), m_encoding
).c_str() 
3087 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP 
3089         alreadyLoggingError 
= false; 
// Create the real converter on demand.  The function is const, so the
// members are modified through a non-const alias of "this"; once
// DoCreate() has run, m_deferred is reset to false.
// NOTE(review): the condition guarding this code is not visible in this
// extract.
3095 void wxCSConv::CreateConvIfNeeded() const 
3099         wxCSConv 
*self 
= (wxCSConv 
*)this; // const_cast 
3102         // if we don't have neither the name nor the encoding, use the default 
3103         // encoding for this system 
3104         if ( !m_name 
&& m_encoding 
== wxFONTENCODING_SYSTEM 
) 
3106             self
->m_name 
= wxStrdup(wxLocale::GetSystemEncodingName()); 
3108 #endif // wxUSE_INTL 
3110         self
->m_convReal 
= DoCreate(); 
3111         self
->m_deferred 
= false; 
// Convert psz to wide characters.  The real converter is created on demand
// and the call is forwarded to it; the visible fallback widens every byte
// (as unsigned char) including the terminating NUL, since the loop runs up
// to and including index len.
// NOTE(review): the conditions selecting between forwarding and the
// fallback, and the return of the fallback, are not visible in this
// extract.
3115 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
3117     CreateConvIfNeeded(); 
3120         return m_convReal
->MB2WC(buf
, psz
, n
); 
3123     size_t len 
= strlen(psz
); 
3127         for (size_t c 
= 0; c 
<= len
; c
++) 
3128             buf
[c
] = (unsigned char)(psz
[c
]); 
// Convert psz to multibyte characters.  The real converter is created on
// demand and the call is forwarded to it; the visible fallback copies each
// wide character narrowed to char, including the terminating NUL.
// NOTE(review): the conditions, the body of the second loop and the
// returns of the fallback are not visible in this extract.
3134 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
3136     CreateConvIfNeeded(); 
3139         return m_convReal
->WC2MB(buf
, psz
, n
); 
3142     const size_t len 
= wxWcslen(psz
); 
3145         for (size_t c 
= 0; c 
<= len
; c
++) 
3149             buf
[c
] = (char)psz
[c
]; 
3154         for (size_t c 
= 0; c 
<= len
; c
++) 
// Return the length of the multibyte NUL terminator by asking the real
// converter, which is created on demand first.
// NOTE(review): the condition guarding the forwarded call and the value
// returned otherwise are not visible in this extract.
3164 size_t wxCSConv::GetMBNulLen() const 
3166     CreateConvIfNeeded(); 
3170         return m_convReal
->GetMBNulLen(); 
3176 // ---------------------------------------------------------------------------- 
3178 // ---------------------------------------------------------------------------- 
// File-scope converter objects behind the exported wxConvXXX references
// and pointers below.  The type of wxConvLibcObj depends on the platform;
// only part of the preprocessor test selecting it is visible here.
3181     static wxMBConv_win32 wxConvLibcObj
; 
3182 #elif defined(__WXMAC__) && !defined(__MACH__) 
3183     static wxMBConv_mac wxConvLibcObj 
; 
3185     static wxMBConvLibc wxConvLibcObj
; 
3188 static wxCSConv 
wxConvLocalObj(wxFONTENCODING_SYSTEM
); 
3189 static wxCSConv 
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
); 
3190 static wxMBConvUTF7 wxConvUTF7Obj
; 
3191 static wxMBConvUTF8 wxConvUTF8Obj
; 
// exported references bound to the static objects above
3193 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc 
= wxConvLibcObj
; 
3194 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal 
= wxConvLocalObj
; 
3195 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1 
= wxConvISO8859_1Obj
; 
3196 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7 
= wxConvUTF7Obj
; 
3197 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8 
= wxConvUTF8Obj
; 
// wxConvCurrent initially points to the libc converter; the initialiser of
// wxConvFileName continues on lines not visible in this extract
3198 WXDLLIMPEXP_DATA_BASE(wxMBConv 
*) wxConvCurrent 
= &wxConvLibcObj
; 
3199 WXDLLIMPEXP_DATA_BASE(wxMBConv 
*) wxConvFileName 
= & 
3207 #else // !wxUSE_WCHAR_T 
3209 // stand-ins in absence of wchar_t 
3210 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
, 
3215 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T