1 ///////////////////////////////////////////////////////////////////////////// 
   2 // Name:        src/common/strconv.cpp 
   3 // Purpose:     Unicode conversion classes 
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik, 
   5 //              Ryan Norton, Fredrik Roubert (UTF7) 
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik 
  10 //              (c) 2000-2003 Vadim Zeitlin 
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert 
  12 // Licence:     wxWindows licence 
  13 ///////////////////////////////////////////////////////////////////////////// 
  15 // For compilers that support precompilation, includes "wx.h". 
  16 #include "wx/wxprec.h" 
  26     #include "wx/hashmap.h" 
  29 #include "wx/strconv.h" 
  41 #if defined(__WIN32__) && !defined(__WXMICROWIN__) 
  42     #include "wx/msw/private.h" 
  43     #include "wx/msw/missing.h" 
  44     #define wxHAVE_WIN32_MB2WC 
  49     #include "wx/thread.h" 
  52 #include "wx/encconv.h" 
  53 #include "wx/fontmap.h" 
  56 #include "wx/osx/core/private/strconv_cf.h" 
  57 #endif //def __DARWIN__ 
  60 #define TRACE_STRCONV wxT("strconv") 
  62 // WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to 
  64 #if SIZEOF_WCHAR_T == 2 
  69 // ============================================================================ 
  71 // ============================================================================ 
  73 // helper function of cMB2WC(): check if n bytes at this location are all NUL 
  //
  // p points to the first byte to examine and n is the number of bytes to
  // check; in this file it is called from wxMBConv::ToWChar() with n set to
  // nulLen, the terminator length
  //
  // NOTE(review): the loop body and the return statement are not visible in
  // this listing; judging by the name the result is presumably true when at
  // least one of the n bytes is not NUL -- confirm against the full source
  74 static bool NotAllNULs(const char *p
, size_t n
) 
  // keep scanning while there are bytes left and the byte just read is NUL
  76     while ( n 
&& *p
++ == '\0' ) 
  82 // ---------------------------------------------------------------------------- 
  83 // UTF-16 en/decoding to/from UCS-4 with surrogates handling 
  84 // ---------------------------------------------------------------------------- 
  // Encode the UCS-4 value "input" as UTF-16, storing the resulting 16-bit
  // unit(s) through "output".
  //
  // Three cases are distinguished below: a value stored unchanged as a single
  // unit, a value >= 0x110000 (whose branch body is not visible in this
  // listing) and a value stored as a lead/trail surrogate pair.
  //
  // NOTE(review): callers in this file compare the result with 2 and with
  // wxCONV_FAILED, and wxMBConvStrictUTF8::ToWChar() may pass a NULL pointer;
  // the return statements and any NULL checks are on lines not shown here --
  // confirm against the full source
  86 static size_t encode_utf16(wxUint32 input
, wxUint16 
*output
) 
  // single unit: the value is stored unchanged
  91             *output 
= (wxUint16
) input
; 
  // 0x110000 is the first value above the last Unicode code point 0x10FFFF
  95     else if (input 
>= 0x110000) 
  // lead surrogate: 0xd7c0 == 0xd800 - (0x10000 >> 10), so this is 0xd800
  // plus the high bits of (input - 0x10000)
 103             *output
++ = (wxUint16
) ((input 
>> 10) + 0xd7c0); 
  // trail surrogate: 0xdc00 plus the low 10 bits of input
 104             *output 
= (wxUint16
) ((input 
& 0x3ff) + 0xdc00); 
 // Decode one code point from the UTF-16 unit(s) starting at "input" and
 // store it in "output".
 //
 // A first unit outside the surrogate range 0xd800..0xdfff is the
 // single-unit case; otherwise input[1] is read as well and must lie in the
 // trail surrogate range 0xdc00..0xdfff, else wxCONV_FAILED is returned.
 // Callers in this file test the result against 2 and wxCONV_FAILED.
 //
 // NOTE(review): the first test does not tell lead units (0xd800..0xdbff)
 // apart from trail units (0xdc00..0xdfff), so a trail unit in first position
 // followed by another trail unit reaches the combining expression, which
 // then yields 0x110000 or more; input[1] is also read without any length
 // information -- confirm that all callers guarantee a following unit
 111 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
) 
 // not a surrogate at all
 113     if ((*input 
< 0xd800) || (*input 
> 0xdfff)) 
 // a surrogate that is not followed by a trail surrogate is an error
 118     else if ((input
[1] < 0xdc00) || (input
[1] > 0xdfff)) 
 121         return wxCONV_FAILED
; 
 // inverse of the encoding: 0xd7c0 == 0xd800 - (0x10000 >> 10)
 125         output 
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00); 
 // element type of the buffer read by wxDecodeSurrogate(): either wchar_t or
 // wxUint16; the two typedefs are alternatives selected by the preprocessor
 // (the opening conditional lines are not visible in this listing, the
 // closing #endif refers to WC_UTF16)
 131     typedef wchar_t wxDecodeSurrogate_t
; 
 133     typedef wxUint16 wxDecodeSurrogate_t
; 
 134 #endif // WC_UTF16/!WC_UTF16 
 136 // returns the next UTF-32 character from the wchar_t buffer and advances the 
 137 // pointer to the character after this one 
 139 // if an invalid character is found, *pSrc is set to NULL, the caller must 
 // therefore test *pSrc for NULL after each call
 //
 // pSrc is the address of the caller's read pointer (updated in place)
 141 static wxUint32 
wxDecodeSurrogate(const wxDecodeSurrogate_t 
**pSrc
) 
 // the decoding itself is delegated to decode_utf16(), with the buffer
 // reinterpreted as 16-bit units; n receives its result
 145         n 
= decode_utf16(reinterpret_cast<const wxUint16 
*>(*pSrc
), out
); 
 // invalid sequence: this is the case in which *pSrc is reset (see above)
 146     if ( n 
== wxCONV_FAILED 
) 
 154 // ---------------------------------------------------------------------------- 
 156 // ---------------------------------------------------------------------------- 
 // Default implementation of ToWChar() built on top of the old MB2WC().
 //
 // dst/dstLen describe the output buffer (dst may be NULL to only compute
 // the length), src/srcLen the input (srcLen == wxNO_LEN means that src is
 // NUL-terminated). The count of wide characters is accumulated in
 // dstWritten; wxCONV_FAILED is returned if GetMBNulLen() or MB2WC() fails or
 // if the output would not fit in dstLen.
 //
 // Note that the default MB2WC() is itself implemented with ToWChar(), so a
 // conversion class has to override at least one of the two functions.
 159 wxMBConv::ToWChar(wchar_t *dst
, size_t dstLen
, 
 160                   const char *src
, size_t srcLen
) const 
 162     // although new conversion classes are supposed to implement this function 
 163     // directly, the existing ones only implement the old MB2WC() and so, to 
 164     // avoid having to rewrite all conversion classes at once, we provide a 
 165     // default (but not efficient) implementation of this one in terms of the 
 166     // old function by copying the input to ensure that it's NUL-terminated and 
 167     // then using MB2WC() to convert it 
 169     // moreover, some conversion classes simply can't implement ToWChar() 
 170     // directly, the primary example is wxConvLibc: mbstowcs() only handles 
 171     // NUL-terminated strings 
 173     // the number of chars [which would be] written to dst [if it were not NULL] 
 174     size_t dstWritten 
= 0; 
 176     // the number of NULs terminating this string 
 177     size_t nulLen 
= 0;  // not really needed, but just to avoid warnings 
 179     // if we were not given the input size we just have to assume that the 
 180     // string is properly terminated as we have no way of knowing how long it 
 181     // is anyhow, but if we do have the size check whether there are enough 
 185     if ( srcLen 
!= wxNO_LEN 
) 
 187         // we need to know how to find the end of this string 
 188         nulLen 
= GetMBNulLen(); 
 189         if ( nulLen 
== wxCONV_FAILED 
) 
 190             return wxCONV_FAILED
; 
 192         // if there are enough NULs we can avoid the copy 
 // (the input is too short to hold a terminator, or its last nulLen bytes
 // are not one)
 193         if ( srcLen 
< nulLen 
|| NotAllNULs(src 
+ srcLen 
- nulLen
, nulLen
) ) 
 195             // make a copy in order to properly NUL-terminate the string 
 196             bufTmp 
= wxCharBuffer(srcLen 
+ nulLen 
- 1 /* 1 will be added */); 
 197             char * const p 
= bufTmp
.data(); 
 198             memcpy(p
, src
, srcLen
); 
 // visit the nulLen bytes following the copied data, i.e. the terminator
 199             for ( char *s 
= p 
+ srcLen
; s 
< p 
+ srcLen 
+ nulLen
; s
++ ) 
 205         srcEnd 
= src 
+ srcLen
; 
 207     else // quit after the first loop iteration 
 212     // the idea of this code is straightforward: it converts a NUL-terminated 
 213     // chunk of the string during each iteration and updates the output buffer 
 216     // all the complications come from the fact that this function, for 
 217     // historical reasons, must behave in 2 subtly different ways when it's 
 218     // called with a fixed number of characters and when it's called for the 
 219     // entire NUL-terminated string: in the former case (srcEnd != NULL) we 
 220     // must count all characters we convert, NUL or not; but in the latter we 
 221     // do not count the trailing NUL -- but still count all the NULs inside the 
 224     // so for the (simple) former case we just always count the trailing NUL, 
 225     // but for the latter we need to wait until we see if there is going to be 
 226     // another loop iteration and only count it then 
 229         // try to convert the current chunk 
 // (first call with a NULL buffer only to learn the chunk length)
 230         size_t lenChunk 
= MB2WC(NULL
, src
, 0); 
 231         if ( lenChunk 
== wxCONV_FAILED 
) 
 232             return wxCONV_FAILED
; 
 234         dstWritten 
+= lenChunk
; 
 240             // nothing left in the input string, conversion succeeded 
 // the output buffer is too small for what has been counted so far
 246             if ( dstWritten 
> dstLen 
) 
 247                 return wxCONV_FAILED
; 
 249             // +1 is for trailing NUL 
 250             if ( MB2WC(dst
, src
, lenChunk 
+ 1) == wxCONV_FAILED 
) 
 251                 return wxCONV_FAILED
; 
 260             // we convert just one chunk in this case as this is the entire 
 261             // string anyhow (and we don't count the trailing NUL in this case) 
 265         // advance the input pointer past the end of this chunk: notice that we 
 266         // will always stop before srcEnd because we know that the chunk is 
 267         // always properly NUL-terminated 
 268         while ( NotAllNULs(src
, nulLen
) ) 
 270             // notice that we must skip over multiple bytes here as we suppose 
 271             // that if NUL takes 2 or 4 bytes, then all the other characters do 
 272             // too and so if advanced by a single byte we might erroneously 
 273             // detect sequences of NUL bytes in the middle of the input 
 277         // if the buffer ends before this NUL, we shouldn't count it in our 
 278         // output so skip the code below 
 282         // do count this terminator as it's inside the buffer we convert 
 287         src 
+= nulLen
; // skip the terminator itself 
 // Default implementation of FromWChar() built on top of the old WC2MB().
 //
 // dst/dstLen describe the output buffer (dst may be NULL to only compute
 // the length), src/srcLen the wide input (srcLen == wxNO_LEN means that src
 // is NUL-terminated). The input is processed one NUL-terminated chunk at a
 // time and the byte count is accumulated in dstWritten; wxCONV_FAILED is
 // returned if WC2MB() fails or if the output would not fit in dstLen.
 //
 // Note that the default WC2MB() is itself implemented with FromWChar(), so
 // a conversion class has to override at least one of the two functions.
 297 wxMBConv::FromWChar(char *dst
, size_t dstLen
, 
 298                     const wchar_t *src
, size_t srcLen
) const 
 300     // the number of chars [which would be] written to dst [if it were not NULL] 
 301     size_t dstWritten 
= 0; 
 303     // if we don't know its length we have no choice but to assume that it is 
 304     // NUL-terminated (notice that it can still be NUL-terminated even if 
 305     // explicit length is given but it doesn't change our return value) 
 306     const bool isNulTerminated 
= srcLen 
== wxNO_LEN
; 
 308     // make a copy of the input string unless it is already properly 
 310     wxWCharBuffer bufTmp
; 
 311     if ( isNulTerminated 
) 
 // from here on srcLen includes the trailing NUL
 313         srcLen 
= wxWcslen(src
) + 1; 
 315     else if ( srcLen 
!= 0 && src
[srcLen 
- 1] != L
'\0' ) 
 317         // make a copy in order to properly NUL-terminate the string 
 318         bufTmp 
= wxWCharBuffer(srcLen
); 
 319         memcpy(bufTmp
.data(), src
, srcLen 
* sizeof(wchar_t)); 
 // size in bytes of the NUL terminator in the multibyte encoding
 // NOTE(review): unlike in ToWChar(), no test of this value against
 // wxCONV_FAILED is visible in this listing -- confirm
 323     const size_t lenNul 
= GetMBNulLen(); 
 324     for ( const wchar_t * const srcEnd 
= src 
+ srcLen
; 
 326           src
++ /* skip L'\0' too */ ) 
 328         // try to convert the current chunk 
 // (first call with a NULL buffer only to learn the chunk length)
 329         size_t lenChunk 
= WC2MB(NULL
, src
, 0); 
 330         if ( lenChunk 
== wxCONV_FAILED 
) 
 331             return wxCONV_FAILED
; 
 333         dstWritten 
+= lenChunk
; 
 // position of the NUL ending this chunk: for NUL-terminated input it is
 // the last element, otherwise it is found with wxWcslen()
 335         const wchar_t * const 
 336             chunkEnd 
= isNulTerminated 
? srcEnd 
- 1 : src 
+ wxWcslen(src
); 
 338         // our return value accounts for the trailing NUL(s), unlike that of 
 339         // WC2MB(), however don't do it for the last NUL we artificially added 
 341         if ( chunkEnd 
< srcEnd 
) 
 342             dstWritten 
+= lenNul
; 
 // the output buffer is too small for what has been counted so far
 346             if ( dstWritten 
> dstLen 
) 
 347                 return wxCONV_FAILED
; 
 349             // if we know that there is enough space in the destination buffer 
 350             // (because we accounted for lenNul in dstWritten above), we can 
 351             // convert directly in place -- but otherwise we need another 
 352             // temporary buffer to ensure that we don't overwrite the output 
 355             if ( chunkEnd 
== srcEnd 
) 
 357                 dstBuf 
= wxCharBuffer(lenChunk 
+ lenNul 
- 1); 
 358                 dstTmp 
= dstBuf
.data(); 
 365             if ( WC2MB(dstTmp
, src
, lenChunk 
+ lenNul
) == wxCONV_FAILED 
) 
 366                 return wxCONV_FAILED
; 
 370                 // copy everything up to but excluding the terminating NUL(s) 
 371                 // into the real output buffer 
 372                 memcpy(dst
, dstTmp
, lenChunk
); 
 374                 // micro-optimization: if dstTmp != dst it means that chunkEnd 
 375                 // == srcEnd and so we're done, no need to update anything below 
 380             if ( chunkEnd 
< srcEnd 
) 
 // Default implementation of the old-style MB2WC() in terms of ToWChar():
 // inBuff is converted as a NUL-terminated string (no source length is passed
 // on) into outBuff, which has room for outLen wide characters.
 //
 // As the default ToWChar() calls MB2WC() in turn, a derived class must
 // override at least one of the two.
 390 size_t wxMBConv::MB2WC(wchar_t *outBuff
, const char *inBuff
, size_t outLen
) const 
 392     size_t rc 
= ToWChar(outBuff
, outLen
, inBuff
); 
 393     if ( rc 
!= wxCONV_FAILED 
) 
 395         // ToWChar() returns the buffer length, i.e. including the trailing 
 396         // NUL, while this method doesn't take it into account 
 // Default implementation of the old-style WC2MB() in terms of FromWChar():
 // inBuff is converted as a NUL-terminated wide string (no source length is
 // passed on) into outBuff, which has room for outLen bytes.
 //
 // As the default FromWChar() calls WC2MB() in turn, a derived class must
 // override at least one of the two.
 //
 // NOTE(review): the adjustment applied to a successful result is on lines
 // not visible in this listing
 403 size_t wxMBConv::WC2MB(char *outBuff
, const wchar_t *inBuff
, size_t outLen
) const 
 405     size_t rc 
= FromWChar(outBuff
, outLen
, inBuff
); 
 406     if ( rc 
!= wxCONV_FAILED 
) 
 414 wxMBConv::~wxMBConv() 
 416     // nothing to do here (necessary for Darwin linking probably) 
 // Convert the NUL-terminated multibyte string psz to a newly allocated wide
 // character buffer.
 //
 // ToWChar() is called twice: once with a NULL destination to learn the
 // required length and once to do the conversion. An empty wxWCharBuffer is
 // returned if the conversion fails.
 419 const wxWCharBuffer 
wxMBConv::cMB2WC(const char *psz
) const 
 423         // calculate the length of the buffer needed first 
 424         const size_t nLen 
= ToWChar(NULL
, 0, psz
); 
 425         if ( nLen 
!= wxCONV_FAILED 
) 
 427             // now do the actual conversion 
 428             wxWCharBuffer 
buf(nLen 
- 1 /* +1 added implicitly */); 
 430             // +1 for the trailing NULL 
 431             if ( ToWChar(buf
.data(), nLen
, psz
) != wxCONV_FAILED 
) 
 // failure, or (presumably) a NULL psz: the guard is not visible here
 436     return wxWCharBuffer(); 
 // Convert the NUL-terminated wide string pwz to a newly allocated multibyte
 // buffer.
 //
 // FromWChar() is called twice: once with a NULL destination to learn the
 // required length and once to do the conversion. An empty wxCharBuffer is
 // returned if the conversion fails.
 439 const wxCharBuffer 
wxMBConv::cWC2MB(const wchar_t *pwz
) const 
 // length of the result as reported by FromWChar()
 443         const size_t nLen 
= FromWChar(NULL
, 0, pwz
); 
 444         if ( nLen 
!= wxCONV_FAILED 
) 
 // allocate for nLen - 1, as in the char* overload of cMB2WC()
 446             wxCharBuffer 
buf(nLen 
- 1); 
 447             if ( FromWChar(buf
.data(), nLen
, pwz
) != wxCONV_FAILED 
) 
 452     return wxCharBuffer(); 
 // Convert inLen bytes of inBuff (or the whole NUL-terminated string if inLen
 // is wxNO_LEN) to a newly allocated, always NUL-terminated wide character
 // buffer.
 //
 // outLen is an output parameter for the length of the result; how it is
 // filled in is on lines not visible in this listing. An empty wxWCharBuffer
 // is returned if the conversion fails.
 456 wxMBConv::cMB2WC(const char *inBuff
, size_t inLen
, size_t *outLen
) const 
 // first pass: only compute the required length
 458     const size_t dstLen 
= ToWChar(NULL
, 0, inBuff
, inLen
); 
 459     if ( dstLen 
!= wxCONV_FAILED 
) 
 461         // notice that we allocate space for dstLen+1 wide characters here 
 462         // because we want the buffer to always be NUL-terminated, even if the 
 463         // input isn't (as otherwise the caller has no way to know its length) 
 464         wxWCharBuffer 
wbuf(dstLen
); 
 465         wbuf
.data()[dstLen
] = L
'\0'; 
 // second pass: the actual conversion
 466         if ( ToWChar(wbuf
.data(), dstLen
, inBuff
, inLen
) != wxCONV_FAILED 
) 
 472                 // we also need to handle NUL-terminated input strings 
 473                 // specially: for them the output is the length of the string 
 474                 // excluding the trailing NUL, however if we're asked to 
 475                 // convert a specific number of characters we return the length 
 476                 // of the resulting output even if it's NUL-terminated 
 477                 if ( inLen 
== wxNO_LEN 
) 
 488     return wxWCharBuffer(); 
 // Convert inLen wide characters of inBuff (or the whole NUL-terminated
 // string if inLen is wxNO_LEN) to a newly allocated multibyte buffer which
 // is always terminated by a NUL of the size used by this encoding.
 //
 // outLen is an output parameter for the length of the result; how it is
 // filled in is on lines not visible in this listing. An empty wxCharBuffer
 // is returned if the conversion fails.
 492 wxMBConv::cWC2MB(const wchar_t *inBuff
, size_t inLen
, size_t *outLen
) const 
 // first pass: only compute the required length
 494     size_t dstLen 
= FromWChar(NULL
, 0, inBuff
, inLen
); 
 495     if ( dstLen 
!= wxCONV_FAILED 
) 
 497         const size_t nulLen 
= GetMBNulLen(); 
 499         // as above, ensure that the buffer is always NUL-terminated, even if 
 // allocate for dstLen + nulLen - 1, then zero the nulLen bytes at dstLen
 501         wxCharBuffer 
buf(dstLen 
+ nulLen 
- 1); 
 502         memset(buf
.data() + dstLen
, 0, nulLen
); 
 // second pass: the actual conversion
 503         if ( FromWChar(buf
.data(), dstLen
, inBuff
, inLen
) != wxCONV_FAILED 
) 
 509                 if ( inLen 
== wxNO_LEN 
) 
 511                     // in this case both input and output are NUL-terminated 
 512                     // and we're not supposed to count NUL 
 524     return wxCharBuffer(); 
 // Convert the contents of buf, using its own length(), to a newly allocated
 // wide character buffer terminated by an extra L'\0'.
 //
 // An empty wxWCharBuffer is returned if the conversion fails (the handling
 // of an empty input is on lines not visible in this listing).
 527 const wxWCharBuffer 
wxMBConv::cMB2WC(const wxScopedCharBuffer
& buf
) const 
 529     const size_t srcLen 
= buf
.length(); 
 // first pass: only compute the required length
 532         const size_t dstLen 
= ToWChar(NULL
, 0, buf
, srcLen
); 
 533         if ( dstLen 
!= wxCONV_FAILED 
) 
 // the terminator is stored at index dstLen, as in the overload above
 535             wxWCharBuffer 
wbuf(dstLen
); 
 536             wbuf
.data()[dstLen
] = L
'\0'; 
 // second pass: the actual conversion
 537             if ( ToWChar(wbuf
.data(), dstLen
, buf
, srcLen
) != wxCONV_FAILED 
) 
 542     return wxWCharBuffer(); 
 // Convert the contents of wbuf, using its own length(), to a newly allocated
 // multibyte buffer terminated by an extra '\0' byte.
 //
 // An empty wxCharBuffer is returned if the conversion fails (the handling of
 // an empty input is on lines not visible in this listing).
 //
 // NOTE(review): only a single NUL byte is appended here whereas the
 // (inBuff, inLen, outLen) overload of cWC2MB() appends GetMBNulLen() bytes;
 // confirm that this is sufficient for encodings with a multi-byte NUL
 545 const wxCharBuffer 
wxMBConv::cWC2MB(const wxScopedWCharBuffer
& wbuf
) const 
 547     const size_t srcLen 
= wbuf
.length(); 
 // first pass: only compute the required length
 550         const size_t dstLen 
= FromWChar(NULL
, 0, wbuf
, srcLen
); 
 551         if ( dstLen 
!= wxCONV_FAILED 
) 
 553             wxCharBuffer 
buf(dstLen
); 
 554             buf
.data()[dstLen
] = '\0'; 
 // second pass: the actual conversion
 555             if ( FromWChar(buf
.data(), dstLen
, wbuf
, srcLen
) != wxCONV_FAILED 
) 
 560     return wxCharBuffer(); 
 563 // ---------------------------------------------------------------------------- 
 565 // ---------------------------------------------------------------------------- 
 567 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 569     return wxMB2WC(buf
, psz
, n
); 
 572 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 574     return wxWC2MB(buf
, psz
, n
); 
 577 // ---------------------------------------------------------------------------- 
 578 // wxConvBrokenFileNames 
 579 // ---------------------------------------------------------------------------- 
 // Create the converter to which this object delegates, chosen by charset:
 // a wxMBConvUTF8 using MAP_INVALID_UTF8_TO_PUA when the name matches "UTF-8"
 // or "UTF8" (compared with wxStricmp()), a wxCSConv for the given charset
 // otherwise.
 //
 // NOTE(review): m_conv receives an object allocated with new; where it is
 // released is not visible in this listing
 583 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxString
& charset
) 
 585     if ( wxStricmp(charset
, wxT("UTF-8")) == 0 || 
 586          wxStricmp(charset
, wxT("UTF8")) == 0  ) 
 587         m_conv 
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA
); 
 // any other charset
 589         m_conv 
= new wxCSConv(charset
); 
 594 // ---------------------------------------------------------------------------- 
 596 // ---------------------------------------------------------------------------- 
 598 // Implementation (C) 2004 Fredrik Roubert 
 600 // Changes to work in streaming mode (C) 2008 Vadim Zeitlin 
 603 // BASE64 decoding table 
 //
 // indexed by byte value (256 entries, 8 per row): gives the 6 bit value of a
 // base64 digit or 0xff for bytes which are not base64 digits; 'A'..'Z' map
 // to 0x00..0x19, 'a'..'z' to 0x1a..0x33, '0'..'9' to 0x34..0x3d, '+' to 0x3e
 // and '/' to 0x3f
 605 static const unsigned char utf7unb64
[] = 
 607     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 608     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 609     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 610     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 611     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 612     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f, 
 613     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 
 614     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 615     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 
 616     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 
 617     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 
 618     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff, 
 619     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 
 620     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 
 621     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 
 622     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff, 
 623     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 624     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 625     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 626     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 627     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 628     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 629     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 630     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 631     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 632     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 633     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 634     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 635     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 636     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 637     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
 638     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 
 // Decode UTF-7 input to wide characters.
 //
 // dst/dstLen describe the output buffer (dst may be NULL to only compute
 // the length), src/srcLen the input. With srcLen == wxNO_LEN the whole
 // NUL-terminated string is converted using a local decoder state; with an
 // explicit length the shift state kept in m_stateDecoder is used and
 // updated, so that the input can be fed in pieces. wxCONV_FAILED is returned
 // for invalid input.
 641 size_t wxMBConvUTF7::ToWChar(wchar_t *dst
, size_t dstLen
, 
 642                              const char *src
, size_t srcLen
) const 
 644     DecoderState stateOrig
, 
 646     if ( srcLen 
== wxNO_LEN 
) 
 648         // convert the entire string, up to and including the trailing NUL 
 649         srcLen 
= strlen(src
) + 1; 
 651         // when working on the entire strings we don't update nor use the shift 
 652         // state from the previous call 
 653         statePtr 
= &stateOrig
; 
 655     else // when working with partial strings we do use the shift state 
 // the cast is needed because this is a const method
 657         statePtr 
= const_cast<DecoderState 
*>(&m_stateDecoder
); 
 659         // also save the old state to be able to rollback to it on error 
 660         stateOrig 
= m_stateDecoder
; 
 663     // but to simplify the code below we use this variable in both cases 
 664     DecoderState
& state 
= *statePtr
; 
 667     // number of characters [which would have been] written to dst [if it were 
 671     const char * const srcEnd 
= src 
+ srcLen
; 
 // stop at the end of the input or when the output buffer, if any, is full
 673     while ( (src 
< srcEnd
) && (!dst 
|| (len 
< dstLen
)) ) 
 675         const unsigned char cc 
= *src
++; 
 677         if ( state
.IsShifted() ) 
 // 6 bit value of this base64 digit, 0xff if cc is not one
 679             const unsigned char dc 
= utf7unb64
[cc
]; 
 682                 // end of encoded part, check that nothing was left: there can 
 683                 // be up to 4 bits of 0 padding but nothing else (we also need 
 684                 // to check isLSB as we count bits modulo 8 while a valid UTF-7 
 685                 // encoded sequence must contain an integral number of UTF-16 
 687                 if ( state
.isLSB 
|| state
.bit 
> 4 || 
 688                         (state
.accum 
& ((1 << state
.bit
) - 1)) ) 
 693                     return wxCONV_FAILED
; 
 698                 // re-parse this character normally below unless it's '-' which 
 699                 // is consumed by the decoder 
 703             else // valid encoded character 
 705                 // mini base64 decoder: each character is 6 bits 
 710                 if ( state
.bit 
>= 8 ) 
 712                     // got the full byte, consume it 
 714                     unsigned char b 
= (state
.accum 
>> state
.bit
) & 0x00ff; 
 718                         // we've got the full word, output it 
 720                             *dst
++ = (state
.msb 
<< 8) | b
; 
 726                         // just store it while we wait for LSB 
 734         if ( state
.IsDirect() ) 
 736             // start of an encoded segment? 
 741                     // just the encoded plus sign, don't switch to shifted mode 
 // NOTE(review): *src is a plain char here, so where char is signed a byte
 // >= 0x80 converts to a huge unsigned index, outside the 256 entry table
 // (cc above avoids this by being unsigned char); src may also be equal to
 // srcEnd at this point -- confirm
 747                 else if ( utf7unb64
[(unsigned)*src
] == 0xff ) 
 749                     // empty encoded chunks are not allowed 
 753                     return wxCONV_FAILED
; 
 755                 else // base-64 encoded chunk follows 
 762                 // only printable 7 bit ASCII characters (with the exception of 
 763                 // NUL, TAB, CR and LF) can be used directly 
 764                 if ( cc 
>= 0x7f || (cc 
< ' ' && 
 765                       !(cc 
== '\0' || cc 
== '\t' || cc 
== '\r' || cc 
== '\n')) ) 
 766                     return wxCONV_FAILED
; 
 777         // as we didn't read any characters we should be called with the same 
 778         // data (followed by some more new data) again later so don't save our 
 782         return wxCONV_FAILED
; 
 789 // BASE64 encoding table 
 //
 // maps a 6 bit value (0..63) to its base64 digit: 'A'..'Z', 'a'..'z',
 // '0'..'9', '+' and '/'
 791 static const unsigned char utf7enb64
[] = 
 793     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 
 794     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 
 795     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 
 796     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 
 797     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 
 798     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 
 799     'w', 'x', 'y', 'z', '0', '1', '2', '3', 
 800     '4', '5', '6', '7', '8', '9', '+', '/' 
 804 // UTF-7 encoding table 
 //
 // indexed by 7 bit character code (128 entries, 16 per row); each entry is
 // one of the following classes:
 806 // 0 - Set D (directly encoded characters) 
 807 // 1 - Set O (optional direct characters) 
 808 // 2 - whitespace characters (optional) 
 809 // 3 - special characters 
 //
 // wxIsUTF7Direct() below accepts class 0 only
 811 static const unsigned char utf7encode
[128] = 
 813     0, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, 
 814     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
 815     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3, 
 816     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 
 817     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 818     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1, 
 819     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 820     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3 
 823 static inline bool wxIsUTF7Direct(wchar_t wc
) 
 825     return wc 
< 0x80 && utf7encode
[wc
] < 1; 
 // Encode wide characters as UTF-7.
 //
 // dst/dstLen describe the output buffer (dst may be NULL to only compute
 // the length), src/srcLen the input. With srcLen == wxNO_LEN the whole
 // NUL-terminated string is converted using a local encoder state; with an
 // explicit length the state kept in m_stateEncoder is used. wxCONV_FAILED
 // is returned for characters above 0xffff, which are not supported.
 828 size_t wxMBConvUTF7::FromWChar(char *dst
, size_t dstLen
, 
 829                                const wchar_t *src
, size_t srcLen
) const 
 831     EncoderState stateOrig
, 
 833     if ( srcLen 
== wxNO_LEN 
) 
 835         // we don't apply the stored state when operating on entire strings at 
 837         statePtr 
= &stateOrig
; 
 // convert the trailing NUL as well
 839         srcLen 
= wxWcslen(src
) + 1; 
 841     else // do use the mode we left the output in previously 
 // keep a copy of the state (see the comment about restoring it at the end)
 843         stateOrig 
= m_stateEncoder
; 
 // the cast is needed because this is a const method
 844         statePtr 
= const_cast<EncoderState 
*>(&m_stateEncoder
); 
 847     EncoderState
& state 
= *statePtr
; 
 852     const wchar_t * const srcEnd 
= src 
+ srcLen
; 
 // stop at the end of the input or when the output buffer, if any, is full
 853     while ( src 
< srcEnd 
&& (!dst 
|| len 
< dstLen
) ) 
 856         if ( wxIsUTF7Direct(cc
) ) 
 // a direct character ends the current base64 run, if any
 858             if ( state
.IsShifted() ) 
 860                 // pad with zeros the last encoded block if necessary 
 864                         *dst
++ = utf7enb64
[((state
.accum 
% 16) << (6 - state
.bit
)) % 64]; 
 // a literal plus sign met outside of a base64 run
 879         else if ( cc 
== '+' && state
.IsDirect() ) 
 890         else if (((wxUint32
)cc
) > 0xffff) 
 892             // no surrogate pair generation (yet?) 
 893             return wxCONV_FAILED
; 
 898             if ( state
.IsDirect() ) 
 907             // BASE64 encode string 
 // each character contributes two bytes: the high one for lsb == 0, then
 // the low one for lsb == 1
 910                 for ( unsigned lsb 
= 0; lsb 
< 2; lsb
++ ) 
 913                     state
.accum 
+= lsb 
? cc 
& 0xff : (cc 
& 0xff00) >> 8; 
 // 8 more bits are available: emit a digit for each complete group of 6
 915                     for (state
.bit 
+= 8; state
.bit 
>= 6; ) 
 919                             *dst
++ = utf7enb64
[(state
.accum 
>> state
.bit
) % 64]; 
 // the run ends at the end of input or before the next direct character
 924                 if ( src 
== srcEnd 
|| wxIsUTF7Direct(cc 
= *src
) ) 
 932     // we need to restore the original encoder state if we were called just to 
 933     // calculate the amount of space needed as we will presumably be called 
 934     // again to really convert the data now 
 941 // ---------------------------------------------------------------------------- 
 943 // ---------------------------------------------------------------------------- 
 945 static const wxUint32 utf8_max
[]= 
 946     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff }; 
 948 // boundaries of the private use area we use to (temporarily) remap invalid 
 949 // characters invalid in a UTF-8 encoded string 
 950 const wxUint32 wxUnicodePUA 
= 0x100000; 
 951 const wxUint32 wxUnicodePUAEnd 
= wxUnicodePUA 
+ 256; 
 953 // this table gives the length of the UTF-8 encoding from its first character: 
 //
 // it is indexed by the value of the lead byte; 0 marks bytes which cannot
 // start a sequence
 //
 // NOTE(review): the rows visible in this listing add up to 254 entries; the
 // two entries for the bytes C0 and C1 (the first row of two-byte sequences
 // is labelled as starting at C2) are presumably on a line not shown here --
 // confirm against the full source
 954 const unsigned char tableUtf8Lengths
[256] = { 
 955     // single-byte sequences (ASCII): 
 956     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 00..0F 
 957     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 10..1F 
 958     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 20..2F 
 959     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 30..3F 
 960     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 40..4F 
 961     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 50..5F 
 962     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 60..6F 
 963     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 70..7F 
 965     // these are invalid: 
 // (80..BF only occur as continuation bytes, see the RFC 3629 table quoted
 // in wxMBConvStrictUTF8::ToWChar())
 966     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 80..8F 
 967     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 90..9F 
 968     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // A0..AF 
 969     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // B0..BF 
 972     // two-byte sequences: 
 973           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // C2..CF 
 974     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // D0..DF 
 976     // three-byte sequences: 
 977     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  // E0..EF 
 979     // four-byte sequences: 
 980     4, 4, 4, 4, 4,                                   // F0..F4 
 982     // these are invalid again (5- or 6-byte 
 983     // sequences and sequences for code points 
 984     // above U+10FFFF, as restricted by RFC 3629): 
 985                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0   // F5..FF 
 // Strict UTF-8 decoder: convert srcLen bytes of src (or the whole
 // NUL-terminated string, including its NUL, if srcLen is wxNO_LEN) to wide
 // characters.
 //
 // Nothing is written when dstLen is 0 (out is NULL then), which allows only
 // computing the length. wxCONV_FAILED is returned for malformed input.
 //
 // NOTE(review): srcLen is replaced by strlen(src) + 1 right at the start
 // when it is wxNO_LEN, so the later comparisons of srcLen with wxNO_LEN can
 // apparently never be true -- confirm whether they are still needed
 989 wxMBConvStrictUTF8::ToWChar(wchar_t *dst
, size_t dstLen
, 
 990                             const char *src
, size_t srcLen
) const 
 // output cursor, NULL when only the length is being computed
 992     wchar_t *out 
= dstLen 
? dst 
: NULL
; 
 995     if ( srcLen 
== wxNO_LEN 
) 
 996         srcLen 
= strlen(src
) + 1; 
 998     for ( const char *p 
= src
; ; p
++ ) 
 // end of input: a NUL byte (no explicit length) or no bytes left
 1000         if ( !(srcLen 
== wxNO_LEN 
? *p 
: srcLen
) ) 
 1002             // all done successfully, just add the trailing NULL if we are not 
 1003             // using explicit length 
 1004             if ( srcLen 
== wxNO_LEN 
) 
 // no room left in the output buffer (dstLen counts the remaining space)
 1020         if ( out 
&& !dstLen
-- ) 
 1024         unsigned char c 
= *p
; 
 1028             if ( srcLen 
== 0 ) // the test works for wxNO_LEN too 
 1031             if ( srcLen 
!= wxNO_LEN 
) 
 // length of the sequence introduced by this lead byte, 0 if invalid
 // NOTE(review): len is decremented and used as an array index below, so
 // lines not shown here presumably reject len == 0 first -- confirm
 1038             unsigned len 
= tableUtf8Lengths
[c
]; 
 1042             if ( srcLen 
< len 
) // the test works for wxNO_LEN too 
 1045             if ( srcLen 
!= wxNO_LEN 
) 
 1048             //   Char. number range   |        UTF-8 octet sequence 
 1049             //      (hexadecimal)     |              (binary) 
 1050             //  ----------------------+---------------------------------------- 
 1051             //  0000 0000 - 0000 007F | 0xxxxxxx 
 1052             //  0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx 
 1053             //  0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx 
 1054             //  0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 
 1056             //  Code point value is stored in bits marked with 'x', 
 1057             //  lowest-order bit of the value on the right side in the diagram 
 1058             //  above.                                         (from RFC 3629) 
 1060             // mask to extract lead byte's value ('x' bits above), by sequence 
 1062             static const unsigned char leadValueMask
[] = { 0x7F, 0x1F, 0x0F, 0x07 }; 
 1064             // mask and value of lead byte's most significant bits, by length: 
 1065             static const unsigned char leadMarkerMask
[] = { 0x80, 0xE0, 0xF0, 0xF8 }; 
 1066             static const unsigned char leadMarkerVal
[] = { 0x00, 0xC0, 0xE0, 0xF0 }; 
 1068             len
--; // it's more convenient to work with 0-based length here 
 1070             // extract the lead byte's value bits: 
 // (after checking that its marker bits match the sequence length)
 1071             if ( (c 
& leadMarkerMask
[len
]) != leadMarkerVal
[len
] ) 
 1074             code 
= c 
& leadValueMask
[len
]; 
 1076             // all remaining bytes, if any, are handled in the same way 
 1077             // regardless of sequence's length: 
 1078             for ( ; len
; --len 
) 
 // continuation bytes must have the form 10xxxxxx
 1081                 if ( (c 
& 0xC0) != 0x80 ) 
 1082                     return wxCONV_FAILED
; 
 1090         // cast is ok because wchar_t == wxUint16 if WC_UTF16 
 // a result of 2 means that a surrogate pair was produced
 1091         if ( encode_utf16(code
, (wxUint16 
*)out
) == 2 ) 
 1100 #endif // WC_UTF16/!WC_UTF16 
 1108     return wxCONV_FAILED
; 
// Strict UTF-8 encoder: convert srcLen wide characters of src (or the whole
// NUL-terminated string if srcLen is wxNO_LEN) to UTF-8.
//
// Nothing is written when dstLen is 0 (out is NULL then), which allows only
// computing the length. Each code point is encoded using 1 to 4 bytes
// depending on its value; wxCONV_FAILED is returned on error.
1112 wxMBConvStrictUTF8::FromWChar(char *dst
, size_t dstLen
, 
1113                               const wchar_t *src
, size_t srcLen
) const 
// output cursor, NULL when only the length is being computed
1115     char *out 
= dstLen 
? dst 
: NULL
; 
1118     for ( const wchar_t *wp 
= src
; ; wp
++ ) 
// end of input: a NUL character (no explicit length) or nothing left
1120         if ( !(srcLen 
== wxNO_LEN 
? *wp 
: srcLen
) ) 
1122             // all done successfully, just add the trailing NULL if we are not 
1123             // using explicit length 
1124             if ( srcLen 
== wxNO_LEN 
) 
1140         if ( srcLen 
!= wxNO_LEN 
) 
1145         // cast is ok for WC_UTF16 
1146         if ( decode_utf16((const wxUint16 
*)wp
, code
) == 2 ) 
1148             // skip the next char too as we decoded a surrogate 
1151 #else // wchar_t is UTF-32 
// the most significant bit is dropped
1152         code 
= *wp 
& 0x7fffffff; 
// one byte: the code is stored as is
1164                 out
[0] = (char)code
; 
// two bytes: 110xxxxx 10xxxxxx
1167         else if ( code 
<= 0x07FF ) 
1175                 // NB: this line takes 6 least significant bits, encodes them as 
1176                 // 10xxxxxx and discards them so that the next byte can be encoded: 
1177                 out
[1] = 0x80 | (code 
& 0x3F);  code 
>>= 6; 
1178                 out
[0] = 0xC0 | code
; 
// three bytes: 1110xxxx 10xxxxxx 10xxxxxx
// NOTE(review): the comparison is "<" and not "<=", so U+FFFF itself is not
// handled here but by the four byte branch below, where it is written as
// F0 8F BF BF instead of the three byte form EF BF BF -- confirm whether
// "<=" was intended
1181         else if ( code 
< 0xFFFF ) 
1189                 out
[2] = 0x80 | (code 
& 0x3F);  code 
>>= 6; 
1190                 out
[1] = 0x80 | (code 
& 0x3F);  code 
>>= 6; 
1191                 out
[0] = 0xE0 | code
; 
// four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1194         else if ( code 
<= 0x10FFFF ) 
1202                 out
[3] = 0x80 | (code 
& 0x3F);  code 
>>= 6; 
1203                 out
[2] = 0x80 | (code 
& 0x3F);  code 
>>= 6; 
1204                 out
[1] = 0x80 | (code 
& 0x3F);  code 
>>= 6; 
1205                 out
[0] = 0xF0 | code
; 
// anything above 0x10FFFF is not a Unicode code point
1210             wxFAIL_MSG( wxT("trying to encode undefined Unicode character") ); 
1223     // we only get here if an error occurs during decoding 
1224     return wxCONV_FAILED
; 
1227 size_t wxMBConvUTF8::ToWChar(wchar_t *buf
, size_t n
, 
1228                              const char *psz
, size_t srcLen
) const 
1230     if ( m_options 
== MAP_INVALID_UTF8_NOT 
) 
1231         return wxMBConvStrictUTF8::ToWChar(buf
, n
, psz
, srcLen
); 
1235     while ((srcLen 
== wxNO_LEN 
? *psz 
: srcLen
--) && ((!buf
) || (len 
< n
))) 
1237         const char *opsz 
= psz
; 
1238         bool invalid 
= false; 
1239         unsigned char cc 
= *psz
++, fc 
= cc
; 
1241         for (cnt 
= 0; fc 
& 0x80; cnt
++) 
1251             // escape the escape character for octal escapes 
1252             if ((m_options 
& MAP_INVALID_UTF8_TO_OCTAL
) 
1253                     && cc 
== '\\' && (!buf 
|| len 
< n
)) 
1265                 // invalid UTF-8 sequence 
1270                 unsigned ocnt 
= cnt 
- 1; 
1271                 wxUint32 res 
= cc 
& (0x3f >> cnt
); 
1275                     if ((cc 
& 0xC0) != 0x80) 
1277                         // invalid UTF-8 sequence 
1283                     res 
= (res 
<< 6) | (cc 
& 0x3f); 
1286                 if (invalid 
|| res 
<= utf8_max
[ocnt
]) 
1288                     // illegal UTF-8 encoding 
1291                 else if ((m_options 
& MAP_INVALID_UTF8_TO_PUA
) && 
1292                         res 
>= wxUnicodePUA 
&& res 
< wxUnicodePUAEnd
) 
1294                     // if one of our PUA characters turns up externally 
1295                     // it must also be treated as an illegal sequence 
1296                     // (a bit like you have to escape an escape character) 
1302                     // cast is ok because wchar_t == wxUint16 if WC_UTF16 
1303                     size_t pa 
= encode_utf16(res
, (wxUint16 
*)buf
); 
1304                     if (pa 
== wxCONV_FAILED
) 
1316                         *buf
++ = (wchar_t)res
; 
1318 #endif // WC_UTF16/!WC_UTF16 
1324                 if (m_options 
& MAP_INVALID_UTF8_TO_PUA
) 
1326                     while (opsz 
< psz 
&& (!buf 
|| len 
< n
)) 
1329                         // cast is ok because wchar_t == wxUint16 if WC_UTF16 
1330                         size_t pa 
= encode_utf16((unsigned char)*opsz 
+ wxUnicodePUA
, (wxUint16 
*)buf
); 
1331                         wxASSERT(pa 
!= wxCONV_FAILED
); 
1338                             *buf
++ = (wchar_t)(wxUnicodePUA 
+ (unsigned char)*opsz
); 
1344                 else if (m_options 
& MAP_INVALID_UTF8_TO_OCTAL
) 
1346                     while (opsz 
< psz 
&& (!buf 
|| len 
< n
)) 
1348                         if ( buf 
&& len 
+ 3 < n 
) 
1350                             unsigned char on 
= *opsz
; 
1352                             *buf
++ = (wchar_t)( L
'0' + on 
/ 0100 ); 
1353                             *buf
++ = (wchar_t)( L
'0' + (on 
% 0100) / 010 ); 
1354                             *buf
++ = (wchar_t)( L
'0' + on 
% 010 ); 
1361                 else // MAP_INVALID_UTF8_NOT 
1363                     return wxCONV_FAILED
; 
1369     if (srcLen 
== wxNO_LEN 
&& buf 
&& (len 
< n
)) 
// Returns true if wch is one of the octal digits L'0'..L'7'.
//
// Used by wxMBConvUTF8::FromWChar() to recognise a three-digit octal escape
// when the MAP_INVALID_UTF8_TO_OCTAL option is set.
static inline bool isoctal(wchar_t wch)
{
    return L'0' <= wch && wch <= L'7';
}
1380 size_t wxMBConvUTF8::FromWChar(char *buf
, size_t n
, 
1381                                const wchar_t *psz
, size_t srcLen
) const 
1383     if ( m_options 
== MAP_INVALID_UTF8_NOT 
) 
1384         return wxMBConvStrictUTF8::FromWChar(buf
, n
, psz
, srcLen
); 
1388     while ((srcLen 
== wxNO_LEN 
? *psz 
: srcLen
--) && ((!buf
) || (len 
< n
))) 
1393         // cast is ok for WC_UTF16 
1394         size_t pa 
= decode_utf16((const wxUint16 
*)psz
, cc
); 
1395         psz 
+= (pa 
== wxCONV_FAILED
) ? 1 : pa
; 
1397         cc 
= (*psz
++) & 0x7fffffff; 
1400         if ( (m_options 
& MAP_INVALID_UTF8_TO_PUA
) 
1401                 && cc 
>= wxUnicodePUA 
&& cc 
< wxUnicodePUAEnd 
) 
1404                 *buf
++ = (char)(cc 
- wxUnicodePUA
); 
1407         else if ( (m_options 
& MAP_INVALID_UTF8_TO_OCTAL
) 
1408                     && cc 
== L
'\\' && psz
[0] == L
'\\' ) 
1415         else if ( (m_options 
& MAP_INVALID_UTF8_TO_OCTAL
) && 
1417                         isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) ) 
1421                 *buf
++ = (char) ((psz
[0] - L
'0') * 0100 + 
1422                                  (psz
[1] - L
'0') * 010 + 
1432             for (cnt 
= 0; cc 
> utf8_max
[cnt
]; cnt
++) 
1448                     *buf
++ = (char) ((-128 >> cnt
) | ((cc 
>> (cnt 
* 6)) & (0x3f >> cnt
))); 
1450                         *buf
++ = (char) (0x80 | ((cc 
>> (cnt 
* 6)) & 0x3f)); 
1456     if (srcLen 
== wxNO_LEN 
&& buf 
&& (len 
< n
)) 
1462 // ============================================================================ 
1464 // ============================================================================ 
1466 #ifdef WORDS_BIGENDIAN 
1467     #define wxMBConvUTF16straight wxMBConvUTF16BE 
1468     #define wxMBConvUTF16swap     wxMBConvUTF16LE 
1470     #define wxMBConvUTF16swap     wxMBConvUTF16BE 
1471     #define wxMBConvUTF16straight wxMBConvUTF16LE 
1475 size_t wxMBConvUTF16Base::GetLength(const char *src
, size_t srcLen
) 
1477     if ( srcLen 
== wxNO_LEN 
) 
1479         // count the number of bytes in input, including the trailing NULs 
1480         const wxUint16 
*inBuff 
= reinterpret_cast<const wxUint16 
*>(src
); 
1481         for ( srcLen 
= 1; *inBuff
++; srcLen
++ ) 
1484         srcLen 
*= BYTES_PER_CHAR
; 
1486     else // we already have the length 
1488         // we can only convert an entire number of UTF-16 characters 
1489         if ( srcLen 
% BYTES_PER_CHAR 
) 
1490             return wxCONV_FAILED
; 
1496 // case when in-memory representation is UTF-16 too 
1499 // ---------------------------------------------------------------------------- 
1500 // conversions without endianness change 
1501 // ---------------------------------------------------------------------------- 
1504 wxMBConvUTF16straight::ToWChar(wchar_t *dst
, size_t dstLen
, 
1505                                const char *src
, size_t srcLen
) const 
1507     // set up the scene for using memcpy() (which is presumably more efficient 
1508     // than copying the bytes one by one) 
1509     srcLen 
= GetLength(src
, srcLen
); 
1510     if ( srcLen 
== wxNO_LEN 
) 
1511         return wxCONV_FAILED
; 
1513     const size_t inLen 
= srcLen 
/ BYTES_PER_CHAR
; 
1516         if ( dstLen 
< inLen 
) 
1517             return wxCONV_FAILED
; 
1519         memcpy(dst
, src
, srcLen
); 
1526 wxMBConvUTF16straight::FromWChar(char *dst
, size_t dstLen
, 
1527                                  const wchar_t *src
, size_t srcLen
) const 
1529     if ( srcLen 
== wxNO_LEN 
) 
1530         srcLen 
= wxWcslen(src
) + 1; 
1532     srcLen 
*= BYTES_PER_CHAR
; 
1536         if ( dstLen 
< srcLen 
) 
1537             return wxCONV_FAILED
; 
1539         memcpy(dst
, src
, srcLen
); 
1545 // ---------------------------------------------------------------------------- 
1546 // endian-reversing conversions 
1547 // ---------------------------------------------------------------------------- 
1550 wxMBConvUTF16swap::ToWChar(wchar_t *dst
, size_t dstLen
, 
1551                            const char *src
, size_t srcLen
) const 
1553     srcLen 
= GetLength(src
, srcLen
); 
1554     if ( srcLen 
== wxNO_LEN 
) 
1555         return wxCONV_FAILED
; 
1557     srcLen 
/= BYTES_PER_CHAR
; 
1561         if ( dstLen 
< srcLen 
) 
1562             return wxCONV_FAILED
; 
1564         const wxUint16 
*inBuff 
= reinterpret_cast<const wxUint16 
*>(src
); 
1565         for ( size_t n 
= 0; n 
< srcLen
; n
++, inBuff
++ ) 
1567             *dst
++ = wxUINT16_SWAP_ALWAYS(*inBuff
); 
1575 wxMBConvUTF16swap::FromWChar(char *dst
, size_t dstLen
, 
1576                              const wchar_t *src
, size_t srcLen
) const 
1578     if ( srcLen 
== wxNO_LEN 
) 
1579         srcLen 
= wxWcslen(src
) + 1; 
1581     srcLen 
*= BYTES_PER_CHAR
; 
1585         if ( dstLen 
< srcLen 
) 
1586             return wxCONV_FAILED
; 
1588         wxUint16 
*outBuff 
= reinterpret_cast<wxUint16 
*>(dst
); 
1589         for ( size_t n 
= 0; n 
< srcLen
; n 
+= BYTES_PER_CHAR
, src
++ ) 
1591             *outBuff
++ = wxUINT16_SWAP_ALWAYS(*src
); 
1598 #else // !WC_UTF16: wchar_t is UTF-32 
1600 // ---------------------------------------------------------------------------- 
1601 // conversions without endianness change 
1602 // ---------------------------------------------------------------------------- 
1605 wxMBConvUTF16straight::ToWChar(wchar_t *dst
, size_t dstLen
, 
1606                                const char *src
, size_t srcLen
) const 
1608     srcLen 
= GetLength(src
, srcLen
); 
1609     if ( srcLen 
== wxNO_LEN 
) 
1610         return wxCONV_FAILED
; 
1612     const size_t inLen 
= srcLen 
/ BYTES_PER_CHAR
; 
1615         // optimization: return maximal space which could be needed for this 
1616         // string even if the real size could be smaller if the buffer contains 
1622     const wxUint16 
*inBuff 
= reinterpret_cast<const wxUint16 
*>(src
); 
1623     for ( const wxUint16 
* const inEnd 
= inBuff 
+ inLen
; inBuff 
< inEnd
; ) 
1625         const wxUint32 ch 
= wxDecodeSurrogate(&inBuff
); 
1627             return wxCONV_FAILED
; 
1629         if ( ++outLen 
> dstLen 
) 
1630             return wxCONV_FAILED
; 
1640 wxMBConvUTF16straight::FromWChar(char *dst
, size_t dstLen
, 
1641                                  const wchar_t *src
, size_t srcLen
) const 
1643     if ( srcLen 
== wxNO_LEN 
) 
1644         srcLen 
= wxWcslen(src
) + 1; 
1647     wxUint16 
*outBuff 
= reinterpret_cast<wxUint16 
*>(dst
); 
1648     for ( size_t n 
= 0; n 
< srcLen
; n
++ ) 
1651         const size_t numChars 
= encode_utf16(*src
++, cc
); 
1652         if ( numChars 
== wxCONV_FAILED 
) 
1653             return wxCONV_FAILED
; 
1655         outLen 
+= numChars 
* BYTES_PER_CHAR
; 
1658             if ( outLen 
> dstLen 
) 
1659                 return wxCONV_FAILED
; 
1662             if ( numChars 
== 2 ) 
1664                 // second character of a surrogate 
1673 // ---------------------------------------------------------------------------- 
1674 // endian-reversing conversions 
1675 // ---------------------------------------------------------------------------- 
1678 wxMBConvUTF16swap::ToWChar(wchar_t *dst
, size_t dstLen
, 
1679                            const char *src
, size_t srcLen
) const 
1681     srcLen 
= GetLength(src
, srcLen
); 
1682     if ( srcLen 
== wxNO_LEN 
) 
1683         return wxCONV_FAILED
; 
1685     const size_t inLen 
= srcLen 
/ BYTES_PER_CHAR
; 
1688         // optimization: return maximal space which could be needed for this 
1689         // string even if the real size could be smaller if the buffer contains 
1695     const wxUint16 
*inBuff 
= reinterpret_cast<const wxUint16 
*>(src
); 
1696     for ( const wxUint16 
* const inEnd 
= inBuff 
+ inLen
; inBuff 
< inEnd
; ) 
1701         tmp
[0] = wxUINT16_SWAP_ALWAYS(*inBuff
); 
1703         tmp
[1] = wxUINT16_SWAP_ALWAYS(*inBuff
); 
1705         const size_t numChars 
= decode_utf16(tmp
, ch
); 
1706         if ( numChars 
== wxCONV_FAILED 
) 
1707             return wxCONV_FAILED
; 
1709         if ( numChars 
== 2 ) 
1712         if ( ++outLen 
> dstLen 
) 
1713             return wxCONV_FAILED
; 
1723 wxMBConvUTF16swap::FromWChar(char *dst
, size_t dstLen
, 
1724                              const wchar_t *src
, size_t srcLen
) const 
1726     if ( srcLen 
== wxNO_LEN 
) 
1727         srcLen 
= wxWcslen(src
) + 1; 
1730     wxUint16 
*outBuff 
= reinterpret_cast<wxUint16 
*>(dst
); 
1731     for ( const wchar_t *srcEnd 
= src 
+ srcLen
; src 
< srcEnd
; src
++ ) 
1734         const size_t numChars 
= encode_utf16(*src
, cc
); 
1735         if ( numChars 
== wxCONV_FAILED 
) 
1736             return wxCONV_FAILED
; 
1738         outLen 
+= numChars 
* BYTES_PER_CHAR
; 
1741             if ( outLen 
> dstLen 
) 
1742                 return wxCONV_FAILED
; 
1744             *outBuff
++ = wxUINT16_SWAP_ALWAYS(cc
[0]); 
1745             if ( numChars 
== 2 ) 
1747                 // second character of a surrogate 
1748                 *outBuff
++ = wxUINT16_SWAP_ALWAYS(cc
[1]); 
1756 #endif // WC_UTF16/!WC_UTF16 
1759 // ============================================================================ 
1761 // ============================================================================ 
1763 #ifdef WORDS_BIGENDIAN 
1764     #define wxMBConvUTF32straight  wxMBConvUTF32BE 
1765     #define wxMBConvUTF32swap      wxMBConvUTF32LE 
1767     #define wxMBConvUTF32swap      wxMBConvUTF32BE 
1768     #define wxMBConvUTF32straight  wxMBConvUTF32LE 
1772 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
; 
1773 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
; 
1776 size_t wxMBConvUTF32Base::GetLength(const char *src
, size_t srcLen
) 
1778     if ( srcLen 
== wxNO_LEN 
) 
1780         // count the number of bytes in input, including the trailing NULs 
1781         const wxUint32 
*inBuff 
= reinterpret_cast<const wxUint32 
*>(src
); 
1782         for ( srcLen 
= 1; *inBuff
++; srcLen
++ ) 
1785         srcLen 
*= BYTES_PER_CHAR
; 
1787     else // we already have the length 
1789         // we can only convert an entire number of UTF-32 characters 
1790         if ( srcLen 
% BYTES_PER_CHAR 
) 
1791             return wxCONV_FAILED
; 
1797 // case when in-memory representation is UTF-16 
1800 // ---------------------------------------------------------------------------- 
1801 // conversions without endianness change 
1802 // ---------------------------------------------------------------------------- 
1805 wxMBConvUTF32straight::ToWChar(wchar_t *dst
, size_t dstLen
, 
1806                                const char *src
, size_t srcLen
) const 
1808     srcLen 
= GetLength(src
, srcLen
); 
1809     if ( srcLen 
== wxNO_LEN 
) 
1810         return wxCONV_FAILED
; 
1812     const wxUint32 
*inBuff 
= reinterpret_cast<const wxUint32 
*>(src
); 
1813     const size_t inLen 
= srcLen 
/ BYTES_PER_CHAR
; 
1815     for ( size_t n 
= 0; n 
< inLen
; n
++ ) 
1818         const size_t numChars 
= encode_utf16(*inBuff
++, cc
); 
1819         if ( numChars 
== wxCONV_FAILED 
) 
1820             return wxCONV_FAILED
; 
1825             if ( outLen 
> dstLen 
) 
1826                 return wxCONV_FAILED
; 
1829             if ( numChars 
== 2 ) 
1831                 // second character of a surrogate 
1841 wxMBConvUTF32straight::FromWChar(char *dst
, size_t dstLen
, 
1842                                  const wchar_t *src
, size_t srcLen
) const 
1844     if ( srcLen 
== wxNO_LEN 
) 
1845         srcLen 
= wxWcslen(src
) + 1; 
1849         // optimization: return maximal space which could be needed for this 
1850         // string instead of the exact amount which could be less if there are 
1851         // any surrogates in the input 
1853         // we consider that surrogates are rare enough to make it worthwhile to 
1854         // avoid running the loop below at the cost of slightly extra memory 
1856         return srcLen 
* BYTES_PER_CHAR
; 
1859     wxUint32 
*outBuff 
= reinterpret_cast<wxUint32 
*>(dst
); 
1861     for ( const wchar_t * const srcEnd 
= src 
+ srcLen
; src 
< srcEnd
; ) 
1863         const wxUint32 ch 
= wxDecodeSurrogate(&src
); 
1865             return wxCONV_FAILED
; 
1867         outLen 
+= BYTES_PER_CHAR
; 
1869         if ( outLen 
> dstLen 
) 
1870             return wxCONV_FAILED
; 
1878 // ---------------------------------------------------------------------------- 
1879 // endian-reversing conversions 
1880 // ---------------------------------------------------------------------------- 
1883 wxMBConvUTF32swap::ToWChar(wchar_t *dst
, size_t dstLen
, 
1884                            const char *src
, size_t srcLen
) const 
1886     srcLen 
= GetLength(src
, srcLen
); 
1887     if ( srcLen 
== wxNO_LEN 
) 
1888         return wxCONV_FAILED
; 
1890     const wxUint32 
*inBuff 
= reinterpret_cast<const wxUint32 
*>(src
); 
1891     const size_t inLen 
= srcLen 
/ BYTES_PER_CHAR
; 
1893     for ( size_t n 
= 0; n 
< inLen
; n
++, inBuff
++ ) 
1896         const size_t numChars 
= encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff
), cc
); 
1897         if ( numChars 
== wxCONV_FAILED 
) 
1898             return wxCONV_FAILED
; 
1903             if ( outLen 
> dstLen 
) 
1904                 return wxCONV_FAILED
; 
1907             if ( numChars 
== 2 ) 
1909                 // second character of a surrogate 
1919 wxMBConvUTF32swap::FromWChar(char *dst
, size_t dstLen
, 
1920                              const wchar_t *src
, size_t srcLen
) const 
1922     if ( srcLen 
== wxNO_LEN 
) 
1923         srcLen 
= wxWcslen(src
) + 1; 
1927         // optimization: return maximal space which could be needed for this 
1928         // string instead of the exact amount which could be less if there are 
1929         // any surrogates in the input 
1931         // we consider that surrogates are rare enough to make it worthwhile to 
1932         // avoid running the loop below at the cost of slightly extra memory 
1934         return srcLen
*BYTES_PER_CHAR
; 
1937     wxUint32 
*outBuff 
= reinterpret_cast<wxUint32 
*>(dst
); 
1939     for ( const wchar_t * const srcEnd 
= src 
+ srcLen
; src 
< srcEnd
; ) 
1941         const wxUint32 ch 
= wxDecodeSurrogate(&src
); 
1943             return wxCONV_FAILED
; 
1945         outLen 
+= BYTES_PER_CHAR
; 
1947         if ( outLen 
> dstLen 
) 
1948             return wxCONV_FAILED
; 
1950         *outBuff
++ = wxUINT32_SWAP_ALWAYS(ch
); 
1956 #else // !WC_UTF16: wchar_t is UTF-32 
1958 // ---------------------------------------------------------------------------- 
1959 // conversions without endianness change 
1960 // ---------------------------------------------------------------------------- 
1963 wxMBConvUTF32straight::ToWChar(wchar_t *dst
, size_t dstLen
, 
1964                                const char *src
, size_t srcLen
) const 
1966     // use memcpy() as it should be much faster than hand-written loop 
1967     srcLen 
= GetLength(src
, srcLen
); 
1968     if ( srcLen 
== wxNO_LEN 
) 
1969         return wxCONV_FAILED
; 
1971     const size_t inLen 
= srcLen
/BYTES_PER_CHAR
; 
1974         if ( dstLen 
< inLen 
) 
1975             return wxCONV_FAILED
; 
1977         memcpy(dst
, src
, srcLen
); 
1984 wxMBConvUTF32straight::FromWChar(char *dst
, size_t dstLen
, 
1985                                  const wchar_t *src
, size_t srcLen
) const 
1987     if ( srcLen 
== wxNO_LEN 
) 
1988         srcLen 
= wxWcslen(src
) + 1; 
1990     srcLen 
*= BYTES_PER_CHAR
; 
1994         if ( dstLen 
< srcLen 
) 
1995             return wxCONV_FAILED
; 
1997         memcpy(dst
, src
, srcLen
); 
2003 // ---------------------------------------------------------------------------- 
2004 // endian-reversing conversions 
2005 // ---------------------------------------------------------------------------- 
2008 wxMBConvUTF32swap::ToWChar(wchar_t *dst
, size_t dstLen
, 
2009                            const char *src
, size_t srcLen
) const 
2011     srcLen 
= GetLength(src
, srcLen
); 
2012     if ( srcLen 
== wxNO_LEN 
) 
2013         return wxCONV_FAILED
; 
2015     srcLen 
/= BYTES_PER_CHAR
; 
2019         if ( dstLen 
< srcLen 
) 
2020             return wxCONV_FAILED
; 
2022         const wxUint32 
*inBuff 
= reinterpret_cast<const wxUint32 
*>(src
); 
2023         for ( size_t n 
= 0; n 
< srcLen
; n
++, inBuff
++ ) 
2025             *dst
++ = wxUINT32_SWAP_ALWAYS(*inBuff
); 
2033 wxMBConvUTF32swap::FromWChar(char *dst
, size_t dstLen
, 
2034                              const wchar_t *src
, size_t srcLen
) const 
2036     if ( srcLen 
== wxNO_LEN 
) 
2037         srcLen 
= wxWcslen(src
) + 1; 
2039     srcLen 
*= BYTES_PER_CHAR
; 
2043         if ( dstLen 
< srcLen 
) 
2044             return wxCONV_FAILED
; 
2046         wxUint32 
*outBuff 
= reinterpret_cast<wxUint32 
*>(dst
); 
2047         for ( size_t n 
= 0; n 
< srcLen
; n 
+= BYTES_PER_CHAR
, src
++ ) 
2049             *outBuff
++ = wxUINT32_SWAP_ALWAYS(*src
); 
2056 #endif // WC_UTF16/!WC_UTF16 
2059 // ============================================================================ 
2060 // The classes doing conversion using the iconv_xxx() functions 
2061 // ============================================================================ 
2065 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with 
2066 //     E2BIG if output buffer is _exactly_ as big as needed. Such case is 
2067 //     (unless there's yet another bug in glibc) the only case when iconv() 
2068 //     returns with (size_t)-1 (which means error) and says there are 0 bytes 
2069 //     left in the input buffer -- when _real_ error occurs, 
2070 //     bytes-left-in-input buffer is non-zero. Hence, this alternative test for 
2072 //     [This bug does not appear in glibc 2.2.] 
2073 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1 
2074 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \ 
2075                                      (errno != E2BIG || bufLeft != 0)) 
2077 #define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1) 
2080 #define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x)) 
2082 #define ICONV_T_INVALID ((iconv_t)-1) 
2084 #if SIZEOF_WCHAR_T == 4 
2085     #define WC_BSWAP    wxUINT32_SWAP_ALWAYS 
2086     #define WC_ENC      wxFONTENCODING_UTF32 
2087 #elif SIZEOF_WCHAR_T == 2 
2088     #define WC_BSWAP    wxUINT16_SWAP_ALWAYS 
2089     #define WC_ENC      wxFONTENCODING_UTF16 
2090 #else // sizeof(wchar_t) != 2 nor 4 
2091     // does this ever happen? 
2092     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org" 
2095 // ---------------------------------------------------------------------------- 
2096 // wxMBConv_iconv: encapsulates an iconv character set 
2097 // ---------------------------------------------------------------------------- 
2099 class wxMBConv_iconv 
: public wxMBConv
 
2102     wxMBConv_iconv(const char *name
); 
2103     virtual ~wxMBConv_iconv(); 
2105     // implement base class virtual methods 
2106     virtual size_t ToWChar(wchar_t *dst
, size_t dstLen
, 
2107                            const char *src
, size_t srcLen 
= wxNO_LEN
) const; 
2108     virtual size_t FromWChar(char *dst
, size_t dstLen
, 
2109                              const wchar_t *src
, size_t srcLen 
= wxNO_LEN
) const; 
2110     virtual size_t GetMBNulLen() const; 
2112 #if wxUSE_UNICODE_UTF8 
2113     virtual bool IsUTF8() const; 
2116     virtual wxMBConv 
*Clone() const 
2118         wxMBConv_iconv 
*p 
= new wxMBConv_iconv(m_name
.ToAscii()); 
2119         p
->m_minMBCharWidth 
= m_minMBCharWidth
; 
2124         { return (m2w 
!= ICONV_T_INVALID
) && (w2m 
!= ICONV_T_INVALID
); } 
2127     // the iconv handlers used to translate from multibyte 
2128     // to wide char and in the other direction 
2133     // guards access to m2w and w2m objects 
2134     wxMutex m_iconvMutex
; 
2138     // the name (for iconv_open()) of a wide char charset -- if none is 
2139     // available on this machine, it will remain NULL 
2140     static wxString ms_wcCharsetName
; 
2142     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has 
2143     // different endian-ness than the native one 
2144     static bool ms_wcNeedsSwap
; 
2147     // name of the encoding handled by this conversion 
2150     // cached result of GetMBNulLen(); set to 0 meaning "unknown" 
2152     size_t m_minMBCharWidth
; 
2155 // make the constructor available for unit testing 
2156 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_iconv( const char* name 
) 
2158     wxMBConv_iconv
* result 
= new wxMBConv_iconv( name 
); 
2159     if ( !result
->IsOk() ) 
2168 wxString 
wxMBConv_iconv::ms_wcCharsetName
; 
2169 bool wxMBConv_iconv::ms_wcNeedsSwap 
= false; 
2171 wxMBConv_iconv::wxMBConv_iconv(const char *name
) 
2174     m_minMBCharWidth 
= 0; 
2176     // check for charset that represents wchar_t: 
2177     if ( ms_wcCharsetName
.empty() ) 
2179         wxLogTrace(TRACE_STRCONV
, wxT("Looking for wide char codeset:")); 
2182         const wxChar 
*const *names 
= wxFontMapperBase::GetAllEncodingNames(WC_ENC
); 
2183 #else // !wxUSE_FONTMAP 
2184         static const wxChar 
*const names_static
[] = 
2186 #if SIZEOF_WCHAR_T == 4 
2188 #elif SIZEOF_WCHAR_T = 2 
2193         const wxChar 
*const *names 
= names_static
; 
2194 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP 
2196         for ( ; *names 
&& ms_wcCharsetName
.empty(); ++names 
) 
2198             const wxString 
nameCS(*names
); 
2200             // first try charset with explicit bytesex info (e.g. "UCS-4LE"): 
2201             wxString 
nameXE(nameCS
); 
2203 #ifdef WORDS_BIGENDIAN 
2204                 nameXE 
+= wxT("BE"); 
2205 #else // little endian 
2206                 nameXE 
+= wxT("LE"); 
2209             wxLogTrace(TRACE_STRCONV
, wxT("  trying charset \"%s\""), 
2212             m2w 
= iconv_open(nameXE
.ToAscii(), name
); 
2213             if ( m2w 
== ICONV_T_INVALID 
) 
2215                 // try charset w/o bytesex info (e.g. "UCS4") 
2216                 wxLogTrace(TRACE_STRCONV
, wxT("  trying charset \"%s\""), 
2218                 m2w 
= iconv_open(nameCS
.ToAscii(), name
); 
2220                 // and check for bytesex ourselves: 
2221                 if ( m2w 
!= ICONV_T_INVALID 
) 
2223                     char    buf
[2], *bufPtr
; 
2232                     outsz 
= SIZEOF_WCHAR_T 
* 2; 
2233                     char* wbufPtr 
= (char*)wbuf
; 
2237                         m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
, 
2240                     if (ICONV_FAILED(res
, insz
)) 
2242                         wxLogLastError(wxT("iconv")); 
2243                         wxLogError(_("Conversion to charset '%s' doesn't work."), 
2246                     else // ok, can convert to this encoding, remember it 
2248                         ms_wcCharsetName 
= nameCS
; 
2249                         ms_wcNeedsSwap 
= wbuf
[0] != (wchar_t)buf
[0]; 
2253             else // use charset not requiring byte swapping 
2255                 ms_wcCharsetName 
= nameXE
; 
2259         wxLogTrace(TRACE_STRCONV
, 
2260                    wxT("iconv wchar_t charset is \"%s\"%s"), 
2261                    ms_wcCharsetName
.empty() ? wxString("<none>") 
2263                    ms_wcNeedsSwap 
? wxT(" (needs swap)") 
2266     else // we already have ms_wcCharsetName 
2268         m2w 
= iconv_open(ms_wcCharsetName
.ToAscii(), name
); 
2271     if ( ms_wcCharsetName
.empty() ) 
2273         w2m 
= ICONV_T_INVALID
; 
2277         w2m 
= iconv_open(name
, ms_wcCharsetName
.ToAscii()); 
2278         if ( w2m 
== ICONV_T_INVALID 
) 
2280             wxLogTrace(TRACE_STRCONV
, 
2281                        wxT("\"%s\" -> \"%s\" works but not the converse!?"), 
2282                        ms_wcCharsetName
.c_str(), name
); 
2287 wxMBConv_iconv::~wxMBConv_iconv() 
2289     if ( m2w 
!= ICONV_T_INVALID 
) 
2291     if ( w2m 
!= ICONV_T_INVALID 
) 
2296 wxMBConv_iconv::ToWChar(wchar_t *dst
, size_t dstLen
, 
2297                         const char *src
, size_t srcLen
) const 
2299     if ( srcLen 
== wxNO_LEN 
) 
2301         // find the string length: notice that this must be done differently for 
2302         // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 
2304         const size_t nulLen 
= GetMBNulLen(); 
2308                 return wxCONV_FAILED
; 
2311                 srcLen 
= strlen(src
); // arguably more optimized than our version 
2316                 // for UTF-16/32 not only we need to have 2/4 consecutive NULs 
2317                 // but they also have to start at character boundary and not 
2318                 // span two adjacent characters 
2320                 for ( p 
= src
; NotAllNULs(p
, nulLen
); p 
+= nulLen 
) 
2326         // when we're determining the length of the string ourselves we count 
2327         // the terminating NUL(s) as part of it and always NUL-terminate the 
2332     // we express length in the number of (wide) characters but iconv always 
2333     // counts buffer sizes in bytes 
2334     dstLen 
*= SIZEOF_WCHAR_T
; 
2337     // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle. 
2338     //     Unfortunately there are a couple of global wxCSConv objects such as 
2339     //     wxConvLocal that are used all over wx code, so we have to make sure 
2340     //     the handle is used by at most one thread at the time. Otherwise 
2341     //     only a few wx classes would be safe to use from non-main threads 
2342     //     as MB<->WC conversion would fail "randomly". 
2343     wxMutexLocker 
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
); 
2344 #endif // wxUSE_THREADS 
2347     const char *pszPtr 
= src
; 
2351         char* bufPtr 
= (char*)dst
; 
2353         // have destination buffer, convert there 
2354         size_t dstLenOrig 
= dstLen
; 
2356                      ICONV_CHAR_CAST(&pszPtr
), &srcLen
, 
2359         // convert the number of bytes converted as returned by iconv to the 
2360         // number of (wide) characters converted that we need 
2361         res 
= (dstLenOrig 
- dstLen
) / SIZEOF_WCHAR_T
; 
2365             // convert to native endianness 
2366             for ( unsigned i 
= 0; i 
< res
; i
++ ) 
2367                 dst
[i
] = WC_BSWAP(dst
[i
]); 
2370     else // no destination buffer 
2372         // convert using temp buffer to calculate the size of the buffer needed 
2378             char* bufPtr 
= (char*)tbuf
; 
2379             dstLen 
= 8 * SIZEOF_WCHAR_T
; 
2382                          ICONV_CHAR_CAST(&pszPtr
), &srcLen
, 
2385             res 
+= 8 - (dstLen 
/ SIZEOF_WCHAR_T
); 
2387         while ((cres 
== (size_t)-1) && (errno 
== E2BIG
)); 
2390     if (ICONV_FAILED(cres
, srcLen
)) 
2392         //VS: it is ok if iconv fails, hence trace only 
2393         wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); 
2394         return wxCONV_FAILED
; 
2400 size_t wxMBConv_iconv::FromWChar(char *dst
, size_t dstLen
, 
2401                                  const wchar_t *src
, size_t srcLen
) const 
2404     // NB: explained in MB2WC 
2405     wxMutexLocker 
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
); 
2408     if ( srcLen 
== wxNO_LEN 
) 
2409         srcLen 
= wxWcslen(src
) + 1; 
2411     size_t inbuflen 
= srcLen 
* SIZEOF_WCHAR_T
; 
2412     size_t outbuflen 
= dstLen
; 
2415     wchar_t *tmpbuf 
= 0; 
2419         // need to copy to temp buffer to switch endianness 
2420         // (doing WC_BSWAP twice on the original buffer won't work, as it 
2421         //  could be in read-only memory, or be accessed in some other thread) 
2422         tmpbuf 
= (wchar_t *)malloc(inbuflen
); 
2423         for ( size_t i 
= 0; i 
< srcLen
; i
++ ) 
2424             tmpbuf
[i
] = WC_BSWAP(src
[i
]); 
2429     char* inbuf 
= (char*)src
; 
2432         // have destination buffer, convert there 
2433         cres 
= iconv(w2m
, ICONV_CHAR_CAST(&inbuf
), &inbuflen
, &dst
, &outbuflen
); 
2435         res 
= dstLen 
- outbuflen
; 
2437     else // no destination buffer 
2439         // convert using temp buffer to calculate the size of the buffer needed 
2445             outbuflen 
= WXSIZEOF(tbuf
); 
2447             cres 
= iconv(w2m
, ICONV_CHAR_CAST(&inbuf
), &inbuflen
, &dst
, &outbuflen
); 
2449             res 
+= WXSIZEOF(tbuf
) - outbuflen
; 
2451         while ((cres 
== (size_t)-1) && (errno 
== E2BIG
)); 
2459     if (ICONV_FAILED(cres
, inbuflen
)) 
2461         wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); 
2462         return wxCONV_FAILED
; 
2468 size_t wxMBConv_iconv::GetMBNulLen() const 
2470     if ( m_minMBCharWidth 
== 0 ) 
2472         wxMBConv_iconv 
* const self 
= wxConstCast(this, wxMBConv_iconv
); 
2475         // NB: explained in MB2WC 
2476         wxMutexLocker 
lock(self
->m_iconvMutex
); 
2479         const wchar_t *wnul 
= L
""; 
2480         char buf
[8]; // should be enough for NUL in any encoding 
2481         size_t inLen 
= sizeof(wchar_t), 
2482                outLen 
= WXSIZEOF(buf
); 
2483         char *inBuff 
= (char *)wnul
; 
2484         char *outBuff 
= buf
; 
2485         if ( iconv(w2m
, ICONV_CHAR_CAST(&inBuff
), &inLen
, &outBuff
, &outLen
) == (size_t)-1 ) 
2487             self
->m_minMBCharWidth 
= (size_t)-1; 
2491             self
->m_minMBCharWidth 
= outBuff 
- buf
; 
2495     return m_minMBCharWidth
; 
#if wxUSE_UNICODE_UTF8
// Return true if the charset name this converter was created with is one of
// the two spellings of UTF-8 accepted here, as compared by wxStricmp().
bool wxMBConv_iconv::IsUTF8() const
{
    static const char * const utf8Names[] = { "UTF-8", "UTF8" };

    for ( size_t i = 0; i < WXSIZEOF(utf8Names); i++ )
    {
        if ( wxStricmp(m_name, utf8Names[i]) == 0 )
            return true;
    }

    return false;
}
#endif // wxUSE_UNICODE_UTF8
2506 #endif // HAVE_ICONV 
2509 // ============================================================================ 
2510 // Win32 conversion classes 
2511 // ============================================================================ 
2513 #ifdef wxHAVE_WIN32_MB2WC 
2517 extern WXDLLIMPEXP_BASE 
long wxCharsetToCodepage(const char *charset
); 
2518 extern WXDLLIMPEXP_BASE 
long wxEncodingToCodepage(wxFontEncoding encoding
); 
2521 class wxMBConv_win32 
: public wxMBConv
 
2526         m_CodePage 
= CP_ACP
; 
2527         m_minMBCharWidth 
= 0; 
2530     wxMBConv_win32(const wxMBConv_win32
& conv
) 
2533         m_CodePage 
= conv
.m_CodePage
; 
2534         m_minMBCharWidth 
= conv
.m_minMBCharWidth
; 
2538     wxMBConv_win32(const char* name
) 
2540         m_CodePage 
= wxCharsetToCodepage(name
); 
2541         m_minMBCharWidth 
= 0; 
2544     wxMBConv_win32(wxFontEncoding encoding
) 
2546         m_CodePage 
= wxEncodingToCodepage(encoding
); 
2547         m_minMBCharWidth 
= 0; 
2549 #endif // wxUSE_FONTMAP 
2551     virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
2553         // note that we have to use MB_ERR_INVALID_CHARS flag as without it 
2554         // the behaviour is not compatible with the Unix version (using iconv) 
2555         // and breaks the library itself, e.g. wxTextInputStream::NextChar() 
2556         // wouldn't work if reading an incomplete MB char didn't result in an 
2559         // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or 
2560         // Win XP or newer and it is not supported for UTF-[78] so we always 
2561         // use our own conversions in this case. See 
2562         //     http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx 
2563         //     http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp 
2564         if ( m_CodePage 
== CP_UTF8 
) 
2566             return wxMBConvUTF8().MB2WC(buf
, psz
, n
); 
2569         if ( m_CodePage 
== CP_UTF7 
) 
2571             return wxMBConvUTF7().MB2WC(buf
, psz
, n
); 
2575         if ( (m_CodePage 
< 50000 && m_CodePage 
!= CP_SYMBOL
) && 
2576                 IsAtLeastWin2kSP4() ) 
2578             flags 
= MB_ERR_INVALID_CHARS
; 
2581         const size_t len 
= ::MultiByteToWideChar
 
2583                                 m_CodePage
,     // code page 
2584                                 flags
,          // flags: fall on error 
2585                                 psz
,            // input string 
2586                                 -1,             // its length (NUL-terminated) 
2587                                 buf
,            // output string 
2588                                 buf 
? n 
: 0     // size of output buffer 
2592             // function totally failed 
2593             return wxCONV_FAILED
; 
2596         // if we were really converting and didn't use MB_ERR_INVALID_CHARS, 
2597         // check if we succeeded, by doing a double trip: 
2598         if ( !flags 
&& buf 
) 
2600             const size_t mbLen 
= strlen(psz
); 
2601             wxCharBuffer 
mbBuf(mbLen
); 
2602             if ( ::WideCharToMultiByte
 
2609                       mbLen 
+ 1,        // size in bytes, not length 
2613                   strcmp(mbBuf
, psz
) != 0 ) 
2615                 // we didn't obtain the same thing we started from, hence 
2616                 // the conversion was lossy and we consider that it failed 
2617                 return wxCONV_FAILED
; 
2621         // note that it returns count of written chars for buf != NULL and size 
2622         // of the needed buffer for buf == NULL so in either case the length of 
2623         // the string (which never includes the terminating NUL) is one less 
2627     virtual size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const 
2630             we have a problem here: by default, WideCharToMultiByte() may 
2631             replace characters unrepresentable in the target code page with bad 
2632             quality approximations such as turning "1/2" symbol (U+00BD) into 
2633             "1" for the code pages which don't have it and we, obviously, want 
2634             to avoid this at any price 
2636             the trouble is that this function does it _silently_, i.e. it won't 
2637             even tell us whether it did or not... Win98/2000 and higher provide 
2638             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and 
2639             we have to resort to a round trip, i.e. check that converting back 
2640             results in the same string -- this is, of course, expensive but 
2641             otherwise we simply can't be sure to not garble the data. 
2644         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN 
2645         // it doesn't work with CJK encodings (which we test for rather roughly 
2646         // here...) nor with UTF-7/8 nor, of course, with Windows versions not 
2648         BOOL usedDef 
wxDUMMY_INITIALIZE(false); 
2651         if ( CanUseNoBestFit() && m_CodePage 
< 50000 ) 
2653             // it's our lucky day 
2654             flags 
= WC_NO_BEST_FIT_CHARS
; 
2655             pUsedDef 
= &usedDef
; 
2657         else // old system or unsupported encoding 
2663         const size_t len 
= ::WideCharToMultiByte
 
2665                                 m_CodePage
,     // code page 
2666                                 flags
,          // either none or no best fit 
2667                                 pwz
,            // input string 
2668                                 -1,             // it is (wide) NUL-terminated 
2669                                 buf
,            // output buffer 
2670                                 buf 
? n 
: 0,    // and its size 
2671                                 NULL
,           // default "replacement" char 
2672                                 pUsedDef        
// [out] was it used? 
2677             // function totally failed 
2678             return wxCONV_FAILED
; 
2681         // we did something, check if we really succeeded 
2684             // check if the conversion failed, i.e. if any replacements 
2687                 return wxCONV_FAILED
; 
2689         else // we must resort to double tripping... 
2691             // first we need to ensure that we really have the MB data: this is 
2692             // not the case if we're called with NULL buffer, in which case we 
2693             // need to do the conversion yet again 
2694             wxCharBuffer bufDef
; 
2697                 bufDef 
= wxCharBuffer(len
); 
2698                 buf 
= bufDef
.data(); 
2699                 if ( !::WideCharToMultiByte(m_CodePage
, flags
, pwz
, -1, 
2700                                             buf
, len
, NULL
, NULL
) ) 
2701                     return wxCONV_FAILED
; 
2706             wxWCharBuffer 
wcBuf(n
); 
2707             if ( MB2WC(wcBuf
.data(), buf
, n 
+ 1) == wxCONV_FAILED 
|| 
2708                     wcscmp(wcBuf
, pwz
) != 0 ) 
2710                 // we didn't obtain the same thing we started from, hence 
2711                 // the conversion was lossy and we consider that it failed 
2712                 return wxCONV_FAILED
; 
2716         // see the comment above for the reason of "len - 1" 
2720     virtual size_t GetMBNulLen() const 
2722         if ( m_minMBCharWidth 
== 0 ) 
2724             int len 
= ::WideCharToMultiByte
 
2726                             m_CodePage
,     // code page 
2728                             L
"",            // input string 
2729                             1,              // translate just the NUL 
2730                             NULL
,           // output buffer 
2732                             NULL
,           // no replacement char 
2733                             NULL            
// [out] don't care if it was used 
2736             wxMBConv_win32 
* const self 
= wxConstCast(this, wxMBConv_win32
); 
2740                     wxLogDebug(wxT("Unexpected NUL length %d"), len
); 
2741                     self
->m_minMBCharWidth 
= (size_t)-1; 
2745                     self
->m_minMBCharWidth 
= (size_t)-1; 
2751                     self
->m_minMBCharWidth 
= len
; 
2756         return m_minMBCharWidth
; 
2759     virtual wxMBConv 
*Clone() const { return new wxMBConv_win32(*this); } 
2761     bool IsOk() const { return m_CodePage 
!= -1; } 
2764     static bool CanUseNoBestFit() 
2766         static int s_isWin98Or2k 
= -1; 
2768         if ( s_isWin98Or2k 
== -1 ) 
2771             switch ( wxGetOsVersion(&verMaj
, &verMin
) ) 
2773                 case wxOS_WINDOWS_9X
: 
2774                     s_isWin98Or2k 
= verMaj 
>= 4 && verMin 
>= 10; 
2777                 case wxOS_WINDOWS_NT
: 
2778                     s_isWin98Or2k 
= verMaj 
>= 5; 
2782                     // unknown: be conservative by default 
2787             wxASSERT_MSG( s_isWin98Or2k 
!= -1, wxT("should be set above") ); 
2790         return s_isWin98Or2k 
== 1; 
2793     static bool IsAtLeastWin2kSP4() 
2798         static int s_isAtLeastWin2kSP4 
= -1; 
2800         if ( s_isAtLeastWin2kSP4 
== -1 ) 
2802             OSVERSIONINFOEX ver
; 
2804             memset(&ver
, 0, sizeof(ver
)); 
2805             ver
.dwOSVersionInfoSize 
= sizeof(ver
); 
2806             GetVersionEx((OSVERSIONINFO
*)&ver
); 
2808             s_isAtLeastWin2kSP4 
= 
2809               ((ver
.dwMajorVersion 
> 5) || // Vista+ 
2810                (ver
.dwMajorVersion 
== 5 && ver
.dwMinorVersion 
> 0) || // XP/2003 
2811                (ver
.dwMajorVersion 
== 5 && ver
.dwMinorVersion 
== 0 && 
2812                ver
.wServicePackMajor 
>= 4)) // 2000 SP4+ 
2816         return s_isAtLeastWin2kSP4 
== 1; 
2821     // the code page we're working with 
2824     // cached result of GetMBNulLen(), set to 0 initially meaning 
2826     size_t m_minMBCharWidth
; 
2829 #endif // wxHAVE_WIN32_MB2WC 
2832 // ============================================================================ 
2833 // wxEncodingConverter based conversion classes 
2834 // ============================================================================ 
2838 class wxMBConv_wxwin 
: public wxMBConv
 
2843         // Refuse to use broken wxEncodingConverter code for Mac-specific encodings. 
2844         // The wxMBConv_cf class does a better job. 
2845         m_ok 
= (m_enc 
< wxFONTENCODING_MACMIN 
|| m_enc 
> wxFONTENCODING_MACMAX
) && 
2846                m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) && 
2847                w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
); 
2851     // temporarily just use wxEncodingConverter stuff, 
2852     // so that it works while a better implementation is built 
2853     wxMBConv_wxwin(const char* name
) 
2856             m_enc 
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false); 
2858             m_enc 
= wxFONTENCODING_SYSTEM
; 
2863     wxMBConv_wxwin(wxFontEncoding enc
) 
2870     size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const 
2872         size_t inbuf 
= strlen(psz
); 
2875             if (!m2w
.Convert(psz
, buf
)) 
2876                 return wxCONV_FAILED
; 
2881     size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const 
2883         const size_t inbuf 
= wxWcslen(psz
); 
2886             if (!w2m
.Convert(psz
, buf
)) 
2887                 return wxCONV_FAILED
; 
2893     virtual size_t GetMBNulLen() const 
2897             case wxFONTENCODING_UTF16BE
: 
2898             case wxFONTENCODING_UTF16LE
: 
2901             case wxFONTENCODING_UTF32BE
: 
2902             case wxFONTENCODING_UTF32LE
: 
2910     virtual wxMBConv 
*Clone() const { return new wxMBConv_wxwin(m_enc
); } 
2912     bool IsOk() const { return m_ok
; } 
2915     wxFontEncoding m_enc
; 
2916     wxEncodingConverter m2w
, w2m
; 
2919     // were we initialized successfully? 
2922     wxDECLARE_NO_COPY_CLASS(wxMBConv_wxwin
); 
2925 // make the constructors available for unit testing 
2926 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_wxwin( const char* name 
) 
2928     wxMBConv_wxwin
* result 
= new wxMBConv_wxwin( name 
); 
2929     if ( !result
->IsOk() ) 
2938 #endif // wxUSE_FONTMAP 
2940 // ============================================================================ 
2941 // wxCSConv implementation 
2942 // ============================================================================ 
2944 void wxCSConv::Init() 
2951 wxCSConv::wxCSConv(const wxString
& charset
) 
2955     if ( !charset
.empty() ) 
2957         SetName(charset
.ToAscii()); 
2961     m_encoding 
= wxFontMapperBase::GetEncodingFromName(charset
); 
2962     if ( m_encoding 
== wxFONTENCODING_MAX 
) 
2964         // set to unknown/invalid value 
2965         m_encoding 
= wxFONTENCODING_SYSTEM
; 
2967     else if ( m_encoding 
== wxFONTENCODING_DEFAULT 
) 
2969         // wxFONTENCODING_DEFAULT is same as US-ASCII in this context 
2970         m_encoding 
= wxFONTENCODING_ISO8859_1
; 
2973     m_encoding 
= wxFONTENCODING_SYSTEM
; 
2977 wxCSConv::wxCSConv(wxFontEncoding encoding
) 
2979     if ( encoding 
== wxFONTENCODING_MAX 
|| encoding 
== wxFONTENCODING_DEFAULT 
) 
2981         wxFAIL_MSG( wxT("invalid encoding value in wxCSConv ctor") ); 
2983         encoding 
= wxFONTENCODING_SYSTEM
; 
2988     m_encoding 
= encoding
; 
2991 wxCSConv::~wxCSConv() 
2996 wxCSConv::wxCSConv(const wxCSConv
& conv
) 
3001     SetName(conv
.m_name
); 
3002     m_encoding 
= conv
.m_encoding
; 
3005 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
) 
3009     SetName(conv
.m_name
); 
3010     m_encoding 
= conv
.m_encoding
; 
3015 void wxCSConv::Clear() 
3024 void wxCSConv::SetName(const char *charset
) 
3028         m_name 
= wxStrdup(charset
); 
3035 WX_DECLARE_HASH_MAP( wxFontEncoding
, wxString
, wxIntegerHash
, wxIntegerEqual
, 
3036                      wxEncodingNameCache 
); 
3038 static wxEncodingNameCache gs_nameCache
; 
3041 wxMBConv 
*wxCSConv::DoCreate() const 
3044     wxLogTrace(TRACE_STRCONV
, 
3045                wxT("creating conversion for %s"), 
3047                        : (const char*)wxFontMapperBase::GetEncodingName(m_encoding
).mb_str())); 
3048 #endif // wxUSE_FONTMAP 
3050     // check for the special case of ASCII or ISO8859-1 charset: as we have 
3051     // special knowledge of it anyhow, we don't need to create a special 
3052     // conversion object 
3053     if ( m_encoding 
== wxFONTENCODING_ISO8859_1 
|| 
3054             m_encoding 
== wxFONTENCODING_DEFAULT 
) 
3056         // don't convert at all 
3060     // we trust OS to do conversion better than we can so try external 
3061     // conversion methods first 
3063     // the full order is: 
3064     //      1. OS conversion (iconv() under Unix or Win32 API) 
3065     //      2. hard coded conversions for UTF 
3066     //      3. wxEncodingConverter as fall back 
3072 #endif // !wxUSE_FONTMAP 
3075         wxFontEncoding 
encoding(m_encoding
); 
3080             wxMBConv_iconv 
*conv 
= new wxMBConv_iconv(m_name
); 
3088                 wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false); 
3089 #endif // wxUSE_FONTMAP 
3093             const wxEncodingNameCache::iterator it 
= gs_nameCache
.find(encoding
); 
3094             if ( it 
!= gs_nameCache
.end() ) 
3096                 if ( it
->second
.empty() ) 
3099                 wxMBConv_iconv 
*conv 
= new wxMBConv_iconv(it
->second
.ToAscii()); 
3106             const wxChar
* const* names 
= wxFontMapperBase::GetAllEncodingNames(encoding
); 
3107             // CS : in case this does not return valid names (eg for MacRoman) 
3108             // encoding got a 'failure' entry in the cache all the same, 
3109             // although it just has to be created using a different method, so 
3110             // only store failed iconv creation attempts (or perhaps we 
3111             // shouldn't do this at all ?) 
3112             if ( names
[0] != NULL 
) 
3114                 for ( ; *names
; ++names 
) 
3116                     // FIXME-UTF8: wxFontMapperBase::GetAllEncodingNames() 
3117                     //             will need changes that will obsolete this 
3118                     wxString 
name(*names
); 
3119                     wxMBConv_iconv 
*conv 
= new wxMBConv_iconv(name
.ToAscii()); 
3122                         gs_nameCache
[encoding
] = *names
; 
3129                 gs_nameCache
[encoding
] = wxT(""); // cache the failure 
3132 #endif // wxUSE_FONTMAP 
3134 #endif // HAVE_ICONV 
3136 #ifdef wxHAVE_WIN32_MB2WC 
3139         wxMBConv_win32 
*conv 
= m_name 
? new wxMBConv_win32(m_name
) 
3140                                       : new wxMBConv_win32(m_encoding
); 
3149 #endif // wxHAVE_WIN32_MB2WC 
3153         // leave UTF16 and UTF32 to the built-ins of wx 
3154         if ( m_name 
|| ( m_encoding 
< wxFONTENCODING_UTF16BE 
|| 
3155             ( m_encoding 
>= wxFONTENCODING_MACMIN 
&& m_encoding 
<= wxFONTENCODING_MACMAX 
) ) ) 
3158             wxMBConv_cf 
*conv 
= m_name 
? new wxMBConv_cf(m_name
) 
3159                                           : new wxMBConv_cf(m_encoding
); 
3161             wxMBConv_cf 
*conv 
= new wxMBConv_cf(m_encoding
); 
3170 #endif // __DARWIN__ 
3173     wxFontEncoding enc 
= m_encoding
; 
3175     if ( enc 
== wxFONTENCODING_SYSTEM 
&& m_name 
) 
3177         // use "false" to suppress interactive dialogs -- we can be called from 
3178         // anywhere and popping up a dialog from here is the last thing we want to 
3180         enc 
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false); 
3182 #endif // wxUSE_FONTMAP 
3186         case wxFONTENCODING_UTF7
: 
3187              return new wxMBConvUTF7
; 
3189         case wxFONTENCODING_UTF8
: 
3190              return new wxMBConvUTF8
; 
3192         case wxFONTENCODING_UTF16BE
: 
3193              return new wxMBConvUTF16BE
; 
3195         case wxFONTENCODING_UTF16LE
: 
3196              return new wxMBConvUTF16LE
; 
3198         case wxFONTENCODING_UTF32BE
: 
3199              return new wxMBConvUTF32BE
; 
3201         case wxFONTENCODING_UTF32LE
: 
3202              return new wxMBConvUTF32LE
; 
3205              // nothing to do but put here to suppress gcc warnings 
3212         wxMBConv_wxwin 
*conv 
= m_name 
? new wxMBConv_wxwin(m_name
) 
3213                                       : new wxMBConv_wxwin(m_encoding
); 
3220     wxLogTrace(TRACE_STRCONV
, 
3221                wxT("encoding \"%s\" is not supported by this system"), 
3222                (m_name 
? wxString(m_name
) 
3223                        : wxFontMapperBase::GetEncodingName(m_encoding
))); 
3224 #endif // wxUSE_FONTMAP 
3229 void wxCSConv::CreateConvIfNeeded() const 
3233         wxCSConv 
*self 
= (wxCSConv 
*)this; // const_cast 
3235         // if we don't have neither the name nor the encoding, use the default 
3236         // encoding for this system 
3237         if ( !m_name 
&& m_encoding 
== wxFONTENCODING_SYSTEM 
) 
3240             self
->m_encoding 
= wxLocale::GetSystemEncoding(); 
3242             // fallback to some reasonable default: 
3243             self
->m_encoding 
= wxFONTENCODING_ISO8859_1
; 
3244 #endif // wxUSE_INTL 
3247         self
->m_convReal 
= DoCreate(); 
3248         self
->m_deferred 
= false; 
3252 bool wxCSConv::IsOk() const 
3254     CreateConvIfNeeded(); 
3256     // special case: no convReal created for wxFONTENCODING_ISO8859_1 
3257     if ( m_encoding 
== wxFONTENCODING_ISO8859_1 
) 
3258         return true; // always ok as we do it ourselves 
3260     // m_convReal->IsOk() is called at its own creation, so we know it must 
3261     // be ok if m_convReal is non-NULL 
3262     return m_convReal 
!= NULL
; 
3265 size_t wxCSConv::ToWChar(wchar_t *dst
, size_t dstLen
, 
3266                          const char *src
, size_t srcLen
) const 
3268     CreateConvIfNeeded(); 
3271         return m_convReal
->ToWChar(dst
, dstLen
, src
, srcLen
); 
3274     if ( srcLen 
== wxNO_LEN 
) 
3275         srcLen 
= strlen(src
) + 1; // take trailing NUL too 
3279         if ( dstLen 
< srcLen 
) 
3280             return wxCONV_FAILED
; 
3282         for ( size_t n 
= 0; n 
< srcLen
; n
++ ) 
3283             dst
[n
] = (unsigned char)(src
[n
]); 
3289 size_t wxCSConv::FromWChar(char *dst
, size_t dstLen
, 
3290                            const wchar_t *src
, size_t srcLen
) const 
3292     CreateConvIfNeeded(); 
3295         return m_convReal
->FromWChar(dst
, dstLen
, src
, srcLen
); 
3298     if ( srcLen 
== wxNO_LEN 
) 
3299         srcLen 
= wxWcslen(src
) + 1; 
3303         if ( dstLen 
< srcLen 
) 
3304             return wxCONV_FAILED
; 
3306         for ( size_t n 
= 0; n 
< srcLen
; n
++ ) 
3308             if ( src
[n
] > 0xFF ) 
3309                 return wxCONV_FAILED
; 
3311             dst
[n
] = (char)src
[n
]; 
3315     else // still need to check the input validity 
3317         for ( size_t n 
= 0; n 
< srcLen
; n
++ ) 
3319             if ( src
[n
] > 0xFF ) 
3320                 return wxCONV_FAILED
; 
3327 size_t wxCSConv::GetMBNulLen() const 
3329     CreateConvIfNeeded(); 
3333         return m_convReal
->GetMBNulLen(); 
3336     // otherwise, we are ISO-8859-1 
#if wxUSE_UNICODE_UTF8
// Return true if the real conversion object exists and reports being UTF-8.
bool wxCSConv::IsUTF8() const
{
    CreateConvIfNeeded();

    // without the real conversion object we are ISO-8859-1, i.e. not UTF-8
    return m_convReal && m_convReal->IsUTF8();
}
#endif // wxUSE_UNICODE_UTF8
3358 wxWCharBuffer 
wxSafeConvertMB2WX(const char *s
) 
3361         return wxWCharBuffer(); 
3363     wxWCharBuffer 
wbuf(wxConvLibc
.cMB2WX(s
)); 
3365         wbuf 
= wxMBConvUTF8().cMB2WX(s
); 
3367         wbuf 
= wxConvISO8859_1
.cMB2WX(s
); 
3372 wxCharBuffer 
wxSafeConvertWX2MB(const wchar_t *ws
) 
3375         return wxCharBuffer(); 
3377     wxCharBuffer 
buf(wxConvLibc
.cWX2MB(ws
)); 
3379         buf 
= wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
).cWX2MB(ws
); 
3384 #endif // wxUSE_UNICODE 
3386 // ---------------------------------------------------------------------------- 
3388 // ---------------------------------------------------------------------------- 
3390 // NB: The reason why we create converter objects in this convoluted way, 
3391 //     using a factory function instead of global variable, is that they 
3392 //     may be used at static initialization time (some of them are used by 
3393 //     wxString ctors and there may be a global wxString object). In other 
3394 //     words, possibly _before_ the converter global object would be 
3401 #undef wxConvISO8859_1 
3403 #define WX_DEFINE_GLOBAL_CONV2(klass, impl_klass, name, ctor_args)      \ 
3404     WXDLLIMPEXP_DATA_BASE(klass*) name##Ptr = NULL;                     \ 
3405     WXDLLIMPEXP_BASE klass* wxGet_##name##Ptr()                         \ 
3407         static impl_klass name##Obj ctor_args;                          \ 
3408         return &name##Obj;                                              \ 
3410     /* this ensures that all global converter objects are created */    \ 
3411     /* by the time static initialization is done, i.e. before any */    \ 
3412     /* thread is launched: */                                           \ 
3413     static klass* gs_##name##instance = wxGet_##name##Ptr() 
3415 #define WX_DEFINE_GLOBAL_CONV(klass, name, ctor_args) \ 
3416     WX_DEFINE_GLOBAL_CONV2(klass, klass, name, ctor_args) 
3419     // disable warning "variable 'xxx' was declared but never referenced" 
3420     #pragma warning(disable: 177) 
3424     WX_DEFINE_GLOBAL_CONV2(wxMBConv
, wxMBConv_win32
, wxConvLibc
, wxEMPTY_PARAMETER_VALUE
); 
3425 #elif 0 // defined(__WXOSX__) 
3426     WX_DEFINE_GLOBAL_CONV2(wxMBConv
, wxMBConv_cf
, wxConvLibc
,  (wxFONTENCODING_UTF8
)); 
3428     WX_DEFINE_GLOBAL_CONV2(wxMBConv
, wxMBConvLibc
, wxConvLibc
, wxEMPTY_PARAMETER_VALUE
); 
3431 // NB: we can't use wxEMPTY_PARAMETER_VALUE as final argument here because it's 
3432 //     passed to WX_DEFINE_GLOBAL_CONV2 after a macro expansion and so still 
3433 //     provokes an error message about "not enough macro parameters"; and we 
3434 //     can't use "()" here as the name##Obj declaration would be parsed as a 
3435 //     function declaration then, so use a semicolon and live with an extra 
3436 //     empty statement (and hope that no compiler warns about this) 
3437 WX_DEFINE_GLOBAL_CONV(wxMBConvStrictUTF8
, wxConvUTF8
, ;); 
3438 WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7
, wxConvUTF7
, ;); 
3440 WX_DEFINE_GLOBAL_CONV(wxCSConv
, wxConvLocal
, (wxFONTENCODING_SYSTEM
)); 
3441 WX_DEFINE_GLOBAL_CONV(wxCSConv
, wxConvISO8859_1
, (wxFONTENCODING_ISO8859_1
)); 
3443 WXDLLIMPEXP_DATA_BASE(wxMBConv 
*) wxConvCurrent 
= wxGet_wxConvLibcPtr(); 
3444 WXDLLIMPEXP_DATA_BASE(wxMBConv 
*) wxConvUI 
= wxGet_wxConvLocalPtr(); 
3447 // The xnu kernel always communicates file paths in decomposed UTF-8. 
3448 // WARNING: Are we sure that CFString's conversion will cause decomposition? 
3449 static wxMBConv_cf 
wxConvMacUTF8DObj(wxFONTENCODING_UTF8
); 
3452 WXDLLIMPEXP_DATA_BASE(wxMBConv 
*) wxConvFileName 
= 
3455 #else // !__DARWIN__ 
3456                                     wxGet_wxConvLibcPtr(); 
3457 #endif // __DARWIN__/!__DARWIN__ 
3459 #else // !wxUSE_WCHAR_T 
3461 // FIXME-UTF8: remove this, wxUSE_WCHAR_T is required now 
3462 // stand-ins in absence of wchar_t 
3463 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
, 
3468 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T