1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // ============================================================================
17 // ============================================================================
19 // ----------------------------------------------------------------------------
21 // ----------------------------------------------------------------------------
23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
24 #pragma implementation "strconv.h"
27 // For compilers that support precompilation, includes "wx.h".
28 #include "wx/wxprec.h"
39 #include "wx/strconv.h"
44 #include "wx/msw/private.h"
48 #include "wx/msw/missing.h"
59 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
60 #define wxHAVE_WIN32_MB2WC
61 #endif // __WIN32__ but !__WXMICROWIN__
63 // ----------------------------------------------------------------------------
65 // ----------------------------------------------------------------------------
73 #include "wx/thread.h"
76 #include "wx/encconv.h"
77 #include "wx/fontmap.h"
81 #include <ATSUnicode.h>
82 #include <TextCommon.h>
83 #include <TextEncodingConverter.h>
85 #include "wx/mac/private.h" // includes mac headers
87 // ----------------------------------------------------------------------------
89 // ----------------------------------------------------------------------------
// In-place byte-swap helpers: apply wxUINT32_SWAP_ALWAYS / wxUINT16_SWAP_ALWAYS
// to each of the first `len` elements of `str`.
// NOTE(review): the macro arguments are not parenthesized and `len` is
// re-evaluated on every iteration, so only pass simple expressions.
91 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
92 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
// Charset names describing wchar_t, chosen from SIZEOF_WCHAR_T and
// WORDS_BIGENDIAN: WC_NAME has no byte-order suffix, WC_NAME_BEST names the
// byte order explicitly, and WC_BSWAP is the matching swap helper.
// Both names are handed to iconv_open() by wxMBConv_iconv later in this file.
94 #if SIZEOF_WCHAR_T == 4
95 #define WC_NAME "UCS4"
96 #define WC_BSWAP BSWAP_UCS4
97 #ifdef WORDS_BIGENDIAN
98 #define WC_NAME_BEST "UCS-4BE"
100 #define WC_NAME_BEST "UCS-4LE"
102 #elif SIZEOF_WCHAR_T == 2
103 #define WC_NAME "UTF16"
104 #define WC_BSWAP BSWAP_UTF16
106 #ifdef WORDS_BIGENDIAN
107 #define WC_NAME_BEST "UTF-16BE"
109 #define WC_NAME_BEST "UTF-16LE"
111 #else // sizeof(wchar_t) != 2 nor 4
112 // does this ever happen?
113 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
116 // ============================================================================
118 // ============================================================================
120 // ----------------------------------------------------------------------------
121 // UTF-16 en/decoding to/from UCS-4
122 // ----------------------------------------------------------------------------
// Encodes the UCS-4 value `input` as UTF-16 code unit(s) written to `output`.
// Visible branches: one stores `input` unchanged as a single 16-bit unit, one
// tests for input >= 0x110000, and the last writes a surrogate pair:
//   high = (input >> 10) + 0xd7c0, low = (input & 0x3ff) + 0xdc00.
// Callers in this file treat a return value of (size_t)-1 as failure.
// NOTE(review): the first condition and all return statements are not visible
// in this excerpt, so the exact return values are not documented here.
125 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
130 *output
= (wxUint16
) input
;
133 else if (input
>=0x110000)
141 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
142 *output
= (wxUint16
) ((input
&0x3ff)+0xdc00);
// Decodes one UCS-4 value from the UTF-16 sequence at `input` into `output`.
// Visible branches: the first unit lies outside 0xd800..0xdfff (not a
// surrogate); the second unit lies outside 0xdc00..0xdfff (not a valid low
// surrogate); otherwise the pair is combined as
//   ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00).
// Callers in this file treat a return value of (size_t)-1 as failure and
// otherwise advance by the returned number of units.
// NOTE(review): the first unit is only tested for being any surrogate, not
// specifically a high one (0xd800..0xdbff) -- confirm a leading low surrogate
// is meant to be accepted. Return statements are not visible in this excerpt.
148 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
150 if ((*input
<0xd800) || (*input
>0xdfff))
155 else if ((input
[1]<0xdc00) || (input
[1]>0xdfff))
162 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
168 // ----------------------------------------------------------------------------
170 // ----------------------------------------------------------------------------
// Out-of-line destructor of the conversion base class; it has no work to do.
172 wxMBConv::~wxMBConv()
174 // nothing to do here (necessary for Darwin linking probably)
// Converts the NUL-terminated multibyte string `psz` to a wide-char buffer.
// MB2WC() is first called with a null destination to obtain the required
// length; a buffer of nLen characters is then filled with a second call that
// passes nLen + 1 so the terminator is converted as well.
// Both calls are checked against (size_t)-1. A buffer built from a null
// pointer is constructed at the end -- presumably the value returned on
// failure (the return statements are not visible in this excerpt).
177 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
181 // calculate the length of the buffer needed first
182 size_t nLen
= MB2WC(NULL
, psz
, 0);
183 if ( nLen
!= (size_t)-1 )
185 // now do the actual conversion
186 wxWCharBuffer
buf(nLen
);
187 nLen
= MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NULL
188 if ( nLen
!= (size_t)-1 )
195 wxWCharBuffer
buf((wchar_t *)NULL
);
// Converts the NUL-terminated wide string `pwz` to a multibyte buffer.
// WC2MB() is first called with a null destination to obtain the required
// length; the buffer is then allocated with 3 extra bytes and the second call
// is given nLen + 4, leaving room for a terminator as wide as a wxUint32.
// Both calls are checked against (size_t)-1. A buffer built from a null
// pointer is constructed at the end -- presumably the value returned on
// failure (the return statements are not visible in this excerpt).
200 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
204 size_t nLen
= WC2MB(NULL
, pwz
, 0);
205 if ( nLen
!= (size_t)-1 )
207 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
208 nLen
= WC2MB(buf
.data(), pwz
, nLen
+ 4);
209 if ( nLen
!= (size_t)-1 )
216 wxCharBuffer
buf((char *)NULL
);
// Length-aware variant: converts nStringLen bytes of szString, which may
// contain embedded NUL characters, to wide characters.
// The input is walked one NUL-delimited substring at a time up to
// szString + nStringLen + 1. Each substring is measured with
// MB2WC(NULL, ...), the running total (plus one per terminator) is kept in
// nActualLength, and the substring is then converted into theBuffer.
// On a failed measurement or conversion the first element of theBuffer is
// set to NUL. On success the total is stored in *pOutSize, which must not be
// null (asserted).
// NOTE(review): each substring is written at offset szPos - szStart, i.e. the
// *input* offset -- confirm this is intended when a substring's converted
// length differs from its byte length.
// NOTE(review): the buffer is grown to nCurrentSize << 1; confirm this is
// sufficient when nStringLen is 0 (the update of nCurrentSize, if any, is not
// visible in this excerpt).
221 const wxWCharBuffer
wxMBConv::cMB2WC(const char *szString
, size_t nStringLen
, size_t* pOutSize
) const
223 wxASSERT(pOutSize
!= NULL
);
225 const char* szEnd
= szString
+ nStringLen
+ 1;
226 const char* szPos
= szString
;
227 const char* szStart
= szPos
;
229 size_t nActualLength
= 0;
230 size_t nCurrentSize
= nStringLen
; //try normal size first (should never resize?)
232 wxWCharBuffer
theBuffer(nCurrentSize
);
234 //Convert the string until the length() is reached, continuing the
235 //loop every time a null character is reached
236 while(szPos
!= szEnd
)
238 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
240 //Get the length of the current (sub)string
241 size_t nLen
= MB2WC(NULL
, szPos
, 0);
243 //Invalid conversion?
244 if( nLen
== (size_t)-1 )
247 theBuffer
.data()[0u] = wxT('\0');
252 //Increase the actual length (+1 for current null character)
253 nActualLength
+= nLen
+ 1;
255 //if buffer too small, realloc the buffer
256 if (nActualLength
> (nCurrentSize
+1))
258 wxWCharBuffer
theNewBuffer(nCurrentSize
<< 1);
259 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
* sizeof(wchar_t));
260 theBuffer
= theNewBuffer
;
264 //Convert the current (sub)string
265 if ( MB2WC(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
268 theBuffer
.data()[0u] = wxT('\0');
272 //Increment to next (sub)string
273 //Note that we have to use strlen here instead of nLen
274 //here because XX2XX gives us the size of the output buffer,
275 //not necessarily the length of the string
276 szPos
+= strlen(szPos
) + 1;
279 //success - return actual length and the buffer
280 *pOutSize
= nActualLength
;
// Length-aware variant: converts nStringLen wide characters of szString,
// which may contain embedded NUL characters, to a multibyte buffer.
// The input is walked one NUL-delimited substring at a time up to
// szString + nStringLen + 1. Each substring is measured with
// WC2MB(NULL, ...), the running total (plus one per terminator) is kept in
// nActualLength, and the substring is then converted into theBuffer, which
// starts out at 4 * nStringLen bytes.
// On a failed measurement or conversion the first byte of theBuffer is set
// to NUL. On success the total is stored in *pOutSize, which must not be null
// (asserted).
// NOTE(review): each substring is written at byte offset szPos - szStart,
// i.e. the offset in *wide characters* of the input -- confirm this cannot
// make the output of one substring overlap the previous one when a character
// converts to more than one byte.
// NOTE(review): wxT('\0') is stored into a char buffer here; harmless for a
// NUL but looks copied from the wide-char variant.
284 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *szString
, size_t nStringLen
, size_t* pOutSize
) const
286 wxASSERT(pOutSize
!= NULL
);
288 const wchar_t* szEnd
= szString
+ nStringLen
+ 1;
289 const wchar_t* szPos
= szString
;
290 const wchar_t* szStart
= szPos
;
292 size_t nActualLength
= 0;
293 size_t nCurrentSize
= nStringLen
<< 2; //try * 4 first
295 wxCharBuffer
theBuffer(nCurrentSize
);
297 //Convert the string until the length() is reached, continuing the
298 //loop every time a null character is reached
299 while(szPos
!= szEnd
)
301 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
303 //Get the length of the current (sub)string
304 size_t nLen
= WC2MB(NULL
, szPos
, 0);
306 //Invalid conversion?
307 if( nLen
== (size_t)-1 )
310 theBuffer
.data()[0u] = wxT('\0');
314 //Increase the actual length (+1 for current null character)
315 nActualLength
+= nLen
+ 1;
317 //if buffer too small, realloc the buffer
318 if (nActualLength
> (nCurrentSize
+1))
320 wxCharBuffer
theNewBuffer(nCurrentSize
<< 1);
321 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
);
322 theBuffer
= theNewBuffer
;
326 //Convert the current (sub)string
327 if(WC2MB(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
330 theBuffer
.data()[0u] = wxT('\0');
334 //Increment to next (sub)string
335 //Note that we have to use wxWcslen here instead of nLen
336 //here because XX2XX gives us the size of the output buffer,
337 //not necessarily the length of the string
338 szPos
+= wxWcslen(szPos
) + 1;
341 //success - return actual length and the buffer
342 *pOutSize
= nActualLength
;
346 // ----------------------------------------------------------------------------
348 // ----------------------------------------------------------------------------
// Multibyte -> wide conversion that simply forwards all three arguments to
// wxMB2WC() and returns its result.
350 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
352 return wxMB2WC(buf
, psz
, n
);
// Wide -> multibyte conversion that simply forwards all three arguments to
// wxWC2MB() and returns its result.
355 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
357 return wxWC2MB(buf
, psz
, n
);
362 // ----------------------------------------------------------------------------
363 // wxConvBrokenFileNames
364 // ----------------------------------------------------------------------------
// Chooses the converter (m_conv) used for file names:
//  (1) G_FILENAME_ENCODING, when set to something other than UTF-8/UTF8,
//      selects a wxCSConv for that encoding;
//  (2) otherwise, when the variable is empty, the upper-cased system encoding
//      name is used instead; if that is non-empty and not UTF-8/UTF8, it is
//      exported as G_FILENAME_ENCODING and wxMBConvLibc is used;
//  (3) otherwise wxMBConvUTF8 with MAP_INVALID_UTF8_TO_OCTAL is used.
// NOTE(review): the branch structure (braces/else) between steps (2) and (3)
// is not visible in this excerpt.
366 wxConvBrokenFileNames::wxConvBrokenFileNames()
368 // decide which conversion to use for the file names
370 // (1) this variable exists for the sole purpose of specifying the encoding
371 // of the filenames for GTK+ programs, so use it if it is set
372 wxString
encName(wxGetenv(_T("G_FILENAME_ENCODING")));
374 if ( !encName
.empty() && encName
!= _T("UTF-8") && encName
!= _T("UTF8") )
376 m_conv
= new wxCSConv(encName
);
378 else // no G_FILENAME_ENCODING
380 if ( encName
.empty() )
381 encName
= wxLocale::GetSystemEncodingName().Upper();
383 // (2) if a non default locale is set, assume that the user wants his
384 // filenames in this locale too
385 if ( !encName
.empty() && encName
!= _T("UTF-8") && encName
!= _T("UTF8") )
387 wxSetEnv(_T("G_FILENAME_ENCODING"), encName
);
388 m_conv
= new wxMBConvLibc
;
392 // (3) finally use UTF-8 by default
393 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
// Multibyte -> wide conversion delegated to the converter selected in the
// constructor (m_conv). The return type and the `psz` parameter line are not
// visible in this excerpt.
399 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf
,
401 size_t outputSize
) const
403 return m_conv
->MB2WC( outputBuf
, psz
, outputSize
);
// Wide -> multibyte conversion delegated to the converter selected in the
// constructor (m_conv). The return type and the `psz` parameter line are not
// visible in this excerpt.
407 wxConvBrokenFileNames::WC2MB(char *outputBuf
,
409 size_t outputSize
) const
411 return m_conv
->WC2MB( outputBuf
, psz
, outputSize
);
416 // ----------------------------------------------------------------------------
418 // ----------------------------------------------------------------------------
420 // Implementation (C) 2004 Fredrik Roubert
423 // BASE64 decoding table
// Lookup table indexed by byte value (256 entries, 8 per row).
// 'A'-'Z' map to 0x00-0x19, 'a'-'z' to 0x1a-0x33, '0'-'9' to 0x34-0x3d,
// '+' to 0x3e and '/' to 0x3f. Every other byte maps to 0xff, which the
// UTF-7 decoder below uses to detect the end of a BASE64 run.
425 static const unsigned char utf7unb64
[] =
427 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
428 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
429 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
430 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
431 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
432 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
433 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
434 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
435 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
436 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
437 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
438 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
439 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
440 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
441 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
442 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
443 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
444 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
445 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
446 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
447 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
448 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
449 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
450 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
451 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
452 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
453 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
454 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
455 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
456 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
457 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
458 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
// Decodes the UTF-7 string `psz` into wide characters.
// The main loop runs while input remains and either `buf` is null or fewer
// than `n` characters have been counted in `len`.
// Inside a BASE64 run each input byte is mapped through utf7unb64 (0xff ends
// the run); 6 bits are added per byte and a decoded byte is extracted every
// time at least 8 bits are available. The `lsb` flag alternates so that
// consecutive decoded bytes form the high byte (c << 8) and then the low byte
// of one 16-bit unit.
// NOTE(review): the direct-character branch, the `len` bookkeeping and the
// return statement are not visible in this excerpt.
461 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
465 while (*psz
&& ((!buf
) || (len
< n
)))
467 unsigned char cc
= *psz
++;
475 else if (*psz
== '-')
485 // BASE64 encoded string
489 for (lsb
= false, d
= 0, l
= 0;
490 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff; psz
++)
494 for (l
+= 6; l
>= 8; lsb
= !lsb
)
496 c
= (unsigned char)((d
>> (l
-= 8)) % 256);
505 *buf
= (wchar_t)(c
<< 8);
512 if (buf
&& (len
< n
))
518 // BASE64 encoding table
// Maps a 6-bit value (0..63) to its BASE64 character:
// 'A'-'Z', 'a'-'z', '0'-'9', '+', '/'. Indexed with `... % 64` by the UTF-7
// encoder below.
520 static const unsigned char utf7enb64
[] =
522 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
523 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
524 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
525 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
526 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
527 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
528 'w', 'x', 'y', 'z', '0', '1', '2', '3',
529 '4', '5', '6', '7', '8', '9', '+', '/'
533 // UTF-7 encoding table
535 // 0 - Set D (directly encoded characters)
536 // 1 - Set O (optional direct characters)
537 // 2 - whitespace characters (optional)
538 // 3 - special characters
// Classification of the 128 ASCII characters, 16 per row, indexed by
// character code; the values are 0 (set D), 1 (set O), 2 (whitespace) or
// 3 (special). The encoder below writes a character directly only when its
// class is < 1, i.e. class 0.
540 static const unsigned char utf7encode
[128] =
542 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
543 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
544 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
545 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
546 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
547 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
548 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
549 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
// Encodes the wide string `psz` as UTF-7.
// The main loop runs while input remains and either `buf` is null or fewer
// than `n` bytes have been counted in `len`.
// Characters below 0x80 whose utf7encode class is 0 take the direct branch;
// values above 0xffff take a separate branch (no surrogate pairs are
// generated); everything else is BASE64-encoded: the high and low byte of
// each character are fed into `d` 8 bits at a time and a character from
// utf7enb64 is emitted for every 6 bits available. The run ends at a NUL or
// at the next directly encodable character, and leftover bits are flushed
// with ((d % 16) << (6 - l)) % 64.
// NOTE(review): the bodies of the direct and > 0xffff branches, the `len`
// bookkeeping and the return statement are not visible in this excerpt.
552 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
558 while (*psz
&& ((!buf
) || (len
< n
)))
561 if (cc
< 0x80 && utf7encode
[cc
] < 1)
569 else if (((wxUint32
)cc
) > 0xffff)
571 // no surrogate pair generation (yet?)
582 // BASE64 encode string
583 unsigned int lsb
, d
, l
;
584 for (d
= 0, l
= 0;; psz
++)
586 for (lsb
= 0; lsb
< 2; lsb
++)
589 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
591 for (l
+= 8; l
>= 6; )
595 *buf
++ = utf7enb64
[(d
>> l
) % 64];
600 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
606 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
615 if (buf
&& (len
< n
))
620 // ----------------------------------------------------------------------------
622 // ----------------------------------------------------------------------------
// utf8_max[k] is the largest value compared against for index k by the UTF-8
// converters below: the decoder rejects a decoded value <= utf8_max[ocnt]
// (overlong form) and the encoder picks the first cnt with
// cc <= utf8_max[cnt].
624 static wxUint32 utf8_max
[]=
625 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
627 // boundaries of the private use area we use to (temporarily) remap bytes
628 // that are invalid in a UTF-8 encoded string: 256 values from 0x100000
629 const wxUint32 wxUnicodePUA
= 0x100000;
630 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
// Decodes the UTF-8 string `psz` into wide characters.
// The main loop runs while input remains and either `buf` is null or fewer
// than `n` characters have been counted in `len`.
// For each lead byte the number of leading 1 bits is counted in `cnt`; the
// payload bits are collected in `res` from continuation bytes, each of which
// must match 10xxxxxx ((cc & 0xC0) == 0x80). The sequence is rejected when
// it was flagged invalid, when res <= utf8_max[ocnt] (overlong encoding), or
// -- with MAP_INVALID_UTF8_TO_PUA -- when res falls inside the range
// [wxUnicodePUA, wxUnicodePUAEnd) reserved for remapping.
// Rejected input bytes (opsz..psz) are then handled according to m_options:
//   MAP_INVALID_UTF8_TO_PUA   - each byte becomes wxUnicodePUA + byte;
//   MAP_INVALID_UTF8_TO_OCTAL - the three octal digits of each byte are
//                               written (any preceding output is not visible
//                               in this excerpt);
//   otherwise                 - see the MAP_INVALID_UTF8_NOT branch.
// NOTE(review): inside the octal branch the local `unsigned char n` shadows
// the size parameter `n` of the same name.
// NOTE(review): the `len` bookkeeping, the #if matching the visible #endif
// and the return statements are not visible in this excerpt.
632 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
636 while (*psz
&& ((!buf
) || (len
< n
)))
638 const char *opsz
= psz
;
639 bool invalid
= false;
640 unsigned char cc
= *psz
++, fc
= cc
;
642 for (cnt
= 0; fc
& 0x80; cnt
++)
656 // invalid UTF-8 sequence
661 unsigned ocnt
= cnt
- 1;
662 wxUint32 res
= cc
& (0x3f >> cnt
);
666 if ((cc
& 0xC0) != 0x80)
668 // invalid UTF-8 sequence
673 res
= (res
<< 6) | (cc
& 0x3f);
675 if (invalid
|| res
<= utf8_max
[ocnt
])
677 // illegal UTF-8 encoding
680 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
681 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
683 // if one of our PUA characters turns up externally
684 // it must also be treated as an illegal sequence
685 // (a bit like you have to escape an escape character)
691 // cast is ok because wchar_t == wxUint16 if WC_UTF16
692 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
693 if (pa
== (size_t)-1)
707 #endif // WC_UTF16/!WC_UTF16
712 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
714 while (opsz
< psz
&& (!buf
|| len
< n
))
717 // cast is ok because wchar_t == wxUint16 if WC_UTF16
718 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
719 wxASSERT(pa
!= (size_t)-1);
726 *buf
++ = wxUnicodePUA
+ (unsigned char)*opsz
;
732 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
734 while (opsz
< psz
&& (!buf
|| len
< n
))
736 if ( buf
&& len
+ 3 < n
)
738 unsigned char n
= *opsz
;
740 *buf
++ = (wchar_t)( L
'0' + n
/ 0100 );
741 *buf
++ = (wchar_t)( L
'0' + (n
% 0100) / 010 );
742 *buf
++ = (wchar_t)( L
'0' + n
% 010 );
748 else // MAP_INVALID_UTF8_NOT
755 if (buf
&& (len
< n
))
// Returns true when wch is one of the wide characters L'0' .. L'7'.
760 static inline bool isoctal(wchar_t wch
)
762 return L
'0' <= wch
&& wch
<= L
'7';
// Encodes the wide string `psz` as UTF-8.
// The main loop runs while input remains and either `buf` is null or fewer
// than `n` bytes have been counted in `len`.
// The next code point `cc` is obtained either through decode_utf16() (on a
// decode failure the input advances by one unit) or by masking the wide
// character with 0x7fffffff; which one applies depends on preprocessor lines
// not visible in this excerpt.
// With MAP_INVALID_UTF8_TO_PUA, a value in [wxUnicodePUA, wxUnicodePUAEnd)
// is written back as the single original byte (cc - wxUnicodePUA).
// With MAP_INVALID_UTF8_TO_OCTAL, three following octal digits are folded
// back into one byte (the rest of that condition is not visible here).
// Otherwise `cnt` is chosen as the first index with cc <= utf8_max[cnt]; the
// lead byte is (-128 >> cnt) combined with the top payload bits, and each
// continuation byte is 0x80 | 6 payload bits.
// NOTE(review): (-128 >> cnt) relies on arithmetic right shift of a negative
// int.
765 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
769 while (*psz
&& ((!buf
) || (len
< n
)))
773 // cast is ok for WC_UTF16
774 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
775 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
777 cc
=(*psz
++) & 0x7fffffff;
780 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
781 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
784 *buf
++ = (char)(cc
- wxUnicodePUA
);
787 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
789 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
793 *buf
++ = (char) ((psz
[0] - L
'0')*0100 +
794 (psz
[1] - L
'0')*010 +
804 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
818 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
820 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
832 // ----------------------------------------------------------------------------
834 // ----------------------------------------------------------------------------
// Map the "straight" (native byte order) and "swap" (opposite byte order)
// implementations below onto the BE/LE class names: on big-endian hosts
// straight is UTF16BE and swap is UTF16LE, otherwise the reverse.
836 #ifdef WORDS_BIGENDIAN
837 #define wxMBConvUTF16straight wxMBConvUTF16BE
838 #define wxMBConvUTF16swap wxMBConvUTF16LE
840 #define wxMBConvUTF16swap wxMBConvUTF16BE
841 #define wxMBConvUTF16straight wxMBConvUTF16LE
847 // copy 16bit MB to 16bit String
// Variant for 16-bit wchar_t: reads `psz` as a sequence of 16-bit units up to
// a zero unit and copies each unit to `buf` unchanged. The loop also stops
// once `len` reaches `n` when `buf` is non-null; a terminating 0 is written
// only if len < n.
// NOTE(review): `psz` is dereferenced through a wxUint16* cast, so it is
// presumably expected to be suitably aligned -- confirm with callers.
848 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
852 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
855 *buf
++ = *(wxUint16
*)psz
;
858 psz
+= sizeof(wxUint16
);
860 if (buf
&& len
<n
) *buf
=0;
866 // copy 16bit String to 16bit MB
// Variant for 16-bit wchar_t: copies each wide character of `psz` to `buf`
// as one 16-bit unit, counting sizeof(wxUint16) bytes per character in
// `len`. A 16-bit zero terminator is written when
// len <= n - sizeof(wxUint16).
// NOTE(review): n - sizeof(wxUint16) wraps around for n < 2 because n is an
// unsigned size_t -- confirm callers never pass such a size with a non-null
// buffer.
867 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
871 while (*psz
&& (!buf
|| len
< n
))
875 *(wxUint16
*)buf
= *psz
;
876 buf
+= sizeof(wxUint16
);
878 len
+= sizeof(wxUint16
);
881 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
887 // swap 16bit MB to 16bit String
// Variant for 16-bit wchar_t: reads `psz` as 16-bit units up to a zero unit
// and stores each unit in `buf` with its two bytes exchanged. The loop also
// stops once `len` reaches `n` when `buf` is non-null; a terminating 0 is
// written only if len < n.
888 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
892 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
896 ((char *)buf
)[0] = psz
[1];
897 ((char *)buf
)[1] = psz
[0];
901 psz
+= sizeof(wxUint16
);
903 if (buf
&& len
<n
) *buf
=0;
909 // swap 16bit String to 16bit MB
// Writes the two bytes of each wide character in exchanged order, counting
// sizeof(wxUint16) bytes per character in `len`. A 16-bit zero terminator is
// written when len <= n - sizeof(wxUint16).
// NOTE(review): n - sizeof(wxUint16) wraps around for n < 2 because n is an
// unsigned size_t.
910 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
914 while (*psz
&& (!buf
|| len
< n
))
918 *buf
++ = ((char*)psz
)[1];
919 *buf
++ = ((char*)psz
)[0];
921 len
+= sizeof(wxUint16
);
924 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
933 // copy 16bit MB to 32bit String
// Variant for 32-bit wchar_t: reads `psz` as native-order UTF-16 up to a
// zero unit, decoding one code point per iteration with decode_utf16() and
// advancing the input by the number of units it consumed. A decode result of
// (size_t)-1 is treated specially (that branch body is not visible here).
// A terminating 0 is written only if len < n.
934 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
938 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
941 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
942 if (pa
== (size_t)-1)
948 psz
+= pa
* sizeof(wxUint16
);
950 if (buf
&& len
<n
) *buf
=0;
956 // copy 32bit String to 16bit MB
// Variant for 32-bit wchar_t: encodes each wide character with
// encode_utf16() into the temporary `cc` and copies the resulting one or two
// 16-bit units to `buf` in native order; `len` grows by
// pa * sizeof(wxUint16). An encode result of (size_t)-1 is treated specially
// (that branch body is not visible here). A 16-bit zero terminator is
// written when len <= n - sizeof(wxUint16).
// NOTE(review): n - sizeof(wxUint16) wraps around for n < 2 because n is an
// unsigned size_t.
957 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
961 while (*psz
&& (!buf
|| len
< n
))
964 size_t pa
=encode_utf16(*psz
, cc
);
966 if (pa
== (size_t)-1)
971 *(wxUint16
*)buf
= cc
[0];
972 buf
+= sizeof(wxUint16
);
975 *(wxUint16
*)buf
= cc
[1];
976 buf
+= sizeof(wxUint16
);
980 len
+= pa
*sizeof(wxUint16
);
983 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
989 // swap 16bit MB to 32bit String
// Variant for 32-bit wchar_t: reads `psz` as opposite-order UTF-16 up to a
// zero unit. Four input bytes are copied into `tmp` with the bytes of each
// 16-bit unit exchanged, then decode_utf16() decodes one code point from
// `tmp`; the input advances by the number of units consumed. A decode result
// of (size_t)-1 is treated specially (that branch body is not visible here).
// A terminating 0 is written only if len < n.
990 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
994 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
998 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
999 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
1001 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
1002 if (pa
== (size_t)-1)
1009 psz
+= pa
* sizeof(wxUint16
);
1011 if (buf
&& len
<n
) *buf
=0;
1017 // swap 32bit String to 16bit MB
// Variant for 32-bit wchar_t: encodes each wide character with
// encode_utf16() into the temporary `cc` and writes the resulting one or two
// 16-bit units to `buf` with the bytes of each unit exchanged; `len` grows
// by pa * sizeof(wxUint16). An encode result of (size_t)-1 is treated
// specially (that branch body is not visible here). A 16-bit zero terminator
// is written when len <= n - sizeof(wxUint16).
// NOTE(review): n - sizeof(wxUint16) wraps around for n < 2 because n is an
// unsigned size_t.
1018 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1022 while (*psz
&& (!buf
|| len
< n
))
1025 size_t pa
=encode_utf16(*psz
, cc
);
1027 if (pa
== (size_t)-1)
1032 *buf
++ = ((char*)cc
)[1];
1033 *buf
++ = ((char*)cc
)[0];
1036 *buf
++ = ((char*)cc
)[3];
1037 *buf
++ = ((char*)cc
)[2];
1041 len
+= pa
*sizeof(wxUint16
);
1044 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
1052 // ----------------------------------------------------------------------------
1054 // ----------------------------------------------------------------------------
// Map the "straight" (native byte order) and "swap" (opposite byte order)
// UTF-32 implementations below onto the BE/LE class names: on big-endian
// hosts straight is UTF32BE and swap is UTF32LE, otherwise the reverse.
1056 #ifdef WORDS_BIGENDIAN
1057 #define wxMBConvUTF32straight wxMBConvUTF32BE
1058 #define wxMBConvUTF32swap wxMBConvUTF32LE
1060 #define wxMBConvUTF32swap wxMBConvUTF32BE
1061 #define wxMBConvUTF32straight wxMBConvUTF32LE
// Global converter objects for the two UTF-32 byte orders.
1065 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1066 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
1071 // copy 32bit MB to 16bit String
// Variant for 16-bit wchar_t: reads `psz` as native-order 32-bit values up
// to a zero value and encodes each one with encode_utf16() into the
// temporary `cc`. An encode result of (size_t)-1 is treated specially (that
// branch body and the copy into `buf` are not visible here). A terminating 0
// is written only if len < n.
1072 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1076 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1080 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
1081 if (pa
== (size_t)-1)
1091 psz
+= sizeof(wxUint32
);
1093 if (buf
&& len
<n
) *buf
=0;
1099 // copy 16bit String to 32bit MB
// Variant for 16-bit wchar_t: decodes one code point at a time from the
// UTF-16 wide string with decode_utf16() and stores it in `buf` as a
// native-order 32-bit value, counting sizeof(wxUint32) bytes per code point
// in `len`. A decode result of (size_t)-1 is treated specially (that branch
// body is not visible here). The terminator is handled when
// len <= n - sizeof(wxUint32).
// NOTE(review): n - sizeof(wxUint32) wraps around for n < 4 because n is an
// unsigned size_t.
1100 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1104 while (*psz
&& (!buf
|| len
< n
))
1108 // cast is ok for WC_UTF16
1109 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
1110 if (pa
== (size_t)-1)
1115 *(wxUint32
*)buf
= cc
;
1116 buf
+= sizeof(wxUint32
);
1118 len
+= sizeof(wxUint32
);
1122 if (buf
&& len
<=n
-sizeof(wxUint32
))
1130 // swap 32bit MB to 16bit String
// Variant for 16-bit wchar_t: reads `psz` as opposite-order 32-bit values up
// to a zero value. The four bytes of each value are reversed into `tmp`,
// which is then encoded with encode_utf16() into the temporary `cc`. An
// encode result of (size_t)-1 is treated specially (that branch body and the
// copy into `buf` are not visible here).
1131 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1135 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1138 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
1139 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
1144 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
1145 if (pa
== (size_t)-1)
1155 psz
+= sizeof(wxUint32
);
1165 // swap 16bit String to 32bit MB
// Variant for 16-bit wchar_t: decodes one code point at a time from the
// UTF-16 wide string with decode_utf16(), storing the result through a
// wxUint32* view of the temporary `cc`; `len` grows by sizeof(wxUint32) per
// code point. A decode result of (size_t)-1 is treated specially. The byte
// reversal into `buf` is not visible in this excerpt. The terminator is
// handled when len <= n - sizeof(wxUint32).
// NOTE(review): n - sizeof(wxUint32) wraps around for n < 4 because n is an
// unsigned size_t.
1166 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1170 while (*psz
&& (!buf
|| len
< n
))
1174 // cast is ok for WC_UTF16
1175 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
1176 if (pa
== (size_t)-1)
1186 len
+= sizeof(wxUint32
);
1190 if (buf
&& len
<=n
-sizeof(wxUint32
))
1199 // copy 32bit MB to 32bit String
// Variant for 32-bit wchar_t: reads `psz` as native-order 32-bit values up
// to a zero value and copies each one to `buf` unchanged. The loop also
// stops once `len` reaches `n` when `buf` is non-null.
1200 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1204 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1207 *buf
++ = *(wxUint32
*)psz
;
1209 psz
+= sizeof(wxUint32
);
1219 // copy 32bit String to 32bit MB
// Variant for 32-bit wchar_t: copies each wide character to `buf` as a
// native-order 32-bit value, counting sizeof(wxUint32) bytes per character
// in `len`. The terminator is handled when len <= n - sizeof(wxUint32).
// NOTE(review): n - sizeof(wxUint32) wraps around for n < 4 because n is an
// unsigned size_t.
1220 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1224 while (*psz
&& (!buf
|| len
< n
))
1228 *(wxUint32
*)buf
= *psz
;
1229 buf
+= sizeof(wxUint32
);
1232 len
+= sizeof(wxUint32
);
1236 if (buf
&& len
<=n
-sizeof(wxUint32
))
1243 // swap 32bit MB to 32bit String
// Variant for 32-bit wchar_t: reads `psz` as opposite-order 32-bit values up
// to a zero value and stores each one in `buf` with its four bytes reversed.
// The loop also stops once `len` reaches `n` when `buf` is non-null.
1244 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1248 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1252 ((char *)buf
)[0] = psz
[3];
1253 ((char *)buf
)[1] = psz
[2];
1254 ((char *)buf
)[2] = psz
[1];
1255 ((char *)buf
)[3] = psz
[0];
1259 psz
+= sizeof(wxUint32
);
1269 // swap 32bit String to 32bit MB
// Variant for 32-bit wchar_t: writes the four bytes of each wide character
// to `buf` in reversed order, counting sizeof(wxUint32) bytes per character
// in `len`. The terminator is handled when len <= n - sizeof(wxUint32).
// NOTE(review): n - sizeof(wxUint32) wraps around for n < 4 because n is an
// unsigned size_t.
1270 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1274 while (*psz
&& (!buf
|| len
< n
))
1278 *buf
++ = ((char *)psz
)[3];
1279 *buf
++ = ((char *)psz
)[2];
1280 *buf
++ = ((char *)psz
)[1];
1281 *buf
++ = ((char *)psz
)[0];
1283 len
+= sizeof(wxUint32
);
1287 if (buf
&& len
<=n
-sizeof(wxUint32
))
1297 // ============================================================================
1298 // The classes doing conversion using the iconv_xxx() functions
1299 // ============================================================================
1303 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1304 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1305 // (unless there's yet another bug in glibc) the only case when iconv()
1306 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1307 // left in the input buffer -- when _real_ error occurs,
1308 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1310 // [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft): true when an iconv() call should be treated
// as failed. For glibc 2.0/2.1 a (size_t)-1 result with errno == E2BIG and
// no input bytes left is NOT counted as a failure; elsewhere any (size_t)-1
// result is.
1311 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1312 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1313 (errno != E2BIG || bufLeft != 0))
1315 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
// Casts an input-pointer argument to the type iconv() declares for it
// (ICONV_CONST is supplied by the build configuration).
1318 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1320 // ----------------------------------------------------------------------------
1321 // wxMBConv_iconv: encapsulates an iconv character set
1322 // ----------------------------------------------------------------------------
// wxMBConv implementation backed by a pair of iconv descriptors: m2w
// converts from the named charset to the wchar_t charset and w2m converts
// back. The object is usable only when both descriptors were opened (neither
// equals (iconv_t)-1).
// NOTE(review): the access specifiers, the declarations of m2w/w2m and the
// name of the inline validity check are not visible in this excerpt.
1324 class wxMBConv_iconv
: public wxMBConv
1327 wxMBConv_iconv(const wxChar
*name
);
1328 virtual ~wxMBConv_iconv();
1330 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1331 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1334 { return (m2w
!= (iconv_t
)-1) && (w2m
!= (iconv_t
)-1); }
1337 // the iconv handlers used to translate from multibyte to wide char and in
1338 // the other direction
1342 // guards access to m2w and w2m objects
1343 wxMutex m_iconvMutex
;
1347 // the name (for iconv_open()) of a wide char charset -- if none is
1348 // available on this machine, it will remain NULL
1349 static const char *ms_wcCharsetName
;
1351 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1352 // different endian-ness than the native one
1353 static bool ms_wcNeedsSwap
;
// Definitions of the class-wide state: no wchar_t charset name has been
// determined yet (NULL) and no byte swapping is assumed. Both are filled in
// by the first constructor call that finds ms_wcCharsetName still NULL.
1356 const char *wxMBConv_iconv::ms_wcCharsetName
= NULL
;
1357 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
// Opens the iconv descriptors for the charset `name`.
// The name is first narrowed character by character into `cname`.
// If the wchar_t charset name has not been determined yet, candidates are
// tried in order with iconv_open(): WC_NAME_BEST (explicit byte order),
// WC_NAME (no byte order) and finally "WCHAR_T". When one opens, a small
// test conversion is run through m2w; ms_wcNeedsSwap is set when the first
// wide character produced differs from the first input byte. A failing test
// conversion resets ms_wcCharsetName to NULL and logs an error; failure to
// open any candidate resets it to NULL and only writes a trace message.
// If the name was already known, m2w is simply opened with it.
// w2m is opened only when ms_wcCharsetName is non-NULL.
// NOTE(review): wxStrlen(name) is re-evaluated on every iteration of the
// copy loop, and the declaration/size of `cname` is not visible in this
// excerpt -- confirm it is large enough for any `name`.
// NOTE(review): ms_wcCharsetName / ms_wcNeedsSwap are static and written
// here without any visible locking.
1359 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1361 // Do it the hard way
1363 for (size_t i
= 0; i
< wxStrlen(name
)+1; i
++)
1364 cname
[i
] = (char) name
[i
];
1366 // check for charset that represents wchar_t:
1367 if (ms_wcCharsetName
== NULL
)
1369 ms_wcNeedsSwap
= false;
1371 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1372 ms_wcCharsetName
= WC_NAME_BEST
;
1373 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1375 if (m2w
== (iconv_t
)-1)
1377 // try charset w/o bytesex info (e.g. "UCS4")
1378 // and check for bytesex ourselves:
1379 ms_wcCharsetName
= WC_NAME
;
1380 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1382 // last bet, try if it knows WCHAR_T pseudo-charset
1383 if (m2w
== (iconv_t
)-1)
1385 ms_wcCharsetName
= "WCHAR_T";
1386 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1389 if (m2w
!= (iconv_t
)-1)
1391 char buf
[2], *bufPtr
;
1392 wchar_t wbuf
[2], *wbufPtr
;
1400 outsz
= SIZEOF_WCHAR_T
* 2;
1404 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1405 (char**)&wbufPtr
, &outsz
);
1407 if (ICONV_FAILED(res
, insz
))
1409 ms_wcCharsetName
= NULL
;
1410 wxLogLastError(wxT("iconv"));
1411 wxLogError(_("Conversion to charset '%s' doesn't work."), name
);
1415 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1420 ms_wcCharsetName
= NULL
;
1422 // VS: we must not output an error here, since wxWidgets will safely
1423 // fall back to using wxEncodingConverter.
1424 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name
);
1428 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName
, ms_wcNeedsSwap
);
1430 else // we already have ms_wcCharsetName
1432 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1435 // NB: don't ever pass NULL to iconv_open(), it may crash!
1436 if ( ms_wcCharsetName
)
1438 w2m
= iconv_open( cname
, ms_wcCharsetName
);
// Destructor: acts on each descriptor only if it was successfully opened,
// i.e. differs from (iconv_t)-1. The statements executed for each descriptor
// are not visible in this excerpt (presumably iconv_close() -- confirm).
1446 wxMBConv_iconv::~wxMBConv_iconv()
1448 if ( m2w
!= (iconv_t
)-1 )
1450 if ( w2m
!= (iconv_t
)-1 )
// Converts the NUL-terminated string `psz` to wide characters through the
// m2w descriptor, holding m_iconvMutex for the duration of the call.
// With a destination buffer: strlen(psz) input bytes are converted into at
// most n * SIZEOF_WCHAR_T output bytes and the result is
// n - (remaining output bytes / SIZEOF_WCHAR_T); the output is byte-swapped
// with WC_BSWAP when needed, and the terminator is appended by hand if there
// is room.
// Without a destination buffer: the input is converted repeatedly into an
// 8-character temporary buffer, accumulating the number of characters
// produced, for as long as iconv() fails with E2BIG.
// A failure according to ICONV_FAILED is only traced.
// NOTE(review): the branch conditions and the return statements are not
// visible in this excerpt.
1454 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1457 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1458 // Unfortunately there is a couple of global wxCSConv objects such as
1459 // wxConvLocal that are used all over wx code, so we have to make sure
1460 // the handle is used by at most one thread at the time. Otherwise
1461 // only a few wx classes would be safe to use from non-main threads
1462 // as MB<->WC conversion would fail "randomly".
1463 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1466 size_t inbuf
= strlen(psz
);
1467 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1469 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1470 wchar_t *bufPtr
= buf
;
1471 const char *pszPtr
= psz
;
1475 // have destination buffer, convert there
1477 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1478 (char**)&bufPtr
, &outbuf
);
1479 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1483 // convert to native endianness
1484 WC_BSWAP(buf
/* _not_ bufPtr */, res
)
1487 // NB: iconv was given only strlen(psz) characters on input, and so
1488 // it couldn't convert the trailing zero. Let's do it ourselves
1489 // if there's some room left for it in the output buffer.
1495 // no destination buffer... convert using temp buffer
1496 // to calculate destination buffer requirement
1501 outbuf
= 8*SIZEOF_WCHAR_T
;
1504 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1505 (char**)&bufPtr
, &outbuf
);
1507 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1508 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1511 if (ICONV_FAILED(cres
, inbuf
))
1513 //VS: it is ok if iconv fails, hence trace only
1514 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
// Converts the NUL-terminated wide string `psz` to the target charset
// through the w2m descriptor, holding m_iconvMutex for the duration of the
// call. `inbuf` is the input size in BYTES (wxWcslen(psz) * SIZEOF_WCHAR_T).
// When byte swapping is required, the input is first copied into a
// malloc()ed temporary buffer and swapped there, because the caller's buffer
// must not be modified.
// With a destination buffer the input is converted straight into it and the
// terminator is appended by hand if there is room. Without one, the input is
// converted repeatedly into a 16-byte temporary buffer for as long as
// iconv() fails with E2BIG, to compute the required size.
// A failure according to ICONV_FAILED is only traced.
// NOTE(review): `inbuf` is already a byte count, yet the temporary buffer is
// sized and copied as (inbuf+1)*SIZEOF_WCHAR_T bytes and WC_BSWAP is asked
// to swap `inbuf` elements -- this looks like it reads past the end of `psz`;
// confirm against the complete source.
// NOTE(review): the branch conditions, the release of `tmpbuf` and the
// return statements are not visible in this excerpt.
1521 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1524 // NB: explained in MB2WC
1525 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1528 size_t inbuf
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
1532 wchar_t *tmpbuf
= 0;
1536 // need to copy to temp buffer to switch endianness
1537 // this absolutely doesn't rock!
1538 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1539 // could be in read-only memory, or be accessed in some other thread)
1540 tmpbuf
=(wchar_t*)malloc((inbuf
+1)*SIZEOF_WCHAR_T
);
1541 memcpy(tmpbuf
,psz
,(inbuf
+1)*SIZEOF_WCHAR_T
);
1542 WC_BSWAP(tmpbuf
, inbuf
)
1548 // have destination buffer, convert there
1549 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1553 // NB: iconv was given only wcslen(psz) characters on input, and so
1554 // it couldn't convert the trailing zero. Let's do it ourselves
1555 // if there's some room left for it in the output buffer.
1561 // no destination buffer... convert using temp buffer
1562 // to calculate destination buffer requirement
1566 buf
= tbuf
; outbuf
= 16;
1568 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1571 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1579 if (ICONV_FAILED(cres
, inbuf
))
1581 //VS: it is ok if iconv fails, hence trace only
1582 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1589 #endif // HAVE_ICONV
1592 // ============================================================================
1593 // Win32 conversion classes
1594 // ============================================================================
1596 #ifdef wxHAVE_WIN32_MB2WC
// Helpers defined elsewhere that return the code page for a charset name or
// for a wxFontEncoding value. wxMBConv_win32 stores the result as its code
// page and treats -1 as "not usable" in IsOk().
1600 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1601 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1604 class wxMBConv_win32
: public wxMBConv
1609 m_CodePage
= CP_ACP
;
1613 wxMBConv_win32(const wxChar
* name
)
1615 m_CodePage
= wxCharsetToCodepage(name
);
1618 wxMBConv_win32(wxFontEncoding encoding
)
1620 m_CodePage
= wxEncodingToCodepage(encoding
);
1624 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1626 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1627 // the behaviour is not compatible with the Unix version (using iconv)
1628 // and break the library itself, e.g. wxTextInputStream::NextChar()
1629 // wouldn't work if reading an incomplete MB char didn't result in an
1632 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1633 // an error (tested under Windows Server 2003) and apparently it is
1634 // done on purpose, i.e. the function accepts any input in this case
1635 // and although I'd prefer to return error on ill-formed output, our
1636 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1637 // explicitly ill-formed according to RFC 2152) neither so we don't
1638 // even have any fallback here...
1639 int flags
= m_CodePage
== CP_UTF7
? 0 : MB_ERR_INVALID_CHARS
;
1641 const size_t len
= ::MultiByteToWideChar
1643 m_CodePage
, // code page
1644 flags
, // flags: fall on error
1645 psz
, // input string
1646 -1, // its length (NUL-terminated)
1647 buf
, // output string
1648 buf
? n
: 0 // size of output buffer
1651 // note that it returns count of written chars for buf != NULL and size
1652 // of the needed buffer for buf == NULL so in either case the length of
1653 // the string (which never includes the terminating NUL) is one less
1654 return len
? len
- 1 : (size_t)-1;
1657 size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
1660 we have a problem here: by default, WideCharToMultiByte() may
1661 replace characters unrepresentable in the target code page with bad
1662 quality approximations such as turning "1/2" symbol (U+00BD) into
1663 "1" for the code pages which don't have it and we, obviously, want
1664 to avoid this at any price
1666 the trouble is that this function does it _silently_, i.e. it won't
1667 even tell us whether it did or not... Win98/2000 and higher provide
1668 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1669 we have to resort to a round trip, i.e. check that converting back
1670 results in the same string -- this is, of course, expensive but
1671 otherwise we simply can't be sure to not garble the data.
1674 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1675 // it doesn't work with CJK encodings (which we test for rather roughly
1676 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1678 BOOL usedDef
wxDUMMY_INITIALIZE(false);
1681 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
1683 // it's our lucky day
1684 flags
= WC_NO_BEST_FIT_CHARS
;
1685 pUsedDef
= &usedDef
;
1687 else // old system or unsupported encoding
1693 const size_t len
= ::WideCharToMultiByte
1695 m_CodePage
, // code page
1696 flags
, // either none or no best fit
1697 pwz
, // input string
1698 -1, // it is (wide) NUL-terminated
1699 buf
, // output buffer
1700 buf
? n
: 0, // and its size
1701 NULL
, // default "replacement" char
1702 pUsedDef
// [out] was it used?
1707 // function totally failed
1711 // if we were really converting, check if we succeeded
1716 // check if the conversion failed, i.e. if any replacements
1721 else // we must resort to double tripping...
1723 wxWCharBuffer
wcBuf(n
);
1724 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
1725 wcscmp(wcBuf
, pwz
) != 0 )
1727 // we didn't obtain the same thing we started from, hence
1728 // the conversion was lossy and we consider that it failed
1734 // see the comment above for the reason of "len - 1"
1738 bool IsOk() const { return m_CodePage
!= -1; }
1741 static bool CanUseNoBestFit()
1743 static int s_isWin98Or2k
= -1;
1745 if ( s_isWin98Or2k
== -1 )
1748 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
1751 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
1755 s_isWin98Or2k
= verMaj
>= 5;
1759 // unknown, be conservative by default
1763 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
1766 return s_isWin98Or2k
== 1;
1772 #endif // wxHAVE_WIN32_MB2WC
1774 // ============================================================================
1775 // Cocoa conversion classes
1776 // ============================================================================
1778 #if defined(__WXCOCOA__)
1780 // RN: There is no UTF-32 support in either Core Foundation or
1781 // Cocoa. Strangely enough, Core Foundation uses UTF-32
1782 // internally quite a bit - it's just not public (yet).
1784 #include <CoreFoundation/CFString.h>
1785 #include <CoreFoundation/CFStringEncodingExt.h>
1787 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
1789 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
1790 if ( encoding
== wxFONTENCODING_DEFAULT
)
1792 enc
= CFStringGetSystemEncoding();
1794 else switch( encoding
)
1796 case wxFONTENCODING_ISO8859_1
:
1797 enc
= kCFStringEncodingISOLatin1
;
1799 case wxFONTENCODING_ISO8859_2
:
1800 enc
= kCFStringEncodingISOLatin2
;
1802 case wxFONTENCODING_ISO8859_3
:
1803 enc
= kCFStringEncodingISOLatin3
;
1805 case wxFONTENCODING_ISO8859_4
:
1806 enc
= kCFStringEncodingISOLatin4
;
1808 case wxFONTENCODING_ISO8859_5
:
1809 enc
= kCFStringEncodingISOLatinCyrillic
;
1811 case wxFONTENCODING_ISO8859_6
:
1812 enc
= kCFStringEncodingISOLatinArabic
;
1814 case wxFONTENCODING_ISO8859_7
:
1815 enc
= kCFStringEncodingISOLatinGreek
;
1817 case wxFONTENCODING_ISO8859_8
:
1818 enc
= kCFStringEncodingISOLatinHebrew
;
1820 case wxFONTENCODING_ISO8859_9
:
1821 enc
= kCFStringEncodingISOLatin5
;
1823 case wxFONTENCODING_ISO8859_10
:
1824 enc
= kCFStringEncodingISOLatin6
;
1826 case wxFONTENCODING_ISO8859_11
:
1827 enc
= kCFStringEncodingISOLatinThai
;
1829 case wxFONTENCODING_ISO8859_13
:
1830 enc
= kCFStringEncodingISOLatin7
;
1832 case wxFONTENCODING_ISO8859_14
:
1833 enc
= kCFStringEncodingISOLatin8
;
1835 case wxFONTENCODING_ISO8859_15
:
1836 enc
= kCFStringEncodingISOLatin9
;
1839 case wxFONTENCODING_KOI8
:
1840 enc
= kCFStringEncodingKOI8_R
;
1842 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
1843 enc
= kCFStringEncodingDOSRussian
;
1846 // case wxFONTENCODING_BULGARIAN :
1850 case wxFONTENCODING_CP437
:
1851 enc
=kCFStringEncodingDOSLatinUS
;
1853 case wxFONTENCODING_CP850
:
1854 enc
= kCFStringEncodingDOSLatin1
;
1856 case wxFONTENCODING_CP852
:
1857 enc
= kCFStringEncodingDOSLatin2
;
1859 case wxFONTENCODING_CP855
:
1860 enc
= kCFStringEncodingDOSCyrillic
;
1862 case wxFONTENCODING_CP866
:
1863 enc
=kCFStringEncodingDOSRussian
;
1865 case wxFONTENCODING_CP874
:
1866 enc
= kCFStringEncodingDOSThai
;
1868 case wxFONTENCODING_CP932
:
1869 enc
= kCFStringEncodingDOSJapanese
;
1871 case wxFONTENCODING_CP936
:
1872 enc
=kCFStringEncodingDOSChineseSimplif
;
1874 case wxFONTENCODING_CP949
:
1875 enc
= kCFStringEncodingDOSKorean
;
1877 case wxFONTENCODING_CP950
:
1878 enc
= kCFStringEncodingDOSChineseTrad
;
1880 case wxFONTENCODING_CP1250
:
1881 enc
= kCFStringEncodingWindowsLatin2
;
1883 case wxFONTENCODING_CP1251
:
1884 enc
=kCFStringEncodingWindowsCyrillic
;
1886 case wxFONTENCODING_CP1252
:
1887 enc
=kCFStringEncodingWindowsLatin1
;
1889 case wxFONTENCODING_CP1253
:
1890 enc
= kCFStringEncodingWindowsGreek
;
1892 case wxFONTENCODING_CP1254
:
1893 enc
= kCFStringEncodingWindowsLatin5
;
1895 case wxFONTENCODING_CP1255
:
1896 enc
=kCFStringEncodingWindowsHebrew
;
1898 case wxFONTENCODING_CP1256
:
1899 enc
=kCFStringEncodingWindowsArabic
;
1901 case wxFONTENCODING_CP1257
:
1902 enc
= kCFStringEncodingWindowsBalticRim
;
1904 // This only really encodes to UTF7 (if that) evidently
1905 // case wxFONTENCODING_UTF7 :
1906 // enc = kCFStringEncodingNonLossyASCII ;
1908 case wxFONTENCODING_UTF8
:
1909 enc
= kCFStringEncodingUTF8
;
1911 case wxFONTENCODING_EUC_JP
:
1912 enc
= kCFStringEncodingEUC_JP
;
1914 case wxFONTENCODING_UTF16
:
1915 enc
= kCFStringEncodingUnicode
;
1917 case wxFONTENCODING_MACROMAN
:
1918 enc
= kCFStringEncodingMacRoman
;
1920 case wxFONTENCODING_MACJAPANESE
:
1921 enc
= kCFStringEncodingMacJapanese
;
1923 case wxFONTENCODING_MACCHINESETRAD
:
1924 enc
= kCFStringEncodingMacChineseTrad
;
1926 case wxFONTENCODING_MACKOREAN
:
1927 enc
= kCFStringEncodingMacKorean
;
1929 case wxFONTENCODING_MACARABIC
:
1930 enc
= kCFStringEncodingMacArabic
;
1932 case wxFONTENCODING_MACHEBREW
:
1933 enc
= kCFStringEncodingMacHebrew
;
1935 case wxFONTENCODING_MACGREEK
:
1936 enc
= kCFStringEncodingMacGreek
;
1938 case wxFONTENCODING_MACCYRILLIC
:
1939 enc
= kCFStringEncodingMacCyrillic
;
1941 case wxFONTENCODING_MACDEVANAGARI
:
1942 enc
= kCFStringEncodingMacDevanagari
;
1944 case wxFONTENCODING_MACGURMUKHI
:
1945 enc
= kCFStringEncodingMacGurmukhi
;
1947 case wxFONTENCODING_MACGUJARATI
:
1948 enc
= kCFStringEncodingMacGujarati
;
1950 case wxFONTENCODING_MACORIYA
:
1951 enc
= kCFStringEncodingMacOriya
;
1953 case wxFONTENCODING_MACBENGALI
:
1954 enc
= kCFStringEncodingMacBengali
;
1956 case wxFONTENCODING_MACTAMIL
:
1957 enc
= kCFStringEncodingMacTamil
;
1959 case wxFONTENCODING_MACTELUGU
:
1960 enc
= kCFStringEncodingMacTelugu
;
1962 case wxFONTENCODING_MACKANNADA
:
1963 enc
= kCFStringEncodingMacKannada
;
1965 case wxFONTENCODING_MACMALAJALAM
:
1966 enc
= kCFStringEncodingMacMalayalam
;
1968 case wxFONTENCODING_MACSINHALESE
:
1969 enc
= kCFStringEncodingMacSinhalese
;
1971 case wxFONTENCODING_MACBURMESE
:
1972 enc
= kCFStringEncodingMacBurmese
;
1974 case wxFONTENCODING_MACKHMER
:
1975 enc
= kCFStringEncodingMacKhmer
;
1977 case wxFONTENCODING_MACTHAI
:
1978 enc
= kCFStringEncodingMacThai
;
1980 case wxFONTENCODING_MACLAOTIAN
:
1981 enc
= kCFStringEncodingMacLaotian
;
1983 case wxFONTENCODING_MACGEORGIAN
:
1984 enc
= kCFStringEncodingMacGeorgian
;
1986 case wxFONTENCODING_MACARMENIAN
:
1987 enc
= kCFStringEncodingMacArmenian
;
1989 case wxFONTENCODING_MACCHINESESIMP
:
1990 enc
= kCFStringEncodingMacChineseSimp
;
1992 case wxFONTENCODING_MACTIBETAN
:
1993 enc
= kCFStringEncodingMacTibetan
;
1995 case wxFONTENCODING_MACMONGOLIAN
:
1996 enc
= kCFStringEncodingMacMongolian
;
1998 case wxFONTENCODING_MACETHIOPIC
:
1999 enc
= kCFStringEncodingMacEthiopic
;
2001 case wxFONTENCODING_MACCENTRALEUR
:
2002 enc
= kCFStringEncodingMacCentralEurRoman
;
2004 case wxFONTENCODING_MACVIATNAMESE
:
2005 enc
= kCFStringEncodingMacVietnamese
;
2007 case wxFONTENCODING_MACARABICEXT
:
2008 enc
= kCFStringEncodingMacExtArabic
;
2010 case wxFONTENCODING_MACSYMBOL
:
2011 enc
= kCFStringEncodingMacSymbol
;
2013 case wxFONTENCODING_MACDINGBATS
:
2014 enc
= kCFStringEncodingMacDingbats
;
2016 case wxFONTENCODING_MACTURKISH
:
2017 enc
= kCFStringEncodingMacTurkish
;
2019 case wxFONTENCODING_MACCROATIAN
:
2020 enc
= kCFStringEncodingMacCroatian
;
2022 case wxFONTENCODING_MACICELANDIC
:
2023 enc
= kCFStringEncodingMacIcelandic
;
2025 case wxFONTENCODING_MACROMANIAN
:
2026 enc
= kCFStringEncodingMacRomanian
;
2028 case wxFONTENCODING_MACCELTIC
:
2029 enc
= kCFStringEncodingMacCeltic
;
2031 case wxFONTENCODING_MACGAELIC
:
2032 enc
= kCFStringEncodingMacGaelic
;
2034 // case wxFONTENCODING_MACKEYBOARD :
2035 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2038 // because gcc is picky
2044 class wxMBConv_cocoa
: public wxMBConv
2049 Init(CFStringGetSystemEncoding()) ;
2053 wxMBConv_cocoa(const wxChar
* name
)
2055 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2059 wxMBConv_cocoa(wxFontEncoding encoding
)
2061 Init( wxCFStringEncFromFontEnc(encoding
) );
2068 void Init( CFStringEncoding encoding
)
2070 m_encoding
= encoding
;
2073 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2077 CFStringRef theString
= CFStringCreateWithBytes (
2078 NULL
, //the allocator
2079 (const UInt8
*)szUnConv
,
2082 false //no BOM/external representation
2085 wxASSERT(theString
);
2087 size_t nOutLength
= CFStringGetLength(theString
);
2091 CFRelease(theString
);
2095 CFRange theRange
= { 0, nOutSize
};
2097 #if SIZEOF_WCHAR_T == 4
2098 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2101 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2103 CFRelease(theString
);
2105 szUniCharBuffer
[nOutLength
] = '\0' ;
2107 #if SIZEOF_WCHAR_T == 4
2108 wxMBConvUTF16 converter
;
2109 converter
.MB2WC(szOut
, (const char*)szUniCharBuffer
, nOutSize
) ;
2110 delete[] szUniCharBuffer
;
2116 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2120 size_t nRealOutSize
;
2121 size_t nBufSize
= wxWcslen(szUnConv
);
2122 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
2124 #if SIZEOF_WCHAR_T == 4
2125 wxMBConvUTF16BE converter
;
2126 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2127 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1] ;
2128 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
)) ;
2129 nBufSize
/= sizeof(UniChar
);
2132 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2136 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
2139 wxASSERT(theString
);
2141 //Note that CER puts a BOM when converting to Unicode,
2142 //so we check for that case and use CFStringGetCharacters instead
2143 if (m_encoding
== kCFStringEncodingUnicode
)
2146 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2148 nRealOutSize
= CFStringGetLength(theString
) + 1;
2154 CFRangeMake(0, CFStringGetLength(theString
)),
2156 0, //what to put in characters that can't be converted -
2157 //0 tells CFString to return NULL if it meets such a character
2158 false, //not an external representation
2161 (CFIndex
*) &nRealOutSize
2165 CFRelease(theString
);
2167 #if SIZEOF_WCHAR_T == 4
2168 delete[] szUniBuffer
;
2171 return nRealOutSize
- 1;
2176 return m_encoding
!= kCFStringEncodingInvalidId
&&
2177 CFStringIsEncodingAvailable(m_encoding
);
2181 CFStringEncoding m_encoding
;
2184 #endif // defined(__WXCOCOA__)
2186 // ============================================================================
2187 // Mac conversion classes
2188 // ============================================================================
2190 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2192 class wxMBConv_mac
: public wxMBConv
2197 Init(CFStringGetSystemEncoding()) ;
2201 wxMBConv_mac(const wxChar
* name
)
2203 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2207 wxMBConv_mac(wxFontEncoding encoding
)
2209 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
2214 OSStatus status
= noErr
;
2215 status
= TECDisposeConverter(m_MB2WC_converter
);
2216 status
= TECDisposeConverter(m_WC2MB_converter
);
2220 void Init( TextEncodingBase encoding
)
2222 OSStatus status
= noErr
;
2223 m_char_encoding
= encoding
;
2224 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
2226 status
= TECCreateConverter(&m_MB2WC_converter
,
2228 m_unicode_encoding
);
2229 status
= TECCreateConverter(&m_WC2MB_converter
,
2234 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2236 OSStatus status
= noErr
;
2237 ByteCount byteOutLen
;
2238 ByteCount byteInLen
= strlen(psz
) ;
2239 wchar_t *tbuf
= NULL
;
2240 UniChar
* ubuf
= NULL
;
2245 //apple specs say at least 32
2246 n
= wxMax( 32 , byteInLen
) ;
2247 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
2249 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
2250 #if SIZEOF_WCHAR_T == 4
2251 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2253 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2255 status
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2256 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2257 #if SIZEOF_WCHAR_T == 4
2258 // we have to terminate here, because n might be larger for the trailing zero, and if the UniChar
2259 // buffer is not properly terminated we get random characters at the end
2260 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2261 wxMBConvUTF16BE converter
;
2262 res
= converter
.MB2WC( (buf
? buf
: tbuf
) , (const char*)ubuf
, n
) ;
2265 res
= byteOutLen
/ sizeof( UniChar
) ;
2270 if ( buf
&& res
< n
)
2276 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2278 OSStatus status
= noErr
;
2279 ByteCount byteOutLen
;
2280 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2286 //apple specs say at least 32
2287 n
= wxMax( 32 , ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2288 tbuf
= (char*) malloc( n
) ;
2291 ByteCount byteBufferLen
= n
;
2292 UniChar
* ubuf
= NULL
;
2293 #if SIZEOF_WCHAR_T == 4
2294 wxMBConvUTF16BE converter
;
2295 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2296 byteInLen
= unicharlen
;
2297 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2298 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2300 ubuf
= (UniChar
*) psz
;
2302 status
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2303 (TextPtr
) (buf
? buf
: tbuf
) , byteBufferLen
, &byteOutLen
);
2304 #if SIZEOF_WCHAR_T == 4
2310 size_t res
= byteOutLen
;
2311 if ( buf
&& res
< n
)
2315 //we need to double-trip to verify it didn't insert any ? in place
2316 //of bogus characters
2317 wxWCharBuffer
wcBuf(n
);
2318 size_t pszlen
= wxWcslen(psz
);
2319 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
2320 wxWcslen(wcBuf
) != pszlen
||
2321 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2323 // we didn't obtain the same thing we started from, hence
2324 // the conversion was lossy and we consider that it failed
2333 { return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
; }
2336 TECObjectRef m_MB2WC_converter
;
2337 TECObjectRef m_WC2MB_converter
;
2339 TextEncodingBase m_char_encoding
;
2340 TextEncodingBase m_unicode_encoding
;
2343 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2345 // ============================================================================
2346 // wxEncodingConverter based conversion classes
2347 // ============================================================================
2351 class wxMBConv_wxwin
: public wxMBConv
2356 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2357 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2361 // temporarily just use wxEncodingConverter stuff,
2362 // so that it works while a better implementation is built
2363 wxMBConv_wxwin(const wxChar
* name
)
2366 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2368 m_enc
= wxFONTENCODING_SYSTEM
;
2373 wxMBConv_wxwin(wxFontEncoding enc
)
2380 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2382 size_t inbuf
= strlen(psz
);
2385 if (!m2w
.Convert(psz
,buf
))
2391 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2393 const size_t inbuf
= wxWcslen(psz
);
2396 if (!w2m
.Convert(psz
,buf
))
2403 bool IsOk() const { return m_ok
; }
2406 wxFontEncoding m_enc
;
2407 wxEncodingConverter m2w
, w2m
;
2409 // were we initialized successfully?
2412 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2415 #endif // wxUSE_FONTMAP
2417 // ============================================================================
2418 // wxCSConv implementation
2419 // ============================================================================
2421 void wxCSConv::Init()
2428 wxCSConv::wxCSConv(const wxChar
*charset
)
2437 m_encoding
= wxFONTENCODING_SYSTEM
;
2440 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2442 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2444 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2446 encoding
= wxFONTENCODING_SYSTEM
;
2451 m_encoding
= encoding
;
2454 wxCSConv::~wxCSConv()
2459 wxCSConv::wxCSConv(const wxCSConv
& conv
)
2464 SetName(conv
.m_name
);
2465 m_encoding
= conv
.m_encoding
;
2468 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2472 SetName(conv
.m_name
);
2473 m_encoding
= conv
.m_encoding
;
2478 void wxCSConv::Clear()
2487 void wxCSConv::SetName(const wxChar
*charset
)
2491 m_name
= wxStrdup(charset
);
2496 wxMBConv
*wxCSConv::DoCreate() const
2498 // check for the special case of ASCII or ISO8859-1 charset: as we have
2499 // special knowledge of it anyhow, we don't need to create a special
2500 // conversion object
2501 if ( m_encoding
== wxFONTENCODING_ISO8859_1
)
2503 // don't convert at all
2507 // we trust OS to do conversion better than we can so try external
2508 // conversion methods first
2510 // the full order is:
2511 // 1. OS conversion (iconv() under Unix or Win32 API)
2512 // 2. hard coded conversions for UTF
2513 // 3. wxEncodingConverter as fall back
2519 #endif // !wxUSE_FONTMAP
2521 wxString
name(m_name
);
2525 name
= wxFontMapperBase::Get()->GetEncodingName(m_encoding
);
2526 #endif // wxUSE_FONTMAP
2528 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
2534 #endif // HAVE_ICONV
2536 #ifdef wxHAVE_WIN32_MB2WC
2539 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2540 : new wxMBConv_win32(m_encoding
);
2549 #endif // wxHAVE_WIN32_MB2WC
2550 #if defined(__WXMAC__)
2552 // leave UTF16 and UTF32 to the built-ins of wx
2553 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
2554 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
2558 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
2559 : new wxMBConv_mac(m_encoding
);
2561 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
2570 #if defined(__WXCOCOA__)
2572 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
2576 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
2577 : new wxMBConv_cocoa(m_encoding
);
2579 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
2589 wxFontEncoding enc
= m_encoding
;
2591 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2593 // use "false" to suppress interactive dialogs -- we can be called from
2594 // anywhere and popping up a dialog from here is the last thing we want to
2596 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
2598 #endif // wxUSE_FONTMAP
2602 case wxFONTENCODING_UTF7
:
2603 return new wxMBConvUTF7
;
2605 case wxFONTENCODING_UTF8
:
2606 return new wxMBConvUTF8
;
2608 case wxFONTENCODING_UTF16BE
:
2609 return new wxMBConvUTF16BE
;
2611 case wxFONTENCODING_UTF16LE
:
2612 return new wxMBConvUTF16LE
;
2614 case wxFONTENCODING_UTF32BE
:
2615 return new wxMBConvUTF32BE
;
2617 case wxFONTENCODING_UTF32LE
:
2618 return new wxMBConvUTF32LE
;
2621 // nothing to do but put here to suppress gcc warnings
2628 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2629 : new wxMBConv_wxwin(m_encoding
);
2635 #endif // wxUSE_FONTMAP
2637 // NB: This is a hack to prevent a deadlock. What could otherwise happen
2638 // in a Unicode build: wxConvLocal creation ends up here because of
2639 // some failure and logs the error. But wxLog will try to attach a
2640 // timestamp, for which it will need wxConvLocal (to convert the time
2641 // to char* and then wchar_t*); that fails too and tries to log an
2642 // error, but wxLog has an (already locked) critical section that
2643 // guards a static buffer.
2644 static bool alreadyLoggingError
= false;
2645 if (!alreadyLoggingError
)
2647 alreadyLoggingError
= true;
2648 wxLogError(_("Cannot convert from the charset '%s'!"),
2652 wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str()
2653 #else // !wxUSE_FONTMAP
2654 wxString::Format(_("encoding %s"), m_encoding
).c_str()
2655 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2657 alreadyLoggingError
= false;
2663 void wxCSConv::CreateConvIfNeeded() const
2667 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2670 // if we have neither the name nor the encoding, use the default
2671 // encoding for this system
2672 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
2674 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
2676 #endif // wxUSE_INTL
2678 self
->m_convReal
= DoCreate();
2679 self
->m_deferred
= false;
2683 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2685 CreateConvIfNeeded();
2688 return m_convReal
->MB2WC(buf
, psz
, n
);
2691 size_t len
= strlen(psz
);
2695 for (size_t c
= 0; c
<= len
; c
++)
2696 buf
[c
] = (unsigned char)(psz
[c
]);
2702 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2704 CreateConvIfNeeded();
2707 return m_convReal
->WC2MB(buf
, psz
, n
);
2710 const size_t len
= wxWcslen(psz
);
2713 for (size_t c
= 0; c
<= len
; c
++)
2717 buf
[c
] = (char)psz
[c
];
2722 for (size_t c
= 0; c
<= len
; c
++)
2732 // ----------------------------------------------------------------------------
2734 // ----------------------------------------------------------------------------
2737 static wxMBConv_win32 wxConvLibcObj
;
2738 #elif defined(__WXMAC__) && !defined(__MACH__)
2739 static wxMBConv_mac wxConvLibcObj
;
2741 static wxMBConvLibc wxConvLibcObj
;
2744 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
2745 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
2746 static wxMBConvUTF7 wxConvUTF7Obj
;
2747 static wxMBConvUTF8 wxConvUTF8Obj
;
2749 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
2750 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
2751 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
2752 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
2753 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
2754 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
2755 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
= &
2763 #else // !wxUSE_WCHAR_T
2765 // stand-ins in absence of wchar_t
2766 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
2771 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T