1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // ============================================================================
17 // ============================================================================
19 // ----------------------------------------------------------------------------
21 // ----------------------------------------------------------------------------
23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
24 #pragma implementation "strconv.h"
27 // For compilers that support precompilation, includes "wx.h".
28 #include "wx/wxprec.h"
39 #include "wx/strconv.h"
44 #include "wx/msw/private.h"
48 #include "wx/msw/missing.h"
59 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
60 #define wxHAVE_WIN32_MB2WC
61 #endif // __WIN32__ but !__WXMICROWIN__
63 // ----------------------------------------------------------------------------
65 // ----------------------------------------------------------------------------
73 #include "wx/thread.h"
76 #include "wx/encconv.h"
77 #include "wx/fontmap.h"
81 #include <ATSUnicode.h>
82 #include <TextCommon.h>
83 #include <TextEncodingConverter.h>
85 #include "wx/mac/private.h" // includes mac headers
87 // ----------------------------------------------------------------------------
89 // ----------------------------------------------------------------------------
// In-place byte swapping of the first `len` elements of the array `str`.
//
// Both macros expand to a braced block (not do/while) so that existing call
// sites which invoke them without a trailing semicolon keep compiling.
// The arguments are parenthesised so that expressions such as `buf + 1` or
// `a ? b : c` can be passed safely; note that they are still evaluated once
// per loop iteration, so they must be free of side effects.
#define BSWAP_UCS4(str, len) { size_t _c; for (_c=0; _c<(size_t)(len); _c++) (str)[_c]=wxUINT32_SWAP_ALWAYS((str)[_c]); }
#define BSWAP_UTF16(str, len) { size_t _c; for (_c=0; _c<(size_t)(len); _c++) (str)[_c]=wxUINT16_SWAP_ALWAYS((str)[_c]); }
94 #if SIZEOF_WCHAR_T == 4
95 #define WC_NAME "UCS4"
96 #define WC_BSWAP BSWAP_UCS4
97 #ifdef WORDS_BIGENDIAN
98 #define WC_NAME_BEST "UCS-4BE"
100 #define WC_NAME_BEST "UCS-4LE"
102 #elif SIZEOF_WCHAR_T == 2
103 #define WC_NAME "UTF16"
104 #define WC_BSWAP BSWAP_UTF16
106 #ifdef WORDS_BIGENDIAN
107 #define WC_NAME_BEST "UTF-16BE"
109 #define WC_NAME_BEST "UTF-16LE"
111 #else // sizeof(wchar_t) != 2 nor 4
112 // does this ever happen?
113 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
116 // ============================================================================
118 // ============================================================================
120 // ----------------------------------------------------------------------------
121 // UTF-16 en/decoding to/from UCS-4
122 // ----------------------------------------------------------------------------
125 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
130 *output
= (wxUint16
) input
;
133 else if (input
>=0x110000)
141 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
142 *output
= (wxUint16
) ((input
&0x3ff)+0xdc00);
148 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
150 if ((*input
<0xd800) || (*input
>0xdfff))
155 else if ((input
[1]<0xdc00) || (input
[1]>0xdfff))
162 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
168 // ----------------------------------------------------------------------------
170 // ----------------------------------------------------------------------------
172 wxMBConv::~wxMBConv()
174 // nothing to do here (necessary for Darwin linking probably)
177 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
181 // calculate the length of the buffer needed first
182 size_t nLen
= MB2WC(NULL
, psz
, 0);
183 if ( nLen
!= (size_t)-1 )
185 // now do the actual conversion
186 wxWCharBuffer
buf(nLen
);
187 nLen
= MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NULL
188 if ( nLen
!= (size_t)-1 )
195 wxWCharBuffer
buf((wchar_t *)NULL
);
200 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
204 size_t nLen
= WC2MB(NULL
, pwz
, 0);
205 if ( nLen
!= (size_t)-1 )
207 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
208 nLen
= WC2MB(buf
.data(), pwz
, nLen
+ 4);
209 if ( nLen
!= (size_t)-1 )
216 wxCharBuffer
buf((char *)NULL
);
221 const wxWCharBuffer
wxMBConv::cMB2WC(const char *szString
, size_t nStringLen
, size_t* pOutSize
) const
223 wxASSERT(pOutSize
!= NULL
);
225 const char* szEnd
= szString
+ nStringLen
+ 1;
226 const char* szPos
= szString
;
227 const char* szStart
= szPos
;
229 size_t nActualLength
= 0;
230 size_t nCurrentSize
= nStringLen
; //try normal size first (should never resize?)
232 wxWCharBuffer
theBuffer(nCurrentSize
);
234 //Convert the string until the length() is reached, continuing the
235 //loop every time a null character is reached
236 while(szPos
!= szEnd
)
238 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
240 //Get the length of the current (sub)string
241 size_t nLen
= MB2WC(NULL
, szPos
, 0);
243 //Invalid conversion?
244 if( nLen
== (size_t)-1 )
247 theBuffer
.data()[0u] = wxT('\0');
252 //Increase the actual length (+1 for current null character)
253 nActualLength
+= nLen
+ 1;
255 //if buffer too small, realloc the buffer
256 if (nActualLength
> (nCurrentSize
+1))
258 wxWCharBuffer
theNewBuffer(nCurrentSize
<< 1);
259 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
* sizeof(wchar_t));
260 theBuffer
= theNewBuffer
;
264 //Convert the current (sub)string
265 if ( MB2WC(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
268 theBuffer
.data()[0u] = wxT('\0');
272 //Increment to next (sub)string
273 //Note that we have to use strlen instead of nLen here
274 //because XX2XX gives us the size of the output buffer,
275 //which is not necessarily the length of the string
276 szPos
+= strlen(szPos
) + 1;
279 //success - return actual length and the buffer
280 *pOutSize
= nActualLength
;
284 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *szString
, size_t nStringLen
, size_t* pOutSize
) const
286 wxASSERT(pOutSize
!= NULL
);
288 const wchar_t* szEnd
= szString
+ nStringLen
+ 1;
289 const wchar_t* szPos
= szString
;
290 const wchar_t* szStart
= szPos
;
292 size_t nActualLength
= 0;
293 size_t nCurrentSize
= nStringLen
<< 2; //try * 4 first
295 wxCharBuffer
theBuffer(nCurrentSize
);
297 //Convert the string until the length() is reached, continuing the
298 //loop every time a null character is reached
299 while(szPos
!= szEnd
)
301 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
303 //Get the length of the current (sub)string
304 size_t nLen
= WC2MB(NULL
, szPos
, 0);
306 //Invalid conversion?
307 if( nLen
== (size_t)-1 )
310 theBuffer
.data()[0u] = wxT('\0');
314 //Increase the actual length (+1 for current null character)
315 nActualLength
+= nLen
+ 1;
317 //if buffer too small, realloc the buffer
318 if (nActualLength
> (nCurrentSize
+1))
320 wxCharBuffer
theNewBuffer(nCurrentSize
<< 1);
321 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
);
322 theBuffer
= theNewBuffer
;
326 //Convert the current (sub)string
327 if(WC2MB(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
330 theBuffer
.data()[0u] = wxT('\0');
334 //Increment to next (sub)string
335 //Note that we have to use wxWcslen instead of nLen here
336 //because XX2XX gives us the size of the output buffer,
337 //which is not necessarily the length of the string
338 szPos
+= wxWcslen(szPos
) + 1;
341 //success - return actual length and the buffer
342 *pOutSize
= nActualLength
;
346 // ----------------------------------------------------------------------------
348 // ----------------------------------------------------------------------------
350 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
352 return wxMB2WC(buf
, psz
, n
);
355 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
357 return wxWC2MB(buf
, psz
, n
);
362 // ----------------------------------------------------------------------------
363 // wxConvBrokenFileNames
364 // ----------------------------------------------------------------------------
366 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar
*charset
)
368 if ( !charset
|| wxStricmp(charset
, _T("UTF-8")) == 0
369 || wxStricmp(charset
, _T("UTF8")) == 0 )
370 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
372 m_conv
= new wxCSConv(charset
);
376 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf
,
378 size_t outputSize
) const
380 return m_conv
->MB2WC( outputBuf
, psz
, outputSize
);
384 wxConvBrokenFileNames::WC2MB(char *outputBuf
,
386 size_t outputSize
) const
388 return m_conv
->WC2MB( outputBuf
, psz
, outputSize
);
393 // ----------------------------------------------------------------------------
395 // ----------------------------------------------------------------------------
397 // Implementation (C) 2004 Fredrik Roubert
400 // BASE64 decoding table
402 static const unsigned char utf7unb64
[] =
404 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
405 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
406 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
407 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
408 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
409 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
410 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
411 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
412 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
413 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
414 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
415 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
416 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
417 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
418 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
419 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
420 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
421 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
422 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
423 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
424 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
425 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
426 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
427 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
428 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
429 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
430 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
431 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
432 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
433 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
434 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
435 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
438 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
442 while (*psz
&& ((!buf
) || (len
< n
)))
444 unsigned char cc
= *psz
++;
452 else if (*psz
== '-')
462 // BASE64 encoded string
466 for (lsb
= false, d
= 0, l
= 0;
467 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff; psz
++)
471 for (l
+= 6; l
>= 8; lsb
= !lsb
)
473 c
= (unsigned char)((d
>> (l
-= 8)) % 256);
482 *buf
= (wchar_t)(c
<< 8);
489 if (buf
&& (len
< n
))
495 // BASE64 encoding table
497 static const unsigned char utf7enb64
[] =
499 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
500 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
501 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
502 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
503 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
504 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
505 'w', 'x', 'y', 'z', '0', '1', '2', '3',
506 '4', '5', '6', '7', '8', '9', '+', '/'
510 // UTF-7 encoding table
512 // 0 - Set D (directly encoded characters)
513 // 1 - Set O (optional direct characters)
514 // 2 - whitespace characters (optional)
515 // 3 - special characters
517 static const unsigned char utf7encode
[128] =
519 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
520 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
521 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
523 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
525 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
529 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
535 while (*psz
&& ((!buf
) || (len
< n
)))
538 if (cc
< 0x80 && utf7encode
[cc
] < 1)
546 else if (((wxUint32
)cc
) > 0xffff)
548 // no surrogate pair generation (yet?)
559 // BASE64 encode string
560 unsigned int lsb
, d
, l
;
561 for (d
= 0, l
= 0;; psz
++)
563 for (lsb
= 0; lsb
< 2; lsb
++)
566 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
568 for (l
+= 8; l
>= 6; )
572 *buf
++ = utf7enb64
[(d
>> l
) % 64];
577 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
583 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
592 if (buf
&& (len
< n
))
597 // ----------------------------------------------------------------------------
599 // ----------------------------------------------------------------------------
601 static wxUint32 utf8_max
[]=
602 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
604 // boundaries of the private use area we use to (temporarily) remap
605 // characters that are invalid in a UTF-8 encoded string
606 const wxUint32 wxUnicodePUA
= 0x100000;
607 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
609 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
613 while (*psz
&& ((!buf
) || (len
< n
)))
615 const char *opsz
= psz
;
616 bool invalid
= false;
617 unsigned char cc
= *psz
++, fc
= cc
;
619 for (cnt
= 0; fc
& 0x80; cnt
++)
628 // escape the escape character for octal escapes
629 if ((m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
630 && cc
== '\\' && (!buf
|| len
< n
))
642 // invalid UTF-8 sequence
647 unsigned ocnt
= cnt
- 1;
648 wxUint32 res
= cc
& (0x3f >> cnt
);
652 if ((cc
& 0xC0) != 0x80)
654 // invalid UTF-8 sequence
659 res
= (res
<< 6) | (cc
& 0x3f);
661 if (invalid
|| res
<= utf8_max
[ocnt
])
663 // illegal UTF-8 encoding
666 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
667 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
669 // if one of our PUA characters turns up externally
670 // it must also be treated as an illegal sequence
671 // (a bit like you have to escape an escape character)
677 // cast is ok because wchar_t == wxUint16 if WC_UTF16
678 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
679 if (pa
== (size_t)-1)
693 #endif // WC_UTF16/!WC_UTF16
698 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
700 while (opsz
< psz
&& (!buf
|| len
< n
))
703 // cast is ok because wchar_t == wxUint16 if WC_UTF16
704 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
705 wxASSERT(pa
!= (size_t)-1);
712 *buf
++ = wxUnicodePUA
+ (unsigned char)*opsz
;
718 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
720 while (opsz
< psz
&& (!buf
|| len
< n
))
722 if ( buf
&& len
+ 3 < n
)
724 unsigned char n
= *opsz
;
726 *buf
++ = (wchar_t)( L
'0' + n
/ 0100 );
727 *buf
++ = (wchar_t)( L
'0' + (n
% 0100) / 010 );
728 *buf
++ = (wchar_t)( L
'0' + n
% 010 );
734 else // MAP_INVALID_UTF8_NOT
741 if (buf
&& (len
< n
))
// Returns true if wch is one of the octal digits L'0'..L'7'.
static inline bool isoctal(wchar_t wch)
{
    return L'0' <= wch && wch <= L'7';
}
751 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
755 while (*psz
&& ((!buf
) || (len
< n
)))
759 // cast is ok for WC_UTF16
760 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
761 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
763 cc
=(*psz
++) & 0x7fffffff;
766 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
767 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
770 *buf
++ = (char)(cc
- wxUnicodePUA
);
773 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
774 && cc
== L
'\\' && psz
[0] == L
'\\' )
781 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
783 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
787 *buf
++ = (char) ((psz
[0] - L
'0')*0100 +
788 (psz
[1] - L
'0')*010 +
798 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
812 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
814 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
826 // ----------------------------------------------------------------------------
828 // ----------------------------------------------------------------------------
830 #ifdef WORDS_BIGENDIAN
831 #define wxMBConvUTF16straight wxMBConvUTF16BE
832 #define wxMBConvUTF16swap wxMBConvUTF16LE
834 #define wxMBConvUTF16swap wxMBConvUTF16BE
835 #define wxMBConvUTF16straight wxMBConvUTF16LE
841 // copy 16bit MB to 16bit String
842 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
846 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
849 *buf
++ = *(wxUint16
*)psz
;
852 psz
+= sizeof(wxUint16
);
854 if (buf
&& len
<n
) *buf
=0;
860 // copy 16bit String to 16bit MB
861 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
865 while (*psz
&& (!buf
|| len
< n
))
869 *(wxUint16
*)buf
= *psz
;
870 buf
+= sizeof(wxUint16
);
872 len
+= sizeof(wxUint16
);
875 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
881 // swap 16bit MB to 16bit String
882 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
886 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
890 ((char *)buf
)[0] = psz
[1];
891 ((char *)buf
)[1] = psz
[0];
895 psz
+= sizeof(wxUint16
);
897 if (buf
&& len
<n
) *buf
=0;
903 // swap 16bit String to 16bit MB
904 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
908 while (*psz
&& (!buf
|| len
< n
))
912 *buf
++ = ((char*)psz
)[1];
913 *buf
++ = ((char*)psz
)[0];
915 len
+= sizeof(wxUint16
);
918 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
927 // copy 16bit MB to 32bit String
928 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
932 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
935 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
936 if (pa
== (size_t)-1)
942 psz
+= pa
* sizeof(wxUint16
);
944 if (buf
&& len
<n
) *buf
=0;
950 // copy 32bit String to 16bit MB
951 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
955 while (*psz
&& (!buf
|| len
< n
))
958 size_t pa
=encode_utf16(*psz
, cc
);
960 if (pa
== (size_t)-1)
965 *(wxUint16
*)buf
= cc
[0];
966 buf
+= sizeof(wxUint16
);
969 *(wxUint16
*)buf
= cc
[1];
970 buf
+= sizeof(wxUint16
);
974 len
+= pa
*sizeof(wxUint16
);
977 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
983 // swap 16bit MB to 32bit String
984 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
988 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
992 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
993 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
995 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
996 if (pa
== (size_t)-1)
1003 psz
+= pa
* sizeof(wxUint16
);
1005 if (buf
&& len
<n
) *buf
=0;
1011 // swap 32bit String to 16bit MB
1012 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1016 while (*psz
&& (!buf
|| len
< n
))
1019 size_t pa
=encode_utf16(*psz
, cc
);
1021 if (pa
== (size_t)-1)
1026 *buf
++ = ((char*)cc
)[1];
1027 *buf
++ = ((char*)cc
)[0];
1030 *buf
++ = ((char*)cc
)[3];
1031 *buf
++ = ((char*)cc
)[2];
1035 len
+= pa
*sizeof(wxUint16
);
1038 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
1046 // ----------------------------------------------------------------------------
1048 // ----------------------------------------------------------------------------
1050 #ifdef WORDS_BIGENDIAN
1051 #define wxMBConvUTF32straight wxMBConvUTF32BE
1052 #define wxMBConvUTF32swap wxMBConvUTF32LE
1054 #define wxMBConvUTF32swap wxMBConvUTF32BE
1055 #define wxMBConvUTF32straight wxMBConvUTF32LE
1059 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1060 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
1065 // copy 32bit MB to 16bit String
1066 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1070 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1074 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
1075 if (pa
== (size_t)-1)
1085 psz
+= sizeof(wxUint32
);
1087 if (buf
&& len
<n
) *buf
=0;
1093 // copy 16bit String to 32bit MB
1094 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1098 while (*psz
&& (!buf
|| len
< n
))
1102 // cast is ok for WC_UTF16
1103 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
1104 if (pa
== (size_t)-1)
1109 *(wxUint32
*)buf
= cc
;
1110 buf
+= sizeof(wxUint32
);
1112 len
+= sizeof(wxUint32
);
1116 if (buf
&& len
<=n
-sizeof(wxUint32
))
1124 // swap 32bit MB to 16bit String
1125 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1129 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1132 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
1133 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
1138 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
1139 if (pa
== (size_t)-1)
1149 psz
+= sizeof(wxUint32
);
1159 // swap 16bit String to 32bit MB
1160 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1164 while (*psz
&& (!buf
|| len
< n
))
1168 // cast is ok for WC_UTF16
1169 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
1170 if (pa
== (size_t)-1)
1180 len
+= sizeof(wxUint32
);
1184 if (buf
&& len
<=n
-sizeof(wxUint32
))
1193 // copy 32bit MB to 32bit String
1194 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1198 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1201 *buf
++ = *(wxUint32
*)psz
;
1203 psz
+= sizeof(wxUint32
);
1213 // copy 32bit String to 32bit MB
1214 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1218 while (*psz
&& (!buf
|| len
< n
))
1222 *(wxUint32
*)buf
= *psz
;
1223 buf
+= sizeof(wxUint32
);
1226 len
+= sizeof(wxUint32
);
1230 if (buf
&& len
<=n
-sizeof(wxUint32
))
1237 // swap 32bit MB to 32bit String
1238 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1242 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1246 ((char *)buf
)[0] = psz
[3];
1247 ((char *)buf
)[1] = psz
[2];
1248 ((char *)buf
)[2] = psz
[1];
1249 ((char *)buf
)[3] = psz
[0];
1253 psz
+= sizeof(wxUint32
);
1263 // swap 32bit String to 32bit MB
1264 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1268 while (*psz
&& (!buf
|| len
< n
))
1272 *buf
++ = ((char *)psz
)[3];
1273 *buf
++ = ((char *)psz
)[2];
1274 *buf
++ = ((char *)psz
)[1];
1275 *buf
++ = ((char *)psz
)[0];
1277 len
+= sizeof(wxUint32
);
1281 if (buf
&& len
<=n
-sizeof(wxUint32
))
1291 // ============================================================================
1292 // The classes doing conversion using the iconv_xxx() functions
1293 // ============================================================================
1297 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1298 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1299 // (unless there's yet another bug in glibc) the only case when iconv()
1300 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1301 // left in the input buffer -- when _real_ error occurs,
1302 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1304 // [This bug does not appear in glibc 2.2.]
1305 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1306 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1307 (errno != E2BIG || bufLeft != 0))
1309 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1312 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1314 // ----------------------------------------------------------------------------
1315 // wxMBConv_iconv: encapsulates an iconv character set
1316 // ----------------------------------------------------------------------------
1318 class wxMBConv_iconv
: public wxMBConv
1321 wxMBConv_iconv(const wxChar
*name
);
1322 virtual ~wxMBConv_iconv();
1324 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1325 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1328 { return (m2w
!= (iconv_t
)-1) && (w2m
!= (iconv_t
)-1); }
1331 // the iconv handlers used to translate from multibyte to wide char and in
1332 // the other direction
1336 // guards access to m2w and w2m objects
1337 wxMutex m_iconvMutex
;
1341 // the name (for iconv_open()) of a wide char charset -- if none is
1342 // available on this machine, it will remain NULL
1343 static const char *ms_wcCharsetName
;
1345 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1346 // different endian-ness than the native one
1347 static bool ms_wcNeedsSwap
;
1350 const char *wxMBConv_iconv::ms_wcCharsetName
= NULL
;
1351 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1353 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1355 // Do it the hard way
1357 for (size_t i
= 0; i
< wxStrlen(name
)+1; i
++)
1358 cname
[i
] = (char) name
[i
];
1360 // check for charset that represents wchar_t:
1361 if (ms_wcCharsetName
== NULL
)
1363 ms_wcNeedsSwap
= false;
1365 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1366 ms_wcCharsetName
= WC_NAME_BEST
;
1367 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1369 if (m2w
== (iconv_t
)-1)
1371 // try charset w/o bytesex info (e.g. "UCS4")
1372 // and check for bytesex ourselves:
1373 ms_wcCharsetName
= WC_NAME
;
1374 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1376 // last bet, try if it knows WCHAR_T pseudo-charset
1377 if (m2w
== (iconv_t
)-1)
1379 ms_wcCharsetName
= "WCHAR_T";
1380 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1383 if (m2w
!= (iconv_t
)-1)
1385 char buf
[2], *bufPtr
;
1386 wchar_t wbuf
[2], *wbufPtr
;
1394 outsz
= SIZEOF_WCHAR_T
* 2;
1398 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1399 (char**)&wbufPtr
, &outsz
);
1401 if (ICONV_FAILED(res
, insz
))
1403 ms_wcCharsetName
= NULL
;
1404 wxLogLastError(wxT("iconv"));
1405 wxLogError(_("Conversion to charset '%s' doesn't work."), name
);
1409 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1414 ms_wcCharsetName
= NULL
;
1416 // VS: we must not output an error here, since wxWidgets will safely
1417 // fall back to using wxEncodingConverter.
1418 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name
);
1422 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName
, ms_wcNeedsSwap
);
1424 else // we already have ms_wcCharsetName
1426 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1429 // NB: don't ever pass NULL to iconv_open(), it may crash!
1430 if ( ms_wcCharsetName
)
1432 w2m
= iconv_open( cname
, ms_wcCharsetName
);
1440 wxMBConv_iconv::~wxMBConv_iconv()
1442 if ( m2w
!= (iconv_t
)-1 )
1444 if ( w2m
!= (iconv_t
)-1 )
1448 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1451 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1452 // Unfortunately there are a couple of global wxCSConv objects such as
1453 // wxConvLocal that are used all over wx code, so we have to make sure
1454 // the handle is used by at most one thread at the time. Otherwise
1455 // only a few wx classes would be safe to use from non-main threads
1456 // as MB<->WC conversion would fail "randomly".
1457 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1460 size_t inbuf
= strlen(psz
);
1461 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1463 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1464 wchar_t *bufPtr
= buf
;
1465 const char *pszPtr
= psz
;
1469 // have destination buffer, convert there
1471 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1472 (char**)&bufPtr
, &outbuf
);
1473 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1477 // convert to native endianness
1478 WC_BSWAP(buf
/* _not_ bufPtr */, res
)
1481 // NB: iconv was given only strlen(psz) characters on input, and so
1482 // it couldn't convert the trailing zero. Let's do it ourselves
1483 // if there's some room left for it in the output buffer.
1489 // no destination buffer... convert using temp buffer
1490 // to calculate destination buffer requirement
1495 outbuf
= 8*SIZEOF_WCHAR_T
;
1498 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1499 (char**)&bufPtr
, &outbuf
);
1501 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1502 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1505 if (ICONV_FAILED(cres
, inbuf
))
1507 //VS: it is ok if iconv fails, hence trace only
1508 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1515 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1518 // NB: explained in MB2WC
1519 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1522 size_t inbuf
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
1526 wchar_t *tmpbuf
= 0;
1530 // need to copy to temp buffer to switch endianness
1531 // this absolutely doesn't rock!
1532 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1533 // could be in read-only memory, or be accessed in some other thread)
1534 tmpbuf
=(wchar_t*)malloc((inbuf
+1)*SIZEOF_WCHAR_T
);
1535 memcpy(tmpbuf
,psz
,(inbuf
+1)*SIZEOF_WCHAR_T
);
1536 WC_BSWAP(tmpbuf
, inbuf
)
1542 // have destination buffer, convert there
1543 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1547 // NB: iconv was given only wcslen(psz) characters on input, and so
1548 // it couldn't convert the trailing zero. Let's do it ourselves
1549 // if there's some room left for it in the output buffer.
1555 // no destination buffer... convert using temp buffer
1556 // to calculate destination buffer requirement
1560 buf
= tbuf
; outbuf
= 16;
1562 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1565 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1573 if (ICONV_FAILED(cres
, inbuf
))
1575 //VS: it is ok if iconv fails, hence trace only
1576 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1583 #endif // HAVE_ICONV
1586 // ============================================================================
1587 // Win32 conversion classes
1588 // ============================================================================
1590 #ifdef wxHAVE_WIN32_MB2WC
1594 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1595 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1598 class wxMBConv_win32
: public wxMBConv
1603 m_CodePage
= CP_ACP
;
1607 wxMBConv_win32(const wxChar
* name
)
1609 m_CodePage
= wxCharsetToCodepage(name
);
1612 wxMBConv_win32(wxFontEncoding encoding
)
1614 m_CodePage
= wxEncodingToCodepage(encoding
);
1618 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1620 // note that we have to use MB_ERR_INVALID_CHARS flag as without it
1621 // the behaviour is not compatible with the Unix version (using iconv)
1622 // and breaks the library itself, e.g. wxTextInputStream::NextChar()
1623 // wouldn't work if reading an incomplete MB char didn't result in an
1626 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1627 // an error (tested under Windows Server 2003) and apparently it is
1628 // done on purpose, i.e. the function accepts any input in this case
1629 // and although I'd prefer to return error on ill-formed input, our
1630 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1631 // explicitly ill-formed according to RFC 2152) either, so we don't
1632 // even have any fallback here...
1633 int flags
= m_CodePage
== CP_UTF7
? 0 : MB_ERR_INVALID_CHARS
;
1635 const size_t len
= ::MultiByteToWideChar
1637 m_CodePage
, // code page
1638 flags
, // flags: fall on error
1639 psz
, // input string
1640 -1, // its length (NUL-terminated)
1641 buf
, // output string
1642 buf
? n
: 0 // size of output buffer
1645 // note that it returns count of written chars for buf != NULL and size
1646 // of the needed buffer for buf == NULL so in either case the length of
1647 // the string (which never includes the terminating NUL) is one less
1648 return len
? len
- 1 : (size_t)-1;
1651 size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
1654 we have a problem here: by default, WideCharToMultiByte() may
1655 replace characters unrepresentable in the target code page with bad
1656 quality approximations such as turning "1/2" symbol (U+00BD) into
1657 "1" for the code pages which don't have it and we, obviously, want
1658 to avoid this at any price
1660 the trouble is that this function does it _silently_, i.e. it won't
1661 even tell us whether it did or not... Win98/2000 and higher provide
1662 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1663 we have to resort to a round trip, i.e. check that converting back
1664 results in the same string -- this is, of course, expensive but
1665 otherwise we simply can't be sure to not garble the data.
1668 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1669 // it doesn't work with CJK encodings (which we test for rather roughly
1670 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1672 BOOL usedDef
wxDUMMY_INITIALIZE(false);
1675 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
1677 // it's our lucky day
1678 flags
= WC_NO_BEST_FIT_CHARS
;
1679 pUsedDef
= &usedDef
;
1681 else // old system or unsupported encoding
1687 const size_t len
= ::WideCharToMultiByte
1689 m_CodePage
, // code page
1690 flags
, // either none or no best fit
1691 pwz
, // input string
1692 -1, // it is (wide) NUL-terminated
1693 buf
, // output buffer
1694 buf
? n
: 0, // and its size
1695 NULL
, // default "replacement" char
1696 pUsedDef
// [out] was it used?
1701 // function totally failed
1705 // if we were really converting, check if we succeeded
1710 // check if the conversion failed, i.e. if any replacements
1715 else // we must resort to double tripping...
1717 wxWCharBuffer
wcBuf(n
);
1718 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
1719 wcscmp(wcBuf
, pwz
) != 0 )
1721 // we didn't obtain the same thing we started from, hence
1722 // the conversion was lossy and we consider that it failed
1728 // see the comment above for the reason of "len - 1"
1732 bool IsOk() const { return m_CodePage
!= -1; }
1735 static bool CanUseNoBestFit()
1737 static int s_isWin98Or2k
= -1;
1739 if ( s_isWin98Or2k
== -1 )
1742 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
1745 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
1749 s_isWin98Or2k
= verMaj
>= 5;
1753 // unknown, be conservative by default
1757 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
1760 return s_isWin98Or2k
== 1;
1766 #endif // wxHAVE_WIN32_MB2WC
1768 // ============================================================================
1769 // Cocoa conversion classes
1770 // ============================================================================
1772 #if defined(__WXCOCOA__)
1774 // RN: There is no UTF-32 support in either Core Foundation or
1775 // Cocoa. Strangely enough, Core Foundation uses UTF-32
1776 // internally quite a bit - it's just not public (yet).
1778 #include <CoreFoundation/CFString.h>
1779 #include <CoreFoundation/CFStringEncodingExt.h>
1781 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
1783 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
1784 if ( encoding
== wxFONTENCODING_DEFAULT
)
1786 enc
= CFStringGetSystemEncoding();
1788 else switch( encoding
)
1790 case wxFONTENCODING_ISO8859_1
:
1791 enc
= kCFStringEncodingISOLatin1
;
1793 case wxFONTENCODING_ISO8859_2
:
1794 enc
= kCFStringEncodingISOLatin2
;
1796 case wxFONTENCODING_ISO8859_3
:
1797 enc
= kCFStringEncodingISOLatin3
;
1799 case wxFONTENCODING_ISO8859_4
:
1800 enc
= kCFStringEncodingISOLatin4
;
1802 case wxFONTENCODING_ISO8859_5
:
1803 enc
= kCFStringEncodingISOLatinCyrillic
;
1805 case wxFONTENCODING_ISO8859_6
:
1806 enc
= kCFStringEncodingISOLatinArabic
;
1808 case wxFONTENCODING_ISO8859_7
:
1809 enc
= kCFStringEncodingISOLatinGreek
;
1811 case wxFONTENCODING_ISO8859_8
:
1812 enc
= kCFStringEncodingISOLatinHebrew
;
1814 case wxFONTENCODING_ISO8859_9
:
1815 enc
= kCFStringEncodingISOLatin5
;
1817 case wxFONTENCODING_ISO8859_10
:
1818 enc
= kCFStringEncodingISOLatin6
;
1820 case wxFONTENCODING_ISO8859_11
:
1821 enc
= kCFStringEncodingISOLatinThai
;
1823 case wxFONTENCODING_ISO8859_13
:
1824 enc
= kCFStringEncodingISOLatin7
;
1826 case wxFONTENCODING_ISO8859_14
:
1827 enc
= kCFStringEncodingISOLatin8
;
1829 case wxFONTENCODING_ISO8859_15
:
1830 enc
= kCFStringEncodingISOLatin9
;
1833 case wxFONTENCODING_KOI8
:
1834 enc
= kCFStringEncodingKOI8_R
;
1836 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
1837 enc
= kCFStringEncodingDOSRussian
;
1840 // case wxFONTENCODING_BULGARIAN :
1844 case wxFONTENCODING_CP437
:
1845 enc
=kCFStringEncodingDOSLatinUS
;
1847 case wxFONTENCODING_CP850
:
1848 enc
= kCFStringEncodingDOSLatin1
;
1850 case wxFONTENCODING_CP852
:
1851 enc
= kCFStringEncodingDOSLatin2
;
1853 case wxFONTENCODING_CP855
:
1854 enc
= kCFStringEncodingDOSCyrillic
;
1856 case wxFONTENCODING_CP866
:
1857 enc
=kCFStringEncodingDOSRussian
;
1859 case wxFONTENCODING_CP874
:
1860 enc
= kCFStringEncodingDOSThai
;
1862 case wxFONTENCODING_CP932
:
1863 enc
= kCFStringEncodingDOSJapanese
;
1865 case wxFONTENCODING_CP936
:
1866 enc
=kCFStringEncodingDOSChineseSimplif
;
1868 case wxFONTENCODING_CP949
:
1869 enc
= kCFStringEncodingDOSKorean
;
1871 case wxFONTENCODING_CP950
:
1872 enc
= kCFStringEncodingDOSChineseTrad
;
1874 case wxFONTENCODING_CP1250
:
1875 enc
= kCFStringEncodingWindowsLatin2
;
1877 case wxFONTENCODING_CP1251
:
1878 enc
=kCFStringEncodingWindowsCyrillic
;
1880 case wxFONTENCODING_CP1252
:
1881 enc
=kCFStringEncodingWindowsLatin1
;
1883 case wxFONTENCODING_CP1253
:
1884 enc
= kCFStringEncodingWindowsGreek
;
1886 case wxFONTENCODING_CP1254
:
1887 enc
= kCFStringEncodingWindowsLatin5
;
1889 case wxFONTENCODING_CP1255
:
1890 enc
=kCFStringEncodingWindowsHebrew
;
1892 case wxFONTENCODING_CP1256
:
1893 enc
=kCFStringEncodingWindowsArabic
;
1895 case wxFONTENCODING_CP1257
:
1896 enc
= kCFStringEncodingWindowsBalticRim
;
1898 // This only really encodes to UTF7 (if that) evidently
1899 // case wxFONTENCODING_UTF7 :
1900 // enc = kCFStringEncodingNonLossyASCII ;
1902 case wxFONTENCODING_UTF8
:
1903 enc
= kCFStringEncodingUTF8
;
1905 case wxFONTENCODING_EUC_JP
:
1906 enc
= kCFStringEncodingEUC_JP
;
1908 case wxFONTENCODING_UTF16
:
1909 enc
= kCFStringEncodingUnicode
;
1911 case wxFONTENCODING_MACROMAN
:
1912 enc
= kCFStringEncodingMacRoman
;
1914 case wxFONTENCODING_MACJAPANESE
:
1915 enc
= kCFStringEncodingMacJapanese
;
1917 case wxFONTENCODING_MACCHINESETRAD
:
1918 enc
= kCFStringEncodingMacChineseTrad
;
1920 case wxFONTENCODING_MACKOREAN
:
1921 enc
= kCFStringEncodingMacKorean
;
1923 case wxFONTENCODING_MACARABIC
:
1924 enc
= kCFStringEncodingMacArabic
;
1926 case wxFONTENCODING_MACHEBREW
:
1927 enc
= kCFStringEncodingMacHebrew
;
1929 case wxFONTENCODING_MACGREEK
:
1930 enc
= kCFStringEncodingMacGreek
;
1932 case wxFONTENCODING_MACCYRILLIC
:
1933 enc
= kCFStringEncodingMacCyrillic
;
1935 case wxFONTENCODING_MACDEVANAGARI
:
1936 enc
= kCFStringEncodingMacDevanagari
;
1938 case wxFONTENCODING_MACGURMUKHI
:
1939 enc
= kCFStringEncodingMacGurmukhi
;
1941 case wxFONTENCODING_MACGUJARATI
:
1942 enc
= kCFStringEncodingMacGujarati
;
1944 case wxFONTENCODING_MACORIYA
:
1945 enc
= kCFStringEncodingMacOriya
;
1947 case wxFONTENCODING_MACBENGALI
:
1948 enc
= kCFStringEncodingMacBengali
;
1950 case wxFONTENCODING_MACTAMIL
:
1951 enc
= kCFStringEncodingMacTamil
;
1953 case wxFONTENCODING_MACTELUGU
:
1954 enc
= kCFStringEncodingMacTelugu
;
1956 case wxFONTENCODING_MACKANNADA
:
1957 enc
= kCFStringEncodingMacKannada
;
1959 case wxFONTENCODING_MACMALAJALAM
:
1960 enc
= kCFStringEncodingMacMalayalam
;
1962 case wxFONTENCODING_MACSINHALESE
:
1963 enc
= kCFStringEncodingMacSinhalese
;
1965 case wxFONTENCODING_MACBURMESE
:
1966 enc
= kCFStringEncodingMacBurmese
;
1968 case wxFONTENCODING_MACKHMER
:
1969 enc
= kCFStringEncodingMacKhmer
;
1971 case wxFONTENCODING_MACTHAI
:
1972 enc
= kCFStringEncodingMacThai
;
1974 case wxFONTENCODING_MACLAOTIAN
:
1975 enc
= kCFStringEncodingMacLaotian
;
1977 case wxFONTENCODING_MACGEORGIAN
:
1978 enc
= kCFStringEncodingMacGeorgian
;
1980 case wxFONTENCODING_MACARMENIAN
:
1981 enc
= kCFStringEncodingMacArmenian
;
1983 case wxFONTENCODING_MACCHINESESIMP
:
1984 enc
= kCFStringEncodingMacChineseSimp
;
1986 case wxFONTENCODING_MACTIBETAN
:
1987 enc
= kCFStringEncodingMacTibetan
;
1989 case wxFONTENCODING_MACMONGOLIAN
:
1990 enc
= kCFStringEncodingMacMongolian
;
1992 case wxFONTENCODING_MACETHIOPIC
:
1993 enc
= kCFStringEncodingMacEthiopic
;
1995 case wxFONTENCODING_MACCENTRALEUR
:
1996 enc
= kCFStringEncodingMacCentralEurRoman
;
1998 case wxFONTENCODING_MACVIATNAMESE
:
1999 enc
= kCFStringEncodingMacVietnamese
;
2001 case wxFONTENCODING_MACARABICEXT
:
2002 enc
= kCFStringEncodingMacExtArabic
;
2004 case wxFONTENCODING_MACSYMBOL
:
2005 enc
= kCFStringEncodingMacSymbol
;
2007 case wxFONTENCODING_MACDINGBATS
:
2008 enc
= kCFStringEncodingMacDingbats
;
2010 case wxFONTENCODING_MACTURKISH
:
2011 enc
= kCFStringEncodingMacTurkish
;
2013 case wxFONTENCODING_MACCROATIAN
:
2014 enc
= kCFStringEncodingMacCroatian
;
2016 case wxFONTENCODING_MACICELANDIC
:
2017 enc
= kCFStringEncodingMacIcelandic
;
2019 case wxFONTENCODING_MACROMANIAN
:
2020 enc
= kCFStringEncodingMacRomanian
;
2022 case wxFONTENCODING_MACCELTIC
:
2023 enc
= kCFStringEncodingMacCeltic
;
2025 case wxFONTENCODING_MACGAELIC
:
2026 enc
= kCFStringEncodingMacGaelic
;
2028 // case wxFONTENCODING_MACKEYBOARD :
2029 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2032 // because gcc is picky
2038 class wxMBConv_cocoa
: public wxMBConv
2043 Init(CFStringGetSystemEncoding()) ;
2047 wxMBConv_cocoa(const wxChar
* name
)
2049 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2053 wxMBConv_cocoa(wxFontEncoding encoding
)
2055 Init( wxCFStringEncFromFontEnc(encoding
) );
2062 void Init( CFStringEncoding encoding
)
2064 m_encoding
= encoding
;
2067 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2071 CFStringRef theString
= CFStringCreateWithBytes (
2072 NULL
, //the allocator
2073 (const UInt8
*)szUnConv
,
2076 false //no BOM/external representation
2079 wxASSERT(theString
);
2081 size_t nOutLength
= CFStringGetLength(theString
);
2085 CFRelease(theString
);
2089 CFRange theRange
= { 0, nOutSize
};
2091 #if SIZEOF_WCHAR_T == 4
2092 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2095 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2097 CFRelease(theString
);
2099 szUniCharBuffer
[nOutLength
] = '\0' ;
2101 #if SIZEOF_WCHAR_T == 4
2102 wxMBConvUTF16 converter
;
2103 converter
.MB2WC(szOut
, (const char*)szUniCharBuffer
, nOutSize
) ;
2104 delete[] szUniCharBuffer
;
2110 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2114 size_t nRealOutSize
;
2115 size_t nBufSize
= wxWcslen(szUnConv
);
2116 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
2118 #if SIZEOF_WCHAR_T == 4
2119 wxMBConvUTF16 converter
;
2120 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2121 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1] ;
2122 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
)) ;
2123 nBufSize
/= sizeof(UniChar
);
2126 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2130 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
2133 wxASSERT(theString
);
2135 //Note that CER puts a BOM when converting to unicode
2136 //so we check and use getchars instead in that case
2137 if (m_encoding
== kCFStringEncodingUnicode
)
2140 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2142 nRealOutSize
= CFStringGetLength(theString
) + 1;
2148 CFRangeMake(0, CFStringGetLength(theString
)),
2150 0, //what to put in characters that can't be converted -
2151 //0 tells CFString to return NULL if it meets such a character
2152 false, //not an external representation
2155 (CFIndex
*) &nRealOutSize
2159 CFRelease(theString
);
2161 #if SIZEOF_WCHAR_T == 4
2162 delete[] szUniBuffer
;
2165 return nRealOutSize
- 1;
2170 return m_encoding
!= kCFStringEncodingInvalidId
&&
2171 CFStringIsEncodingAvailable(m_encoding
);
2175 CFStringEncoding m_encoding
;
2178 #endif // defined(__WXCOCOA__)
2180 // ============================================================================
2181 // Mac conversion classes
2182 // ============================================================================
2184 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2186 class wxMBConv_mac
: public wxMBConv
2191 Init(CFStringGetSystemEncoding()) ;
2195 wxMBConv_mac(const wxChar
* name
)
2197 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2201 wxMBConv_mac(wxFontEncoding encoding
)
2203 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
2208 OSStatus status
= noErr
;
2209 status
= TECDisposeConverter(m_MB2WC_converter
);
2210 status
= TECDisposeConverter(m_WC2MB_converter
);
2214 void Init( TextEncodingBase encoding
)
2216 OSStatus status
= noErr
;
2217 m_char_encoding
= encoding
;
2218 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
2220 status
= TECCreateConverter(&m_MB2WC_converter
,
2222 m_unicode_encoding
);
2223 status
= TECCreateConverter(&m_WC2MB_converter
,
2228 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2230 OSStatus status
= noErr
;
2231 ByteCount byteOutLen
;
2232 ByteCount byteInLen
= strlen(psz
) ;
2233 wchar_t *tbuf
= NULL
;
2234 UniChar
* ubuf
= NULL
;
2239 //apple specs say at least 32
2240 n
= wxMax( 32 , byteInLen
) ;
2241 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
2243 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
2244 #if SIZEOF_WCHAR_T == 4
2245 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2247 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2249 status
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2250 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2251 #if SIZEOF_WCHAR_T == 4
2252 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2253 // is not properly terminated we get random characters at the end
2254 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2255 wxMBConvUTF16 converter
;
2256 res
= converter
.MB2WC( (buf
? buf
: tbuf
) , (const char*)ubuf
, n
) ;
2259 res
= byteOutLen
/ sizeof( UniChar
) ;
2264 if ( buf
&& res
< n
)
2270 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2272 OSStatus status
= noErr
;
2273 ByteCount byteOutLen
;
2274 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2280 //apple specs say at least 32
2281 n
= wxMax( 32 , ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2282 tbuf
= (char*) malloc( n
) ;
2285 ByteCount byteBufferLen
= n
;
2286 UniChar
* ubuf
= NULL
;
2287 #if SIZEOF_WCHAR_T == 4
2288 wxMBConvUTF16 converter
;
2289 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2290 byteInLen
= unicharlen
;
2291 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2292 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2294 ubuf
= (UniChar
*) psz
;
2296 status
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2297 (TextPtr
) (buf
? buf
: tbuf
) , byteBufferLen
, &byteOutLen
);
2298 #if SIZEOF_WCHAR_T == 4
2304 size_t res
= byteOutLen
;
2305 if ( buf
&& res
< n
)
2309 //we need to double-trip to verify it didn't insert any ? in place
2310 //of bogus characters
2311 wxWCharBuffer
wcBuf(n
);
2312 size_t pszlen
= wxWcslen(psz
);
2313 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
2314 wxWcslen(wcBuf
) != pszlen
||
2315 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2317 // we didn't obtain the same thing we started from, hence
2318 // the conversion was lossy and we consider that it failed
2327 { return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
; }
2330 TECObjectRef m_MB2WC_converter
;
2331 TECObjectRef m_WC2MB_converter
;
2333 TextEncodingBase m_char_encoding
;
2334 TextEncodingBase m_unicode_encoding
;
2337 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2339 // ============================================================================
2340 // wxEncodingConverter based conversion classes
2341 // ============================================================================
2345 class wxMBConv_wxwin
: public wxMBConv
2350 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2351 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2355 // temporarily just use wxEncodingConverter stuff,
2356 // so that it works while a better implementation is built
2357 wxMBConv_wxwin(const wxChar
* name
)
2360 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2362 m_enc
= wxFONTENCODING_SYSTEM
;
2367 wxMBConv_wxwin(wxFontEncoding enc
)
2374 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2376 size_t inbuf
= strlen(psz
);
2379 if (!m2w
.Convert(psz
,buf
))
2385 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2387 const size_t inbuf
= wxWcslen(psz
);
2390 if (!w2m
.Convert(psz
,buf
))
2397 bool IsOk() const { return m_ok
; }
2400 wxFontEncoding m_enc
;
2401 wxEncodingConverter m2w
, w2m
;
2403 // were we initialized successfully?
2406 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2409 #endif // wxUSE_FONTMAP
2411 // ============================================================================
2412 // wxCSConv implementation
2413 // ============================================================================
2415 void wxCSConv::Init()
2422 wxCSConv::wxCSConv(const wxChar
*charset
)
2431 m_encoding
= wxFONTENCODING_SYSTEM
;
2434 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2436 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2438 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2440 encoding
= wxFONTENCODING_SYSTEM
;
2445 m_encoding
= encoding
;
2448 wxCSConv::~wxCSConv()
2453 wxCSConv::wxCSConv(const wxCSConv
& conv
)
2458 SetName(conv
.m_name
);
2459 m_encoding
= conv
.m_encoding
;
2462 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2466 SetName(conv
.m_name
);
2467 m_encoding
= conv
.m_encoding
;
2472 void wxCSConv::Clear()
2481 void wxCSConv::SetName(const wxChar
*charset
)
2485 m_name
= wxStrdup(charset
);
2490 wxMBConv
*wxCSConv::DoCreate() const
2492 // check for the special case of ASCII or ISO8859-1 charset: as we have
2493 // special knowledge of it anyhow, we don't need to create a special
2494 // conversion object
2495 if ( m_encoding
== wxFONTENCODING_ISO8859_1
)
2497 // don't convert at all
2501 // we trust OS to do conversion better than we can so try external
2502 // conversion methods first
2504 // the full order is:
2505 // 1. OS conversion (iconv() under Unix or Win32 API)
2506 // 2. hard coded conversions for UTF
2507 // 3. wxEncodingConverter as fall back
2513 #endif // !wxUSE_FONTMAP
2515 wxString
name(m_name
);
2519 name
= wxFontMapperBase::Get()->GetEncodingName(m_encoding
);
2520 #endif // wxUSE_FONTMAP
2522 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
2528 #endif // HAVE_ICONV
2530 #ifdef wxHAVE_WIN32_MB2WC
2533 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2534 : new wxMBConv_win32(m_encoding
);
2543 #endif // wxHAVE_WIN32_MB2WC
2544 #if defined(__WXMAC__)
2546 // leave UTF16 and UTF32 to the built-ins of wx
2547 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
2548 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
2552 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
2553 : new wxMBConv_mac(m_encoding
);
2555 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
2564 #if defined(__WXCOCOA__)
2566 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
2570 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
2571 : new wxMBConv_cocoa(m_encoding
);
2573 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
2583 wxFontEncoding enc
= m_encoding
;
2585 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2587 // use "false" to suppress interactive dialogs -- we can be called from
2588 // anywhere and popping up a dialog from here is the last thing we want to
2590 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
2592 #endif // wxUSE_FONTMAP
2596 case wxFONTENCODING_UTF7
:
2597 return new wxMBConvUTF7
;
2599 case wxFONTENCODING_UTF8
:
2600 return new wxMBConvUTF8
;
2602 case wxFONTENCODING_UTF16BE
:
2603 return new wxMBConvUTF16BE
;
2605 case wxFONTENCODING_UTF16LE
:
2606 return new wxMBConvUTF16LE
;
2608 case wxFONTENCODING_UTF32BE
:
2609 return new wxMBConvUTF32BE
;
2611 case wxFONTENCODING_UTF32LE
:
2612 return new wxMBConvUTF32LE
;
2615 // nothing to do but put here to suppress gcc warnings
2622 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2623 : new wxMBConv_wxwin(m_encoding
);
2629 #endif // wxUSE_FONTMAP
2631 // NB: This is a hack to prevent deadlock. What could otherwise happen
2632 // in Unicode build: wxConvLocal creation ends up being here
2633 // because of some failure and logs the error. But wxLog will try to
2634 // attach timestamp, for which it will need wxConvLocal (to convert
2635 // time to char* and then wchar_t*), but that fails, tries to log
2636 // error, but wxLog has an (already locked) critical section that
2637 // guards a static buffer.
2638 static bool alreadyLoggingError
= false;
2639 if (!alreadyLoggingError
)
2641 alreadyLoggingError
= true;
2642 wxLogError(_("Cannot convert from the charset '%s'!"),
2646 wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str()
2647 #else // !wxUSE_FONTMAP
2648 wxString::Format(_("encoding %s"), m_encoding
).c_str()
2649 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2651 alreadyLoggingError
= false;
2657 void wxCSConv::CreateConvIfNeeded() const
2661 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2664 // if we have neither the name nor the encoding, use the default
2665 // encoding for this system
2666 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
2668 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
2670 #endif // wxUSE_INTL
2672 self
->m_convReal
= DoCreate();
2673 self
->m_deferred
= false;
2677 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2679 CreateConvIfNeeded();
2682 return m_convReal
->MB2WC(buf
, psz
, n
);
2685 size_t len
= strlen(psz
);
2689 for (size_t c
= 0; c
<= len
; c
++)
2690 buf
[c
] = (unsigned char)(psz
[c
]);
2696 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2698 CreateConvIfNeeded();
2701 return m_convReal
->WC2MB(buf
, psz
, n
);
2704 const size_t len
= wxWcslen(psz
);
2707 for (size_t c
= 0; c
<= len
; c
++)
2711 buf
[c
] = (char)psz
[c
];
2716 for (size_t c
= 0; c
<= len
; c
++)
2726 // ----------------------------------------------------------------------------
2728 // ----------------------------------------------------------------------------
2731 static wxMBConv_win32 wxConvLibcObj
;
2732 #elif defined(__WXMAC__) && !defined(__MACH__)
2733 static wxMBConv_mac wxConvLibcObj
;
2735 static wxMBConvLibc wxConvLibcObj
;
2738 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
2739 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
2740 static wxMBConvUTF7 wxConvUTF7Obj
;
2741 static wxMBConvUTF8 wxConvUTF8Obj
;
2743 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
2744 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
2745 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
2746 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
2747 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
2748 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
2749 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
= &
2757 #else // !wxUSE_WCHAR_T
2759 // stand-ins in absence of wchar_t
2760 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
2765 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T