1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // ============================================================================
17 // ============================================================================
19 // ----------------------------------------------------------------------------
21 // ----------------------------------------------------------------------------
23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
24 #pragma implementation "strconv.h"
27 // For compilers that support precompilation, includes "wx.h".
28 #include "wx/wxprec.h"
39 #include "wx/strconv.h"
44 #include "wx/msw/private.h"
48 #include "wx/msw/missing.h"
59 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
60 #define wxHAVE_WIN32_MB2WC
61 #endif // __WIN32__ but !__WXMICROWIN__
63 // ----------------------------------------------------------------------------
65 // ----------------------------------------------------------------------------
73 #include "wx/thread.h"
76 #include "wx/encconv.h"
77 #include "wx/fontmap.h"
81 #include <ATSUnicode.h>
82 #include <TextCommon.h>
83 #include <TextEncodingConverter.h>
85 #include "wx/mac/private.h" // includes mac headers
87 // ----------------------------------------------------------------------------
89 // ----------------------------------------------------------------------------
// BSWAP_UCS4(str, len): applies wxUINT32_SWAP_ALWAYS, in place, to each of the
// first `len` elements of `str`.
// NOTE: the macro expands to a bare { ... } block (not do/while(0)) and uses
// `str` and `len` unparenthesized and more than once, so only pass simple,
// side-effect-free expressions and do not follow the call with `else`.
91 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
// BSWAP_UTF16(str, len): same as BSWAP_UCS4 but each element is passed through
// wxUINT16_SWAP_ALWAYS instead. The same macro caveats apply.
92 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
94 #if SIZEOF_WCHAR_T == 4
95 #define WC_NAME "UCS4"
96 #define WC_BSWAP BSWAP_UCS4
97 #ifdef WORDS_BIGENDIAN
98 #define WC_NAME_BEST "UCS-4BE"
100 #define WC_NAME_BEST "UCS-4LE"
102 #elif SIZEOF_WCHAR_T == 2
103 #define WC_NAME "UTF16"
104 #define WC_BSWAP BSWAP_UTF16
106 #ifdef WORDS_BIGENDIAN
107 #define WC_NAME_BEST "UTF-16BE"
109 #define WC_NAME_BEST "UTF-16LE"
111 #else // sizeof(wchar_t) != 2 nor 4
112 // does this ever happen?
113 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
116 // ============================================================================
118 // ============================================================================
120 // ----------------------------------------------------------------------------
121 // UTF-16 en/decoding to/from UCS-4
122 // ----------------------------------------------------------------------------
125 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
130 *output
= (wxUint16
) input
;
133 else if (input
>=0x110000)
141 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
142 *output
= (wxUint16
) ((input
&0x3ff)+0xdc00);
148 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
150 if ((*input
<0xd800) || (*input
>0xdfff))
155 else if ((input
[1]<0xdc00) || (input
[1]>0xdfff))
162 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
168 // ----------------------------------------------------------------------------
170 // ----------------------------------------------------------------------------
172 wxMBConv::~wxMBConv()
174 // nothing to do here (necessary for Darwin linking probably)
177 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
181 // calculate the length of the buffer needed first
182 size_t nLen
= MB2WC(NULL
, psz
, 0);
183 if ( nLen
!= (size_t)-1 )
185 // now do the actual conversion
186 wxWCharBuffer
buf(nLen
);
187 nLen
= MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NULL
188 if ( nLen
!= (size_t)-1 )
195 wxWCharBuffer
buf((wchar_t *)NULL
);
200 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
204 size_t nLen
= WC2MB(NULL
, pwz
, 0);
205 if ( nLen
!= (size_t)-1 )
207 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
208 nLen
= WC2MB(buf
.data(), pwz
, nLen
+ 4);
209 if ( nLen
!= (size_t)-1 )
216 wxCharBuffer
buf((char *)NULL
);
221 const wxWCharBuffer
wxMBConv::cMB2WC(const char *szString
, size_t nStringLen
, size_t* pOutSize
) const
223 wxASSERT(pOutSize
!= NULL
);
225 const char* szEnd
= szString
+ nStringLen
+ 1;
226 const char* szPos
= szString
;
227 const char* szStart
= szPos
;
229 size_t nActualLength
= 0;
230 size_t nCurrentSize
= nStringLen
; //try normal size first (should never resize?)
232 wxWCharBuffer
theBuffer(nCurrentSize
);
234 //Convert the string until the length() is reached, continuing the
235 //loop every time a null character is reached
236 while(szPos
!= szEnd
)
238 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
240 //Get the length of the current (sub)string
241 size_t nLen
= MB2WC(NULL
, szPos
, 0);
243 //Invalid conversion?
244 if( nLen
== (size_t)-1 )
247 theBuffer
.data()[0u] = wxT('\0');
252 //Increase the actual length (+1 for current null character)
253 nActualLength
+= nLen
+ 1;
255 //if the buffer is too small, reallocate it at twice the size
256 if (nActualLength
> (nCurrentSize
+1))
258 wxWCharBuffer
theNewBuffer(nCurrentSize
<< 1);
259 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
* sizeof(wchar_t));
260 theBuffer
= theNewBuffer
;
264 //Convert the current (sub)string
265 if ( MB2WC(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
268 theBuffer
.data()[0u] = wxT('\0');
272 //Increment to next (sub)string
273 //Note that we have to use strlen here instead of nLen
274 //because XX2XX gives us the size of the output buffer,
275 //not necessarily the length of the string
276 szPos
+= strlen(szPos
) + 1;
279 //success - return actual length and the buffer
280 *pOutSize
= nActualLength
;
284 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *szString
, size_t nStringLen
, size_t* pOutSize
) const
286 wxASSERT(pOutSize
!= NULL
);
288 const wchar_t* szEnd
= szString
+ nStringLen
+ 1;
289 const wchar_t* szPos
= szString
;
290 const wchar_t* szStart
= szPos
;
292 size_t nActualLength
= 0;
293 size_t nCurrentSize
= nStringLen
<< 2; //try * 4 first
295 wxCharBuffer
theBuffer(nCurrentSize
);
297 //Convert the string until the length() is reached, continuing the
298 //loop every time a null character is reached
299 while(szPos
!= szEnd
)
301 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
303 //Get the length of the current (sub)string
304 size_t nLen
= WC2MB(NULL
, szPos
, 0);
306 //Invalid conversion?
307 if( nLen
== (size_t)-1 )
310 theBuffer
.data()[0u] = wxT('\0');
314 //Increase the actual length (+1 for current null character)
315 nActualLength
+= nLen
+ 1;
317 //if the buffer is too small, reallocate it at twice the size
318 if (nActualLength
> (nCurrentSize
+1))
320 wxCharBuffer
theNewBuffer(nCurrentSize
<< 1);
321 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
);
322 theBuffer
= theNewBuffer
;
326 //Convert the current (sub)string
327 if(WC2MB(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
330 theBuffer
.data()[0u] = wxT('\0');
334 //Increment to next (sub)string
335 //Note that we have to use wxWcslen here instead of nLen
336 //because XX2XX gives us the size of the output buffer,
337 //not necessarily the length of the string
338 szPos
+= wxWcslen(szPos
) + 1;
341 //success - return actual length and the buffer
342 *pOutSize
= nActualLength
;
346 // ----------------------------------------------------------------------------
348 // ----------------------------------------------------------------------------
350 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
352 return wxMB2WC(buf
, psz
, n
);
355 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
357 return wxWC2MB(buf
, psz
, n
);
362 // ----------------------------------------------------------------------------
363 // wxConvBrokenFileNames
364 // ----------------------------------------------------------------------------
366 wxConvBrokenFileNames::wxConvBrokenFileNames()
368 // decide which conversion to use for the file names
370 // (1) this variable exists for the sole purpose of specifying the encoding
371 // of the filenames for GTK+ programs, so use it if it is set
372 const wxChar
*encName
= wxGetenv(_T("G_FILENAME_ENCODING"));
375 m_conv
= new wxCSConv(encName
);
377 else // no G_FILENAME_ENCODING
379 // (2) if a non default locale is set, assume that the user wants his
380 // filenames in this locale too
381 switch ( wxLocale::GetSystemEncoding() )
384 m_conv
= new wxMBConvLibc
;
387 // (3) finally use UTF-8 by default
388 case wxFONTENCODING_SYSTEM
:
389 case wxFONTENCODING_UTF8
:
390 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
397 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf
,
399 size_t outputSize
) const
401 return m_conv
->MB2WC( outputBuf
, psz
, outputSize
);
405 wxConvBrokenFileNames::WC2MB(char *outputBuf
,
407 size_t outputSize
) const
409 return m_conv
->WC2MB( outputBuf
, psz
, outputSize
);
414 // ----------------------------------------------------------------------------
416 // ----------------------------------------------------------------------------
418 // Implementation (C) 2004 Fredrik Roubert
421 // BASE64 decoding table
423 static const unsigned char utf7unb64
[] =
425 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
426 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
427 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
428 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
429 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
430 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
431 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
432 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
433 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
434 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
435 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
436 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
437 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
438 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
439 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
440 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
441 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
442 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
443 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
444 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
445 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
446 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
447 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
448 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
449 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
450 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
451 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
452 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
453 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
454 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
455 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
456 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
459 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
463 while (*psz
&& ((!buf
) || (len
< n
)))
465 unsigned char cc
= *psz
++;
473 else if (*psz
== '-')
483 // BASE64 encoded string
487 for (lsb
= false, d
= 0, l
= 0;
488 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff; psz
++)
492 for (l
+= 6; l
>= 8; lsb
= !lsb
)
494 c
= (unsigned char)((d
>> (l
-= 8)) % 256);
503 *buf
= (wchar_t)(c
<< 8);
510 if (buf
&& (len
< n
))
516 // BASE64 encoding table
518 static const unsigned char utf7enb64
[] =
520 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
521 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
522 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
523 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
524 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
525 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
526 'w', 'x', 'y', 'z', '0', '1', '2', '3',
527 '4', '5', '6', '7', '8', '9', '+', '/'
531 // UTF-7 encoding table
533 // 0 - Set D (directly encoded characters)
534 // 1 - Set O (optional direct characters)
535 // 2 - whitespace characters (optional)
536 // 3 - special characters
538 static const unsigned char utf7encode
[128] =
540 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
541 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
542 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
543 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
544 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
545 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
546 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
547 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
550 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
556 while (*psz
&& ((!buf
) || (len
< n
)))
559 if (cc
< 0x80 && utf7encode
[cc
] < 1)
567 else if (((wxUint32
)cc
) > 0xffff)
569 // no surrogate pair generation (yet?)
580 // BASE64 encode string
581 unsigned int lsb
, d
, l
;
582 for (d
= 0, l
= 0;; psz
++)
584 for (lsb
= 0; lsb
< 2; lsb
++)
587 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
589 for (l
+= 8; l
>= 6; )
593 *buf
++ = utf7enb64
[(d
>> l
) % 64];
598 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
604 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
613 if (buf
&& (len
< n
))
618 // ----------------------------------------------------------------------------
620 // ----------------------------------------------------------------------------
// Table of upper bounds used by the UTF-8 converters in this file. The first
// six entries are the largest values that fit in 7, 11, 16, 21, 26 and 31
// bits, which presumably correspond to the payload of 1..6 byte sequences in
// the original (pre-RFC 3629) UTF-8 scheme -- TODO confirm against the spec.
// MB2WC treats a decoded value that is <= utf8_max[ocnt] as illegal, and
// WC2MB scans for the first entry that is not smaller than the character.
// The trailing 0xffffffff is the largest 32-bit value, so that scan cannot run
// past the end of the table.
622 static wxUint32 utf8_max
[]=
623 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
625 // boundaries of the private use area we use to (temporarily) remap bytes
626 // that are invalid in a UTF-8 encoded string
// The range is half-open, [wxUnicodePUA, wxUnicodePUAEnd), and 256 values
// wide: MB2WC stores an offending byte as wxUnicodePUA + byte value and WC2MB
// maps it back by subtracting wxUnicodePUA.
627 const wxUint32 wxUnicodePUA
= 0x100000;
628 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
630 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
634 while (*psz
&& ((!buf
) || (len
< n
)))
636 const char *opsz
= psz
;
637 bool invalid
= false;
638 unsigned char cc
= *psz
++, fc
= cc
;
640 for (cnt
= 0; fc
& 0x80; cnt
++)
654 // invalid UTF-8 sequence
659 unsigned ocnt
= cnt
- 1;
660 wxUint32 res
= cc
& (0x3f >> cnt
);
664 if ((cc
& 0xC0) != 0x80)
666 // invalid UTF-8 sequence
671 res
= (res
<< 6) | (cc
& 0x3f);
673 if (invalid
|| res
<= utf8_max
[ocnt
])
675 // illegal UTF-8 encoding
678 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
679 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
681 // if one of our PUA characters turns up externally
682 // it must also be treated as an illegal sequence
683 // (a bit like you have to escape an escape character)
689 // cast is ok because wchar_t == wxUint16 if WC_UTF16
690 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
691 if (pa
== (size_t)-1)
705 #endif // WC_UTF16/!WC_UTF16
710 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
712 while (opsz
< psz
&& (!buf
|| len
< n
))
715 // cast is ok because wchar_t == wxUint16 if WC_UTF16
716 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
717 wxASSERT(pa
!= (size_t)-1);
724 *buf
++ = wxUnicodePUA
+ (unsigned char)*opsz
;
730 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
732 while (opsz
< psz
&& (!buf
|| len
< n
))
734 if ( buf
&& len
+ 3 < n
)
736 unsigned char n
= *opsz
;
738 *buf
++ = (wchar_t)( L
'0' + n
/ 0100 );
739 *buf
++ = (wchar_t)( L
'0' + (n
% 0100) / 010 );
740 *buf
++ = (wchar_t)( L
'0' + n
% 010 );
746 else // MAP_INVALID_UTF8_NOT
753 if (buf
&& (len
< n
))
758 static inline bool isoctal(wchar_t wch
)
760 return L
'0' <= wch
&& wch
<= L
'7';
763 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
767 while (*psz
&& ((!buf
) || (len
< n
)))
771 // cast is ok for WC_UTF16
772 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
773 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
775 cc
=(*psz
++) & 0x7fffffff;
778 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
779 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
782 *buf
++ = (char)(cc
- wxUnicodePUA
);
785 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
787 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
791 *buf
++ = (char) ((psz
[0] - L
'0')*0100 +
792 (psz
[1] - L
'0')*010 +
802 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
816 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
818 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
830 // ----------------------------------------------------------------------------
832 // ----------------------------------------------------------------------------
834 #ifdef WORDS_BIGENDIAN
835 #define wxMBConvUTF16straight wxMBConvUTF16BE
836 #define wxMBConvUTF16swap wxMBConvUTF16LE
838 #define wxMBConvUTF16swap wxMBConvUTF16BE
839 #define wxMBConvUTF16straight wxMBConvUTF16LE
845 // copy 16bit MB to 16bit String
846 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
850 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
853 *buf
++ = *(wxUint16
*)psz
;
856 psz
+= sizeof(wxUint16
);
858 if (buf
&& len
<n
) *buf
=0;
864 // copy 16bit String to 16bit MB
865 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
869 while (*psz
&& (!buf
|| len
< n
))
873 *(wxUint16
*)buf
= *psz
;
874 buf
+= sizeof(wxUint16
);
876 len
+= sizeof(wxUint16
);
879 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
885 // swap 16bit MB to 16bit String
886 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
890 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
894 ((char *)buf
)[0] = psz
[1];
895 ((char *)buf
)[1] = psz
[0];
899 psz
+= sizeof(wxUint16
);
901 if (buf
&& len
<n
) *buf
=0;
907 // swap 16bit String to 16bit MB
908 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
912 while (*psz
&& (!buf
|| len
< n
))
916 *buf
++ = ((char*)psz
)[1];
917 *buf
++ = ((char*)psz
)[0];
919 len
+= sizeof(wxUint16
);
922 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
931 // copy 16bit MB to 32bit String
932 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
936 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
939 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
940 if (pa
== (size_t)-1)
946 psz
+= pa
* sizeof(wxUint16
);
948 if (buf
&& len
<n
) *buf
=0;
954 // copy 32bit String to 16bit MB
955 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
959 while (*psz
&& (!buf
|| len
< n
))
962 size_t pa
=encode_utf16(*psz
, cc
);
964 if (pa
== (size_t)-1)
969 *(wxUint16
*)buf
= cc
[0];
970 buf
+= sizeof(wxUint16
);
973 *(wxUint16
*)buf
= cc
[1];
974 buf
+= sizeof(wxUint16
);
978 len
+= pa
*sizeof(wxUint16
);
981 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
987 // swap 16bit MB to 32bit String
988 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
992 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
996 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
997 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
999 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
1000 if (pa
== (size_t)-1)
1007 psz
+= pa
* sizeof(wxUint16
);
1009 if (buf
&& len
<n
) *buf
=0;
1015 // swap 32bit String to 16bit MB
1016 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1020 while (*psz
&& (!buf
|| len
< n
))
1023 size_t pa
=encode_utf16(*psz
, cc
);
1025 if (pa
== (size_t)-1)
1030 *buf
++ = ((char*)cc
)[1];
1031 *buf
++ = ((char*)cc
)[0];
1034 *buf
++ = ((char*)cc
)[3];
1035 *buf
++ = ((char*)cc
)[2];
1039 len
+= pa
*sizeof(wxUint16
);
1042 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
1050 // ----------------------------------------------------------------------------
1052 // ----------------------------------------------------------------------------
1054 #ifdef WORDS_BIGENDIAN
1055 #define wxMBConvUTF32straight wxMBConvUTF32BE
1056 #define wxMBConvUTF32swap wxMBConvUTF32LE
1058 #define wxMBConvUTF32swap wxMBConvUTF32BE
1059 #define wxMBConvUTF32straight wxMBConvUTF32LE
// Global converter objects for little-endian and big-endian UTF-32, one
// instance of each class. They are declared through WXDLLIMPEXP_DATA_BASE
// (presumably the DLL import/export decoration for data -- confirm in the
// wx headers).
1063 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1064 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
1069 // copy 32bit MB to 16bit String
1070 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1074 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1078 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
1079 if (pa
== (size_t)-1)
1089 psz
+= sizeof(wxUint32
);
1091 if (buf
&& len
<n
) *buf
=0;
1097 // copy 16bit String to 32bit MB
1098 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1102 while (*psz
&& (!buf
|| len
< n
))
1106 // cast is ok for WC_UTF16
1107 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
1108 if (pa
== (size_t)-1)
1113 *(wxUint32
*)buf
= cc
;
1114 buf
+= sizeof(wxUint32
);
1116 len
+= sizeof(wxUint32
);
1120 if (buf
&& len
<=n
-sizeof(wxUint32
))
1128 // swap 32bit MB to 16bit String
1129 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1133 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1136 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
1137 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
1142 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
1143 if (pa
== (size_t)-1)
1153 psz
+= sizeof(wxUint32
);
1163 // swap 16bit String to 32bit MB
1164 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1168 while (*psz
&& (!buf
|| len
< n
))
1172 // cast is ok for WC_UTF16
1173 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
1174 if (pa
== (size_t)-1)
1184 len
+= sizeof(wxUint32
);
1188 if (buf
&& len
<=n
-sizeof(wxUint32
))
1197 // copy 32bit MB to 32bit String
1198 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1202 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1205 *buf
++ = *(wxUint32
*)psz
;
1207 psz
+= sizeof(wxUint32
);
1217 // copy 32bit String to 32bit MB
1218 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1222 while (*psz
&& (!buf
|| len
< n
))
1226 *(wxUint32
*)buf
= *psz
;
1227 buf
+= sizeof(wxUint32
);
1230 len
+= sizeof(wxUint32
);
1234 if (buf
&& len
<=n
-sizeof(wxUint32
))
1241 // swap 32bit MB to 32bit String
1242 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1246 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1250 ((char *)buf
)[0] = psz
[3];
1251 ((char *)buf
)[1] = psz
[2];
1252 ((char *)buf
)[2] = psz
[1];
1253 ((char *)buf
)[3] = psz
[0];
1257 psz
+= sizeof(wxUint32
);
1267 // swap 32bit String to 32bit MB
1268 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1272 while (*psz
&& (!buf
|| len
< n
))
1276 *buf
++ = ((char *)psz
)[3];
1277 *buf
++ = ((char *)psz
)[2];
1278 *buf
++ = ((char *)psz
)[1];
1279 *buf
++ = ((char *)psz
)[0];
1281 len
+= sizeof(wxUint32
);
1285 if (buf
&& len
<=n
-sizeof(wxUint32
))
1295 // ============================================================================
1296 // The classes doing conversion using the iconv_xxx() functions
1297 // ============================================================================
1301 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1302 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1303 // (unless there's yet another bug in glibc) the only case when iconv()
1304 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1305 // left in the input buffer -- when _real_ error occurs,
1306 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1308 // [This bug does not appear in glibc 2.2.]
1309 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1310 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1311 (errno != E2BIG || bufLeft != 0))
1313 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
// Casts `x` to `ICONV_CONST char **`. It is used in this file for the
// input-buffer pointer argument of iconv() calls. ICONV_CONST is not defined
// in this file; presumably it comes from the build configuration and is
// either `const` or empty depending on the platform's iconv() prototype --
// TODO confirm.
1316 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1318 // ----------------------------------------------------------------------------
1319 // wxMBConv_iconv: encapsulates an iconv character set
1320 // ----------------------------------------------------------------------------
1322 class wxMBConv_iconv
: public wxMBConv
1325 wxMBConv_iconv(const wxChar
*name
);
1326 virtual ~wxMBConv_iconv();
1328 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1329 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1332 { return (m2w
!= (iconv_t
)-1) && (w2m
!= (iconv_t
)-1); }
1335 // the iconv handlers used to translate from multibyte to wide char and in
1336 // the other direction
1340 // guards access to m2w and w2m objects
1341 wxMutex m_iconvMutex
;
1345 // the name (for iconv_open()) of a wide char charset -- if none is
1346 // available on this machine, it will remain NULL
1347 static const char *ms_wcCharsetName
;
1349 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1350 // different endian-ness than the native one
1351 static bool ms_wcNeedsSwap
;
// Definitions of the wxMBConv_iconv static members.
// ms_wcCharsetName starts out NULL; the constructor tests for NULL and, when
// it is, probes iconv_open() with candidate names for the wchar_t charset.
1354 const char *wxMBConv_iconv::ms_wcCharsetName
= NULL
;
// ms_wcNeedsSwap starts out false; the constructor sets it by comparing a
// converted test character with the original one.
1355 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1357 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1359 // Do it the hard way
1361 for (size_t i
= 0; i
< wxStrlen(name
)+1; i
++)
1362 cname
[i
] = (char) name
[i
];
1364 // check for charset that represents wchar_t:
1365 if (ms_wcCharsetName
== NULL
)
1367 ms_wcNeedsSwap
= false;
1369 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1370 ms_wcCharsetName
= WC_NAME_BEST
;
1371 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1373 if (m2w
== (iconv_t
)-1)
1375 // try charset w/o bytesex info (e.g. "UCS4")
1376 // and check for bytesex ourselves:
1377 ms_wcCharsetName
= WC_NAME
;
1378 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1380 // last resort: check whether it knows the WCHAR_T pseudo-charset
1381 if (m2w
== (iconv_t
)-1)
1383 ms_wcCharsetName
= "WCHAR_T";
1384 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1387 if (m2w
!= (iconv_t
)-1)
1389 char buf
[2], *bufPtr
;
1390 wchar_t wbuf
[2], *wbufPtr
;
1398 outsz
= SIZEOF_WCHAR_T
* 2;
1402 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1403 (char**)&wbufPtr
, &outsz
);
1405 if (ICONV_FAILED(res
, insz
))
1407 ms_wcCharsetName
= NULL
;
1408 wxLogLastError(wxT("iconv"));
1409 wxLogError(_("Conversion to charset '%s' doesn't work."), name
);
1413 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1418 ms_wcCharsetName
= NULL
;
1420 // VS: we must not output an error here, since wxWidgets will safely
1421 // fall back to using wxEncodingConverter.
1422 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name
);
1426 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName
, ms_wcNeedsSwap
);
1428 else // we already have ms_wcCharsetName
1430 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1433 // NB: don't ever pass NULL to iconv_open(), it may crash!
1434 if ( ms_wcCharsetName
)
1436 w2m
= iconv_open( cname
, ms_wcCharsetName
);
1444 wxMBConv_iconv::~wxMBConv_iconv()
1446 if ( m2w
!= (iconv_t
)-1 )
1448 if ( w2m
!= (iconv_t
)-1 )
1452 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1455 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1456 // Unfortunately there are a couple of global wxCSConv objects such as
1457 // wxConvLocal that are used all over wx code, so we have to make sure
1458 // the handle is used by at most one thread at the time. Otherwise
1459 // only a few wx classes would be safe to use from non-main threads
1460 // as MB<->WC conversion would fail "randomly".
1461 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1464 size_t inbuf
= strlen(psz
);
1465 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1467 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1468 wchar_t *bufPtr
= buf
;
1469 const char *pszPtr
= psz
;
1473 // have destination buffer, convert there
1475 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1476 (char**)&bufPtr
, &outbuf
);
1477 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1481 // convert to native endianness
1482 WC_BSWAP(buf
/* _not_ bufPtr */, res
)
1485 // NB: iconv was given only strlen(psz) characters on input, and so
1486 // it couldn't convert the trailing zero. Let's do it ourselves
1487 // if there's some room left for it in the output buffer.
1493 // no destination buffer... convert using temp buffer
1494 // to calculate destination buffer requirement
1499 outbuf
= 8*SIZEOF_WCHAR_T
;
1502 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1503 (char**)&bufPtr
, &outbuf
);
1505 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1506 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1509 if (ICONV_FAILED(cres
, inbuf
))
1511 //VS: it is ok if iconv fails, hence trace only
1512 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1519 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1522 // NB: explained in MB2WC
1523 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1526 size_t inbuf
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
1530 wchar_t *tmpbuf
= 0;
1534 // need to copy to temp buffer to switch endianness
1535 // this absolutely doesn't rock!
1536 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1537 // could be in read-only memory, or be accessed in some other thread)
1538 tmpbuf
=(wchar_t*)malloc((inbuf
+1)*SIZEOF_WCHAR_T
);
1539 memcpy(tmpbuf
,psz
,(inbuf
+1)*SIZEOF_WCHAR_T
);
1540 WC_BSWAP(tmpbuf
, inbuf
)
1546 // have destination buffer, convert there
1547 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1551 // NB: iconv was given only wcslen(psz) characters on input, and so
1552 // it couldn't convert the trailing zero. Let's do it ourselves
1553 // if there's some room left for it in the output buffer.
1559 // no destination buffer... convert using temp buffer
1560 // to calculate destination buffer requirement
1564 buf
= tbuf
; outbuf
= 16;
1566 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1569 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1577 if (ICONV_FAILED(cres
, inbuf
))
1579 //VS: it is ok if iconv fails, hence trace only
1580 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1587 #endif // HAVE_ICONV
1590 // ============================================================================
1591 // Win32 conversion classes
1592 // ============================================================================
1594 #ifdef wxHAVE_WIN32_MB2WC
// Declarations of helpers defined outside this file. The wxMBConv_win32
// constructors call them to turn a charset name or a wxFontEncoding value
// into the code page stored in m_CodePage. IsOk() compares m_CodePage with
// -1, so that is presumably their failure value -- confirm at the definitions.
1598 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1599 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1602 class wxMBConv_win32
: public wxMBConv
1607 m_CodePage
= CP_ACP
;
1611 wxMBConv_win32(const wxChar
* name
)
1613 m_CodePage
= wxCharsetToCodepage(name
);
1616 wxMBConv_win32(wxFontEncoding encoding
)
1618 m_CodePage
= wxEncodingToCodepage(encoding
);
1622 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1624 // note that we have to use the MB_ERR_INVALID_CHARS flag as without it
1625 // the behaviour is not compatible with the Unix version (using iconv)
1626 // and breaks the library itself, e.g. wxTextInputStream::NextChar()
1627 // wouldn't work if reading an incomplete MB char didn't result in an
1630 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1631 // an error (tested under Windows Server 2003) and apparently it is
1632 // done on purpose, i.e. the function accepts any input in this case
1633 // and although I'd prefer to return an error on ill-formed input, our
1634 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1635 // explicitly ill-formed according to RFC 2152) either so we don't
1636 // even have any fallback here...
1637 int flags
= m_CodePage
== CP_UTF7
? 0 : MB_ERR_INVALID_CHARS
;
1639 const size_t len
= ::MultiByteToWideChar
1641 m_CodePage
, // code page
1642 flags
, // flags: fall on error
1643 psz
, // input string
1644 -1, // its length (NUL-terminated)
1645 buf
, // output string
1646 buf
? n
: 0 // size of output buffer
1649 // note that it returns count of written chars for buf != NULL and size
1650 // of the needed buffer for buf == NULL so in either case the length of
1651 // the string (which never includes the terminating NUL) is one less
1652 return len
? len
- 1 : (size_t)-1;
1655 size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
1658 we have a problem here: by default, WideCharToMultiByte() may
1659 replace characters unrepresentable in the target code page with bad
1660 quality approximations such as turning "1/2" symbol (U+00BD) into
1661 "1" for the code pages which don't have it and we, obviously, want
1662 to avoid this at any price
1664 the trouble is that this function does it _silently_, i.e. it won't
1665 even tell us whether it did or not... Win98/2000 and higher provide
1666 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1667 we have to resort to a round trip, i.e. check that converting back
1668 results in the same string -- this is, of course, expensive but
1669 otherwise we simply can't be sure to not garble the data.
1672 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1673 // it doesn't work with CJK encodings (which we test for rather roughly
1674 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1676 BOOL usedDef
wxDUMMY_INITIALIZE(false);
1679 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
1681 // it's our lucky day
1682 flags
= WC_NO_BEST_FIT_CHARS
;
1683 pUsedDef
= &usedDef
;
1685 else // old system or unsupported encoding
1691 const size_t len
= ::WideCharToMultiByte
1693 m_CodePage
, // code page
1694 flags
, // either none or no best fit
1695 pwz
, // input string
1696 -1, // it is (wide) NUL-terminated
1697 buf
, // output buffer
1698 buf
? n
: 0, // and its size
1699 NULL
, // default "replacement" char
1700 pUsedDef
// [out] was it used?
1705 // function totally failed
1709 // if we were really converting, check if we succeeded
1714 // check if the conversion failed, i.e. if any replacements
1719 else // we must resort to double tripping...
1721 wxWCharBuffer
wcBuf(n
);
1722 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
1723 wcscmp(wcBuf
, pwz
) != 0 )
1725 // we didn't obtain the same thing we started from, hence
1726 // the conversion was lossy and we consider that it failed
1732 // see the comment above for the reason of "len - 1"
1736 bool IsOk() const { return m_CodePage
!= -1; }
1739 static bool CanUseNoBestFit()
1741 static int s_isWin98Or2k
= -1;
1743 if ( s_isWin98Or2k
== -1 )
1746 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
1749 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
1753 s_isWin98Or2k
= verMaj
>= 5;
1757 // unknown, be conservative by default
1761 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
1764 return s_isWin98Or2k
== 1;
1770 #endif // wxHAVE_WIN32_MB2WC
1772 // ============================================================================
1773 // Cocoa conversion classes
1774 // ============================================================================
1776 #if defined(__WXCOCOA__)
1778 // RN: There is no UTF-32 support in either Core Foundation or
1779 // Cocoa. Strangely enough, Core Foundation uses
1780 // UTF-32 internally quite a bit - it's just not public (yet).
1782 #include <CoreFoundation/CFString.h>
1783 #include <CoreFoundation/CFStringEncodingExt.h>
1785 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
1787 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
1788 if ( encoding
== wxFONTENCODING_DEFAULT
)
1790 enc
= CFStringGetSystemEncoding();
1792 else switch( encoding
)
1794 case wxFONTENCODING_ISO8859_1
:
1795 enc
= kCFStringEncodingISOLatin1
;
1797 case wxFONTENCODING_ISO8859_2
:
1798 enc
= kCFStringEncodingISOLatin2
;
1800 case wxFONTENCODING_ISO8859_3
:
1801 enc
= kCFStringEncodingISOLatin3
;
1803 case wxFONTENCODING_ISO8859_4
:
1804 enc
= kCFStringEncodingISOLatin4
;
1806 case wxFONTENCODING_ISO8859_5
:
1807 enc
= kCFStringEncodingISOLatinCyrillic
;
1809 case wxFONTENCODING_ISO8859_6
:
1810 enc
= kCFStringEncodingISOLatinArabic
;
1812 case wxFONTENCODING_ISO8859_7
:
1813 enc
= kCFStringEncodingISOLatinGreek
;
1815 case wxFONTENCODING_ISO8859_8
:
1816 enc
= kCFStringEncodingISOLatinHebrew
;
1818 case wxFONTENCODING_ISO8859_9
:
1819 enc
= kCFStringEncodingISOLatin5
;
1821 case wxFONTENCODING_ISO8859_10
:
1822 enc
= kCFStringEncodingISOLatin6
;
1824 case wxFONTENCODING_ISO8859_11
:
1825 enc
= kCFStringEncodingISOLatinThai
;
1827 case wxFONTENCODING_ISO8859_13
:
1828 enc
= kCFStringEncodingISOLatin7
;
1830 case wxFONTENCODING_ISO8859_14
:
1831 enc
= kCFStringEncodingISOLatin8
;
1833 case wxFONTENCODING_ISO8859_15
:
1834 enc
= kCFStringEncodingISOLatin9
;
1837 case wxFONTENCODING_KOI8
:
1838 enc
= kCFStringEncodingKOI8_R
;
1840 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
1841 enc
= kCFStringEncodingDOSRussian
;
1844 // case wxFONTENCODING_BULGARIAN :
1848 case wxFONTENCODING_CP437
:
1849 enc
=kCFStringEncodingDOSLatinUS
;
1851 case wxFONTENCODING_CP850
:
1852 enc
= kCFStringEncodingDOSLatin1
;
1854 case wxFONTENCODING_CP852
:
1855 enc
= kCFStringEncodingDOSLatin2
;
1857 case wxFONTENCODING_CP855
:
1858 enc
= kCFStringEncodingDOSCyrillic
;
1860 case wxFONTENCODING_CP866
:
1861 enc
=kCFStringEncodingDOSRussian
;
1863 case wxFONTENCODING_CP874
:
1864 enc
= kCFStringEncodingDOSThai
;
1866 case wxFONTENCODING_CP932
:
1867 enc
= kCFStringEncodingDOSJapanese
;
1869 case wxFONTENCODING_CP936
:
1870 enc
=kCFStringEncodingDOSChineseSimplif
;
1872 case wxFONTENCODING_CP949
:
1873 enc
= kCFStringEncodingDOSKorean
;
1875 case wxFONTENCODING_CP950
:
1876 enc
= kCFStringEncodingDOSChineseTrad
;
1878 case wxFONTENCODING_CP1250
:
1879 enc
= kCFStringEncodingWindowsLatin2
;
1881 case wxFONTENCODING_CP1251
:
1882 enc
=kCFStringEncodingWindowsCyrillic
;
1884 case wxFONTENCODING_CP1252
:
1885 enc
=kCFStringEncodingWindowsLatin1
;
1887 case wxFONTENCODING_CP1253
:
1888 enc
= kCFStringEncodingWindowsGreek
;
1890 case wxFONTENCODING_CP1254
:
1891 enc
= kCFStringEncodingWindowsLatin5
;
1893 case wxFONTENCODING_CP1255
:
1894 enc
=kCFStringEncodingWindowsHebrew
;
1896 case wxFONTENCODING_CP1256
:
1897 enc
=kCFStringEncodingWindowsArabic
;
1899 case wxFONTENCODING_CP1257
:
1900 enc
= kCFStringEncodingWindowsBalticRim
;
1902 // This only really encodes to UTF7 (if that) evidently
1903 // case wxFONTENCODING_UTF7 :
1904 // enc = kCFStringEncodingNonLossyASCII ;
1906 case wxFONTENCODING_UTF8
:
1907 enc
= kCFStringEncodingUTF8
;
1909 case wxFONTENCODING_EUC_JP
:
1910 enc
= kCFStringEncodingEUC_JP
;
1912 case wxFONTENCODING_UTF16
:
1913 enc
= kCFStringEncodingUnicode
;
1915 case wxFONTENCODING_MACROMAN
:
1916 enc
= kCFStringEncodingMacRoman
;
1918 case wxFONTENCODING_MACJAPANESE
:
1919 enc
= kCFStringEncodingMacJapanese
;
1921 case wxFONTENCODING_MACCHINESETRAD
:
1922 enc
= kCFStringEncodingMacChineseTrad
;
1924 case wxFONTENCODING_MACKOREAN
:
1925 enc
= kCFStringEncodingMacKorean
;
1927 case wxFONTENCODING_MACARABIC
:
1928 enc
= kCFStringEncodingMacArabic
;
1930 case wxFONTENCODING_MACHEBREW
:
1931 enc
= kCFStringEncodingMacHebrew
;
1933 case wxFONTENCODING_MACGREEK
:
1934 enc
= kCFStringEncodingMacGreek
;
1936 case wxFONTENCODING_MACCYRILLIC
:
1937 enc
= kCFStringEncodingMacCyrillic
;
1939 case wxFONTENCODING_MACDEVANAGARI
:
1940 enc
= kCFStringEncodingMacDevanagari
;
1942 case wxFONTENCODING_MACGURMUKHI
:
1943 enc
= kCFStringEncodingMacGurmukhi
;
1945 case wxFONTENCODING_MACGUJARATI
:
1946 enc
= kCFStringEncodingMacGujarati
;
1948 case wxFONTENCODING_MACORIYA
:
1949 enc
= kCFStringEncodingMacOriya
;
1951 case wxFONTENCODING_MACBENGALI
:
1952 enc
= kCFStringEncodingMacBengali
;
1954 case wxFONTENCODING_MACTAMIL
:
1955 enc
= kCFStringEncodingMacTamil
;
1957 case wxFONTENCODING_MACTELUGU
:
1958 enc
= kCFStringEncodingMacTelugu
;
1960 case wxFONTENCODING_MACKANNADA
:
1961 enc
= kCFStringEncodingMacKannada
;
1963 case wxFONTENCODING_MACMALAJALAM
:
1964 enc
= kCFStringEncodingMacMalayalam
;
1966 case wxFONTENCODING_MACSINHALESE
:
1967 enc
= kCFStringEncodingMacSinhalese
;
1969 case wxFONTENCODING_MACBURMESE
:
1970 enc
= kCFStringEncodingMacBurmese
;
1972 case wxFONTENCODING_MACKHMER
:
1973 enc
= kCFStringEncodingMacKhmer
;
1975 case wxFONTENCODING_MACTHAI
:
1976 enc
= kCFStringEncodingMacThai
;
1978 case wxFONTENCODING_MACLAOTIAN
:
1979 enc
= kCFStringEncodingMacLaotian
;
1981 case wxFONTENCODING_MACGEORGIAN
:
1982 enc
= kCFStringEncodingMacGeorgian
;
1984 case wxFONTENCODING_MACARMENIAN
:
1985 enc
= kCFStringEncodingMacArmenian
;
1987 case wxFONTENCODING_MACCHINESESIMP
:
1988 enc
= kCFStringEncodingMacChineseSimp
;
1990 case wxFONTENCODING_MACTIBETAN
:
1991 enc
= kCFStringEncodingMacTibetan
;
1993 case wxFONTENCODING_MACMONGOLIAN
:
1994 enc
= kCFStringEncodingMacMongolian
;
1996 case wxFONTENCODING_MACETHIOPIC
:
1997 enc
= kCFStringEncodingMacEthiopic
;
1999 case wxFONTENCODING_MACCENTRALEUR
:
2000 enc
= kCFStringEncodingMacCentralEurRoman
;
2002 case wxFONTENCODING_MACVIATNAMESE
:
2003 enc
= kCFStringEncodingMacVietnamese
;
2005 case wxFONTENCODING_MACARABICEXT
:
2006 enc
= kCFStringEncodingMacExtArabic
;
2008 case wxFONTENCODING_MACSYMBOL
:
2009 enc
= kCFStringEncodingMacSymbol
;
2011 case wxFONTENCODING_MACDINGBATS
:
2012 enc
= kCFStringEncodingMacDingbats
;
2014 case wxFONTENCODING_MACTURKISH
:
2015 enc
= kCFStringEncodingMacTurkish
;
2017 case wxFONTENCODING_MACCROATIAN
:
2018 enc
= kCFStringEncodingMacCroatian
;
2020 case wxFONTENCODING_MACICELANDIC
:
2021 enc
= kCFStringEncodingMacIcelandic
;
2023 case wxFONTENCODING_MACROMANIAN
:
2024 enc
= kCFStringEncodingMacRomanian
;
2026 case wxFONTENCODING_MACCELTIC
:
2027 enc
= kCFStringEncodingMacCeltic
;
2029 case wxFONTENCODING_MACGAELIC
:
2030 enc
= kCFStringEncodingMacGaelic
;
2032 // case wxFONTENCODING_MACKEYBOARD :
2033 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2036 // because gcc is picky
2042 class wxMBConv_cocoa
: public wxMBConv
2047 Init(CFStringGetSystemEncoding()) ;
2051 wxMBConv_cocoa(const wxChar
* name
)
2053 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2057 wxMBConv_cocoa(wxFontEncoding encoding
)
2059 Init( wxCFStringEncFromFontEnc(encoding
) );
2066 void Init( CFStringEncoding encoding
)
2068 m_encoding
= encoding
;
2071 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2075 CFStringRef theString
= CFStringCreateWithBytes (
2076 NULL
, //the allocator
2077 (const UInt8
*)szUnConv
,
2080 false //no BOM/external representation
2083 wxASSERT(theString
);
2085 size_t nOutLength
= CFStringGetLength(theString
);
2089 CFRelease(theString
);
2093 CFRange theRange
= { 0, nOutSize
};
2095 #if SIZEOF_WCHAR_T == 4
2096 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2099 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2101 CFRelease(theString
);
2103 szUniCharBuffer
[nOutLength
] = '\0' ;
2105 #if SIZEOF_WCHAR_T == 4
2106 wxMBConvUTF16 converter
;
2107 converter
.MB2WC(szOut
, (const char*)szUniCharBuffer
, nOutSize
) ;
2108 delete[] szUniCharBuffer
;
2114 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2118 size_t nRealOutSize
;
2119 size_t nBufSize
= wxWcslen(szUnConv
);
2120 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
2122 #if SIZEOF_WCHAR_T == 4
2123 wxMBConvUTF16BE converter
;
2124 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2125 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1] ;
2126 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
)) ;
2127 nBufSize
/= sizeof(UniChar
);
2130 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2134 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
2137 wxASSERT(theString
);
2139 //Note that CER puts a BOM when converting to unicode
2140 //so we check and use getchars instead in that case
2141 if (m_encoding
== kCFStringEncodingUnicode
)
2144 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2146 nRealOutSize
= CFStringGetLength(theString
) + 1;
2152 CFRangeMake(0, CFStringGetLength(theString
)),
2154 0, //what to put in characters that can't be converted -
2155 //0 tells CFString to return NULL if it meets such a character
2156 false, //not an external representation
2159 (CFIndex
*) &nRealOutSize
2163 CFRelease(theString
);
2165 #if SIZEOF_WCHAR_T == 4
2166 delete[] szUniBuffer
;
2169 return nRealOutSize
- 1;
2174 return m_encoding
!= kCFStringEncodingInvalidId
&&
2175 CFStringIsEncodingAvailable(m_encoding
);
2179 CFStringEncoding m_encoding
;
2182 #endif // defined(__WXCOCOA__)
2184 // ============================================================================
2185 // Mac conversion classes
2186 // ============================================================================
2188 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2190 class wxMBConv_mac
: public wxMBConv
2195 Init(CFStringGetSystemEncoding()) ;
2199 wxMBConv_mac(const wxChar
* name
)
2201 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2205 wxMBConv_mac(wxFontEncoding encoding
)
2207 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
2212 OSStatus status
= noErr
;
2213 status
= TECDisposeConverter(m_MB2WC_converter
);
2214 status
= TECDisposeConverter(m_WC2MB_converter
);
2218 void Init( TextEncodingBase encoding
)
2220 OSStatus status
= noErr
;
2221 m_char_encoding
= encoding
;
2222 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
2224 status
= TECCreateConverter(&m_MB2WC_converter
,
2226 m_unicode_encoding
);
2227 status
= TECCreateConverter(&m_WC2MB_converter
,
2232 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2234 OSStatus status
= noErr
;
2235 ByteCount byteOutLen
;
2236 ByteCount byteInLen
= strlen(psz
) ;
2237 wchar_t *tbuf
= NULL
;
2238 UniChar
* ubuf
= NULL
;
2243 //apple specs say at least 32
2244 n
= wxMax( 32 , byteInLen
) ;
2245 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
2247 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
2248 #if SIZEOF_WCHAR_T == 4
2249 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2251 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2253 status
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2254 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2255 #if SIZEOF_WCHAR_T == 4
2256 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2257 // is not properly terminated we get random characters at the end
2258 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2259 wxMBConvUTF16BE converter
;
2260 res
= converter
.MB2WC( (buf
? buf
: tbuf
) , (const char*)ubuf
, n
) ;
2263 res
= byteOutLen
/ sizeof( UniChar
) ;
2268 if ( buf
&& res
< n
)
2274 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2276 OSStatus status
= noErr
;
2277 ByteCount byteOutLen
;
2278 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2284 //apple specs say at least 32
2285 n
= wxMax( 32 , ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2286 tbuf
= (char*) malloc( n
) ;
2289 ByteCount byteBufferLen
= n
;
2290 UniChar
* ubuf
= NULL
;
2291 #if SIZEOF_WCHAR_T == 4
2292 wxMBConvUTF16BE converter
;
2293 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2294 byteInLen
= unicharlen
;
2295 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2296 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2298 ubuf
= (UniChar
*) psz
;
2300 status
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2301 (TextPtr
) (buf
? buf
: tbuf
) , byteBufferLen
, &byteOutLen
);
2302 #if SIZEOF_WCHAR_T == 4
2308 size_t res
= byteOutLen
;
2309 if ( buf
&& res
< n
)
2313 //we need to double-trip to verify it didn't insert any ? in place
2314 //of bogus characters
2315 wxWCharBuffer
wcBuf(n
);
2316 size_t pszlen
= wxWcslen(psz
);
2317 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
2318 wxWcslen(wcBuf
) != pszlen
||
2319 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2321 // we didn't obtain the same thing we started from, hence
2322 // the conversion was lossy and we consider that it failed
2331 { return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
; }
2334 TECObjectRef m_MB2WC_converter
;
2335 TECObjectRef m_WC2MB_converter
;
2337 TextEncodingBase m_char_encoding
;
2338 TextEncodingBase m_unicode_encoding
;
2341 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2343 // ============================================================================
2344 // wxEncodingConverter based conversion classes
2345 // ============================================================================
2349 class wxMBConv_wxwin
: public wxMBConv
2354 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2355 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2359 // temporarily just use wxEncodingConverter stuff,
2360 // so that it works while a better implementation is built
2361 wxMBConv_wxwin(const wxChar
* name
)
2364 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2366 m_enc
= wxFONTENCODING_SYSTEM
;
2371 wxMBConv_wxwin(wxFontEncoding enc
)
2378 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2380 size_t inbuf
= strlen(psz
);
2383 if (!m2w
.Convert(psz
,buf
))
2389 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2391 const size_t inbuf
= wxWcslen(psz
);
2394 if (!w2m
.Convert(psz
,buf
))
2401 bool IsOk() const { return m_ok
; }
2404 wxFontEncoding m_enc
;
2405 wxEncodingConverter m2w
, w2m
;
2407 // were we initialized successfully?
2410 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2413 #endif // wxUSE_FONTMAP
2415 // ============================================================================
2416 // wxCSConv implementation
2417 // ============================================================================
2419 void wxCSConv::Init()
2426 wxCSConv::wxCSConv(const wxChar
*charset
)
2435 m_encoding
= wxFONTENCODING_SYSTEM
;
2438 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2440 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2442 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2444 encoding
= wxFONTENCODING_SYSTEM
;
2449 m_encoding
= encoding
;
2452 wxCSConv::~wxCSConv()
2457 wxCSConv::wxCSConv(const wxCSConv
& conv
)
2462 SetName(conv
.m_name
);
2463 m_encoding
= conv
.m_encoding
;
2466 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2470 SetName(conv
.m_name
);
2471 m_encoding
= conv
.m_encoding
;
2476 void wxCSConv::Clear()
2485 void wxCSConv::SetName(const wxChar
*charset
)
2489 m_name
= wxStrdup(charset
);
2494 wxMBConv
*wxCSConv::DoCreate() const
2496 // check for the special case of ASCII or ISO8859-1 charset: as we have
2497 // special knowledge of it anyhow, we don't need to create a special
2498 // conversion object
2499 if ( m_encoding
== wxFONTENCODING_ISO8859_1
)
2501 // don't convert at all
2505 // we trust OS to do conversion better than we can so try external
2506 // conversion methods first
2508 // the full order is:
2509 // 1. OS conversion (iconv() under Unix or Win32 API)
2510 // 2. hard coded conversions for UTF
2511 // 3. wxEncodingConverter as fall back
2517 #endif // !wxUSE_FONTMAP
2519 wxString
name(m_name
);
2523 name
= wxFontMapperBase::Get()->GetEncodingName(m_encoding
);
2524 #endif // wxUSE_FONTMAP
2526 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
2532 #endif // HAVE_ICONV
2534 #ifdef wxHAVE_WIN32_MB2WC
2537 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2538 : new wxMBConv_win32(m_encoding
);
2547 #endif // wxHAVE_WIN32_MB2WC
2548 #if defined(__WXMAC__)
2550 // leave UTF16 and UTF32 to the built-ins of wx
2551 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
2552 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
2556 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
2557 : new wxMBConv_mac(m_encoding
);
2559 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
2568 #if defined(__WXCOCOA__)
2570 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
2574 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
2575 : new wxMBConv_cocoa(m_encoding
);
2577 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
2587 wxFontEncoding enc
= m_encoding
;
2589 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2591 // use "false" to suppress interactive dialogs -- we can be called from
2592 // anywhere and popping up a dialog from here is the last thing we want to
2594 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
2596 #endif // wxUSE_FONTMAP
2600 case wxFONTENCODING_UTF7
:
2601 return new wxMBConvUTF7
;
2603 case wxFONTENCODING_UTF8
:
2604 return new wxMBConvUTF8
;
2606 case wxFONTENCODING_UTF16BE
:
2607 return new wxMBConvUTF16BE
;
2609 case wxFONTENCODING_UTF16LE
:
2610 return new wxMBConvUTF16LE
;
2612 case wxFONTENCODING_UTF32BE
:
2613 return new wxMBConvUTF32BE
;
2615 case wxFONTENCODING_UTF32LE
:
2616 return new wxMBConvUTF32LE
;
2619 // nothing to do but put here to suppress gcc warnings
2626 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2627 : new wxMBConv_wxwin(m_encoding
);
2633 #endif // wxUSE_FONTMAP
2635 // NB: This is a hack to prevent deadlock. What could otherwise happen
2636 // in Unicode build: wxConvLocal creation ends up being here
2637 // because of some failure and logs the error. But wxLog will try to
2638 // attach timestamp, for which it will need wxConvLocal (to convert
2639 // time to char* and then wchar_t*), but that fails, tries to log
2640 // error, but wxLog has an (already locked) critical section that
2641 // guards static buffer.
2642 static bool alreadyLoggingError
= false;
2643 if (!alreadyLoggingError
)
2645 alreadyLoggingError
= true;
2646 wxLogError(_("Cannot convert from the charset '%s'!"),
2650 wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str()
2651 #else // !wxUSE_FONTMAP
2652 wxString::Format(_("encoding %s"), m_encoding
).c_str()
2653 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2655 alreadyLoggingError
= false;
2661 void wxCSConv::CreateConvIfNeeded() const
2665 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2668 // if we have neither the name nor the encoding, use the default
2669 // encoding for this system
2670 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
2672 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
2674 #endif // wxUSE_INTL
2676 self
->m_convReal
= DoCreate();
2677 self
->m_deferred
= false;
2681 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2683 CreateConvIfNeeded();
2686 return m_convReal
->MB2WC(buf
, psz
, n
);
2689 size_t len
= strlen(psz
);
2693 for (size_t c
= 0; c
<= len
; c
++)
2694 buf
[c
] = (unsigned char)(psz
[c
]);
2700 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2702 CreateConvIfNeeded();
2705 return m_convReal
->WC2MB(buf
, psz
, n
);
2708 const size_t len
= wxWcslen(psz
);
2711 for (size_t c
= 0; c
<= len
; c
++)
2715 buf
[c
] = (char)psz
[c
];
2720 for (size_t c
= 0; c
<= len
; c
++)
2730 // ----------------------------------------------------------------------------
2732 // ----------------------------------------------------------------------------
2735 static wxMBConv_win32 wxConvLibcObj
;
2736 #elif defined(__WXMAC__) && !defined(__MACH__)
2737 static wxMBConv_mac wxConvLibcObj
;
2739 static wxMBConvLibc wxConvLibcObj
;
2742 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
2743 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
2744 static wxMBConvUTF7 wxConvUTF7Obj
;
2745 static wxMBConvUTF8 wxConvUTF8Obj
;
2747 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
2748 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
2749 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
2750 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
2751 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
2752 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
2753 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
= &
2761 #else // !wxUSE_WCHAR_T
2763 // stand-ins in absence of wchar_t
2764 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
2769 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T