1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // ============================================================================
17 // ============================================================================
19 // ----------------------------------------------------------------------------
21 // ----------------------------------------------------------------------------
23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
24 #pragma implementation "strconv.h"
27 // For compilers that support precompilation, includes "wx.h".
28 #include "wx/wxprec.h"
39 #include "wx/strconv.h"
44 #include "wx/msw/private.h"
48 #include "wx/msw/missing.h"
58 #ifdef HAVE_LANGINFO_H
62 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
63 #define wxHAVE_WIN32_MB2WC
64 #endif // __WIN32__ but !__WXMICROWIN__
66 // ----------------------------------------------------------------------------
68 // ----------------------------------------------------------------------------
76 #include "wx/thread.h"
79 #include "wx/encconv.h"
80 #include "wx/fontmap.h"
84 #include <ATSUnicode.h>
85 #include <TextCommon.h>
86 #include <TextEncodingConverter.h>
88 #include "wx/mac/private.h" // includes mac headers
90 // ----------------------------------------------------------------------------
92 // ----------------------------------------------------------------------------
94 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
95 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
97 #if SIZEOF_WCHAR_T == 4
98 #define WC_NAME "UCS4"
99 #define WC_BSWAP BSWAP_UCS4
100 #ifdef WORDS_BIGENDIAN
101 #define WC_NAME_BEST "UCS-4BE"
103 #define WC_NAME_BEST "UCS-4LE"
105 #elif SIZEOF_WCHAR_T == 2
106 #define WC_NAME "UTF16"
107 #define WC_BSWAP BSWAP_UTF16
109 #ifdef WORDS_BIGENDIAN
110 #define WC_NAME_BEST "UTF-16BE"
112 #define WC_NAME_BEST "UTF-16LE"
114 #else // sizeof(wchar_t) != 2 nor 4
115 // does this ever happen?
116 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
119 // ============================================================================
121 // ============================================================================
123 // ----------------------------------------------------------------------------
124 // UTF-16 en/decoding to/from UCS-4
125 // ----------------------------------------------------------------------------
128 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
133 *output
= (wxUint16
) input
;
136 else if (input
>=0x110000)
144 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
145 *output
= (wxUint16
) ((input
&0x3ff)+0xdc00);
151 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
153 if ((*input
<0xd800) || (*input
>0xdfff))
158 else if ((input
[1]<0xdc00) || (input
[1]>=0xdfff))
165 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
171 // ----------------------------------------------------------------------------
173 // ----------------------------------------------------------------------------
175 wxMBConv::~wxMBConv()
177 // nothing to do here (necessary for Darwin linking probably)
180 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
184 // calculate the length of the buffer needed first
185 size_t nLen
= MB2WC(NULL
, psz
, 0);
186 if ( nLen
!= (size_t)-1 )
188 // now do the actual conversion
189 wxWCharBuffer
buf(nLen
);
190 nLen
= MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NULL
191 if ( nLen
!= (size_t)-1 )
198 wxWCharBuffer
buf((wchar_t *)NULL
);
203 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
207 size_t nLen
= WC2MB(NULL
, pwz
, 0);
208 if ( nLen
!= (size_t)-1 )
210 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
211 nLen
= WC2MB(buf
.data(), pwz
, nLen
+ 4);
212 if ( nLen
!= (size_t)-1 )
219 wxCharBuffer
buf((char *)NULL
);
224 const wxWCharBuffer
wxMBConv::cMB2WC(const char *szString
, size_t nStringLen
, size_t* pOutSize
) const
226 wxASSERT(pOutSize
!= NULL
);
228 const char* szEnd
= szString
+ nStringLen
+ 1;
229 const char* szPos
= szString
;
230 const char* szStart
= szPos
;
232 size_t nActualLength
= 0;
233 size_t nCurrentSize
= nStringLen
; //try normal size first (should never resize?)
235 wxWCharBuffer
theBuffer(nCurrentSize
);
237 //Convert the string until the length() is reached, continuing the
238 //loop every time a null character is reached
239 while(szPos
!= szEnd
)
241 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
243 //Get the length of the current (sub)string
244 size_t nLen
= MB2WC(NULL
, szPos
, 0);
246 //Invalid conversion?
247 if( nLen
== (size_t)-1 )
250 theBuffer
.data()[0u] = wxT('\0');
255 //Increase the actual length (+1 for current null character)
256 nActualLength
+= nLen
+ 1;
258 //if the buffer is too small, realloc the buffer
259 if (nActualLength
> (nCurrentSize
+1))
261 wxWCharBuffer
theNewBuffer(nCurrentSize
<< 1);
262 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
* sizeof(wchar_t));
263 theBuffer
= theNewBuffer
;
267 //Convert the current (sub)string
268 if ( MB2WC(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
271 theBuffer
.data()[0u] = wxT('\0');
275 //Increment to next (sub)string
276 //Note that we have to use strlen here instead of nLen
277 //because XX2XX gives us the size of the output buffer,
278 //not necessarily the length of the string
279 szPos
+= strlen(szPos
) + 1;
282 //success - return actual length and the buffer
283 *pOutSize
= nActualLength
;
287 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *szString
, size_t nStringLen
, size_t* pOutSize
) const
289 wxASSERT(pOutSize
!= NULL
);
291 const wchar_t* szEnd
= szString
+ nStringLen
+ 1;
292 const wchar_t* szPos
= szString
;
293 const wchar_t* szStart
= szPos
;
295 size_t nActualLength
= 0;
296 size_t nCurrentSize
= nStringLen
<< 2; //try * 4 first
298 wxCharBuffer
theBuffer(nCurrentSize
);
300 //Convert the string until the length() is reached, continuing the
301 //loop every time a null character is reached
302 while(szPos
!= szEnd
)
304 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
306 //Get the length of the current (sub)string
307 size_t nLen
= WC2MB(NULL
, szPos
, 0);
309 //Invalid conversion?
310 if( nLen
== (size_t)-1 )
313 theBuffer
.data()[0u] = wxT('\0');
317 //Increase the actual length (+1 for current null character)
318 nActualLength
+= nLen
+ 1;
320 //if the buffer is too small, realloc the buffer
321 if (nActualLength
> (nCurrentSize
+1))
323 wxCharBuffer
theNewBuffer(nCurrentSize
<< 1);
324 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
);
325 theBuffer
= theNewBuffer
;
329 //Convert the current (sub)string
330 if(WC2MB(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
333 theBuffer
.data()[0u] = wxT('\0');
337 //Increment to next (sub)string
338 //Note that we have to use wxWcslen here instead of nLen
339 //because XX2XX gives us the size of the output buffer,
340 //not necessarily the length of the string
341 szPos
+= wxWcslen(szPos
) + 1;
344 //success - return actual length and the buffer
345 *pOutSize
= nActualLength
;
349 // ----------------------------------------------------------------------------
351 // ----------------------------------------------------------------------------
353 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
355 return wxMB2WC(buf
, psz
, n
);
358 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
360 return wxWC2MB(buf
, psz
, n
);
363 // ----------------------------------------------------------------------------
364 // wxConvBrokenFileNames is made for GTK2 in Unicode mode when
365 // files are accidentally written in an encoding which is not
366 // the system encoding. Typically, the system encoding will be
367 // UTF8 but there might be files stored in ISO8859-1 on disk.
368 // ----------------------------------------------------------------------------
370 class wxConvBrokenFileNames
: public wxMBConvLibc
373 wxConvBrokenFileNames() : m_utf8conv(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
) { }
374 virtual size_t MB2WC(wchar_t *outputBuf
, const char *psz
, size_t outputSize
) const;
375 virtual size_t WC2MB(char *outputBuf
, const wchar_t *psz
, size_t outputSize
) const;
376 inline bool UseUTF8() const;
378 wxMBConvUTF8 m_utf8conv
;
381 bool wxConvBrokenFileNames::UseUTF8() const
383 #if defined HAVE_LANGINFO_H && defined CODESET
384 char *codeset
= nl_langinfo(CODESET
);
385 return strcmp(codeset
, "UTF-8") == 0;
391 size_t wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf
, const char *psz
, size_t outputSize
) const
394 return m_utf8conv
.MB2WC( outputBuf
, psz
, outputSize
);
396 return wxMBConvLibc::MB2WC( outputBuf
, psz
, outputSize
);
399 size_t wxConvBrokenFileNames::WC2MB(char *outputBuf
, const wchar_t *psz
, size_t outputSize
) const
402 return m_utf8conv
.WC2MB( outputBuf
, psz
, outputSize
);
404 return wxMBConvLibc::WC2MB( outputBuf
, psz
, outputSize
);
407 // ----------------------------------------------------------------------------
409 // ----------------------------------------------------------------------------
411 // Implementation (C) 2004 Fredrik Roubert
414 // BASE64 decoding table
416 static const unsigned char utf7unb64
[] =
418 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
419 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
420 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
421 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
422 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
423 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
424 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
425 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
426 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
427 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
428 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
429 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
430 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
431 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
432 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
433 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
434 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
435 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
436 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
437 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
438 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
439 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
440 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
441 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
442 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
443 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
444 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
445 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
446 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
447 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
448 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
449 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
452 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
456 while (*psz
&& ((!buf
) || (len
< n
)))
458 unsigned char cc
= *psz
++;
466 else if (*psz
== '-')
476 // BASE64 encoded string
480 for (lsb
= false, d
= 0, l
= 0;
481 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff; psz
++)
485 for (l
+= 6; l
>= 8; lsb
= !lsb
)
487 c
= (unsigned char)((d
>> (l
-= 8)) % 256);
496 *buf
= (wchar_t)(c
<< 8);
503 if (buf
&& (len
< n
))
509 // BASE64 encoding table
511 static const unsigned char utf7enb64
[] =
513 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
514 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
515 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
516 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
517 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
518 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
519 'w', 'x', 'y', 'z', '0', '1', '2', '3',
520 '4', '5', '6', '7', '8', '9', '+', '/'
524 // UTF-7 encoding table
526 // 0 - Set D (directly encoded characters)
527 // 1 - Set O (optional direct characters)
528 // 2 - whitespace characters (optional)
529 // 3 - special characters
531 static const unsigned char utf7encode
[128] =
533 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
534 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
535 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
536 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
537 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
538 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
539 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
540 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
543 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
549 while (*psz
&& ((!buf
) || (len
< n
)))
552 if (cc
< 0x80 && utf7encode
[cc
] < 1)
560 else if (((wxUint32
)cc
) > 0xffff)
562 // no surrogate pair generation (yet?)
573 // BASE64 encode string
574 unsigned int lsb
, d
, l
;
575 for (d
= 0, l
= 0;; psz
++)
577 for (lsb
= 0; lsb
< 2; lsb
++)
580 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
582 for (l
+= 8; l
>= 6; )
586 *buf
++ = utf7enb64
[(d
>> l
) % 64];
591 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
597 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
606 if (buf
&& (len
< n
))
611 // ----------------------------------------------------------------------------
613 // ----------------------------------------------------------------------------
615 static wxUint32 utf8_max
[]=
616 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
618 // boundaries of the private use area we use to (temporarily) remap
619 // characters that are invalid in a UTF-8 encoded string
620 const wxUint32 wxUnicodePUA
= 0x100000;
621 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
623 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
627 while (*psz
&& ((!buf
) || (len
< n
)))
629 const char *opsz
= psz
;
630 bool invalid
= false;
631 unsigned char cc
= *psz
++, fc
= cc
;
633 for (cnt
= 0; fc
& 0x80; cnt
++)
647 // invalid UTF-8 sequence
652 unsigned ocnt
= cnt
- 1;
653 wxUint32 res
= cc
& (0x3f >> cnt
);
657 if ((cc
& 0xC0) != 0x80)
659 // invalid UTF-8 sequence
664 res
= (res
<< 6) | (cc
& 0x3f);
666 if (invalid
|| res
<= utf8_max
[ocnt
])
668 // illegal UTF-8 encoding
671 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
672 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
674 // if one of our PUA characters turns up externally
675 // it must also be treated as an illegal sequence
676 // (a bit like you have to escape an escape character)
682 // cast is ok because wchar_t == wxUint16 if WC_UTF16
683 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
684 if (pa
== (size_t)-1)
698 #endif // WC_UTF16/!WC_UTF16
703 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
705 while (opsz
< psz
&& (!buf
|| len
< n
))
708 // cast is ok because wchar_t == wxUint16 if WC_UTF16
709 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
710 wxASSERT(pa
!= (size_t)-1);
717 *buf
++ = wxUnicodePUA
+ (unsigned char)*opsz
;
723 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
725 while (opsz
< psz
&& (!buf
|| len
< n
))
727 if ( buf
&& len
+ 3 < n
)
729 unsigned char n
= *opsz
;
731 *buf
++ = L
'0' + n
/ 0100;
732 *buf
++ = L
'0' + (n
% 0100) / 010;
733 *buf
++ = L
'0' + n
% 010;
739 else // MAP_INVALID_UTF8_NOT
746 if (buf
&& (len
< n
))
751 static inline bool isoctal(wchar_t wch
)
753 return L
'0' <= wch
&& wch
<= L
'7';
756 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
760 while (*psz
&& ((!buf
) || (len
< n
)))
764 // cast is ok for WC_UTF16
765 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
766 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
768 cc
=(*psz
++) & 0x7fffffff;
771 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
772 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
775 *buf
++ = (char)(cc
- wxUnicodePUA
);
778 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
780 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
784 *buf
++ = (char) (psz
[0] - L
'0')*0100 +
785 (psz
[1] - L
'0')*010 +
795 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
809 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
811 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
823 // ----------------------------------------------------------------------------
825 // ----------------------------------------------------------------------------
827 #ifdef WORDS_BIGENDIAN
828 #define wxMBConvUTF16straight wxMBConvUTF16BE
829 #define wxMBConvUTF16swap wxMBConvUTF16LE
831 #define wxMBConvUTF16swap wxMBConvUTF16BE
832 #define wxMBConvUTF16straight wxMBConvUTF16LE
838 // copy 16bit MB to 16bit String
839 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
843 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
846 *buf
++ = *(wxUint16
*)psz
;
849 psz
+= sizeof(wxUint16
);
851 if (buf
&& len
<n
) *buf
=0;
857 // copy 16bit String to 16bit MB
858 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
862 while (*psz
&& (!buf
|| len
< n
))
866 *(wxUint16
*)buf
= *psz
;
867 buf
+= sizeof(wxUint16
);
869 len
+= sizeof(wxUint16
);
872 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
878 // swap 16bit MB to 16bit String
879 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
883 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
887 ((char *)buf
)[0] = psz
[1];
888 ((char *)buf
)[1] = psz
[0];
892 psz
+= sizeof(wxUint16
);
894 if (buf
&& len
<n
) *buf
=0;
900 // swap 16bit String to 16bit MB
901 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
905 while (*psz
&& (!buf
|| len
< n
))
909 *buf
++ = ((char*)psz
)[1];
910 *buf
++ = ((char*)psz
)[0];
912 len
+= sizeof(wxUint16
);
915 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
924 // copy 16bit MB to 32bit String
925 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
929 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
932 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
933 if (pa
== (size_t)-1)
939 psz
+= pa
* sizeof(wxUint16
);
941 if (buf
&& len
<n
) *buf
=0;
947 // copy 32bit String to 16bit MB
948 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
952 while (*psz
&& (!buf
|| len
< n
))
955 size_t pa
=encode_utf16(*psz
, cc
);
957 if (pa
== (size_t)-1)
962 *(wxUint16
*)buf
= cc
[0];
963 buf
+= sizeof(wxUint16
);
966 *(wxUint16
*)buf
= cc
[1];
967 buf
+= sizeof(wxUint16
);
971 len
+= pa
*sizeof(wxUint16
);
974 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
980 // swap 16bit MB to 32bit String
981 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
985 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
989 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
990 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
992 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
993 if (pa
== (size_t)-1)
1000 psz
+= pa
* sizeof(wxUint16
);
1002 if (buf
&& len
<n
) *buf
=0;
1008 // swap 32bit String to 16bit MB
1009 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1013 while (*psz
&& (!buf
|| len
< n
))
1016 size_t pa
=encode_utf16(*psz
, cc
);
1018 if (pa
== (size_t)-1)
1023 *buf
++ = ((char*)cc
)[1];
1024 *buf
++ = ((char*)cc
)[0];
1027 *buf
++ = ((char*)cc
)[3];
1028 *buf
++ = ((char*)cc
)[2];
1032 len
+= pa
*sizeof(wxUint16
);
1035 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
1043 // ----------------------------------------------------------------------------
1045 // ----------------------------------------------------------------------------
1047 #ifdef WORDS_BIGENDIAN
1048 #define wxMBConvUTF32straight wxMBConvUTF32BE
1049 #define wxMBConvUTF32swap wxMBConvUTF32LE
1051 #define wxMBConvUTF32swap wxMBConvUTF32BE
1052 #define wxMBConvUTF32straight wxMBConvUTF32LE
1056 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1057 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
1062 // copy 32bit MB to 16bit String
1063 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1067 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1071 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
1072 if (pa
== (size_t)-1)
1082 psz
+= sizeof(wxUint32
);
1084 if (buf
&& len
<n
) *buf
=0;
1090 // copy 16bit String to 32bit MB
1091 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1095 while (*psz
&& (!buf
|| len
< n
))
1099 // cast is ok for WC_UTF16
1100 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
1101 if (pa
== (size_t)-1)
1106 *(wxUint32
*)buf
= cc
;
1107 buf
+= sizeof(wxUint32
);
1109 len
+= sizeof(wxUint32
);
1113 if (buf
&& len
<=n
-sizeof(wxUint32
))
1121 // swap 32bit MB to 16bit String
1122 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1126 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1129 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
1130 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
1135 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
1136 if (pa
== (size_t)-1)
1146 psz
+= sizeof(wxUint32
);
1156 // swap 16bit String to 32bit MB
1157 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1161 while (*psz
&& (!buf
|| len
< n
))
1165 // cast is ok for WC_UTF16
1166 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
1167 if (pa
== (size_t)-1)
1177 len
+= sizeof(wxUint32
);
1181 if (buf
&& len
<=n
-sizeof(wxUint32
))
1190 // copy 32bit MB to 32bit String
1191 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1195 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1198 *buf
++ = *(wxUint32
*)psz
;
1200 psz
+= sizeof(wxUint32
);
1210 // copy 32bit String to 32bit MB
1211 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1215 while (*psz
&& (!buf
|| len
< n
))
1219 *(wxUint32
*)buf
= *psz
;
1220 buf
+= sizeof(wxUint32
);
1223 len
+= sizeof(wxUint32
);
1227 if (buf
&& len
<=n
-sizeof(wxUint32
))
1234 // swap 32bit MB to 32bit String
1235 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1239 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1243 ((char *)buf
)[0] = psz
[3];
1244 ((char *)buf
)[1] = psz
[2];
1245 ((char *)buf
)[2] = psz
[1];
1246 ((char *)buf
)[3] = psz
[0];
1250 psz
+= sizeof(wxUint32
);
1260 // swap 32bit String to 32bit MB
1261 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1265 while (*psz
&& (!buf
|| len
< n
))
1269 *buf
++ = ((char *)psz
)[3];
1270 *buf
++ = ((char *)psz
)[2];
1271 *buf
++ = ((char *)psz
)[1];
1272 *buf
++ = ((char *)psz
)[0];
1274 len
+= sizeof(wxUint32
);
1278 if (buf
&& len
<=n
-sizeof(wxUint32
))
1288 // ============================================================================
1289 // The classes doing conversion using the iconv_xxx() functions
1290 // ============================================================================
1294 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1295 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1296 // (unless there's yet another bug in glibc) the only case when iconv()
1297 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1298 // left in the input buffer -- when _real_ error occurs,
1299 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1301 // [This bug does not appear in glibc 2.2.]
1302 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1303 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1304 (errno != E2BIG || bufLeft != 0))
1306 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1309 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1311 // ----------------------------------------------------------------------------
1312 // wxMBConv_iconv: encapsulates an iconv character set
1313 // ----------------------------------------------------------------------------
1315 class wxMBConv_iconv
: public wxMBConv
1318 wxMBConv_iconv(const wxChar
*name
);
1319 virtual ~wxMBConv_iconv();
1321 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1322 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1325 { return (m2w
!= (iconv_t
)-1) && (w2m
!= (iconv_t
)-1); }
1328 // the iconv handlers used to translate from multibyte to wide char and in
1329 // the other direction
1333 // guards access to m2w and w2m objects
1334 wxMutex m_iconvMutex
;
1338 // the name (for iconv_open()) of a wide char charset -- if none is
1339 // available on this machine, it will remain NULL
1340 static const char *ms_wcCharsetName
;
1342 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1343 // different endian-ness than the native one
1344 static bool ms_wcNeedsSwap
;
1347 const char *wxMBConv_iconv::ms_wcCharsetName
= NULL
;
1348 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1350 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1352 // Do it the hard way
1354 for (size_t i
= 0; i
< wxStrlen(name
)+1; i
++)
1355 cname
[i
] = (char) name
[i
];
1357 // check for charset that represents wchar_t:
1358 if (ms_wcCharsetName
== NULL
)
1360 ms_wcNeedsSwap
= false;
1362 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1363 ms_wcCharsetName
= WC_NAME_BEST
;
1364 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1366 if (m2w
== (iconv_t
)-1)
1368 // try charset w/o bytesex info (e.g. "UCS4")
1369 // and check for bytesex ourselves:
1370 ms_wcCharsetName
= WC_NAME
;
1371 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1373 // last bet, try if it knows WCHAR_T pseudo-charset
1374 if (m2w
== (iconv_t
)-1)
1376 ms_wcCharsetName
= "WCHAR_T";
1377 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1380 if (m2w
!= (iconv_t
)-1)
1382 char buf
[2], *bufPtr
;
1383 wchar_t wbuf
[2], *wbufPtr
;
1391 outsz
= SIZEOF_WCHAR_T
* 2;
1395 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1396 (char**)&wbufPtr
, &outsz
);
1398 if (ICONV_FAILED(res
, insz
))
1400 ms_wcCharsetName
= NULL
;
1401 wxLogLastError(wxT("iconv"));
1402 wxLogError(_("Conversion to charset '%s' doesn't work."), name
);
1406 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1411 ms_wcCharsetName
= NULL
;
1413 // VS: we must not output an error here, since wxWidgets will safely
1414 // fall back to using wxEncodingConverter.
1415 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name
);
1419 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName
, ms_wcNeedsSwap
);
1421 else // we already have ms_wcCharsetName
1423 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1426 // NB: don't ever pass NULL to iconv_open(), it may crash!
1427 if ( ms_wcCharsetName
)
1429 w2m
= iconv_open( cname
, ms_wcCharsetName
);
1437 wxMBConv_iconv::~wxMBConv_iconv()
1439 if ( m2w
!= (iconv_t
)-1 )
1441 if ( w2m
!= (iconv_t
)-1 )
1445 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1448 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1449 // Unfortunately there are a couple of global wxCSConv objects such as
1450 // wxConvLocal that are used all over wx code, so we have to make sure
1451 // the handle is used by at most one thread at a time. Otherwise
1452 // only a few wx classes would be safe to use from non-main threads
1453 // as MB<->WC conversion would fail "randomly".
1454 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1457 size_t inbuf
= strlen(psz
);
1458 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1460 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1461 wchar_t *bufPtr
= buf
;
1462 const char *pszPtr
= psz
;
1466 // have destination buffer, convert there
1468 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1469 (char**)&bufPtr
, &outbuf
);
1470 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1474 // convert to native endianness
1475 WC_BSWAP(buf
/* _not_ bufPtr */, res
)
1478 // NB: iconv was given only strlen(psz) characters on input, and so
1479 // it couldn't convert the trailing zero. Let's do it ourselves
1480 // if there's some room left for it in the output buffer.
1486 // no destination buffer... convert using temp buffer
1487 // to calculate destination buffer requirement
1492 outbuf
= 8*SIZEOF_WCHAR_T
;
1495 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1496 (char**)&bufPtr
, &outbuf
);
1498 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1499 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1502 if (ICONV_FAILED(cres
, inbuf
))
1504 //VS: it is ok if iconv fails, hence trace only
1505 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1512 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1515 // NB: explained in MB2WC
1516 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1519 size_t inbuf
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
1523 wchar_t *tmpbuf
= 0;
1527 // need to copy to temp buffer to switch endianness
1528 // this absolutely doesn't rock!
1529 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1530 // could be in read-only memory, or be accessed in some other thread)
1531 tmpbuf
=(wchar_t*)malloc((inbuf
+1)*SIZEOF_WCHAR_T
);
1532 memcpy(tmpbuf
,psz
,(inbuf
+1)*SIZEOF_WCHAR_T
);
1533 WC_BSWAP(tmpbuf
, inbuf
)
1539 // have destination buffer, convert there
1540 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1544 // NB: iconv was given only wcslen(psz) characters on input, and so
1545 // it couldn't convert the trailing zero. Let's do it ourselves
1546 // if there's some room left for it in the output buffer.
1552 // no destination buffer... convert using temp buffer
1553 // to calculate destination buffer requirement
1557 buf
= tbuf
; outbuf
= 16;
1559 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1562 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1570 if (ICONV_FAILED(cres
, inbuf
))
1572 //VS: it is ok if iconv fails, hence trace only
1573 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1580 #endif // HAVE_ICONV
1583 // ============================================================================
1584 // Win32 conversion classes
1585 // ============================================================================
1587 #ifdef wxHAVE_WIN32_MB2WC
1591 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1592 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1595 class wxMBConv_win32
: public wxMBConv
1600 m_CodePage
= CP_ACP
;
1604 wxMBConv_win32(const wxChar
* name
)
1606 m_CodePage
= wxCharsetToCodepage(name
);
1609 wxMBConv_win32(wxFontEncoding encoding
)
1611 m_CodePage
= wxEncodingToCodepage(encoding
);
1615 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1617 // note that we have to use the MB_ERR_INVALID_CHARS flag as without it
1618 // the behaviour is not compatible with the Unix version (using iconv)
1619 // and breaks the library itself, e.g. wxTextInputStream::NextChar()
1620 // wouldn't work if reading an incomplete MB char didn't result in an
1623 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1624 // an error (tested under Windows Server 2003) and apparently it is
1625 // done on purpose, i.e. the function accepts any input in this case
1626 // and although I'd prefer to return error on ill-formed output, our
1627 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1628 // explicitly ill-formed according to RFC 2152) either so we don't
1629 // even have any fallback here...
1630 int flags
= m_CodePage
== CP_UTF7
? 0 : MB_ERR_INVALID_CHARS
;
1632 const size_t len
= ::MultiByteToWideChar
1634 m_CodePage
, // code page
1635 flags
, // flags: fall on error
1636 psz
, // input string
1637 -1, // its length (NUL-terminated)
1638 buf
, // output string
1639 buf
? n
: 0 // size of output buffer
1642 // note that it returns count of written chars for buf != NULL and size
1643 // of the needed buffer for buf == NULL so in either case the length of
1644 // the string (which never includes the terminating NUL) is one less
1645 return len
? len
- 1 : (size_t)-1;
1648 size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
1651 we have a problem here: by default, WideCharToMultiByte() may
1652 replace characters unrepresentable in the target code page with bad
1653 quality approximations such as turning "1/2" symbol (U+00BD) into
1654 "1" for the code pages which don't have it and we, obviously, want
1655 to avoid this at any price
1657 the trouble is that this function does it _silently_, i.e. it won't
1658 even tell us whether it did or not... Win98/2000 and higher provide
1659 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1660 we have to resort to a round trip, i.e. check that converting back
1661 results in the same string -- this is, of course, expensive but
1662 otherwise we simply can't be sure to not garble the data.
1665 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1666 // it doesn't work with CJK encodings (which we test for rather roughly
1667 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1669 BOOL usedDef
wxDUMMY_INITIALIZE(false);
1672 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
1674 // it's our lucky day
1675 flags
= WC_NO_BEST_FIT_CHARS
;
1676 pUsedDef
= &usedDef
;
1678 else // old system or unsupported encoding
1684 const size_t len
= ::WideCharToMultiByte
1686 m_CodePage
, // code page
1687 flags
, // either none or no best fit
1688 pwz
, // input string
1689 -1, // it is (wide) NUL-terminated
1690 buf
, // output buffer
1691 buf
? n
: 0, // and its size
1692 NULL
, // default "replacement" char
1693 pUsedDef
// [out] was it used?
1698 // function totally failed
1702 // if we were really converting, check if we succeeded
1707 // check if the conversion failed, i.e. if any replacements
1712 else // we must resort to double tripping...
1714 wxWCharBuffer
wcBuf(n
);
1715 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
1716 wcscmp(wcBuf
, pwz
) != 0 )
1718 // we didn't obtain the same thing we started from, hence
1719 // the conversion was lossy and we consider that it failed
1725 // see the comment above for the reason of "len - 1"
1729 bool IsOk() const { return m_CodePage
!= -1; }
1732 static bool CanUseNoBestFit()
1734 static int s_isWin98Or2k
= -1;
1736 if ( s_isWin98Or2k
== -1 )
1739 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
1742 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
1746 s_isWin98Or2k
= verMaj
>= 5;
1750 // unknown, be conservative by default
1754 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
1757 return s_isWin98Or2k
== 1;
1763 #endif // wxHAVE_WIN32_MB2WC
1765 // ============================================================================
1766 // Cocoa conversion classes
1767 // ============================================================================
1769 #if defined(__WXCOCOA__)
1771 // RN: There is no UTF-32 support in either Core Foundation or
1772 // Cocoa. Strangely enough, Core Foundation uses UTF-32
1773 // internally quite a bit - it's just not public (yet).
1775 #include <CoreFoundation/CFString.h>
1776 #include <CoreFoundation/CFStringEncodingExt.h>
1778 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
1780 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
1781 if ( encoding
== wxFONTENCODING_DEFAULT
)
1783 enc
= CFStringGetSystemEncoding();
1785 else switch( encoding
)
1787 case wxFONTENCODING_ISO8859_1
:
1788 enc
= kCFStringEncodingISOLatin1
;
1790 case wxFONTENCODING_ISO8859_2
:
1791 enc
= kCFStringEncodingISOLatin2
;
1793 case wxFONTENCODING_ISO8859_3
:
1794 enc
= kCFStringEncodingISOLatin3
;
1796 case wxFONTENCODING_ISO8859_4
:
1797 enc
= kCFStringEncodingISOLatin4
;
1799 case wxFONTENCODING_ISO8859_5
:
1800 enc
= kCFStringEncodingISOLatinCyrillic
;
1802 case wxFONTENCODING_ISO8859_6
:
1803 enc
= kCFStringEncodingISOLatinArabic
;
1805 case wxFONTENCODING_ISO8859_7
:
1806 enc
= kCFStringEncodingISOLatinGreek
;
1808 case wxFONTENCODING_ISO8859_8
:
1809 enc
= kCFStringEncodingISOLatinHebrew
;
1811 case wxFONTENCODING_ISO8859_9
:
1812 enc
= kCFStringEncodingISOLatin5
;
1814 case wxFONTENCODING_ISO8859_10
:
1815 enc
= kCFStringEncodingISOLatin6
;
1817 case wxFONTENCODING_ISO8859_11
:
1818 enc
= kCFStringEncodingISOLatinThai
;
1820 case wxFONTENCODING_ISO8859_13
:
1821 enc
= kCFStringEncodingISOLatin7
;
1823 case wxFONTENCODING_ISO8859_14
:
1824 enc
= kCFStringEncodingISOLatin8
;
1826 case wxFONTENCODING_ISO8859_15
:
1827 enc
= kCFStringEncodingISOLatin9
;
1830 case wxFONTENCODING_KOI8
:
1831 enc
= kCFStringEncodingKOI8_R
;
1833 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
1834 enc
= kCFStringEncodingDOSRussian
;
1837 // case wxFONTENCODING_BULGARIAN :
1841 case wxFONTENCODING_CP437
:
1842 enc
=kCFStringEncodingDOSLatinUS
;
1844 case wxFONTENCODING_CP850
:
1845 enc
= kCFStringEncodingDOSLatin1
;
1847 case wxFONTENCODING_CP852
:
1848 enc
= kCFStringEncodingDOSLatin2
;
1850 case wxFONTENCODING_CP855
:
1851 enc
= kCFStringEncodingDOSCyrillic
;
1853 case wxFONTENCODING_CP866
:
1854 enc
=kCFStringEncodingDOSRussian
;
1856 case wxFONTENCODING_CP874
:
1857 enc
= kCFStringEncodingDOSThai
;
1859 case wxFONTENCODING_CP932
:
1860 enc
= kCFStringEncodingDOSJapanese
;
1862 case wxFONTENCODING_CP936
:
1863 enc
=kCFStringEncodingDOSChineseSimplif
;
1865 case wxFONTENCODING_CP949
:
1866 enc
= kCFStringEncodingDOSKorean
;
1868 case wxFONTENCODING_CP950
:
1869 enc
= kCFStringEncodingDOSChineseTrad
;
1871 case wxFONTENCODING_CP1250
:
1872 enc
= kCFStringEncodingWindowsLatin2
;
1874 case wxFONTENCODING_CP1251
:
1875 enc
=kCFStringEncodingWindowsCyrillic
;
1877 case wxFONTENCODING_CP1252
:
1878 enc
=kCFStringEncodingWindowsLatin1
;
1880 case wxFONTENCODING_CP1253
:
1881 enc
= kCFStringEncodingWindowsGreek
;
1883 case wxFONTENCODING_CP1254
:
1884 enc
= kCFStringEncodingWindowsLatin5
;
1886 case wxFONTENCODING_CP1255
:
1887 enc
=kCFStringEncodingWindowsHebrew
;
1889 case wxFONTENCODING_CP1256
:
1890 enc
=kCFStringEncodingWindowsArabic
;
1892 case wxFONTENCODING_CP1257
:
1893 enc
= kCFStringEncodingWindowsBalticRim
;
1895 // This only really encodes to UTF7 (if that) evidently
1896 // case wxFONTENCODING_UTF7 :
1897 // enc = kCFStringEncodingNonLossyASCII ;
1899 case wxFONTENCODING_UTF8
:
1900 enc
= kCFStringEncodingUTF8
;
1902 case wxFONTENCODING_EUC_JP
:
1903 enc
= kCFStringEncodingEUC_JP
;
1905 case wxFONTENCODING_UTF16
:
1906 enc
= kCFStringEncodingUnicode
;
1908 case wxFONTENCODING_MACROMAN
:
1909 enc
= kCFStringEncodingMacRoman
;
1911 case wxFONTENCODING_MACJAPANESE
:
1912 enc
= kCFStringEncodingMacJapanese
;
1914 case wxFONTENCODING_MACCHINESETRAD
:
1915 enc
= kCFStringEncodingMacChineseTrad
;
1917 case wxFONTENCODING_MACKOREAN
:
1918 enc
= kCFStringEncodingMacKorean
;
1920 case wxFONTENCODING_MACARABIC
:
1921 enc
= kCFStringEncodingMacArabic
;
1923 case wxFONTENCODING_MACHEBREW
:
1924 enc
= kCFStringEncodingMacHebrew
;
1926 case wxFONTENCODING_MACGREEK
:
1927 enc
= kCFStringEncodingMacGreek
;
1929 case wxFONTENCODING_MACCYRILLIC
:
1930 enc
= kCFStringEncodingMacCyrillic
;
1932 case wxFONTENCODING_MACDEVANAGARI
:
1933 enc
= kCFStringEncodingMacDevanagari
;
1935 case wxFONTENCODING_MACGURMUKHI
:
1936 enc
= kCFStringEncodingMacGurmukhi
;
1938 case wxFONTENCODING_MACGUJARATI
:
1939 enc
= kCFStringEncodingMacGujarati
;
1941 case wxFONTENCODING_MACORIYA
:
1942 enc
= kCFStringEncodingMacOriya
;
1944 case wxFONTENCODING_MACBENGALI
:
1945 enc
= kCFStringEncodingMacBengali
;
1947 case wxFONTENCODING_MACTAMIL
:
1948 enc
= kCFStringEncodingMacTamil
;
1950 case wxFONTENCODING_MACTELUGU
:
1951 enc
= kCFStringEncodingMacTelugu
;
1953 case wxFONTENCODING_MACKANNADA
:
1954 enc
= kCFStringEncodingMacKannada
;
1956 case wxFONTENCODING_MACMALAJALAM
:
1957 enc
= kCFStringEncodingMacMalayalam
;
1959 case wxFONTENCODING_MACSINHALESE
:
1960 enc
= kCFStringEncodingMacSinhalese
;
1962 case wxFONTENCODING_MACBURMESE
:
1963 enc
= kCFStringEncodingMacBurmese
;
1965 case wxFONTENCODING_MACKHMER
:
1966 enc
= kCFStringEncodingMacKhmer
;
1968 case wxFONTENCODING_MACTHAI
:
1969 enc
= kCFStringEncodingMacThai
;
1971 case wxFONTENCODING_MACLAOTIAN
:
1972 enc
= kCFStringEncodingMacLaotian
;
1974 case wxFONTENCODING_MACGEORGIAN
:
1975 enc
= kCFStringEncodingMacGeorgian
;
1977 case wxFONTENCODING_MACARMENIAN
:
1978 enc
= kCFStringEncodingMacArmenian
;
1980 case wxFONTENCODING_MACCHINESESIMP
:
1981 enc
= kCFStringEncodingMacChineseSimp
;
1983 case wxFONTENCODING_MACTIBETAN
:
1984 enc
= kCFStringEncodingMacTibetan
;
1986 case wxFONTENCODING_MACMONGOLIAN
:
1987 enc
= kCFStringEncodingMacMongolian
;
1989 case wxFONTENCODING_MACETHIOPIC
:
1990 enc
= kCFStringEncodingMacEthiopic
;
1992 case wxFONTENCODING_MACCENTRALEUR
:
1993 enc
= kCFStringEncodingMacCentralEurRoman
;
1995 case wxFONTENCODING_MACVIATNAMESE
:
1996 enc
= kCFStringEncodingMacVietnamese
;
1998 case wxFONTENCODING_MACARABICEXT
:
1999 enc
= kCFStringEncodingMacExtArabic
;
2001 case wxFONTENCODING_MACSYMBOL
:
2002 enc
= kCFStringEncodingMacSymbol
;
2004 case wxFONTENCODING_MACDINGBATS
:
2005 enc
= kCFStringEncodingMacDingbats
;
2007 case wxFONTENCODING_MACTURKISH
:
2008 enc
= kCFStringEncodingMacTurkish
;
2010 case wxFONTENCODING_MACCROATIAN
:
2011 enc
= kCFStringEncodingMacCroatian
;
2013 case wxFONTENCODING_MACICELANDIC
:
2014 enc
= kCFStringEncodingMacIcelandic
;
2016 case wxFONTENCODING_MACROMANIAN
:
2017 enc
= kCFStringEncodingMacRomanian
;
2019 case wxFONTENCODING_MACCELTIC
:
2020 enc
= kCFStringEncodingMacCeltic
;
2022 case wxFONTENCODING_MACGAELIC
:
2023 enc
= kCFStringEncodingMacGaelic
;
2025 // case wxFONTENCODING_MACKEYBOARD :
2026 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2029 // because gcc is picky
2035 class wxMBConv_cocoa
: public wxMBConv
2040 Init(CFStringGetSystemEncoding()) ;
2044 wxMBConv_cocoa(const wxChar
* name
)
2046 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2050 wxMBConv_cocoa(wxFontEncoding encoding
)
2052 Init( wxCFStringEncFromFontEnc(encoding
) );
2059 void Init( CFStringEncoding encoding
)
2061 m_encoding
= encoding
;
2064 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2068 CFStringRef theString
= CFStringCreateWithBytes (
2069 NULL
, //the allocator
2070 (const UInt8
*)szUnConv
,
2073 false //no BOM/external representation
2076 wxASSERT(theString
);
2078 size_t nOutLength
= CFStringGetLength(theString
);
2082 CFRelease(theString
);
2086 CFRange theRange
= { 0, nOutSize
};
2088 #if SIZEOF_WCHAR_T == 4
2089 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2092 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2094 CFRelease(theString
);
2096 szUniCharBuffer
[nOutLength
] = '\0' ;
2098 #if SIZEOF_WCHAR_T == 4
2099 wxMBConvUTF16 converter
;
2100 converter
.MB2WC(szOut
, (const char*)szUniCharBuffer
, nOutSize
) ;
2101 delete[] szUniCharBuffer
;
2107 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2111 size_t nRealOutSize
;
2112 size_t nBufSize
= wxWcslen(szUnConv
);
2113 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
2115 #if SIZEOF_WCHAR_T == 4
2116 wxMBConvUTF16BE converter
;
2117 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2118 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1] ;
2119 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
)) ;
2120 nBufSize
/= sizeof(UniChar
);
2123 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2127 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
2130 wxASSERT(theString
);
2132 //Note that CER puts a BOM when converting to unicode
2133 //so we check and use getchars instead in that case
2134 if (m_encoding
== kCFStringEncodingUnicode
)
2137 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2139 nRealOutSize
= CFStringGetLength(theString
) + 1;
2145 CFRangeMake(0, CFStringGetLength(theString
)),
2147 0, //what to put in characters that can't be converted -
2148 //0 tells CFString to return NULL if it meets such a character
2149 false, //not an external representation
2152 (CFIndex
*) &nRealOutSize
2156 CFRelease(theString
);
2158 #if SIZEOF_WCHAR_T == 4
2159 delete[] szUniBuffer
;
2162 return nRealOutSize
- 1;
2167 return m_encoding
!= kCFStringEncodingInvalidId
&&
2168 CFStringIsEncodingAvailable(m_encoding
);
2172 CFStringEncoding m_encoding
;
2175 #endif // defined(__WXCOCOA__)
2177 // ============================================================================
2178 // Mac conversion classes
2179 // ============================================================================
2181 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2183 class wxMBConv_mac
: public wxMBConv
2188 Init(CFStringGetSystemEncoding()) ;
2192 wxMBConv_mac(const wxChar
* name
)
2194 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2198 wxMBConv_mac(wxFontEncoding encoding
)
2200 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
2205 OSStatus status
= noErr
;
2206 status
= TECDisposeConverter(m_MB2WC_converter
);
2207 status
= TECDisposeConverter(m_WC2MB_converter
);
2211 void Init( TextEncodingBase encoding
)
2213 OSStatus status
= noErr
;
2214 m_char_encoding
= encoding
;
2215 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
2217 status
= TECCreateConverter(&m_MB2WC_converter
,
2219 m_unicode_encoding
);
2220 status
= TECCreateConverter(&m_WC2MB_converter
,
2225 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2227 OSStatus status
= noErr
;
2228 ByteCount byteOutLen
;
2229 ByteCount byteInLen
= strlen(psz
) ;
2230 wchar_t *tbuf
= NULL
;
2231 UniChar
* ubuf
= NULL
;
2236 //apple specs say at least 32
2237 n
= wxMax( 32 , byteInLen
) ;
2238 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
2240 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
2241 #if SIZEOF_WCHAR_T == 4
2242 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2244 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2246 status
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2247 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2248 #if SIZEOF_WCHAR_T == 4
2249 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2250 // is not properly terminated we get random characters at the end
2251 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2252 wxMBConvUTF16BE converter
;
2253 res
= converter
.MB2WC( (buf
? buf
: tbuf
) , (const char*)ubuf
, n
) ;
2256 res
= byteOutLen
/ sizeof( UniChar
) ;
2261 if ( buf
&& res
< n
)
2267 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2269 OSStatus status
= noErr
;
2270 ByteCount byteOutLen
;
2271 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2277 //apple specs say at least 32
2278 n
= wxMax( 32 , ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2279 tbuf
= (char*) malloc( n
) ;
2282 ByteCount byteBufferLen
= n
;
2283 UniChar
* ubuf
= NULL
;
2284 #if SIZEOF_WCHAR_T == 4
2285 wxMBConvUTF16BE converter
;
2286 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2287 byteInLen
= unicharlen
;
2288 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2289 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2291 ubuf
= (UniChar
*) psz
;
2293 status
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2294 (TextPtr
) (buf
? buf
: tbuf
) , byteBufferLen
, &byteOutLen
);
2295 #if SIZEOF_WCHAR_T == 4
2301 size_t res
= byteOutLen
;
2302 if ( buf
&& res
< n
)
2306 //we need to double-trip to verify it didn't insert any ? in place
2307 //of bogus characters
2308 wxWCharBuffer
wcBuf(n
);
2309 size_t pszlen
= wxWcslen(psz
);
2310 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
2311 wxWcslen(wcBuf
) != pszlen
||
2312 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2314 // we didn't obtain the same thing we started from, hence
2315 // the conversion was lossy and we consider that it failed
2324 { return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
; }
2327 TECObjectRef m_MB2WC_converter
;
2328 TECObjectRef m_WC2MB_converter
;
2330 TextEncodingBase m_char_encoding
;
2331 TextEncodingBase m_unicode_encoding
;
2334 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2336 // ============================================================================
2337 // wxEncodingConverter based conversion classes
2338 // ============================================================================
2342 class wxMBConv_wxwin
: public wxMBConv
2347 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2348 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2352 // temporarily just use wxEncodingConverter stuff,
2353 // so that it works while a better implementation is built
2354 wxMBConv_wxwin(const wxChar
* name
)
2357 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2359 m_enc
= wxFONTENCODING_SYSTEM
;
2364 wxMBConv_wxwin(wxFontEncoding enc
)
2371 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2373 size_t inbuf
= strlen(psz
);
2376 if (!m2w
.Convert(psz
,buf
))
2382 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2384 const size_t inbuf
= wxWcslen(psz
);
2387 if (!w2m
.Convert(psz
,buf
))
2394 bool IsOk() const { return m_ok
; }
2397 wxFontEncoding m_enc
;
2398 wxEncodingConverter m2w
, w2m
;
2400 // were we initialized successfully?
2403 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2406 #endif // wxUSE_FONTMAP
2408 // ============================================================================
2409 // wxCSConv implementation
2410 // ============================================================================
2412 void wxCSConv::Init()
2419 wxCSConv::wxCSConv(const wxChar
*charset
)
2428 m_encoding
= wxFONTENCODING_SYSTEM
;
2431 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2433 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2435 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2437 encoding
= wxFONTENCODING_SYSTEM
;
2442 m_encoding
= encoding
;
2445 wxCSConv::~wxCSConv()
2450 wxCSConv::wxCSConv(const wxCSConv
& conv
)
2455 SetName(conv
.m_name
);
2456 m_encoding
= conv
.m_encoding
;
2459 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2463 SetName(conv
.m_name
);
2464 m_encoding
= conv
.m_encoding
;
2469 void wxCSConv::Clear()
2478 void wxCSConv::SetName(const wxChar
*charset
)
2482 m_name
= wxStrdup(charset
);
2487 wxMBConv
*wxCSConv::DoCreate() const
2489 // check for the special case of ASCII or ISO8859-1 charset: as we have
2490 // special knowledge of it anyhow, we don't need to create a special
2491 // conversion object
2492 if ( m_encoding
== wxFONTENCODING_ISO8859_1
)
2494 // don't convert at all
2498 // we trust OS to do conversion better than we can so try external
2499 // conversion methods first
2501 // the full order is:
2502 // 1. OS conversion (iconv() under Unix or Win32 API)
2503 // 2. hard coded conversions for UTF
2504 // 3. wxEncodingConverter as fall back
2510 #endif // !wxUSE_FONTMAP
2512 wxString
name(m_name
);
2516 name
= wxFontMapperBase::Get()->GetEncodingName(m_encoding
);
2517 #endif // wxUSE_FONTMAP
2519 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
2525 #endif // HAVE_ICONV
2527 #ifdef wxHAVE_WIN32_MB2WC
2530 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2531 : new wxMBConv_win32(m_encoding
);
2540 #endif // wxHAVE_WIN32_MB2WC
2541 #if defined(__WXMAC__)
2543 // leave UTF16 and UTF32 to the built-ins of wx
2544 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
2545 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
2549 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
2550 : new wxMBConv_mac(m_encoding
);
2552 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
2561 #if defined(__WXCOCOA__)
2563 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
2567 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
2568 : new wxMBConv_cocoa(m_encoding
);
2570 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
2580 wxFontEncoding enc
= m_encoding
;
2582 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2584 // use "false" to suppress interactive dialogs -- we can be called from
2585 // anywhere and popping up a dialog from here is the last thing we want to
2587 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
2589 #endif // wxUSE_FONTMAP
2593 case wxFONTENCODING_UTF7
:
2594 return new wxMBConvUTF7
;
2596 case wxFONTENCODING_UTF8
:
2597 return new wxMBConvUTF8
;
2599 case wxFONTENCODING_UTF16BE
:
2600 return new wxMBConvUTF16BE
;
2602 case wxFONTENCODING_UTF16LE
:
2603 return new wxMBConvUTF16LE
;
2605 case wxFONTENCODING_UTF32BE
:
2606 return new wxMBConvUTF32BE
;
2608 case wxFONTENCODING_UTF32LE
:
2609 return new wxMBConvUTF32LE
;
2612 // nothing to do but put here to suppress gcc warnings
2619 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2620 : new wxMBConv_wxwin(m_encoding
);
2626 #endif // wxUSE_FONTMAP
2628 // NB: This is a hack to prevent deadlock. What could otherwise happen
2629 // in Unicode build: wxConvLocal creation ends up being here
2630 // because of some failure and logs the error. But wxLog will try to
2631 // attach timestamp, for which it will need wxConvLocal (to convert
2632 // time to char* and then wchar_t*), but that fails, tries to log
2633 // error, but wxLog has an (already locked) critical section that
2634 // guards static buffer.
2635 static bool alreadyLoggingError
= false;
2636 if (!alreadyLoggingError
)
2638 alreadyLoggingError
= true;
2639 wxLogError(_("Cannot convert from the charset '%s'!"),
2643 wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str()
2644 #else // !wxUSE_FONTMAP
2645 wxString::Format(_("encoding %s"), m_encoding
).c_str()
2646 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2648 alreadyLoggingError
= false;
2654 void wxCSConv::CreateConvIfNeeded() const
2658 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2661 // if we have neither the name nor the encoding, use the default
2662 // encoding for this system
2663 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
2665 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
2667 #endif // wxUSE_INTL
2669 self
->m_convReal
= DoCreate();
2670 self
->m_deferred
= false;
2674 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2676 CreateConvIfNeeded();
2679 return m_convReal
->MB2WC(buf
, psz
, n
);
2682 size_t len
= strlen(psz
);
2686 for (size_t c
= 0; c
<= len
; c
++)
2687 buf
[c
] = (unsigned char)(psz
[c
]);
2693 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2695 CreateConvIfNeeded();
2698 return m_convReal
->WC2MB(buf
, psz
, n
);
2701 const size_t len
= wxWcslen(psz
);
2704 for (size_t c
= 0; c
<= len
; c
++)
2708 buf
[c
] = (char)psz
[c
];
2713 for (size_t c
= 0; c
<= len
; c
++)
2723 // ----------------------------------------------------------------------------
2725 // ----------------------------------------------------------------------------
2728 static wxMBConv_win32 wxConvLibcObj
;
2729 #elif defined(__WXMAC__) && !defined(__MACH__)
2730 static wxMBConv_mac wxConvLibcObj
;
2732 static wxMBConvLibc wxConvLibcObj
;
2735 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
2736 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
2737 static wxMBConvUTF7 wxConvUTF7Obj
;
2738 static wxMBConvUTF8 wxConvUTF8Obj
;
2739 static wxConvBrokenFileNames wxConvBrokenFileNamesObj
;
2741 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
2742 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
2743 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
2744 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
2745 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
2746 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
2747 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
= &
2751 wxConvBrokenFileNamesObj
;
2757 #else // !wxUSE_WCHAR_T
2759 // stand-ins in absence of wchar_t
2760 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
2765 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T