1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // ============================================================================
17 // ============================================================================
19 // ----------------------------------------------------------------------------
21 // ----------------------------------------------------------------------------
23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
24 #pragma implementation "strconv.h"
27 // For compilers that support precompilation, includes "wx.h".
28 #include "wx/wxprec.h"
39 #include "wx/strconv.h"
44 #include "wx/msw/private.h"
45 #include "wx/msw/missing.h"
56 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
57 #define wxHAVE_WIN32_MB2WC
58 #endif // __WIN32__ but !__WXMICROWIN__
60 // ----------------------------------------------------------------------------
62 // ----------------------------------------------------------------------------
70 #include "wx/thread.h"
73 #include "wx/encconv.h"
74 #include "wx/fontmap.h"
79 #include <ATSUnicode.h>
80 #include <TextCommon.h>
81 #include <TextEncodingConverter.h>
84 #include "wx/mac/private.h" // includes mac headers
86 // ----------------------------------------------------------------------------
88 // ----------------------------------------------------------------------------
90 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
91 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
93 #if SIZEOF_WCHAR_T == 4
94 #define WC_NAME "UCS4"
95 #define WC_BSWAP BSWAP_UCS4
96 #ifdef WORDS_BIGENDIAN
97 #define WC_NAME_BEST "UCS-4BE"
99 #define WC_NAME_BEST "UCS-4LE"
101 #elif SIZEOF_WCHAR_T == 2
102 #define WC_NAME "UTF16"
103 #define WC_BSWAP BSWAP_UTF16
105 #ifdef WORDS_BIGENDIAN
106 #define WC_NAME_BEST "UTF-16BE"
108 #define WC_NAME_BEST "UTF-16LE"
110 #else // sizeof(wchar_t) != 2 nor 4
111 // does this ever happen?
112 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
115 // ============================================================================
117 // ============================================================================
119 // ----------------------------------------------------------------------------
120 // UTF-16 en/decoding to/from UCS-4
121 // ----------------------------------------------------------------------------
// Encodes the code point `input` as UTF-16.
//
// Returns the number of 16-bit units the encoding occupies (1 or 2), or
// (size_t)-1 if `input` is 0x110000 or above. `output` may be NULL, in which
// case nothing is stored and only the unit count is returned; otherwise it
// must have room for the returned number of units.
//
// NOTE(review): values in the surrogate range 0xD800..0xDFFF are passed
// through unchanged as a single unit.
static size_t encode_utf16(wxUint32 input, wxUint16 *output)
{
    // values from 0x110000 upwards cannot be represented in UTF-16
    if ( input >= 0x110000 )
        return (size_t)-1;

    // everything below 0x10000 fits in a single unit
    if ( input < 0x10000 )
    {
        if ( output )
            *output = (wxUint16)input;

        return 1;
    }

    // otherwise split into a surrogate pair; adding 0xd7c0 both removes the
    // 0x10000 bias from the top bits (0x10000 >> 10 == 0x40) and adds the
    // 0xd800 lead surrogate base
    if ( output )
    {
        output[0] = (wxUint16)((input >> 10) + 0xd7c0);
        output[1] = (wxUint16)((input & 0x3ff) + 0xdc00);
    }

    return 2;
}
// Decodes one code point from the UTF-16 sequence starting at `input`.
//
// Returns the number of 16-bit units consumed (1 or 2) and stores the code
// point in `output`, or returns (size_t)-1 if the sequence is ill-formed:
// either a lead (high) surrogate that is not followed by a trail (low)
// surrogate, or a trail surrogate appearing on its own.
//
// input[1] is only read when input[0] is a lead surrogate, so a
// zero-terminated buffer is never read past its terminator.
//
// On failure `output` is set to the offending unit, so that a caller which
// carries on after an error (advancing by one unit) reads a defined value.
static size_t decode_utf16(const wxUint16* input, wxUint32& output)
{
    const wxUint16 lead = input[0];

    // not a surrogate at all: the unit is the code point
    if ( lead < 0xd800 || lead > 0xdfff )
    {
        output = lead;
        return 1;
    }

    // a trail surrogate cannot start a pair; treating it as a lead would
    // produce a value of 0x110000 or more and swallow the following unit
    if ( lead >= 0xdc00 )
    {
        output = lead;
        return (size_t)-1;
    }

    const wxUint16 trail = input[1];
    if ( trail < 0xdc00 || trail > 0xdfff )
    {
        // lead surrogate without its trail
        output = lead;
        return (size_t)-1;
    }

    // subtracting 0xd7c0 removes the 0xd800 base and adds back the 0x10000
    // bias (0x40 << 10) in a single step
    output = ((wxUint32)(lead - 0xd7c0) << 10) + (wxUint32)(trail - 0xdc00);
    return 2;
}
167 // ----------------------------------------------------------------------------
169 // ----------------------------------------------------------------------------
171 wxMBConv::~wxMBConv()
173 // nothing to do here (necessary for Darwin linking probably)
176 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
180 // calculate the length of the buffer needed first
181 size_t nLen
= MB2WC(NULL
, psz
, 0);
182 if ( nLen
!= (size_t)-1 )
184 // now do the actual conversion
185 wxWCharBuffer
buf(nLen
);
186 nLen
= MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NULL
187 if ( nLen
!= (size_t)-1 )
194 wxWCharBuffer
buf((wchar_t *)NULL
);
199 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
203 size_t nLen
= WC2MB(NULL
, pwz
, 0);
204 if ( nLen
!= (size_t)-1 )
206 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
207 nLen
= WC2MB(buf
.data(), pwz
, nLen
+ 4);
208 if ( nLen
!= (size_t)-1 )
215 wxCharBuffer
buf((char *)NULL
);
220 const wxWCharBuffer
wxMBConv::cMB2WC(const char *szString
, size_t nStringLen
, size_t* pOutSize
) const
222 wxASSERT(pOutSize
!= NULL
);
224 const char* szEnd
= szString
+ nStringLen
+ 1;
225 const char* szPos
= szString
;
226 const char* szStart
= szPos
;
228 size_t nActualLength
= 0;
229 size_t nCurrentSize
= nStringLen
; //try normal size first (should never resize?)
231 wxWCharBuffer
theBuffer(nCurrentSize
);
233 //Convert the string until the length() is reached, continuing the
234 //loop every time a null character is reached
235 while(szPos
!= szEnd
)
237 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
239 //Get the length of the current (sub)string
240 size_t nLen
= MB2WC(NULL
, szPos
, 0);
242 //Invalid conversion?
243 if( nLen
== (size_t)-1 )
246 theBuffer
.data()[0u] = wxT('\0');
251 //Increase the actual length (+1 for current null character)
252 nActualLength
+= nLen
+ 1;
254 //if buffer too big, realloc the buffer
255 if (nActualLength
> (nCurrentSize
+1))
257 wxWCharBuffer
theNewBuffer(nCurrentSize
<< 1);
258 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
* sizeof(wchar_t));
259 theBuffer
= theNewBuffer
;
263 //Convert the current (sub)string
264 if ( MB2WC(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
267 theBuffer
.data()[0u] = wxT('\0');
271 //Increment to next (sub)string
272 //Note that we have to use strlen instead of nLen here
273 //because XX2XX gives us the size of the output buffer,
274 //which is not necessarily the length of the string
275 szPos
+= strlen(szPos
) + 1;
278 //success - return actual length and the buffer
279 *pOutSize
= nActualLength
;
283 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *szString
, size_t nStringLen
, size_t* pOutSize
) const
285 wxASSERT(pOutSize
!= NULL
);
287 const wchar_t* szEnd
= szString
+ nStringLen
+ 1;
288 const wchar_t* szPos
= szString
;
289 const wchar_t* szStart
= szPos
;
291 size_t nActualLength
= 0;
292 size_t nCurrentSize
= nStringLen
<< 2; //try * 4 first
294 wxCharBuffer
theBuffer(nCurrentSize
);
296 //Convert the string until the length() is reached, continuing the
297 //loop every time a null character is reached
298 while(szPos
!= szEnd
)
300 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
302 //Get the length of the current (sub)string
303 size_t nLen
= WC2MB(NULL
, szPos
, 0);
305 //Invalid conversion?
306 if( nLen
== (size_t)-1 )
309 theBuffer
.data()[0u] = wxT('\0');
313 //Increase the actual length (+1 for current null character)
314 nActualLength
+= nLen
+ 1;
316 //if buffer too big, realloc the buffer
317 if (nActualLength
> (nCurrentSize
+1))
319 wxCharBuffer
theNewBuffer(nCurrentSize
<< 1);
320 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
);
321 theBuffer
= theNewBuffer
;
325 //Convert the current (sub)string
326 if(WC2MB(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
329 theBuffer
.data()[0u] = wxT('\0');
333 //Increment to next (sub)string
334 //Note that we have to use wxWcslen instead of nLen here
335 //because XX2XX gives us the size of the output buffer,
336 //which is not necessarily the length of the string
337 szPos
+= wxWcslen(szPos
) + 1;
340 //success - return actual length and the buffer
341 *pOutSize
= nActualLength
;
345 // ----------------------------------------------------------------------------
347 // ----------------------------------------------------------------------------
349 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
351 return wxMB2WC(buf
, psz
, n
);
354 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
356 return wxWC2MB(buf
, psz
, n
);
361 // ----------------------------------------------------------------------------
362 // wxConvBrokenFileNames
363 // ----------------------------------------------------------------------------
365 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar
*charset
)
367 if ( !charset
|| wxStricmp(charset
, _T("UTF-8")) == 0
368 || wxStricmp(charset
, _T("UTF8")) == 0 )
369 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
371 m_conv
= new wxCSConv(charset
);
375 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf
,
377 size_t outputSize
) const
379 return m_conv
->MB2WC( outputBuf
, psz
, outputSize
);
383 wxConvBrokenFileNames::WC2MB(char *outputBuf
,
385 size_t outputSize
) const
387 return m_conv
->WC2MB( outputBuf
, psz
, outputSize
);
392 // ----------------------------------------------------------------------------
394 // ----------------------------------------------------------------------------
396 // Implementation (C) 2004 Fredrik Roubert
399 // BASE64 decoding table
401 static const unsigned char utf7unb64
[] =
403 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
404 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
405 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
406 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
407 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
408 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
409 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
410 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
411 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
412 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
413 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
414 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
415 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
416 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
417 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
418 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
419 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
420 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
421 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
422 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
423 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
424 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
425 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
426 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
427 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
428 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
429 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
430 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
431 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
432 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
433 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
434 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
437 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
441 while (*psz
&& ((!buf
) || (len
< n
)))
443 unsigned char cc
= *psz
++;
451 else if (*psz
== '-')
461 // BASE64 encoded string
465 for (lsb
= false, d
= 0, l
= 0;
466 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff; psz
++)
470 for (l
+= 6; l
>= 8; lsb
= !lsb
)
472 c
= (unsigned char)((d
>> (l
-= 8)) % 256);
481 *buf
= (wchar_t)(c
<< 8);
488 if (buf
&& (len
< n
))
494 // BASE64 encoding table
496 static const unsigned char utf7enb64
[] =
498 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
499 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
500 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
501 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
502 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
503 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
504 'w', 'x', 'y', 'z', '0', '1', '2', '3',
505 '4', '5', '6', '7', '8', '9', '+', '/'
509 // UTF-7 encoding table
511 // 0 - Set D (directly encoded characters)
512 // 1 - Set O (optional direct characters)
513 // 2 - whitespace characters (optional)
514 // 3 - special characters
516 static const unsigned char utf7encode
[128] =
518 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
519 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
520 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
522 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
524 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
528 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
534 while (*psz
&& ((!buf
) || (len
< n
)))
537 if (cc
< 0x80 && utf7encode
[cc
] < 1)
545 else if (((wxUint32
)cc
) > 0xffff)
547 // no surrogate pair generation (yet?)
558 // BASE64 encode string
559 unsigned int lsb
, d
, l
;
560 for (d
= 0, l
= 0;; psz
++)
562 for (lsb
= 0; lsb
< 2; lsb
++)
565 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
567 for (l
+= 8; l
>= 6; )
571 *buf
++ = utf7enb64
[(d
>> l
) % 64];
576 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
582 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
591 if (buf
&& (len
< n
))
596 // ----------------------------------------------------------------------------
598 // ----------------------------------------------------------------------------
600 static wxUint32 utf8_max
[]=
601 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
603 // boundaries of the private use area we use to (temporarily) remap
604 // characters that are invalid in a UTF-8 encoded string
605 const wxUint32 wxUnicodePUA
= 0x100000;
606 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
608 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
612 while (*psz
&& ((!buf
) || (len
< n
)))
614 const char *opsz
= psz
;
615 bool invalid
= false;
616 unsigned char cc
= *psz
++, fc
= cc
;
618 for (cnt
= 0; fc
& 0x80; cnt
++)
627 // escape the escape character for octal escapes
628 if ((m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
629 && cc
== '\\' && (!buf
|| len
< n
))
641 // invalid UTF-8 sequence
646 unsigned ocnt
= cnt
- 1;
647 wxUint32 res
= cc
& (0x3f >> cnt
);
651 if ((cc
& 0xC0) != 0x80)
653 // invalid UTF-8 sequence
658 res
= (res
<< 6) | (cc
& 0x3f);
660 if (invalid
|| res
<= utf8_max
[ocnt
])
662 // illegal UTF-8 encoding
665 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
666 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
668 // if one of our PUA characters turns up externally
669 // it must also be treated as an illegal sequence
670 // (a bit like you have to escape an escape character)
676 // cast is ok because wchar_t == wxUint16 if WC_UTF16
677 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
678 if (pa
== (size_t)-1)
692 #endif // WC_UTF16/!WC_UTF16
697 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
699 while (opsz
< psz
&& (!buf
|| len
< n
))
702 // cast is ok because wchar_t == wxUint16 if WC_UTF16
703 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
704 wxASSERT(pa
!= (size_t)-1);
711 *buf
++ = wxUnicodePUA
+ (unsigned char)*opsz
;
717 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
719 while (opsz
< psz
&& (!buf
|| len
< n
))
721 if ( buf
&& len
+ 3 < n
)
723 unsigned char n
= *opsz
;
725 *buf
++ = (wchar_t)( L
'0' + n
/ 0100 );
726 *buf
++ = (wchar_t)( L
'0' + (n
% 0100) / 010 );
727 *buf
++ = (wchar_t)( L
'0' + n
% 010 );
733 else // MAP_INVALID_UTF8_NOT
740 if (buf
&& (len
< n
))
// Returns true if `wch` is one of the octal digits L'0' through L'7'.
static inline bool isoctal(wchar_t wch)
{
    if ( wch < L'0' )
        return false;

    return wch <= L'7';
}
750 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
754 while (*psz
&& ((!buf
) || (len
< n
)))
758 // cast is ok for WC_UTF16
759 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
760 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
762 cc
=(*psz
++) & 0x7fffffff;
765 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
766 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
769 *buf
++ = (char)(cc
- wxUnicodePUA
);
772 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
773 && cc
== L
'\\' && psz
[0] == L
'\\' )
780 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
782 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
786 *buf
++ = (char) ((psz
[0] - L
'0')*0100 +
787 (psz
[1] - L
'0')*010 +
797 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
811 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
813 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
825 // ----------------------------------------------------------------------------
827 // ----------------------------------------------------------------------------
829 #ifdef WORDS_BIGENDIAN
830 #define wxMBConvUTF16straight wxMBConvUTF16BE
831 #define wxMBConvUTF16swap wxMBConvUTF16LE
833 #define wxMBConvUTF16swap wxMBConvUTF16BE
834 #define wxMBConvUTF16straight wxMBConvUTF16LE
840 // copy 16bit MB to 16bit String
841 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
845 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
848 *buf
++ = *(wxUint16
*)psz
;
851 psz
+= sizeof(wxUint16
);
853 if (buf
&& len
<n
) *buf
=0;
859 // copy 16bit String to 16bit MB
860 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
864 while (*psz
&& (!buf
|| len
< n
))
868 *(wxUint16
*)buf
= *psz
;
869 buf
+= sizeof(wxUint16
);
871 len
+= sizeof(wxUint16
);
874 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
880 // swap 16bit MB to 16bit String
881 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
885 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
889 ((char *)buf
)[0] = psz
[1];
890 ((char *)buf
)[1] = psz
[0];
894 psz
+= sizeof(wxUint16
);
896 if (buf
&& len
<n
) *buf
=0;
902 // swap 16bit MB to 16bit String
903 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
907 while (*psz
&& (!buf
|| len
< n
))
911 *buf
++ = ((char*)psz
)[1];
912 *buf
++ = ((char*)psz
)[0];
914 len
+= sizeof(wxUint16
);
917 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
926 // copy 16bit MB to 32bit String
927 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
931 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
934 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
935 if (pa
== (size_t)-1)
941 psz
+= pa
* sizeof(wxUint16
);
943 if (buf
&& len
<n
) *buf
=0;
949 // copy 32bit String to 16bit MB
950 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
954 while (*psz
&& (!buf
|| len
< n
))
957 size_t pa
=encode_utf16(*psz
, cc
);
959 if (pa
== (size_t)-1)
964 *(wxUint16
*)buf
= cc
[0];
965 buf
+= sizeof(wxUint16
);
968 *(wxUint16
*)buf
= cc
[1];
969 buf
+= sizeof(wxUint16
);
973 len
+= pa
*sizeof(wxUint16
);
976 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
982 // swap 16bit MB to 32bit String
983 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
987 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
991 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
992 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
994 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
995 if (pa
== (size_t)-1)
1002 psz
+= pa
* sizeof(wxUint16
);
1004 if (buf
&& len
<n
) *buf
=0;
1010 // swap 32bit String to 16bit MB
1011 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1015 while (*psz
&& (!buf
|| len
< n
))
1018 size_t pa
=encode_utf16(*psz
, cc
);
1020 if (pa
== (size_t)-1)
1025 *buf
++ = ((char*)cc
)[1];
1026 *buf
++ = ((char*)cc
)[0];
1029 *buf
++ = ((char*)cc
)[3];
1030 *buf
++ = ((char*)cc
)[2];
1034 len
+= pa
*sizeof(wxUint16
);
1037 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
1045 // ----------------------------------------------------------------------------
1047 // ----------------------------------------------------------------------------
1049 #ifdef WORDS_BIGENDIAN
1050 #define wxMBConvUTF32straight wxMBConvUTF32BE
1051 #define wxMBConvUTF32swap wxMBConvUTF32LE
1053 #define wxMBConvUTF32swap wxMBConvUTF32BE
1054 #define wxMBConvUTF32straight wxMBConvUTF32LE
1058 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1059 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
1064 // copy 32bit MB to 16bit String
1065 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1069 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1073 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
1074 if (pa
== (size_t)-1)
1084 psz
+= sizeof(wxUint32
);
1086 if (buf
&& len
<n
) *buf
=0;
1092 // copy 16bit String to 32bit MB
1093 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1097 while (*psz
&& (!buf
|| len
< n
))
1101 // cast is ok for WC_UTF16
1102 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
1103 if (pa
== (size_t)-1)
1108 *(wxUint32
*)buf
= cc
;
1109 buf
+= sizeof(wxUint32
);
1111 len
+= sizeof(wxUint32
);
1115 if (buf
&& len
<=n
-sizeof(wxUint32
))
1123 // swap 32bit MB to 16bit String
1124 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1128 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1131 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
1132 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
1137 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
1138 if (pa
== (size_t)-1)
1148 psz
+= sizeof(wxUint32
);
1158 // swap 16bit String to 32bit MB
1159 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1163 while (*psz
&& (!buf
|| len
< n
))
1167 // cast is ok for WC_UTF16
1168 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
1169 if (pa
== (size_t)-1)
1179 len
+= sizeof(wxUint32
);
1183 if (buf
&& len
<=n
-sizeof(wxUint32
))
1192 // copy 32bit MB to 32bit String
1193 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1197 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1200 *buf
++ = *(wxUint32
*)psz
;
1202 psz
+= sizeof(wxUint32
);
1212 // copy 32bit String to 32bit MB
1213 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1217 while (*psz
&& (!buf
|| len
< n
))
1221 *(wxUint32
*)buf
= *psz
;
1222 buf
+= sizeof(wxUint32
);
1225 len
+= sizeof(wxUint32
);
1229 if (buf
&& len
<=n
-sizeof(wxUint32
))
1236 // swap 32bit MB to 32bit String
1237 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1241 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1245 ((char *)buf
)[0] = psz
[3];
1246 ((char *)buf
)[1] = psz
[2];
1247 ((char *)buf
)[2] = psz
[1];
1248 ((char *)buf
)[3] = psz
[0];
1252 psz
+= sizeof(wxUint32
);
1262 // swap 32bit String to 32bit MB
1263 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1267 while (*psz
&& (!buf
|| len
< n
))
1271 *buf
++ = ((char *)psz
)[3];
1272 *buf
++ = ((char *)psz
)[2];
1273 *buf
++ = ((char *)psz
)[1];
1274 *buf
++ = ((char *)psz
)[0];
1276 len
+= sizeof(wxUint32
);
1280 if (buf
&& len
<=n
-sizeof(wxUint32
))
1290 // ============================================================================
1291 // The classes doing conversion using the iconv_xxx() functions
1292 // ============================================================================
1296 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1297 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1298 // (unless there's yet another bug in glibc) the only case when iconv()
1299 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1300 // left in the input buffer -- when _real_ error occurs,
1301 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1303 // [This bug does not appear in glibc 2.2.]
1304 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1305 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1306 (errno != E2BIG || bufLeft != 0))
1308 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1311 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1313 // ----------------------------------------------------------------------------
1314 // wxMBConv_iconv: encapsulates an iconv character set
1315 // ----------------------------------------------------------------------------
1317 class wxMBConv_iconv
: public wxMBConv
1320 wxMBConv_iconv(const wxChar
*name
);
1321 virtual ~wxMBConv_iconv();
1323 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1324 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1327 { return (m2w
!= (iconv_t
)-1) && (w2m
!= (iconv_t
)-1); }
1330 // the iconv handlers used to translate from multibyte to wide char and in
1331 // the other direction
1335 // guards access to m2w and w2m objects
1336 wxMutex m_iconvMutex
;
1340 // the name (for iconv_open()) of a wide char charset -- if none is
1341 // available on this machine, it will remain NULL
1342 static const char *ms_wcCharsetName
;
1344 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1345 // different endian-ness than the native one
1346 static bool ms_wcNeedsSwap
;
1349 // make the constructor available for unit testing
1350 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_iconv( const wxChar
* name
)
1352 wxMBConv_iconv
* result
= new wxMBConv_iconv( name
);
1353 if ( !result
->IsOk() )
1361 const char *wxMBConv_iconv::ms_wcCharsetName
= NULL
;
1362 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1364 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1366 // Do it the hard way
1368 for (size_t i
= 0; i
< wxStrlen(name
)+1; i
++)
1369 cname
[i
] = (char) name
[i
];
1371 // check for charset that represents wchar_t:
1372 if (ms_wcCharsetName
== NULL
)
1374 ms_wcNeedsSwap
= false;
1376 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1377 ms_wcCharsetName
= WC_NAME_BEST
;
1378 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1380 if (m2w
== (iconv_t
)-1)
1382 // try charset w/o bytesex info (e.g. "UCS4")
1383 // and check for bytesex ourselves:
1384 ms_wcCharsetName
= WC_NAME
;
1385 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1387 // last bet, try if it knows WCHAR_T pseudo-charset
1388 if (m2w
== (iconv_t
)-1)
1390 ms_wcCharsetName
= "WCHAR_T";
1391 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1394 if (m2w
!= (iconv_t
)-1)
1396 char buf
[2], *bufPtr
;
1397 wchar_t wbuf
[2], *wbufPtr
;
1405 outsz
= SIZEOF_WCHAR_T
* 2;
1409 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1410 (char**)&wbufPtr
, &outsz
);
1412 if (ICONV_FAILED(res
, insz
))
1414 ms_wcCharsetName
= NULL
;
1415 wxLogLastError(wxT("iconv"));
1416 wxLogError(_("Conversion to charset '%s' doesn't work."), name
);
1420 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1425 ms_wcCharsetName
= NULL
;
1427 // VS: we must not output an error here, since wxWidgets will safely
1428 // fall back to using wxEncodingConverter.
1429 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name
);
1433 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName
, ms_wcNeedsSwap
);
1435 else // we already have ms_wcCharsetName
1437 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1440 // NB: don't ever pass NULL to iconv_open(), it may crash!
1441 if ( ms_wcCharsetName
)
1443 w2m
= iconv_open( cname
, ms_wcCharsetName
);
1451 wxMBConv_iconv::~wxMBConv_iconv()
1453 if ( m2w
!= (iconv_t
)-1 )
1455 if ( w2m
!= (iconv_t
)-1 )
1459 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1462 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1463 // Unfortunately there is a couple of global wxCSConv objects such as
1464 // wxConvLocal that are used all over wx code, so we have to make sure
1465 // the handle is used by at most one thread at the time. Otherwise
1466 // only a few wx classes would be safe to use from non-main threads
1467 // as MB<->WC conversion would fail "randomly".
1468 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1471 size_t inbuf
= strlen(psz
);
1472 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1474 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1475 wchar_t *bufPtr
= buf
;
1476 const char *pszPtr
= psz
;
1480 // have destination buffer, convert there
1482 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1483 (char**)&bufPtr
, &outbuf
);
1484 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1488 // convert to native endianness
1489 WC_BSWAP(buf
/* _not_ bufPtr */, res
)
1492 // NB: iconv was given only strlen(psz) characters on input, and so
1493 // it couldn't convert the trailing zero. Let's do it ourselves
1494 // if there's some room left for it in the output buffer.
1500 // no destination buffer... convert using temp buffer
1501 // to calculate destination buffer requirement
1506 outbuf
= 8*SIZEOF_WCHAR_T
;
1509 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1510 (char**)&bufPtr
, &outbuf
);
1512 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1513 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1516 if (ICONV_FAILED(cres
, inbuf
))
1518 //VS: it is ok if iconv fails, hence trace only
1519 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1526 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1529 // NB: explained in MB2WC
1530 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1533 size_t inbuf
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
1537 wchar_t *tmpbuf
= 0;
1541 // need to copy to temp buffer to switch endianness
1542 // this absolutely doesn't rock!
1543 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1544 // could be in read-only memory, or be accessed in some other thread)
1545 tmpbuf
=(wchar_t*)malloc((inbuf
+1)*SIZEOF_WCHAR_T
);
1546 memcpy(tmpbuf
,psz
,(inbuf
+1)*SIZEOF_WCHAR_T
);
1547 WC_BSWAP(tmpbuf
, inbuf
)
1553 // have destination buffer, convert there
1554 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1558 // NB: iconv was given only wcslen(psz) characters on input, and so
1559 // it couldn't convert the trailing zero. Let's do it ourselves
1560 // if there's some room left for it in the output buffer.
1566 // no destination buffer... convert using temp buffer
1567 // to calculate destination buffer requirement
1571 buf
= tbuf
; outbuf
= 16;
1573 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1576 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1584 if (ICONV_FAILED(cres
, inbuf
))
1586 //VS: it is ok if iconv fails, hence trace only
1587 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1594 #endif // HAVE_ICONV
1597 // ============================================================================
1598 // Win32 conversion classes
1599 // ============================================================================
1601 #ifdef wxHAVE_WIN32_MB2WC
1605 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1606 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1609 class wxMBConv_win32
: public wxMBConv
1614 m_CodePage
= CP_ACP
;
1618 wxMBConv_win32(const wxChar
* name
)
1620 m_CodePage
= wxCharsetToCodepage(name
);
1623 wxMBConv_win32(wxFontEncoding encoding
)
1625 m_CodePage
= wxEncodingToCodepage(encoding
);
1629 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1631 // note that we have to use MB_ERR_INVALID_CHARS flag as without it
1632 // the behaviour is not compatible with the Unix version (using iconv)
1633 // and break the library itself, e.g. wxTextInputStream::NextChar()
1634 // wouldn't work if reading an incomplete MB char didn't result in an
1637 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1638 // an error (tested under Windows Server 2003) and apparently it is
1639 // done on purpose, i.e. the function accepts any input in this case
1640 // and although I'd prefer to return error on ill-formed output, our
1641 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1642 // explicitly ill-formed according to RFC 2152) neither so we don't
1643 // even have any fallback here...
1644 int flags
= m_CodePage
== CP_UTF7
? 0 : MB_ERR_INVALID_CHARS
;
1646 const size_t len
= ::MultiByteToWideChar
1648 m_CodePage
, // code page
1649 flags
, // flags: fall on error
1650 psz
, // input string
1651 -1, // its length (NUL-terminated)
1652 buf
, // output string
1653 buf
? n
: 0 // size of output buffer
1656 // note that it returns count of written chars for buf != NULL and size
1657 // of the needed buffer for buf == NULL so in either case the length of
1658 // the string (which never includes the terminating NUL) is one less
1659 return len
? len
- 1 : (size_t)-1;
1662 size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
1665 we have a problem here: by default, WideCharToMultiByte() may
1666 replace characters unrepresentable in the target code page with bad
1667 quality approximations such as turning "1/2" symbol (U+00BD) into
1668 "1" for the code pages which don't have it and we, obviously, want
1669 to avoid this at any price
1671 the trouble is that this function does it _silently_, i.e. it won't
1672 even tell us whether it did or not... Win98/2000 and higher provide
1673 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1674 we have to resort to a round trip, i.e. check that converting back
1675 results in the same string -- this is, of course, expensive but
1676 otherwise we simply can't be sure to not garble the data.
1679 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1680 // it doesn't work with CJK encodings (which we test for rather roughly
1681 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1683 BOOL usedDef
wxDUMMY_INITIALIZE(false);
1686 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
1688 // it's our lucky day
1689 flags
= WC_NO_BEST_FIT_CHARS
;
1690 pUsedDef
= &usedDef
;
1692 else // old system or unsupported encoding
1698 const size_t len
= ::WideCharToMultiByte
1700 m_CodePage
, // code page
1701 flags
, // either none or no best fit
1702 pwz
, // input string
1703 -1, // it is (wide) NUL-terminated
1704 buf
, // output buffer
1705 buf
? n
: 0, // and its size
1706 NULL
, // default "replacement" char
1707 pUsedDef
// [out] was it used?
1712 // function totally failed
1716 // if we were really converting, check if we succeeded
1721 // check if the conversion failed, i.e. if any replacements
1726 else // we must resort to double tripping...
1728 wxWCharBuffer
wcBuf(n
);
1729 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
1730 wcscmp(wcBuf
, pwz
) != 0 )
1732 // we didn't obtain the same thing we started from, hence
1733 // the conversion was lossy and we consider that it failed
1739 // see the comment above for the reason of "len - 1"
// Reports whether this converter can be used: true unless m_CodePage is -1.
// NOTE(review): -1 is presumably the "unknown/unsupported code page" marker
// set by the constructors -- confirm against them.
1743 bool IsOk() const { return m_CodePage
!= -1; }
1746 static bool CanUseNoBestFit()
1748 static int s_isWin98Or2k
= -1;
1750 if ( s_isWin98Or2k
== -1 )
1753 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
1756 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
1760 s_isWin98Or2k
= verMaj
>= 5;
1764 // unknown, be conservative by default
1768 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
1771 return s_isWin98Or2k
== 1;
1777 #endif // wxHAVE_WIN32_MB2WC
1779 // ============================================================================
1780 // Cocoa conversion classes
1781 // ============================================================================
1783 #if defined(__WXCOCOA__)
1785 // RN: There is no UTF-32 support in either Core Foundation or
1786 // Cocoa. Strangely enough, Core Foundation uses UTF-32
1787 // internally quite a bit - it's just not public (yet).
1789 #include <CoreFoundation/CFString.h>
1790 #include <CoreFoundation/CFStringEncodingExt.h>
1792 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
1794 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
1795 if ( encoding
== wxFONTENCODING_DEFAULT
)
1797 enc
= CFStringGetSystemEncoding();
1799 else switch( encoding
)
1801 case wxFONTENCODING_ISO8859_1
:
1802 enc
= kCFStringEncodingISOLatin1
;
1804 case wxFONTENCODING_ISO8859_2
:
1805 enc
= kCFStringEncodingISOLatin2
;
1807 case wxFONTENCODING_ISO8859_3
:
1808 enc
= kCFStringEncodingISOLatin3
;
1810 case wxFONTENCODING_ISO8859_4
:
1811 enc
= kCFStringEncodingISOLatin4
;
1813 case wxFONTENCODING_ISO8859_5
:
1814 enc
= kCFStringEncodingISOLatinCyrillic
;
1816 case wxFONTENCODING_ISO8859_6
:
1817 enc
= kCFStringEncodingISOLatinArabic
;
1819 case wxFONTENCODING_ISO8859_7
:
1820 enc
= kCFStringEncodingISOLatinGreek
;
1822 case wxFONTENCODING_ISO8859_8
:
1823 enc
= kCFStringEncodingISOLatinHebrew
;
1825 case wxFONTENCODING_ISO8859_9
:
1826 enc
= kCFStringEncodingISOLatin5
;
1828 case wxFONTENCODING_ISO8859_10
:
1829 enc
= kCFStringEncodingISOLatin6
;
1831 case wxFONTENCODING_ISO8859_11
:
1832 enc
= kCFStringEncodingISOLatinThai
;
1834 case wxFONTENCODING_ISO8859_13
:
1835 enc
= kCFStringEncodingISOLatin7
;
1837 case wxFONTENCODING_ISO8859_14
:
1838 enc
= kCFStringEncodingISOLatin8
;
1840 case wxFONTENCODING_ISO8859_15
:
1841 enc
= kCFStringEncodingISOLatin9
;
1844 case wxFONTENCODING_KOI8
:
1845 enc
= kCFStringEncodingKOI8_R
;
1847 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
1848 enc
= kCFStringEncodingDOSRussian
;
1851 // case wxFONTENCODING_BULGARIAN :
1855 case wxFONTENCODING_CP437
:
1856 enc
=kCFStringEncodingDOSLatinUS
;
1858 case wxFONTENCODING_CP850
:
1859 enc
= kCFStringEncodingDOSLatin1
;
1861 case wxFONTENCODING_CP852
:
1862 enc
= kCFStringEncodingDOSLatin2
;
1864 case wxFONTENCODING_CP855
:
1865 enc
= kCFStringEncodingDOSCyrillic
;
1867 case wxFONTENCODING_CP866
:
1868 enc
=kCFStringEncodingDOSRussian
;
1870 case wxFONTENCODING_CP874
:
1871 enc
= kCFStringEncodingDOSThai
;
1873 case wxFONTENCODING_CP932
:
1874 enc
= kCFStringEncodingDOSJapanese
;
1876 case wxFONTENCODING_CP936
:
1877 enc
=kCFStringEncodingDOSChineseSimplif
;
1879 case wxFONTENCODING_CP949
:
1880 enc
= kCFStringEncodingDOSKorean
;
1882 case wxFONTENCODING_CP950
:
1883 enc
= kCFStringEncodingDOSChineseTrad
;
1885 case wxFONTENCODING_CP1250
:
1886 enc
= kCFStringEncodingWindowsLatin2
;
1888 case wxFONTENCODING_CP1251
:
1889 enc
=kCFStringEncodingWindowsCyrillic
;
1891 case wxFONTENCODING_CP1252
:
1892 enc
=kCFStringEncodingWindowsLatin1
;
1894 case wxFONTENCODING_CP1253
:
1895 enc
= kCFStringEncodingWindowsGreek
;
1897 case wxFONTENCODING_CP1254
:
1898 enc
= kCFStringEncodingWindowsLatin5
;
1900 case wxFONTENCODING_CP1255
:
1901 enc
=kCFStringEncodingWindowsHebrew
;
1903 case wxFONTENCODING_CP1256
:
1904 enc
=kCFStringEncodingWindowsArabic
;
1906 case wxFONTENCODING_CP1257
:
1907 enc
= kCFStringEncodingWindowsBalticRim
;
1909 // This only really encodes to UTF7 (if that) evidently
1910 // case wxFONTENCODING_UTF7 :
1911 // enc = kCFStringEncodingNonLossyASCII ;
1913 case wxFONTENCODING_UTF8
:
1914 enc
= kCFStringEncodingUTF8
;
1916 case wxFONTENCODING_EUC_JP
:
1917 enc
= kCFStringEncodingEUC_JP
;
1919 case wxFONTENCODING_UTF16
:
1920 enc
= kCFStringEncodingUnicode
;
1922 case wxFONTENCODING_MACROMAN
:
1923 enc
= kCFStringEncodingMacRoman
;
1925 case wxFONTENCODING_MACJAPANESE
:
1926 enc
= kCFStringEncodingMacJapanese
;
1928 case wxFONTENCODING_MACCHINESETRAD
:
1929 enc
= kCFStringEncodingMacChineseTrad
;
1931 case wxFONTENCODING_MACKOREAN
:
1932 enc
= kCFStringEncodingMacKorean
;
1934 case wxFONTENCODING_MACARABIC
:
1935 enc
= kCFStringEncodingMacArabic
;
1937 case wxFONTENCODING_MACHEBREW
:
1938 enc
= kCFStringEncodingMacHebrew
;
1940 case wxFONTENCODING_MACGREEK
:
1941 enc
= kCFStringEncodingMacGreek
;
1943 case wxFONTENCODING_MACCYRILLIC
:
1944 enc
= kCFStringEncodingMacCyrillic
;
1946 case wxFONTENCODING_MACDEVANAGARI
:
1947 enc
= kCFStringEncodingMacDevanagari
;
1949 case wxFONTENCODING_MACGURMUKHI
:
1950 enc
= kCFStringEncodingMacGurmukhi
;
1952 case wxFONTENCODING_MACGUJARATI
:
1953 enc
= kCFStringEncodingMacGujarati
;
1955 case wxFONTENCODING_MACORIYA
:
1956 enc
= kCFStringEncodingMacOriya
;
1958 case wxFONTENCODING_MACBENGALI
:
1959 enc
= kCFStringEncodingMacBengali
;
1961 case wxFONTENCODING_MACTAMIL
:
1962 enc
= kCFStringEncodingMacTamil
;
1964 case wxFONTENCODING_MACTELUGU
:
1965 enc
= kCFStringEncodingMacTelugu
;
1967 case wxFONTENCODING_MACKANNADA
:
1968 enc
= kCFStringEncodingMacKannada
;
1970 case wxFONTENCODING_MACMALAJALAM
:
1971 enc
= kCFStringEncodingMacMalayalam
;
1973 case wxFONTENCODING_MACSINHALESE
:
1974 enc
= kCFStringEncodingMacSinhalese
;
1976 case wxFONTENCODING_MACBURMESE
:
1977 enc
= kCFStringEncodingMacBurmese
;
1979 case wxFONTENCODING_MACKHMER
:
1980 enc
= kCFStringEncodingMacKhmer
;
1982 case wxFONTENCODING_MACTHAI
:
1983 enc
= kCFStringEncodingMacThai
;
1985 case wxFONTENCODING_MACLAOTIAN
:
1986 enc
= kCFStringEncodingMacLaotian
;
1988 case wxFONTENCODING_MACGEORGIAN
:
1989 enc
= kCFStringEncodingMacGeorgian
;
1991 case wxFONTENCODING_MACARMENIAN
:
1992 enc
= kCFStringEncodingMacArmenian
;
1994 case wxFONTENCODING_MACCHINESESIMP
:
1995 enc
= kCFStringEncodingMacChineseSimp
;
1997 case wxFONTENCODING_MACTIBETAN
:
1998 enc
= kCFStringEncodingMacTibetan
;
2000 case wxFONTENCODING_MACMONGOLIAN
:
2001 enc
= kCFStringEncodingMacMongolian
;
2003 case wxFONTENCODING_MACETHIOPIC
:
2004 enc
= kCFStringEncodingMacEthiopic
;
2006 case wxFONTENCODING_MACCENTRALEUR
:
2007 enc
= kCFStringEncodingMacCentralEurRoman
;
2009 case wxFONTENCODING_MACVIATNAMESE
:
2010 enc
= kCFStringEncodingMacVietnamese
;
2012 case wxFONTENCODING_MACARABICEXT
:
2013 enc
= kCFStringEncodingMacExtArabic
;
2015 case wxFONTENCODING_MACSYMBOL
:
2016 enc
= kCFStringEncodingMacSymbol
;
2018 case wxFONTENCODING_MACDINGBATS
:
2019 enc
= kCFStringEncodingMacDingbats
;
2021 case wxFONTENCODING_MACTURKISH
:
2022 enc
= kCFStringEncodingMacTurkish
;
2024 case wxFONTENCODING_MACCROATIAN
:
2025 enc
= kCFStringEncodingMacCroatian
;
2027 case wxFONTENCODING_MACICELANDIC
:
2028 enc
= kCFStringEncodingMacIcelandic
;
2030 case wxFONTENCODING_MACROMANIAN
:
2031 enc
= kCFStringEncodingMacRomanian
;
2033 case wxFONTENCODING_MACCELTIC
:
2034 enc
= kCFStringEncodingMacCeltic
;
2036 case wxFONTENCODING_MACGAELIC
:
2037 enc
= kCFStringEncodingMacGaelic
;
2039 // case wxFONTENCODING_MACKEYBOARD :
2040 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2043 // because gcc is picky
2049 class wxMBConv_cocoa
: public wxMBConv
2054 Init(CFStringGetSystemEncoding()) ;
2058 wxMBConv_cocoa(const wxChar
* name
)
2060 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2064 wxMBConv_cocoa(wxFontEncoding encoding
)
2066 Init( wxCFStringEncFromFontEnc(encoding
) );
2073 void Init( CFStringEncoding encoding
)
2075 m_encoding
= encoding
;
2078 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2082 CFStringRef theString
= CFStringCreateWithBytes (
2083 NULL
, //the allocator
2084 (const UInt8
*)szUnConv
,
2087 false //no BOM/external representation
2090 wxASSERT(theString
);
2092 size_t nOutLength
= CFStringGetLength(theString
);
2096 CFRelease(theString
);
2100 CFRange theRange
= { 0, nOutSize
};
2102 #if SIZEOF_WCHAR_T == 4
2103 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2106 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2108 CFRelease(theString
);
2110 szUniCharBuffer
[nOutLength
] = '\0' ;
2112 #if SIZEOF_WCHAR_T == 4
2113 wxMBConvUTF16 converter
;
2114 converter
.MB2WC(szOut
, (const char*)szUniCharBuffer
, nOutSize
) ;
2115 delete[] szUniCharBuffer
;
2121 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2125 size_t nRealOutSize
;
2126 size_t nBufSize
= wxWcslen(szUnConv
);
2127 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
2129 #if SIZEOF_WCHAR_T == 4
2130 wxMBConvUTF16 converter
;
2131 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2132 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1] ;
2133 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
)) ;
2134 nBufSize
/= sizeof(UniChar
);
2137 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2141 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
2144 wxASSERT(theString
);
2146 //Note that CER puts a BOM when converting to unicode
2147 //so we check and use getchars instead in that case
2148 if (m_encoding
== kCFStringEncodingUnicode
)
2151 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2153 nRealOutSize
= CFStringGetLength(theString
) + 1;
2159 CFRangeMake(0, CFStringGetLength(theString
)),
2161 0, //what to put in characters that can't be converted -
2162 //0 tells CFString to return NULL if it meets such a character
2163 false, //not an external representation
2166 (CFIndex
*) &nRealOutSize
2170 CFRelease(theString
);
2172 #if SIZEOF_WCHAR_T == 4
2173 delete[] szUniBuffer
;
2176 return nRealOutSize
- 1;
2181 return m_encoding
!= kCFStringEncodingInvalidId
&&
2182 CFStringIsEncodingAvailable(m_encoding
);
2186 CFStringEncoding m_encoding
;
2189 #endif // defined(__WXCOCOA__)
2191 // ============================================================================
2192 // Mac conversion classes
2193 // ============================================================================
2195 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2197 class wxMBConv_mac
: public wxMBConv
2202 Init(CFStringGetSystemEncoding()) ;
2206 wxMBConv_mac(const wxChar
* name
)
2208 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2212 wxMBConv_mac(wxFontEncoding encoding
)
2214 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
2219 OSStatus status
= noErr
;
2220 status
= TECDisposeConverter(m_MB2WC_converter
);
2221 status
= TECDisposeConverter(m_WC2MB_converter
);
2225 void Init( TextEncodingBase encoding
)
2227 OSStatus status
= noErr
;
2228 m_char_encoding
= encoding
;
2229 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
2231 status
= TECCreateConverter(&m_MB2WC_converter
,
2233 m_unicode_encoding
);
2234 status
= TECCreateConverter(&m_WC2MB_converter
,
2239 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2241 OSStatus status
= noErr
;
2242 ByteCount byteOutLen
;
2243 ByteCount byteInLen
= strlen(psz
) ;
2244 wchar_t *tbuf
= NULL
;
2245 UniChar
* ubuf
= NULL
;
2250 //apple specs say at least 32
2251 n
= wxMax( 32 , byteInLen
) ;
2252 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
2254 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
2255 #if SIZEOF_WCHAR_T == 4
2256 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2258 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2260 status
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2261 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2262 #if SIZEOF_WCHAR_T == 4
2263 // we have to terminate here, because n might be larger for the trailing zero, and if the UniChar
2264 // buffer is not properly terminated we get random characters at the end
2265 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2266 wxMBConvUTF16 converter
;
2267 res
= converter
.MB2WC( (buf
? buf
: tbuf
) , (const char*)ubuf
, n
) ;
2270 res
= byteOutLen
/ sizeof( UniChar
) ;
2275 if ( buf
&& res
< n
)
2281 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2283 OSStatus status
= noErr
;
2284 ByteCount byteOutLen
;
2285 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2291 //apple specs say at least 32
2292 n
= wxMax( 32 , ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2293 tbuf
= (char*) malloc( n
) ;
2296 ByteCount byteBufferLen
= n
;
2297 UniChar
* ubuf
= NULL
;
2298 #if SIZEOF_WCHAR_T == 4
2299 wxMBConvUTF16 converter
;
2300 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2301 byteInLen
= unicharlen
;
2302 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2303 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2305 ubuf
= (UniChar
*) psz
;
2307 status
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2308 (TextPtr
) (buf
? buf
: tbuf
) , byteBufferLen
, &byteOutLen
);
2309 #if SIZEOF_WCHAR_T == 4
2315 size_t res
= byteOutLen
;
2316 if ( buf
&& res
< n
)
2320 //we need to double-trip to verify it didn't insert any ? in place
2321 //of bogus characters
2322 wxWCharBuffer
wcBuf(n
);
2323 size_t pszlen
= wxWcslen(psz
);
2324 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
2325 wxWcslen(wcBuf
) != pszlen
||
2326 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2328 // we didn't obtain the same thing we started from, hence
2329 // the conversion was lossy and we consider that it failed
2338 { return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
; }
2341 TECObjectRef m_MB2WC_converter
;
2342 TECObjectRef m_WC2MB_converter
;
2344 TextEncodingBase m_char_encoding
;
2345 TextEncodingBase m_unicode_encoding
;
2348 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2350 // ============================================================================
2351 // wxEncodingConverter based conversion classes
2352 // ============================================================================
2356 class wxMBConv_wxwin
: public wxMBConv
2361 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2362 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2366 // temporarily just use wxEncodingConverter stuff,
2367 // so that it works while a better implementation is built
2368 wxMBConv_wxwin(const wxChar
* name
)
2371 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2373 m_enc
= wxFONTENCODING_SYSTEM
;
2378 wxMBConv_wxwin(wxFontEncoding enc
)
2385 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2387 size_t inbuf
= strlen(psz
);
2390 if (!m2w
.Convert(psz
,buf
))
2396 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2398 const size_t inbuf
= wxWcslen(psz
);
2401 if (!w2m
.Convert(psz
,buf
))
// Reports whether the converter was initialized successfully. m_ok is the
// combined result of initializing both wxEncodingConverter members
// (m2w: m_enc -> Unicode, w2m: Unicode -> m_enc).
2408 bool IsOk() const { return m_ok
; }
2411 wxFontEncoding m_enc
;
2412 wxEncodingConverter m2w
, w2m
;
2414 // were we initialized successfully?
2417 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2420 // make the constructors available for unit testing
2421 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_wxwin( const wxChar
* name
)
2423 wxMBConv_wxwin
* result
= new wxMBConv_wxwin( name
);
2424 if ( !result
->IsOk() )
2432 #endif // wxUSE_FONTMAP
2434 // ============================================================================
2435 // wxCSConv implementation
2436 // ============================================================================
2438 void wxCSConv::Init()
2445 wxCSConv::wxCSConv(const wxChar
*charset
)
2454 m_encoding
= wxFONTENCODING_SYSTEM
;
2457 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2459 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2461 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2463 encoding
= wxFONTENCODING_SYSTEM
;
2468 m_encoding
= encoding
;
2471 wxCSConv::~wxCSConv()
2476 wxCSConv::wxCSConv(const wxCSConv
& conv
)
2481 SetName(conv
.m_name
);
2482 m_encoding
= conv
.m_encoding
;
2485 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2489 SetName(conv
.m_name
);
2490 m_encoding
= conv
.m_encoding
;
2495 void wxCSConv::Clear()
2504 void wxCSConv::SetName(const wxChar
*charset
)
2508 m_name
= wxStrdup(charset
);
2513 wxMBConv
*wxCSConv::DoCreate() const
2515 // check for the special case of ASCII or ISO8859-1 charset: as we have
2516 // special knowledge of it anyhow, we don't need to create a special
2517 // conversion object
2518 if ( m_encoding
== wxFONTENCODING_ISO8859_1
)
2520 // don't convert at all
2524 // we trust OS to do conversion better than we can so try external
2525 // conversion methods first
2527 // the full order is:
2528 // 1. OS conversion (iconv() under Unix or Win32 API)
2529 // 2. hard coded conversions for UTF
2530 // 3. wxEncodingConverter as fall back
2536 #endif // !wxUSE_FONTMAP
2538 wxString
name(m_name
);
2542 name
= wxFontMapperBase::GetEncodingName(m_encoding
);
2543 #endif // wxUSE_FONTMAP
2545 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
2551 #endif // HAVE_ICONV
2553 #ifdef wxHAVE_WIN32_MB2WC
2556 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2557 : new wxMBConv_win32(m_encoding
);
2566 #endif // wxHAVE_WIN32_MB2WC
2567 #if defined(__WXMAC__)
2569 // leave UTF16 and UTF32 to the built-ins of wx
2570 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
2571 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
2575 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
2576 : new wxMBConv_mac(m_encoding
);
2578 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
2587 #if defined(__WXCOCOA__)
2589 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
2593 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
2594 : new wxMBConv_cocoa(m_encoding
);
2596 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
2606 wxFontEncoding enc
= m_encoding
;
2608 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2610 // use "false" to suppress interactive dialogs -- we can be called from
2611 // anywhere and popping up a dialog from here is the last thing we want
2613 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
2615 #endif // wxUSE_FONTMAP
2619 case wxFONTENCODING_UTF7
:
2620 return new wxMBConvUTF7
;
2622 case wxFONTENCODING_UTF8
:
2623 return new wxMBConvUTF8
;
2625 case wxFONTENCODING_UTF16BE
:
2626 return new wxMBConvUTF16BE
;
2628 case wxFONTENCODING_UTF16LE
:
2629 return new wxMBConvUTF16LE
;
2631 case wxFONTENCODING_UTF32BE
:
2632 return new wxMBConvUTF32BE
;
2634 case wxFONTENCODING_UTF32LE
:
2635 return new wxMBConvUTF32LE
;
2638 // nothing to do but put here to suppress gcc warnings
2645 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2646 : new wxMBConv_wxwin(m_encoding
);
2652 #endif // wxUSE_FONTMAP
2654 // NB: This is a hack to prevent a deadlock. What could otherwise happen
2655 // in a Unicode build: creating wxConvLocal ends up here because of
2656 // some failure and logs the error. wxLog then tries to attach a
2657 // timestamp, for which it needs wxConvLocal (to convert the time to
2658 // char* and then wchar_t*); that fails again and tries to log another
2659 // error, but wxLog holds an (already locked) critical section that
2660 // guards a static buffer.
2661 static bool alreadyLoggingError
= false;
2662 if (!alreadyLoggingError
)
2664 alreadyLoggingError
= true;
2665 wxLogError(_("Cannot convert from the charset '%s'!"),
2669 wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str()
2670 #else // !wxUSE_FONTMAP
2671 wxString::Format(_("encoding %s"), m_encoding
).c_str()
2672 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2674 alreadyLoggingError
= false;
2680 void wxCSConv::CreateConvIfNeeded() const
2684 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2687 // if we have neither the name nor the encoding, use the default
2688 // encoding for this system
2689 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
2691 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
2693 #endif // wxUSE_INTL
2695 self
->m_convReal
= DoCreate();
2696 self
->m_deferred
= false;
2700 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2702 CreateConvIfNeeded();
2705 return m_convReal
->MB2WC(buf
, psz
, n
);
2708 size_t len
= strlen(psz
);
2712 for (size_t c
= 0; c
<= len
; c
++)
2713 buf
[c
] = (unsigned char)(psz
[c
]);
2719 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2721 CreateConvIfNeeded();
2724 return m_convReal
->WC2MB(buf
, psz
, n
);
2727 const size_t len
= wxWcslen(psz
);
2730 for (size_t c
= 0; c
<= len
; c
++)
2734 buf
[c
] = (char)psz
[c
];
2739 for (size_t c
= 0; c
<= len
; c
++)
2749 // ----------------------------------------------------------------------------
2751 // ----------------------------------------------------------------------------
2754 static wxMBConv_win32 wxConvLibcObj
;
2755 #elif defined(__WXMAC__) && !defined(__MACH__)
2756 static wxMBConv_mac wxConvLibcObj
;
2758 static wxMBConvLibc wxConvLibcObj
;
// File-local converter instances that back the exported wxConv* globals
// defined further down in this group.
// wxConvLocalObj is constructed for wxFONTENCODING_SYSTEM and
// wxConvISO8859_1Obj for wxFONTENCODING_ISO8859_1.
2761 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
2762 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
2763 static wxMBConvUTF7 wxConvUTF7Obj
;
2764 static wxMBConvUTF8 wxConvUTF8Obj
;
// Exported references, each bound to the corresponding file-local object.
// wxConvLibc binds to wxConvLibcObj, which is defined just before this group.
2766 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
2767 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
2768 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
2769 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
2770 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
// Unlike the globals above, wxConvCurrent is a pointer; it is initialized to
// the address of the libc converter object.
2771 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
2772 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
= &
2780 #else // !wxUSE_WCHAR_T
2782 // stand-ins in absence of wchar_t
2783 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
2788 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T