1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // ============================================================================
17 // ============================================================================
19 // ----------------------------------------------------------------------------
21 // ----------------------------------------------------------------------------
23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
24 #pragma implementation "strconv.h"
27 // For compilers that support precompilation, includes "wx.h".
28 #include "wx/wxprec.h"
39 #include "wx/strconv.h"
44 #include "wx/msw/private.h"
45 #include "wx/msw/missing.h"
56 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
57 #define wxHAVE_WIN32_MB2WC
58 #endif // __WIN32__ but !__WXMICROWIN__
60 // ----------------------------------------------------------------------------
62 // ----------------------------------------------------------------------------
70 #include "wx/thread.h"
73 #include "wx/encconv.h"
74 #include "wx/fontmap.h"
79 #include <ATSUnicode.h>
80 #include <TextCommon.h>
81 #include <TextEncodingConverter.h>
84 #include "wx/mac/private.h" // includes mac headers
87 #define TRACE_STRCONV _T("strconv")
89 // ----------------------------------------------------------------------------
91 // ----------------------------------------------------------------------------
93 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
94 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
96 #if SIZEOF_WCHAR_T == 4
97 #define WC_NAME "UCS4"
98 #define WC_BSWAP BSWAP_UCS4
99 #ifdef WORDS_BIGENDIAN
100 #define WC_NAME_BEST "UCS-4BE"
102 #define WC_NAME_BEST "UCS-4LE"
104 #elif SIZEOF_WCHAR_T == 2
105 #define WC_NAME "UTF16"
106 #define WC_BSWAP BSWAP_UTF16
108 #ifdef WORDS_BIGENDIAN
109 #define WC_NAME_BEST "UTF-16BE"
111 #define WC_NAME_BEST "UTF-16LE"
113 #else // sizeof(wchar_t) != 2 nor 4
114 // does this ever happen?
115 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
118 // ============================================================================
120 // ============================================================================
122 // ----------------------------------------------------------------------------
123 // UTF-16 en/decoding to/from UCS-4
124 // ----------------------------------------------------------------------------
// Encodes the code point `input` as UTF-16, storing the resulting code
// unit(s) through `output`.
//
// The fragments visible here show three cases:
//   - a single code unit holding `input` cast to wxUint16,
//   - a separate branch for input >= 0x110000, i.e. values beyond the Unicode
//     code space,
//   - a surrogate pair (two stores through `output`).
//
// NOTE(review): the first range test, any NULL-output guards and the return
// statements are not visible in this excerpt. Callers in this file compare the
// result with (size_t)-1, so that is presumably the error value, and the
// normal result is presumably the number of code units produced -- confirm.
127 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
// one code unit is enough: store the value after a cast to wxUint16
132 *output
= (wxUint16
) input
;
// 0x110000 is the first value past the last Unicode code point (0x10FFFF)
135 else if (input
>=0x110000)
// high surrogate: 0xd7c0 == 0xd800 - (0x10000 >> 10), so adding it to
// (input >> 10) subtracts the 0x10000 offset and adds the 0xd800 base at once
143 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
// low surrogate: the low 10 bits of the code point on top of the 0xdc00 base
144 *output
= (wxUint16
) ((input
&0x3ff)+0xdc00);
// Decodes one code point from the UTF-16 code units starting at `input` and
// stores it in `output`.
//
// The visible conditions distinguish:
//   - a first unit outside 0xd800..0xdfff (not a surrogate),
//   - a first unit inside that range whose successor is not in 0xdc00..0xdfff
//     (not a valid low surrogate),
//   - otherwise a surrogate pair, combined into a single code point.
//
// NOTE(review): the bodies of the first two branches and all return statements
// are missing from this excerpt; callers in this file test the result against
// (size_t)-1 and otherwise use it as a count of consumed code units.
// NOTE(review): the first test does not tell high surrogates from low ones, so
// a leading low surrogate (0xdc00..0xdfff) followed by another low surrogate
// reaches the pair-decoding expression -- confirm whether that is intended.
150 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
// first unit is not a surrogate at all
152 if ((*input
<0xd800) || (*input
>0xdfff))
// first unit is a surrogate but the second one is not a low surrogate
157 else if ((input
[1]<0xdc00) || (input
[1]>0xdfff))
// inverse of the encoder: subtracting 0xd7c0 from the high surrogate both
// removes the 0xd800 base and adds back the 0x10000 offset (after the shift)
164 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
170 // ----------------------------------------------------------------------------
172 // ----------------------------------------------------------------------------
174 wxMBConv::~wxMBConv()
176 // nothing to do here (necessary for Darwin linking probably)
// Converts the NUL-terminated multibyte string `psz` to a wide character
// buffer using this converter's MB2WC().
//
// The conversion is done in two passes: MB2WC() is first called with a NULL
// destination to obtain the required length, then a buffer of that length is
// created and MB2WC() is called again to fill it. Both results are checked
// against (size_t)-1; the last visible statement constructs a buffer from a
// NULL pointer.
//
// NOTE(review): the return statements and any guard on `psz` are not visible
// in this excerpt; presumably the NULL buffer is what is returned on failure
// -- confirm.
179 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
183 // calculate the length of the buffer needed first
184 size_t nLen
= MB2WC(NULL
, psz
, 0);
185 if ( nLen
!= (size_t)-1 )
187 // now do the actual conversion
188 wxWCharBuffer
buf(nLen
);
189 nLen
= MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NULL
190 if ( nLen
!= (size_t)-1 )
// buffer constructed from a NULL pointer rather than from a length
197 wxWCharBuffer
buf((wchar_t *)NULL
);
// Converts the NUL-terminated wide string `pwz` to a multibyte buffer using
// this converter's WC2MB().
//
// Mirrors the two-pass scheme of cMB2WC(const char *): a first WC2MB() call
// with a NULL destination yields the length, a second call fills a freshly
// created buffer. Both results are checked against (size_t)-1; the last
// visible statement constructs a buffer from a NULL pointer.
//
// NOTE(review): the return statements and any guard on `pwz` are not visible
// in this excerpt; presumably the NULL buffer is returned on failure --
// confirm.
202 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
206 size_t nLen
= WC2MB(NULL
, pwz
, 0);
207 if ( nLen
!= (size_t)-1 )
// the buffer is created with 3 extra chars and WC2MB() is told it may write
// nLen + 4, so that a terminator of up to 4 bytes fits
// (assumes wxCharBuffer(n) reserves n + 1 chars -- TODO confirm)
209 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
210 nLen
= WC2MB(buf
.data(), pwz
, nLen
+ 4);
211 if ( nLen
!= (size_t)-1 )
// buffer constructed from a NULL pointer rather than from a length
218 wxCharBuffer
buf((char *)NULL
);
223 const wxWCharBuffer
wxMBConv::cMB2WC(const char *szString
, size_t nStringLen
, size_t* pOutSize
) const
225 wxASSERT(pOutSize
!= NULL
);
227 const char* szEnd
= szString
+ nStringLen
+ 1;
228 const char* szPos
= szString
;
229 const char* szStart
= szPos
;
231 size_t nActualLength
= 0;
232 size_t nCurrentSize
= nStringLen
; //try normal size first (should never resize?)
234 wxWCharBuffer
theBuffer(nCurrentSize
);
236 //Convert the string until the length() is reached, continuing the
237 //loop every time a null character is reached
238 while(szPos
!= szEnd
)
240 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
242 //Get the length of the current (sub)string
243 size_t nLen
= MB2WC(NULL
, szPos
, 0);
245 //Invalid conversion?
246 if( nLen
== (size_t)-1 )
249 theBuffer
.data()[0u] = wxT('\0');
254 //Increase the actual length (+1 for current null character)
255 nActualLength
+= nLen
+ 1;
257 //if the buffer is too small, reallocate it
258 if (nActualLength
> (nCurrentSize
+1))
260 wxWCharBuffer
theNewBuffer(nCurrentSize
<< 1);
261 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
* sizeof(wchar_t));
262 theBuffer
= theNewBuffer
;
266 //Convert the current (sub)string
267 if ( MB2WC(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
270 theBuffer
.data()[0u] = wxT('\0');
274 //Increment to next (sub)string
275 //Note that we have to use strlen instead of nLen here
276 //because XX2XX gives us the size of the output buffer,
277 //which is not necessarily the length of the string
278 szPos
+= strlen(szPos
) + 1;
281 //success - return actual length and the buffer
282 *pOutSize
= nActualLength
;
286 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *szString
, size_t nStringLen
, size_t* pOutSize
) const
288 wxASSERT(pOutSize
!= NULL
);
290 const wchar_t* szEnd
= szString
+ nStringLen
+ 1;
291 const wchar_t* szPos
= szString
;
292 const wchar_t* szStart
= szPos
;
294 size_t nActualLength
= 0;
295 size_t nCurrentSize
= nStringLen
<< 2; //try * 4 first
297 wxCharBuffer
theBuffer(nCurrentSize
);
299 //Convert the string until the length() is reached, continuing the
300 //loop every time a null character is reached
301 while(szPos
!= szEnd
)
303 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
305 //Get the length of the current (sub)string
306 size_t nLen
= WC2MB(NULL
, szPos
, 0);
308 //Invalid conversion?
309 if( nLen
== (size_t)-1 )
312 theBuffer
.data()[0u] = wxT('\0');
316 //Increase the actual length (+1 for current null character)
317 nActualLength
+= nLen
+ 1;
319 //if the buffer is too small, reallocate it
320 if (nActualLength
> (nCurrentSize
+1))
322 wxCharBuffer
theNewBuffer(nCurrentSize
<< 1);
323 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
);
324 theBuffer
= theNewBuffer
;
328 //Convert the current (sub)string
329 if(WC2MB(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
332 theBuffer
.data()[0u] = wxT('\0');
336 //Increment to next (sub)string
337 //Note that we have to use wxWcslen instead of nLen here
338 //because XX2XX gives us the size of the output buffer,
339 //which is not necessarily the length of the string
340 szPos
+= wxWcslen(szPos
) + 1;
343 //success - return actual length and the buffer
344 *pOutSize
= nActualLength
;
348 // ----------------------------------------------------------------------------
350 // ----------------------------------------------------------------------------
// Multibyte -> wide char conversion for wxMBConvLibc: passes `buf`, `psz` and
// `n` unchanged to wxMB2WC() and returns its result.
352 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
354 return wxMB2WC(buf
, psz
, n
);
// Wide char -> multibyte conversion for wxMBConvLibc: passes `buf`, `psz` and
// `n` unchanged to wxWC2MB() and returns its result.
357 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
359 return wxWC2MB(buf
, psz
, n
);
364 // ----------------------------------------------------------------------------
365 // wxConvBrokenFileNames
366 // ----------------------------------------------------------------------------
// Chooses the converter stored in m_conv from the charset name.
//
// A NULL `charset`, or one for which wxStricmp() reports equality with "UTF-8"
// or "UTF8", selects a wxMBConvUTF8 created with MAP_INVALID_UTF8_TO_OCTAL;
// any other name is handed to wxCSConv.
//
// NOTE(review): m_conv is allocated with plain `new`; where it is released is
// not visible in this excerpt.
368 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar
*charset
)
370 if ( !charset
|| wxStricmp(charset
, _T("UTF-8")) == 0
371 || wxStricmp(charset
, _T("UTF8")) == 0 )
372 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
// every other charset name goes through the generic charset converter
374 m_conv
= new wxCSConv(charset
);
// Multibyte -> wide char conversion: forwards `outputBuf`, `psz` and
// `outputSize` to m_conv->MB2WC() and returns its result.
// NOTE(review): the return type and the declaration of `psz` are on lines
// missing from this excerpt.
378 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf
,
380 size_t outputSize
) const
382 return m_conv
->MB2WC( outputBuf
, psz
, outputSize
);
// Wide char -> multibyte conversion: forwards `outputBuf`, `psz` and
// `outputSize` to m_conv->WC2MB() and returns its result.
// NOTE(review): the return type and the declaration of `psz` are on lines
// missing from this excerpt.
386 wxConvBrokenFileNames::WC2MB(char *outputBuf
,
388 size_t outputSize
) const
390 return m_conv
->WC2MB( outputBuf
, psz
, outputSize
);
395 // ----------------------------------------------------------------------------
397 // ----------------------------------------------------------------------------
399 // Implementation (C) 2004 Fredrik Roubert
402 // BASE64 decoding table
404 static const unsigned char utf7unb64
[] =
406 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
407 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
408 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
409 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
410 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
411 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
412 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
413 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
414 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
415 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
416 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
417 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
418 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
419 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
420 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
421 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
422 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
423 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
424 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
425 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
426 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
427 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
428 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
429 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
430 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
431 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
432 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
433 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
434 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
435 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
436 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
437 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
440 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
444 while (*psz
&& ((!buf
) || (len
< n
)))
446 unsigned char cc
= *psz
++;
454 else if (*psz
== '-')
464 // BASE64 encoded string
468 for (lsb
= false, d
= 0, l
= 0;
469 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff; psz
++)
473 for (l
+= 6; l
>= 8; lsb
= !lsb
)
475 c
= (unsigned char)((d
>> (l
-= 8)) % 256);
484 *buf
= (wchar_t)(c
<< 8);
491 if (buf
&& (len
< n
))
497 // BASE64 encoding table
499 static const unsigned char utf7enb64
[] =
501 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
502 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
503 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
504 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
505 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
506 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
507 'w', 'x', 'y', 'z', '0', '1', '2', '3',
508 '4', '5', '6', '7', '8', '9', '+', '/'
512 // UTF-7 encoding table
514 // 0 - Set D (directly encoded characters)
515 // 1 - Set O (optional direct characters)
516 // 2 - whitespace characters (optional)
517 // 3 - special characters
519 static const unsigned char utf7encode
[128] =
521 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
522 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
523 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
525 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
527 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
531 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
537 while (*psz
&& ((!buf
) || (len
< n
)))
540 if (cc
< 0x80 && utf7encode
[cc
] < 1)
548 else if (((wxUint32
)cc
) > 0xffff)
550 // no surrogate pair generation (yet?)
561 // BASE64 encode string
562 unsigned int lsb
, d
, l
;
563 for (d
= 0, l
= 0;; psz
++)
565 for (lsb
= 0; lsb
< 2; lsb
++)
568 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
570 for (l
+= 8; l
>= 6; )
574 *buf
++ = utf7enb64
[(d
>> l
) % 64];
579 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
585 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
594 if (buf
&& (len
< n
))
599 // ----------------------------------------------------------------------------
601 // ----------------------------------------------------------------------------
603 static wxUint32 utf8_max
[]=
604 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
606 // boundaries of the private use area we use to (temporarily) remap bytes
607 // that are invalid in a UTF-8 encoded string
608 const wxUint32 wxUnicodePUA
= 0x100000;
609 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
611 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
615 while (*psz
&& ((!buf
) || (len
< n
)))
617 const char *opsz
= psz
;
618 bool invalid
= false;
619 unsigned char cc
= *psz
++, fc
= cc
;
621 for (cnt
= 0; fc
& 0x80; cnt
++)
630 // escape the escape character for octal escapes
631 if ((m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
632 && cc
== '\\' && (!buf
|| len
< n
))
644 // invalid UTF-8 sequence
649 unsigned ocnt
= cnt
- 1;
650 wxUint32 res
= cc
& (0x3f >> cnt
);
654 if ((cc
& 0xC0) != 0x80)
656 // invalid UTF-8 sequence
661 res
= (res
<< 6) | (cc
& 0x3f);
663 if (invalid
|| res
<= utf8_max
[ocnt
])
665 // illegal UTF-8 encoding
668 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
669 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
671 // if one of our PUA characters turns up externally
672 // it must also be treated as an illegal sequence
673 // (a bit like you have to escape an escape character)
679 // cast is ok because wchar_t == wxUint16 if WC_UTF16
680 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
681 if (pa
== (size_t)-1)
695 #endif // WC_UTF16/!WC_UTF16
700 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
702 while (opsz
< psz
&& (!buf
|| len
< n
))
705 // cast is ok because wchar_t == wxUint16 if WC_UTF16
706 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
707 wxASSERT(pa
!= (size_t)-1);
714 *buf
++ = wxUnicodePUA
+ (unsigned char)*opsz
;
720 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
722 while (opsz
< psz
&& (!buf
|| len
< n
))
724 if ( buf
&& len
+ 3 < n
)
726 unsigned char n
= *opsz
;
728 *buf
++ = (wchar_t)( L
'0' + n
/ 0100 );
729 *buf
++ = (wchar_t)( L
'0' + (n
% 0100) / 010 );
730 *buf
++ = (wchar_t)( L
'0' + n
% 010 );
736 else // MAP_INVALID_UTF8_NOT
743 if (buf
&& (len
< n
))
// Returns true if `wch` is an octal digit, i.e. lies in the range
// L'0' .. L'7' inclusive.
748 static inline bool isoctal(wchar_t wch
)
750 return L
'0' <= wch
&& wch
<= L
'7';
753 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
757 while (*psz
&& ((!buf
) || (len
< n
)))
761 // cast is ok for WC_UTF16
762 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
763 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
765 cc
=(*psz
++) & 0x7fffffff;
768 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
769 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
772 *buf
++ = (char)(cc
- wxUnicodePUA
);
775 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
776 && cc
== L
'\\' && psz
[0] == L
'\\' )
783 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
785 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
789 *buf
++ = (char) ((psz
[0] - L
'0')*0100 +
790 (psz
[1] - L
'0')*010 +
800 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
814 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
816 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
828 // ----------------------------------------------------------------------------
830 // ----------------------------------------------------------------------------
832 #ifdef WORDS_BIGENDIAN
833 #define wxMBConvUTF16straight wxMBConvUTF16BE
834 #define wxMBConvUTF16swap wxMBConvUTF16LE
836 #define wxMBConvUTF16swap wxMBConvUTF16BE
837 #define wxMBConvUTF16straight wxMBConvUTF16LE
843 // copy 16bit MB to 16bit String
844 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
848 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
851 *buf
++ = *(wxUint16
*)psz
;
854 psz
+= sizeof(wxUint16
);
856 if (buf
&& len
<n
) *buf
=0;
862 // copy 16bit String to 16bit MB
863 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
867 while (*psz
&& (!buf
|| len
< n
))
871 *(wxUint16
*)buf
= *psz
;
872 buf
+= sizeof(wxUint16
);
874 len
+= sizeof(wxUint16
);
877 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
883 // swap 16bit MB to 16bit String
884 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
888 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
892 ((char *)buf
)[0] = psz
[1];
893 ((char *)buf
)[1] = psz
[0];
897 psz
+= sizeof(wxUint16
);
899 if (buf
&& len
<n
) *buf
=0;
905 // swap 16bit String to 16bit MB
906 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
910 while (*psz
&& (!buf
|| len
< n
))
914 *buf
++ = ((char*)psz
)[1];
915 *buf
++ = ((char*)psz
)[0];
917 len
+= sizeof(wxUint16
);
920 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
929 // copy 16bit MB to 32bit String
930 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
934 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
937 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
938 if (pa
== (size_t)-1)
944 psz
+= pa
* sizeof(wxUint16
);
946 if (buf
&& len
<n
) *buf
=0;
952 // copy 32bit String to 16bit MB
953 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
957 while (*psz
&& (!buf
|| len
< n
))
960 size_t pa
=encode_utf16(*psz
, cc
);
962 if (pa
== (size_t)-1)
967 *(wxUint16
*)buf
= cc
[0];
968 buf
+= sizeof(wxUint16
);
971 *(wxUint16
*)buf
= cc
[1];
972 buf
+= sizeof(wxUint16
);
976 len
+= pa
*sizeof(wxUint16
);
979 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
985 // swap 16bit MB to 32bit String
986 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
990 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
994 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
995 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
997 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
998 if (pa
== (size_t)-1)
1005 psz
+= pa
* sizeof(wxUint16
);
1007 if (buf
&& len
<n
) *buf
=0;
1013 // swap 32bit String to 16bit MB
1014 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1018 while (*psz
&& (!buf
|| len
< n
))
1021 size_t pa
=encode_utf16(*psz
, cc
);
1023 if (pa
== (size_t)-1)
1028 *buf
++ = ((char*)cc
)[1];
1029 *buf
++ = ((char*)cc
)[0];
1032 *buf
++ = ((char*)cc
)[3];
1033 *buf
++ = ((char*)cc
)[2];
1037 len
+= pa
*sizeof(wxUint16
);
1040 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
1048 // ----------------------------------------------------------------------------
1050 // ----------------------------------------------------------------------------
1052 #ifdef WORDS_BIGENDIAN
1053 #define wxMBConvUTF32straight wxMBConvUTF32BE
1054 #define wxMBConvUTF32swap wxMBConvUTF32LE
1056 #define wxMBConvUTF32swap wxMBConvUTF32BE
1057 #define wxMBConvUTF32straight wxMBConvUTF32LE
1061 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1062 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
1067 // copy 32bit MB to 16bit String
1068 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1072 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1076 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
1077 if (pa
== (size_t)-1)
1087 psz
+= sizeof(wxUint32
);
1089 if (buf
&& len
<n
) *buf
=0;
1095 // copy 16bit String to 32bit MB
1096 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1100 while (*psz
&& (!buf
|| len
< n
))
1104 // cast is ok for WC_UTF16
1105 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
1106 if (pa
== (size_t)-1)
1111 *(wxUint32
*)buf
= cc
;
1112 buf
+= sizeof(wxUint32
);
1114 len
+= sizeof(wxUint32
);
1118 if (buf
&& len
<=n
-sizeof(wxUint32
))
1126 // swap 32bit MB to 16bit String
1127 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1131 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1134 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
1135 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
1140 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
1141 if (pa
== (size_t)-1)
1151 psz
+= sizeof(wxUint32
);
1161 // swap 16bit String to 32bit MB
1162 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1166 while (*psz
&& (!buf
|| len
< n
))
1170 // cast is ok for WC_UTF16
1171 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
1172 if (pa
== (size_t)-1)
1182 len
+= sizeof(wxUint32
);
1186 if (buf
&& len
<=n
-sizeof(wxUint32
))
1195 // copy 32bit MB to 32bit String
1196 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1200 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1203 *buf
++ = *(wxUint32
*)psz
;
1205 psz
+= sizeof(wxUint32
);
1215 // copy 32bit String to 32bit MB
1216 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1220 while (*psz
&& (!buf
|| len
< n
))
1224 *(wxUint32
*)buf
= *psz
;
1225 buf
+= sizeof(wxUint32
);
1228 len
+= sizeof(wxUint32
);
1232 if (buf
&& len
<=n
-sizeof(wxUint32
))
1239 // swap 32bit MB to 32bit String
1240 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1244 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1248 ((char *)buf
)[0] = psz
[3];
1249 ((char *)buf
)[1] = psz
[2];
1250 ((char *)buf
)[2] = psz
[1];
1251 ((char *)buf
)[3] = psz
[0];
1255 psz
+= sizeof(wxUint32
);
1265 // swap 32bit String to 32bit MB
1266 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1270 while (*psz
&& (!buf
|| len
< n
))
1274 *buf
++ = ((char *)psz
)[3];
1275 *buf
++ = ((char *)psz
)[2];
1276 *buf
++ = ((char *)psz
)[1];
1277 *buf
++ = ((char *)psz
)[0];
1279 len
+= sizeof(wxUint32
);
1283 if (buf
&& len
<=n
-sizeof(wxUint32
))
1293 // ============================================================================
1294 // The classes doing conversion using the iconv_xxx() functions
1295 // ============================================================================
1299 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1300 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1301 // (unless there's yet another bug in glibc) the only case when iconv()
1302 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1303 // left in the input buffer -- when _real_ error occurs,
1304 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1306 // [This bug does not appear in glibc 2.2.]
1307 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1308 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1309 (errno != E2BIG || bufLeft != 0))
1311 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1314 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1316 // ----------------------------------------------------------------------------
1317 // wxMBConv_iconv: encapsulates an iconv character set
1318 // ----------------------------------------------------------------------------
1320 class wxMBConv_iconv
: public wxMBConv
1323 wxMBConv_iconv(const wxChar
*name
);
1324 virtual ~wxMBConv_iconv();
1326 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1327 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1330 { return (m2w
!= (iconv_t
)-1) && (w2m
!= (iconv_t
)-1); }
1333 // the iconv handlers used to translate from multibyte to wide char and in
1334 // the other direction
1338 // guards access to m2w and w2m objects
1339 wxMutex m_iconvMutex
;
1343 // the name (for iconv_open()) of a wide char charset -- if none is
1344 // available on this machine, it will remain NULL
1345 static const char *ms_wcCharsetName
;
1347 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1348 // different endian-ness than the native one
1349 static bool ms_wcNeedsSwap
;
1352 // make the constructor available for unit testing
1353 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_iconv( const wxChar
* name
)
1355 wxMBConv_iconv
* result
= new wxMBConv_iconv( name
);
1356 if ( !result
->IsOk() )
1364 const char *wxMBConv_iconv::ms_wcCharsetName
= NULL
;
1365 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1367 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1369 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1370 // names for the charsets
1371 const wxCharBuffer
cname(wxString(name
).ToAscii());
1373 // check for charset that represents wchar_t:
1374 if (ms_wcCharsetName
== NULL
)
1376 ms_wcNeedsSwap
= false;
1378 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1379 ms_wcCharsetName
= WC_NAME_BEST
;
1380 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1382 if (m2w
== (iconv_t
)-1)
1384 // try charset w/o bytesex info (e.g. "UCS4")
1385 // and check for bytesex ourselves:
1386 ms_wcCharsetName
= WC_NAME
;
1387 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1389 // last bet, try if it knows WCHAR_T pseudo-charset
1390 if (m2w
== (iconv_t
)-1)
1392 ms_wcCharsetName
= "WCHAR_T";
1393 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1396 if (m2w
!= (iconv_t
)-1)
1398 char buf
[2], *bufPtr
;
1399 wchar_t wbuf
[2], *wbufPtr
;
1407 outsz
= SIZEOF_WCHAR_T
* 2;
1411 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1412 (char**)&wbufPtr
, &outsz
);
1414 if (ICONV_FAILED(res
, insz
))
1416 ms_wcCharsetName
= NULL
;
1417 wxLogLastError(wxT("iconv"));
1418 wxLogError(_("Conversion to charset '%s' doesn't work."), name
);
1422 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1427 ms_wcCharsetName
= NULL
;
1429 // VS: we must not output an error here, since wxWidgets will safely
1430 // fall back to using wxEncodingConverter.
1431 wxLogTrace(TRACE_STRCONV
, wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name
);
1435 wxLogTrace(TRACE_STRCONV
, wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName
, ms_wcNeedsSwap
);
1437 else // we already have ms_wcCharsetName
1439 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1442 // NB: don't ever pass NULL to iconv_open(), it may crash!
1443 if ( ms_wcCharsetName
)
1445 w2m
= iconv_open( cname
, ms_wcCharsetName
);
1453 wxMBConv_iconv::~wxMBConv_iconv()
1455 if ( m2w
!= (iconv_t
)-1 )
1457 if ( w2m
!= (iconv_t
)-1 )
1461 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1464 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1465 // Unfortunately there are a couple of global wxCSConv objects such as
1466 // wxConvLocal that are used all over wx code, so we have to make sure
1467 // the handle is used by at most one thread at a time. Otherwise
1468 // only a few wx classes would be safe to use from non-main threads
1469 // as MB<->WC conversion would fail "randomly".
1470 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1473 size_t inbuf
= strlen(psz
);
1474 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1476 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1477 wchar_t *bufPtr
= buf
;
1478 const char *pszPtr
= psz
;
1482 // have destination buffer, convert there
1484 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1485 (char**)&bufPtr
, &outbuf
);
1486 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1490 // convert to native endianness
1491 WC_BSWAP(buf
/* _not_ bufPtr */, res
)
1494 // NB: iconv was given only strlen(psz) characters on input, and so
1495 // it couldn't convert the trailing zero. Let's do it ourselves
1496 // if there's some room left for it in the output buffer.
1502 // no destination buffer... convert using temp buffer
1503 // to calculate destination buffer requirement
1508 outbuf
= 8*SIZEOF_WCHAR_T
;
1511 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1512 (char**)&bufPtr
, &outbuf
);
1514 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1515 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1518 if (ICONV_FAILED(cres
, inbuf
))
1520 //VS: it is ok if iconv fails, hence trace only
1521 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1528 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1531 // NB: explained in MB2WC
1532 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1535 size_t inbuf
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
1539 wchar_t *tmpbuf
= 0;
1543 // need to copy to temp buffer to switch endianness
1544 // this absolutely doesn't rock!
1545 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1546 // could be in read-only memory, or be accessed in some other thread)
1547 tmpbuf
=(wchar_t*)malloc((inbuf
+1)*SIZEOF_WCHAR_T
);
1548 memcpy(tmpbuf
,psz
,(inbuf
+1)*SIZEOF_WCHAR_T
);
1549 WC_BSWAP(tmpbuf
, inbuf
)
1555 // have destination buffer, convert there
1556 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1560 // NB: iconv was given only wcslen(psz) characters on input, and so
1561 // it couldn't convert the trailing zero. Let's do it ourselves
1562 // if there's some room left for it in the output buffer.
1568 // no destination buffer... convert using temp buffer
1569 // to calculate destination buffer requirement
1573 buf
= tbuf
; outbuf
= 16;
1575 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1578 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1586 if (ICONV_FAILED(cres
, inbuf
))
1588 //VS: it is ok if iconv fails, hence trace only
1589 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1596 #endif // HAVE_ICONV
1599 // ============================================================================
1600 // Win32 conversion classes
1601 // ============================================================================
1603 #ifdef wxHAVE_WIN32_MB2WC
1607 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1608 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1611 class wxMBConv_win32
: public wxMBConv
1616 m_CodePage
= CP_ACP
;
1620 wxMBConv_win32(const wxChar
* name
)
1622 m_CodePage
= wxCharsetToCodepage(name
);
1625 wxMBConv_win32(wxFontEncoding encoding
)
1627 m_CodePage
= wxEncodingToCodepage(encoding
);
1631 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1633 // note that we have to use the MB_ERR_INVALID_CHARS flag as without it
1634 // the behaviour is not compatible with the Unix version (using iconv)
1635 // and breaks the library itself, e.g. wxTextInputStream::NextChar()
1636 // wouldn't work if reading an incomplete MB char didn't result in an
1639 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1640 // an error (tested under Windows Server 2003) and apparently it is
1641 // done on purpose, i.e. the function accepts any input in this case
1642 // and although I'd prefer to return an error on ill-formed input, our
1643 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1644 // explicitly ill-formed according to RFC 2152) either, so we don't
1645 // even have any fallback here...
1646 int flags
= m_CodePage
== CP_UTF7
? 0 : MB_ERR_INVALID_CHARS
;
1648 const size_t len
= ::MultiByteToWideChar
1650 m_CodePage
, // code page
1651 flags
, // flags: fall on error
1652 psz
, // input string
1653 -1, // its length (NUL-terminated)
1654 buf
, // output string
1655 buf
? n
: 0 // size of output buffer
1658 // note that it returns count of written chars for buf != NULL and size
1659 // of the needed buffer for buf == NULL so in either case the length of
1660 // the string (which never includes the terminating NUL) is one less
1661 return len
? len
- 1 : (size_t)-1;
// Converts the NUL-terminated wide string pwz to the multibyte encoding of
// m_CodePage by calling ::WideCharToMultiByte(), passing buf and n as the
// output buffer and its size (a size of 0 is passed when buf is NULL).
//
// A lossy conversion is considered a failure. Two detection strategies exist:
//  - when CanUseNoBestFit() is true and m_CodePage < 50000, the call is made
//    with WC_NO_BEST_FIT_CHARS and pUsedDef pointing at usedDef, so the API
//    itself reports whether a replacement character was used;
//  - otherwise the output is converted back with MB2WC() and compared with
//    pwz using wcscmp().
//
// NOTE(review): the return statements are not visible in this listing; going
// by the remaining comments the result is presumably "len - 1" on success and
// (size_t)-1 on failure -- confirm against the complete source.
1664 size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
1667 we have a problem here: by default, WideCharToMultiByte() may
1668 replace characters unrepresentable in the target code page with bad
1669 quality approximations such as turning "1/2" symbol (U+00BD) into
1670 "1" for the code pages which don't have it and we, obviously, want
1671 to avoid this at any price
1673 the trouble is that this function does it _silently_, i.e. it won't
1674 even tell us whether it did or not... Win98/2000 and higher provide
1675 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1676 we have to resort to a round trip, i.e. check that converting back
1677 results in the same string -- this is, of course, expensive but
1678 otherwise we simply can't be sure to not garble the data.
1681 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1682 // it doesn't work with CJK encodings (which we test for rather roughly
1683 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
// usedDef is the flag the API fills in through pUsedDef (the last argument of
// the ::WideCharToMultiByte() call below)
1685 BOOL usedDef
wxDUMMY_INITIALIZE(false);
1688 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
1690 // it's our lucky day
1691 flags
= WC_NO_BEST_FIT_CHARS
;
1692 pUsedDef
= &usedDef
;
1694 else // old system or unsupported encoding
// the actual conversion; len receives the value returned by the API
1700 const size_t len
= ::WideCharToMultiByte
1702 m_CodePage
, // code page
1703 flags
, // either none or no best fit
1704 pwz
, // input string
1705 -1, // it is (wide) NUL-terminated
1706 buf
, // output buffer
1707 buf
? n
: 0, // and its size
1708 NULL
, // default "replacement" char
1709 pUsedDef
// [out] was it used?
1714 // function totally failed
1718 // if we were really converting, check if we succeeded
1723 // check if the conversion failed, i.e. if any replacements
1728 else // we must resort to double tripping...
// convert the freshly produced multibyte text back into a temporary wide
// buffer and compare it with the input
1730 wxWCharBuffer
wcBuf(n
);
1731 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
1732 wcscmp(wcBuf
, pwz
) != 0 )
1734 // we didn't obtain the same thing we started from, hence
1735 // the conversion was lossy and we consider that it failed
1741 // see the comment above for the reason of "len - 1"
// Returns true unless m_CodePage holds -1, the value this class treats as
// "no code page".
1745 bool IsOk() const { return m_CodePage
!= -1; }
// Tells whether WC_NO_BEST_FIT_CHARS may be used on the running system.
//
// The answer is computed once from wxGetOsVersion() and kept in the
// function-local static s_isWin98Or2k (-1 means "not determined yet"); the
// function returns true only when that value is 1.
//
// NOTE(review): the case labels of the switch are not visible in this listing;
// judging by the variable name the first assignment presumably handles the
// Win9x family and the second one the NT family -- confirm.
1748 static bool CanUseNoBestFit()
1750 static int s_isWin98Or2k
= -1;
1752 if ( s_isWin98Or2k
== -1 )
1755 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
// NOTE(review): this test requires both verMaj >= 4 and verMin >= 10, so a
// version with a larger major number but a minor number below 10 is rejected;
// confirm that this is intended.
1758 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
1762 s_isWin98Or2k
= verMaj
>= 5;
1766 // unknown, be conservative by default
1770 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
1773 return s_isWin98Or2k
== 1;
1779 #endif // wxHAVE_WIN32_MB2WC
1781 // ============================================================================
1782 // Cocoa conversion classes
1783 // ============================================================================
1785 #if defined(__WXCOCOA__)
1787 // RN: There is no UTF-32 support in either Core Foundation or
1788 // Cocoa. Strangely enough, Core Foundation uses
1789 // UTF-32 internally quite a bit - it's just not public (yet).
1791 #include <CoreFoundation/CFString.h>
1792 #include <CoreFoundation/CFStringEncodingExt.h>
// Translates a wxFontEncoding value into the corresponding Core Foundation
// CFStringEncoding constant.
//
// enc starts out as kCFStringEncodingInvalidId. wxFONTENCODING_DEFAULT is
// mapped to CFStringGetSystemEncoding(); every other supported value is
// handled by one case of the switch below, which assigns the matching
// kCFStringEncoding* constant.
//
// NOTE(review): the return statement is not visible in this listing;
// presumably enc is returned, so encodings without a case yield
// kCFStringEncodingInvalidId -- confirm against the complete source.
1794 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
1796 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
1797 if ( encoding
== wxFONTENCODING_DEFAULT
)
1799 enc
= CFStringGetSystemEncoding();
1801 else switch( encoding
)
// --- ISO 8859 family ---
1803 case wxFONTENCODING_ISO8859_1
:
1804 enc
= kCFStringEncodingISOLatin1
;
1806 case wxFONTENCODING_ISO8859_2
:
1807 enc
= kCFStringEncodingISOLatin2
;
1809 case wxFONTENCODING_ISO8859_3
:
1810 enc
= kCFStringEncodingISOLatin3
;
1812 case wxFONTENCODING_ISO8859_4
:
1813 enc
= kCFStringEncodingISOLatin4
;
1815 case wxFONTENCODING_ISO8859_5
:
1816 enc
= kCFStringEncodingISOLatinCyrillic
;
1818 case wxFONTENCODING_ISO8859_6
:
1819 enc
= kCFStringEncodingISOLatinArabic
;
1821 case wxFONTENCODING_ISO8859_7
:
1822 enc
= kCFStringEncodingISOLatinGreek
;
1824 case wxFONTENCODING_ISO8859_8
:
1825 enc
= kCFStringEncodingISOLatinHebrew
;
1827 case wxFONTENCODING_ISO8859_9
:
1828 enc
= kCFStringEncodingISOLatin5
;
1830 case wxFONTENCODING_ISO8859_10
:
1831 enc
= kCFStringEncodingISOLatin6
;
1833 case wxFONTENCODING_ISO8859_11
:
1834 enc
= kCFStringEncodingISOLatinThai
;
1836 case wxFONTENCODING_ISO8859_13
:
1837 enc
= kCFStringEncodingISOLatin7
;
1839 case wxFONTENCODING_ISO8859_14
:
1840 enc
= kCFStringEncodingISOLatin8
;
1842 case wxFONTENCODING_ISO8859_15
:
1843 enc
= kCFStringEncodingISOLatin9
;
// --- KOI8 and DOS code pages ---
// (wxFONTENCODING_ALTERNATIVE and wxFONTENCODING_CP866 both map to
// kCFStringEncodingDOSRussian)
1846 case wxFONTENCODING_KOI8
:
1847 enc
= kCFStringEncodingKOI8_R
;
1849 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
1850 enc
= kCFStringEncodingDOSRussian
;
1853 // case wxFONTENCODING_BULGARIAN :
1857 case wxFONTENCODING_CP437
:
1858 enc
=kCFStringEncodingDOSLatinUS
;
1860 case wxFONTENCODING_CP850
:
1861 enc
= kCFStringEncodingDOSLatin1
;
1863 case wxFONTENCODING_CP852
:
1864 enc
= kCFStringEncodingDOSLatin2
;
1866 case wxFONTENCODING_CP855
:
1867 enc
= kCFStringEncodingDOSCyrillic
;
1869 case wxFONTENCODING_CP866
:
1870 enc
=kCFStringEncodingDOSRussian
;
1872 case wxFONTENCODING_CP874
:
1873 enc
= kCFStringEncodingDOSThai
;
1875 case wxFONTENCODING_CP932
:
1876 enc
= kCFStringEncodingDOSJapanese
;
1878 case wxFONTENCODING_CP936
:
1879 enc
=kCFStringEncodingDOSChineseSimplif
;
1881 case wxFONTENCODING_CP949
:
1882 enc
= kCFStringEncodingDOSKorean
;
1884 case wxFONTENCODING_CP950
:
1885 enc
= kCFStringEncodingDOSChineseTrad
;
// --- Windows code pages ---
1887 case wxFONTENCODING_CP1250
:
1888 enc
= kCFStringEncodingWindowsLatin2
;
1890 case wxFONTENCODING_CP1251
:
1891 enc
=kCFStringEncodingWindowsCyrillic
;
1893 case wxFONTENCODING_CP1252
:
1894 enc
=kCFStringEncodingWindowsLatin1
;
1896 case wxFONTENCODING_CP1253
:
1897 enc
= kCFStringEncodingWindowsGreek
;
1899 case wxFONTENCODING_CP1254
:
1900 enc
= kCFStringEncodingWindowsLatin5
;
1902 case wxFONTENCODING_CP1255
:
1903 enc
=kCFStringEncodingWindowsHebrew
;
1905 case wxFONTENCODING_CP1256
:
1906 enc
=kCFStringEncodingWindowsArabic
;
1908 case wxFONTENCODING_CP1257
:
1909 enc
= kCFStringEncodingWindowsBalticRim
;
// --- Unicode transformation formats and EUC-JP (UTF-7 is deliberately
// left out, see the commented-out case) ---
1911 // This only really encodes to UTF7 (if that) evidently
1912 // case wxFONTENCODING_UTF7 :
1913 // enc = kCFStringEncodingNonLossyASCII ;
1915 case wxFONTENCODING_UTF8
:
1916 enc
= kCFStringEncodingUTF8
;
1918 case wxFONTENCODING_EUC_JP
:
1919 enc
= kCFStringEncodingEUC_JP
;
1921 case wxFONTENCODING_UTF16
:
1922 enc
= kCFStringEncodingUnicode
;
// --- classic Mac OS encodings ---
1924 case wxFONTENCODING_MACROMAN
:
1925 enc
= kCFStringEncodingMacRoman
;
1927 case wxFONTENCODING_MACJAPANESE
:
1928 enc
= kCFStringEncodingMacJapanese
;
1930 case wxFONTENCODING_MACCHINESETRAD
:
1931 enc
= kCFStringEncodingMacChineseTrad
;
1933 case wxFONTENCODING_MACKOREAN
:
1934 enc
= kCFStringEncodingMacKorean
;
1936 case wxFONTENCODING_MACARABIC
:
1937 enc
= kCFStringEncodingMacArabic
;
1939 case wxFONTENCODING_MACHEBREW
:
1940 enc
= kCFStringEncodingMacHebrew
;
1942 case wxFONTENCODING_MACGREEK
:
1943 enc
= kCFStringEncodingMacGreek
;
1945 case wxFONTENCODING_MACCYRILLIC
:
1946 enc
= kCFStringEncodingMacCyrillic
;
1948 case wxFONTENCODING_MACDEVANAGARI
:
1949 enc
= kCFStringEncodingMacDevanagari
;
1951 case wxFONTENCODING_MACGURMUKHI
:
1952 enc
= kCFStringEncodingMacGurmukhi
;
1954 case wxFONTENCODING_MACGUJARATI
:
1955 enc
= kCFStringEncodingMacGujarati
;
1957 case wxFONTENCODING_MACORIYA
:
1958 enc
= kCFStringEncodingMacOriya
;
1960 case wxFONTENCODING_MACBENGALI
:
1961 enc
= kCFStringEncodingMacBengali
;
1963 case wxFONTENCODING_MACTAMIL
:
1964 enc
= kCFStringEncodingMacTamil
;
1966 case wxFONTENCODING_MACTELUGU
:
1967 enc
= kCFStringEncodingMacTelugu
;
1969 case wxFONTENCODING_MACKANNADA
:
1970 enc
= kCFStringEncodingMacKannada
;
1972 case wxFONTENCODING_MACMALAJALAM
:
1973 enc
= kCFStringEncodingMacMalayalam
;
1975 case wxFONTENCODING_MACSINHALESE
:
1976 enc
= kCFStringEncodingMacSinhalese
;
1978 case wxFONTENCODING_MACBURMESE
:
1979 enc
= kCFStringEncodingMacBurmese
;
1981 case wxFONTENCODING_MACKHMER
:
1982 enc
= kCFStringEncodingMacKhmer
;
1984 case wxFONTENCODING_MACTHAI
:
1985 enc
= kCFStringEncodingMacThai
;
1987 case wxFONTENCODING_MACLAOTIAN
:
1988 enc
= kCFStringEncodingMacLaotian
;
1990 case wxFONTENCODING_MACGEORGIAN
:
1991 enc
= kCFStringEncodingMacGeorgian
;
1993 case wxFONTENCODING_MACARMENIAN
:
1994 enc
= kCFStringEncodingMacArmenian
;
1996 case wxFONTENCODING_MACCHINESESIMP
:
1997 enc
= kCFStringEncodingMacChineseSimp
;
1999 case wxFONTENCODING_MACTIBETAN
:
2000 enc
= kCFStringEncodingMacTibetan
;
2002 case wxFONTENCODING_MACMONGOLIAN
:
2003 enc
= kCFStringEncodingMacMongolian
;
2005 case wxFONTENCODING_MACETHIOPIC
:
2006 enc
= kCFStringEncodingMacEthiopic
;
2008 case wxFONTENCODING_MACCENTRALEUR
:
2009 enc
= kCFStringEncodingMacCentralEurRoman
;
2011 case wxFONTENCODING_MACVIATNAMESE
:
2012 enc
= kCFStringEncodingMacVietnamese
;
2014 case wxFONTENCODING_MACARABICEXT
:
2015 enc
= kCFStringEncodingMacExtArabic
;
2017 case wxFONTENCODING_MACSYMBOL
:
2018 enc
= kCFStringEncodingMacSymbol
;
2020 case wxFONTENCODING_MACDINGBATS
:
2021 enc
= kCFStringEncodingMacDingbats
;
2023 case wxFONTENCODING_MACTURKISH
:
2024 enc
= kCFStringEncodingMacTurkish
;
2026 case wxFONTENCODING_MACCROATIAN
:
2027 enc
= kCFStringEncodingMacCroatian
;
2029 case wxFONTENCODING_MACICELANDIC
:
2030 enc
= kCFStringEncodingMacIcelandic
;
2032 case wxFONTENCODING_MACROMANIAN
:
2033 enc
= kCFStringEncodingMacRomanian
;
2035 case wxFONTENCODING_MACCELTIC
:
2036 enc
= kCFStringEncodingMacCeltic
;
2038 case wxFONTENCODING_MACGAELIC
:
2039 enc
= kCFStringEncodingMacGaelic
;
2041 // case wxFONTENCODING_MACKEYBOARD :
2042 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2045 // because gcc is picky
// wxMBConv implementation based on Core Foundation strings: the encoding to
// convert from/to is kept in m_encoding as a CFStringEncoding and the text is
// routed through a temporary CFString in both directions.
//
// NOTE(review): braces, access specifiers and several statements of this
// class are not visible in this listing; the comments below only describe
// what can be seen.
2051 class wxMBConv_cocoa
: public wxMBConv
// constructor body (its signature is not visible here): use the encoding
// returned by CFStringGetSystemEncoding()
2056 Init(CFStringGetSystemEncoding()) ;
// constructs the converter from a charset name: the name is mapped to a
// wxFontEncoding by the font mapper (non-interactively, see the "false"
// argument) and then to a CFStringEncoding
2060 wxMBConv_cocoa(const wxChar
* name
)
2062 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
// constructs the converter from a wxFontEncoding value
2066 wxMBConv_cocoa(wxFontEncoding encoding
)
2068 Init( wxCFStringEncFromFontEnc(encoding
) );
// common part of all constructors: just remembers the encoding
2075 void Init( CFStringEncoding encoding
)
2077 m_encoding
= encoding
;
// Converts the multibyte string szUnConv into wide characters in szOut.
//
// A CFString is created from the input bytes and its length (in UniChars) is
// stored in nOutLength. The characters are then extracted with
// CFStringGetCharacters(); when wchar_t is 4 bytes wide they first go to a
// temporary UniChar buffer and are widened by wxMBConvUTF16::MB2WC().
//
// NOTE(review): theRange and the temporary buffer are both sized by nOutSize
// while the terminator is written at index nOutLength; no check that
// nOutLength < nOutSize is visible in this listing -- confirm that the range
// cannot exceed the string length and that the write stays inside the buffer.
2080 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2084 CFStringRef theString
= CFStringCreateWithBytes (
2085 NULL
, //the allocator
2086 (const UInt8
*)szUnConv
,
2089 false //no BOM/external representation
2092 wxASSERT(theString
);
2094 size_t nOutLength
= CFStringGetLength(theString
);
2098 CFRelease(theString
);
2102 CFRange theRange
= { 0, nOutSize
};
2104 #if SIZEOF_WCHAR_T == 4
2105 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2108 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2110 CFRelease(theString
);
2112 szUniCharBuffer
[nOutLength
] = '\0' ;
2114 #if SIZEOF_WCHAR_T == 4
2115 wxMBConvUTF16 converter
;
2116 converter
.MB2WC(szOut
, (const char*)szUniCharBuffer
, nOutSize
) ;
2117 delete[] szUniCharBuffer
;
// Converts the wide string szUnConv into the multibyte encoding m_encoding,
// writing to szOut, and returns nRealOutSize - 1.
//
// When wchar_t is 4 bytes wide the input is first converted to UTF-16 with
// wxMBConvUTF16::WC2MB() into a temporary UniChar buffer, which is released
// with delete[] before returning. A CFString is created over the UniChar
// data without copying it (kCFAllocatorNull is passed as deallocator).
//
// NOTE(review): &nRealOutSize is a size_t* cast to CFIndex*; confirm that
// both types have the same size on all supported targets.
2123 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2127 size_t nRealOutSize
;
2128 size_t nBufSize
= wxWcslen(szUnConv
);
2129 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
2131 #if SIZEOF_WCHAR_T == 4
2132 wxMBConvUTF16 converter
;
2133 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2134 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1] ;
2135 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
)) ;
2136 nBufSize
/= sizeof(UniChar
);
2139 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2143 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
2146 wxASSERT(theString
);
2148 //Note that CER puts a BOM when converting to unicode
2149 //so we check and use getchars instead in that case
2150 if (m_encoding
== kCFStringEncodingUnicode
)
2153 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2155 nRealOutSize
= CFStringGetLength(theString
) + 1;
// arguments of the conversion call used for all other encodings (the call
// itself is not visible in this listing)
2161 CFRangeMake(0, CFStringGetLength(theString
)),
2163 0, //what to put in characters that can't be converted -
2164 //0 tells CFString to return NULL if it meets such a character
2165 false, //not an external representation
2168 (CFIndex
*) &nRealOutSize
2172 CFRelease(theString
);
2174 #if SIZEOF_WCHAR_T == 4
2175 delete[] szUniBuffer
;
2178 return nRealOutSize
- 1;
// validity check (its signature is not visible here): the encoding must not
// be kCFStringEncodingInvalidId and must be reported as available by
// CFStringIsEncodingAvailable()
2183 return m_encoding
!= kCFStringEncodingInvalidId
&&
2184 CFStringIsEncodingAvailable(m_encoding
);
// the Core Foundation encoding this object converts from/to
2188 CFStringEncoding m_encoding
;
2191 #endif // defined(__WXCOCOA__)
2193 // ============================================================================
2194 // Mac conversion classes
2195 // ============================================================================
2197 #if defined(__WXMAC__) && defined(TARGET_CARBON)
// wxMBConv implementation using the Text Encoding Converter (TEC): one
// TECObjectRef converts from the multibyte encoding to 16 bit Unicode
// (m_MB2WC_converter) and another one converts back (m_WC2MB_converter).
//
// NOTE(review): braces, access specifiers and several statements of this
// class are not visible in this listing; the comments below only describe
// what can be seen.
2199 class wxMBConv_mac
: public wxMBConv
// constructor body (its signature is not visible here): use the encoding
// returned by CFStringGetSystemEncoding()
2204 Init(CFStringGetSystemEncoding()) ;
// constructs the converter from a charset name, mapped to a wxFontEncoding by
// the font mapper (non-interactively) and then to a system text encoding
2208 wxMBConv_mac(const wxChar
* name
)
2210 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
// constructs the converter from a wxFontEncoding value
2214 wxMBConv_mac(wxFontEncoding encoding
)
2216 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
// disposes of both converters (presumably the destructor body; its signature
// is not visible here)
// NOTE(review): status is assigned but no check of it is visible
2221 OSStatus status
= noErr
;
2222 status
= TECDisposeConverter(m_MB2WC_converter
);
2223 status
= TECDisposeConverter(m_WC2MB_converter
);
// common part of all constructors: remembers the multibyte encoding, builds
// the 16 bit Unicode text encoding and creates the two converters
// NOTE(review): no check of the TECCreateConverter() results is visible here;
// IsOk() only tests the converter references against NULL
2227 void Init( TextEncodingBase encoding
)
2229 OSStatus status
= noErr
;
2230 m_char_encoding
= encoding
;
2231 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
2233 status
= TECCreateConverter(&m_MB2WC_converter
,
2235 m_unicode_encoding
);
2236 status
= TECCreateConverter(&m_WC2MB_converter
,
// Converts the multibyte string psz to wide characters using
// m_MB2WC_converter.
//
// TECConvertText() produces UniChars; when wchar_t is 4 bytes wide they are
// written to a separately allocated buffer, NUL-terminated and then widened
// by wxMBConvUTF16::MB2WC(), otherwise they go directly to buf (or to tbuf
// when buf is NULL).
//
// NOTE(review): the condition guarding the allocation of tbuf, the release of
// the malloc()ed buffers and the return statement are not visible in this
// listing, and no check of the malloc() results or of status can be seen --
// confirm against the complete source.
2241 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2243 OSStatus status
= noErr
;
2244 ByteCount byteOutLen
;
2245 ByteCount byteInLen
= strlen(psz
) ;
2246 wchar_t *tbuf
= NULL
;
2247 UniChar
* ubuf
= NULL
;
2252 //apple specs say at least 32
2253 n
= wxMax( 32 , byteInLen
) ;
2254 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
2256 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
2257 #if SIZEOF_WCHAR_T == 4
2258 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2260 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2262 status
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2263 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2264 #if SIZEOF_WCHAR_T == 4
2265 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2266 // is not properly terminated we get random characters at the end
2267 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2268 wxMBConvUTF16 converter
;
2269 res
= converter
.MB2WC( (buf
? buf
: tbuf
) , (const char*)ubuf
, n
) ;
2272 res
= byteOutLen
/ sizeof( UniChar
) ;
2277 if ( buf
&& res
< n
)
// Converts the wide string psz to the multibyte encoding using
// m_WC2MB_converter.
//
// When wchar_t is 4 bytes wide the input is first converted to UTF-16 with
// wxMBConvUTF16::WC2MB() into a malloc()ed buffer, otherwise psz is used
// directly as UniChar data. The output goes to buf, or to tbuf when buf is
// NULL. Finally the result is converted back with MB2WC() and compared with
// the input to detect lossy conversions.
//
// NOTE(review): the declaration of tbuf, the condition guarding its
// allocation, the release of the malloc()ed buffers and the return statements
// are not visible in this listing, and no check of the malloc() results or of
// status can be seen -- confirm against the complete source.
2283 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2285 OSStatus status
= noErr
;
2286 ByteCount byteOutLen
;
2287 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2293 //apple specs say at least 32
2294 n
= wxMax( 32 , ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2295 tbuf
= (char*) malloc( n
) ;
2298 ByteCount byteBufferLen
= n
;
2299 UniChar
* ubuf
= NULL
;
2300 #if SIZEOF_WCHAR_T == 4
2301 wxMBConvUTF16 converter
;
2302 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2303 byteInLen
= unicharlen
;
2304 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2305 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2307 ubuf
= (UniChar
*) psz
;
2309 status
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2310 (TextPtr
) (buf
? buf
: tbuf
) , byteBufferLen
, &byteOutLen
);
2311 #if SIZEOF_WCHAR_T == 4
2317 size_t res
= byteOutLen
;
2318 if ( buf
&& res
< n
)
2322 //we need to double-trip to verify it didn't insert any ? in place
2323 //of bogus characters
2324 wxWCharBuffer
wcBuf(n
);
2325 size_t pszlen
= wxWcslen(psz
);
2326 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
2327 wxWcslen(wcBuf
) != pszlen
||
2328 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2330 // we didn't obtain the same thing we started from, hence
2331 // the conversion was lossy and we consider that it failed
// validity check (its signature is not visible here): both converters must be
// non-NULL
2340 { return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
; }
// the converter objects for the two directions
2343 TECObjectRef m_MB2WC_converter
;
2344 TECObjectRef m_WC2MB_converter
;
// the multibyte encoding and the Unicode encoding set up by Init()
2346 TextEncodingBase m_char_encoding
;
2347 TextEncodingBase m_unicode_encoding
;
2350 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2352 // ============================================================================
2353 // wxEncodingConverter based conversion classes
2354 // ============================================================================
// wxMBConv implementation built on two wxEncodingConverter objects: m2w
// converts from m_enc to wxFONTENCODING_UNICODE and w2m converts back. m_ok
// records whether both converters could be initialized and is what IsOk()
// returns.
//
// NOTE(review): braces, access specifiers and several statements of this
// class are not visible in this listing; the comments below only describe
// what can be seen.
2358 class wxMBConv_wxwin
: public wxMBConv
// initialization helper body (its signature is not visible here): set up both
// conversion directions and remember whether both succeeded
2363 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2364 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2368 // temporarily just use wxEncodingConverter stuff,
2369 // so that it works while a better implementation is built
// constructs the converter from a charset name; the visible statements either
// map the name with the font mapper (non-interactively) or fall back to
// wxFONTENCODING_SYSTEM (the condition choosing between them is not visible)
2370 wxMBConv_wxwin(const wxChar
* name
)
2373 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2375 m_enc
= wxFONTENCODING_SYSTEM
;
// constructs the converter from a wxFontEncoding value
2380 wxMBConv_wxwin(wxFontEncoding enc
)
// Converts psz to wide characters with m2w.Convert().
// NOTE(review): n is marked WXUNUSED, so the size of buf is not consulted by
// this method -- callers presumably have to provide a large enough buffer.
2387 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2389 size_t inbuf
= strlen(psz
);
2392 if (!m2w
.Convert(psz
,buf
))
// Converts psz to multibyte characters with w2m.Convert().
// NOTE(review): n is marked WXUNUSED here as well, see MB2WC().
2398 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2400 const size_t inbuf
= wxWcslen(psz
);
2403 if (!w2m
.Convert(psz
,buf
))
// true if both converters were initialized successfully
2410 bool IsOk() const { return m_ok
; }
// the multibyte encoding and the converters for the two directions
2413 wxFontEncoding m_enc
;
2414 wxEncodingConverter m2w
, w2m
;
2416 // were we initialized successfully?
2419 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2422 // make the constructors available for unit testing
// Exported factory: allocates a wxMBConv_wxwin for the given charset name and
// checks it with IsOk().
// NOTE(review): what happens when IsOk() fails, and the return statements,
// are not visible in this listing -- confirm against the complete source.
2423 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_wxwin( const wxChar
* name
)
2425 wxMBConv_wxwin
* result
= new wxMBConv_wxwin( name
);
2426 if ( !result
->IsOk() )
2434 #endif // wxUSE_FONTMAP
2436 // ============================================================================
2437 // wxCSConv implementation
2438 // ============================================================================
2440 void wxCSConv::Init()
// Constructs a converter from a charset name. The only statement visible in
// this listing sets m_encoding to wxFONTENCODING_SYSTEM.
// NOTE(review): how charset itself is stored is not visible here -- confirm
// against the complete source.
2447 wxCSConv::wxCSConv(const wxChar
*charset
)
2456 m_encoding
= wxFONTENCODING_SYSTEM
;
// Constructs a converter from a wxFontEncoding value.
//
// wxFONTENCODING_MAX and wxFONTENCODING_DEFAULT are rejected: they trigger
// wxFAIL_MSG() and are replaced by wxFONTENCODING_SYSTEM before the value is
// stored in m_encoding.
2459 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2461 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2463 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2465 encoding
= wxFONTENCODING_SYSTEM
;
2470 m_encoding
= encoding
;
2473 wxCSConv::~wxCSConv()
// Copy constructor: takes over the charset name of conv through SetName() and
// copies its m_encoding.
// NOTE(review): any further initialization is not visible in this listing.
2478 wxCSConv::wxCSConv(const wxCSConv
& conv
)
2483 SetName(conv
.m_name
);
2484 m_encoding
= conv
.m_encoding
;
// Assignment operator: takes over the charset name of conv through SetName()
// and copies its m_encoding.
// NOTE(review): no self-assignment guard is visible in this listing; if the
// current name is released before SetName(conv.m_name) runs, "x = x" would
// read the released (or already reset) name -- confirm against the complete
// source.
2487 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2491 SetName(conv
.m_name
);
2492 m_encoding
= conv
.m_encoding
;
2497 void wxCSConv::Clear()
// Stores a copy of charset, made with wxStrdup(), in m_name.
// NOTE(review): whether charset is checked for NULL first is not visible in
// this listing.
2506 void wxCSConv::SetName(const wxChar
*charset
)
2510 m_name
= wxStrdup(charset
);
2516 #include "wx/hashmap.h"
// Hash map from wxFontEncoding to wxString. wxCSConv::DoCreate() stores in
// gs_nameCache the encoding name that was tried successfully with
// wxMBConv_iconv, or an empty string to remember that none worked.
2518 WX_DECLARE_HASH_MAP( wxFontEncoding
, wxString
, wxIntegerHash
, wxIntegerEqual
,
2519 wxEncodingNameCache
);
// the cache itself, private to this file
2521 static wxEncodingNameCache gs_nameCache
;
// Creates the wxMBConv object that performs the real conversion for this
// wxCSConv, using m_name and/or m_encoding.
//
// The candidates are tried in the order given by the comment in the body:
// OS facilities first (wxMBConv_iconv, wxMBConv_win32, wxMBConv_mac or
// wxMBConv_cocoa depending on the platform), then the built-in UTF
// converters, then wxMBConv_wxwin. If nothing works an error is logged.
//
// NOTE(review): many lines of this function (preprocessor conditions, IsOk()
// checks, delete/return statements) are not visible in this listing; the
// comments below only describe what can be seen.
2524 wxMBConv
*wxCSConv::DoCreate() const
2527 wxLogTrace(TRACE_STRCONV
,
2528 wxT("creating conversion for %s"),
2530 : wxFontMapperBase::GetEncodingName(m_encoding
).c_str()));
2531 #endif // wxUSE_FONTMAP
2533 // check for the special case of ASCII or ISO8859-1 charset: as we have
2534 // special knowledge of it anyhow, we don't need to create a special
2535 // conversion object
2536 if ( m_encoding
== wxFONTENCODING_ISO8859_1
)
2538 // don't convert at all
2542 // we trust OS to do conversion better than we can so try external
2543 // conversion methods first
2545 // the full order is:
2546 // 1. OS conversion (iconv() under Unix or Win32 API)
2547 // 2. hard coded conversions for UTF
2548 // 3. wxEncodingConverter as fall back
2554 #endif // !wxUSE_FONTMAP
// --- iconv: try the charset name first if there is one ---
2556 wxString
name(m_name
);
2557 wxFontEncoding
encoding(m_encoding
);
2559 if ( !name
.empty() )
2561 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
2569 wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2570 #endif // wxUSE_FONTMAP
// --- iconv: look up the encoding in gs_nameCache; an empty cached name
// records an earlier failure ---
2574 const wxEncodingNameCache::iterator it
= gs_nameCache
.find(encoding
);
2575 if ( it
!= gs_nameCache
.end() )
2577 if ( it
->second
.empty() )
2580 wxMBConv_iconv
*conv
= new wxMBConv_iconv(it
->second
);
// --- iconv: nothing cached, try every known name of the encoding and cache
// the outcome ---
2587 const wxChar
** names
= wxFontMapperBase::GetAllEncodingNames(encoding
);
2589 for ( ; *names
; ++names
)
2591 wxMBConv_iconv
*conv
= new wxMBConv_iconv(*names
);
2594 gs_nameCache
[encoding
] = *names
;
// NOTE(review): narrow "" literal here while other literals in this file go
// through _T()/wxT() -- confirm this is fine in Unicode builds
2601 gs_nameCache
[encoding
] = ""; // cache the failure
2603 #endif // wxUSE_FONTMAP
2605 #endif // HAVE_ICONV
// --- Win32 API based conversion ---
2607 #ifdef wxHAVE_WIN32_MB2WC
2610 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2611 : new wxMBConv_win32(m_encoding
);
2620 #endif // wxHAVE_WIN32_MB2WC
// --- Mac (TEC based) conversion ---
2621 #if defined(__WXMAC__)
2623 // leave UTF16 and UTF32 to the built-ins of wx
2624 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
2625 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
2629 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
2630 : new wxMBConv_mac(m_encoding
);
2632 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
// --- Cocoa (Core Foundation based) conversion ---
2641 #if defined(__WXCOCOA__)
2643 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
2647 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
2648 : new wxMBConv_cocoa(m_encoding
);
2650 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
// --- built-in UTF converters: determine the encoding (from the name if only
// a name was given) and create the matching object ---
2660 wxFontEncoding enc
= m_encoding
;
2662 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2664 // use "false" to suppress interactive dialogs -- we can be called from
2665 // anywhere and popping up a dialog from here is the last thing we want to
2667 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
2669 #endif // wxUSE_FONTMAP
2673 case wxFONTENCODING_UTF7
:
2674 return new wxMBConvUTF7
;
2676 case wxFONTENCODING_UTF8
:
2677 return new wxMBConvUTF8
;
2679 case wxFONTENCODING_UTF16BE
:
2680 return new wxMBConvUTF16BE
;
2682 case wxFONTENCODING_UTF16LE
:
2683 return new wxMBConvUTF16LE
;
2685 case wxFONTENCODING_UTF32BE
:
2686 return new wxMBConvUTF32BE
;
2688 case wxFONTENCODING_UTF32LE
:
2689 return new wxMBConvUTF32LE
;
2692 // nothing to do but put here to suppress gcc warnings
// --- last resort: wxEncodingConverter based conversion ---
2699 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2700 : new wxMBConv_wxwin(m_encoding
);
2706 #endif // wxUSE_FONTMAP
2708 // NB: This is a hack to prevent deadlock. What could otherwise happen
2709 // in Unicode build: wxConvLocal creation ends up being here
2710 // because of some failure and logs the error. But wxLog will try to
2711 // attach timestamp, for which it will need wxConvLocal (to convert
2712 // time to char* and then wchar_t*), but that fails, tries to log
2713 // error, but wxLog has a (already locked) critical section that
2714 // guards static buffer.
2715 static bool alreadyLoggingError
= false;
2716 if (!alreadyLoggingError
)
2718 alreadyLoggingError
= true;
2719 wxLogError(_("Cannot convert from the charset '%s'!"),
2723 wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str()
2724 #else // !wxUSE_FONTMAP
// NOTE(review): "%s" is used with m_encoding, which is handled as a
// wxFontEncoding value elsewhere in this function -- looks like a format
// specifier mismatch, confirm
2725 wxString::Format(_("encoding %s"), m_encoding
).c_str()
2726 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2728 alreadyLoggingError
= false;
// Creates the real conversion object on demand.
//
// The method is const, so the members are modified through a pointer with
// constness cast away: m_name may be set to a copy of the system encoding
// name, m_convReal receives the result of DoCreate() and m_deferred is reset
// to false.
// NOTE(review): the condition guarding the whole body (presumably a test of
// m_deferred) is not visible in this listing.
2734 void wxCSConv::CreateConvIfNeeded() const
2738 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2741 // if we have neither the name nor the encoding, use the default
2742 // encoding for this system
2743 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
2745 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
2747 #endif // wxUSE_INTL
2749 self
->m_convReal
= DoCreate();
2750 self
->m_deferred
= false;
// Converts the multibyte string psz to wide characters.
//
// After making sure the real converter exists, the call is forwarded to
// m_convReal->MB2WC(). The remaining statements are a fallback which widens
// every byte of psz (read as unsigned char) to one wchar_t, including the
// terminating NUL since the loop runs up to and including index len.
//
// NOTE(review): the conditions selecting between the two paths and the final
// return are not visible in this listing. The fallback loop copies len + 1
// characters without any visible reference to n -- confirm that callers
// always pass a buffer of at least strlen(psz) + 1 elements.
2754 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2756 CreateConvIfNeeded();
2759 return m_convReal
->MB2WC(buf
, psz
, n
);
2762 size_t len
= strlen(psz
);
2766 for (size_t c
= 0; c
<= len
; c
++)
2767 buf
[c
] = (unsigned char)(psz
[c
]);
// Converts the wide string psz to multibyte characters.
//
// After making sure the real converter exists, the call is forwarded to
// m_convReal->WC2MB(). The remaining statements are a fallback which walks
// over the len + 1 characters of psz (terminating NUL included); the first
// loop narrows each of them to char and stores it in buf.
//
// NOTE(review): the conditions selecting between the paths, the body of the
// second loop and the return statements are not visible in this listing. The
// visible copy loop makes no reference to n -- confirm that callers always
// pass a buffer of at least wxWcslen(psz) + 1 bytes.
2773 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2775 CreateConvIfNeeded();
2778 return m_convReal
->WC2MB(buf
, psz
, n
);
2781 const size_t len
= wxWcslen(psz
);
2784 for (size_t c
= 0; c
<= len
; c
++)
2788 buf
[c
] = (char)psz
[c
];
2793 for (size_t c
= 0; c
<= len
; c
++)
2803 // ----------------------------------------------------------------------------
2805 // ----------------------------------------------------------------------------
// File-private converter objects and the exported references/pointers bound
// to them.
//
// wxConvLibcObj has a platform dependent type: wxMBConv_win32, wxMBConv_mac
// or wxMBConvLibc.
// NOTE(review): the opening #if and the #else of this selection are not
// visible in this listing.
2808 static wxMBConv_win32 wxConvLibcObj
;
2809 #elif defined(__WXMAC__) && !defined(__MACH__)
2810 static wxMBConv_mac wxConvLibcObj
;
2812 static wxMBConvLibc wxConvLibcObj
;
// converters for the system encoding, ISO 8859-1, UTF-7 and UTF-8
2815 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
2816 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
2817 static wxMBConvUTF7 wxConvUTF7Obj
;
2818 static wxMBConvUTF8 wxConvUTF8Obj
;
// exported references to the objects above; wxConvCurrent initially points to
// the libc converter
2820 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
2821 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
2822 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
2823 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
2824 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
2825 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
2826 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
= &
2834 #else // !wxUSE_WCHAR_T
2836 // stand-ins in absence of wchar_t
2837 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
2842 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T