1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // ============================================================================
17 // ============================================================================
19 // ----------------------------------------------------------------------------
21 // ----------------------------------------------------------------------------
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
35 #include "wx/strconv.h"
40 #include "wx/msw/private.h"
41 #include "wx/msw/missing.h"
52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54 #endif // __WIN32__ but !__WXMICROWIN__
62 #include "wx/thread.h"
65 #include "wx/encconv.h"
66 #include "wx/fontmap.h"
71 #include <ATSUnicode.h>
72 #include <TextCommon.h>
73 #include <TextEncodingConverter.h>
76 #include "wx/mac/private.h" // includes mac headers
79 #define TRACE_STRCONV _T("strconv")
81 // ============================================================================
83 // ============================================================================
85 // ----------------------------------------------------------------------------
86 // UTF-16 en/decoding to/from UCS-4
87 // ----------------------------------------------------------------------------
90 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
95 *output
= (wxUint16
) input
;
98 else if (input
>=0x110000)
106 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
107 *output
= (wxUint16
) ((input
&0x3ff)+0xdc00);
113 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
115 if ((*input
<0xd800) || (*input
>0xdfff))
120 else if ((input
[1]<0xdc00) || (input
[1]>0xdfff))
127 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
133 // ----------------------------------------------------------------------------
135 // ----------------------------------------------------------------------------
137 wxMBConv::~wxMBConv()
139 // nothing to do here (necessary for Darwin linking probably)
142 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
146 // calculate the length of the buffer needed first
147 size_t nLen
= MB2WC(NULL
, psz
, 0);
148 if ( nLen
!= (size_t)-1 )
150 // now do the actual conversion
151 wxWCharBuffer
buf(nLen
);
152 nLen
= MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NULL
153 if ( nLen
!= (size_t)-1 )
160 wxWCharBuffer
buf((wchar_t *)NULL
);
165 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
169 size_t nLen
= WC2MB(NULL
, pwz
, 0);
170 if ( nLen
!= (size_t)-1 )
172 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
173 nLen
= WC2MB(buf
.data(), pwz
, nLen
+ 4);
174 if ( nLen
!= (size_t)-1 )
181 wxCharBuffer
buf((char *)NULL
);
186 const wxWCharBuffer
wxMBConv::cMB2WC(const char *szString
, size_t nStringLen
, size_t* pOutSize
) const
188 wxASSERT(pOutSize
!= NULL
);
190 const char* szEnd
= szString
+ nStringLen
+ 1;
191 const char* szPos
= szString
;
192 const char* szStart
= szPos
;
194 size_t nActualLength
= 0;
195 size_t nCurrentSize
= nStringLen
; //try normal size first (should never resize?)
197 wxWCharBuffer
theBuffer(nCurrentSize
);
199 //Convert the string until the length() is reached, continuing the
200 //loop every time a null character is reached
201 while(szPos
!= szEnd
)
203 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
205 //Get the length of the current (sub)string
206 size_t nLen
= MB2WC(NULL
, szPos
, 0);
208 //Invalid conversion?
209 if( nLen
== (size_t)-1 )
212 theBuffer
.data()[0u] = wxT('\0');
217 //Increase the actual length (+1 for current null character)
218 nActualLength
+= nLen
+ 1;
220 //if buffer too small, realloc the buffer
221 if (nActualLength
> (nCurrentSize
+1))
223 wxWCharBuffer
theNewBuffer(nCurrentSize
<< 1);
224 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
* sizeof(wchar_t));
225 theBuffer
= theNewBuffer
;
229 //Convert the current (sub)string
230 if ( MB2WC(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
233 theBuffer
.data()[0u] = wxT('\0');
237 //Increment to next (sub)string
238 //Note that we have to use strlen instead of nLen here
239 //because XX2XX gives us the size of the output buffer,
240 //which is not necessarily the length of the string
241 szPos
+= strlen(szPos
) + 1;
244 //success - return actual length and the buffer
245 *pOutSize
= nActualLength
;
249 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *szString
, size_t nStringLen
, size_t* pOutSize
) const
251 wxASSERT(pOutSize
!= NULL
);
253 const wchar_t* szEnd
= szString
+ nStringLen
+ 1;
254 const wchar_t* szPos
= szString
;
255 const wchar_t* szStart
= szPos
;
257 size_t nActualLength
= 0;
258 size_t nCurrentSize
= nStringLen
<< 2; //try * 4 first
260 wxCharBuffer
theBuffer(nCurrentSize
);
262 //Convert the string until the length() is reached, continuing the
263 //loop every time a null character is reached
264 while(szPos
!= szEnd
)
266 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
268 //Get the length of the current (sub)string
269 size_t nLen
= WC2MB(NULL
, szPos
, 0);
271 //Invalid conversion?
272 if( nLen
== (size_t)-1 )
275 theBuffer
.data()[0u] = wxT('\0');
279 //Increase the actual length (+1 for current null character)
280 nActualLength
+= nLen
+ 1;
282 //if buffer too small, realloc the buffer
283 if (nActualLength
> (nCurrentSize
+1))
285 wxCharBuffer
theNewBuffer(nCurrentSize
<< 1);
286 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
);
287 theBuffer
= theNewBuffer
;
291 //Convert the current (sub)string
292 if(WC2MB(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
295 theBuffer
.data()[0u] = wxT('\0');
299 //Increment to next (sub)string
300 //Note that we have to use wxWcslen instead of nLen here
301 //because XX2XX gives us the size of the output buffer,
302 //which is not necessarily the length of the string
303 szPos
+= wxWcslen(szPos
) + 1;
306 //success - return actual length and the buffer
307 *pOutSize
= nActualLength
;
311 // ----------------------------------------------------------------------------
313 // ----------------------------------------------------------------------------
315 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
317 return wxMB2WC(buf
, psz
, n
);
320 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
322 return wxWC2MB(buf
, psz
, n
);
327 // ----------------------------------------------------------------------------
328 // wxConvBrokenFileNames
329 // ----------------------------------------------------------------------------
331 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar
*charset
)
333 if ( !charset
|| wxStricmp(charset
, _T("UTF-8")) == 0
334 || wxStricmp(charset
, _T("UTF8")) == 0 )
335 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
337 m_conv
= new wxCSConv(charset
);
341 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf
,
343 size_t outputSize
) const
345 return m_conv
->MB2WC( outputBuf
, psz
, outputSize
);
349 wxConvBrokenFileNames::WC2MB(char *outputBuf
,
351 size_t outputSize
) const
353 return m_conv
->WC2MB( outputBuf
, psz
, outputSize
);
358 // ----------------------------------------------------------------------------
360 // ----------------------------------------------------------------------------
362 // Implementation (C) 2004 Fredrik Roubert
365 // BASE64 decoding table
367 static const unsigned char utf7unb64
[] =
369 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
370 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
371 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
372 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
373 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
374 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
375 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
376 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
377 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
378 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
379 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
380 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
381 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
382 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
383 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
384 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
385 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
386 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
387 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
388 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
389 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
390 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
391 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
392 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
393 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
394 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
395 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
396 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
397 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
398 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
399 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
400 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
403 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
407 while (*psz
&& ((!buf
) || (len
< n
)))
409 unsigned char cc
= *psz
++;
417 else if (*psz
== '-')
427 // BASE64 encoded string
431 for (lsb
= false, d
= 0, l
= 0;
432 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff; psz
++)
436 for (l
+= 6; l
>= 8; lsb
= !lsb
)
438 c
= (unsigned char)((d
>> (l
-= 8)) % 256);
447 *buf
= (wchar_t)(c
<< 8);
454 if (buf
&& (len
< n
))
460 // BASE64 encoding table
462 static const unsigned char utf7enb64
[] =
464 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
465 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
466 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
467 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
468 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
469 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
470 'w', 'x', 'y', 'z', '0', '1', '2', '3',
471 '4', '5', '6', '7', '8', '9', '+', '/'
475 // UTF-7 encoding table
477 // 0 - Set D (directly encoded characters)
478 // 1 - Set O (optional direct characters)
479 // 2 - whitespace characters (optional)
480 // 3 - special characters
482 static const unsigned char utf7encode
[128] =
484 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
485 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
486 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
488 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
490 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
494 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
500 while (*psz
&& ((!buf
) || (len
< n
)))
503 if (cc
< 0x80 && utf7encode
[cc
] < 1)
511 else if (((wxUint32
)cc
) > 0xffff)
513 // no surrogate pair generation (yet?)
524 // BASE64 encode string
525 unsigned int lsb
, d
, l
;
526 for (d
= 0, l
= 0;; psz
++)
528 for (lsb
= 0; lsb
< 2; lsb
++)
531 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
533 for (l
+= 8; l
>= 6; )
537 *buf
++ = utf7enb64
[(d
>> l
) % 64];
542 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
548 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
557 if (buf
&& (len
< n
))
562 // ----------------------------------------------------------------------------
564 // ----------------------------------------------------------------------------
566 static wxUint32 utf8_max
[]=
567 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
569 // boundaries of the private use area we use to (temporarily) remap
570 // characters which are invalid in a UTF-8 encoded string
571 const wxUint32 wxUnicodePUA
= 0x100000;
572 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
574 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
578 while (*psz
&& ((!buf
) || (len
< n
)))
580 const char *opsz
= psz
;
581 bool invalid
= false;
582 unsigned char cc
= *psz
++, fc
= cc
;
584 for (cnt
= 0; fc
& 0x80; cnt
++)
593 // escape the escape character for octal escapes
594 if ((m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
595 && cc
== '\\' && (!buf
|| len
< n
))
607 // invalid UTF-8 sequence
612 unsigned ocnt
= cnt
- 1;
613 wxUint32 res
= cc
& (0x3f >> cnt
);
617 if ((cc
& 0xC0) != 0x80)
619 // invalid UTF-8 sequence
624 res
= (res
<< 6) | (cc
& 0x3f);
626 if (invalid
|| res
<= utf8_max
[ocnt
])
628 // illegal UTF-8 encoding
631 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
632 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
634 // if one of our PUA characters turns up externally
635 // it must also be treated as an illegal sequence
636 // (a bit like you have to escape an escape character)
642 // cast is ok because wchar_t == wxUint16 if WC_UTF16
643 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
644 if (pa
== (size_t)-1)
658 #endif // WC_UTF16/!WC_UTF16
663 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
665 while (opsz
< psz
&& (!buf
|| len
< n
))
668 // cast is ok because wchar_t == wxUint16 if WC_UTF16
669 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
670 wxASSERT(pa
!= (size_t)-1);
677 *buf
++ = wxUnicodePUA
+ (unsigned char)*opsz
;
683 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
685 while (opsz
< psz
&& (!buf
|| len
< n
))
687 if ( buf
&& len
+ 3 < n
)
689 unsigned char on
= *opsz
;
691 *buf
++ = (wchar_t)( L
'0' + on
/ 0100 );
692 *buf
++ = (wchar_t)( L
'0' + (on
% 0100) / 010 );
693 *buf
++ = (wchar_t)( L
'0' + on
% 010 );
699 else // MAP_INVALID_UTF8_NOT
706 if (buf
&& (len
< n
))
711 static inline bool isoctal(wchar_t wch
)
713 return L
'0' <= wch
&& wch
<= L
'7';
716 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
720 while (*psz
&& ((!buf
) || (len
< n
)))
724 // cast is ok for WC_UTF16
725 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
726 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
728 cc
=(*psz
++) & 0x7fffffff;
731 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
732 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
735 *buf
++ = (char)(cc
- wxUnicodePUA
);
738 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
739 && cc
== L
'\\' && psz
[0] == L
'\\' )
746 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
748 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
752 *buf
++ = (char) ((psz
[0] - L
'0')*0100 +
753 (psz
[1] - L
'0')*010 +
763 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
777 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
779 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
791 // ----------------------------------------------------------------------------
793 // ----------------------------------------------------------------------------
795 #ifdef WORDS_BIGENDIAN
796 #define wxMBConvUTF16straight wxMBConvUTF16BE
797 #define wxMBConvUTF16swap wxMBConvUTF16LE
799 #define wxMBConvUTF16swap wxMBConvUTF16BE
800 #define wxMBConvUTF16straight wxMBConvUTF16LE
806 // copy 16bit MB to 16bit String
807 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
811 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
814 *buf
++ = *(wxUint16
*)psz
;
817 psz
+= sizeof(wxUint16
);
819 if (buf
&& len
<n
) *buf
=0;
825 // copy 16bit String to 16bit MB
826 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
830 while (*psz
&& (!buf
|| len
< n
))
834 *(wxUint16
*)buf
= *psz
;
835 buf
+= sizeof(wxUint16
);
837 len
+= sizeof(wxUint16
);
840 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
846 // swap 16bit MB to 16bit String
847 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
851 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
855 ((char *)buf
)[0] = psz
[1];
856 ((char *)buf
)[1] = psz
[0];
860 psz
+= sizeof(wxUint16
);
862 if (buf
&& len
<n
) *buf
=0;
868 // swap 16bit String to 16bit MB
869 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
873 while (*psz
&& (!buf
|| len
< n
))
877 *buf
++ = ((char*)psz
)[1];
878 *buf
++ = ((char*)psz
)[0];
880 len
+= sizeof(wxUint16
);
883 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
892 // copy 16bit MB to 32bit String
893 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
897 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
900 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
901 if (pa
== (size_t)-1)
907 psz
+= pa
* sizeof(wxUint16
);
909 if (buf
&& len
<n
) *buf
=0;
915 // copy 32bit String to 16bit MB
916 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
920 while (*psz
&& (!buf
|| len
< n
))
923 size_t pa
=encode_utf16(*psz
, cc
);
925 if (pa
== (size_t)-1)
930 *(wxUint16
*)buf
= cc
[0];
931 buf
+= sizeof(wxUint16
);
934 *(wxUint16
*)buf
= cc
[1];
935 buf
+= sizeof(wxUint16
);
939 len
+= pa
*sizeof(wxUint16
);
942 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
948 // swap 16bit MB to 32bit String
949 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
953 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
957 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
958 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
960 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
961 if (pa
== (size_t)-1)
968 psz
+= pa
* sizeof(wxUint16
);
970 if (buf
&& len
<n
) *buf
=0;
976 // swap 32bit String to 16bit MB
977 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
981 while (*psz
&& (!buf
|| len
< n
))
984 size_t pa
=encode_utf16(*psz
, cc
);
986 if (pa
== (size_t)-1)
991 *buf
++ = ((char*)cc
)[1];
992 *buf
++ = ((char*)cc
)[0];
995 *buf
++ = ((char*)cc
)[3];
996 *buf
++ = ((char*)cc
)[2];
1000 len
+= pa
*sizeof(wxUint16
);
1003 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
1011 // ----------------------------------------------------------------------------
1013 // ----------------------------------------------------------------------------
1015 #ifdef WORDS_BIGENDIAN
1016 #define wxMBConvUTF32straight wxMBConvUTF32BE
1017 #define wxMBConvUTF32swap wxMBConvUTF32LE
1019 #define wxMBConvUTF32swap wxMBConvUTF32BE
1020 #define wxMBConvUTF32straight wxMBConvUTF32LE
1024 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1025 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
1030 // copy 32bit MB to 16bit String
1031 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1035 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1039 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
1040 if (pa
== (size_t)-1)
1050 psz
+= sizeof(wxUint32
);
1052 if (buf
&& len
<n
) *buf
=0;
1058 // copy 16bit String to 32bit MB
1059 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1063 while (*psz
&& (!buf
|| len
< n
))
1067 // cast is ok for WC_UTF16
1068 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
1069 if (pa
== (size_t)-1)
1074 *(wxUint32
*)buf
= cc
;
1075 buf
+= sizeof(wxUint32
);
1077 len
+= sizeof(wxUint32
);
1081 if (buf
&& len
<=n
-sizeof(wxUint32
))
1089 // swap 32bit MB to 16bit String
1090 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1094 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1097 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
1098 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
1103 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
1104 if (pa
== (size_t)-1)
1114 psz
+= sizeof(wxUint32
);
1124 // swap 16bit String to 32bit MB
1125 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1129 while (*psz
&& (!buf
|| len
< n
))
1133 // cast is ok for WC_UTF16
1134 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
1135 if (pa
== (size_t)-1)
1145 len
+= sizeof(wxUint32
);
1149 if (buf
&& len
<=n
-sizeof(wxUint32
))
1158 // copy 32bit MB to 32bit String
1159 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1163 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1166 *buf
++ = *(wxUint32
*)psz
;
1168 psz
+= sizeof(wxUint32
);
1178 // copy 32bit String to 32bit MB
1179 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1183 while (*psz
&& (!buf
|| len
< n
))
1187 *(wxUint32
*)buf
= *psz
;
1188 buf
+= sizeof(wxUint32
);
1191 len
+= sizeof(wxUint32
);
1195 if (buf
&& len
<=n
-sizeof(wxUint32
))
1202 // swap 32bit MB to 32bit String
1203 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1207 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1211 ((char *)buf
)[0] = psz
[3];
1212 ((char *)buf
)[1] = psz
[2];
1213 ((char *)buf
)[2] = psz
[1];
1214 ((char *)buf
)[3] = psz
[0];
1218 psz
+= sizeof(wxUint32
);
1228 // swap 32bit String to 32bit MB
1229 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1233 while (*psz
&& (!buf
|| len
< n
))
1237 *buf
++ = ((char *)psz
)[3];
1238 *buf
++ = ((char *)psz
)[2];
1239 *buf
++ = ((char *)psz
)[1];
1240 *buf
++ = ((char *)psz
)[0];
1242 len
+= sizeof(wxUint32
);
1246 if (buf
&& len
<=n
-sizeof(wxUint32
))
1256 // ============================================================================
1257 // The classes doing conversion using the iconv_xxx() functions
1258 // ============================================================================
1262 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1263 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1264 // (unless there's yet another bug in glibc) the only case when iconv()
1265 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1266 // left in the input buffer -- when _real_ error occurs,
1267 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1269 // [This bug does not appear in glibc 2.2.]
1270 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1271 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1272 (errno != E2BIG || bufLeft != 0))
1274 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1277 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1279 #define ICONV_T_INVALID ((iconv_t)-1)
1281 #if SIZEOF_WCHAR_T == 4
1282 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1283 #define WC_ENC wxFONTENCODING_UTF32
1284 #elif SIZEOF_WCHAR_T == 2
1285 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1286 #define WC_ENC wxFONTENCODING_UTF16
1287 #else // sizeof(wchar_t) != 2 nor 4
1288 // does this ever happen?
1289 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1292 // ----------------------------------------------------------------------------
1293 // wxMBConv_iconv: encapsulates an iconv character set
1294 // ----------------------------------------------------------------------------
1296 class wxMBConv_iconv
: public wxMBConv
1299 wxMBConv_iconv(const wxChar
*name
);
1300 virtual ~wxMBConv_iconv();
1302 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1303 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1306 { return (m2w
!= ICONV_T_INVALID
) && (w2m
!= ICONV_T_INVALID
); }
1309 // the iconv handlers used to translate from multibyte to wide char and in
1310 // the other direction
1314 // guards access to m2w and w2m objects
1315 wxMutex m_iconvMutex
;
1319 // the name (for iconv_open()) of a wide char charset -- if none is
1320 // available on this machine, it will remain NULL
1321 static wxString ms_wcCharsetName
;
1323 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1324 // different endian-ness than the native one
1325 static bool ms_wcNeedsSwap
;
1328 // make the constructor available for unit testing
1329 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_iconv( const wxChar
* name
)
1331 wxMBConv_iconv
* result
= new wxMBConv_iconv( name
);
1332 if ( !result
->IsOk() )
1340 wxString
wxMBConv_iconv::ms_wcCharsetName
;
1341 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1343 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1345 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1346 // names for the charsets
1347 const wxCharBuffer
cname(wxString(name
).ToAscii());
1349 // check for charset that represents wchar_t:
1350 if ( ms_wcCharsetName
.empty() )
1352 wxLogTrace(TRACE_STRCONV
, _T("Looking for wide char codeset:"));
1355 const wxChar
**names
= wxFontMapperBase::GetAllEncodingNames(WC_ENC
);
1356 #else // !wxUSE_FONTMAP
1357 static const wxChar
*names
[] =
1359 #if SIZEOF_WCHAR_T == 4
1361 #elif SIZEOF_WCHAR_T == 2 // must be a comparison: '=' is not valid in a preprocessor condition
1366 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1368 for ( ; *names
&& ms_wcCharsetName
.empty(); ++names
)
1370 const wxString
nameCS(*names
);
1372 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1373 wxString
nameXE(nameCS
);
1374 #ifdef WORDS_BIGENDIAN
1376 #else // little endian
1380 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1383 m2w
= iconv_open(nameXE
.ToAscii(), cname
);
1384 if ( m2w
== ICONV_T_INVALID
)
1386 // try charset w/o bytesex info (e.g. "UCS4")
1387 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1389 m2w
= iconv_open(nameCS
.ToAscii(), cname
);
1391 // and check for bytesex ourselves:
1392 if ( m2w
!= ICONV_T_INVALID
)
1394 char buf
[2], *bufPtr
;
1395 wchar_t wbuf
[2], *wbufPtr
;
1403 outsz
= SIZEOF_WCHAR_T
* 2;
1407 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1408 (char**)&wbufPtr
, &outsz
);
1410 if (ICONV_FAILED(res
, insz
))
1412 wxLogLastError(wxT("iconv"));
1413 wxLogError(_("Conversion to charset '%s' doesn't work."),
1416 else // ok, can convert to this encoding, remember it
1418 ms_wcCharsetName
= nameCS
;
1419 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1423 else // use charset not requiring byte swapping
1425 ms_wcCharsetName
= nameXE
;
1429 wxLogTrace(TRACE_STRCONV
,
1430 wxT("iconv wchar_t charset is \"%s\"%s"),
1431 ms_wcCharsetName
.empty() ? _T("<none>")
1432 : ms_wcCharsetName
.c_str(),
1433 ms_wcNeedsSwap
? _T(" (needs swap)")
1436 else // we already have ms_wcCharsetName
1438 m2w
= iconv_open(ms_wcCharsetName
.ToAscii(), cname
);
1441 if ( ms_wcCharsetName
.empty() )
1443 w2m
= ICONV_T_INVALID
;
1447 w2m
= iconv_open(cname
, ms_wcCharsetName
.ToAscii());
1448 if ( w2m
== ICONV_T_INVALID
)
1450 wxLogTrace(TRACE_STRCONV
,
1451 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1452 ms_wcCharsetName
.c_str(), cname
.data());
1457 wxMBConv_iconv::~wxMBConv_iconv()
1459 if ( m2w
!= ICONV_T_INVALID
)
1461 if ( w2m
!= ICONV_T_INVALID
)
1465 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1468 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1469 // Unfortunately there are a couple of global wxCSConv objects such as
1470 // wxConvLocal that are used all over wx code, so we have to make sure
1471 // the handle is used by at most one thread at a time. Otherwise
1472 // only a few wx classes would be safe to use from non-main threads
1473 // as MB<->WC conversion would fail "randomly".
1474 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1477 size_t inbuf
= strlen(psz
);
1478 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1480 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1481 wchar_t *bufPtr
= buf
;
1482 const char *pszPtr
= psz
;
1486 // have destination buffer, convert there
1488 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1489 (char**)&bufPtr
, &outbuf
);
1490 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1494 // convert to native endianness by byte-swapping each converted character in place
1495 for ( size_t i = 0; i < res; i++ )
1496 buf[i] = WC_BSWAP(buf[i]); // store back at i: writing to buf[n] left the text unswapped and hit the slot past the n-character capacity
1499 // NB: iconv was given only strlen(psz) characters on input, and so
1500 // it couldn't convert the trailing zero. Let's do it ourselves
1501 // if there's some room left for it in the output buffer.
1507 // no destination buffer... convert using temp buffer
1508 // to calculate destination buffer requirement
1513 outbuf
= 8*SIZEOF_WCHAR_T
;
1516 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1517 (char**)&bufPtr
, &outbuf
);
1519 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1520 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1523 if (ICONV_FAILED(cres
, inbuf
))
1525 //VS: it is ok if iconv fails, hence trace only
1526 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1533 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1536 // NB: explained in MB2WC
1537 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1540 size_t inlen
= wxWcslen(psz
);
1541 size_t inbuf
= inlen
* SIZEOF_WCHAR_T
;
1545 wchar_t *tmpbuf
= 0;
1549 // need to copy to temp buffer to switch endianness
1550 // (doing WC_BSWAP twice on the original buffer won't help, as it
1551 // could be in read-only memory, or be accessed in some other thread)
1552 tmpbuf
= (wchar_t *)malloc(inbuf
+ SIZEOF_WCHAR_T
);
1553 for ( size_t i = 0; i < inlen; i++ )
1554 tmpbuf[i] = WC_BSWAP(psz[i]); // fill slot i of the copy: n is the output buffer size, not an index into tmpbuf
1555 tmpbuf
[inlen
] = L
'\0';
1561 // have destination buffer, convert there
1562 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1566 // NB: iconv was given only wcslen(psz) characters on input, and so
1567 // it couldn't convert the trailing zero. Let's do it ourselves
1568 // if there's some room left for it in the output buffer.
1574 // no destination buffer... convert using temp buffer
1575 // to calculate destination buffer requirement
1579 buf
= tbuf
; outbuf
= 16;
1581 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1584 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1592 if (ICONV_FAILED(cres
, inbuf
))
1594 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1601 #endif // HAVE_ICONV
1604 // ============================================================================
1605 // Win32 conversion classes
1606 // ============================================================================
1608 #ifdef wxHAVE_WIN32_MB2WC
1612 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1613 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1616 class wxMBConv_win32
: public wxMBConv
1621 m_CodePage
= CP_ACP
;
1625 wxMBConv_win32(const wxChar
* name
)
1627 m_CodePage
= wxCharsetToCodepage(name
);
1630 wxMBConv_win32(wxFontEncoding encoding
)
1632 m_CodePage
= wxEncodingToCodepage(encoding
);
1636 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1638 // note that we have to use MB_ERR_INVALID_CHARS flag as without it
1639 // the behaviour is not compatible with the Unix version (using iconv)
1640 // and breaks the library itself, e.g. wxTextInputStream::NextChar()
1641 // wouldn't work if reading an incomplete MB char didn't result in an
1644 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1645 // an error (tested under Windows Server 2003) and apparently it is
1646 // done on purpose, i.e. the function accepts any input in this case
1647 // and although I'd prefer to return error on ill-formed output, our
1648 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1649 // explicitly ill-formed according to RFC 2152) neither so we don't
1650 // even have any fallback here...
1651 int flags
= m_CodePage
== CP_UTF7
? 0 : MB_ERR_INVALID_CHARS
;
1653 const size_t len
= ::MultiByteToWideChar
1655 m_CodePage
, // code page
1656 flags
, // flags: fall on error
1657 psz
, // input string
1658 -1, // its length (NUL-terminated)
1659 buf
, // output string
1660 buf
? n
: 0 // size of output buffer
1663 // note that it returns count of written chars for buf != NULL and size
1664 // of the needed buffer for buf == NULL so in either case the length of
1665 // the string (which never includes the terminating NUL) is one less
1666 return len
? len
- 1 : (size_t)-1;
// Converts the NUL-terminated wide string pwz to the code page m_CodePage with
// ::WideCharToMultiByte(). buf receives the output and n is its size; when buf
// is NULL a zero size is passed instead of n.
// Lossy conversions are rejected: either WC_NO_BEST_FIT_CHARS together with
// the "default char used" output flag is relied upon (only if
// CanUseNoBestFit() and m_CodePage < 50000) or, when that is not possible, the
// result is converted back with MB2WC() and compared with the input.
// NOTE(review): the return statements are missing from this listing; the
// remaining comments point to a failure value for lossy/failed conversions and
// to "len - 1" on success -- confirm against the full source.
1669 size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
1672 we have a problem here: by default, WideCharToMultiByte() may
1673 replace characters unrepresentable in the target code page with bad
1674 quality approximations such as turning "1/2" symbol (U+00BD) into
1675 "1" for the code pages which don't have it and we, obviously, want
1676 to avoid this at any price
1678 the trouble is that this function does it _silently_, i.e. it won't
1679 even tell us whether it did or not... Win98/2000 and higher provide
1680 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1681 we have to resort to a round trip, i.e. check that converting back
1682 results in the same string -- this is, of course, expensive but
1683 otherwise we simply can't be sure to not garble the data.
1686 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1687 // it doesn't work with CJK encodings (which we test for rather roughly
1688 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1690 BOOL usedDef
wxDUMMY_INITIALIZE(false);
1693 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
1695 // it's our lucky day
1696 flags
= WC_NO_BEST_FIT_CHARS
;
1697 pUsedDef
= &usedDef
;
1699 else // old system or unsupported encoding
1705 const size_t len
= ::WideCharToMultiByte
1707 m_CodePage
, // code page
1708 flags
, // either none or no best fit
1709 pwz
, // input string
1710 -1, // it is (wide) NUL-terminated
1711 buf
, // output buffer
1712 buf
? n
: 0, // and its size
1713 NULL
, // default "replacement" char
1714 pUsedDef
// [out] was it used?
1719 // function totally failed
1723 // if we were really converting, check if we succeeded
1728 // check if the conversion failed, i.e. if any replacements
1733 else // we must resort to double tripping...
1735 wxWCharBuffer
wcBuf(n
);
1736 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
1737 wcscmp(wcBuf
, pwz
) != 0 )
1739 // we didn't obtain the same thing we started from, hence
1740 // the conversion was lossy and we consider that it failed
1746 // see the comment above for the reason of "len - 1"
// Returns true unless m_CodePage is -1 (presumably the value stored when no
// usable code page could be determined -- confirm where it is assigned).
1750 bool IsOk() const { return m_CodePage
!= -1; }
// Tells whether the running system allows the use of WC_NO_BEST_FIT_CHARS.
// The answer is derived from wxGetOsVersion() on the first call and cached in
// the function-local static s_isWin98Or2k (-1 meaning "not determined yet");
// the function returns true only when the cached value is 1.
// NOTE(review): the case labels of the switch and the value assigned in the
// "unknown" branch are not visible in this listing -- confirm them.
1753 static bool CanUseNoBestFit()
1755 static int s_isWin98Or2k
= -1;
1757 if ( s_isWin98Or2k
== -1 )
1760 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
// this branch requires version 4.10 or later
1763 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
// this branch requires major version 5 or later
1767 s_isWin98Or2k
= verMaj
>= 5;
1771 // unknown, be conservative by default
1775 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
1778 return s_isWin98Or2k
== 1;
1784 #endif // wxHAVE_WIN32_MB2WC
1786 // ============================================================================
1787 // Cocoa conversion classes
1788 // ============================================================================
1790 #if defined(__WXCOCOA__)
1792 // RN: There is no UTF-32 support in either Core Foundation or
1793 // Cocoa. Strangely enough, Core Foundation itself uses
1794 // UTF-32 internally quite a bit - it's just not public (yet).
1796 #include <CoreFoundation/CFString.h>
1797 #include <CoreFoundation/CFStringEncodingExt.h>
// Maps a wxFontEncoding value to the corresponding Core Foundation
// CFStringEncoding constant.
//
// wxFONTENCODING_DEFAULT yields the system encoding as reported by
// CFStringGetSystemEncoding(); every other supported value is translated by
// the switch below. The local variable enc starts out as
// kCFStringEncodingInvalidId, so that is presumably the result for encodings
// without a case here (the default branch and the return statement are not
// visible in this listing -- NOTE(review): confirm).
//
// Note that wxFONTENCODING_ALTERNATIVE and wxFONTENCODING_CP866 both map to
// kCFStringEncodingDOSRussian.
1799 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
1801 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
1802 if ( encoding
== wxFONTENCODING_DEFAULT
)
1804 enc
= CFStringGetSystemEncoding();
1806 else switch( encoding
)
1808 case wxFONTENCODING_ISO8859_1
:
1809 enc
= kCFStringEncodingISOLatin1
;
1811 case wxFONTENCODING_ISO8859_2
:
1812 enc
= kCFStringEncodingISOLatin2
;
1814 case wxFONTENCODING_ISO8859_3
:
1815 enc
= kCFStringEncodingISOLatin3
;
1817 case wxFONTENCODING_ISO8859_4
:
1818 enc
= kCFStringEncodingISOLatin4
;
1820 case wxFONTENCODING_ISO8859_5
:
1821 enc
= kCFStringEncodingISOLatinCyrillic
;
1823 case wxFONTENCODING_ISO8859_6
:
1824 enc
= kCFStringEncodingISOLatinArabic
;
1826 case wxFONTENCODING_ISO8859_7
:
1827 enc
= kCFStringEncodingISOLatinGreek
;
1829 case wxFONTENCODING_ISO8859_8
:
1830 enc
= kCFStringEncodingISOLatinHebrew
;
1832 case wxFONTENCODING_ISO8859_9
:
1833 enc
= kCFStringEncodingISOLatin5
;
1835 case wxFONTENCODING_ISO8859_10
:
1836 enc
= kCFStringEncodingISOLatin6
;
1838 case wxFONTENCODING_ISO8859_11
:
1839 enc
= kCFStringEncodingISOLatinThai
;
1841 case wxFONTENCODING_ISO8859_13
:
1842 enc
= kCFStringEncodingISOLatin7
;
1844 case wxFONTENCODING_ISO8859_14
:
1845 enc
= kCFStringEncodingISOLatin8
;
1847 case wxFONTENCODING_ISO8859_15
:
1848 enc
= kCFStringEncodingISOLatin9
;
1851 case wxFONTENCODING_KOI8
:
1852 enc
= kCFStringEncodingKOI8_R
;
1854 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
1855 enc
= kCFStringEncodingDOSRussian
;
1858 // case wxFONTENCODING_BULGARIAN :
1862 case wxFONTENCODING_CP437
:
1863 enc
=kCFStringEncodingDOSLatinUS
;
1865 case wxFONTENCODING_CP850
:
1866 enc
= kCFStringEncodingDOSLatin1
;
1868 case wxFONTENCODING_CP852
:
1869 enc
= kCFStringEncodingDOSLatin2
;
1871 case wxFONTENCODING_CP855
:
1872 enc
= kCFStringEncodingDOSCyrillic
;
1874 case wxFONTENCODING_CP866
:
1875 enc
=kCFStringEncodingDOSRussian
;
1877 case wxFONTENCODING_CP874
:
1878 enc
= kCFStringEncodingDOSThai
;
1880 case wxFONTENCODING_CP932
:
1881 enc
= kCFStringEncodingDOSJapanese
;
1883 case wxFONTENCODING_CP936
:
1884 enc
=kCFStringEncodingDOSChineseSimplif
;
1886 case wxFONTENCODING_CP949
:
1887 enc
= kCFStringEncodingDOSKorean
;
1889 case wxFONTENCODING_CP950
:
1890 enc
= kCFStringEncodingDOSChineseTrad
;
1892 case wxFONTENCODING_CP1250
:
1893 enc
= kCFStringEncodingWindowsLatin2
;
1895 case wxFONTENCODING_CP1251
:
1896 enc
=kCFStringEncodingWindowsCyrillic
;
1898 case wxFONTENCODING_CP1252
:
1899 enc
=kCFStringEncodingWindowsLatin1
;
1901 case wxFONTENCODING_CP1253
:
1902 enc
= kCFStringEncodingWindowsGreek
;
1904 case wxFONTENCODING_CP1254
:
1905 enc
= kCFStringEncodingWindowsLatin5
;
1907 case wxFONTENCODING_CP1255
:
1908 enc
=kCFStringEncodingWindowsHebrew
;
1910 case wxFONTENCODING_CP1256
:
1911 enc
=kCFStringEncodingWindowsArabic
;
1913 case wxFONTENCODING_CP1257
:
1914 enc
= kCFStringEncodingWindowsBalticRim
;
1916 // This only really encodes to UTF7 (if that) evidently
1917 // case wxFONTENCODING_UTF7 :
1918 // enc = kCFStringEncodingNonLossyASCII ;
1920 case wxFONTENCODING_UTF8
:
1921 enc
= kCFStringEncodingUTF8
;
1923 case wxFONTENCODING_EUC_JP
:
1924 enc
= kCFStringEncodingEUC_JP
;
1926 case wxFONTENCODING_UTF16
:
1927 enc
= kCFStringEncodingUnicode
;
1929 case wxFONTENCODING_MACROMAN
:
1930 enc
= kCFStringEncodingMacRoman
;
1932 case wxFONTENCODING_MACJAPANESE
:
1933 enc
= kCFStringEncodingMacJapanese
;
1935 case wxFONTENCODING_MACCHINESETRAD
:
1936 enc
= kCFStringEncodingMacChineseTrad
;
1938 case wxFONTENCODING_MACKOREAN
:
1939 enc
= kCFStringEncodingMacKorean
;
1941 case wxFONTENCODING_MACARABIC
:
1942 enc
= kCFStringEncodingMacArabic
;
1944 case wxFONTENCODING_MACHEBREW
:
1945 enc
= kCFStringEncodingMacHebrew
;
1947 case wxFONTENCODING_MACGREEK
:
1948 enc
= kCFStringEncodingMacGreek
;
1950 case wxFONTENCODING_MACCYRILLIC
:
1951 enc
= kCFStringEncodingMacCyrillic
;
1953 case wxFONTENCODING_MACDEVANAGARI
:
1954 enc
= kCFStringEncodingMacDevanagari
;
1956 case wxFONTENCODING_MACGURMUKHI
:
1957 enc
= kCFStringEncodingMacGurmukhi
;
1959 case wxFONTENCODING_MACGUJARATI
:
1960 enc
= kCFStringEncodingMacGujarati
;
1962 case wxFONTENCODING_MACORIYA
:
1963 enc
= kCFStringEncodingMacOriya
;
1965 case wxFONTENCODING_MACBENGALI
:
1966 enc
= kCFStringEncodingMacBengali
;
1968 case wxFONTENCODING_MACTAMIL
:
1969 enc
= kCFStringEncodingMacTamil
;
1971 case wxFONTENCODING_MACTELUGU
:
1972 enc
= kCFStringEncodingMacTelugu
;
1974 case wxFONTENCODING_MACKANNADA
:
1975 enc
= kCFStringEncodingMacKannada
;
1977 case wxFONTENCODING_MACMALAJALAM
:
1978 enc
= kCFStringEncodingMacMalayalam
;
1980 case wxFONTENCODING_MACSINHALESE
:
1981 enc
= kCFStringEncodingMacSinhalese
;
1983 case wxFONTENCODING_MACBURMESE
:
1984 enc
= kCFStringEncodingMacBurmese
;
1986 case wxFONTENCODING_MACKHMER
:
1987 enc
= kCFStringEncodingMacKhmer
;
1989 case wxFONTENCODING_MACTHAI
:
1990 enc
= kCFStringEncodingMacThai
;
1992 case wxFONTENCODING_MACLAOTIAN
:
1993 enc
= kCFStringEncodingMacLaotian
;
1995 case wxFONTENCODING_MACGEORGIAN
:
1996 enc
= kCFStringEncodingMacGeorgian
;
1998 case wxFONTENCODING_MACARMENIAN
:
1999 enc
= kCFStringEncodingMacArmenian
;
2001 case wxFONTENCODING_MACCHINESESIMP
:
2002 enc
= kCFStringEncodingMacChineseSimp
;
2004 case wxFONTENCODING_MACTIBETAN
:
2005 enc
= kCFStringEncodingMacTibetan
;
2007 case wxFONTENCODING_MACMONGOLIAN
:
2008 enc
= kCFStringEncodingMacMongolian
;
2010 case wxFONTENCODING_MACETHIOPIC
:
2011 enc
= kCFStringEncodingMacEthiopic
;
2013 case wxFONTENCODING_MACCENTRALEUR
:
2014 enc
= kCFStringEncodingMacCentralEurRoman
;
2016 case wxFONTENCODING_MACVIATNAMESE
:
2017 enc
= kCFStringEncodingMacVietnamese
;
2019 case wxFONTENCODING_MACARABICEXT
:
2020 enc
= kCFStringEncodingMacExtArabic
;
2022 case wxFONTENCODING_MACSYMBOL
:
2023 enc
= kCFStringEncodingMacSymbol
;
2025 case wxFONTENCODING_MACDINGBATS
:
2026 enc
= kCFStringEncodingMacDingbats
;
2028 case wxFONTENCODING_MACTURKISH
:
2029 enc
= kCFStringEncodingMacTurkish
;
2031 case wxFONTENCODING_MACCROATIAN
:
2032 enc
= kCFStringEncodingMacCroatian
;
2034 case wxFONTENCODING_MACICELANDIC
:
2035 enc
= kCFStringEncodingMacIcelandic
;
2037 case wxFONTENCODING_MACROMANIAN
:
2038 enc
= kCFStringEncodingMacRomanian
;
2040 case wxFONTENCODING_MACCELTIC
:
2041 enc
= kCFStringEncodingMacCeltic
;
2043 case wxFONTENCODING_MACGAELIC
:
2044 enc
= kCFStringEncodingMacGaelic
;
2046 // case wxFONTENCODING_MACKEYBOARD :
2047 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2050 // because gcc is picky
// wxMBConv implementation for wxCocoa that converts through Core Foundation
// CFString objects. The multibyte encoding is kept in m_encoding as a
// CFStringEncoding; the wide side goes through UniChar buffers, with an
// additional wxMBConvUTF16 step when SIZEOF_WCHAR_T == 4.
// NOTE(review): several statements (early returns, #else/#endif lines, some
// call arguments) are not visible in this listing; the comments below only
// describe what can be seen.
2056 class wxMBConv_cocoa
: public wxMBConv
// initialise with the system encoding
2061 Init(CFStringGetSystemEncoding()) ;
// construct from a charset name: the name is resolved to a wxFontEncoding by
// the font mapper (non-interactively, hence "false") and then mapped to a
// CFStringEncoding
2065 wxMBConv_cocoa(const wxChar
* name
)
2067 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
// construct from a wxFontEncoding value
2071 wxMBConv_cocoa(wxFontEncoding encoding
)
2073 Init( wxCFStringEncFromFontEnc(encoding
) );
// remember the encoding used for the multibyte side
2080 void Init( CFStringEncoding encoding
)
2082 m_encoding
= encoding
;
// Multibyte -> wide: builds a CFString from the bytes of szUnConv, then
// extracts its characters as UniChars.
2085 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2089 CFStringRef theString
= CFStringCreateWithBytes (
2090 NULL
, //the allocator
2091 (const UInt8
*)szUnConv
,
2094 false //no BOM/external representation
2097 wxASSERT(theString
);
2099 size_t nOutLength
= CFStringGetLength(theString
);
2103 CFRelease(theString
);
2107 CFRange theRange
= { 0, nOutSize
};
2109 #if SIZEOF_WCHAR_T == 4
2110 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2113 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2115 CFRelease(theString
);
// NOTE(review): the terminator is written at index nOutLength while the
// buffer above holds nOutSize elements and the requested range is
// { 0, nOutSize }; no check relating the two is visible here -- verify.
2117 szUniCharBuffer
[nOutLength
] = '\0' ;
2119 #if SIZEOF_WCHAR_T == 4
// the UniChar data is converted to 4 byte wchar_t by the UTF-16 converter
2120 wxMBConvUTF16 converter
;
2121 converter
.MB2WC(szOut
, (const char*)szUniCharBuffer
, nOutSize
) ;
2122 delete[] szUniCharBuffer
;
// Wide -> multibyte: wraps the (UTF-16) input in a non-copying CFString and
// extracts it in m_encoding; the value returned is nRealOutSize - 1.
2128 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2132 size_t nRealOutSize
;
2133 size_t nBufSize
= wxWcslen(szUnConv
);
2134 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
2136 #if SIZEOF_WCHAR_T == 4
// 4 byte wchar_t: first produce a temporary UTF-16 copy of the input; the
// first WC2MB() call only asks for the needed size, the division afterwards
// turns the byte count into a UniChar count
2137 wxMBConvUTF16 converter
;
2138 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2139 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1] ;
2140 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
)) ;
2141 nBufSize
/= sizeof(UniChar
);
2144 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2148 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
2151 wxASSERT(theString
);
2153 //Note that CER puts a BOM when converting to unicode
2154 //so we check and use getchars instead in that case
2155 if (m_encoding
== kCFStringEncodingUnicode
)
2158 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2160 nRealOutSize
= CFStringGetLength(theString
) + 1;
2166 CFRangeMake(0, CFStringGetLength(theString
)),
2168 0, //what to put in characters that can't be converted -
2169 //0 tells CFString to return NULL if it meets such a character
2170 false, //not an external representation
2173 (CFIndex
*) &nRealOutSize
2177 CFRelease(theString
);
2179 #if SIZEOF_WCHAR_T == 4
2180 delete[] szUniBuffer
;
2183 return nRealOutSize
- 1;
// the converter is usable if the encoding is valid and available in CF
2188 return m_encoding
!= kCFStringEncodingInvalidId
&&
2189 CFStringIsEncodingAvailable(m_encoding
);
// encoding of the multibyte strings
2193 CFStringEncoding m_encoding
;
2196 #endif // defined(__WXCOCOA__)
2198 // ============================================================================
2199 // Mac conversion classes
2200 // ============================================================================
2202 #if defined(__WXMAC__) && defined(TARGET_CARBON)
// wxMBConv implementation for Carbon builds that converts with the Text
// Encoding Converter (TEC). Two TECObjectRef converters are kept, one per
// direction, between m_char_encoding and m_unicode_encoding (16 bit Unicode,
// see Init()). When SIZEOF_WCHAR_T == 4 an extra wxMBConvUTF16 step
// translates between the UniChar data TEC works with and wchar_t buffers.
// NOTE(review): many statements (returns, free() calls, #else/#endif lines,
// some declarations) are not visible in this listing; the comments below only
// describe what can be seen.
2204 class wxMBConv_mac
: public wxMBConv
// initialise with the system encoding
2209 Init(CFStringGetSystemEncoding()) ;
// construct from a charset name, resolved non-interactively by the font mapper
2213 wxMBConv_mac(const wxChar
* name
)
2215 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
// construct from a wxFontEncoding value
2219 wxMBConv_mac(wxFontEncoding encoding
)
2221 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
// dispose of both converters (presumably the destructor body; its signature
// line is not visible here); the returned status values are not examined in
// the visible code
2226 OSStatus status
= noErr
;
2227 status
= TECDisposeConverter(m_MB2WC_converter
);
2228 status
= TECDisposeConverter(m_WC2MB_converter
);
// store the encodings and create one TEC converter per direction
2232 void Init( TextEncodingBase encoding
)
2234 OSStatus status
= noErr
;
2235 m_char_encoding
= encoding
;
2236 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
2238 status
= TECCreateConverter(&m_MB2WC_converter
,
2240 m_unicode_encoding
);
2241 status
= TECCreateConverter(&m_WC2MB_converter
,
// Multibyte -> wide: converts psz with TECConvertText() into a UniChar buffer
// and, for 4 byte wchar_t, expands that buffer with wxMBConvUTF16.
2246 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2248 OSStatus status
= noErr
;
2249 ByteCount byteOutLen
;
2250 ByteCount byteInLen
= strlen(psz
) ;
2251 wchar_t *tbuf
= NULL
;
2252 UniChar
* ubuf
= NULL
;
2257 //apple specs say at least 32
2258 n
= wxMax( 32 , byteInLen
) ;
2259 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
2261 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
2262 #if SIZEOF_WCHAR_T == 4
// separate UniChar buffer, with room for a terminating UniChar
2263 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
// otherwise TEC writes directly to the output (or temporary) buffer
2265 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2267 status
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2268 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2269 #if SIZEOF_WCHAR_T == 4
2270 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2271 // is not properly terminated we get random characters at the end
2272 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2273 wxMBConvUTF16 converter
;
2274 res
= converter
.MB2WC( (buf
? buf
: tbuf
) , (const char*)ubuf
, n
) ;
// without the extra step the result is the number of UniChars produced
2277 res
= byteOutLen
/ sizeof( UniChar
) ;
2282 if ( buf
&& res
< n
)
// Wide -> multibyte: converts psz (via a temporary UTF-16 copy when wchar_t
// has 4 bytes) with TECConvertText() and then converts the result back with
// MB2WC() to detect lossy conversions.
2288 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2290 OSStatus status
= noErr
;
2291 ByteCount byteOutLen
;
2292 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2298 //apple specs say at least 32
2299 n
= wxMax( 32 , ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2300 tbuf
= (char*) malloc( n
) ;
2303 ByteCount byteBufferLen
= n
;
2304 UniChar
* ubuf
= NULL
;
2305 #if SIZEOF_WCHAR_T == 4
// first call only computes the size of the UTF-16 data, second one fills ubuf
2306 wxMBConvUTF16 converter
;
2307 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2308 byteInLen
= unicharlen
;
2309 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2310 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
// otherwise the input is passed to TEC as is
2312 ubuf
= (UniChar
*) psz
;
2314 status
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2315 (TextPtr
) (buf
? buf
: tbuf
) , byteBufferLen
, &byteOutLen
);
2316 #if SIZEOF_WCHAR_T == 4
2322 size_t res
= byteOutLen
;
2323 if ( buf
&& res
< n
)
2327 //we need to double-trip to verify it didn't insert any ? in place
2328 //of bogus characters
2329 wxWCharBuffer
wcBuf(n
);
2330 size_t pszlen
= wxWcslen(psz
);
2331 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
2332 wxWcslen(wcBuf
) != pszlen
||
2333 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2335 // we didn't obtain the same thing we started from, hence
2336 // the conversion was lossy and we consider that it failed
// usable only if both TEC converters exist
2345 { return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
; }
// the TEC converter objects, one per direction
2348 TECObjectRef m_MB2WC_converter
;
2349 TECObjectRef m_WC2MB_converter
;
// encodings of the multibyte and of the Unicode side
2351 TextEncodingBase m_char_encoding
;
2352 TextEncodingBase m_unicode_encoding
;
2355 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2357 // ============================================================================
2358 // wxEncodingConverter based conversion classes
2359 // ============================================================================
// wxMBConv implementation built on wxEncodingConverter. Two converters are
// used: m2w goes from m_enc to wxFONTENCODING_UNICODE and w2m the other way
// round. m_ok records whether both could be initialised and is what IsOk()
// reports. The class is declared non-copyable (DECLARE_NO_COPY_CLASS).
// NOTE(review): the return statements of MB2WC()/WC2MB() and the body of the
// wxFontEncoding constructor are not visible in this listing.
2363 class wxMBConv_wxwin
: public wxMBConv
// set up both converters; m_ok is true only if both Init() calls succeed
2368 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2369 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2373 // temporarily just use wxEncodingConverter stuff,
2374 // so that it works while a better implementation is built
2375 wxMBConv_wxwin(const wxChar
* name
)
// the name is resolved non-interactively by the font mapper; the other
// assignment falls back to wxFONTENCODING_SYSTEM (the condition selecting
// between the two is not visible here)
2378 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2380 m_enc
= wxFONTENCODING_SYSTEM
;
2385 wxMBConv_wxwin(wxFontEncoding enc
)
// Multibyte -> wide using m2w; the output size n is ignored (WXUNUSED)
2392 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2394 size_t inbuf
= strlen(psz
);
2397 if (!m2w
.Convert(psz
,buf
))
// Wide -> multibyte using w2m; the output size n is ignored (WXUNUSED)
2403 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2405 const size_t inbuf
= wxWcslen(psz
);
2408 if (!w2m
.Convert(psz
,buf
))
2415 bool IsOk() const { return m_ok
; }
// the encoding of the multibyte side and the two converters
2418 wxFontEncoding m_enc
;
2419 wxEncodingConverter m2w
, w2m
;
2421 // were we initialized successfully?
2424 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2427 // make the constructors available for unit testing
// Exported factory creating a wxMBConv_wxwin for the charset called name.
// The new object is checked with IsOk() after construction.
// NOTE(review): what happens when IsOk() fails and the return statement are
// not visible in this listing -- confirm.
2428 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_wxwin( const wxChar
* name
)
2430 wxMBConv_wxwin
* result
= new wxMBConv_wxwin( name
);
2431 if ( !result
->IsOk() )
2439 #endif // wxUSE_FONTMAP
2441 // ============================================================================
2442 // wxCSConv implementation
2443 // ============================================================================
2445 void wxCSConv::Init()
// Constructs the converter from a charset name. The only statement visible in
// this listing stores wxFONTENCODING_SYSTEM in m_encoding.
// NOTE(review): how charset itself is recorded is not shown here -- confirm.
2452 wxCSConv::wxCSConv(const wxChar
*charset
)
2461 m_encoding
= wxFONTENCODING_SYSTEM
;
// Constructs the converter from a wxFontEncoding value, which ends up in
// m_encoding. wxFONTENCODING_MAX and wxFONTENCODING_DEFAULT are not accepted:
// they trigger wxFAIL_MSG() and are replaced by wxFONTENCODING_SYSTEM.
2464 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2466 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2468 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2470 encoding
= wxFONTENCODING_SYSTEM
;
2475 m_encoding
= encoding
;
2478 wxCSConv::~wxCSConv()
// Copy constructor: takes over the name (through SetName()) and the encoding
// of conv.
// NOTE(review): the initialisation of any other members is not visible in
// this listing.
2483 wxCSConv::wxCSConv(const wxCSConv
& conv
)
2488 SetName(conv
.m_name
);
2489 m_encoding
= conv
.m_encoding
;
// Assignment operator: copies the name (through SetName()) and the encoding
// of conv.
// NOTE(review): whether the previous state is released first and the return
// statement are not visible in this listing -- confirm.
2492 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2496 SetName(conv
.m_name
);
2497 m_encoding
= conv
.m_encoding
;
2502 void wxCSConv::Clear()
// Stores a copy of charset, made with wxStrdup(), in m_name.
// NOTE(review): any guard around the assignment is not visible in this
// listing -- confirm how a NULL charset is handled.
2511 void wxCSConv::SetName(const wxChar
*charset
)
2515 m_name
= wxStrdup(charset
);
2521 #include "wx/hashmap.h"
// Hash map type from wxFontEncoding to wxString and the file-static cache of
// that type. wxCSConv::DoCreate() uses it to remember, per encoding, the
// charset name for which a wxMBConv_iconv could be created; an empty string
// records that none was found.
2523 WX_DECLARE_HASH_MAP( wxFontEncoding
, wxString
, wxIntegerHash
, wxIntegerEqual
,
2524 wxEncodingNameCache
);
2526 static wxEncodingNameCache gs_nameCache
;
// Creates the wxMBConv object doing the real work for this wxCSConv.
//
// For wxFONTENCODING_ISO8859_1 no dedicated conversion object is needed (see
// the comment below). Otherwise the candidates are tried in the order given
// further down: the OS facilities (wxMBConv_iconv, wxMBConv_win32,
// wxMBConv_mac or wxMBConv_cocoa, depending on the build), then the built-in
// UTF-7/8/16/32 classes and finally wxMBConv_wxwin. The charset names for
// which iconv worked (or an empty string if none did) are remembered per
// encoding in gs_nameCache. If nothing can be created an error is logged, with
// the static alreadyLoggingError flag guarding against re-entering wxLogError.
// NOTE(review): the return statements, the IsOk() checks on the candidates and
// most preprocessor guards are not visible in this listing -- confirm the
// exact control flow against the full source.
2529 wxMBConv
*wxCSConv::DoCreate() const
2532 wxLogTrace(TRACE_STRCONV
,
2533 wxT("creating conversion for %s"),
2535 : wxFontMapperBase::GetEncodingName(m_encoding
).c_str()));
2536 #endif // wxUSE_FONTMAP
2538 // check for the special case of ASCII or ISO8859-1 charset: as we have
2539 // special knowledge of it anyhow, we don't need to create a special
2540 // conversion object
2541 if ( m_encoding
== wxFONTENCODING_ISO8859_1
)
2543 // don't convert at all
2547 // we trust OS to do conversion better than we can so try external
2548 // conversion methods first
2550 // the full order is:
2551 // 1. OS conversion (iconv() under Unix or Win32 API)
2552 // 2. hard coded conversions for UTF
2553 // 3. wxEncodingConverter as fall back
2559 #endif // !wxUSE_FONTMAP
2561 wxString
name(m_name
);
2562 wxFontEncoding
encoding(m_encoding
);
2564 if ( !name
.empty() )
2566 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
2574 wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2575 #endif // wxUSE_FONTMAP
2579 const wxEncodingNameCache::iterator it
= gs_nameCache
.find(encoding
);
2580 if ( it
!= gs_nameCache
.end() )
2582 if ( it
->second
.empty() )
2585 wxMBConv_iconv
*conv
= new wxMBConv_iconv(it
->second
);
2592 const wxChar
** names
= wxFontMapperBase::GetAllEncodingNames(encoding
);
2594 for ( ; *names
; ++names
)
2596 wxMBConv_iconv
*conv
= new wxMBConv_iconv(*names
);
2599 gs_nameCache
[encoding
] = *names
;
2606 gs_nameCache
[encoding
] = _T(""); // cache the failure
2608 #endif // wxUSE_FONTMAP
2610 #endif // HAVE_ICONV
2612 #ifdef wxHAVE_WIN32_MB2WC
2615 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2616 : new wxMBConv_win32(m_encoding
);
2625 #endif // wxHAVE_WIN32_MB2WC
2626 #if defined(__WXMAC__)
2628 // leave UTF16 and UTF32 to the built-ins of wx
2629 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
2630 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
2634 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
2635 : new wxMBConv_mac(m_encoding
);
2637 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
2646 #if defined(__WXCOCOA__)
2648 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
2652 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
2653 : new wxMBConv_cocoa(m_encoding
);
2655 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
2665 wxFontEncoding enc
= m_encoding
;
2667 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2669 // use "false" to suppress interactive dialogs -- we can be called from
2670 // anywhere and popping up a dialog from here is the last thing we want to
2672 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
2674 #endif // wxUSE_FONTMAP
2678 case wxFONTENCODING_UTF7
:
2679 return new wxMBConvUTF7
;
2681 case wxFONTENCODING_UTF8
:
2682 return new wxMBConvUTF8
;
2684 case wxFONTENCODING_UTF16BE
:
2685 return new wxMBConvUTF16BE
;
2687 case wxFONTENCODING_UTF16LE
:
2688 return new wxMBConvUTF16LE
;
2690 case wxFONTENCODING_UTF32BE
:
2691 return new wxMBConvUTF32BE
;
2693 case wxFONTENCODING_UTF32LE
:
2694 return new wxMBConvUTF32LE
;
2697 // nothing to do but put here to suppress gcc warnings
2704 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2705 : new wxMBConv_wxwin(m_encoding
);
2711 #endif // wxUSE_FONTMAP
2713 // NB: This is a hack to prevent deadlock. What could otherwise happen
2714 // in Unicode build: wxConvLocal creation ends up being here
2715 // because of some failure and logs the error. But wxLog will try to
2716 // attach timestamp, for which it will need wxConvLocal (to convert
2717 // time to char* and then wchar_t*), but that fails, tries to log
2718 // error, but wxLog has a (already locked) critical section that
2719 // guards static buffer.
2720 static bool alreadyLoggingError
= false;
2721 if (!alreadyLoggingError
)
2723 alreadyLoggingError
= true;
2724 wxLogError(_("Cannot convert from the charset '%s'!"),
2728 wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str()
2729 #else // !wxUSE_FONTMAP
2730 wxString::Format(_("encoding %s"), m_encoding
).c_str()
2731 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2733 alreadyLoggingError
= false;
// Creates the real conversion object on demand: m_convReal is set to the
// result of DoCreate() and m_deferred is reset to false afterwards. As the
// method is const, the members are modified through a cast-away-const "self"
// pointer. If neither a name nor a specific encoding was given, the name of
// the system encoding (wxLocale::GetSystemEncodingName()) is used.
// NOTE(review): the test that makes this happen only once (presumably on
// m_deferred) is not visible in this listing -- confirm.
2739 void wxCSConv::CreateConvIfNeeded() const
2743 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2746 // if we have neither the name nor the encoding, use the default
2747 // encoding for this system
2748 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
2750 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
2752 #endif // wxUSE_INTL
2754 self
->m_convReal
= DoCreate();
2755 self
->m_deferred
= false;
// Converts the multibyte string psz to wide characters in buf (size n).
// The real converter is created on demand by CreateConvIfNeeded() and the call
// is forwarded to m_convReal->MB2WC(). The remaining code is a fallback that
// copies every byte of psz, terminating NUL included (note the "<="), into buf
// as an unsigned value, i.e. widens the string one-to-one.
// NOTE(review): the conditions selecting between the two paths, any check of
// buf/n in the fallback and its return value are not visible in this listing.
2759 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2761 CreateConvIfNeeded();
2764 return m_convReal
->MB2WC(buf
, psz
, n
);
2767 size_t len
= strlen(psz
);
2771 for (size_t c
= 0; c
<= len
; c
++)
2772 buf
[c
] = (unsigned char)(psz
[c
]);
// Converts the wide string psz to multibyte characters in buf (size n).
// The real converter is created on demand by CreateConvIfNeeded() and the call
// is forwarded to m_convReal->WC2MB(). The remaining code is a fallback that
// walks over all characters of psz, terminating NUL included (note the "<="),
// and stores each one narrowed to char in buf.
// NOTE(review): the conditions selecting between the paths, the body of the
// second loop and the return value of the fallback are not visible in this
// listing -- confirm.
2778 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2780 CreateConvIfNeeded();
2783 return m_convReal
->WC2MB(buf
, psz
, n
);
2786 const size_t len
= wxWcslen(psz
);
2789 for (size_t c
= 0; c
<= len
; c
++)
2793 buf
[c
] = (char)psz
[c
];
2798 for (size_t c
= 0; c
<= len
; c
++)
2808 // ----------------------------------------------------------------------------
2810 // ----------------------------------------------------------------------------
// File-static converter objects and the exported references/pointers bound to
// them. The type of wxConvLibcObj depends on the platform: wxMBConv_win32,
// wxMBConv_mac or wxMBConvLibc (the first #if line and the #else/#endif of
// this conditional are not visible in this listing). wxConvCurrent initially
// points to wxConvLibcObj.
2813 static wxMBConv_win32 wxConvLibcObj
;
2814 #elif defined(__WXMAC__) && !defined(__MACH__)
2815 static wxMBConv_mac wxConvLibcObj
;
2817 static wxMBConvLibc wxConvLibcObj
;
2820 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
2821 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
2822 static wxMBConvUTF7 wxConvUTF7Obj
;
2823 static wxMBConvUTF8 wxConvUTF8Obj
;
2825 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
2826 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
2827 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
2828 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
2829 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
2830 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
2831 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
= &
2839 #else // !wxUSE_WCHAR_T
2841 // stand-ins in absence of wchar_t
2842 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
2847 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T