1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // ============================================================================
17 // ============================================================================
19 // ----------------------------------------------------------------------------
21 // ----------------------------------------------------------------------------
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
35 #include "wx/strconv.h"
40 #include "wx/msw/private.h"
41 #include "wx/msw/missing.h"
52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54 #endif // __WIN32__ but !__WXMICROWIN__
62 #include "wx/thread.h"
65 #include "wx/encconv.h"
66 #include "wx/fontmap.h"
71 #include <ATSUnicode.h>
72 #include <TextCommon.h>
73 #include <TextEncodingConverter.h>
76 #include "wx/mac/private.h" // includes mac headers
79 #define TRACE_STRCONV _T("strconv")
81 // ============================================================================
83 // ============================================================================
85 // ----------------------------------------------------------------------------
86 // UTF-16 en/decoding to/from UCS-4
87 // ----------------------------------------------------------------------------
90 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
95 *output
= (wxUint16
) input
;
98 else if (input
>=0x110000)
106 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
107 *output
= (wxUint16
) ((input
&0x3ff)+0xdc00);
113 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
115 if ((*input
<0xd800) || (*input
>0xdfff))
120 else if ((input
[1]<0xdc00) || (input
[1]>0xdfff))
127 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
133 // ----------------------------------------------------------------------------
135 // ----------------------------------------------------------------------------
137 wxMBConv::~wxMBConv()
139 // nothing to do here (necessary for Darwin linking probably)
142 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
146 // calculate the length of the buffer needed first
147 size_t nLen
= MB2WC(NULL
, psz
, 0);
148 if ( nLen
!= (size_t)-1 )
150 // now do the actual conversion
151 wxWCharBuffer
buf(nLen
);
152 nLen
= MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NULL
153 if ( nLen
!= (size_t)-1 )
160 wxWCharBuffer
buf((wchar_t *)NULL
);
165 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
169 size_t nLen
= WC2MB(NULL
, pwz
, 0);
170 if ( nLen
!= (size_t)-1 )
172 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
173 nLen
= WC2MB(buf
.data(), pwz
, nLen
+ 4);
174 if ( nLen
!= (size_t)-1 )
181 wxCharBuffer
buf((char *)NULL
);
186 const wxWCharBuffer
wxMBConv::cMB2WC(const char *szString
, size_t nStringLen
, size_t* pOutSize
) const
188 wxASSERT(pOutSize
!= NULL
);
190 const char* szEnd
= szString
+ nStringLen
+ 1;
191 const char* szPos
= szString
;
192 const char* szStart
= szPos
;
194 size_t nActualLength
= 0;
195 size_t nCurrentSize
= nStringLen
; //try normal size first (should never resize?)
197 wxWCharBuffer
theBuffer(nCurrentSize
);
199 //Convert the string until the length() is reached, continuing the
200 //loop every time a null character is reached
201 while(szPos
!= szEnd
)
203 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
205 //Get the length of the current (sub)string
206 size_t nLen
= MB2WC(NULL
, szPos
, 0);
208 //Invalid conversion?
209 if( nLen
== (size_t)-1 )
212 theBuffer
.data()[0u] = wxT('\0');
217 //Increase the actual length (+1 for current null character)
218 nActualLength
+= nLen
+ 1;
220 //if buffer too small, realloc the buffer
221 if (nActualLength
> (nCurrentSize
+1))
223 wxWCharBuffer
theNewBuffer(nCurrentSize
<< 1);
224 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
* sizeof(wchar_t));
225 theBuffer
= theNewBuffer
;
229 //Convert the current (sub)string
230 if ( MB2WC(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
233 theBuffer
.data()[0u] = wxT('\0');
237 //Increment to next (sub)string
238 //Note that we have to use strlen instead of nLen here
239 //because XX2XX gives us the size of the output buffer,
240 //which is not necessarily the length of the string
241 szPos
+= strlen(szPos
) + 1;
244 //success - return actual length and the buffer
245 *pOutSize
= nActualLength
;
249 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *szString
, size_t nStringLen
, size_t* pOutSize
) const
251 wxASSERT(pOutSize
!= NULL
);
253 const wchar_t* szEnd
= szString
+ nStringLen
+ 1;
254 const wchar_t* szPos
= szString
;
255 const wchar_t* szStart
= szPos
;
257 size_t nActualLength
= 0;
258 size_t nCurrentSize
= nStringLen
<< 2; //try * 4 first
260 wxCharBuffer
theBuffer(nCurrentSize
);
262 //Convert the string until the length() is reached, continuing the
263 //loop every time a null character is reached
264 while(szPos
!= szEnd
)
266 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
268 //Get the length of the current (sub)string
269 size_t nLen
= WC2MB(NULL
, szPos
, 0);
271 //Invalid conversion?
272 if( nLen
== (size_t)-1 )
275 theBuffer
.data()[0u] = wxT('\0');
279 //Increase the actual length (+1 for current null character)
280 nActualLength
+= nLen
+ 1;
282 //if buffer too small, realloc the buffer
283 if (nActualLength
> (nCurrentSize
+1))
285 wxCharBuffer
theNewBuffer(nCurrentSize
<< 1);
286 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
);
287 theBuffer
= theNewBuffer
;
291 //Convert the current (sub)string
292 if(WC2MB(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
295 theBuffer
.data()[0u] = wxT('\0');
299 //Increment to next (sub)string
300 //Note that we have to use wxWcslen instead of nLen here
301 //because XX2XX gives us the size of the output buffer,
302 //which is not necessarily the length of the string
303 szPos
+= wxWcslen(szPos
) + 1;
306 //success - return actual length and the buffer
307 *pOutSize
= nActualLength
;
311 // ----------------------------------------------------------------------------
313 // ----------------------------------------------------------------------------
315 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
317 return wxMB2WC(buf
, psz
, n
);
320 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
322 return wxWC2MB(buf
, psz
, n
);
327 // ----------------------------------------------------------------------------
328 // wxConvBrokenFileNames
329 // ----------------------------------------------------------------------------
331 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar
*charset
)
333 if ( !charset
|| wxStricmp(charset
, _T("UTF-8")) == 0
334 || wxStricmp(charset
, _T("UTF8")) == 0 )
335 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
337 m_conv
= new wxCSConv(charset
);
341 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf
,
343 size_t outputSize
) const
345 return m_conv
->MB2WC( outputBuf
, psz
, outputSize
);
349 wxConvBrokenFileNames::WC2MB(char *outputBuf
,
351 size_t outputSize
) const
353 return m_conv
->WC2MB( outputBuf
, psz
, outputSize
);
358 // ----------------------------------------------------------------------------
360 // ----------------------------------------------------------------------------
362 // Implementation (C) 2004 Fredrik Roubert
365 // BASE64 decoding table
367 static const unsigned char utf7unb64
[] =
369 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
370 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
371 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
372 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
373 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
374 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
375 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
376 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
377 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
378 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
379 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
380 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
381 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
382 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
383 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
384 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
385 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
386 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
387 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
388 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
389 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
390 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
391 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
392 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
393 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
394 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
395 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
396 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
397 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
398 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
399 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
400 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
403 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
407 while (*psz
&& ((!buf
) || (len
< n
)))
409 unsigned char cc
= *psz
++;
417 else if (*psz
== '-')
427 // BASE64 encoded string
431 for (lsb
= false, d
= 0, l
= 0;
432 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff; psz
++)
436 for (l
+= 6; l
>= 8; lsb
= !lsb
)
438 c
= (unsigned char)((d
>> (l
-= 8)) % 256);
447 *buf
= (wchar_t)(c
<< 8);
454 if (buf
&& (len
< n
))
460 // BASE64 encoding table
462 static const unsigned char utf7enb64
[] =
464 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
465 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
466 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
467 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
468 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
469 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
470 'w', 'x', 'y', 'z', '0', '1', '2', '3',
471 '4', '5', '6', '7', '8', '9', '+', '/'
475 // UTF-7 encoding table
477 // 0 - Set D (directly encoded characters)
478 // 1 - Set O (optional direct characters)
479 // 2 - whitespace characters (optional)
480 // 3 - special characters
482 static const unsigned char utf7encode
[128] =
484 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
485 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
486 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
488 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
490 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
494 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
500 while (*psz
&& ((!buf
) || (len
< n
)))
503 if (cc
< 0x80 && utf7encode
[cc
] < 1)
511 else if (((wxUint32
)cc
) > 0xffff)
513 // no surrogate pair generation (yet?)
524 // BASE64 encode string
525 unsigned int lsb
, d
, l
;
526 for (d
= 0, l
= 0;; psz
++)
528 for (lsb
= 0; lsb
< 2; lsb
++)
531 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
533 for (l
+= 8; l
>= 6; )
537 *buf
++ = utf7enb64
[(d
>> l
) % 64];
542 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
548 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
557 if (buf
&& (len
< n
))
562 // ----------------------------------------------------------------------------
564 // ----------------------------------------------------------------------------
566 static wxUint32 utf8_max
[]=
567 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
569 // boundaries of the private use area we use to (temporarily) remap bytes
570 // that are invalid in a UTF-8 encoded string
571 const wxUint32 wxUnicodePUA
= 0x100000;
572 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
574 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
578 while (*psz
&& ((!buf
) || (len
< n
)))
580 const char *opsz
= psz
;
581 bool invalid
= false;
582 unsigned char cc
= *psz
++, fc
= cc
;
584 for (cnt
= 0; fc
& 0x80; cnt
++)
593 // escape the escape character for octal escapes
594 if ((m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
595 && cc
== '\\' && (!buf
|| len
< n
))
607 // invalid UTF-8 sequence
612 unsigned ocnt
= cnt
- 1;
613 wxUint32 res
= cc
& (0x3f >> cnt
);
617 if ((cc
& 0xC0) != 0x80)
619 // invalid UTF-8 sequence
624 res
= (res
<< 6) | (cc
& 0x3f);
626 if (invalid
|| res
<= utf8_max
[ocnt
])
628 // illegal UTF-8 encoding
631 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
632 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
634 // if one of our PUA characters turns up externally
635 // it must also be treated as an illegal sequence
636 // (a bit like you have to escape an escape character)
642 // cast is ok because wchar_t == wxUint16 if WC_UTF16
643 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
644 if (pa
== (size_t)-1)
658 #endif // WC_UTF16/!WC_UTF16
663 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
665 while (opsz
< psz
&& (!buf
|| len
< n
))
668 // cast is ok because wchar_t == wxUint16 if WC_UTF16
669 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
670 wxASSERT(pa
!= (size_t)-1);
677 *buf
++ = wxUnicodePUA
+ (unsigned char)*opsz
;
683 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
685 while (opsz
< psz
&& (!buf
|| len
< n
))
687 if ( buf
&& len
+ 3 < n
)
689 unsigned char n
= *opsz
;
691 *buf
++ = (wchar_t)( L
'0' + n
/ 0100 );
692 *buf
++ = (wchar_t)( L
'0' + (n
% 0100) / 010 );
693 *buf
++ = (wchar_t)( L
'0' + n
% 010 );
699 else // MAP_INVALID_UTF8_NOT
706 if (buf
&& (len
< n
))
711 static inline bool isoctal(wchar_t wch
)
713 return L
'0' <= wch
&& wch
<= L
'7';
716 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
720 while (*psz
&& ((!buf
) || (len
< n
)))
724 // cast is ok for WC_UTF16
725 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
726 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
728 cc
=(*psz
++) & 0x7fffffff;
731 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
732 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
735 *buf
++ = (char)(cc
- wxUnicodePUA
);
738 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
739 && cc
== L
'\\' && psz
[0] == L
'\\' )
746 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
748 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
752 *buf
++ = (char) ((psz
[0] - L
'0')*0100 +
753 (psz
[1] - L
'0')*010 +
763 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
777 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
779 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
791 // ----------------------------------------------------------------------------
793 // ----------------------------------------------------------------------------
795 #ifdef WORDS_BIGENDIAN
796 #define wxMBConvUTF16straight wxMBConvUTF16BE
797 #define wxMBConvUTF16swap wxMBConvUTF16LE
799 #define wxMBConvUTF16swap wxMBConvUTF16BE
800 #define wxMBConvUTF16straight wxMBConvUTF16LE
806 // copy 16bit MB to 16bit String
807 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
811 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
814 *buf
++ = *(wxUint16
*)psz
;
817 psz
+= sizeof(wxUint16
);
819 if (buf
&& len
<n
) *buf
=0;
825 // copy 16bit String to 16bit MB
826 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
830 while (*psz
&& (!buf
|| len
< n
))
834 *(wxUint16
*)buf
= *psz
;
835 buf
+= sizeof(wxUint16
);
837 len
+= sizeof(wxUint16
);
840 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
846 // swap 16bit MB to 16bit String
847 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
851 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
855 ((char *)buf
)[0] = psz
[1];
856 ((char *)buf
)[1] = psz
[0];
860 psz
+= sizeof(wxUint16
);
862 if (buf
&& len
<n
) *buf
=0;
868 // swap 16bit String to 16bit MB
869 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
873 while (*psz
&& (!buf
|| len
< n
))
877 *buf
++ = ((char*)psz
)[1];
878 *buf
++ = ((char*)psz
)[0];
880 len
+= sizeof(wxUint16
);
883 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
892 // copy 16bit MB to 32bit String
893 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
897 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
900 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
901 if (pa
== (size_t)-1)
907 psz
+= pa
* sizeof(wxUint16
);
909 if (buf
&& len
<n
) *buf
=0;
915 // copy 32bit String to 16bit MB
916 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
920 while (*psz
&& (!buf
|| len
< n
))
923 size_t pa
=encode_utf16(*psz
, cc
);
925 if (pa
== (size_t)-1)
930 *(wxUint16
*)buf
= cc
[0];
931 buf
+= sizeof(wxUint16
);
934 *(wxUint16
*)buf
= cc
[1];
935 buf
+= sizeof(wxUint16
);
939 len
+= pa
*sizeof(wxUint16
);
942 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
948 // swap 16bit MB to 32bit String
949 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
953 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
957 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
958 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
960 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
961 if (pa
== (size_t)-1)
968 psz
+= pa
* sizeof(wxUint16
);
970 if (buf
&& len
<n
) *buf
=0;
976 // swap 32bit String to 16bit MB
977 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
981 while (*psz
&& (!buf
|| len
< n
))
984 size_t pa
=encode_utf16(*psz
, cc
);
986 if (pa
== (size_t)-1)
991 *buf
++ = ((char*)cc
)[1];
992 *buf
++ = ((char*)cc
)[0];
995 *buf
++ = ((char*)cc
)[3];
996 *buf
++ = ((char*)cc
)[2];
1000 len
+= pa
*sizeof(wxUint16
);
1003 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
1011 // ----------------------------------------------------------------------------
1013 // ----------------------------------------------------------------------------
1015 #ifdef WORDS_BIGENDIAN
1016 #define wxMBConvUTF32straight wxMBConvUTF32BE
1017 #define wxMBConvUTF32swap wxMBConvUTF32LE
1019 #define wxMBConvUTF32swap wxMBConvUTF32BE
1020 #define wxMBConvUTF32straight wxMBConvUTF32LE
1024 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1025 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
1030 // copy 32bit MB to 16bit String
1031 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1035 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1039 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
1040 if (pa
== (size_t)-1)
1050 psz
+= sizeof(wxUint32
);
1052 if (buf
&& len
<n
) *buf
=0;
1058 // copy 16bit String to 32bit MB
1059 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1063 while (*psz
&& (!buf
|| len
< n
))
1067 // cast is ok for WC_UTF16
1068 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
1069 if (pa
== (size_t)-1)
1074 *(wxUint32
*)buf
= cc
;
1075 buf
+= sizeof(wxUint32
);
1077 len
+= sizeof(wxUint32
);
1081 if (buf
&& len
<=n
-sizeof(wxUint32
))
1089 // swap 32bit MB to 16bit String
1090 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1094 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1097 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
1098 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
1103 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
1104 if (pa
== (size_t)-1)
1114 psz
+= sizeof(wxUint32
);
1124 // swap 16bit String to 32bit MB
1125 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1129 while (*psz
&& (!buf
|| len
< n
))
1133 // cast is ok for WC_UTF16
1134 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
1135 if (pa
== (size_t)-1)
1145 len
+= sizeof(wxUint32
);
1149 if (buf
&& len
<=n
-sizeof(wxUint32
))
1158 // copy 32bit MB to 32bit String
1159 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1163 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1166 *buf
++ = *(wxUint32
*)psz
;
1168 psz
+= sizeof(wxUint32
);
1178 // copy 32bit String to 32bit MB
1179 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1183 while (*psz
&& (!buf
|| len
< n
))
1187 *(wxUint32
*)buf
= *psz
;
1188 buf
+= sizeof(wxUint32
);
1191 len
+= sizeof(wxUint32
);
1195 if (buf
&& len
<=n
-sizeof(wxUint32
))
1202 // swap 32bit MB to 32bit String
1203 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1207 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1211 ((char *)buf
)[0] = psz
[3];
1212 ((char *)buf
)[1] = psz
[2];
1213 ((char *)buf
)[2] = psz
[1];
1214 ((char *)buf
)[3] = psz
[0];
1218 psz
+= sizeof(wxUint32
);
1228 // swap 32bit String to 32bit MB
1229 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1233 while (*psz
&& (!buf
|| len
< n
))
1237 *buf
++ = ((char *)psz
)[3];
1238 *buf
++ = ((char *)psz
)[2];
1239 *buf
++ = ((char *)psz
)[1];
1240 *buf
++ = ((char *)psz
)[0];
1242 len
+= sizeof(wxUint32
);
1246 if (buf
&& len
<=n
-sizeof(wxUint32
))
1256 // ============================================================================
1257 // The classes doing conversion using the iconv_xxx() functions
1258 // ============================================================================
1262 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1263 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1264 // (unless there's yet another bug in glibc) the only case when iconv()
1265 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1266 // left in the input buffer -- when _real_ error occurs,
1267 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1269 // [This bug does not appear in glibc 2.2.]
1270 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1271 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1272 (errno != E2BIG || bufLeft != 0))
1274 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1277 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1279 #define ICONV_T_INVALID ((iconv_t)-1)
1281 #if SIZEOF_WCHAR_T == 4
1282 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1283 #define WC_ENC wxFONTENCODING_UTF32
1284 #elif SIZEOF_WCHAR_T == 2
1285 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1286 #define WC_ENC wxFONTENCODING_UTF16
1287 #else // sizeof(wchar_t) != 2 nor 4
1288 // does this ever happen?
1289 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1292 // ----------------------------------------------------------------------------
1293 // wxMBConv_iconv: encapsulates an iconv character set
1294 // ----------------------------------------------------------------------------
1296 class wxMBConv_iconv
: public wxMBConv
1299 wxMBConv_iconv(const wxChar
*name
);
1300 virtual ~wxMBConv_iconv();
1302 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1303 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1306 { return (m2w
!= ICONV_T_INVALID
) && (w2m
!= ICONV_T_INVALID
); }
1309 // the iconv handlers used to translate from multibyte to wide char and in
1310 // the other direction
1314 // guards access to m2w and w2m objects
1315 wxMutex m_iconvMutex
;
1319 // the name (for iconv_open()) of a wide char charset -- if none is
1320 // available on this machine, it will remain NULL
1321 static wxString ms_wcCharsetName
;
1323 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1324 // different endian-ness than the native one
1325 static bool ms_wcNeedsSwap
;
1328 // make the constructor available for unit testing
1329 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_iconv( const wxChar
* name
)
1331 wxMBConv_iconv
* result
= new wxMBConv_iconv( name
);
1332 if ( !result
->IsOk() )
1340 wxString
wxMBConv_iconv::ms_wcCharsetName
;
1341 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1343 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1345 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1346 // names for the charsets
1347 const wxCharBuffer
cname(wxString(name
).ToAscii());
1349 // check for charset that represents wchar_t:
1350 if ( ms_wcCharsetName
.empty() )
1353 const wxChar
**names
= wxFontMapperBase::GetAllEncodingNames(WC_ENC
);
1354 #else // !wxUSE_FONTMAP
1355 static const wxChar
*names
[] =
1357 #if SIZEOF_WCHAR_T == 4
1359 #elif SIZEOF_WCHAR_T == 2 // equality test: '=' is not valid in a preprocessor expression
1364 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1366 for ( ; *names
; ++names
)
1368 const wxString
name(*names
);
1370 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1371 wxString
nameXE(name
);
1372 #ifdef WORDS_BIGENDIAN
1374 #else // little endian
1378 m2w
= iconv_open(nameXE
.ToAscii(), cname
);
1379 if ( m2w
== ICONV_T_INVALID
)
1381 // try charset w/o bytesex info (e.g. "UCS4")
1382 m2w
= iconv_open(name
.ToAscii(), cname
);
1384 // and check for bytesex ourselves:
1385 if ( m2w
!= ICONV_T_INVALID
)
1387 char buf
[2], *bufPtr
;
1388 wchar_t wbuf
[2], *wbufPtr
;
1396 outsz
= SIZEOF_WCHAR_T
* 2;
1400 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1401 (char**)&wbufPtr
, &outsz
);
1403 if (ICONV_FAILED(res
, insz
))
1405 wxLogLastError(wxT("iconv"));
1406 wxLogError(_("Conversion to charset '%s' doesn't work."),
1409 else // ok, can convert to this encoding, remember it
1411 ms_wcCharsetName
= name
;
1412 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1416 else // use charset not requiring byte swapping
1418 ms_wcCharsetName
= nameXE
;
1422 wxLogTrace(TRACE_STRCONV
,
1423 wxT("iconv wchar_t charset is \"%s\"%s"),
1424 ms_wcCharsetName
.empty() ? _T("<none>")
1425 : ms_wcCharsetName
.c_str(),
1426 ms_wcNeedsSwap
? _T(" (needs swap)")
1429 else // we already have ms_wcCharsetName
1431 m2w
= iconv_open(ms_wcCharsetName
.ToAscii(), cname
);
1434 if ( ms_wcCharsetName
.empty() )
1436 w2m
= ICONV_T_INVALID
;
1440 w2m
= iconv_open(cname
, ms_wcCharsetName
.ToAscii());
1441 if ( w2m
== ICONV_T_INVALID
)
1443 wxLogTrace(TRACE_STRCONV
,
1444 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1445 ms_wcCharsetName
.c_str(), cname
.data());
1450 wxMBConv_iconv::~wxMBConv_iconv()
1452 if ( m2w
!= ICONV_T_INVALID
)
1454 if ( w2m
!= ICONV_T_INVALID
)
1458 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1461 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1462 // Unfortunately there are a couple of global wxCSConv objects such as
1463 // wxConvLocal that are used all over wx code, so we have to make sure
1464 // the handle is used by at most one thread at a time. Otherwise
1465 // only a few wx classes would be safe to use from non-main threads
1466 // as MB<->WC conversion would fail "randomly".
1467 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1470 size_t inbuf
= strlen(psz
);
1471 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1473 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1474 wchar_t *bufPtr
= buf
;
1475 const char *pszPtr
= psz
;
1479 // have destination buffer, convert there
1481 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1482 (char**)&bufPtr
, &outbuf
);
1483 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1487 // convert to native endianness
1488 for ( unsigned n
= 0; n
< res
; n
++ )
1489 buf
[n
] = WC_BSWAP(buf
[n
]);
1492 // NB: iconv was given only strlen(psz) characters on input, and so
1493 // it couldn't convert the trailing zero. Let's do it ourselves
1494 // if there's some room left for it in the output buffer.
1500 // no destination buffer... convert using temp buffer
1501 // to calculate destination buffer requirement
1506 outbuf
= 8*SIZEOF_WCHAR_T
;
1509 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1510 (char**)&bufPtr
, &outbuf
);
1512 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1513 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1516 if (ICONV_FAILED(cres
, inbuf
))
1518 //VS: it is ok if iconv fails, hence trace only
1519 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1526 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1529 // NB: explained in MB2WC
1530 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1533 size_t inbuf
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
1537 wchar_t *tmpbuf
= 0;
1541 // need to copy to temp buffer to switch endianness
1542 // (doing WC_BSWAP twice on the original buffer won't help, as it
1543 // could be in read-only memory, or be accessed in some other thread)
1544 tmpbuf
= (wchar_t *)malloc(inbuf
+ SIZEOF_WCHAR_T
);
1545 for ( size_t n
= 0; n
< inbuf
; n
++ )
1546 tmpbuf
[n
] = WC_BSWAP(psz
[n
]);
1547 tmpbuf
[inbuf
] = L
'\0';
1553 // have destination buffer, convert there
1554 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1558 // NB: iconv was given only wcslen(psz) characters on input, and so
1559 // it couldn't convert the trailing zero. Let's do it ourselves
1560 // if there's some room left for it in the output buffer.
1566 // no destination buffer... convert using temp buffer
1567 // to calculate destination buffer requirement
1571 buf
= tbuf
; outbuf
= 16;
1573 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1576 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1584 if (ICONV_FAILED(cres
, inbuf
))
1586 //VS: it is ok if iconv fails, hence trace only
1587 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1594 #endif // HAVE_ICONV
1597 // ============================================================================
1598 // Win32 conversion classes
1599 // ============================================================================
1601 #ifdef wxHAVE_WIN32_MB2WC
1605 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1606 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1609 class wxMBConv_win32
: public wxMBConv
1614 m_CodePage
= CP_ACP
;
1618 wxMBConv_win32(const wxChar
* name
)
1620 m_CodePage
= wxCharsetToCodepage(name
);
1623 wxMBConv_win32(wxFontEncoding encoding
)
1625 m_CodePage
= wxEncodingToCodepage(encoding
);
1629 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1631 // note that we have to use MB_ERR_INVALID_CHARS flag as without it
1632 // the behaviour is not compatible with the Unix version (using iconv)
1633 // and breaks the library itself, e.g. wxTextInputStream::NextChar()
1634 // wouldn't work if reading an incomplete MB char didn't result in an
1637 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1638 // an error (tested under Windows Server 2003) and apparently it is
1639 // done on purpose, i.e. the function accepts any input in this case
1640 // and although I'd prefer to return error on ill-formed output, our
1641 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1642 // explicitly ill-formed according to RFC 2152) either so we don't
1643 // even have any fallback here...
1644 int flags
= m_CodePage
== CP_UTF7
? 0 : MB_ERR_INVALID_CHARS
;
1646 const size_t len
= ::MultiByteToWideChar
1648 m_CodePage
, // code page
1649 flags
, // flags: fall on error
1650 psz
, // input string
1651 -1, // its length (NUL-terminated)
1652 buf
, // output string
1653 buf
? n
: 0 // size of output buffer
1656 // note that it returns count of written chars for buf != NULL and size
1657 // of the needed buffer for buf == NULL so in either case the length of
1658 // the string (which never includes the terminating NUL) is one less
1659 return len
? len
- 1 : (size_t)-1;
1662 size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
1665 we have a problem here: by default, WideCharToMultiByte() may
1666 replace characters unrepresentable in the target code page with bad
1667 quality approximations such as turning "1/2" symbol (U+00BD) into
1668 "1" for the code pages which don't have it and we, obviously, want
1669 to avoid this at any price
1671 the trouble is that this function does it _silently_, i.e. it won't
1672 even tell us whether it did or not... Win98/2000 and higher provide
1673 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1674 we have to resort to a round trip, i.e. check that converting back
1675 results in the same string -- this is, of course, expensive but
1676 otherwise we simply can't be sure to not garble the data.
1679 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1680 // it doesn't work with CJK encodings (which we test for rather roughly
1681 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1683 BOOL usedDef
wxDUMMY_INITIALIZE(false);
1686 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
1688 // it's our lucky day
1689 flags
= WC_NO_BEST_FIT_CHARS
;
1690 pUsedDef
= &usedDef
;
1692 else // old system or unsupported encoding
1698 const size_t len
= ::WideCharToMultiByte
1700 m_CodePage
, // code page
1701 flags
, // either none or no best fit
1702 pwz
, // input string
1703 -1, // it is (wide) NUL-terminated
1704 buf
, // output buffer
1705 buf
? n
: 0, // and its size
1706 NULL
, // default "replacement" char
1707 pUsedDef
// [out] was it used?
1712 // function totally failed
1716 // if we were really converting, check if we succeeded
1721 // check if the conversion failed, i.e. if any replacements
1726 else // we must resort to double tripping...
1728 wxWCharBuffer
wcBuf(n
);
1729 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
1730 wcscmp(wcBuf
, pwz
) != 0 )
1732 // we didn't obtain the same thing we started from, hence
1733 // the conversion was lossy and we consider that it failed
1739 // see the comment above for the reason of "len - 1"
1743 bool IsOk() const { return m_CodePage
!= -1; }
// Tells whether WC_NO_BEST_FIT_CHARS may be passed to WideCharToMultiByte().
// The answer is derived from wxGetOsVersion() on the first call and cached
// in a function-local static: -1 means "not determined yet" and only the
// value 1 makes the function return true.
1746 static bool CanUseNoBestFit()
1748 static int s_isWin98Or2k
= -1;
1750 if ( s_isWin98Or2k
== -1 )
1753 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
// NOTE(review): major and minor are tested independently here, so a version
// with a higher major but a minor below 10 would be rejected -- presumably
// this branch only ever sees 4.x versions, confirm.
1756 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
1760 s_isWin98Or2k
= verMaj
>= 5;
1764 // unknown, be conservative by default
1768 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
1771 return s_isWin98Or2k
== 1;
1777 #endif // wxHAVE_WIN32_MB2WC
1779 // ============================================================================
1780 // Cocoa conversion classes
1781 // ============================================================================
1783 #if defined(__WXCOCOA__)
1785 // RN: There is no UTF-32 support in either Core Foundation or
1786 // Cocoa. Strangely enough, Core Foundation uses UTF-32
1787 // internally quite a bit - it's just not public (yet).
1789 #include <CoreFoundation/CFString.h>
1790 #include <CoreFoundation/CFStringEncodingExt.h>
1792 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
1794 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
1795 if ( encoding
== wxFONTENCODING_DEFAULT
)
1797 enc
= CFStringGetSystemEncoding();
1799 else switch( encoding
)
1801 case wxFONTENCODING_ISO8859_1
:
1802 enc
= kCFStringEncodingISOLatin1
;
1804 case wxFONTENCODING_ISO8859_2
:
1805 enc
= kCFStringEncodingISOLatin2
;
1807 case wxFONTENCODING_ISO8859_3
:
1808 enc
= kCFStringEncodingISOLatin3
;
1810 case wxFONTENCODING_ISO8859_4
:
1811 enc
= kCFStringEncodingISOLatin4
;
1813 case wxFONTENCODING_ISO8859_5
:
1814 enc
= kCFStringEncodingISOLatinCyrillic
;
1816 case wxFONTENCODING_ISO8859_6
:
1817 enc
= kCFStringEncodingISOLatinArabic
;
1819 case wxFONTENCODING_ISO8859_7
:
1820 enc
= kCFStringEncodingISOLatinGreek
;
1822 case wxFONTENCODING_ISO8859_8
:
1823 enc
= kCFStringEncodingISOLatinHebrew
;
1825 case wxFONTENCODING_ISO8859_9
:
1826 enc
= kCFStringEncodingISOLatin5
;
1828 case wxFONTENCODING_ISO8859_10
:
1829 enc
= kCFStringEncodingISOLatin6
;
1831 case wxFONTENCODING_ISO8859_11
:
1832 enc
= kCFStringEncodingISOLatinThai
;
1834 case wxFONTENCODING_ISO8859_13
:
1835 enc
= kCFStringEncodingISOLatin7
;
1837 case wxFONTENCODING_ISO8859_14
:
1838 enc
= kCFStringEncodingISOLatin8
;
1840 case wxFONTENCODING_ISO8859_15
:
1841 enc
= kCFStringEncodingISOLatin9
;
1844 case wxFONTENCODING_KOI8
:
1845 enc
= kCFStringEncodingKOI8_R
;
1847 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
1848 enc
= kCFStringEncodingDOSRussian
;
1851 // case wxFONTENCODING_BULGARIAN :
1855 case wxFONTENCODING_CP437
:
1856 enc
=kCFStringEncodingDOSLatinUS
;
1858 case wxFONTENCODING_CP850
:
1859 enc
= kCFStringEncodingDOSLatin1
;
1861 case wxFONTENCODING_CP852
:
1862 enc
= kCFStringEncodingDOSLatin2
;
1864 case wxFONTENCODING_CP855
:
1865 enc
= kCFStringEncodingDOSCyrillic
;
1867 case wxFONTENCODING_CP866
:
1868 enc
=kCFStringEncodingDOSRussian
;
1870 case wxFONTENCODING_CP874
:
1871 enc
= kCFStringEncodingDOSThai
;
1873 case wxFONTENCODING_CP932
:
1874 enc
= kCFStringEncodingDOSJapanese
;
1876 case wxFONTENCODING_CP936
:
1877 enc
=kCFStringEncodingDOSChineseSimplif
;
1879 case wxFONTENCODING_CP949
:
1880 enc
= kCFStringEncodingDOSKorean
;
1882 case wxFONTENCODING_CP950
:
1883 enc
= kCFStringEncodingDOSChineseTrad
;
1885 case wxFONTENCODING_CP1250
:
1886 enc
= kCFStringEncodingWindowsLatin2
;
1888 case wxFONTENCODING_CP1251
:
1889 enc
=kCFStringEncodingWindowsCyrillic
;
1891 case wxFONTENCODING_CP1252
:
1892 enc
=kCFStringEncodingWindowsLatin1
;
1894 case wxFONTENCODING_CP1253
:
1895 enc
= kCFStringEncodingWindowsGreek
;
1897 case wxFONTENCODING_CP1254
:
1898 enc
= kCFStringEncodingWindowsLatin5
;
1900 case wxFONTENCODING_CP1255
:
1901 enc
=kCFStringEncodingWindowsHebrew
;
1903 case wxFONTENCODING_CP1256
:
1904 enc
=kCFStringEncodingWindowsArabic
;
1906 case wxFONTENCODING_CP1257
:
1907 enc
= kCFStringEncodingWindowsBalticRim
;
1909 // This only really encodes to UTF7 (if that) evidently
1910 // case wxFONTENCODING_UTF7 :
1911 // enc = kCFStringEncodingNonLossyASCII ;
1913 case wxFONTENCODING_UTF8
:
1914 enc
= kCFStringEncodingUTF8
;
1916 case wxFONTENCODING_EUC_JP
:
1917 enc
= kCFStringEncodingEUC_JP
;
1919 case wxFONTENCODING_UTF16
:
1920 enc
= kCFStringEncodingUnicode
;
1922 case wxFONTENCODING_MACROMAN
:
1923 enc
= kCFStringEncodingMacRoman
;
1925 case wxFONTENCODING_MACJAPANESE
:
1926 enc
= kCFStringEncodingMacJapanese
;
1928 case wxFONTENCODING_MACCHINESETRAD
:
1929 enc
= kCFStringEncodingMacChineseTrad
;
1931 case wxFONTENCODING_MACKOREAN
:
1932 enc
= kCFStringEncodingMacKorean
;
1934 case wxFONTENCODING_MACARABIC
:
1935 enc
= kCFStringEncodingMacArabic
;
1937 case wxFONTENCODING_MACHEBREW
:
1938 enc
= kCFStringEncodingMacHebrew
;
1940 case wxFONTENCODING_MACGREEK
:
1941 enc
= kCFStringEncodingMacGreek
;
1943 case wxFONTENCODING_MACCYRILLIC
:
1944 enc
= kCFStringEncodingMacCyrillic
;
1946 case wxFONTENCODING_MACDEVANAGARI
:
1947 enc
= kCFStringEncodingMacDevanagari
;
1949 case wxFONTENCODING_MACGURMUKHI
:
1950 enc
= kCFStringEncodingMacGurmukhi
;
1952 case wxFONTENCODING_MACGUJARATI
:
1953 enc
= kCFStringEncodingMacGujarati
;
1955 case wxFONTENCODING_MACORIYA
:
1956 enc
= kCFStringEncodingMacOriya
;
1958 case wxFONTENCODING_MACBENGALI
:
1959 enc
= kCFStringEncodingMacBengali
;
1961 case wxFONTENCODING_MACTAMIL
:
1962 enc
= kCFStringEncodingMacTamil
;
1964 case wxFONTENCODING_MACTELUGU
:
1965 enc
= kCFStringEncodingMacTelugu
;
1967 case wxFONTENCODING_MACKANNADA
:
1968 enc
= kCFStringEncodingMacKannada
;
1970 case wxFONTENCODING_MACMALAJALAM
:
1971 enc
= kCFStringEncodingMacMalayalam
;
1973 case wxFONTENCODING_MACSINHALESE
:
1974 enc
= kCFStringEncodingMacSinhalese
;
1976 case wxFONTENCODING_MACBURMESE
:
1977 enc
= kCFStringEncodingMacBurmese
;
1979 case wxFONTENCODING_MACKHMER
:
1980 enc
= kCFStringEncodingMacKhmer
;
1982 case wxFONTENCODING_MACTHAI
:
1983 enc
= kCFStringEncodingMacThai
;
1985 case wxFONTENCODING_MACLAOTIAN
:
1986 enc
= kCFStringEncodingMacLaotian
;
1988 case wxFONTENCODING_MACGEORGIAN
:
1989 enc
= kCFStringEncodingMacGeorgian
;
1991 case wxFONTENCODING_MACARMENIAN
:
1992 enc
= kCFStringEncodingMacArmenian
;
1994 case wxFONTENCODING_MACCHINESESIMP
:
1995 enc
= kCFStringEncodingMacChineseSimp
;
1997 case wxFONTENCODING_MACTIBETAN
:
1998 enc
= kCFStringEncodingMacTibetan
;
2000 case wxFONTENCODING_MACMONGOLIAN
:
2001 enc
= kCFStringEncodingMacMongolian
;
2003 case wxFONTENCODING_MACETHIOPIC
:
2004 enc
= kCFStringEncodingMacEthiopic
;
2006 case wxFONTENCODING_MACCENTRALEUR
:
2007 enc
= kCFStringEncodingMacCentralEurRoman
;
2009 case wxFONTENCODING_MACVIATNAMESE
:
2010 enc
= kCFStringEncodingMacVietnamese
;
2012 case wxFONTENCODING_MACARABICEXT
:
2013 enc
= kCFStringEncodingMacExtArabic
;
2015 case wxFONTENCODING_MACSYMBOL
:
2016 enc
= kCFStringEncodingMacSymbol
;
2018 case wxFONTENCODING_MACDINGBATS
:
2019 enc
= kCFStringEncodingMacDingbats
;
2021 case wxFONTENCODING_MACTURKISH
:
2022 enc
= kCFStringEncodingMacTurkish
;
2024 case wxFONTENCODING_MACCROATIAN
:
2025 enc
= kCFStringEncodingMacCroatian
;
2027 case wxFONTENCODING_MACICELANDIC
:
2028 enc
= kCFStringEncodingMacIcelandic
;
2030 case wxFONTENCODING_MACROMANIAN
:
2031 enc
= kCFStringEncodingMacRomanian
;
2033 case wxFONTENCODING_MACCELTIC
:
2034 enc
= kCFStringEncodingMacCeltic
;
2036 case wxFONTENCODING_MACGAELIC
:
2037 enc
= kCFStringEncodingMacGaelic
;
2039 // case wxFONTENCODING_MACKEYBOARD :
2040 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2043 // because gcc is picky
// wxMBConv implementation which converts through Core Foundation strings;
// each constructor only selects the CFStringEncoding stored by Init().
2049 class wxMBConv_cocoa
: public wxMBConv
// default: use the system encoding
2054 Init(CFStringGetSystemEncoding()) ;
// from a charset name, mapped to a wxFontEncoding without user interaction
// (the "false" argument) and then to a CFStringEncoding
2058 wxMBConv_cocoa(const wxChar
* name
)
2060 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
// from a wxFontEncoding
2064 wxMBConv_cocoa(wxFontEncoding encoding
)
2066 Init( wxCFStringEncFromFontEnc(encoding
) );
// remembers the encoding used by MB2WC() and WC2MB()
2073 void Init( CFStringEncoding encoding
)
2075 m_encoding
= encoding
;
2078 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2082 CFStringRef theString
= CFStringCreateWithBytes (
2083 NULL
, //the allocator
2084 (const UInt8
*)szUnConv
,
2087 false //no BOM/external representation
2090 wxASSERT(theString
);
2092 size_t nOutLength
= CFStringGetLength(theString
);
2096 CFRelease(theString
);
2100 CFRange theRange
= { 0, nOutSize
};
2102 #if SIZEOF_WCHAR_T == 4
2103 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2106 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2108 CFRelease(theString
);
2110 szUniCharBuffer
[nOutLength
] = '\0' ;
2112 #if SIZEOF_WCHAR_T == 4
2113 wxMBConvUTF16 converter
;
2114 converter
.MB2WC(szOut
, (const char*)szUniCharBuffer
, nOutSize
) ;
2115 delete[] szUniCharBuffer
;
// Converts the NUL-terminated wide string szUnConv to m_encoding by wrapping
// its UTF-16 form in a CFString and extracting the bytes from it.
// Returns nRealOutSize - 1, see the notes on nRealOutSize below.
2121 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2125 size_t nRealOutSize
;
2126 size_t nBufSize
= wxWcslen(szUnConv
);
2127 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
// with a 4 byte wchar_t the input is first converted to UTF-16 in a
// temporary buffer; nBufSize is in bytes until the division below
2129 #if SIZEOF_WCHAR_T == 4
2130 wxMBConvUTF16 converter
;
2131 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2132 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1] ;
2133 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
)) ;
2134 nBufSize
/= sizeof(UniChar
);
// the CFString only references szUniBuffer, it doesn't copy or free it
2137 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2141 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
// NOTE(review): a NULL string is only caught by this assert -- confirm
// whether creation can fail here when assertions are disabled
2144 wxASSERT(theString
);
2146 //Note that CER puts a BOM when converting to unicode
2147 //so we check and use getchars instead in that case
2148 if (m_encoding
== kCFStringEncodingUnicode
)
// NOTE(review): the range length is nOutSize - 1 UniChars, independently of
// the string length, while szOut is a char buffer -- looks like this can
// exceed both the string and the buffer, confirm against the callers
2151 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2153 nRealOutSize
= CFStringGetLength(theString
) + 1;
2159 CFRangeMake(0, CFStringGetLength(theString
)),
2161 0, //what to put in characters that can't be converted -
2162 //0 tells CFString to return NULL if it meets such a character
2163 false, //not an external representation
// NOTE(review): in this branch nRealOutSize is only set through this out
// parameter and 1 is still subtracted from it below -- confirm that the
// value stored here counts a terminator as the other branch does
2166 (CFIndex
*) &nRealOutSize
2170 CFRelease(theString
);
2172 #if SIZEOF_WCHAR_T == 4
2173 delete[] szUniBuffer
;
2176 return nRealOutSize
- 1;
// usable only if the encoding id is valid and Core Foundation reports the
// encoding as available
2181 return m_encoding
!= kCFStringEncodingInvalidId
&&
2182 CFStringIsEncodingAvailable(m_encoding
);
// the encoding selected by Init()
2186 CFStringEncoding m_encoding
;
2189 #endif // defined(__WXCOCOA__)
2191 // ============================================================================
2192 // Mac conversion classes
2193 // ============================================================================
2195 #if defined(__WXMAC__) && defined(TARGET_CARBON)
// wxMBConv implementation based on the Text Encoding Converter (TEC): one
// converter object is created for each direction by Init().
2197 class wxMBConv_mac
: public wxMBConv
// default: use the system encoding
2202 Init(CFStringGetSystemEncoding()) ;
// from a charset name, mapped to a wxFontEncoding without user interaction
// (the "false" argument)
2206 wxMBConv_mac(const wxChar
* name
)
2208 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
// from a wxFontEncoding
2212 wxMBConv_mac(wxFontEncoding encoding
)
2214 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
// both converters are disposed of here
// NOTE(review): the status values are assigned but not examined in the
// visible code
2219 OSStatus status
= noErr
;
2220 status
= TECDisposeConverter(m_MB2WC_converter
);
2221 status
= TECDisposeConverter(m_WC2MB_converter
);
// stores the character encoding, builds the 16 bit Unicode encoding and
// creates the converters for both directions
// NOTE(review): the TECCreateConverter() results are stored in status only
// -- presumably IsOk() relies on the converter references staying NULL on
// failure, confirm that they are initialized
2225 void Init( TextEncodingBase encoding
)
2227 OSStatus status
= noErr
;
2228 m_char_encoding
= encoding
;
2229 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
2231 status
= TECCreateConverter(&m_MB2WC_converter
,
2233 m_unicode_encoding
);
2234 status
= TECCreateConverter(&m_WC2MB_converter
,
// Converts the NUL-terminated string psz to wide characters using
// m_MB2WC_converter. The text is first converted to UniChar (16 bit) units
// and, if wchar_t is 4 bytes wide, converted again with wxMBConvUTF16.
2239 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2241 OSStatus status
= noErr
;
2242 ByteCount byteOutLen
;
2243 ByteCount byteInLen
= strlen(psz
) ;
2244 wchar_t *tbuf
= NULL
;
2245 UniChar
* ubuf
= NULL
;
// a temporary output buffer of at least 32 characters is allocated and n is
// replaced by its size
2250 //apple specs say at least 32
2251 n
= wxMax( 32 , byteInLen
) ;
2252 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
2254 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
// 4 byte wchar_t: convert into a separate UniChar buffer with 2 extra bytes
// for the terminator written below, otherwise convert in place
2255 #if SIZEOF_WCHAR_T == 4
2256 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2258 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
// NOTE(review): status is assigned but not examined in the visible code and
// byteOutLen is used below whatever the result -- confirm how conversion
// errors are meant to be reported
2260 status
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2261 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2262 #if SIZEOF_WCHAR_T == 4
2263 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2264 // is not properly terminated we get random characters at the end
2265 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2266 wxMBConvUTF16 converter
;
2267 res
= converter
.MB2WC( (buf
? buf
: tbuf
) , (const char*)ubuf
, n
) ;
// without the second conversion the result is the number of UniChars written
2270 res
= byteOutLen
/ sizeof( UniChar
) ;
// this test guards an action on buf when the result is shorter than n
2275 if ( buf
&& res
< n
)
// Converts the NUL-terminated wide string psz to the character encoding
// using m_WC2MB_converter and then converts the result back to verify that
// nothing was lost.
2281 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2283 OSStatus status
= noErr
;
2284 ByteCount byteOutLen
;
2285 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
// a temporary output buffer is allocated, sized at 8 bytes per input
// character (and at least 32 bytes), and n is replaced by its size
2291 //apple specs say at least 32
2292 n
= wxMax( 32 , ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2293 tbuf
= (char*) malloc( n
) ;
2296 ByteCount byteBufferLen
= n
;
2297 UniChar
* ubuf
= NULL
;
// 4 byte wchar_t: the input is converted to UTF-16 in a temporary buffer
// first, otherwise psz is used directly as UniChar data
2298 #if SIZEOF_WCHAR_T == 4
2299 wxMBConvUTF16 converter
;
2300 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2301 byteInLen
= unicharlen
;
2302 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2303 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2305 ubuf
= (UniChar
*) psz
;
// NOTE(review): status is assigned but not examined in the visible code --
// confirm how conversion errors are meant to be reported
2307 status
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2308 (TextPtr
) (buf
? buf
: tbuf
) , byteBufferLen
, &byteOutLen
);
2309 #if SIZEOF_WCHAR_T == 4
2315 size_t res
= byteOutLen
;
2316 if ( buf
&& res
< n
)
2320 //we need to double-trip to verify it didn't insert any ? in place
2321 //of bogus characters
2322 wxWCharBuffer
wcBuf(n
);
2323 size_t pszlen
= wxWcslen(psz
);
2324 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
2325 wxWcslen(wcBuf
) != pszlen
||
2326 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2328 // we didn't obtain the same thing we started from, hence
2329 // the conversion was lossy and we consider that it failed
// usable only if the converters for both directions exist
2338 { return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
; }
// the TEC converter objects, one per direction
2341 TECObjectRef m_MB2WC_converter
;
2342 TECObjectRef m_WC2MB_converter
;
// the encodings the converters translate between, set by Init()
2344 TextEncodingBase m_char_encoding
;
2345 TextEncodingBase m_unicode_encoding
;
2348 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2350 // ============================================================================
2351 // wxEncodingConverter based conversion classes
2352 // ============================================================================
2356 class wxMBConv_wxwin
: public wxMBConv
2361 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2362 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2366 // temporarily just use wxEncodingConverter stuff,
2367 // so that it works while a better implementation is built
2368 wxMBConv_wxwin(const wxChar
* name
)
2371 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2373 m_enc
= wxFONTENCODING_SYSTEM
;
2378 wxMBConv_wxwin(wxFontEncoding enc
)
2385 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2387 size_t inbuf
= strlen(psz
);
2390 if (!m2w
.Convert(psz
,buf
))
2396 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2398 const size_t inbuf
= wxWcslen(psz
);
2401 if (!w2m
.Convert(psz
,buf
))
2408 bool IsOk() const { return m_ok
; }
2411 wxFontEncoding m_enc
;
2412 wxEncodingConverter m2w
, w2m
;
2414 // were we initialized successfully?
2417 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2420 // make the constructors available for unit testing
2421 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_wxwin( const wxChar
* name
)
2423 wxMBConv_wxwin
* result
= new wxMBConv_wxwin( name
);
2424 if ( !result
->IsOk() )
2432 #endif // wxUSE_FONTMAP
2434 // ============================================================================
2435 // wxCSConv implementation
2436 // ============================================================================
2438 void wxCSConv::Init()
// Creates a conversion identified by a charset name; the encoding itself is
// left as wxFONTENCODING_SYSTEM.
2445 wxCSConv::wxCSConv(const wxChar
*charset
)
2454 m_encoding
= wxFONTENCODING_SYSTEM
;
// Creates a conversion for the given encoding. wxFONTENCODING_MAX and
// wxFONTENCODING_DEFAULT are not accepted: they trigger a debug failure
// message and are replaced with wxFONTENCODING_SYSTEM.
2457 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2459 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2461 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2463 encoding
= wxFONTENCODING_SYSTEM
;
2468 m_encoding
= encoding
;
2471 wxCSConv::~wxCSConv()
// Copy constructor: takes over the name (through SetName(), which duplicates
// the string) and the encoding of conv.
2476 wxCSConv::wxCSConv(const wxCSConv
& conv
)
2481 SetName(conv
.m_name
);
2482 m_encoding
= conv
.m_encoding
;
2485 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2489 SetName(conv
.m_name
);
2490 m_encoding
= conv
.m_encoding
;
2495 void wxCSConv::Clear()
// Stores a private copy (made with wxStrdup()) of the charset name in m_name.
2504 void wxCSConv::SetName(const wxChar
*charset
)
2508 m_name
= wxStrdup(charset
);
2514 #include "wx/hashmap.h"
// Hash map from a wxFontEncoding to a wxString, used by wxCSConv::DoCreate()
// to remember which encoding name iconv accepted (an empty string records
// that none did).
2516 WX_DECLARE_HASH_MAP( wxFontEncoding
, wxString
, wxIntegerHash
, wxIntegerEqual
,
2517 wxEncodingNameCache
);
2519 static wxEncodingNameCache gs_nameCache
;
// Creates the object which does the real conversion for m_name/m_encoding.
// The candidates are tried in the order given in the comment below: the
// conversion provided by the OS, the built-in UTF converters and finally
// wxEncodingConverter. An error is logged if none of them can be used.
2522 wxMBConv
*wxCSConv::DoCreate() const
2525 wxLogTrace(TRACE_STRCONV
,
2526 wxT("creating conversion for %s"),
2528 : wxFontMapperBase::GetEncodingName(m_encoding
).c_str()));
2529 #endif // wxUSE_FONTMAP
2531 // check for the special case of ASCII or ISO8859-1 charset: as we have
2532 // special knowledge of it anyhow, we don't need to create a special
2533 // conversion object
2534 if ( m_encoding
== wxFONTENCODING_ISO8859_1
)
2536 // don't convert at all
2540 // we trust OS to do conversion better than we can so try external
2541 // conversion methods first
2543 // the full order is:
2544 // 1. OS conversion (iconv() under Unix or Win32 API)
2545 // 2. hard coded conversions for UTF
2546 // 3. wxEncodingConverter as fall back
2552 #endif // !wxUSE_FONTMAP
2554 wxString
name(m_name
);
2555 wxFontEncoding
encoding(m_encoding
);
// iconv is tried with the charset name first
2557 if ( !name
.empty() )
2559 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
2567 wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2568 #endif // wxUSE_FONTMAP
// then with the name cached for this encoding by a previous call, an empty
// cached name being the record of an earlier failure
2572 const wxEncodingNameCache::iterator it
= gs_nameCache
.find(encoding
);
2573 if ( it
!= gs_nameCache
.end() )
2575 if ( it
->second
.empty() )
2578 wxMBConv_iconv
*conv
= new wxMBConv_iconv(it
->second
);
// and finally with all the names known for this encoding, caching the one
// which was used
2585 const wxChar
** names
= wxFontMapperBase::GetAllEncodingNames(encoding
);
2587 for ( ; *names
; ++names
)
2589 wxMBConv_iconv
*conv
= new wxMBConv_iconv(*names
);
2592 gs_nameCache
[encoding
] = *names
;
2599 gs_nameCache
[encoding
] = _T(""); // cache the failure
2601 #endif // wxUSE_FONTMAP
2603 #endif // HAVE_ICONV
2605 #ifdef wxHAVE_WIN32_MB2WC
2608 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2609 : new wxMBConv_win32(m_encoding
);
2618 #endif // wxHAVE_WIN32_MB2WC
2619 #if defined(__WXMAC__)
2621 // leave UTF16 and UTF32 to the built-ins of wx
2622 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
2623 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
2627 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
2628 : new wxMBConv_mac(m_encoding
);
2630 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
2639 #if defined(__WXCOCOA__)
2641 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
2645 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
2646 : new wxMBConv_cocoa(m_encoding
);
2648 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
// the encoding used to choose among the built-in UTF converters
2658 wxFontEncoding enc
= m_encoding
;
2660 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2662 // use "false" to suppress interactive dialogs -- we can be called from
2663 // anywhere and popping up a dialog from here is the last thing we want to
2665 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
2667 #endif // wxUSE_FONTMAP
2671 case wxFONTENCODING_UTF7
:
2672 return new wxMBConvUTF7
;
2674 case wxFONTENCODING_UTF8
:
2675 return new wxMBConvUTF8
;
2677 case wxFONTENCODING_UTF16BE
:
2678 return new wxMBConvUTF16BE
;
2680 case wxFONTENCODING_UTF16LE
:
2681 return new wxMBConvUTF16LE
;
2683 case wxFONTENCODING_UTF32BE
:
2684 return new wxMBConvUTF32BE
;
2686 case wxFONTENCODING_UTF32LE
:
2687 return new wxMBConvUTF32LE
;
2690 // nothing to do but put here to suppress gcc warnings
// the last resort is the wxEncodingConverter based class
2697 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2698 : new wxMBConv_wxwin(m_encoding
);
2704 #endif // wxUSE_FONTMAP
2706 // NB: This is a hack to prevent deadlock. What could otherwise happen
2707 // in Unicode build: wxConvLocal creation ends up being here
2708 // because of some failure and logs the error. But wxLog will try to
2709 // attach timestamp, for which it will need wxConvLocal (to convert
2710 // time to char* and then wchar_t*), but that fails, tries to log
2711 // error, but wxLog has a (already locked) critical section that
2712 // guards static buffer.
2713 static bool alreadyLoggingError
= false;
2714 if (!alreadyLoggingError
)
2716 alreadyLoggingError
= true;
2717 wxLogError(_("Cannot convert from the charset '%s'!"),
2721 wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str()
2722 #else // !wxUSE_FONTMAP
// NOTE(review): "%s" is given m_encoding, which is a wxFontEncoding and not
// a string -- looks like a numeric conversion was intended here, confirm
2723 wxString::Format(_("encoding %s"), m_encoding
).c_str()
2724 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2726 alreadyLoggingError
= false;
2732 void wxCSConv::CreateConvIfNeeded() const
2736 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2739 // if we don't have neither the name nor the encoding, use the default
2740 // encoding for this system
2741 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
2743 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
2745 #endif // wxUSE_INTL
2747 self
->m_convReal
= DoCreate();
2748 self
->m_deferred
= false;
2752 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2754 CreateConvIfNeeded();
2757 return m_convReal
->MB2WC(buf
, psz
, n
);
2760 size_t len
= strlen(psz
);
2764 for (size_t c
= 0; c
<= len
; c
++)
2765 buf
[c
] = (unsigned char)(psz
[c
]);
2771 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2773 CreateConvIfNeeded();
2776 return m_convReal
->WC2MB(buf
, psz
, n
);
2779 const size_t len
= wxWcslen(psz
);
2782 for (size_t c
= 0; c
<= len
; c
++)
2786 buf
[c
] = (char)psz
[c
];
2791 for (size_t c
= 0; c
<= len
; c
++)
2801 // ----------------------------------------------------------------------------
2803 // ----------------------------------------------------------------------------
// The object behind wxConvLibc: its class depends on the platform.
2806 static wxMBConv_win32 wxConvLibcObj
;
2807 #elif defined(__WXMAC__) && !defined(__MACH__)
2808 static wxMBConv_mac wxConvLibcObj
;
2810 static wxMBConvLibc wxConvLibcObj
;
// The objects behind the other predefined conversions.
2813 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
2814 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
2815 static wxMBConvUTF7 wxConvUTF7Obj
;
2816 static wxMBConvUTF8 wxConvUTF8Obj
;
// The exported names are references (or, for wxConvCurrent, a pointer) bound
// to the file-static objects above.
2818 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
2819 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
2820 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
2821 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
2822 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
2823 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
2824 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
= &
2832 #else // !wxUSE_WCHAR_T
2834 // stand-ins in absence of wchar_t
2835 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
2840 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T