1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // ============================================================================
17 // ============================================================================
19 // ----------------------------------------------------------------------------
21 // ----------------------------------------------------------------------------
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
35 #include "wx/strconv.h"
40 #include "wx/msw/private.h"
41 #include "wx/msw/missing.h"
52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54 #endif // __WIN32__ but !__WXMICROWIN__
62 #include "wx/thread.h"
65 #include "wx/encconv.h"
66 #include "wx/fontmap.h"
71 #include <ATSUnicode.h>
72 #include <TextCommon.h>
73 #include <TextEncodingConverter.h>
76 #include "wx/mac/private.h" // includes mac headers
79 #define TRACE_STRCONV _T("strconv")
81 // ============================================================================
83 // ============================================================================
85 // ----------------------------------------------------------------------------
86 // UTF-16 en/decoding to/from UCS-4
87 // ----------------------------------------------------------------------------
90 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
95 *output
= (wxUint16
) input
;
98 else if (input
>=0x110000)
106 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
107 *output
= (wxUint16
) ((input
&0x3ff)+0xdc00);
113 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
115 if ((*input
<0xd800) || (*input
>0xdfff))
120 else if ((input
[1]<0xdc00) || (input
[1]>0xdfff))
127 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
133 // ----------------------------------------------------------------------------
135 // ----------------------------------------------------------------------------
137 wxMBConv::~wxMBConv()
139 // nothing to do here (necessary for Darwin linking probably)
142 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
146 // calculate the length of the buffer needed first
147 size_t nLen
= MB2WC(NULL
, psz
, 0);
148 if ( nLen
!= (size_t)-1 )
150 // now do the actual conversion
151 wxWCharBuffer
buf(nLen
);
152 nLen
= MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NULL
153 if ( nLen
!= (size_t)-1 )
160 wxWCharBuffer
buf((wchar_t *)NULL
);
165 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
169 size_t nLen
= WC2MB(NULL
, pwz
, 0);
170 if ( nLen
!= (size_t)-1 )
172 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
173 nLen
= WC2MB(buf
.data(), pwz
, nLen
+ 4);
174 if ( nLen
!= (size_t)-1 )
181 wxCharBuffer
buf((char *)NULL
);
186 const wxWCharBuffer
wxMBConv::cMB2WC(const char *szString
, size_t nStringLen
, size_t* pOutSize
) const
188 wxASSERT(pOutSize
!= NULL
);
190 const char* szEnd
= szString
+ nStringLen
+ 1;
191 const char* szPos
= szString
;
192 const char* szStart
= szPos
;
194 size_t nActualLength
= 0;
195 size_t nCurrentSize
= nStringLen
; //try normal size first (should never resize?)
197 wxWCharBuffer
theBuffer(nCurrentSize
);
199 //Convert the string until the length() is reached, continuing the
200 //loop every time a null character is reached
201 while(szPos
!= szEnd
)
203 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
205 //Get the length of the current (sub)string
206 size_t nLen
= MB2WC(NULL
, szPos
, 0);
208 //Invalid conversion?
209 if( nLen
== (size_t)-1 )
212 theBuffer
.data()[0u] = wxT('\0');
217 //Increase the actual length (+1 for current null character)
218 nActualLength
+= nLen
+ 1;
220 //if the buffer is too small, replace it with one twice the size
221 if (nActualLength
> (nCurrentSize
+1))
223 wxWCharBuffer
theNewBuffer(nCurrentSize
<< 1);
224 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
* sizeof(wchar_t));
225 theBuffer
= theNewBuffer
;
229 //Convert the current (sub)string
230 if ( MB2WC(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
233 theBuffer
.data()[0u] = wxT('\0');
237 //Increment to next (sub)string
238 //Note that we have to use strlen instead of nLen here
239 //because XX2XX gives us the size of the output buffer,
240 //which is not necessarily the length of the string
241 szPos
+= strlen(szPos
) + 1;
244 //success - return actual length and the buffer
245 *pOutSize
= nActualLength
;
249 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *szString
, size_t nStringLen
, size_t* pOutSize
) const
251 wxASSERT(pOutSize
!= NULL
);
253 const wchar_t* szEnd
= szString
+ nStringLen
+ 1;
254 const wchar_t* szPos
= szString
;
255 const wchar_t* szStart
= szPos
;
257 size_t nActualLength
= 0;
258 size_t nCurrentSize
= nStringLen
<< 2; //try * 4 first
260 wxCharBuffer
theBuffer(nCurrentSize
);
262 //Convert the string until the length() is reached, continuing the
263 //loop every time a null character is reached
264 while(szPos
!= szEnd
)
266 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
268 //Get the length of the current (sub)string
269 size_t nLen
= WC2MB(NULL
, szPos
, 0);
271 //Invalid conversion?
272 if( nLen
== (size_t)-1 )
275 theBuffer
.data()[0u] = wxT('\0');
279 //Increase the actual length (+1 for current null character)
280 nActualLength
+= nLen
+ 1;
283 //if the buffer is too small, replace it with one twice the size
283 if (nActualLength
> (nCurrentSize
+1))
285 wxCharBuffer
theNewBuffer(nCurrentSize
<< 1);
286 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
);
287 theBuffer
= theNewBuffer
;
291 //Convert the current (sub)string
292 if(WC2MB(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
295 theBuffer
.data()[0u] = wxT('\0');
299 //Increment to next (sub)string
300 //Note that we have to use wxWcslen instead of nLen here
301 //because XX2XX gives us the size of the output buffer,
302 //which is not necessarily the length of the string
303 szPos
+= wxWcslen(szPos
) + 1;
306 //success - return actual length and the buffer
307 *pOutSize
= nActualLength
;
311 // ----------------------------------------------------------------------------
313 // ----------------------------------------------------------------------------
315 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
317 return wxMB2WC(buf
, psz
, n
);
320 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
322 return wxWC2MB(buf
, psz
, n
);
327 // ----------------------------------------------------------------------------
328 // wxConvBrokenFileNames
329 // ----------------------------------------------------------------------------
331 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar
*charset
)
333 if ( !charset
|| wxStricmp(charset
, _T("UTF-8")) == 0
334 || wxStricmp(charset
, _T("UTF8")) == 0 )
335 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
337 m_conv
= new wxCSConv(charset
);
341 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf
,
343 size_t outputSize
) const
345 return m_conv
->MB2WC( outputBuf
, psz
, outputSize
);
349 wxConvBrokenFileNames::WC2MB(char *outputBuf
,
351 size_t outputSize
) const
353 return m_conv
->WC2MB( outputBuf
, psz
, outputSize
);
358 // ----------------------------------------------------------------------------
360 // ----------------------------------------------------------------------------
362 // Implementation (C) 2004 Fredrik Roubert
365 // BASE64 decoding table
367 static const unsigned char utf7unb64
[] =
369 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
370 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
371 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
372 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
373 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
374 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
375 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
376 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
377 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
378 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
379 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
380 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
381 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
382 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
383 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
384 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
385 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
386 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
387 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
388 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
389 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
390 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
391 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
392 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
393 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
394 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
395 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
396 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
397 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
398 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
399 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
400 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
403 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
407 while (*psz
&& ((!buf
) || (len
< n
)))
409 unsigned char cc
= *psz
++;
417 else if (*psz
== '-')
427 // BASE64 encoded string
431 for (lsb
= false, d
= 0, l
= 0;
432 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff; psz
++)
436 for (l
+= 6; l
>= 8; lsb
= !lsb
)
438 c
= (unsigned char)((d
>> (l
-= 8)) % 256);
447 *buf
= (wchar_t)(c
<< 8);
454 if (buf
&& (len
< n
))
460 // BASE64 encoding table
462 static const unsigned char utf7enb64
[] =
464 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
465 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
466 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
467 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
468 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
469 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
470 'w', 'x', 'y', 'z', '0', '1', '2', '3',
471 '4', '5', '6', '7', '8', '9', '+', '/'
475 // UTF-7 encoding table
477 // 0 - Set D (directly encoded characters)
478 // 1 - Set O (optional direct characters)
479 // 2 - whitespace characters (optional)
480 // 3 - special characters
482 static const unsigned char utf7encode
[128] =
484 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
485 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
486 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
488 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
490 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
494 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
500 while (*psz
&& ((!buf
) || (len
< n
)))
503 if (cc
< 0x80 && utf7encode
[cc
] < 1)
511 else if (((wxUint32
)cc
) > 0xffff)
513 // no surrogate pair generation (yet?)
524 // BASE64 encode string
525 unsigned int lsb
, d
, l
;
526 for (d
= 0, l
= 0;; psz
++)
528 for (lsb
= 0; lsb
< 2; lsb
++)
531 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
533 for (l
+= 8; l
>= 6; )
537 *buf
++ = utf7enb64
[(d
>> l
) % 64];
542 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
548 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
557 if (buf
&& (len
< n
))
562 // ----------------------------------------------------------------------------
564 // ----------------------------------------------------------------------------
566 static wxUint32 utf8_max
[]=
567 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
569 // boundaries of the private use area we use to (temporarily) remap
570 // characters that are invalid in a UTF-8 encoded string
571 const wxUint32 wxUnicodePUA
= 0x100000;
572 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
574 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
578 while (*psz
&& ((!buf
) || (len
< n
)))
580 const char *opsz
= psz
;
581 bool invalid
= false;
582 unsigned char cc
= *psz
++, fc
= cc
;
584 for (cnt
= 0; fc
& 0x80; cnt
++)
593 // escape the escape character for octal escapes
594 if ((m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
595 && cc
== '\\' && (!buf
|| len
< n
))
607 // invalid UTF-8 sequence
612 unsigned ocnt
= cnt
- 1;
613 wxUint32 res
= cc
& (0x3f >> cnt
);
617 if ((cc
& 0xC0) != 0x80)
619 // invalid UTF-8 sequence
624 res
= (res
<< 6) | (cc
& 0x3f);
626 if (invalid
|| res
<= utf8_max
[ocnt
])
628 // illegal UTF-8 encoding
631 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
632 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
634 // if one of our PUA characters turns up externally
635 // it must also be treated as an illegal sequence
636 // (a bit like you have to escape an escape character)
642 // cast is ok because wchar_t == wxUint16 if WC_UTF16
643 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
644 if (pa
== (size_t)-1)
658 #endif // WC_UTF16/!WC_UTF16
663 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
665 while (opsz
< psz
&& (!buf
|| len
< n
))
668 // cast is ok because wchar_t == wxUint16 if WC_UTF16
669 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
670 wxASSERT(pa
!= (size_t)-1);
677 *buf
++ = wxUnicodePUA
+ (unsigned char)*opsz
;
683 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
685 while (opsz
< psz
&& (!buf
|| len
< n
))
687 if ( buf
&& len
+ 3 < n
)
689 unsigned char n
= *opsz
;
691 *buf
++ = (wchar_t)( L
'0' + n
/ 0100 );
692 *buf
++ = (wchar_t)( L
'0' + (n
% 0100) / 010 );
693 *buf
++ = (wchar_t)( L
'0' + n
% 010 );
699 else // MAP_INVALID_UTF8_NOT
706 if (buf
&& (len
< n
))
711 static inline bool isoctal(wchar_t wch
)
713 return L
'0' <= wch
&& wch
<= L
'7';
716 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
720 while (*psz
&& ((!buf
) || (len
< n
)))
724 // cast is ok for WC_UTF16
725 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
726 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
728 cc
=(*psz
++) & 0x7fffffff;
731 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
732 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
735 *buf
++ = (char)(cc
- wxUnicodePUA
);
738 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
739 && cc
== L
'\\' && psz
[0] == L
'\\' )
746 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
748 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
752 *buf
++ = (char) ((psz
[0] - L
'0')*0100 +
753 (psz
[1] - L
'0')*010 +
763 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
777 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
779 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
791 // ----------------------------------------------------------------------------
793 // ----------------------------------------------------------------------------
795 #ifdef WORDS_BIGENDIAN
796 #define wxMBConvUTF16straight wxMBConvUTF16BE
797 #define wxMBConvUTF16swap wxMBConvUTF16LE
799 #define wxMBConvUTF16swap wxMBConvUTF16BE
800 #define wxMBConvUTF16straight wxMBConvUTF16LE
806 // copy 16bit MB to 16bit String
807 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
811 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
814 *buf
++ = *(wxUint16
*)psz
;
817 psz
+= sizeof(wxUint16
);
819 if (buf
&& len
<n
) *buf
=0;
825 // copy 16bit String to 16bit MB
826 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
830 while (*psz
&& (!buf
|| len
< n
))
834 *(wxUint16
*)buf
= *psz
;
835 buf
+= sizeof(wxUint16
);
837 len
+= sizeof(wxUint16
);
840 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
846 // swap 16bit MB to 16bit String
847 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
851 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
855 ((char *)buf
)[0] = psz
[1];
856 ((char *)buf
)[1] = psz
[0];
860 psz
+= sizeof(wxUint16
);
862 if (buf
&& len
<n
) *buf
=0;
868 // swap 16bit String to 16bit MB
869 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
873 while (*psz
&& (!buf
|| len
< n
))
877 *buf
++ = ((char*)psz
)[1];
878 *buf
++ = ((char*)psz
)[0];
880 len
+= sizeof(wxUint16
);
883 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
892 // copy 16bit MB to 32bit String
893 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
897 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
900 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
901 if (pa
== (size_t)-1)
907 psz
+= pa
* sizeof(wxUint16
);
909 if (buf
&& len
<n
) *buf
=0;
915 // copy 32bit String to 16bit MB
916 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
920 while (*psz
&& (!buf
|| len
< n
))
923 size_t pa
=encode_utf16(*psz
, cc
);
925 if (pa
== (size_t)-1)
930 *(wxUint16
*)buf
= cc
[0];
931 buf
+= sizeof(wxUint16
);
934 *(wxUint16
*)buf
= cc
[1];
935 buf
+= sizeof(wxUint16
);
939 len
+= pa
*sizeof(wxUint16
);
942 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
948 // swap 16bit MB to 32bit String
949 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
953 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
957 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
958 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
960 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
961 if (pa
== (size_t)-1)
968 psz
+= pa
* sizeof(wxUint16
);
970 if (buf
&& len
<n
) *buf
=0;
976 // swap 32bit String to 16bit MB
977 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
981 while (*psz
&& (!buf
|| len
< n
))
984 size_t pa
=encode_utf16(*psz
, cc
);
986 if (pa
== (size_t)-1)
991 *buf
++ = ((char*)cc
)[1];
992 *buf
++ = ((char*)cc
)[0];
995 *buf
++ = ((char*)cc
)[3];
996 *buf
++ = ((char*)cc
)[2];
1000 len
+= pa
*sizeof(wxUint16
);
1003 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
1011 // ----------------------------------------------------------------------------
1013 // ----------------------------------------------------------------------------
1015 #ifdef WORDS_BIGENDIAN
1016 #define wxMBConvUTF32straight wxMBConvUTF32BE
1017 #define wxMBConvUTF32swap wxMBConvUTF32LE
1019 #define wxMBConvUTF32swap wxMBConvUTF32BE
1020 #define wxMBConvUTF32straight wxMBConvUTF32LE
1024 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1025 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
1030 // copy 32bit MB to 16bit String
1031 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1035 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1039 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
1040 if (pa
== (size_t)-1)
1050 psz
+= sizeof(wxUint32
);
1052 if (buf
&& len
<n
) *buf
=0;
1058 // copy 16bit String to 32bit MB
1059 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1063 while (*psz
&& (!buf
|| len
< n
))
1067 // cast is ok for WC_UTF16
1068 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
1069 if (pa
== (size_t)-1)
1074 *(wxUint32
*)buf
= cc
;
1075 buf
+= sizeof(wxUint32
);
1077 len
+= sizeof(wxUint32
);
1081 if (buf
&& len
<=n
-sizeof(wxUint32
))
1089 // swap 32bit MB to 16bit String
1090 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1094 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1097 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
1098 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
1103 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
1104 if (pa
== (size_t)-1)
1114 psz
+= sizeof(wxUint32
);
1124 // swap 16bit String to 32bit MB
1125 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1129 while (*psz
&& (!buf
|| len
< n
))
1133 // cast is ok for WC_UTF16
1134 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
1135 if (pa
== (size_t)-1)
1145 len
+= sizeof(wxUint32
);
1149 if (buf
&& len
<=n
-sizeof(wxUint32
))
1158 // copy 32bit MB to 32bit String
1159 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1163 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1166 *buf
++ = *(wxUint32
*)psz
;
1168 psz
+= sizeof(wxUint32
);
1178 // copy 32bit String to 32bit MB
1179 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1183 while (*psz
&& (!buf
|| len
< n
))
1187 *(wxUint32
*)buf
= *psz
;
1188 buf
+= sizeof(wxUint32
);
1191 len
+= sizeof(wxUint32
);
1195 if (buf
&& len
<=n
-sizeof(wxUint32
))
1202 // swap 32bit MB to 32bit String
1203 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1207 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1211 ((char *)buf
)[0] = psz
[3];
1212 ((char *)buf
)[1] = psz
[2];
1213 ((char *)buf
)[2] = psz
[1];
1214 ((char *)buf
)[3] = psz
[0];
1218 psz
+= sizeof(wxUint32
);
1228 // swap 32bit String to 32bit MB
1229 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1233 while (*psz
&& (!buf
|| len
< n
))
1237 *buf
++ = ((char *)psz
)[3];
1238 *buf
++ = ((char *)psz
)[2];
1239 *buf
++ = ((char *)psz
)[1];
1240 *buf
++ = ((char *)psz
)[0];
1242 len
+= sizeof(wxUint32
);
1246 if (buf
&& len
<=n
-sizeof(wxUint32
))
1256 // ============================================================================
1257 // The classes doing conversion using the iconv_xxx() functions
1258 // ============================================================================
1262 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1263 // E2BIG if the output buffer is _exactly_ as big as needed. Such a case is
1264 // (unless there's yet another bug in glibc) the only case when iconv()
1265 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1266 // left in the input buffer -- when _real_ error occurs,
1267 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1269 // [This bug does not appear in glibc 2.2.]
1270 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1271 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1272 (errno != E2BIG || bufLeft != 0))
1274 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1277 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1279 #define ICONV_T_INVALID ((iconv_t)-1)
1281 #if SIZEOF_WCHAR_T == 4
1282 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1283 #define WC_ENC wxFONTENCODING_UTF32
1284 #elif SIZEOF_WCHAR_T == 2
1285 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1286 #define WC_ENC wxFONTENCODING_UTF16
1287 #else // sizeof(wchar_t) != 2 nor 4
1288 // does this ever happen?
1289 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1292 // ----------------------------------------------------------------------------
1293 // wxMBConv_iconv: encapsulates an iconv character set
1294 // ----------------------------------------------------------------------------
1296 class wxMBConv_iconv
: public wxMBConv
1299 wxMBConv_iconv(const wxChar
*name
);
1300 virtual ~wxMBConv_iconv();
1302 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1303 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1306 { return (m2w
!= ICONV_T_INVALID
) && (w2m
!= ICONV_T_INVALID
); }
1309 // the iconv handlers used to translate from multibyte to wide char and in
1310 // the other direction
1314 // guards access to m2w and w2m objects
1315 wxMutex m_iconvMutex
;
1319 // the name (for iconv_open()) of a wide char charset -- if none is
1320 // available on this machine, it will remain empty
1321 static wxString ms_wcCharsetName
;
1323 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1324 // different endian-ness than the native one
1325 static bool ms_wcNeedsSwap
;
1328 // make the constructor available for unit testing
1329 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_iconv( const wxChar
* name
)
1331 wxMBConv_iconv
* result
= new wxMBConv_iconv( name
);
1332 if ( !result
->IsOk() )
1340 wxString
wxMBConv_iconv::ms_wcCharsetName
;
1341 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1343 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1345 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1346 // names for the charsets
1347 const wxCharBuffer
cname(wxString(name
).ToAscii());
1349 // check for charset that represents wchar_t:
1350 if ( ms_wcCharsetName
.empty() )
1353 const wxChar
**names
= wxFontMapperBase::GetAllEncodingNames(WC_ENC
);
1354 #else // !wxUSE_FONTMAP
1355 static const wxChar
*names
[] =
1357 #if SIZEOF_WCHAR_T == 4
1359 #elif SIZEOF_WCHAR_T = 2
1364 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1366 for ( ; *names
; ++names
)
1368 const wxString
name(*names
);
1370 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1371 wxString
nameXE(name
);
1372 #ifdef WORDS_BIGENDIAN
1374 #else // little endian
1378 m2w
= iconv_open(nameXE
.ToAscii(), cname
);
1379 if ( m2w
== ICONV_T_INVALID
)
1381 // try charset w/o bytesex info (e.g. "UCS4")
1382 m2w
= iconv_open(name
.ToAscii(), cname
);
1384 // and check for bytesex ourselves:
1385 if ( m2w
!= ICONV_T_INVALID
)
1387 char buf
[2], *bufPtr
;
1388 wchar_t wbuf
[2], *wbufPtr
;
1396 outsz
= SIZEOF_WCHAR_T
* 2;
1400 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1401 (char**)&wbufPtr
, &outsz
);
1403 if (ICONV_FAILED(res
, insz
))
1405 wxLogLastError(wxT("iconv"));
1406 wxLogError(_("Conversion to charset '%s' doesn't work."),
1409 else // ok, can convert to this encoding, remember it
1411 ms_wcCharsetName
= name
;
1412 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1416 else // use charset not requiring byte swapping
1418 ms_wcCharsetName
= nameXE
;
1422 wxLogTrace(TRACE_STRCONV
,
1423 wxT("iconv wchar_t charset is \"%s\"%s"),
1424 ms_wcCharsetName
.empty() ? _T("<none>")
1425 : ms_wcCharsetName
.c_str(),
1426 ms_wcNeedsSwap
? _T(" (needs swap)")
1429 else // we already have ms_wcCharsetName
1431 m2w
= iconv_open(ms_wcCharsetName
.ToAscii(), cname
);
1434 if ( ms_wcCharsetName
.empty() )
1436 w2m
= ICONV_T_INVALID
;
1440 w2m
= iconv_open(cname
, ms_wcCharsetName
.ToAscii());
1441 if ( w2m
== ICONV_T_INVALID
)
1443 wxLogTrace(TRACE_STRCONV
,
1444 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1445 ms_wcCharsetName
.c_str(), cname
.data());
1450 wxMBConv_iconv::~wxMBConv_iconv()
1452 if ( m2w
!= ICONV_T_INVALID
)
1454 if ( w2m
!= ICONV_T_INVALID
)
1458 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1461 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1462 // Unfortunately there are a couple of global wxCSConv objects such as
1463 // wxConvLocal that are used all over wx code, so we have to make sure
1464 // the handle is used by at most one thread at a time. Otherwise
1465 // only a few wx classes would be safe to use from non-main threads
1466 // as MB<->WC conversion would fail "randomly".
1467 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1470 size_t inbuf
= strlen(psz
);
1471 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1473 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1474 wchar_t *bufPtr
= buf
;
1475 const char *pszPtr
= psz
;
1479 // have destination buffer, convert there
1481 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1482 (char**)&bufPtr
, &outbuf
);
1483 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1487 // convert to native endianness
1488 for ( unsigned n
= 0; n
< res
; n
++ )
1489 buf
[n
] = WC_BSWAP(buf
[n
]);
1492 // NB: iconv was given only strlen(psz) characters on input, and so
1493 // it couldn't convert the trailing zero. Let's do it ourselves
1494 // if there's some room left for it in the output buffer.
1500 // no destination buffer... convert using temp buffer
1501 // to calculate destination buffer requirement
1506 outbuf
= 8*SIZEOF_WCHAR_T
;
1509 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1510 (char**)&bufPtr
, &outbuf
);
1512 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1513 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1516 if (ICONV_FAILED(cres
, inbuf
))
1518 //VS: it is ok if iconv fails, hence trace only
1519 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1526 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1529 // NB: explained in MB2WC
1530 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1533 size_t inlen
= wxWcslen(psz
);
1534 size_t inbuf
= inlen
* SIZEOF_WCHAR_T
;
1538 wchar_t *tmpbuf
= 0;
1542 // need to copy to temp buffer to switch endianness
1543 // (doing WC_BSWAP twice on the original buffer won't help, as it
1544 // could be in read-only memory, or be accessed in some other thread)
1545 tmpbuf
= (wchar_t *)malloc(inbuf
+ SIZEOF_WCHAR_T
);
1546 for ( size_t n
= 0; n
< inlen
; n
++ )
1547 tmpbuf
[n
] = WC_BSWAP(psz
[n
]);
1548 tmpbuf
[inlen
] = L
'\0';
1554 // have destination buffer, convert there
1555 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1559 // NB: iconv was given only wcslen(psz) characters on input, and so
1560 // it couldn't convert the trailing zero. Let's do it ourselves
1561 // if there's some room left for it in the output buffer.
1567 // no destination buffer... convert using temp buffer
1568 // to calculate destination buffer requirement
1572 buf
= tbuf
; outbuf
= 16;
1574 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1577 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1585 if (ICONV_FAILED(cres
, inbuf
))
1587 //VS: it is ok if iconv fails, hence trace only
1588 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1595 #endif // HAVE_ICONV
1598 // ============================================================================
1599 // Win32 conversion classes
1600 // ============================================================================
1602 #ifdef wxHAVE_WIN32_MB2WC
1606 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1607 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1610 class wxMBConv_win32
: public wxMBConv
1615 m_CodePage
= CP_ACP
;
1619 wxMBConv_win32(const wxChar
* name
)
1621 m_CodePage
= wxCharsetToCodepage(name
);
1624 wxMBConv_win32(wxFontEncoding encoding
)
1626 m_CodePage
= wxEncodingToCodepage(encoding
);
1630 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1632 // note that we have to use the MB_ERR_INVALID_CHARS flag as without it
1633 // the behaviour is not compatible with the Unix version (using iconv)
1634 // and breaks the library itself, e.g. wxTextInputStream::NextChar()
1635 // wouldn't work if reading an incomplete MB char didn't result in an
1638 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1639 // an error (tested under Windows Server 2003) and apparently it is
1640 // done on purpose, i.e. the function accepts any input in this case
1641 // and although I'd prefer to return an error on ill-formed input, our
1642 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1643 // explicitly ill-formed according to RFC 2152) either so we don't
1644 // even have any fallback here...
1645 int flags
= m_CodePage
== CP_UTF7
? 0 : MB_ERR_INVALID_CHARS
;
1647 const size_t len
= ::MultiByteToWideChar
1649 m_CodePage
, // code page
1650 flags
, // flags: fall on error
1651 psz
, // input string
1652 -1, // its length (NUL-terminated)
1653 buf
, // output string
1654 buf
? n
: 0 // size of output buffer
1657 // note that it returns count of written chars for buf != NULL and size
1658 // of the needed buffer for buf == NULL so in either case the length of
1659 // the string (which never includes the terminating NUL) is one less
1660 return len
? len
- 1 : (size_t)-1;
1663 size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
1666 we have a problem here: by default, WideCharToMultiByte() may
1667 replace characters unrepresentable in the target code page with bad
1668 quality approximations such as turning "1/2" symbol (U+00BD) into
1669 "1" for the code pages which don't have it and we, obviously, want
1670 to avoid this at any price
1672 the trouble is that this function does it _silently_, i.e. it won't
1673 even tell us whether it did or not... Win98/2000 and higher provide
1674 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1675 we have to resort to a round trip, i.e. check that converting back
1676 results in the same string -- this is, of course, expensive but
1677 otherwise we simply can't be sure to not garble the data.
1680 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1681 // it doesn't work with CJK encodings (which we test for rather roughly
1682 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1684 BOOL usedDef
wxDUMMY_INITIALIZE(false);
1687 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
1689 // it's our lucky day
1690 flags
= WC_NO_BEST_FIT_CHARS
;
1691 pUsedDef
= &usedDef
;
1693 else // old system or unsupported encoding
1699 const size_t len
= ::WideCharToMultiByte
1701 m_CodePage
, // code page
1702 flags
, // either none or no best fit
1703 pwz
, // input string
1704 -1, // it is (wide) NUL-terminated
1705 buf
, // output buffer
1706 buf
? n
: 0, // and its size
1707 NULL
, // default "replacement" char
1708 pUsedDef
// [out] was it used?
1713 // function totally failed
1717 // if we were really converting, check if we succeeded
1722 // check if the conversion failed, i.e. if any replacements
1727 else // we must resort to double tripping...
1729 wxWCharBuffer
wcBuf(n
);
1730 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
1731 wcscmp(wcBuf
, pwz
) != 0 )
1733 // we didn't obtain the same thing we started from, hence
1734 // the conversion was lossy and we consider that it failed
1740 // see the comment above for the reason of "len - 1"
1744 bool IsOk() const { return m_CodePage
!= -1; }
1747 static bool CanUseNoBestFit()
1749 static int s_isWin98Or2k
= -1;
1751 if ( s_isWin98Or2k
== -1 )
1754 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
1757 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
1761 s_isWin98Or2k
= verMaj
>= 5;
1765 // unknown, be conseravtive by default
1769 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
1772 return s_isWin98Or2k
== 1;
1778 #endif // wxHAVE_WIN32_MB2WC
1780 // ============================================================================
1781 // Cocoa conversion classes
1782 // ============================================================================
1784 #if defined(__WXCOCOA__)
// RN: There is no UTF-32 support in either Core Foundation or Cocoa.
// Strangely enough, Core Foundation uses UTF-32 internally quite a bit -
// it's just not public (yet).
1790 #include <CoreFoundation/CFString.h>
1791 #include <CoreFoundation/CFStringEncodingExt.h>
1793 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
1795 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
1796 if ( encoding
== wxFONTENCODING_DEFAULT
)
1798 enc
= CFStringGetSystemEncoding();
1800 else switch( encoding
)
1802 case wxFONTENCODING_ISO8859_1
:
1803 enc
= kCFStringEncodingISOLatin1
;
1805 case wxFONTENCODING_ISO8859_2
:
1806 enc
= kCFStringEncodingISOLatin2
;
1808 case wxFONTENCODING_ISO8859_3
:
1809 enc
= kCFStringEncodingISOLatin3
;
1811 case wxFONTENCODING_ISO8859_4
:
1812 enc
= kCFStringEncodingISOLatin4
;
1814 case wxFONTENCODING_ISO8859_5
:
1815 enc
= kCFStringEncodingISOLatinCyrillic
;
1817 case wxFONTENCODING_ISO8859_6
:
1818 enc
= kCFStringEncodingISOLatinArabic
;
1820 case wxFONTENCODING_ISO8859_7
:
1821 enc
= kCFStringEncodingISOLatinGreek
;
1823 case wxFONTENCODING_ISO8859_8
:
1824 enc
= kCFStringEncodingISOLatinHebrew
;
1826 case wxFONTENCODING_ISO8859_9
:
1827 enc
= kCFStringEncodingISOLatin5
;
1829 case wxFONTENCODING_ISO8859_10
:
1830 enc
= kCFStringEncodingISOLatin6
;
1832 case wxFONTENCODING_ISO8859_11
:
1833 enc
= kCFStringEncodingISOLatinThai
;
1835 case wxFONTENCODING_ISO8859_13
:
1836 enc
= kCFStringEncodingISOLatin7
;
1838 case wxFONTENCODING_ISO8859_14
:
1839 enc
= kCFStringEncodingISOLatin8
;
1841 case wxFONTENCODING_ISO8859_15
:
1842 enc
= kCFStringEncodingISOLatin9
;
1845 case wxFONTENCODING_KOI8
:
1846 enc
= kCFStringEncodingKOI8_R
;
1848 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
1849 enc
= kCFStringEncodingDOSRussian
;
1852 // case wxFONTENCODING_BULGARIAN :
1856 case wxFONTENCODING_CP437
:
1857 enc
=kCFStringEncodingDOSLatinUS
;
1859 case wxFONTENCODING_CP850
:
1860 enc
= kCFStringEncodingDOSLatin1
;
1862 case wxFONTENCODING_CP852
:
1863 enc
= kCFStringEncodingDOSLatin2
;
1865 case wxFONTENCODING_CP855
:
1866 enc
= kCFStringEncodingDOSCyrillic
;
1868 case wxFONTENCODING_CP866
:
1869 enc
=kCFStringEncodingDOSRussian
;
1871 case wxFONTENCODING_CP874
:
1872 enc
= kCFStringEncodingDOSThai
;
1874 case wxFONTENCODING_CP932
:
1875 enc
= kCFStringEncodingDOSJapanese
;
1877 case wxFONTENCODING_CP936
:
1878 enc
=kCFStringEncodingDOSChineseSimplif
;
1880 case wxFONTENCODING_CP949
:
1881 enc
= kCFStringEncodingDOSKorean
;
1883 case wxFONTENCODING_CP950
:
1884 enc
= kCFStringEncodingDOSChineseTrad
;
1886 case wxFONTENCODING_CP1250
:
1887 enc
= kCFStringEncodingWindowsLatin2
;
1889 case wxFONTENCODING_CP1251
:
1890 enc
=kCFStringEncodingWindowsCyrillic
;
1892 case wxFONTENCODING_CP1252
:
1893 enc
=kCFStringEncodingWindowsLatin1
;
1895 case wxFONTENCODING_CP1253
:
1896 enc
= kCFStringEncodingWindowsGreek
;
1898 case wxFONTENCODING_CP1254
:
1899 enc
= kCFStringEncodingWindowsLatin5
;
1901 case wxFONTENCODING_CP1255
:
1902 enc
=kCFStringEncodingWindowsHebrew
;
1904 case wxFONTENCODING_CP1256
:
1905 enc
=kCFStringEncodingWindowsArabic
;
1907 case wxFONTENCODING_CP1257
:
1908 enc
= kCFStringEncodingWindowsBalticRim
;
1910 // This only really encodes to UTF7 (if that) evidently
1911 // case wxFONTENCODING_UTF7 :
1912 // enc = kCFStringEncodingNonLossyASCII ;
1914 case wxFONTENCODING_UTF8
:
1915 enc
= kCFStringEncodingUTF8
;
1917 case wxFONTENCODING_EUC_JP
:
1918 enc
= kCFStringEncodingEUC_JP
;
1920 case wxFONTENCODING_UTF16
:
1921 enc
= kCFStringEncodingUnicode
;
1923 case wxFONTENCODING_MACROMAN
:
1924 enc
= kCFStringEncodingMacRoman
;
1926 case wxFONTENCODING_MACJAPANESE
:
1927 enc
= kCFStringEncodingMacJapanese
;
1929 case wxFONTENCODING_MACCHINESETRAD
:
1930 enc
= kCFStringEncodingMacChineseTrad
;
1932 case wxFONTENCODING_MACKOREAN
:
1933 enc
= kCFStringEncodingMacKorean
;
1935 case wxFONTENCODING_MACARABIC
:
1936 enc
= kCFStringEncodingMacArabic
;
1938 case wxFONTENCODING_MACHEBREW
:
1939 enc
= kCFStringEncodingMacHebrew
;
1941 case wxFONTENCODING_MACGREEK
:
1942 enc
= kCFStringEncodingMacGreek
;
1944 case wxFONTENCODING_MACCYRILLIC
:
1945 enc
= kCFStringEncodingMacCyrillic
;
1947 case wxFONTENCODING_MACDEVANAGARI
:
1948 enc
= kCFStringEncodingMacDevanagari
;
1950 case wxFONTENCODING_MACGURMUKHI
:
1951 enc
= kCFStringEncodingMacGurmukhi
;
1953 case wxFONTENCODING_MACGUJARATI
:
1954 enc
= kCFStringEncodingMacGujarati
;
1956 case wxFONTENCODING_MACORIYA
:
1957 enc
= kCFStringEncodingMacOriya
;
1959 case wxFONTENCODING_MACBENGALI
:
1960 enc
= kCFStringEncodingMacBengali
;
1962 case wxFONTENCODING_MACTAMIL
:
1963 enc
= kCFStringEncodingMacTamil
;
1965 case wxFONTENCODING_MACTELUGU
:
1966 enc
= kCFStringEncodingMacTelugu
;
1968 case wxFONTENCODING_MACKANNADA
:
1969 enc
= kCFStringEncodingMacKannada
;
1971 case wxFONTENCODING_MACMALAJALAM
:
1972 enc
= kCFStringEncodingMacMalayalam
;
1974 case wxFONTENCODING_MACSINHALESE
:
1975 enc
= kCFStringEncodingMacSinhalese
;
1977 case wxFONTENCODING_MACBURMESE
:
1978 enc
= kCFStringEncodingMacBurmese
;
1980 case wxFONTENCODING_MACKHMER
:
1981 enc
= kCFStringEncodingMacKhmer
;
1983 case wxFONTENCODING_MACTHAI
:
1984 enc
= kCFStringEncodingMacThai
;
1986 case wxFONTENCODING_MACLAOTIAN
:
1987 enc
= kCFStringEncodingMacLaotian
;
1989 case wxFONTENCODING_MACGEORGIAN
:
1990 enc
= kCFStringEncodingMacGeorgian
;
1992 case wxFONTENCODING_MACARMENIAN
:
1993 enc
= kCFStringEncodingMacArmenian
;
1995 case wxFONTENCODING_MACCHINESESIMP
:
1996 enc
= kCFStringEncodingMacChineseSimp
;
1998 case wxFONTENCODING_MACTIBETAN
:
1999 enc
= kCFStringEncodingMacTibetan
;
2001 case wxFONTENCODING_MACMONGOLIAN
:
2002 enc
= kCFStringEncodingMacMongolian
;
2004 case wxFONTENCODING_MACETHIOPIC
:
2005 enc
= kCFStringEncodingMacEthiopic
;
2007 case wxFONTENCODING_MACCENTRALEUR
:
2008 enc
= kCFStringEncodingMacCentralEurRoman
;
2010 case wxFONTENCODING_MACVIATNAMESE
:
2011 enc
= kCFStringEncodingMacVietnamese
;
2013 case wxFONTENCODING_MACARABICEXT
:
2014 enc
= kCFStringEncodingMacExtArabic
;
2016 case wxFONTENCODING_MACSYMBOL
:
2017 enc
= kCFStringEncodingMacSymbol
;
2019 case wxFONTENCODING_MACDINGBATS
:
2020 enc
= kCFStringEncodingMacDingbats
;
2022 case wxFONTENCODING_MACTURKISH
:
2023 enc
= kCFStringEncodingMacTurkish
;
2025 case wxFONTENCODING_MACCROATIAN
:
2026 enc
= kCFStringEncodingMacCroatian
;
2028 case wxFONTENCODING_MACICELANDIC
:
2029 enc
= kCFStringEncodingMacIcelandic
;
2031 case wxFONTENCODING_MACROMANIAN
:
2032 enc
= kCFStringEncodingMacRomanian
;
2034 case wxFONTENCODING_MACCELTIC
:
2035 enc
= kCFStringEncodingMacCeltic
;
2037 case wxFONTENCODING_MACGAELIC
:
2038 enc
= kCFStringEncodingMacGaelic
;
2040 // case wxFONTENCODING_MACKEYBOARD :
2041 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2044 // because gcc is picky
2050 class wxMBConv_cocoa
: public wxMBConv
2055 Init(CFStringGetSystemEncoding()) ;
2059 wxMBConv_cocoa(const wxChar
* name
)
2061 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2065 wxMBConv_cocoa(wxFontEncoding encoding
)
2067 Init( wxCFStringEncFromFontEnc(encoding
) );
2074 void Init( CFStringEncoding encoding
)
2076 m_encoding
= encoding
;
2079 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2083 CFStringRef theString
= CFStringCreateWithBytes (
2084 NULL
, //the allocator
2085 (const UInt8
*)szUnConv
,
2088 false //no BOM/external representation
2091 wxASSERT(theString
);
2093 size_t nOutLength
= CFStringGetLength(theString
);
2097 CFRelease(theString
);
2101 CFRange theRange
= { 0, nOutSize
};
2103 #if SIZEOF_WCHAR_T == 4
2104 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2107 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2109 CFRelease(theString
);
2111 szUniCharBuffer
[nOutLength
] = '\0' ;
2113 #if SIZEOF_WCHAR_T == 4
2114 wxMBConvUTF16 converter
;
2115 converter
.MB2WC(szOut
, (const char*)szUniCharBuffer
, nOutSize
) ;
2116 delete[] szUniCharBuffer
;
2122 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2126 size_t nRealOutSize
;
2127 size_t nBufSize
= wxWcslen(szUnConv
);
2128 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
2130 #if SIZEOF_WCHAR_T == 4
2131 wxMBConvUTF16 converter
;
2132 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2133 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1] ;
2134 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
)) ;
2135 nBufSize
/= sizeof(UniChar
);
2138 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2142 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
2145 wxASSERT(theString
);
2147 //Note that CER puts a BOM when converting to unicode
2148 //so we check and use getchars instead in that case
2149 if (m_encoding
== kCFStringEncodingUnicode
)
2152 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2154 nRealOutSize
= CFStringGetLength(theString
) + 1;
2160 CFRangeMake(0, CFStringGetLength(theString
)),
2162 0, //what to put in characters that can't be converted -
2163 //0 tells CFString to return NULL if it meets such a character
2164 false, //not an external representation
2167 (CFIndex
*) &nRealOutSize
2171 CFRelease(theString
);
2173 #if SIZEOF_WCHAR_T == 4
2174 delete[] szUniBuffer
;
2177 return nRealOutSize
- 1;
2182 return m_encoding
!= kCFStringEncodingInvalidId
&&
2183 CFStringIsEncodingAvailable(m_encoding
);
2187 CFStringEncoding m_encoding
;
2190 #endif // defined(__WXCOCOA__)
2192 // ============================================================================
2193 // Mac conversion classes
2194 // ============================================================================
2196 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2198 class wxMBConv_mac
: public wxMBConv
2203 Init(CFStringGetSystemEncoding()) ;
2207 wxMBConv_mac(const wxChar
* name
)
2209 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2213 wxMBConv_mac(wxFontEncoding encoding
)
2215 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
2220 OSStatus status
= noErr
;
2221 status
= TECDisposeConverter(m_MB2WC_converter
);
2222 status
= TECDisposeConverter(m_WC2MB_converter
);
2226 void Init( TextEncodingBase encoding
)
2228 OSStatus status
= noErr
;
2229 m_char_encoding
= encoding
;
2230 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
2232 status
= TECCreateConverter(&m_MB2WC_converter
,
2234 m_unicode_encoding
);
2235 status
= TECCreateConverter(&m_WC2MB_converter
,
2240 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2242 OSStatus status
= noErr
;
2243 ByteCount byteOutLen
;
2244 ByteCount byteInLen
= strlen(psz
) ;
2245 wchar_t *tbuf
= NULL
;
2246 UniChar
* ubuf
= NULL
;
2251 //apple specs say at least 32
2252 n
= wxMax( 32 , byteInLen
) ;
2253 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
2255 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
2256 #if SIZEOF_WCHAR_T == 4
2257 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2259 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2261 status
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2262 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2263 #if SIZEOF_WCHAR_T == 4
2264 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2265 // is not properly terminated we get random characters at the end
2266 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2267 wxMBConvUTF16 converter
;
2268 res
= converter
.MB2WC( (buf
? buf
: tbuf
) , (const char*)ubuf
, n
) ;
2271 res
= byteOutLen
/ sizeof( UniChar
) ;
2276 if ( buf
&& res
< n
)
2282 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2284 OSStatus status
= noErr
;
2285 ByteCount byteOutLen
;
2286 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2292 //apple specs say at least 32
2293 n
= wxMax( 32 , ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2294 tbuf
= (char*) malloc( n
) ;
2297 ByteCount byteBufferLen
= n
;
2298 UniChar
* ubuf
= NULL
;
2299 #if SIZEOF_WCHAR_T == 4
2300 wxMBConvUTF16 converter
;
2301 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2302 byteInLen
= unicharlen
;
2303 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2304 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2306 ubuf
= (UniChar
*) psz
;
2308 status
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2309 (TextPtr
) (buf
? buf
: tbuf
) , byteBufferLen
, &byteOutLen
);
2310 #if SIZEOF_WCHAR_T == 4
2316 size_t res
= byteOutLen
;
2317 if ( buf
&& res
< n
)
2321 //we need to double-trip to verify it didn't insert any ? in place
2322 //of bogus characters
2323 wxWCharBuffer
wcBuf(n
);
2324 size_t pszlen
= wxWcslen(psz
);
2325 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
2326 wxWcslen(wcBuf
) != pszlen
||
2327 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2329 // we didn't obtain the same thing we started from, hence
2330 // the conversion was lossy and we consider that it failed
2339 { return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
; }
2342 TECObjectRef m_MB2WC_converter
;
2343 TECObjectRef m_WC2MB_converter
;
2345 TextEncodingBase m_char_encoding
;
2346 TextEncodingBase m_unicode_encoding
;
2349 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2351 // ============================================================================
2352 // wxEncodingConverter based conversion classes
2353 // ============================================================================
2357 class wxMBConv_wxwin
: public wxMBConv
2362 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2363 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2367 // temporarily just use wxEncodingConverter stuff,
2368 // so that it works while a better implementation is built
2369 wxMBConv_wxwin(const wxChar
* name
)
2372 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2374 m_enc
= wxFONTENCODING_SYSTEM
;
2379 wxMBConv_wxwin(wxFontEncoding enc
)
2386 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2388 size_t inbuf
= strlen(psz
);
2391 if (!m2w
.Convert(psz
,buf
))
2397 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2399 const size_t inbuf
= wxWcslen(psz
);
2402 if (!w2m
.Convert(psz
,buf
))
2409 bool IsOk() const { return m_ok
; }
2412 wxFontEncoding m_enc
;
2413 wxEncodingConverter m2w
, w2m
;
2415 // were we initialized successfully?
2418 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2421 // make the constructors available for unit testing
2422 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_wxwin( const wxChar
* name
)
2424 wxMBConv_wxwin
* result
= new wxMBConv_wxwin( name
);
2425 if ( !result
->IsOk() )
2433 #endif // wxUSE_FONTMAP
2435 // ============================================================================
2436 // wxCSConv implementation
2437 // ============================================================================
2439 void wxCSConv::Init()
2446 wxCSConv::wxCSConv(const wxChar
*charset
)
2455 m_encoding
= wxFONTENCODING_SYSTEM
;
2458 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2460 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2462 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2464 encoding
= wxFONTENCODING_SYSTEM
;
2469 m_encoding
= encoding
;
2472 wxCSConv::~wxCSConv()
2477 wxCSConv::wxCSConv(const wxCSConv
& conv
)
2482 SetName(conv
.m_name
);
2483 m_encoding
= conv
.m_encoding
;
2486 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2490 SetName(conv
.m_name
);
2491 m_encoding
= conv
.m_encoding
;
2496 void wxCSConv::Clear()
2505 void wxCSConv::SetName(const wxChar
*charset
)
2509 m_name
= wxStrdup(charset
);
2515 #include "wx/hashmap.h"
2517 WX_DECLARE_HASH_MAP( wxFontEncoding
, wxString
, wxIntegerHash
, wxIntegerEqual
,
2518 wxEncodingNameCache
);
2520 static wxEncodingNameCache gs_nameCache
;
2523 wxMBConv
*wxCSConv::DoCreate() const
2526 wxLogTrace(TRACE_STRCONV
,
2527 wxT("creating conversion for %s"),
2529 : wxFontMapperBase::GetEncodingName(m_encoding
).c_str()));
2530 #endif // wxUSE_FONTMAP
2532 // check for the special case of ASCII or ISO8859-1 charset: as we have
2533 // special knowledge of it anyhow, we don't need to create a special
2534 // conversion object
2535 if ( m_encoding
== wxFONTENCODING_ISO8859_1
)
2537 // don't convert at all
2541 // we trust OS to do conversion better than we can so try external
2542 // conversion methods first
2544 // the full order is:
2545 // 1. OS conversion (iconv() under Unix or Win32 API)
2546 // 2. hard coded conversions for UTF
2547 // 3. wxEncodingConverter as fall back
2553 #endif // !wxUSE_FONTMAP
2555 wxString
name(m_name
);
2556 wxFontEncoding
encoding(m_encoding
);
2558 if ( !name
.empty() )
2560 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
2568 wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2569 #endif // wxUSE_FONTMAP
2573 const wxEncodingNameCache::iterator it
= gs_nameCache
.find(encoding
);
2574 if ( it
!= gs_nameCache
.end() )
2576 if ( it
->second
.empty() )
2579 wxMBConv_iconv
*conv
= new wxMBConv_iconv(it
->second
);
2586 const wxChar
** names
= wxFontMapperBase::GetAllEncodingNames(encoding
);
2588 for ( ; *names
; ++names
)
2590 wxMBConv_iconv
*conv
= new wxMBConv_iconv(*names
);
2593 gs_nameCache
[encoding
] = *names
;
2600 gs_nameCache
[encoding
] = _T(""); // cache the failure
2602 #endif // wxUSE_FONTMAP
2604 #endif // HAVE_ICONV
2606 #ifdef wxHAVE_WIN32_MB2WC
2609 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2610 : new wxMBConv_win32(m_encoding
);
2619 #endif // wxHAVE_WIN32_MB2WC
2620 #if defined(__WXMAC__)
2622 // leave UTF16 and UTF32 to the built-ins of wx
2623 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
2624 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
2628 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
2629 : new wxMBConv_mac(m_encoding
);
2631 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
2640 #if defined(__WXCOCOA__)
2642 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
2646 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
2647 : new wxMBConv_cocoa(m_encoding
);
2649 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
2659 wxFontEncoding enc
= m_encoding
;
2661 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2663 // use "false" to suppress interactive dialogs -- we can be called from
2664 // anywhere and popping up a dialog from here is the last thing we want to
2666 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
2668 #endif // wxUSE_FONTMAP
2672 case wxFONTENCODING_UTF7
:
2673 return new wxMBConvUTF7
;
2675 case wxFONTENCODING_UTF8
:
2676 return new wxMBConvUTF8
;
2678 case wxFONTENCODING_UTF16BE
:
2679 return new wxMBConvUTF16BE
;
2681 case wxFONTENCODING_UTF16LE
:
2682 return new wxMBConvUTF16LE
;
2684 case wxFONTENCODING_UTF32BE
:
2685 return new wxMBConvUTF32BE
;
2687 case wxFONTENCODING_UTF32LE
:
2688 return new wxMBConvUTF32LE
;
2691 // nothing to do but put here to suppress gcc warnings
2698 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2699 : new wxMBConv_wxwin(m_encoding
);
2705 #endif // wxUSE_FONTMAP
2707 // NB: This is a hack to prevent deadlock. What could otherwise happen
2708 // in Unicode build: wxConvLocal creation ends up being here
2709 // because of some failure and logs the error. But wxLog will try to
2710 // attach timestamp, for which it will need wxConvLocal (to convert
2711 // time to char* and then wchar_t*), but that fails, tries to log
2712 // error, but wxLog has a (already locked) critical section that
2713 // guards static buffer.
2714 static bool alreadyLoggingError
= false;
2715 if (!alreadyLoggingError
)
2717 alreadyLoggingError
= true;
2718 wxLogError(_("Cannot convert from the charset '%s'!"),
2722 wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str()
2723 #else // !wxUSE_FONTMAP
2724 wxString::Format(_("encoding %s"), m_encoding
).c_str()
2725 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2727 alreadyLoggingError
= false;
2733 void wxCSConv::CreateConvIfNeeded() const
2737 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2740 // if we don't have neither the name nor the encoding, use the default
2741 // encoding for this system
2742 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
2744 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
2746 #endif // wxUSE_INTL
2748 self
->m_convReal
= DoCreate();
2749 self
->m_deferred
= false;
2753 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2755 CreateConvIfNeeded();
2758 return m_convReal
->MB2WC(buf
, psz
, n
);
2761 size_t len
= strlen(psz
);
2765 for (size_t c
= 0; c
<= len
; c
++)
2766 buf
[c
] = (unsigned char)(psz
[c
]);
2772 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2774 CreateConvIfNeeded();
2777 return m_convReal
->WC2MB(buf
, psz
, n
);
2780 const size_t len
= wxWcslen(psz
);
2783 for (size_t c
= 0; c
<= len
; c
++)
2787 buf
[c
] = (char)psz
[c
];
2792 for (size_t c
= 0; c
<= len
; c
++)
2802 // ----------------------------------------------------------------------------
2804 // ----------------------------------------------------------------------------
2807 static wxMBConv_win32 wxConvLibcObj
;
2808 #elif defined(__WXMAC__) && !defined(__MACH__)
2809 static wxMBConv_mac wxConvLibcObj
;
2811 static wxMBConvLibc wxConvLibcObj
;
2814 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
2815 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
2816 static wxMBConvUTF7 wxConvUTF7Obj
;
2817 static wxMBConvUTF8 wxConvUTF8Obj
;
2819 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
2820 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
2821 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
2822 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
2823 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
2824 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
2825 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
= &
2833 #else // !wxUSE_WCHAR_T
2835 // stand-ins in absence of wchar_t
2836 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
2841 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T