1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // ============================================================================
17 // ============================================================================
19 // ----------------------------------------------------------------------------
21 // ----------------------------------------------------------------------------
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
35 #include "wx/strconv.h"
40 #include "wx/msw/private.h"
41 #include "wx/msw/missing.h"
52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54 #endif // __WIN32__ but !__WXMICROWIN__
62 #include "wx/thread.h"
65 #include "wx/encconv.h"
66 #include "wx/fontmap.h"
71 #include <ATSUnicode.h>
72 #include <TextCommon.h>
73 #include <TextEncodingConverter.h>
76 #include "wx/mac/private.h" // includes mac headers
79 #define TRACE_STRCONV _T("strconv")
81 #if SIZEOF_WCHAR_T == 2
85 // ============================================================================
87 // ============================================================================
89 // ----------------------------------------------------------------------------
90 // UTF-16 en/decoding to/from UCS-4
91 // ----------------------------------------------------------------------------
94 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
99 *output
= (wxUint16
) input
;
102 else if (input
>=0x110000)
110 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
111 *output
= (wxUint16
) ((input
&0x3ff)+0xdc00);
117 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
119 if ((*input
<0xd800) || (*input
>0xdfff))
124 else if ((input
[1]<0xdc00) || (input
[1]>0xdfff))
131 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
137 // ----------------------------------------------------------------------------
139 // ----------------------------------------------------------------------------
141 wxMBConv::~wxMBConv()
143 // nothing to do here (necessary for Darwin linking probably)
146 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
150 // calculate the length of the buffer needed first
151 size_t nLen
= MB2WC(NULL
, psz
, 0);
152 if ( nLen
!= (size_t)-1 )
154 // now do the actual conversion
155 wxWCharBuffer
buf(nLen
);
156 nLen
= MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NULL
157 if ( nLen
!= (size_t)-1 )
164 wxWCharBuffer
buf((wchar_t *)NULL
);
169 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
173 size_t nLen
= WC2MB(NULL
, pwz
, 0);
174 if ( nLen
!= (size_t)-1 )
176 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
177 nLen
= WC2MB(buf
.data(), pwz
, nLen
+ 4);
178 if ( nLen
!= (size_t)-1 )
185 wxCharBuffer
buf((char *)NULL
);
190 const wxWCharBuffer
wxMBConv::cMB2WC(const char *szString
, size_t nStringLen
, size_t* pOutSize
) const
192 wxASSERT(pOutSize
!= NULL
);
194 const char* szEnd
= szString
+ nStringLen
+ 1;
195 const char* szPos
= szString
;
196 const char* szStart
= szPos
;
198 size_t nActualLength
= 0;
199 size_t nCurrentSize
= nStringLen
; //try normal size first (should never resize?)
201 wxWCharBuffer
theBuffer(nCurrentSize
);
203 //Convert the string until the length() is reached, continuing the
204 //loop every time a null character is reached
205 while(szPos
!= szEnd
)
207 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
209 //Get the length of the current (sub)string
210 size_t nLen
= MB2WC(NULL
, szPos
, 0);
212 //Invalid conversion?
213 if( nLen
== (size_t)-1 )
216 theBuffer
.data()[0u] = wxT('\0');
221 //Increase the actual length (+1 for current null character)
222 nActualLength
+= nLen
+ 1;
224 //if buffer too big, realloc the buffer
225 if (nActualLength
> (nCurrentSize
+1))
227 wxWCharBuffer
theNewBuffer(nCurrentSize
<< 1);
228 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
* sizeof(wchar_t));
229 theBuffer
= theNewBuffer
;
233 //Convert the current (sub)string
234 if ( MB2WC(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
237 theBuffer
.data()[0u] = wxT('\0');
241 //Increment to next (sub)string
242 //Note that we have to use strlen instead of nLen here
243 //because XX2XX gives us the size of the output buffer,
244 //which is not necessarily the length of the string
245 szPos
+= strlen(szPos
) + 1;
248 //success - return actual length and the buffer
249 *pOutSize
= nActualLength
;
253 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *szString
, size_t nStringLen
, size_t* pOutSize
) const
255 wxASSERT(pOutSize
!= NULL
);
257 const wchar_t* szEnd
= szString
+ nStringLen
+ 1;
258 const wchar_t* szPos
= szString
;
259 const wchar_t* szStart
= szPos
;
261 size_t nActualLength
= 0;
262 size_t nCurrentSize
= nStringLen
<< 2; //try * 4 first
264 wxCharBuffer
theBuffer(nCurrentSize
);
266 //Convert the string until the length() is reached, continuing the
267 //loop every time a null character is reached
268 while(szPos
!= szEnd
)
270 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
272 //Get the length of the current (sub)string
273 size_t nLen
= WC2MB(NULL
, szPos
, 0);
275 //Invalid conversion?
276 if( nLen
== (size_t)-1 )
279 theBuffer
.data()[0u] = wxT('\0');
283 //Increase the actual length (+1 for current null character)
284 nActualLength
+= nLen
+ 1;
286 //if buffer too big, realloc the buffer
287 if (nActualLength
> (nCurrentSize
+1))
289 wxCharBuffer
theNewBuffer(nCurrentSize
<< 1);
290 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
);
291 theBuffer
= theNewBuffer
;
295 //Convert the current (sub)string
296 if(WC2MB(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
299 theBuffer
.data()[0u] = wxT('\0');
303 //Increment to next (sub)string
304 //Note that we have to use wxWcslen instead of nLen here
305 //because XX2XX gives us the size of the output buffer,
306 //which is not necessarily the length of the string
307 szPos
+= wxWcslen(szPos
) + 1;
310 //success - return actual length and the buffer
311 *pOutSize
= nActualLength
;
315 // ----------------------------------------------------------------------------
317 // ----------------------------------------------------------------------------
319 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
321 return wxMB2WC(buf
, psz
, n
);
324 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
326 return wxWC2MB(buf
, psz
, n
);
331 // ----------------------------------------------------------------------------
332 // wxConvBrokenFileNames
333 // ----------------------------------------------------------------------------
335 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar
*charset
)
337 if ( !charset
|| wxStricmp(charset
, _T("UTF-8")) == 0
338 || wxStricmp(charset
, _T("UTF8")) == 0 )
339 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
341 m_conv
= new wxCSConv(charset
);
345 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf
,
347 size_t outputSize
) const
349 return m_conv
->MB2WC( outputBuf
, psz
, outputSize
);
353 wxConvBrokenFileNames::WC2MB(char *outputBuf
,
355 size_t outputSize
) const
357 return m_conv
->WC2MB( outputBuf
, psz
, outputSize
);
362 // ----------------------------------------------------------------------------
364 // ----------------------------------------------------------------------------
366 // Implementation (C) 2004 Fredrik Roubert
369 // BASE64 decoding table
371 static const unsigned char utf7unb64
[] =
373 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
374 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
375 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
376 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
377 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
378 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
379 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
380 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
381 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
382 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
383 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
384 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
385 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
386 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
387 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
388 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
389 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
390 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
391 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
392 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
393 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
394 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
395 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
396 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
397 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
398 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
399 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
400 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
401 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
402 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
403 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
404 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
407 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
411 while (*psz
&& ((!buf
) || (len
< n
)))
413 unsigned char cc
= *psz
++;
421 else if (*psz
== '-')
431 // BASE64 encoded string
435 for (lsb
= false, d
= 0, l
= 0;
436 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff; psz
++)
440 for (l
+= 6; l
>= 8; lsb
= !lsb
)
442 c
= (unsigned char)((d
>> (l
-= 8)) % 256);
451 *buf
= (wchar_t)(c
<< 8);
458 if (buf
&& (len
< n
))
464 // BASE64 encoding table
466 static const unsigned char utf7enb64
[] =
468 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
469 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
470 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
471 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
472 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
473 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
474 'w', 'x', 'y', 'z', '0', '1', '2', '3',
475 '4', '5', '6', '7', '8', '9', '+', '/'
479 // UTF-7 encoding table
481 // 0 - Set D (directly encoded characters)
482 // 1 - Set O (optional direct characters)
483 // 2 - whitespace characters (optional)
484 // 3 - special characters
486 static const unsigned char utf7encode
[128] =
488 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
489 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
490 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
492 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
494 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
498 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
504 while (*psz
&& ((!buf
) || (len
< n
)))
507 if (cc
< 0x80 && utf7encode
[cc
] < 1)
515 else if (((wxUint32
)cc
) > 0xffff)
517 // no surrogate pair generation (yet?)
528 // BASE64 encode string
529 unsigned int lsb
, d
, l
;
530 for (d
= 0, l
= 0; /*nothing*/; psz
++)
532 for (lsb
= 0; lsb
< 2; lsb
++)
535 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
537 for (l
+= 8; l
>= 6; )
541 *buf
++ = utf7enb64
[(d
>> l
) % 64];
546 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
552 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
561 if (buf
&& (len
< n
))
566 // ----------------------------------------------------------------------------
568 // ----------------------------------------------------------------------------
570 static wxUint32 utf8_max
[]=
571 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
// boundaries of the private use area which we use to (temporarily) remap the
// bytes which are invalid in a UTF-8 encoded string
575 const wxUint32 wxUnicodePUA
= 0x100000;
576 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
578 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
582 while (*psz
&& ((!buf
) || (len
< n
)))
584 const char *opsz
= psz
;
585 bool invalid
= false;
586 unsigned char cc
= *psz
++, fc
= cc
;
588 for (cnt
= 0; fc
& 0x80; cnt
++)
597 // escape the escape character for octal escapes
598 if ((m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
599 && cc
== '\\' && (!buf
|| len
< n
))
611 // invalid UTF-8 sequence
616 unsigned ocnt
= cnt
- 1;
617 wxUint32 res
= cc
& (0x3f >> cnt
);
621 if ((cc
& 0xC0) != 0x80)
623 // invalid UTF-8 sequence
628 res
= (res
<< 6) | (cc
& 0x3f);
630 if (invalid
|| res
<= utf8_max
[ocnt
])
632 // illegal UTF-8 encoding
635 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
636 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
638 // if one of our PUA characters turns up externally
639 // it must also be treated as an illegal sequence
640 // (a bit like you have to escape an escape character)
646 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
647 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
648 if (pa
== (size_t)-1)
660 *buf
++ = (wchar_t)res
;
662 #endif // WC_UTF16/!WC_UTF16
667 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
669 while (opsz
< psz
&& (!buf
|| len
< n
))
672 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
673 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
674 wxASSERT(pa
!= (size_t)-1);
681 *buf
++ = (wchar_t)(wxUnicodePUA
+ (unsigned char)*opsz
);
687 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
689 while (opsz
< psz
&& (!buf
|| len
< n
))
691 if ( buf
&& len
+ 3 < n
)
693 unsigned char on
= *opsz
;
695 *buf
++ = (wchar_t)( L
'0' + on
/ 0100 );
696 *buf
++ = (wchar_t)( L
'0' + (on
% 0100) / 010 );
697 *buf
++ = (wchar_t)( L
'0' + on
% 010 );
703 else // MAP_INVALID_UTF8_NOT
710 if (buf
&& (len
< n
))
// Return true if `wch` is one of the octal digits L'0' .. L'7'.
static inline bool isoctal(wchar_t wch)
{
    return wch >= L'0' && wch <= L'7';
}
720 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
724 while (*psz
&& ((!buf
) || (len
< n
)))
728 // cast is ok for WC_UTF16
729 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
730 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
732 cc
=(*psz
++) & 0x7fffffff;
735 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
736 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
739 *buf
++ = (char)(cc
- wxUnicodePUA
);
742 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
743 && cc
== L
'\\' && psz
[0] == L
'\\' )
750 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
752 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
756 *buf
++ = (char) ((psz
[0] - L
'0')*0100 +
757 (psz
[1] - L
'0')*010 +
767 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
781 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
783 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
795 // ----------------------------------------------------------------------------
797 // ----------------------------------------------------------------------------
799 #ifdef WORDS_BIGENDIAN
800 #define wxMBConvUTF16straight wxMBConvUTF16BE
801 #define wxMBConvUTF16swap wxMBConvUTF16LE
803 #define wxMBConvUTF16swap wxMBConvUTF16BE
804 #define wxMBConvUTF16straight wxMBConvUTF16LE
810 // copy 16bit MB to 16bit String
811 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
815 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
818 *buf
++ = *(wxUint16
*)psz
;
821 psz
+= sizeof(wxUint16
);
823 if (buf
&& len
<n
) *buf
=0;
829 // copy 16bit String to 16bit MB
830 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
834 while (*psz
&& (!buf
|| len
< n
))
838 *(wxUint16
*)buf
= *psz
;
839 buf
+= sizeof(wxUint16
);
841 len
+= sizeof(wxUint16
);
844 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
850 // swap 16bit MB to 16bit String
851 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
855 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
859 ((char *)buf
)[0] = psz
[1];
860 ((char *)buf
)[1] = psz
[0];
864 psz
+= sizeof(wxUint16
);
866 if (buf
&& len
<n
) *buf
=0;
872 // swap 16bit MB to 16bit String
873 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
877 while (*psz
&& (!buf
|| len
< n
))
881 *buf
++ = ((char*)psz
)[1];
882 *buf
++ = ((char*)psz
)[0];
884 len
+= sizeof(wxUint16
);
887 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
896 // copy 16bit MB to 32bit String
897 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
901 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
904 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
905 if (pa
== (size_t)-1)
909 *buf
++ = (wchar_t)cc
;
911 psz
+= pa
* sizeof(wxUint16
);
913 if (buf
&& len
<n
) *buf
=0;
919 // copy 32bit String to 16bit MB
920 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
924 while (*psz
&& (!buf
|| len
< n
))
927 size_t pa
=encode_utf16(*psz
, cc
);
929 if (pa
== (size_t)-1)
934 *(wxUint16
*)buf
= cc
[0];
935 buf
+= sizeof(wxUint16
);
938 *(wxUint16
*)buf
= cc
[1];
939 buf
+= sizeof(wxUint16
);
943 len
+= pa
*sizeof(wxUint16
);
946 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
952 // swap 16bit MB to 32bit String
953 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
957 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
961 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
962 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
964 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
965 if (pa
== (size_t)-1)
969 *buf
++ = (wchar_t)cc
;
972 psz
+= pa
* sizeof(wxUint16
);
974 if (buf
&& len
<n
) *buf
=0;
980 // swap 32bit String to 16bit MB
981 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
985 while (*psz
&& (!buf
|| len
< n
))
988 size_t pa
=encode_utf16(*psz
, cc
);
990 if (pa
== (size_t)-1)
995 *buf
++ = ((char*)cc
)[1];
996 *buf
++ = ((char*)cc
)[0];
999 *buf
++ = ((char*)cc
)[3];
1000 *buf
++ = ((char*)cc
)[2];
1004 len
+= pa
*sizeof(wxUint16
);
1007 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
1015 // ----------------------------------------------------------------------------
1017 // ----------------------------------------------------------------------------
1019 #ifdef WORDS_BIGENDIAN
1020 #define wxMBConvUTF32straight wxMBConvUTF32BE
1021 #define wxMBConvUTF32swap wxMBConvUTF32LE
1023 #define wxMBConvUTF32swap wxMBConvUTF32BE
1024 #define wxMBConvUTF32straight wxMBConvUTF32LE
1028 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1029 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
1034 // copy 32bit MB to 16bit String
1035 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1039 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1043 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
1044 if (pa
== (size_t)-1)
1054 psz
+= sizeof(wxUint32
);
1056 if (buf
&& len
<n
) *buf
=0;
1062 // copy 16bit String to 32bit MB
1063 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1067 while (*psz
&& (!buf
|| len
< n
))
1071 // cast is ok for WC_UTF16
1072 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
1073 if (pa
== (size_t)-1)
1078 *(wxUint32
*)buf
= cc
;
1079 buf
+= sizeof(wxUint32
);
1081 len
+= sizeof(wxUint32
);
1085 if (buf
&& len
<=n
-sizeof(wxUint32
))
1093 // swap 32bit MB to 16bit String
1094 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1098 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1101 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
1102 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
1107 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
1108 if (pa
== (size_t)-1)
1118 psz
+= sizeof(wxUint32
);
1128 // swap 16bit String to 32bit MB
1129 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1133 while (*psz
&& (!buf
|| len
< n
))
1137 // cast is ok for WC_UTF16
1138 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
1139 if (pa
== (size_t)-1)
1149 len
+= sizeof(wxUint32
);
1153 if (buf
&& len
<=n
-sizeof(wxUint32
))
1162 // copy 32bit MB to 32bit String
1163 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1167 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1170 *buf
++ = (wchar_t)(*(wxUint32
*)psz
);
1172 psz
+= sizeof(wxUint32
);
1182 // copy 32bit String to 32bit MB
1183 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1187 while (*psz
&& (!buf
|| len
< n
))
1191 *(wxUint32
*)buf
= *psz
;
1192 buf
+= sizeof(wxUint32
);
1195 len
+= sizeof(wxUint32
);
1199 if (buf
&& len
<=n
-sizeof(wxUint32
))
1206 // swap 32bit MB to 32bit String
1207 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1211 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1215 ((char *)buf
)[0] = psz
[3];
1216 ((char *)buf
)[1] = psz
[2];
1217 ((char *)buf
)[2] = psz
[1];
1218 ((char *)buf
)[3] = psz
[0];
1222 psz
+= sizeof(wxUint32
);
1232 // swap 32bit String to 32bit MB
1233 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1237 while (*psz
&& (!buf
|| len
< n
))
1241 *buf
++ = ((char *)psz
)[3];
1242 *buf
++ = ((char *)psz
)[2];
1243 *buf
++ = ((char *)psz
)[1];
1244 *buf
++ = ((char *)psz
)[0];
1246 len
+= sizeof(wxUint32
);
1250 if (buf
&& len
<=n
-sizeof(wxUint32
))
1260 // ============================================================================
1261 // The classes doing conversion using the iconv_xxx() functions
1262 // ============================================================================
1266 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1267 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1268 // (unless there's yet another bug in glibc) the only case when iconv()
1269 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1270 // left in the input buffer -- when _real_ error occurs,
1271 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1273 // [This bug does not appear in glibc 2.2.]
1274 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1275 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1276 (errno != E2BIG || bufLeft != 0))
1278 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1281 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1283 #define ICONV_T_INVALID ((iconv_t)-1)
1285 #if SIZEOF_WCHAR_T == 4
1286 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1287 #define WC_ENC wxFONTENCODING_UTF32
1288 #elif SIZEOF_WCHAR_T == 2
1289 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1290 #define WC_ENC wxFONTENCODING_UTF16
1291 #else // sizeof(wchar_t) != 2 nor 4
1292 // does this ever happen?
1293 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1296 // ----------------------------------------------------------------------------
1297 // wxMBConv_iconv: encapsulates an iconv character set
1298 // ----------------------------------------------------------------------------
1300 class wxMBConv_iconv
: public wxMBConv
1303 wxMBConv_iconv(const wxChar
*name
);
1304 virtual ~wxMBConv_iconv();
1306 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1307 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1310 { return (m2w
!= ICONV_T_INVALID
) && (w2m
!= ICONV_T_INVALID
); }
1313 // the iconv handlers used to translate from multibyte to wide char and in
1314 // the other direction
1318 // guards access to m2w and w2m objects
1319 wxMutex m_iconvMutex
;
1323 // the name (for iconv_open()) of a wide char charset -- if none is
1324 // available on this machine, it will remain NULL
1325 static wxString ms_wcCharsetName
;
1327 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1328 // different endian-ness than the native one
1329 static bool ms_wcNeedsSwap
;
1332 // make the constructor available for unit testing
1333 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_iconv( const wxChar
* name
)
1335 wxMBConv_iconv
* result
= new wxMBConv_iconv( name
);
1336 if ( !result
->IsOk() )
1344 wxString
wxMBConv_iconv::ms_wcCharsetName
;
1345 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1347 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1349 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1350 // names for the charsets
1351 const wxCharBuffer
cname(wxString(name
).ToAscii());
1353 // check for charset that represents wchar_t:
1354 if ( ms_wcCharsetName
.empty() )
1356 wxLogTrace(TRACE_STRCONV
, _T("Looking for wide char codeset:"));
1359 const wxChar
**names
= wxFontMapperBase::GetAllEncodingNames(WC_ENC
);
1360 #else // !wxUSE_FONTMAP
1361 static const wxChar
*names
[] =
1363 #if SIZEOF_WCHAR_T == 4
1365 #elif SIZEOF_WCHAR_T = 2
1370 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1372 for ( ; *names
&& ms_wcCharsetName
.empty(); ++names
)
1374 const wxString
nameCS(*names
);
1376 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1377 wxString
nameXE(nameCS
);
1378 #ifdef WORDS_BIGENDIAN
1380 #else // little endian
1384 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1387 m2w
= iconv_open(nameXE
.ToAscii(), cname
);
1388 if ( m2w
== ICONV_T_INVALID
)
1390 // try charset w/o bytesex info (e.g. "UCS4")
1391 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1393 m2w
= iconv_open(nameCS
.ToAscii(), cname
);
1395 // and check for bytesex ourselves:
1396 if ( m2w
!= ICONV_T_INVALID
)
1398 char buf
[2], *bufPtr
;
1399 wchar_t wbuf
[2], *wbufPtr
;
1407 outsz
= SIZEOF_WCHAR_T
* 2;
1411 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1412 (char**)&wbufPtr
, &outsz
);
1414 if (ICONV_FAILED(res
, insz
))
1416 wxLogLastError(wxT("iconv"));
1417 wxLogError(_("Conversion to charset '%s' doesn't work."),
1420 else // ok, can convert to this encoding, remember it
1422 ms_wcCharsetName
= nameCS
;
1423 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1427 else // use charset not requiring byte swapping
1429 ms_wcCharsetName
= nameXE
;
1433 wxLogTrace(TRACE_STRCONV
,
1434 wxT("iconv wchar_t charset is \"%s\"%s"),
1435 ms_wcCharsetName
.empty() ? _T("<none>")
1436 : ms_wcCharsetName
.c_str(),
1437 ms_wcNeedsSwap
? _T(" (needs swap)")
1440 else // we already have ms_wcCharsetName
1442 m2w
= iconv_open(ms_wcCharsetName
.ToAscii(), cname
);
1445 if ( ms_wcCharsetName
.empty() )
1447 w2m
= ICONV_T_INVALID
;
1451 w2m
= iconv_open(cname
, ms_wcCharsetName
.ToAscii());
1452 if ( w2m
== ICONV_T_INVALID
)
1454 wxLogTrace(TRACE_STRCONV
,
1455 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1456 ms_wcCharsetName
.c_str(), cname
.data());
1461 wxMBConv_iconv::~wxMBConv_iconv()
1463 if ( m2w
!= ICONV_T_INVALID
)
1465 if ( w2m
!= ICONV_T_INVALID
)
1469 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1472 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1473 // Unfortunately there is a couple of global wxCSConv objects such as
1474 // wxConvLocal that are used all over wx code, so we have to make sure
1475 // the handle is used by at most one thread at the time. Otherwise
1476 // only a few wx classes would be safe to use from non-main threads
1477 // as MB<->WC conversion would fail "randomly".
1478 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1481 size_t inbuf
= strlen(psz
);
1482 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1484 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1485 wchar_t *bufPtr
= buf
;
1486 const char *pszPtr
= psz
;
1490 // have destination buffer, convert there
1492 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1493 (char**)&bufPtr
, &outbuf
);
1494 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1498 // convert to native endianness
1499 for ( unsigned i
= 0; i
< res
; i
++ )
1500 buf
[n
] = WC_BSWAP(buf
[i
]);
1503 // NB: iconv was given only strlen(psz) characters on input, and so
1504 // it couldn't convert the trailing zero. Let's do it ourselves
1505 // if there's some room left for it in the output buffer.
1511 // no destination buffer... convert using temp buffer
1512 // to calculate destination buffer requirement
1517 outbuf
= 8*SIZEOF_WCHAR_T
;
1520 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1521 (char**)&bufPtr
, &outbuf
);
1523 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1524 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1527 if (ICONV_FAILED(cres
, inbuf
))
1529 //VS: it is ok if iconv fails, hence trace only
1530 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1537 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1540 // NB: explained in MB2WC
1541 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1544 size_t inlen
= wxWcslen(psz
);
1545 size_t inbuf
= inlen
* SIZEOF_WCHAR_T
;
1549 wchar_t *tmpbuf
= 0;
1553 // need to copy to temp buffer to switch endianness
1554 // (doing WC_BSWAP twice on the original buffer won't help, as it
1555 // could be in read-only memory, or be accessed in some other thread)
1556 tmpbuf
= (wchar_t *)malloc(inbuf
+ SIZEOF_WCHAR_T
);
1557 for ( size_t i
= 0; i
< inlen
; i
++ )
1558 tmpbuf
[n
] = WC_BSWAP(psz
[i
]);
1559 tmpbuf
[inlen
] = L
'\0';
1565 // have destination buffer, convert there
1566 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1570 // NB: iconv was given only wcslen(psz) characters on input, and so
1571 // it couldn't convert the trailing zero. Let's do it ourselves
1572 // if there's some room left for it in the output buffer.
1578 // no destination buffer... convert using temp buffer
1579 // to calculate destination buffer requirement
1583 buf
= tbuf
; outbuf
= 16;
1585 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1588 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1596 if (ICONV_FAILED(cres
, inbuf
))
1598 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1605 #endif // HAVE_ICONV
1608 // ============================================================================
1609 // Win32 conversion classes
1610 // ============================================================================
1612 #ifdef wxHAVE_WIN32_MB2WC
1616 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1617 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1620 class wxMBConv_win32
: public wxMBConv
1625 m_CodePage
= CP_ACP
;
1629 wxMBConv_win32(const wxChar
* name
)
1631 m_CodePage
= wxCharsetToCodepage(name
);
1634 wxMBConv_win32(wxFontEncoding encoding
)
1636 m_CodePage
= wxEncodingToCodepage(encoding
);
1640 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1642 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1643 // the behaviour is not compatible with the Unix version (using iconv)
1644 // and break the library itself, e.g. wxTextInputStream::NextChar()
1645 // wouldn't work if reading an incomplete MB char didn't result in an
1648 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1649 // an error (tested under Windows Server 2003) and apparently it is
1650 // done on purpose, i.e. the function accepts any input in this case
1651 // and although I'd prefer to return error on ill-formed output, our
1652 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1653 // explicitly ill-formed according to RFC 2152) neither so we don't
1654 // even have any fallback here...
1655 int flags
= m_CodePage
== CP_UTF7
? 0 : MB_ERR_INVALID_CHARS
;
1657 const size_t len
= ::MultiByteToWideChar
1659 m_CodePage
, // code page
1660 flags
, // flags: fall on error
1661 psz
, // input string
1662 -1, // its length (NUL-terminated)
1663 buf
, // output string
1664 buf
? n
: 0 // size of output buffer
1667 // note that it returns count of written chars for buf != NULL and size
1668 // of the needed buffer for buf == NULL so in either case the length of
1669 // the string (which never includes the terminating NUL) is one less
1670 return len
? len
- 1 : (size_t)-1;
// Convert the NUL-terminated wide string pwz to the multibyte encoding
// identified by m_CodePage using ::WideCharToMultiByte().
//
// buf is the output buffer and n its size; when buf is NULL a size of 0 is
// passed to the API instead of n.
//
// Two strategies are used to detect lossy conversions:
//  - if CanUseNoBestFit() holds and m_CodePage is below 50000, the call is
//    made with WC_NO_BEST_FIT_CHARS and the API's "default char used" output
//    flag is collected in usedDef;
//  - otherwise the output is converted back with MB2WC() and compared with
//    pwz using wcscmp().
//
// NOTE(review): the return statements are not among the visible lines; the
// trailing comment suggests the result is "len - 1" on success -- confirm.
1673 size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
1676 we have a problem here: by default, WideCharToMultiByte() may
1677 replace characters unrepresentable in the target code page with bad
1678 quality approximations such as turning "1/2" symbol (U+00BD) into
1679 "1" for the code pages which don't have it and we, obviously, want
1680 to avoid this at any price
1682 the trouble is that this function does it _silently_, i.e. it won't
1683 even tell us whether it did or not... Win98/2000 and higher provide
1684 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1685 we have to resort to a round trip, i.e. check that converting back
1686 results in the same string -- this is, of course, expensive but
1687 otherwise we simply can't be sure to not garble the data.
1690 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1691 // it doesn't work with CJK encodings (which we test for rather roughly
1692 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1694 BOOL usedDef
wxDUMMY_INITIALIZE(false);
1697 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
1699 // it's our lucky day
1700 flags
= WC_NO_BEST_FIT_CHARS
;
1701 pUsedDef
= &usedDef
;
1703 else // old system or unsupported encoding
1709 const size_t len
= ::WideCharToMultiByte
1711 m_CodePage
, // code page
1712 flags
, // either none or no best fit
1713 pwz
, // input string
1714 -1, // it is (wide) NUL-terminated
1715 buf
, // output buffer
1716 buf
? n
: 0, // and its size
1717 NULL
, // default "replacement" char
1718 pUsedDef
// [out] was it used?
1723 // function totally failed
1727 // if we were really converting, check if we succeeded
1732 // check if the conversion failed, i.e. if any replacements
1737 else // we must resort to double tripping...
1739 wxWCharBuffer
wcBuf(n
);
1740 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
1741 wcscmp(wcBuf
, pwz
) != 0 )
1743 // we didn't obtain the same thing we started from, hence
1744 // the conversion was lossy and we consider that it failed
1750 // see the comment above for the reason of "len - 1"
1754 bool IsOk() const { return m_CodePage
!= -1; }
// Tell whether the WC_NO_BEST_FIT_CHARS flag may be used on the running
// system (WC2MB() only requests the flag when this returns true).
//
// The answer is derived once from wxGetOsVersion() and then cached in the
// function-local static s_isWin98Or2k: -1 means "not determined yet" and the
// function returns true only when the cached value is 1.
//
// NOTE(review): the case labels of the switch are not among the visible
// lines; the two visible assignments test "major >= 4 && minor >= 10" and
// "major >= 5" respectively -- confirm which OS family each belongs to.
1757 static bool CanUseNoBestFit()
1759 static int s_isWin98Or2k
= -1;
1761 if ( s_isWin98Or2k
== -1 )
1764 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
1767 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
1771 s_isWin98Or2k
= verMaj
>= 5;
1775 // unknown, be conservative by default
1779 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
1782 return s_isWin98Or2k
== 1;
1788 #endif // wxHAVE_WIN32_MB2WC
1790 // ============================================================================
1791 // Cocoa conversion classes
1792 // ============================================================================
1794 #if defined(__WXCOCOA__)
1796 // RN: There is no UTF-32 support in either Core Foundation or
1797 // Cocoa. Strangely enough, Core Foundation uses UTF-32
1798 // internally quite a bit - it's just not public (yet).
1800 #include <CoreFoundation/CFString.h>
1801 #include <CoreFoundation/CFStringEncodingExt.h>
1803 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
1805 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
1806 if ( encoding
== wxFONTENCODING_DEFAULT
)
1808 enc
= CFStringGetSystemEncoding();
1810 else switch( encoding
)
1812 case wxFONTENCODING_ISO8859_1
:
1813 enc
= kCFStringEncodingISOLatin1
;
1815 case wxFONTENCODING_ISO8859_2
:
1816 enc
= kCFStringEncodingISOLatin2
;
1818 case wxFONTENCODING_ISO8859_3
:
1819 enc
= kCFStringEncodingISOLatin3
;
1821 case wxFONTENCODING_ISO8859_4
:
1822 enc
= kCFStringEncodingISOLatin4
;
1824 case wxFONTENCODING_ISO8859_5
:
1825 enc
= kCFStringEncodingISOLatinCyrillic
;
1827 case wxFONTENCODING_ISO8859_6
:
1828 enc
= kCFStringEncodingISOLatinArabic
;
1830 case wxFONTENCODING_ISO8859_7
:
1831 enc
= kCFStringEncodingISOLatinGreek
;
1833 case wxFONTENCODING_ISO8859_8
:
1834 enc
= kCFStringEncodingISOLatinHebrew
;
1836 case wxFONTENCODING_ISO8859_9
:
1837 enc
= kCFStringEncodingISOLatin5
;
1839 case wxFONTENCODING_ISO8859_10
:
1840 enc
= kCFStringEncodingISOLatin6
;
1842 case wxFONTENCODING_ISO8859_11
:
1843 enc
= kCFStringEncodingISOLatinThai
;
1845 case wxFONTENCODING_ISO8859_13
:
1846 enc
= kCFStringEncodingISOLatin7
;
1848 case wxFONTENCODING_ISO8859_14
:
1849 enc
= kCFStringEncodingISOLatin8
;
1851 case wxFONTENCODING_ISO8859_15
:
1852 enc
= kCFStringEncodingISOLatin9
;
1855 case wxFONTENCODING_KOI8
:
1856 enc
= kCFStringEncodingKOI8_R
;
1858 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
1859 enc
= kCFStringEncodingDOSRussian
;
1862 // case wxFONTENCODING_BULGARIAN :
1866 case wxFONTENCODING_CP437
:
1867 enc
=kCFStringEncodingDOSLatinUS
;
1869 case wxFONTENCODING_CP850
:
1870 enc
= kCFStringEncodingDOSLatin1
;
1872 case wxFONTENCODING_CP852
:
1873 enc
= kCFStringEncodingDOSLatin2
;
1875 case wxFONTENCODING_CP855
:
1876 enc
= kCFStringEncodingDOSCyrillic
;
1878 case wxFONTENCODING_CP866
:
1879 enc
=kCFStringEncodingDOSRussian
;
1881 case wxFONTENCODING_CP874
:
1882 enc
= kCFStringEncodingDOSThai
;
1884 case wxFONTENCODING_CP932
:
1885 enc
= kCFStringEncodingDOSJapanese
;
1887 case wxFONTENCODING_CP936
:
1888 enc
=kCFStringEncodingDOSChineseSimplif
;
1890 case wxFONTENCODING_CP949
:
1891 enc
= kCFStringEncodingDOSKorean
;
1893 case wxFONTENCODING_CP950
:
1894 enc
= kCFStringEncodingDOSChineseTrad
;
1896 case wxFONTENCODING_CP1250
:
1897 enc
= kCFStringEncodingWindowsLatin2
;
1899 case wxFONTENCODING_CP1251
:
1900 enc
=kCFStringEncodingWindowsCyrillic
;
1902 case wxFONTENCODING_CP1252
:
1903 enc
=kCFStringEncodingWindowsLatin1
;
1905 case wxFONTENCODING_CP1253
:
1906 enc
= kCFStringEncodingWindowsGreek
;
1908 case wxFONTENCODING_CP1254
:
1909 enc
= kCFStringEncodingWindowsLatin5
;
1911 case wxFONTENCODING_CP1255
:
1912 enc
=kCFStringEncodingWindowsHebrew
;
1914 case wxFONTENCODING_CP1256
:
1915 enc
=kCFStringEncodingWindowsArabic
;
1917 case wxFONTENCODING_CP1257
:
1918 enc
= kCFStringEncodingWindowsBalticRim
;
1920 // This only really encodes to UTF7 (if that) evidently
1921 // case wxFONTENCODING_UTF7 :
1922 // enc = kCFStringEncodingNonLossyASCII ;
1924 case wxFONTENCODING_UTF8
:
1925 enc
= kCFStringEncodingUTF8
;
1927 case wxFONTENCODING_EUC_JP
:
1928 enc
= kCFStringEncodingEUC_JP
;
1930 case wxFONTENCODING_UTF16
:
1931 enc
= kCFStringEncodingUnicode
;
1933 case wxFONTENCODING_MACROMAN
:
1934 enc
= kCFStringEncodingMacRoman
;
1936 case wxFONTENCODING_MACJAPANESE
:
1937 enc
= kCFStringEncodingMacJapanese
;
1939 case wxFONTENCODING_MACCHINESETRAD
:
1940 enc
= kCFStringEncodingMacChineseTrad
;
1942 case wxFONTENCODING_MACKOREAN
:
1943 enc
= kCFStringEncodingMacKorean
;
1945 case wxFONTENCODING_MACARABIC
:
1946 enc
= kCFStringEncodingMacArabic
;
1948 case wxFONTENCODING_MACHEBREW
:
1949 enc
= kCFStringEncodingMacHebrew
;
1951 case wxFONTENCODING_MACGREEK
:
1952 enc
= kCFStringEncodingMacGreek
;
1954 case wxFONTENCODING_MACCYRILLIC
:
1955 enc
= kCFStringEncodingMacCyrillic
;
1957 case wxFONTENCODING_MACDEVANAGARI
:
1958 enc
= kCFStringEncodingMacDevanagari
;
1960 case wxFONTENCODING_MACGURMUKHI
:
1961 enc
= kCFStringEncodingMacGurmukhi
;
1963 case wxFONTENCODING_MACGUJARATI
:
1964 enc
= kCFStringEncodingMacGujarati
;
1966 case wxFONTENCODING_MACORIYA
:
1967 enc
= kCFStringEncodingMacOriya
;
1969 case wxFONTENCODING_MACBENGALI
:
1970 enc
= kCFStringEncodingMacBengali
;
1972 case wxFONTENCODING_MACTAMIL
:
1973 enc
= kCFStringEncodingMacTamil
;
1975 case wxFONTENCODING_MACTELUGU
:
1976 enc
= kCFStringEncodingMacTelugu
;
1978 case wxFONTENCODING_MACKANNADA
:
1979 enc
= kCFStringEncodingMacKannada
;
1981 case wxFONTENCODING_MACMALAJALAM
:
1982 enc
= kCFStringEncodingMacMalayalam
;
1984 case wxFONTENCODING_MACSINHALESE
:
1985 enc
= kCFStringEncodingMacSinhalese
;
1987 case wxFONTENCODING_MACBURMESE
:
1988 enc
= kCFStringEncodingMacBurmese
;
1990 case wxFONTENCODING_MACKHMER
:
1991 enc
= kCFStringEncodingMacKhmer
;
1993 case wxFONTENCODING_MACTHAI
:
1994 enc
= kCFStringEncodingMacThai
;
1996 case wxFONTENCODING_MACLAOTIAN
:
1997 enc
= kCFStringEncodingMacLaotian
;
1999 case wxFONTENCODING_MACGEORGIAN
:
2000 enc
= kCFStringEncodingMacGeorgian
;
2002 case wxFONTENCODING_MACARMENIAN
:
2003 enc
= kCFStringEncodingMacArmenian
;
2005 case wxFONTENCODING_MACCHINESESIMP
:
2006 enc
= kCFStringEncodingMacChineseSimp
;
2008 case wxFONTENCODING_MACTIBETAN
:
2009 enc
= kCFStringEncodingMacTibetan
;
2011 case wxFONTENCODING_MACMONGOLIAN
:
2012 enc
= kCFStringEncodingMacMongolian
;
2014 case wxFONTENCODING_MACETHIOPIC
:
2015 enc
= kCFStringEncodingMacEthiopic
;
2017 case wxFONTENCODING_MACCENTRALEUR
:
2018 enc
= kCFStringEncodingMacCentralEurRoman
;
2020 case wxFONTENCODING_MACVIATNAMESE
:
2021 enc
= kCFStringEncodingMacVietnamese
;
2023 case wxFONTENCODING_MACARABICEXT
:
2024 enc
= kCFStringEncodingMacExtArabic
;
2026 case wxFONTENCODING_MACSYMBOL
:
2027 enc
= kCFStringEncodingMacSymbol
;
2029 case wxFONTENCODING_MACDINGBATS
:
2030 enc
= kCFStringEncodingMacDingbats
;
2032 case wxFONTENCODING_MACTURKISH
:
2033 enc
= kCFStringEncodingMacTurkish
;
2035 case wxFONTENCODING_MACCROATIAN
:
2036 enc
= kCFStringEncodingMacCroatian
;
2038 case wxFONTENCODING_MACICELANDIC
:
2039 enc
= kCFStringEncodingMacIcelandic
;
2041 case wxFONTENCODING_MACROMANIAN
:
2042 enc
= kCFStringEncodingMacRomanian
;
2044 case wxFONTENCODING_MACCELTIC
:
2045 enc
= kCFStringEncodingMacCeltic
;
2047 case wxFONTENCODING_MACGAELIC
:
2048 enc
= kCFStringEncodingMacGaelic
;
2050 // case wxFONTENCODING_MACKEYBOARD :
2051 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2054 // because gcc is picky
// wxMBConv implementation that converts through Core Foundation CFString
// objects, using the CFStringEncoding kept in m_encoding (set by Init()).
//
// MB2WC() creates a CFString from the input bytes with
// CFStringCreateWithBytes() and copies its characters out with
// CFStringGetCharacters(). WC2MB() wraps the wide input with
// CFStringCreateWithCharactersNoCopy() (kCFAllocatorNull, so the buffer stays
// owned by this code). When SIZEOF_WCHAR_T == 4 both directions go through a
// temporary UniChar buffer and wxMBConvUTF16.
//
// The validity check at the end of the class requires m_encoding to differ
// from kCFStringEncodingInvalidId and CFStringIsEncodingAvailable() to accept
// it.
//
// NOTE(review): MB2WC() builds theRange from nOutSize and writes a terminator
// at szUniCharBuffer[nOutLength] into a buffer of nOutSize elements -- confirm
// that callers guarantee nOutLength < nOutSize.
// NOTE(review): in the visible lines szUniCharBuffer is only declared under
// SIZEOF_WCHAR_T == 4 -- confirm how the 2-byte wchar_t build declares it.
2060 class wxMBConv_cocoa
: public wxMBConv
2065 Init(CFStringGetSystemEncoding()) ;
2069 wxMBConv_cocoa(const wxChar
* name
)
2071 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2075 wxMBConv_cocoa(wxFontEncoding encoding
)
2077 Init( wxCFStringEncFromFontEnc(encoding
) );
2084 void Init( CFStringEncoding encoding
)
2086 m_encoding
= encoding
;
2089 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2093 CFStringRef theString
= CFStringCreateWithBytes (
2094 NULL
, //the allocator
2095 (const UInt8
*)szUnConv
,
2098 false //no BOM/external representation
2101 wxASSERT(theString
);
2103 size_t nOutLength
= CFStringGetLength(theString
);
2107 CFRelease(theString
);
2111 CFRange theRange
= { 0, nOutSize
};
2113 #if SIZEOF_WCHAR_T == 4
2114 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2117 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2119 CFRelease(theString
);
2121 szUniCharBuffer
[nOutLength
] = '\0' ;
2123 #if SIZEOF_WCHAR_T == 4
2124 wxMBConvUTF16 converter
;
2125 converter
.MB2WC(szOut
, (const char*)szUniCharBuffer
, nOutSize
) ;
2126 delete[] szUniCharBuffer
;
2132 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2136 size_t nRealOutSize
;
2137 size_t nBufSize
= wxWcslen(szUnConv
);
2138 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
2140 #if SIZEOF_WCHAR_T == 4
2141 wxMBConvUTF16 converter
;
2142 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2143 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1] ;
2144 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
)) ;
2145 nBufSize
/= sizeof(UniChar
);
2148 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2152 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
2155 wxASSERT(theString
);
2157 //Note that CER puts a BOM when converting to unicode
2158 //so we check and use getchars instead in that case
2159 if (m_encoding
== kCFStringEncodingUnicode
)
2162 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2164 nRealOutSize
= CFStringGetLength(theString
) + 1;
2170 CFRangeMake(0, CFStringGetLength(theString
)),
2172 0, //what to put in characters that can't be converted -
2173 //0 tells CFString to return NULL if it meets such a character
2174 false, //not an external representation
2177 (CFIndex
*) &nRealOutSize
2181 CFRelease(theString
);
2183 #if SIZEOF_WCHAR_T == 4
2184 delete[] szUniBuffer
;
2187 return nRealOutSize
- 1;
2192 return m_encoding
!= kCFStringEncodingInvalidId
&&
2193 CFStringIsEncodingAvailable(m_encoding
);
2197 CFStringEncoding m_encoding
;
2200 #endif // defined(__WXCOCOA__)
2202 // ============================================================================
2203 // Mac conversion classes
2204 // ============================================================================
2206 #if defined(__WXMAC__) && defined(TARGET_CARBON)
// wxMBConv implementation for Carbon built on the Text Encoding Converter
// (TEC) API.
//
// Init() stores the requested encoding in m_char_encoding, creates a 16-bit
// Unicode TextEncoding in m_unicode_encoding and creates the two converter
// objects m_MB2WC_converter and m_WC2MB_converter with TECCreateConverter();
// both are later passed to TECDisposeConverter().
//
// MB2WC() and WC2MB() both call TECConvertText(). When SIZEOF_WCHAR_T == 4
// the 16-bit UniChar data is bridged to/from wchar_t through wxMBConvUTF16
// and a malloc()ed intermediate buffer. When no output buffer is supplied a
// temporary one (tbuf) of at least 32 units is allocated.
// WC2MB() additionally converts its output back with MB2WC() and compares
// the result with the input to detect lossy conversions.
//
// NOTE(review): the OSStatus results of the TEC calls are assigned to
// "status" but no check of them is visible in these lines -- confirm.
2208 class wxMBConv_mac
: public wxMBConv
2213 Init(CFStringGetSystemEncoding()) ;
2217 wxMBConv_mac(const wxChar
* name
)
2219 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2223 wxMBConv_mac(wxFontEncoding encoding
)
2225 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
2230 OSStatus status
= noErr
;
2231 status
= TECDisposeConverter(m_MB2WC_converter
);
2232 status
= TECDisposeConverter(m_WC2MB_converter
);
2236 void Init( TextEncodingBase encoding
)
2238 OSStatus status
= noErr
;
2239 m_char_encoding
= encoding
;
2240 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
2242 status
= TECCreateConverter(&m_MB2WC_converter
,
2244 m_unicode_encoding
);
2245 status
= TECCreateConverter(&m_WC2MB_converter
,
2250 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2252 OSStatus status
= noErr
;
2253 ByteCount byteOutLen
;
2254 ByteCount byteInLen
= strlen(psz
) ;
2255 wchar_t *tbuf
= NULL
;
2256 UniChar
* ubuf
= NULL
;
2261 //apple specs say at least 32
2262 n
= wxMax( 32 , byteInLen
) ;
2263 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
2265 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
2266 #if SIZEOF_WCHAR_T == 4
2267 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2269 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2271 status
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2272 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2273 #if SIZEOF_WCHAR_T == 4
2274 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2275 // is not properly terminated we get random characters at the end
2276 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2277 wxMBConvUTF16 converter
;
2278 res
= converter
.MB2WC( (buf
? buf
: tbuf
) , (const char*)ubuf
, n
) ;
2281 res
= byteOutLen
/ sizeof( UniChar
) ;
2286 if ( buf
&& res
< n
)
2292 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2294 OSStatus status
= noErr
;
2295 ByteCount byteOutLen
;
2296 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2302 //apple specs say at least 32
2303 n
= wxMax( 32 , ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2304 tbuf
= (char*) malloc( n
) ;
2307 ByteCount byteBufferLen
= n
;
2308 UniChar
* ubuf
= NULL
;
2309 #if SIZEOF_WCHAR_T == 4
2310 wxMBConvUTF16 converter
;
2311 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2312 byteInLen
= unicharlen
;
2313 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2314 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2316 ubuf
= (UniChar
*) psz
;
2318 status
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2319 (TextPtr
) (buf
? buf
: tbuf
) , byteBufferLen
, &byteOutLen
);
2320 #if SIZEOF_WCHAR_T == 4
2326 size_t res
= byteOutLen
;
2327 if ( buf
&& res
< n
)
2331 //we need to double-trip to verify it didn't insert any ? in place
2332 //of bogus characters
2333 wxWCharBuffer
wcBuf(n
);
2334 size_t pszlen
= wxWcslen(psz
);
2335 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
2336 wxWcslen(wcBuf
) != pszlen
||
2337 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2339 // we didn't obtain the same thing we started from, hence
2340 // the conversion was lossy and we consider that it failed
2349 { return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
; }
2352 TECObjectRef m_MB2WC_converter
;
2353 TECObjectRef m_WC2MB_converter
;
2355 TextEncodingBase m_char_encoding
;
2356 TextEncodingBase m_unicode_encoding
;
2359 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2361 // ============================================================================
2362 // wxEncodingConverter based conversion classes
2363 // ============================================================================
// wxMBConv implementation backed by two wxEncodingConverter objects:
// m2w is initialised to convert from m_enc to wxFONTENCODING_UNICODE and
// w2m from wxFONTENCODING_UNICODE to m_enc. m_ok is true only if both
// Init() calls succeeded and is what IsOk() reports.
//
// The encoding is either looked up from a charset name through
// wxFontMapperBase::CharsetToEncoding() (non-interactively) or passed in
// directly. MB2WC() and WC2MB() ignore their size argument (WXUNUSED(n)) and
// delegate to the Convert() method of the respective converter.
// The class is non-copyable (DECLARE_NO_COPY_CLASS).
2367 class wxMBConv_wxwin
: public wxMBConv
2372 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2373 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2377 // temporarily just use wxEncodingConverter stuff,
2378 // so that it works while a better implementation is built
2379 wxMBConv_wxwin(const wxChar
* name
)
2382 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2384 m_enc
= wxFONTENCODING_SYSTEM
;
2389 wxMBConv_wxwin(wxFontEncoding enc
)
2396 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2398 size_t inbuf
= strlen(psz
);
2401 if (!m2w
.Convert(psz
,buf
))
2407 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2409 const size_t inbuf
= wxWcslen(psz
);
2412 if (!w2m
.Convert(psz
,buf
))
2419 bool IsOk() const { return m_ok
; }
2422 wxFontEncoding m_enc
;
2423 wxEncodingConverter m2w
, w2m
;
2425 // were we initialized successfully?
2428 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2431 // make the constructors available for unit testing
// Allocates a wxMBConv_wxwin for the given charset name and checks it with
// IsOk() before handing it out.
// NOTE(review): what happens when IsOk() fails is not among the visible
// lines -- confirm that the object is released and a null pointer returned.
2432 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_wxwin( const wxChar
* name
)
2434 wxMBConv_wxwin
* result
= new wxMBConv_wxwin( name
);
2435 if ( !result
->IsOk() )
2443 #endif // wxUSE_FONTMAP
2445 // ============================================================================
2446 // wxCSConv implementation
2447 // ============================================================================
2449 void wxCSConv::Init()
// Construct a converter from a charset name. m_encoding is set to
// wxFONTENCODING_SYSTEM here.
// NOTE(review): how charset itself is stored is not among the visible lines.
2456 wxCSConv::wxCSConv(const wxChar
*charset
)
2465 m_encoding
= wxFONTENCODING_SYSTEM
;
// Construct a converter for the given font encoding.
// wxFONTENCODING_MAX and wxFONTENCODING_DEFAULT are not valid here: they
// trigger wxFAIL_MSG and are replaced by wxFONTENCODING_SYSTEM before the
// value is stored in m_encoding.
2468 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2470 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2472 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2474 encoding
= wxFONTENCODING_SYSTEM
;
2479 m_encoding
= encoding
;
2482 wxCSConv::~wxCSConv()
// Copy constructor: takes over the charset name of conv via SetName() (which
// stores its own duplicate of the string) and copies its m_encoding.
2487 wxCSConv::wxCSConv(const wxCSConv
& conv
)
2492 SetName(conv
.m_name
);
2493 m_encoding
= conv
.m_encoding
;
// Assignment: takes over the charset name of conv via SetName() (which stores
// its own duplicate of the string) and copies its m_encoding.
2496 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2500 SetName(conv
.m_name
);
2501 m_encoding
= conv
.m_encoding
;
2506 void wxCSConv::Clear()
// Remember the charset name: m_name receives a wxStrdup() duplicate of
// charset rather than the caller's pointer.
2515 void wxCSConv::SetName(const wxChar
*charset
)
2519 m_name
= wxStrdup(charset
);
2525 #include "wx/hashmap.h"
// Hash map type keyed by wxFontEncoding with wxString values, and the
// file-local instance of it. wxCSConv::DoCreate() uses it to remember, per
// encoding, the charset name that wxMBConv_iconv accepted; an empty string
// records that none did.
2527 WX_DECLARE_HASH_MAP( wxFontEncoding
, wxString
, wxIntegerHash
, wxIntegerEqual
,
2528 wxEncodingNameCache
);
2530 static wxEncodingNameCache gs_nameCache
;
// Create the wxMBConv object that performs the actual conversion for this
// wxCSConv, based on m_name and m_encoding.
//
// wxFONTENCODING_ISO8859_1 is special-cased up front ("don't convert at
// all"). Otherwise the candidates visible below are tried in this order:
//   - wxMBConv_iconv (HAVE_ICONV), first with the charset name, then with
//     the names known for the encoding; the outcome is cached per encoding
//     in gs_nameCache, an empty string recording a failure;
//   - wxMBConv_win32 (wxHAVE_WIN32_MB2WC);
//   - wxMBConv_mac (__WXMAC__) and wxMBConv_cocoa (__WXCOCOA__), limited to
//     the encoding ranges tested in the respective conditions;
//   - the built-in wxMBConvUTF7/UTF8/UTF16BE/UTF16LE/UTF32BE/UTF32LE classes;
//   - wxMBConv_wxwin as the fallback (wxUSE_FONTMAP).
// If none of them is returned, an error is reported with wxLogError(); the
// static alreadyLoggingError flag keeps that report from being re-entered.
2533 wxMBConv
*wxCSConv::DoCreate() const
2536 wxLogTrace(TRACE_STRCONV
,
2537 wxT("creating conversion for %s"),
2539 : wxFontMapperBase::GetEncodingName(m_encoding
).c_str()));
2540 #endif // wxUSE_FONTMAP
2542 // check for the special case of ASCII or ISO8859-1 charset: as we have
2543 // special knowledge of it anyhow, we don't need to create a special
2544 // conversion object
2545 if ( m_encoding
== wxFONTENCODING_ISO8859_1
)
2547 // don't convert at all
2551 // we trust OS to do conversion better than we can so try external
2552 // conversion methods first
2554 // the full order is:
2555 // 1. OS conversion (iconv() under Unix or Win32 API)
2556 // 2. hard coded conversions for UTF
2557 // 3. wxEncodingConverter as fall back
2563 #endif // !wxUSE_FONTMAP
2565 wxString
name(m_name
);
2566 wxFontEncoding
encoding(m_encoding
);
2568 if ( !name
.empty() )
2570 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
2578 wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2579 #endif // wxUSE_FONTMAP
2583 const wxEncodingNameCache::iterator it
= gs_nameCache
.find(encoding
);
2584 if ( it
!= gs_nameCache
.end() )
2586 if ( it
->second
.empty() )
2589 wxMBConv_iconv
*conv
= new wxMBConv_iconv(it
->second
);
2596 const wxChar
** names
= wxFontMapperBase::GetAllEncodingNames(encoding
);
2598 for ( ; *names
; ++names
)
2600 wxMBConv_iconv
*conv
= new wxMBConv_iconv(*names
);
2603 gs_nameCache
[encoding
] = *names
;
2610 gs_nameCache
[encoding
] = _T(""); // cache the failure
2612 #endif // wxUSE_FONTMAP
2614 #endif // HAVE_ICONV
2616 #ifdef wxHAVE_WIN32_MB2WC
2619 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2620 : new wxMBConv_win32(m_encoding
);
2629 #endif // wxHAVE_WIN32_MB2WC
2630 #if defined(__WXMAC__)
2632 // leave UTF16 and UTF32 to the built-ins of wx
2633 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
2634 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
2638 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
2639 : new wxMBConv_mac(m_encoding
);
2641 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
2650 #if defined(__WXCOCOA__)
2652 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
2656 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
2657 : new wxMBConv_cocoa(m_encoding
);
2659 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
2669 wxFontEncoding enc
= m_encoding
;
2671 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2673 // use "false" to suppress interactive dialogs -- we can be called from
2674 // anywhere and popping up a dialog from here is the last thing we want to
2676 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
2678 #endif // wxUSE_FONTMAP
2682 case wxFONTENCODING_UTF7
:
2683 return new wxMBConvUTF7
;
2685 case wxFONTENCODING_UTF8
:
2686 return new wxMBConvUTF8
;
2688 case wxFONTENCODING_UTF16BE
:
2689 return new wxMBConvUTF16BE
;
2691 case wxFONTENCODING_UTF16LE
:
2692 return new wxMBConvUTF16LE
;
2694 case wxFONTENCODING_UTF32BE
:
2695 return new wxMBConvUTF32BE
;
2697 case wxFONTENCODING_UTF32LE
:
2698 return new wxMBConvUTF32LE
;
2701 // nothing to do but put here to suppress gcc warnings
2708 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2709 : new wxMBConv_wxwin(m_encoding
);
2715 #endif // wxUSE_FONTMAP
2717 // NB: This is a hack to prevent deadlock. What could otherwise happen
2718 // in Unicode build: wxConvLocal creation ends up being here
2719 // because of some failure and logs the error. But wxLog will try to
2720 // attach timestamp, for which it will need wxConvLocal (to convert
2721 // time to char* and then wchar_t*), but that fails, tries to log
2722 // error, but wxLog has a (already locked) critical section that
2723 // guards static buffer.
2724 static bool alreadyLoggingError
= false;
2725 if (!alreadyLoggingError
)
2727 alreadyLoggingError
= true;
2728 wxLogError(_("Cannot convert from the charset '%s'!"),
2732 wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str()
2733 #else // !wxUSE_FONTMAP
2734 wxString::Format(_("encoding %s"), m_encoding
).c_str()
2735 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2737 alreadyLoggingError
= false;
// Create the real converter on demand: m_convReal is set from DoCreate() and
// m_deferred is cleared. The method is const, so the members are updated
// through a non-const alias of this.
// If neither a name nor a specific encoding is set, a duplicate of
// wxLocale::GetSystemEncodingName() is stored in m_name first (wxUSE_INTL).
2743 void wxCSConv::CreateConvIfNeeded() const
2747 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2750 // if we have neither the name nor the encoding, use the default
2751 // encoding for this system
2752 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
2754 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
2756 #endif // wxUSE_INTL
2758 self
->m_convReal
= DoCreate();
2759 self
->m_deferred
= false;
// Multibyte to wide conversion. After CreateConvIfNeeded() the call is
// forwarded to m_convReal->MB2WC(). The fallback path copies the input
// element by element, widening each char through unsigned char; the loop
// runs up to and including index len, so the terminating NUL is copied too.
// NOTE(review): the conditions selecting between the two paths, and any
// bound check against n, are not among the visible lines.
2763 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2765 CreateConvIfNeeded();
2768 return m_convReal
->MB2WC(buf
, psz
, n
);
2771 size_t len
= strlen(psz
);
2775 for (size_t c
= 0; c
<= len
; c
++)
2776 buf
[c
] = (unsigned char)(psz
[c
]);
// Wide to multibyte conversion. After CreateConvIfNeeded() the call is
// forwarded to m_convReal->WC2MB(). The fallback path walks the input up to
// and including index len (i.e. including the terminating NUL) and narrows
// each wchar_t to char with a cast.
// NOTE(review): the body of the second loop and the conditions selecting
// between the paths are not among the visible lines.
2782 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2784 CreateConvIfNeeded();
2787 return m_convReal
->WC2MB(buf
, psz
, n
);
2790 const size_t len
= wxWcslen(psz
);
2793 for (size_t c
= 0; c
<= len
; c
++)
2797 buf
[c
] = (char)psz
[c
];
2802 for (size_t c
= 0; c
<= len
; c
++)
2812 // ----------------------------------------------------------------------------
2814 // ----------------------------------------------------------------------------
// File-local converter objects and the exported references/pointers bound to
// them. wxConvLibcObj has a platform-dependent type (wxMBConv_win32,
// wxMBConv_mac or wxMBConvLibc); wxConvLocal is a wxCSConv for
// wxFONTENCODING_SYSTEM and wxConvISO8859_1 one for wxFONTENCODING_ISO8859_1.
// wxConvCurrent initially points at wxConvLibcObj.
2817 static wxMBConv_win32 wxConvLibcObj
;
2818 #elif defined(__WXMAC__) && !defined(__MACH__)
2819 static wxMBConv_mac wxConvLibcObj
;
2821 static wxMBConvLibc wxConvLibcObj
;
2824 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
2825 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
2826 static wxMBConvUTF7 wxConvUTF7Obj
;
2827 static wxMBConvUTF8 wxConvUTF8Obj
;
2829 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
2830 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
2831 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
2832 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
2833 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
2834 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
2835 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
= &
2843 #else // !wxUSE_WCHAR_T
2845 // stand-ins in absence of wchar_t
2846 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
2851 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T