1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // ============================================================================
17 // ============================================================================
19 // ----------------------------------------------------------------------------
21 // ----------------------------------------------------------------------------
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
35 #include "wx/strconv.h"
40 #include "wx/msw/private.h"
41 #include "wx/msw/missing.h"
52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54 #endif // __WIN32__ but !__WXMICROWIN__
62 #include "wx/thread.h"
65 #include "wx/encconv.h"
66 #include "wx/fontmap.h"
71 #include <ATSUnicode.h>
72 #include <TextCommon.h>
73 #include <TextEncodingConverter.h>
76 #include "wx/mac/private.h" // includes mac headers
79 #define TRACE_STRCONV _T("strconv")
81 #if SIZEOF_WCHAR_T == 2
85 // ============================================================================
87 // ============================================================================
89 // ----------------------------------------------------------------------------
90 // UTF-16 en/decoding to/from UCS-4
91 // ----------------------------------------------------------------------------
94 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
99 *output
= (wxUint16
) input
;
102 else if (input
>=0x110000)
110 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
111 *output
= (wxUint16
) ((input
&0x3ff)+0xdc00);
117 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
119 if ((*input
<0xd800) || (*input
>0xdfff))
124 else if ((input
[1]<0xdc00) || (input
[1]>0xdfff))
131 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
137 // ----------------------------------------------------------------------------
139 // ----------------------------------------------------------------------------
141 wxMBConv::~wxMBConv()
143 // nothing to do here (necessary for Darwin linking probably)
146 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
150 // calculate the length of the buffer needed first
151 size_t nLen
= MB2WC(NULL
, psz
, 0);
152 if ( nLen
!= (size_t)-1 )
154 // now do the actual conversion
155 wxWCharBuffer
buf(nLen
);
156 nLen
= MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NULL
157 if ( nLen
!= (size_t)-1 )
164 wxWCharBuffer
buf((wchar_t *)NULL
);
169 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
173 size_t nLen
= WC2MB(NULL
, pwz
, 0);
174 if ( nLen
!= (size_t)-1 )
176 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
177 nLen
= WC2MB(buf
.data(), pwz
, nLen
+ 4);
178 if ( nLen
!= (size_t)-1 )
185 wxCharBuffer
buf((char *)NULL
);
190 const wxWCharBuffer
wxMBConv::cMB2WC(const char *szString
, size_t nStringLen
, size_t* pOutSize
) const
192 wxASSERT(pOutSize
!= NULL
);
194 const char* szEnd
= szString
+ nStringLen
+ 1;
195 const char* szPos
= szString
;
196 const char* szStart
= szPos
;
198 size_t nActualLength
= 0;
199 size_t nCurrentSize
= nStringLen
; //try normal size first (should never resize?)
201 wxWCharBuffer
theBuffer(nCurrentSize
);
203 //Convert the string until the length() is reached, continuing the
204 //loop every time a null character is reached
205 while(szPos
!= szEnd
)
207 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
209 //Get the length of the current (sub)string
210 size_t nLen
= MB2WC(NULL
, szPos
, 0);
212 //Invalid conversion?
213 if( nLen
== (size_t)-1 )
216 theBuffer
.data()[0u] = wxT('\0');
221 //Increase the actual length (+1 for current null character)
222 nActualLength
+= nLen
+ 1;
224 //if buffer too small, realloc the buffer
225 if (nActualLength
> (nCurrentSize
+1))
227 wxWCharBuffer
theNewBuffer(nCurrentSize
<< 1);
228 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
* sizeof(wchar_t));
229 theBuffer
= theNewBuffer
;
233 //Convert the current (sub)string
234 if ( MB2WC(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
237 theBuffer
.data()[0u] = wxT('\0');
241 //Increment to next (sub)string
242 //Note that we have to use strlen instead of nLen here
243 //because XX2XX gives us the size of the output buffer,
244 //which is not necessarily the length of the string
245 szPos
+= strlen(szPos
) + 1;
248 //success - return actual length and the buffer
249 *pOutSize
= nActualLength
;
253 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *szString
, size_t nStringLen
, size_t* pOutSize
) const
255 wxASSERT(pOutSize
!= NULL
);
257 const wchar_t* szEnd
= szString
+ nStringLen
+ 1;
258 const wchar_t* szPos
= szString
;
259 const wchar_t* szStart
= szPos
;
261 size_t nActualLength
= 0;
262 size_t nCurrentSize
= nStringLen
<< 2; //try * 4 first
264 wxCharBuffer
theBuffer(nCurrentSize
);
266 //Convert the string until the length() is reached, continuing the
267 //loop every time a null character is reached
268 while(szPos
!= szEnd
)
270 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
272 //Get the length of the current (sub)string
273 size_t nLen
= WC2MB(NULL
, szPos
, 0);
275 //Invalid conversion?
276 if( nLen
== (size_t)-1 )
279 theBuffer
.data()[0u] = wxT('\0');
283 //Increase the actual length (+1 for current null character)
284 nActualLength
+= nLen
+ 1;
286 //if buffer too small, realloc the buffer
287 if (nActualLength
> (nCurrentSize
+1))
289 wxCharBuffer
theNewBuffer(nCurrentSize
<< 1);
290 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
);
291 theBuffer
= theNewBuffer
;
295 //Convert the current (sub)string
296 if(WC2MB(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
299 theBuffer
.data()[0u] = wxT('\0');
303 //Increment to next (sub)string
304 //Note that we have to use wxWcslen instead of nLen here
305 //because XX2XX gives us the size of the output buffer,
306 //which is not necessarily the length of the string
307 szPos
+= wxWcslen(szPos
) + 1;
310 //success - return actual length and the buffer
311 *pOutSize
= nActualLength
;
315 // ----------------------------------------------------------------------------
317 // ----------------------------------------------------------------------------
319 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
321 return wxMB2WC(buf
, psz
, n
);
324 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
326 return wxWC2MB(buf
, psz
, n
);
331 // ----------------------------------------------------------------------------
332 // wxConvBrokenFileNames
333 // ----------------------------------------------------------------------------
335 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar
*charset
)
337 if ( !charset
|| wxStricmp(charset
, _T("UTF-8")) == 0
338 || wxStricmp(charset
, _T("UTF8")) == 0 )
339 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
341 m_conv
= new wxCSConv(charset
);
345 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf
,
347 size_t outputSize
) const
349 return m_conv
->MB2WC( outputBuf
, psz
, outputSize
);
353 wxConvBrokenFileNames::WC2MB(char *outputBuf
,
355 size_t outputSize
) const
357 return m_conv
->WC2MB( outputBuf
, psz
, outputSize
);
362 // ----------------------------------------------------------------------------
364 // ----------------------------------------------------------------------------
366 // Implementation (C) 2004 Fredrik Roubert
369 // BASE64 decoding table
371 static const unsigned char utf7unb64
[] =
373 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
374 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
375 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
376 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
377 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
378 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
379 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
380 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
381 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
382 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
383 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
384 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
385 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
386 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
387 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
388 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
389 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
390 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
391 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
392 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
393 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
394 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
395 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
396 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
397 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
398 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
399 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
400 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
401 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
402 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
403 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
404 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
407 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
411 while ( *psz
&& (!buf
|| (len
< n
)) )
413 unsigned char cc
= *psz
++;
421 else if (*psz
== '-')
429 else // start of BASE64 encoded string
433 for ( ok
= lsb
= false, d
= 0, l
= 0;
434 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff;
439 for (l
+= 6; l
>= 8; lsb
= !lsb
)
441 unsigned char c
= (unsigned char)((d
>> (l
-= 8)) % 256);
451 *buf
= (wchar_t)(c
<< 8);
460 // in valid UTF7 we should have valid characters after '+'
469 if ( buf
&& (len
< n
) )
476 // BASE64 encoding table
478 static const unsigned char utf7enb64
[] =
480 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
481 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
482 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
483 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
484 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
485 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
486 'w', 'x', 'y', 'z', '0', '1', '2', '3',
487 '4', '5', '6', '7', '8', '9', '+', '/'
491 // UTF-7 encoding table
493 // 0 - Set D (directly encoded characters)
494 // 1 - Set O (optional direct characters)
495 // 2 - whitespace characters (optional)
496 // 3 - special characters
498 static const unsigned char utf7encode
[128] =
500 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
501 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
502 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
504 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
506 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
510 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
516 while (*psz
&& ((!buf
) || (len
< n
)))
519 if (cc
< 0x80 && utf7encode
[cc
] < 1)
527 else if (((wxUint32
)cc
) > 0xffff)
529 // no surrogate pair generation (yet?)
540 // BASE64 encode string
541 unsigned int lsb
, d
, l
;
542 for (d
= 0, l
= 0; /*nothing*/; psz
++)
544 for (lsb
= 0; lsb
< 2; lsb
++)
547 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
549 for (l
+= 8; l
>= 6; )
553 *buf
++ = utf7enb64
[(d
>> l
) % 64];
558 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
564 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
573 if (buf
&& (len
< n
))
578 // ----------------------------------------------------------------------------
580 // ----------------------------------------------------------------------------
582 static wxUint32 utf8_max
[]=
583 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
585 // boundaries of the private use area we use to (temporarily) remap
586 // characters that are invalid in a UTF-8 encoded string
587 const wxUint32 wxUnicodePUA
= 0x100000;
588 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
590 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
594 while (*psz
&& ((!buf
) || (len
< n
)))
596 const char *opsz
= psz
;
597 bool invalid
= false;
598 unsigned char cc
= *psz
++, fc
= cc
;
600 for (cnt
= 0; fc
& 0x80; cnt
++)
609 // escape the escape character for octal escapes
610 if ((m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
611 && cc
== '\\' && (!buf
|| len
< n
))
623 // invalid UTF-8 sequence
628 unsigned ocnt
= cnt
- 1;
629 wxUint32 res
= cc
& (0x3f >> cnt
);
633 if ((cc
& 0xC0) != 0x80)
635 // invalid UTF-8 sequence
640 res
= (res
<< 6) | (cc
& 0x3f);
642 if (invalid
|| res
<= utf8_max
[ocnt
])
644 // illegal UTF-8 encoding
647 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
648 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
650 // if one of our PUA characters turns up externally
651 // it must also be treated as an illegal sequence
652 // (a bit like you have to escape an escape character)
658 // cast is ok because wchar_t == wxUint16 if WC_UTF16
659 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
660 if (pa
== (size_t)-1)
672 *buf
++ = (wchar_t)res
;
674 #endif // WC_UTF16/!WC_UTF16
679 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
681 while (opsz
< psz
&& (!buf
|| len
< n
))
684 // cast is ok because wchar_t == wxUint16 if WC_UTF16
685 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
686 wxASSERT(pa
!= (size_t)-1);
693 *buf
++ = (wchar_t)(wxUnicodePUA
+ (unsigned char)*opsz
);
699 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
701 while (opsz
< psz
&& (!buf
|| len
< n
))
703 if ( buf
&& len
+ 3 < n
)
705 unsigned char on
= *opsz
;
707 *buf
++ = (wchar_t)( L
'0' + on
/ 0100 );
708 *buf
++ = (wchar_t)( L
'0' + (on
% 0100) / 010 );
709 *buf
++ = (wchar_t)( L
'0' + on
% 010 );
715 else // MAP_INVALID_UTF8_NOT
722 if (buf
&& (len
< n
))
727 static inline bool isoctal(wchar_t wch
)
729 return L
'0' <= wch
&& wch
<= L
'7';
732 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
736 while (*psz
&& ((!buf
) || (len
< n
)))
740 // cast is ok for WC_UTF16
741 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
742 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
744 cc
=(*psz
++) & 0x7fffffff;
747 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
748 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
751 *buf
++ = (char)(cc
- wxUnicodePUA
);
754 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
755 && cc
== L
'\\' && psz
[0] == L
'\\' )
762 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
764 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
768 *buf
++ = (char) ((psz
[0] - L
'0')*0100 +
769 (psz
[1] - L
'0')*010 +
779 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
793 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
795 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
807 // ----------------------------------------------------------------------------
809 // ----------------------------------------------------------------------------
811 #ifdef WORDS_BIGENDIAN
812 #define wxMBConvUTF16straight wxMBConvUTF16BE
813 #define wxMBConvUTF16swap wxMBConvUTF16LE
815 #define wxMBConvUTF16swap wxMBConvUTF16BE
816 #define wxMBConvUTF16straight wxMBConvUTF16LE
822 // copy 16bit MB to 16bit String
823 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
827 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
830 *buf
++ = *(wxUint16
*)psz
;
833 psz
+= sizeof(wxUint16
);
835 if (buf
&& len
<n
) *buf
=0;
841 // copy 16bit String to 16bit MB
842 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
846 while (*psz
&& (!buf
|| len
< n
))
850 *(wxUint16
*)buf
= *psz
;
851 buf
+= sizeof(wxUint16
);
853 len
+= sizeof(wxUint16
);
// Append the 2-byte terminator only when it still fits within n. The sum
// form avoids the unsigned wrap-around of n - sizeof(wxUint16), which made
// the old check pass (and write out of bounds) whenever n < sizeof(wxUint16).
856 if (buf && len + sizeof(wxUint16) <= n) *(wxUint16*)buf=0;
862 // swap 16bit MB to 16bit String
863 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
867 // UTF16 string must be terminated by 2 NULs as single NULs may occur
869 while ( (psz
[0] || psz
[1]) && (!buf
|| len
< n
) )
873 ((char *)buf
)[0] = psz
[1];
874 ((char *)buf
)[1] = psz
[0];
881 if ( buf
&& len
< n
)
888 // swap 16bit String to 16bit MB
889 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
893 while (*psz
&& (!buf
|| len
< n
))
897 *buf
++ = ((char*)psz
)[1];
898 *buf
++ = ((char*)psz
)[0];
900 len
+= sizeof(wxUint16
);
// Append the 2-byte terminator only when it still fits within n. The sum
// form avoids the unsigned wrap-around of n - sizeof(wxUint16), which made
// the old check pass (and write out of bounds) whenever n < sizeof(wxUint16).
903 if (buf && len + sizeof(wxUint16) <= n) *(wxUint16*)buf=0;
912 // copy 16bit MB to 32bit String
913 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
917 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
920 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
921 if (pa
== (size_t)-1)
925 *buf
++ = (wchar_t)cc
;
927 psz
+= pa
* sizeof(wxUint16
);
929 if (buf
&& len
<n
) *buf
=0;
935 // copy 32bit String to 16bit MB
936 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
940 while (*psz
&& (!buf
|| len
< n
))
943 size_t pa
=encode_utf16(*psz
, cc
);
945 if (pa
== (size_t)-1)
950 *(wxUint16
*)buf
= cc
[0];
951 buf
+= sizeof(wxUint16
);
954 *(wxUint16
*)buf
= cc
[1];
955 buf
+= sizeof(wxUint16
);
959 len
+= pa
*sizeof(wxUint16
);
// Append the 2-byte terminator only when it still fits within n. The sum
// form avoids the unsigned wrap-around of n - sizeof(wxUint16), which made
// the old check pass (and write out of bounds) whenever n < sizeof(wxUint16).
962 if (buf && len + sizeof(wxUint16) <= n) *(wxUint16*)buf=0;
968 // swap 16bit MB to 32bit String
969 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
973 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
977 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
978 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
980 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
981 if (pa
== (size_t)-1)
985 *buf
++ = (wchar_t)cc
;
988 psz
+= pa
* sizeof(wxUint16
);
990 if (buf
&& len
<n
) *buf
=0;
996 // swap 32bit String to 16bit MB
997 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1001 while (*psz
&& (!buf
|| len
< n
))
1004 size_t pa
=encode_utf16(*psz
, cc
);
1006 if (pa
== (size_t)-1)
1011 *buf
++ = ((char*)cc
)[1];
1012 *buf
++ = ((char*)cc
)[0];
1015 *buf
++ = ((char*)cc
)[3];
1016 *buf
++ = ((char*)cc
)[2];
1020 len
+= pa
*sizeof(wxUint16
);
// Append the 2-byte terminator only when it still fits within n. The sum
// form avoids the unsigned wrap-around of n - sizeof(wxUint16), which made
// the old check pass (and write out of bounds) whenever n < sizeof(wxUint16).
1023 if (buf && len + sizeof(wxUint16) <= n) *(wxUint16*)buf=0;
1031 // ----------------------------------------------------------------------------
1033 // ----------------------------------------------------------------------------
1035 #ifdef WORDS_BIGENDIAN
1036 #define wxMBConvUTF32straight wxMBConvUTF32BE
1037 #define wxMBConvUTF32swap wxMBConvUTF32LE
1039 #define wxMBConvUTF32swap wxMBConvUTF32BE
1040 #define wxMBConvUTF32straight wxMBConvUTF32LE
1044 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1045 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
1050 // copy 32bit MB to 16bit String
1051 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1055 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1059 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
1060 if (pa
== (size_t)-1)
1070 psz
+= sizeof(wxUint32
);
1072 if (buf
&& len
<n
) *buf
=0;
1078 // copy 16bit String to 32bit MB
1079 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1083 while (*psz
&& (!buf
|| len
< n
))
1087 // cast is ok for WC_UTF16
1088 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
1089 if (pa
== (size_t)-1)
1094 *(wxUint32
*)buf
= cc
;
1095 buf
+= sizeof(wxUint32
);
1097 len
+= sizeof(wxUint32
);
// Proceed only when one more 4-byte unit still fits within n. The sum form
// avoids the unsigned wrap-around of n - sizeof(wxUint32), which made the
// old check succeed whenever n < sizeof(wxUint32).
1101 if (buf && len + sizeof(wxUint32) <= n)
1109 // swap 32bit MB to 16bit String
1110 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1114 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1117 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
1118 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
1123 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
1124 if (pa
== (size_t)-1)
1134 psz
+= sizeof(wxUint32
);
1144 // swap 16bit String to 32bit MB
1145 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1149 while (*psz
&& (!buf
|| len
< n
))
1153 // cast is ok for WC_UTF16
1154 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
1155 if (pa
== (size_t)-1)
1165 len
+= sizeof(wxUint32
);
// Proceed only when one more 4-byte unit still fits within n. The sum form
// avoids the unsigned wrap-around of n - sizeof(wxUint32), which made the
// old check succeed whenever n < sizeof(wxUint32).
1169 if (buf && len + sizeof(wxUint32) <= n)
1178 // copy 32bit MB to 32bit String
1179 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1183 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1186 *buf
++ = (wchar_t)(*(wxUint32
*)psz
);
1188 psz
+= sizeof(wxUint32
);
1198 // copy 32bit String to 32bit MB
1199 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1203 while (*psz
&& (!buf
|| len
< n
))
1207 *(wxUint32
*)buf
= *psz
;
1208 buf
+= sizeof(wxUint32
);
1211 len
+= sizeof(wxUint32
);
// Proceed only when one more 4-byte unit still fits within n. The sum form
// avoids the unsigned wrap-around of n - sizeof(wxUint32), which made the
// old check succeed whenever n < sizeof(wxUint32).
1215 if (buf && len + sizeof(wxUint32) <= n)
1222 // swap 32bit MB to 32bit String
1223 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1227 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1231 ((char *)buf
)[0] = psz
[3];
1232 ((char *)buf
)[1] = psz
[2];
1233 ((char *)buf
)[2] = psz
[1];
1234 ((char *)buf
)[3] = psz
[0];
1238 psz
+= sizeof(wxUint32
);
1248 // swap 32bit String to 32bit MB
1249 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1253 while (*psz
&& (!buf
|| len
< n
))
1257 *buf
++ = ((char *)psz
)[3];
1258 *buf
++ = ((char *)psz
)[2];
1259 *buf
++ = ((char *)psz
)[1];
1260 *buf
++ = ((char *)psz
)[0];
1262 len
+= sizeof(wxUint32
);
// Proceed only when one more 4-byte unit still fits within n. The sum form
// avoids the unsigned wrap-around of n - sizeof(wxUint32), which made the
// old check succeed whenever n < sizeof(wxUint32).
1266 if (buf && len + sizeof(wxUint32) <= n)
1276 // ============================================================================
1277 // The classes doing conversion using the iconv_xxx() functions
1278 // ============================================================================
1282 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1283 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1284 // (unless there's yet another bug in glibc) the only case when iconv()
1285 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1286 // left in the input buffer -- when _real_ error occurs,
1287 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1289 // [This bug does not appear in glibc 2.2.]
1290 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1291 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1292 (errno != E2BIG || bufLeft != 0))
1294 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1297 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1299 #define ICONV_T_INVALID ((iconv_t)-1)
1301 #if SIZEOF_WCHAR_T == 4
1302 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1303 #define WC_ENC wxFONTENCODING_UTF32
1304 #elif SIZEOF_WCHAR_T == 2
1305 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1306 #define WC_ENC wxFONTENCODING_UTF16
1307 #else // sizeof(wchar_t) != 2 nor 4
1308 // does this ever happen?
1309 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1312 // ----------------------------------------------------------------------------
1313 // wxMBConv_iconv: encapsulates an iconv character set
1314 // ----------------------------------------------------------------------------
1316 class wxMBConv_iconv
: public wxMBConv
1319 wxMBConv_iconv(const wxChar
*name
);
1320 virtual ~wxMBConv_iconv();
1322 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1323 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1326 { return (m2w
!= ICONV_T_INVALID
) && (w2m
!= ICONV_T_INVALID
); }
1329 // the iconv handlers used to translate from multibyte to wide char and in
1330 // the other direction
1334 // guards access to m2w and w2m objects
1335 wxMutex m_iconvMutex
;
1339 // the name (for iconv_open()) of a wide char charset -- if none is
1340 // available on this machine, it will remain empty
1341 static wxString ms_wcCharsetName
;
1343 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1344 // different endian-ness than the native one
1345 static bool ms_wcNeedsSwap
;
1348 // make the constructor available for unit testing
1349 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_iconv( const wxChar
* name
)
1351 wxMBConv_iconv
* result
= new wxMBConv_iconv( name
);
1352 if ( !result
->IsOk() )
1360 wxString
wxMBConv_iconv::ms_wcCharsetName
;
1361 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1363 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1365 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1366 // names for the charsets
1367 const wxCharBuffer
cname(wxString(name
).ToAscii());
1369 // check for charset that represents wchar_t:
1370 if ( ms_wcCharsetName
.empty() )
1372 wxLogTrace(TRACE_STRCONV
, _T("Looking for wide char codeset:"));
1375 const wxChar
**names
= wxFontMapperBase::GetAllEncodingNames(WC_ENC
);
1376 #else // !wxUSE_FONTMAP
1377 static const wxChar
*names
[] =
1379 #if SIZEOF_WCHAR_T == 4
// Preprocessor conditions must compare with '=='; a single '=' is not a
// valid operator in an #elif expression and breaks the build whenever this
// branch has to be evaluated (i.e. when SIZEOF_WCHAR_T is not 4).
1381 #elif SIZEOF_WCHAR_T == 2
1386 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1388 for ( ; *names
&& ms_wcCharsetName
.empty(); ++names
)
1390 const wxString
nameCS(*names
);
1392 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1393 wxString
nameXE(nameCS
);
1394 #ifdef WORDS_BIGENDIAN
1396 #else // little endian
1400 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1403 m2w
= iconv_open(nameXE
.ToAscii(), cname
);
1404 if ( m2w
== ICONV_T_INVALID
)
1406 // try charset w/o bytesex info (e.g. "UCS4")
1407 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1409 m2w
= iconv_open(nameCS
.ToAscii(), cname
);
1411 // and check for bytesex ourselves:
1412 if ( m2w
!= ICONV_T_INVALID
)
1414 char buf
[2], *bufPtr
;
1415 wchar_t wbuf
[2], *wbufPtr
;
1423 outsz
= SIZEOF_WCHAR_T
* 2;
1427 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1428 (char**)&wbufPtr
, &outsz
);
1430 if (ICONV_FAILED(res
, insz
))
1432 wxLogLastError(wxT("iconv"));
1433 wxLogError(_("Conversion to charset '%s' doesn't work."),
1436 else // ok, can convert to this encoding, remember it
1438 ms_wcCharsetName
= nameCS
;
1439 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1443 else // use charset not requiring byte swapping
1445 ms_wcCharsetName
= nameXE
;
1449 wxLogTrace(TRACE_STRCONV
,
1450 wxT("iconv wchar_t charset is \"%s\"%s"),
1451 ms_wcCharsetName
.empty() ? _T("<none>")
1452 : ms_wcCharsetName
.c_str(),
1453 ms_wcNeedsSwap
? _T(" (needs swap)")
1456 else // we already have ms_wcCharsetName
1458 m2w
= iconv_open(ms_wcCharsetName
.ToAscii(), cname
);
1461 if ( ms_wcCharsetName
.empty() )
1463 w2m
= ICONV_T_INVALID
;
1467 w2m
= iconv_open(cname
, ms_wcCharsetName
.ToAscii());
1468 if ( w2m
== ICONV_T_INVALID
)
1470 wxLogTrace(TRACE_STRCONV
,
1471 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1472 ms_wcCharsetName
.c_str(), cname
.data());
1477 wxMBConv_iconv::~wxMBConv_iconv()
1479 if ( m2w
!= ICONV_T_INVALID
)
1481 if ( w2m
!= ICONV_T_INVALID
)
1485 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1488 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1489 // Unfortunately there are a couple of global wxCSConv objects such as
1490 // wxConvLocal that are used all over wx code, so we have to make sure
1491 // the handle is used by at most one thread at a time. Otherwise
1492 // only a few wx classes would be safe to use from non-main threads
1493 // as MB<->WC conversion would fail "randomly".
1494 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1497 size_t inbuf
= strlen(psz
);
1498 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1500 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1501 wchar_t *bufPtr
= buf
;
1502 const char *pszPtr
= psz
;
1506 // have destination buffer, convert there
1508 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1509 (char**)&bufPtr
, &outbuf
);
1510 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1514 // convert to native endianness: byte-swap each converted character in place.
// The destination index must be the loop counter i; indexing with n stored
// every result in the single slot buf[n] and left buf[0..res) unswapped.
1515 for ( unsigned i = 0; i < res; i++ )
1516 buf[i] = WC_BSWAP(buf[i]);
1519 // NB: iconv was given only strlen(psz) characters on input, and so
1520 // it couldn't convert the trailing zero. Let's do it ourselves
1521 // if there's some room left for it in the output buffer.
1527 // no destination buffer... convert using temp buffer
1528 // to calculate destination buffer requirement
1533 outbuf
= 8*SIZEOF_WCHAR_T
;
1536 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1537 (char**)&bufPtr
, &outbuf
);
1539 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1540 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1543 if (ICONV_FAILED(cres
, inbuf
))
1545 //VS: it is ok if iconv fails, hence trace only
1546 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1553 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1556 // NB: explained in MB2WC
1557 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1560 size_t inlen
= wxWcslen(psz
);
1561 size_t inbuf
= inlen
* SIZEOF_WCHAR_T
;
1565 wchar_t *tmpbuf
= 0;
1569 // need to copy to temp buffer to switch endianness
1570 // (doing WC_BSWAP twice on the original buffer won't help, as it
1571 // could be in read-only memory, or be accessed in some other thread)
1572 tmpbuf
= (wchar_t *)malloc(inbuf
+ SIZEOF_WCHAR_T
);
// Fill the temporary copy with the byte-swapped input, element by element.
// The destination index must be the loop counter i: n is the size of the
// caller's output buffer and has no relation to the inlen + 1 elements
// allocated for tmpbuf, so tmpbuf[n] left the copy unfilled.
1573 for ( size_t i = 0; i < inlen; i++ )
1574 tmpbuf[i] = WC_BSWAP(psz[i]);
1575 tmpbuf
[inlen
] = L
'\0';
1581 // have destination buffer, convert there
1582 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1586 // NB: iconv was given only wcslen(psz) characters on input, and so
1587 // it couldn't convert the trailing zero. Let's do it ourselves
1588 // if there's some room left for it in the output buffer.
1594 // no destination buffer... convert using temp buffer
1595 // to calculate destination buffer requirement
1599 buf
= tbuf
; outbuf
= 16;
1601 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1604 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1612 if (ICONV_FAILED(cres
, inbuf
))
1614 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1621 #endif // HAVE_ICONV
1624 // ============================================================================
1625 // Win32 conversion classes
1626 // ============================================================================
1628 #ifdef wxHAVE_WIN32_MB2WC
1632 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1633 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1636 class wxMBConv_win32
: public wxMBConv
1641 m_CodePage
= CP_ACP
;
1645 wxMBConv_win32(const wxChar
* name
)
1647 m_CodePage
= wxCharsetToCodepage(name
);
1650 wxMBConv_win32(wxFontEncoding encoding
)
1652 m_CodePage
= wxEncodingToCodepage(encoding
);
1656 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1658 // note that we have to use the MB_ERR_INVALID_CHARS flag as without it
1659 // the behaviour is not compatible with the Unix version (using iconv)
1660 // and breaks the library itself, e.g. wxTextInputStream::NextChar()
1661 // wouldn't work if reading an incomplete MB char didn't result in an
1664 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1665 // an error (tested under Windows Server 2003) and apparently it is
1666 // done on purpose, i.e. the function accepts any input in this case
1667 // and although I'd prefer to return error on ill-formed output, our
1668 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1669 // explicitly ill-formed according to RFC 2152) either, so we don't
1670 // even have any fallback here...
1672 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1673 // Win XP or newer and if it is specified on older versions, conversion
1674 // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1675 // fails. So we can only use the flag on newer Windows versions.
1676 // Additionally, the flag is not supported by UTF7, symbol and CJK
1677 // encodings. See here:
1678 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1679 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1681 if ( m_CodePage
!= CP_UTF7
&& m_CodePage
!= CP_SYMBOL
&&
1682 m_CodePage
< 50000 &&
1683 IsAtLeastWin2kSP4() )
1685 flags
= MB_ERR_INVALID_CHARS
;
1687 else if ( m_CodePage
== CP_UTF8
)
1689 // Avoid round-trip in the special case of UTF-8 by using our
1690 // own UTF-8 conversion code:
1691 return wxMBConvUTF8().MB2WC(buf
, psz
, n
);
1694 const size_t len
= ::MultiByteToWideChar
1696 m_CodePage
, // code page
1697 flags
, // flags: fall on error
1698 psz
, // input string
1699 -1, // its length (NUL-terminated)
1700 buf
, // output string
1701 buf
? n
: 0 // size of output buffer
1705 // function totally failed
1709 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1710 // check if we succeeded, by doing a double trip:
1711 if ( !flags
&& buf
)
1713 const size_t mbLen
= strlen(psz
);
1714 wxCharBuffer
mbBuf(mbLen
);
1715 if ( ::WideCharToMultiByte
1722 mbLen
+ 1, // size in bytes, not length
1726 strcmp(mbBuf
, psz
) != 0 )
1728 // we didn't obtain the same thing we started from, hence
1729 // the conversion was lossy and we consider that it failed
1734 // note that it returns count of written chars for buf != NULL and size
1735 // of the needed buffer for buf == NULL so in either case the length of
1736 // the string (which never includes the terminating NUL) is one less
1740 size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
1743 we have a problem here: by default, WideCharToMultiByte() may
1744 replace characters unrepresentable in the target code page with bad
1745 quality approximations such as turning "1/2" symbol (U+00BD) into
1746 "1" for the code pages which don't have it and we, obviously, want
1747 to avoid this at any price
1749 the trouble is that this function does it _silently_, i.e. it won't
1750 even tell us whether it did or not... Win98/2000 and higher provide
1751 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1752 we have to resort to a round trip, i.e. check that converting back
1753 results in the same string -- this is, of course, expensive but
1754 otherwise we simply can't be sure to not garble the data.
1757 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1758 // it doesn't work with CJK encodings (which we test for rather roughly
1759 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1761 BOOL usedDef
wxDUMMY_INITIALIZE(false);
1764 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
1766 // it's our lucky day
1767 flags
= WC_NO_BEST_FIT_CHARS
;
1768 pUsedDef
= &usedDef
;
1770 else // old system or unsupported encoding
1776 const size_t len
= ::WideCharToMultiByte
1778 m_CodePage
, // code page
1779 flags
, // either none or no best fit
1780 pwz
, // input string
1781 -1, // it is (wide) NUL-terminated
1782 buf
, // output buffer
1783 buf
? n
: 0, // and its size
1784 NULL
, // default "replacement" char
1785 pUsedDef
// [out] was it used?
1790 // function totally failed
1794 // if we were really converting, check if we succeeded
1799 // check if the conversion failed, i.e. if any replacements
1804 else // we must resort to double tripping...
1806 wxWCharBuffer
wcBuf(n
);
1807 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
1808 wcscmp(wcBuf
, pwz
) != 0 )
1810 // we didn't obtain the same thing we started from, hence
1811 // the conversion was lossy and we consider that it failed
1817 // see the comment above for the reason of "len - 1"
1821 bool IsOk() const { return m_CodePage
!= -1; }
1824 static bool CanUseNoBestFit()
1826 static int s_isWin98Or2k
= -1;
1828 if ( s_isWin98Or2k
== -1 )
1831 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
1834 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
1838 s_isWin98Or2k
= verMaj
>= 5;
1842 // unknown, be conservative by default
1846 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
1849 return s_isWin98Or2k
== 1;
1852 static bool IsAtLeastWin2kSP4()
1857 static int s_isAtLeastWin2kSP4
= -1;
1859 if ( s_isAtLeastWin2kSP4
== -1 )
1861 OSVERSIONINFOEX ver
;
1863 memset(&ver
, 0, sizeof(ver
));
1864 ver
.dwOSVersionInfoSize
= sizeof(ver
);
1865 GetVersionEx((OSVERSIONINFO
*)&ver
);
1867 s_isAtLeastWin2kSP4
=
1868 ((ver
.dwMajorVersion
> 5) || // Vista+
1869 (ver
.dwMajorVersion
== 5 && ver
.dwMinorVersion
> 0) || // XP/2003
1870 (ver
.dwMajorVersion
== 5 && ver
.dwMinorVersion
== 0 &&
1871 ver
.wServicePackMajor
>= 4)) // 2000 SP4+
1875 return s_isAtLeastWin2kSP4
== 1;
1882 #endif // wxHAVE_WIN32_MB2WC
1884 // ============================================================================
1885 // Cocoa conversion classes
1886 // ============================================================================
1888 #if defined(__WXCOCOA__)
1890 // RN: There is no UTF-32 support in either Core Foundation or
1891 // Cocoa. Strangely enough, Core Foundation uses UTF-32
1892 // internally quite a bit - it's just not public (yet).
1894 #include <CoreFoundation/CFString.h>
1895 #include <CoreFoundation/CFStringEncodingExt.h>
1897 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
1899 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
1900 if ( encoding
== wxFONTENCODING_DEFAULT
)
1902 enc
= CFStringGetSystemEncoding();
1904 else switch( encoding
)
1906 case wxFONTENCODING_ISO8859_1
:
1907 enc
= kCFStringEncodingISOLatin1
;
1909 case wxFONTENCODING_ISO8859_2
:
1910 enc
= kCFStringEncodingISOLatin2
;
1912 case wxFONTENCODING_ISO8859_3
:
1913 enc
= kCFStringEncodingISOLatin3
;
1915 case wxFONTENCODING_ISO8859_4
:
1916 enc
= kCFStringEncodingISOLatin4
;
1918 case wxFONTENCODING_ISO8859_5
:
1919 enc
= kCFStringEncodingISOLatinCyrillic
;
1921 case wxFONTENCODING_ISO8859_6
:
1922 enc
= kCFStringEncodingISOLatinArabic
;
1924 case wxFONTENCODING_ISO8859_7
:
1925 enc
= kCFStringEncodingISOLatinGreek
;
1927 case wxFONTENCODING_ISO8859_8
:
1928 enc
= kCFStringEncodingISOLatinHebrew
;
1930 case wxFONTENCODING_ISO8859_9
:
1931 enc
= kCFStringEncodingISOLatin5
;
1933 case wxFONTENCODING_ISO8859_10
:
1934 enc
= kCFStringEncodingISOLatin6
;
1936 case wxFONTENCODING_ISO8859_11
:
1937 enc
= kCFStringEncodingISOLatinThai
;
1939 case wxFONTENCODING_ISO8859_13
:
1940 enc
= kCFStringEncodingISOLatin7
;
1942 case wxFONTENCODING_ISO8859_14
:
1943 enc
= kCFStringEncodingISOLatin8
;
1945 case wxFONTENCODING_ISO8859_15
:
1946 enc
= kCFStringEncodingISOLatin9
;
1949 case wxFONTENCODING_KOI8
:
1950 enc
= kCFStringEncodingKOI8_R
;
1952 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
1953 enc
= kCFStringEncodingDOSRussian
;
1956 // case wxFONTENCODING_BULGARIAN :
1960 case wxFONTENCODING_CP437
:
1961 enc
=kCFStringEncodingDOSLatinUS
;
1963 case wxFONTENCODING_CP850
:
1964 enc
= kCFStringEncodingDOSLatin1
;
1966 case wxFONTENCODING_CP852
:
1967 enc
= kCFStringEncodingDOSLatin2
;
1969 case wxFONTENCODING_CP855
:
1970 enc
= kCFStringEncodingDOSCyrillic
;
1972 case wxFONTENCODING_CP866
:
1973 enc
=kCFStringEncodingDOSRussian
;
1975 case wxFONTENCODING_CP874
:
1976 enc
= kCFStringEncodingDOSThai
;
1978 case wxFONTENCODING_CP932
:
1979 enc
= kCFStringEncodingDOSJapanese
;
1981 case wxFONTENCODING_CP936
:
1982 enc
=kCFStringEncodingDOSChineseSimplif
;
1984 case wxFONTENCODING_CP949
:
1985 enc
= kCFStringEncodingDOSKorean
;
1987 case wxFONTENCODING_CP950
:
1988 enc
= kCFStringEncodingDOSChineseTrad
;
1990 case wxFONTENCODING_CP1250
:
1991 enc
= kCFStringEncodingWindowsLatin2
;
1993 case wxFONTENCODING_CP1251
:
1994 enc
=kCFStringEncodingWindowsCyrillic
;
1996 case wxFONTENCODING_CP1252
:
1997 enc
=kCFStringEncodingWindowsLatin1
;
1999 case wxFONTENCODING_CP1253
:
2000 enc
= kCFStringEncodingWindowsGreek
;
2002 case wxFONTENCODING_CP1254
:
2003 enc
= kCFStringEncodingWindowsLatin5
;
2005 case wxFONTENCODING_CP1255
:
2006 enc
=kCFStringEncodingWindowsHebrew
;
2008 case wxFONTENCODING_CP1256
:
2009 enc
=kCFStringEncodingWindowsArabic
;
2011 case wxFONTENCODING_CP1257
:
2012 enc
= kCFStringEncodingWindowsBalticRim
;
2014 // This only really encodes to UTF7 (if that) evidently
2015 // case wxFONTENCODING_UTF7 :
2016 // enc = kCFStringEncodingNonLossyASCII ;
2018 case wxFONTENCODING_UTF8
:
2019 enc
= kCFStringEncodingUTF8
;
2021 case wxFONTENCODING_EUC_JP
:
2022 enc
= kCFStringEncodingEUC_JP
;
2024 case wxFONTENCODING_UTF16
:
2025 enc
= kCFStringEncodingUnicode
;
2027 case wxFONTENCODING_MACROMAN
:
2028 enc
= kCFStringEncodingMacRoman
;
2030 case wxFONTENCODING_MACJAPANESE
:
2031 enc
= kCFStringEncodingMacJapanese
;
2033 case wxFONTENCODING_MACCHINESETRAD
:
2034 enc
= kCFStringEncodingMacChineseTrad
;
2036 case wxFONTENCODING_MACKOREAN
:
2037 enc
= kCFStringEncodingMacKorean
;
2039 case wxFONTENCODING_MACARABIC
:
2040 enc
= kCFStringEncodingMacArabic
;
2042 case wxFONTENCODING_MACHEBREW
:
2043 enc
= kCFStringEncodingMacHebrew
;
2045 case wxFONTENCODING_MACGREEK
:
2046 enc
= kCFStringEncodingMacGreek
;
2048 case wxFONTENCODING_MACCYRILLIC
:
2049 enc
= kCFStringEncodingMacCyrillic
;
2051 case wxFONTENCODING_MACDEVANAGARI
:
2052 enc
= kCFStringEncodingMacDevanagari
;
2054 case wxFONTENCODING_MACGURMUKHI
:
2055 enc
= kCFStringEncodingMacGurmukhi
;
2057 case wxFONTENCODING_MACGUJARATI
:
2058 enc
= kCFStringEncodingMacGujarati
;
2060 case wxFONTENCODING_MACORIYA
:
2061 enc
= kCFStringEncodingMacOriya
;
2063 case wxFONTENCODING_MACBENGALI
:
2064 enc
= kCFStringEncodingMacBengali
;
2066 case wxFONTENCODING_MACTAMIL
:
2067 enc
= kCFStringEncodingMacTamil
;
2069 case wxFONTENCODING_MACTELUGU
:
2070 enc
= kCFStringEncodingMacTelugu
;
2072 case wxFONTENCODING_MACKANNADA
:
2073 enc
= kCFStringEncodingMacKannada
;
2075 case wxFONTENCODING_MACMALAJALAM
:
2076 enc
= kCFStringEncodingMacMalayalam
;
2078 case wxFONTENCODING_MACSINHALESE
:
2079 enc
= kCFStringEncodingMacSinhalese
;
2081 case wxFONTENCODING_MACBURMESE
:
2082 enc
= kCFStringEncodingMacBurmese
;
2084 case wxFONTENCODING_MACKHMER
:
2085 enc
= kCFStringEncodingMacKhmer
;
2087 case wxFONTENCODING_MACTHAI
:
2088 enc
= kCFStringEncodingMacThai
;
2090 case wxFONTENCODING_MACLAOTIAN
:
2091 enc
= kCFStringEncodingMacLaotian
;
2093 case wxFONTENCODING_MACGEORGIAN
:
2094 enc
= kCFStringEncodingMacGeorgian
;
2096 case wxFONTENCODING_MACARMENIAN
:
2097 enc
= kCFStringEncodingMacArmenian
;
2099 case wxFONTENCODING_MACCHINESESIMP
:
2100 enc
= kCFStringEncodingMacChineseSimp
;
2102 case wxFONTENCODING_MACTIBETAN
:
2103 enc
= kCFStringEncodingMacTibetan
;
2105 case wxFONTENCODING_MACMONGOLIAN
:
2106 enc
= kCFStringEncodingMacMongolian
;
2108 case wxFONTENCODING_MACETHIOPIC
:
2109 enc
= kCFStringEncodingMacEthiopic
;
2111 case wxFONTENCODING_MACCENTRALEUR
:
2112 enc
= kCFStringEncodingMacCentralEurRoman
;
2114 case wxFONTENCODING_MACVIATNAMESE
:
2115 enc
= kCFStringEncodingMacVietnamese
;
2117 case wxFONTENCODING_MACARABICEXT
:
2118 enc
= kCFStringEncodingMacExtArabic
;
2120 case wxFONTENCODING_MACSYMBOL
:
2121 enc
= kCFStringEncodingMacSymbol
;
2123 case wxFONTENCODING_MACDINGBATS
:
2124 enc
= kCFStringEncodingMacDingbats
;
2126 case wxFONTENCODING_MACTURKISH
:
2127 enc
= kCFStringEncodingMacTurkish
;
2129 case wxFONTENCODING_MACCROATIAN
:
2130 enc
= kCFStringEncodingMacCroatian
;
2132 case wxFONTENCODING_MACICELANDIC
:
2133 enc
= kCFStringEncodingMacIcelandic
;
2135 case wxFONTENCODING_MACROMANIAN
:
2136 enc
= kCFStringEncodingMacRomanian
;
2138 case wxFONTENCODING_MACCELTIC
:
2139 enc
= kCFStringEncodingMacCeltic
;
2141 case wxFONTENCODING_MACGAELIC
:
2142 enc
= kCFStringEncodingMacGaelic
;
2144 // case wxFONTENCODING_MACKEYBOARD :
2145 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2148 // because gcc is picky
2154 class wxMBConv_cocoa
: public wxMBConv
2159 Init(CFStringGetSystemEncoding()) ;
2163 wxMBConv_cocoa(const wxChar
* name
)
2165 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2169 wxMBConv_cocoa(wxFontEncoding encoding
)
2171 Init( wxCFStringEncFromFontEnc(encoding
) );
2178 void Init( CFStringEncoding encoding
)
2180 m_encoding
= encoding
;
2183 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2187 CFStringRef theString
= CFStringCreateWithBytes (
2188 NULL
, //the allocator
2189 (const UInt8
*)szUnConv
,
2192 false //no BOM/external representation
2195 wxASSERT(theString
);
2197 size_t nOutLength
= CFStringGetLength(theString
);
2201 CFRelease(theString
);
2205 CFRange theRange
= { 0, nOutSize
};
2207 #if SIZEOF_WCHAR_T == 4
2208 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2211 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2213 CFRelease(theString
);
2215 szUniCharBuffer
[nOutLength
] = '\0' ;
2217 #if SIZEOF_WCHAR_T == 4
2218 wxMBConvUTF16 converter
;
2219 converter
.MB2WC(szOut
, (const char*)szUniCharBuffer
, nOutSize
) ;
2220 delete[] szUniCharBuffer
;
2226 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2230 size_t nRealOutSize
;
2231 size_t nBufSize
= wxWcslen(szUnConv
);
2232 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
2234 #if SIZEOF_WCHAR_T == 4
2235 wxMBConvUTF16 converter
;
2236 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2237 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1] ;
2238 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
)) ;
2239 nBufSize
/= sizeof(UniChar
);
2242 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2246 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
2249 wxASSERT(theString
);
2251 //Note that CER puts a BOM when converting to unicode
2252 //so we check and use getchars instead in that case
2253 if (m_encoding
== kCFStringEncodingUnicode
)
2256 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2258 nRealOutSize
= CFStringGetLength(theString
) + 1;
2264 CFRangeMake(0, CFStringGetLength(theString
)),
2266 0, //what to put in characters that can't be converted -
2267 //0 tells CFString to return NULL if it meets such a character
2268 false, //not an external representation
2271 (CFIndex
*) &nRealOutSize
2275 CFRelease(theString
);
2277 #if SIZEOF_WCHAR_T == 4
2278 delete[] szUniBuffer
;
2281 return nRealOutSize
- 1;
2286 return m_encoding
!= kCFStringEncodingInvalidId
&&
2287 CFStringIsEncodingAvailable(m_encoding
);
2291 CFStringEncoding m_encoding
;
2294 #endif // defined(__WXCOCOA__)
2296 // ============================================================================
2297 // Mac conversion classes
2298 // ============================================================================
2300 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2302 class wxMBConv_mac
: public wxMBConv
2307 Init(CFStringGetSystemEncoding()) ;
2311 wxMBConv_mac(const wxChar
* name
)
2313 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2317 wxMBConv_mac(wxFontEncoding encoding
)
2319 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
2324 OSStatus status
= noErr
;
2325 status
= TECDisposeConverter(m_MB2WC_converter
);
2326 status
= TECDisposeConverter(m_WC2MB_converter
);
2330 void Init( TextEncodingBase encoding
)
2332 OSStatus status
= noErr
;
2333 m_char_encoding
= encoding
;
2334 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
2336 status
= TECCreateConverter(&m_MB2WC_converter
,
2338 m_unicode_encoding
);
2339 status
= TECCreateConverter(&m_WC2MB_converter
,
2344 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2346 OSStatus status
= noErr
;
2347 ByteCount byteOutLen
;
2348 ByteCount byteInLen
= strlen(psz
) ;
2349 wchar_t *tbuf
= NULL
;
2350 UniChar
* ubuf
= NULL
;
2355 //apple specs say at least 32
2356 n
= wxMax( 32 , byteInLen
) ;
2357 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
2359 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
2360 #if SIZEOF_WCHAR_T == 4
2361 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2363 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2365 status
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2366 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2367 #if SIZEOF_WCHAR_T == 4
2368 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2369 // is not properly terminated we get random characters at the end
2370 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2371 wxMBConvUTF16 converter
;
2372 res
= converter
.MB2WC( (buf
? buf
: tbuf
) , (const char*)ubuf
, n
) ;
2375 res
= byteOutLen
/ sizeof( UniChar
) ;
2380 if ( buf
&& res
< n
)
2386 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2388 OSStatus status
= noErr
;
2389 ByteCount byteOutLen
;
2390 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2396 //apple specs say at least 32
2397 n
= wxMax( 32 , ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2398 tbuf
= (char*) malloc( n
) ;
2401 ByteCount byteBufferLen
= n
;
2402 UniChar
* ubuf
= NULL
;
2403 #if SIZEOF_WCHAR_T == 4
2404 wxMBConvUTF16 converter
;
2405 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2406 byteInLen
= unicharlen
;
2407 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2408 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2410 ubuf
= (UniChar
*) psz
;
2412 status
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2413 (TextPtr
) (buf
? buf
: tbuf
) , byteBufferLen
, &byteOutLen
);
2414 #if SIZEOF_WCHAR_T == 4
2420 size_t res
= byteOutLen
;
2421 if ( buf
&& res
< n
)
2425 //we need to double-trip to verify it didn't insert any ? in place
2426 //of bogus characters
2427 wxWCharBuffer
wcBuf(n
);
2428 size_t pszlen
= wxWcslen(psz
);
2429 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
2430 wxWcslen(wcBuf
) != pszlen
||
2431 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2433 // we didn't obtain the same thing we started from, hence
2434 // the conversion was lossy and we consider that it failed
2443 { return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
; }
2446 TECObjectRef m_MB2WC_converter
;
2447 TECObjectRef m_WC2MB_converter
;
2449 TextEncodingBase m_char_encoding
;
2450 TextEncodingBase m_unicode_encoding
;
2453 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2455 // ============================================================================
2456 // wxEncodingConverter based conversion classes
2457 // ============================================================================
2461 class wxMBConv_wxwin
: public wxMBConv
2466 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2467 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2471 // temporarily just use wxEncodingConverter stuff,
2472 // so that it works while a better implementation is built
2473 wxMBConv_wxwin(const wxChar
* name
)
2476 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2478 m_enc
= wxFONTENCODING_SYSTEM
;
2483 wxMBConv_wxwin(wxFontEncoding enc
)
2490 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2492 size_t inbuf
= strlen(psz
);
2495 if (!m2w
.Convert(psz
,buf
))
2501 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2503 const size_t inbuf
= wxWcslen(psz
);
2506 if (!w2m
.Convert(psz
,buf
))
2513 bool IsOk() const { return m_ok
; }
2516 wxFontEncoding m_enc
;
2517 wxEncodingConverter m2w
, w2m
;
2519 // were we initialized successfully?
2522 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2525 // make the constructors available for unit testing
2526 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_wxwin( const wxChar
* name
)
2528 wxMBConv_wxwin
* result
= new wxMBConv_wxwin( name
);
2529 if ( !result
->IsOk() )
2537 #endif // wxUSE_FONTMAP
2539 // ============================================================================
2540 // wxCSConv implementation
2541 // ============================================================================
2543 void wxCSConv::Init()
2550 wxCSConv::wxCSConv(const wxChar
*charset
)
2560 m_encoding
= wxFontMapperBase::GetEncodingFromName(charset
);
2562 m_encoding
= wxFONTENCODING_SYSTEM
;
2566 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2568 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2570 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2572 encoding
= wxFONTENCODING_SYSTEM
;
2577 m_encoding
= encoding
;
2580 wxCSConv::~wxCSConv()
2585 wxCSConv::wxCSConv(const wxCSConv
& conv
)
2590 SetName(conv
.m_name
);
2591 m_encoding
= conv
.m_encoding
;
2594 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2598 SetName(conv
.m_name
);
2599 m_encoding
= conv
.m_encoding
;
2604 void wxCSConv::Clear()
2613 void wxCSConv::SetName(const wxChar
*charset
)
2617 m_name
= wxStrdup(charset
);
2623 #include "wx/hashmap.h"
2625 WX_DECLARE_HASH_MAP( wxFontEncoding
, wxString
, wxIntegerHash
, wxIntegerEqual
,
2626 wxEncodingNameCache
);
2628 static wxEncodingNameCache gs_nameCache
;
2631 wxMBConv
*wxCSConv::DoCreate() const
2634 wxLogTrace(TRACE_STRCONV
,
2635 wxT("creating conversion for %s"),
2637 : wxFontMapperBase::GetEncodingName(m_encoding
).c_str()));
2638 #endif // wxUSE_FONTMAP
2640 // check for the special case of ASCII or ISO8859-1 charset: as we have
2641 // special knowledge of it anyhow, we don't need to create a special
2642 // conversion object
2643 if ( m_encoding
== wxFONTENCODING_ISO8859_1
||
2644 m_encoding
== wxFONTENCODING_DEFAULT
)
2646 // don't convert at all
2650 // we trust OS to do conversion better than we can so try external
2651 // conversion methods first
2653 // the full order is:
2654 // 1. OS conversion (iconv() under Unix or Win32 API)
2655 // 2. hard coded conversions for UTF
2656 // 3. wxEncodingConverter as fall back
2662 #endif // !wxUSE_FONTMAP
2664 wxString
name(m_name
);
2665 wxFontEncoding
encoding(m_encoding
);
2667 if ( !name
.empty() )
2669 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
2677 wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2678 #endif // wxUSE_FONTMAP
2682 const wxEncodingNameCache::iterator it
= gs_nameCache
.find(encoding
);
2683 if ( it
!= gs_nameCache
.end() )
2685 if ( it
->second
.empty() )
2688 wxMBConv_iconv
*conv
= new wxMBConv_iconv(it
->second
);
2695 const wxChar
** names
= wxFontMapperBase::GetAllEncodingNames(encoding
);
2697 for ( ; *names
; ++names
)
2699 wxMBConv_iconv
*conv
= new wxMBConv_iconv(*names
);
2702 gs_nameCache
[encoding
] = *names
;
2709 gs_nameCache
[encoding
] = _T(""); // cache the failure
2711 #endif // wxUSE_FONTMAP
2713 #endif // HAVE_ICONV
2715 #ifdef wxHAVE_WIN32_MB2WC
2718 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2719 : new wxMBConv_win32(m_encoding
);
2728 #endif // wxHAVE_WIN32_MB2WC
2729 #if defined(__WXMAC__)
2731 // leave UTF16 and UTF32 to the built-ins of wx
2732 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
2733 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
2737 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
2738 : new wxMBConv_mac(m_encoding
);
2740 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
2749 #if defined(__WXCOCOA__)
2751 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
2755 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
2756 : new wxMBConv_cocoa(m_encoding
);
2758 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
2768 wxFontEncoding enc
= m_encoding
;
2770 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2772 // use "false" to suppress interactive dialogs -- we can be called from
2773 // anywhere and popping up a dialog from here is the last thing we want to
2775 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
2777 #endif // wxUSE_FONTMAP
2781 case wxFONTENCODING_UTF7
:
2782 return new wxMBConvUTF7
;
2784 case wxFONTENCODING_UTF8
:
2785 return new wxMBConvUTF8
;
2787 case wxFONTENCODING_UTF16BE
:
2788 return new wxMBConvUTF16BE
;
2790 case wxFONTENCODING_UTF16LE
:
2791 return new wxMBConvUTF16LE
;
2793 case wxFONTENCODING_UTF32BE
:
2794 return new wxMBConvUTF32BE
;
2796 case wxFONTENCODING_UTF32LE
:
2797 return new wxMBConvUTF32LE
;
2800 // nothing to do but put here to suppress gcc warnings
2807 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2808 : new wxMBConv_wxwin(m_encoding
);
2814 #endif // wxUSE_FONTMAP
2816 // NB: This is a hack to prevent deadlock. What could otherwise happen
2817 // in Unicode build: wxConvLocal creation ends up being here
2818 // because of some failure and logs the error. But wxLog will try to
2819 // attach timestamp, for which it will need wxConvLocal (to convert
2820 // time to char* and then wchar_t*), but that fails, tries to log
2821 // error, but wxLog has a (already locked) critical section that
2822 // guards static buffer.
2823 static bool alreadyLoggingError
= false;
2824 if (!alreadyLoggingError
)
2826 alreadyLoggingError
= true;
2827 wxLogError(_("Cannot convert from the charset '%s'!"),
2831 wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str()
2832 #else // !wxUSE_FONTMAP
2833 wxString::Format(_("encoding %s"), m_encoding
).c_str()
2834 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2836 alreadyLoggingError
= false;
2842 void wxCSConv::CreateConvIfNeeded() const
2846 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2849 // if we don't have neither the name nor the encoding, use the default
2850 // encoding for this system
2851 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
2853 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
2855 #endif // wxUSE_INTL
2857 self
->m_convReal
= DoCreate();
2858 self
->m_deferred
= false;
2862 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2864 CreateConvIfNeeded();
2867 return m_convReal
->MB2WC(buf
, psz
, n
);
2870 size_t len
= strlen(psz
);
2874 for (size_t c
= 0; c
<= len
; c
++)
2875 buf
[c
] = (unsigned char)(psz
[c
]);
2881 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2883 CreateConvIfNeeded();
2886 return m_convReal
->WC2MB(buf
, psz
, n
);
2889 const size_t len
= wxWcslen(psz
);
2892 for (size_t c
= 0; c
<= len
; c
++)
2896 buf
[c
] = (char)psz
[c
];
2901 for (size_t c
= 0; c
<= len
; c
++)
2911 // ----------------------------------------------------------------------------
2913 // ----------------------------------------------------------------------------
2916 static wxMBConv_win32 wxConvLibcObj
;
2917 #elif defined(__WXMAC__) && !defined(__MACH__)
2918 static wxMBConv_mac wxConvLibcObj
;
2920 static wxMBConvLibc wxConvLibcObj
;
2923 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
2924 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
2925 static wxMBConvUTF7 wxConvUTF7Obj
;
2926 static wxMBConvUTF8 wxConvUTF8Obj
;
2928 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
2929 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
2930 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
2931 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
2932 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
2933 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
2934 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
= &
2942 #else // !wxUSE_WCHAR_T
2944 // stand-ins in absence of wchar_t
2945 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
2950 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T