1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // ============================================================================
17 // ============================================================================
19 // ----------------------------------------------------------------------------
21 // ----------------------------------------------------------------------------
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
35 #include "wx/strconv.h"
40 #include "wx/msw/private.h"
41 #include "wx/msw/missing.h"
52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54 #endif // __WIN32__ but !__WXMICROWIN__
62 #include "wx/thread.h"
65 #include "wx/encconv.h"
66 #include "wx/fontmap.h"
71 #include <ATSUnicode.h>
72 #include <TextCommon.h>
73 #include <TextEncodingConverter.h>
76 #include "wx/mac/private.h" // includes mac headers
79 #define TRACE_STRCONV _T("strconv")
81 #if SIZEOF_WCHAR_T == 2
85 // ============================================================================
87 // ============================================================================
89 // ----------------------------------------------------------------------------
90 // UTF-16 en/decoding to/from UCS-4
91 // ----------------------------------------------------------------------------
94 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
99 *output
= (wxUint16
) input
;
102 else if (input
>=0x110000)
110 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
111 *output
= (wxUint16
) ((input
&0x3ff)+0xdc00);
// Decode one UCS-4 code point from the UTF-16 sequence starting at `input`.
//
// input:  pointer to at least one 16-bit unit; a second unit is read only
//         when the first one is a high surrogate.
// output: receives the decoded code point. On error it receives the first
//         unit unchanged, so callers that merely skip one unit after a
//         failure still get a defined value.
//
// Returns the number of 16-bit units consumed (1 or 2), or (size_t)-1 if the
// sequence is not well-formed UTF-16.
static size_t decode_utf16(const wxUint16* input, wxUint32& output)
{
    const wxUint16 first = input[0];

    // anything outside the surrogate range stands for itself
    if ( first < 0xd800 || first > 0xdfff )
    {
        output = first;
        return 1;
    }

    // A valid pair is a high surrogate (0xd800..0xdbff) followed by a low
    // one (0xdc00..0xdfff). The first unit must be checked too: a leading low
    // surrogate would otherwise be combined into a value >= 0x110000, which
    // is not a Unicode code point.
    if ( first >= 0xdc00 || input[1] < 0xdc00 || input[1] > 0xdfff )
    {
        output = first;
        return (size_t)-1;
    }

    // 0xd7c0 == 0xd800 - (0x10000 >> 10): removes the surrogate bias and adds
    // the 0x10000 offset of the supplementary planes in a single step
    output = ((wxUint32)(first - 0xd7c0) << 10) + (input[1] - 0xdc00);
    return 2;
}
137 // ----------------------------------------------------------------------------
139 // ----------------------------------------------------------------------------
141 wxMBConv::~wxMBConv()
143 // nothing to do here (necessary for Darwin linking probably)
146 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
150 // calculate the length of the buffer needed first
151 size_t nLen
= MB2WC(NULL
, psz
, 0);
152 if ( nLen
!= (size_t)-1 )
154 // now do the actual conversion
155 wxWCharBuffer
buf(nLen
);
156 nLen
= MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NULL
157 if ( nLen
!= (size_t)-1 )
164 wxWCharBuffer
buf((wchar_t *)NULL
);
169 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
173 size_t nLen
= WC2MB(NULL
, pwz
, 0);
174 if ( nLen
!= (size_t)-1 )
176 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
177 nLen
= WC2MB(buf
.data(), pwz
, nLen
+ 4);
178 if ( nLen
!= (size_t)-1 )
185 wxCharBuffer
buf((char *)NULL
);
190 const wxWCharBuffer
wxMBConv::cMB2WC(const char *szString
, size_t nStringLen
, size_t* pOutSize
) const
192 wxASSERT(pOutSize
!= NULL
);
194 const char* szEnd
= szString
+ nStringLen
+ 1;
195 const char* szPos
= szString
;
196 const char* szStart
= szPos
;
198 size_t nActualLength
= 0;
199 size_t nCurrentSize
= nStringLen
; //try normal size first (should never resize?)
201 wxWCharBuffer
theBuffer(nCurrentSize
);
203 //Convert the string until the length() is reached, continuing the
204 //loop every time a null character is reached
205 while(szPos
!= szEnd
)
207 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
209 //Get the length of the current (sub)string
210 size_t nLen
= MB2WC(NULL
, szPos
, 0);
212 //Invalid conversion?
213 if( nLen
== (size_t)-1 )
216 theBuffer
.data()[0u] = wxT('\0');
221 //Increase the actual length (+1 for current null character)
222 nActualLength
+= nLen
+ 1;
        //if the buffer is too small, reallocate it
225 if (nActualLength
> (nCurrentSize
+1))
227 wxWCharBuffer
theNewBuffer(nCurrentSize
<< 1);
228 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
* sizeof(wchar_t));
229 theBuffer
= theNewBuffer
;
233 //Convert the current (sub)string
234 if ( MB2WC(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
237 theBuffer
.data()[0u] = wxT('\0');
241 //Increment to next (sub)string
242 //Note that we have to use strlen instead of nLen here
243 //because XX2XX gives us the size of the output buffer,
244 //which is not necessarily the length of the string
245 szPos
+= strlen(szPos
) + 1;
248 //success - return actual length and the buffer
249 *pOutSize
= nActualLength
;
253 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *szString
, size_t nStringLen
, size_t* pOutSize
) const
255 wxASSERT(pOutSize
!= NULL
);
257 const wchar_t* szEnd
= szString
+ nStringLen
+ 1;
258 const wchar_t* szPos
= szString
;
259 const wchar_t* szStart
= szPos
;
261 size_t nActualLength
= 0;
262 size_t nCurrentSize
= nStringLen
<< 2; //try * 4 first
264 wxCharBuffer
theBuffer(nCurrentSize
);
266 //Convert the string until the length() is reached, continuing the
267 //loop every time a null character is reached
268 while(szPos
!= szEnd
)
270 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
272 //Get the length of the current (sub)string
273 size_t nLen
= WC2MB(NULL
, szPos
, 0);
275 //Invalid conversion?
276 if( nLen
== (size_t)-1 )
279 theBuffer
.data()[0u] = wxT('\0');
283 //Increase the actual length (+1 for current null character)
284 nActualLength
+= nLen
+ 1;
        //if the buffer is too small, reallocate it
287 if (nActualLength
> (nCurrentSize
+1))
289 wxCharBuffer
theNewBuffer(nCurrentSize
<< 1);
290 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
);
291 theBuffer
= theNewBuffer
;
295 //Convert the current (sub)string
296 if(WC2MB(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
299 theBuffer
.data()[0u] = wxT('\0');
303 //Increment to next (sub)string
304 //Note that we have to use wxWcslen instead of nLen here
305 //because XX2XX gives us the size of the output buffer,
306 //which is not necessarily the length of the string
307 szPos
+= wxWcslen(szPos
) + 1;
310 //success - return actual length and the buffer
311 *pOutSize
= nActualLength
;
315 // ----------------------------------------------------------------------------
317 // ----------------------------------------------------------------------------
319 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
321 return wxMB2WC(buf
, psz
, n
);
324 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
326 return wxWC2MB(buf
, psz
, n
);
331 // ----------------------------------------------------------------------------
332 // wxConvBrokenFileNames
333 // ----------------------------------------------------------------------------
335 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar
*charset
)
337 if ( !charset
|| wxStricmp(charset
, _T("UTF-8")) == 0
338 || wxStricmp(charset
, _T("UTF8")) == 0 )
339 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
341 m_conv
= new wxCSConv(charset
);
345 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf
,
347 size_t outputSize
) const
349 return m_conv
->MB2WC( outputBuf
, psz
, outputSize
);
353 wxConvBrokenFileNames::WC2MB(char *outputBuf
,
355 size_t outputSize
) const
357 return m_conv
->WC2MB( outputBuf
, psz
, outputSize
);
362 // ----------------------------------------------------------------------------
364 // ----------------------------------------------------------------------------
366 // Implementation (C) 2004 Fredrik Roubert
369 // BASE64 decoding table
371 static const unsigned char utf7unb64
[] =
373 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
374 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
375 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
376 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
377 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
378 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
379 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
380 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
381 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
382 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
383 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
384 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
385 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
386 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
387 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
388 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
389 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
390 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
391 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
392 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
393 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
394 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
395 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
396 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
397 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
398 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
399 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
400 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
401 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
402 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
403 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
404 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
407 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
411 while ( *psz
&& (!buf
|| (len
< n
)) )
413 unsigned char cc
= *psz
++;
421 else if (*psz
== '-')
429 else // start of BASE64 encoded string
433 for ( ok
= lsb
= false, d
= 0, l
= 0;
434 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff;
439 for (l
+= 6; l
>= 8; lsb
= !lsb
)
441 unsigned char c
= (unsigned char)((d
>> (l
-= 8)) % 256);
451 *buf
= (wchar_t)(c
<< 8);
460 // in valid UTF7 we should have valid characters after '+'
469 if ( buf
&& (len
< n
) )
476 // BASE64 encoding table
478 static const unsigned char utf7enb64
[] =
480 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
481 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
482 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
483 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
484 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
485 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
486 'w', 'x', 'y', 'z', '0', '1', '2', '3',
487 '4', '5', '6', '7', '8', '9', '+', '/'
491 // UTF-7 encoding table
493 // 0 - Set D (directly encoded characters)
494 // 1 - Set O (optional direct characters)
495 // 2 - whitespace characters (optional)
496 // 3 - special characters
498 static const unsigned char utf7encode
[128] =
500 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
501 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
502 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
504 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
506 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
510 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
516 while (*psz
&& ((!buf
) || (len
< n
)))
519 if (cc
< 0x80 && utf7encode
[cc
] < 1)
527 else if (((wxUint32
)cc
) > 0xffff)
529 // no surrogate pair generation (yet?)
540 // BASE64 encode string
541 unsigned int lsb
, d
, l
;
542 for (d
= 0, l
= 0; /*nothing*/; psz
++)
544 for (lsb
= 0; lsb
< 2; lsb
++)
547 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
549 for (l
+= 8; l
>= 6; )
553 *buf
++ = utf7enb64
[(d
>> l
) % 64];
558 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
564 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
573 if (buf
&& (len
< n
))
578 // ----------------------------------------------------------------------------
580 // ----------------------------------------------------------------------------
582 static wxUint32 utf8_max
[]=
583 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
// boundaries of the private use area we use to (temporarily) remap bytes
// which are invalid in a UTF-8 encoded string
587 const wxUint32 wxUnicodePUA
= 0x100000;
588 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
590 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
594 while (*psz
&& ((!buf
) || (len
< n
)))
596 const char *opsz
= psz
;
597 bool invalid
= false;
598 unsigned char cc
= *psz
++, fc
= cc
;
600 for (cnt
= 0; fc
& 0x80; cnt
++)
609 // escape the escape character for octal escapes
610 if ((m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
611 && cc
== '\\' && (!buf
|| len
< n
))
623 // invalid UTF-8 sequence
628 unsigned ocnt
= cnt
- 1;
629 wxUint32 res
= cc
& (0x3f >> cnt
);
633 if ((cc
& 0xC0) != 0x80)
635 // invalid UTF-8 sequence
640 res
= (res
<< 6) | (cc
& 0x3f);
642 if (invalid
|| res
<= utf8_max
[ocnt
])
644 // illegal UTF-8 encoding
647 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
648 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
650 // if one of our PUA characters turns up externally
651 // it must also be treated as an illegal sequence
652 // (a bit like you have to escape an escape character)
                    // cast is ok because wchar_t == wxUint16 if WC_UTF16
659 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
660 if (pa
== (size_t)-1)
672 *buf
++ = (wchar_t)res
;
674 #endif // WC_UTF16/!WC_UTF16
679 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
681 while (opsz
< psz
&& (!buf
|| len
< n
))
                        // cast is ok because wchar_t == wxUint16 if WC_UTF16
685 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
686 wxASSERT(pa
!= (size_t)-1);
693 *buf
++ = (wchar_t)(wxUnicodePUA
+ (unsigned char)*opsz
);
699 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
701 while (opsz
< psz
&& (!buf
|| len
< n
))
703 if ( buf
&& len
+ 3 < n
)
705 unsigned char on
= *opsz
;
707 *buf
++ = (wchar_t)( L
'0' + on
/ 0100 );
708 *buf
++ = (wchar_t)( L
'0' + (on
% 0100) / 010 );
709 *buf
++ = (wchar_t)( L
'0' + on
% 010 );
715 else // MAP_INVALID_UTF8_NOT
722 if (buf
&& (len
< n
))
727 static inline bool isoctal(wchar_t wch
)
729 return L
'0' <= wch
&& wch
<= L
'7';
732 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
736 while (*psz
&& ((!buf
) || (len
< n
)))
740 // cast is ok for WC_UTF16
741 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
742 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
744 cc
=(*psz
++) & 0x7fffffff;
747 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
748 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
751 *buf
++ = (char)(cc
- wxUnicodePUA
);
754 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
755 && cc
== L
'\\' && psz
[0] == L
'\\' )
762 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
764 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
768 *buf
++ = (char) ((psz
[0] - L
'0')*0100 +
769 (psz
[1] - L
'0')*010 +
779 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
793 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
795 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
807 // ----------------------------------------------------------------------------
809 // ----------------------------------------------------------------------------
811 #ifdef WORDS_BIGENDIAN
812 #define wxMBConvUTF16straight wxMBConvUTF16BE
813 #define wxMBConvUTF16swap wxMBConvUTF16LE
815 #define wxMBConvUTF16swap wxMBConvUTF16BE
816 #define wxMBConvUTF16straight wxMBConvUTF16LE
822 // copy 16bit MB to 16bit String
823 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
827 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
830 *buf
++ = *(wxUint16
*)psz
;
833 psz
+= sizeof(wxUint16
);
835 if (buf
&& len
<n
) *buf
=0;
841 // copy 16bit String to 16bit MB
842 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
846 while (*psz
&& (!buf
|| len
< n
))
850 *(wxUint16
*)buf
= *psz
;
851 buf
+= sizeof(wxUint16
);
853 len
+= sizeof(wxUint16
);
856 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
862 // swap 16bit MB to 16bit String
863 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
867 while ( *psz
&& (!buf
|| len
< n
) )
871 ((char *)buf
)[0] = psz
[1];
872 ((char *)buf
)[1] = psz
[0];
879 if ( buf
&& len
< n
)
// swap 16bit String to 16bit MB
887 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
891 while (*psz
&& (!buf
|| len
< n
))
895 *buf
++ = ((char*)psz
)[1];
896 *buf
++ = ((char*)psz
)[0];
898 len
+= sizeof(wxUint16
);
901 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
910 // copy 16bit MB to 32bit String
911 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
915 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
918 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
919 if (pa
== (size_t)-1)
923 *buf
++ = (wchar_t)cc
;
925 psz
+= pa
* sizeof(wxUint16
);
927 if (buf
&& len
<n
) *buf
=0;
933 // copy 32bit String to 16bit MB
934 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
938 while (*psz
&& (!buf
|| len
< n
))
941 size_t pa
=encode_utf16(*psz
, cc
);
943 if (pa
== (size_t)-1)
948 *(wxUint16
*)buf
= cc
[0];
949 buf
+= sizeof(wxUint16
);
952 *(wxUint16
*)buf
= cc
[1];
953 buf
+= sizeof(wxUint16
);
957 len
+= pa
*sizeof(wxUint16
);
960 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
966 // swap 16bit MB to 32bit String
967 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
971 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
975 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
976 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
978 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
979 if (pa
== (size_t)-1)
983 *buf
++ = (wchar_t)cc
;
986 psz
+= pa
* sizeof(wxUint16
);
988 if (buf
&& len
<n
) *buf
=0;
994 // swap 32bit String to 16bit MB
995 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
999 while (*psz
&& (!buf
|| len
< n
))
1002 size_t pa
=encode_utf16(*psz
, cc
);
1004 if (pa
== (size_t)-1)
1009 *buf
++ = ((char*)cc
)[1];
1010 *buf
++ = ((char*)cc
)[0];
1013 *buf
++ = ((char*)cc
)[3];
1014 *buf
++ = ((char*)cc
)[2];
1018 len
+= pa
*sizeof(wxUint16
);
1021 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
1029 // ----------------------------------------------------------------------------
1031 // ----------------------------------------------------------------------------
1033 #ifdef WORDS_BIGENDIAN
1034 #define wxMBConvUTF32straight wxMBConvUTF32BE
1035 #define wxMBConvUTF32swap wxMBConvUTF32LE
1037 #define wxMBConvUTF32swap wxMBConvUTF32BE
1038 #define wxMBConvUTF32straight wxMBConvUTF32LE
1042 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1043 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
1048 // copy 32bit MB to 16bit String
1049 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1053 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1057 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
1058 if (pa
== (size_t)-1)
1068 psz
+= sizeof(wxUint32
);
1070 if (buf
&& len
<n
) *buf
=0;
1076 // copy 16bit String to 32bit MB
1077 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1081 while (*psz
&& (!buf
|| len
< n
))
1085 // cast is ok for WC_UTF16
1086 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
1087 if (pa
== (size_t)-1)
1092 *(wxUint32
*)buf
= cc
;
1093 buf
+= sizeof(wxUint32
);
1095 len
+= sizeof(wxUint32
);
1099 if (buf
&& len
<=n
-sizeof(wxUint32
))
1107 // swap 32bit MB to 16bit String
1108 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1112 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1115 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
1116 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
1121 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
1122 if (pa
== (size_t)-1)
1132 psz
+= sizeof(wxUint32
);
1142 // swap 16bit String to 32bit MB
1143 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1147 while (*psz
&& (!buf
|| len
< n
))
1151 // cast is ok for WC_UTF16
1152 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
1153 if (pa
== (size_t)-1)
1163 len
+= sizeof(wxUint32
);
1167 if (buf
&& len
<=n
-sizeof(wxUint32
))
1176 // copy 32bit MB to 32bit String
1177 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1181 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1184 *buf
++ = (wchar_t)(*(wxUint32
*)psz
);
1186 psz
+= sizeof(wxUint32
);
1196 // copy 32bit String to 32bit MB
1197 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1201 while (*psz
&& (!buf
|| len
< n
))
1205 *(wxUint32
*)buf
= *psz
;
1206 buf
+= sizeof(wxUint32
);
1209 len
+= sizeof(wxUint32
);
1213 if (buf
&& len
<=n
-sizeof(wxUint32
))
1220 // swap 32bit MB to 32bit String
1221 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1225 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1229 ((char *)buf
)[0] = psz
[3];
1230 ((char *)buf
)[1] = psz
[2];
1231 ((char *)buf
)[2] = psz
[1];
1232 ((char *)buf
)[3] = psz
[0];
1236 psz
+= sizeof(wxUint32
);
1246 // swap 32bit String to 32bit MB
1247 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1251 while (*psz
&& (!buf
|| len
< n
))
1255 *buf
++ = ((char *)psz
)[3];
1256 *buf
++ = ((char *)psz
)[2];
1257 *buf
++ = ((char *)psz
)[1];
1258 *buf
++ = ((char *)psz
)[0];
1260 len
+= sizeof(wxUint32
);
1264 if (buf
&& len
<=n
-sizeof(wxUint32
))
1274 // ============================================================================
1275 // The classes doing conversion using the iconv_xxx() functions
1276 // ============================================================================
1280 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1281 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1282 // (unless there's yet another bug in glibc) the only case when iconv()
1283 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1284 // left in the input buffer -- when _real_ error occurs,
1285 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1287 // [This bug does not appear in glibc 2.2.]
1288 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1289 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1290 (errno != E2BIG || bufLeft != 0))
1292 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1295 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1297 #define ICONV_T_INVALID ((iconv_t)-1)
1299 #if SIZEOF_WCHAR_T == 4
1300 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1301 #define WC_ENC wxFONTENCODING_UTF32
1302 #elif SIZEOF_WCHAR_T == 2
1303 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1304 #define WC_ENC wxFONTENCODING_UTF16
1305 #else // sizeof(wchar_t) != 2 nor 4
1306 // does this ever happen?
1307 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1310 // ----------------------------------------------------------------------------
1311 // wxMBConv_iconv: encapsulates an iconv character set
1312 // ----------------------------------------------------------------------------
1314 class wxMBConv_iconv
: public wxMBConv
1317 wxMBConv_iconv(const wxChar
*name
);
1318 virtual ~wxMBConv_iconv();
1320 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1321 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1324 { return (m2w
!= ICONV_T_INVALID
) && (w2m
!= ICONV_T_INVALID
); }
1327 // the iconv handlers used to translate from multibyte to wide char and in
1328 // the other direction
1332 // guards access to m2w and w2m objects
1333 wxMutex m_iconvMutex
;
1337 // the name (for iconv_open()) of a wide char charset -- if none is
1338 // available on this machine, it will remain NULL
1339 static wxString ms_wcCharsetName
;
1341 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1342 // different endian-ness than the native one
1343 static bool ms_wcNeedsSwap
;
1346 // make the constructor available for unit testing
1347 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_iconv( const wxChar
* name
)
1349 wxMBConv_iconv
* result
= new wxMBConv_iconv( name
);
1350 if ( !result
->IsOk() )
1358 wxString
wxMBConv_iconv::ms_wcCharsetName
;
1359 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1361 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1363 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1364 // names for the charsets
1365 const wxCharBuffer
cname(wxString(name
).ToAscii());
1367 // check for charset that represents wchar_t:
1368 if ( ms_wcCharsetName
.empty() )
1370 wxLogTrace(TRACE_STRCONV
, _T("Looking for wide char codeset:"));
1373 const wxChar
**names
= wxFontMapperBase::GetAllEncodingNames(WC_ENC
);
1374 #else // !wxUSE_FONTMAP
1375 static const wxChar
*names
[] =
1377 #if SIZEOF_WCHAR_T == 4
1379 #elif SIZEOF_WCHAR_T = 2
1384 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1386 for ( ; *names
&& ms_wcCharsetName
.empty(); ++names
)
1388 const wxString
nameCS(*names
);
1390 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1391 wxString
nameXE(nameCS
);
1392 #ifdef WORDS_BIGENDIAN
1394 #else // little endian
1398 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1401 m2w
= iconv_open(nameXE
.ToAscii(), cname
);
1402 if ( m2w
== ICONV_T_INVALID
)
1404 // try charset w/o bytesex info (e.g. "UCS4")
1405 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1407 m2w
= iconv_open(nameCS
.ToAscii(), cname
);
1409 // and check for bytesex ourselves:
1410 if ( m2w
!= ICONV_T_INVALID
)
1412 char buf
[2], *bufPtr
;
1413 wchar_t wbuf
[2], *wbufPtr
;
1421 outsz
= SIZEOF_WCHAR_T
* 2;
1425 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1426 (char**)&wbufPtr
, &outsz
);
1428 if (ICONV_FAILED(res
, insz
))
1430 wxLogLastError(wxT("iconv"));
1431 wxLogError(_("Conversion to charset '%s' doesn't work."),
1434 else // ok, can convert to this encoding, remember it
1436 ms_wcCharsetName
= nameCS
;
1437 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1441 else // use charset not requiring byte swapping
1443 ms_wcCharsetName
= nameXE
;
1447 wxLogTrace(TRACE_STRCONV
,
1448 wxT("iconv wchar_t charset is \"%s\"%s"),
1449 ms_wcCharsetName
.empty() ? _T("<none>")
1450 : ms_wcCharsetName
.c_str(),
1451 ms_wcNeedsSwap
? _T(" (needs swap)")
1454 else // we already have ms_wcCharsetName
1456 m2w
= iconv_open(ms_wcCharsetName
.ToAscii(), cname
);
1459 if ( ms_wcCharsetName
.empty() )
1461 w2m
= ICONV_T_INVALID
;
1465 w2m
= iconv_open(cname
, ms_wcCharsetName
.ToAscii());
1466 if ( w2m
== ICONV_T_INVALID
)
1468 wxLogTrace(TRACE_STRCONV
,
1469 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1470 ms_wcCharsetName
.c_str(), cname
.data());
1475 wxMBConv_iconv::~wxMBConv_iconv()
1477 if ( m2w
!= ICONV_T_INVALID
)
1479 if ( w2m
!= ICONV_T_INVALID
)
1483 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1486 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1487 // Unfortunately there is a couple of global wxCSConv objects such as
1488 // wxConvLocal that are used all over wx code, so we have to make sure
1489 // the handle is used by at most one thread at the time. Otherwise
1490 // only a few wx classes would be safe to use from non-main threads
1491 // as MB<->WC conversion would fail "randomly".
1492 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1495 size_t inbuf
= strlen(psz
);
1496 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1498 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1499 wchar_t *bufPtr
= buf
;
1500 const char *pszPtr
= psz
;
1504 // have destination buffer, convert there
1506 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1507 (char**)&bufPtr
, &outbuf
);
1508 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1512 // convert to native endianness
1513 for ( unsigned i
= 0; i
< res
; i
++ )
1514 buf
[n
] = WC_BSWAP(buf
[i
]);
1517 // NB: iconv was given only strlen(psz) characters on input, and so
1518 // it couldn't convert the trailing zero. Let's do it ourselves
1519 // if there's some room left for it in the output buffer.
1525 // no destination buffer... convert using temp buffer
1526 // to calculate destination buffer requirement
1531 outbuf
= 8*SIZEOF_WCHAR_T
;
1534 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1535 (char**)&bufPtr
, &outbuf
);
1537 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1538 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1541 if (ICONV_FAILED(cres
, inbuf
))
1543 //VS: it is ok if iconv fails, hence trace only
1544 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1551 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1554 // NB: explained in MB2WC
1555 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1558 size_t inlen
= wxWcslen(psz
);
1559 size_t inbuf
= inlen
* SIZEOF_WCHAR_T
;
1563 wchar_t *tmpbuf
= 0;
1567 // need to copy to temp buffer to switch endianness
1568 // (doing WC_BSWAP twice on the original buffer won't help, as it
1569 // could be in read-only memory, or be accessed in some other thread)
1570 tmpbuf
= (wchar_t *)malloc(inbuf
+ SIZEOF_WCHAR_T
);
1571 for ( size_t i
= 0; i
< inlen
; i
++ )
1572 tmpbuf
[n
] = WC_BSWAP(psz
[i
]);
1573 tmpbuf
[inlen
] = L
'\0';
1579 // have destination buffer, convert there
1580 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1584 // NB: iconv was given only wcslen(psz) characters on input, and so
1585 // it couldn't convert the trailing zero. Let's do it ourselves
1586 // if there's some room left for it in the output buffer.
1592 // no destination buffer... convert using temp buffer
1593 // to calculate destination buffer requirement
1597 buf
= tbuf
; outbuf
= 16;
1599 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1602 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1610 if (ICONV_FAILED(cres
, inbuf
))
1612 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1619 #endif // HAVE_ICONV
1622 // ============================================================================
1623 // Win32 conversion classes
1624 // ============================================================================
1626 #ifdef wxHAVE_WIN32_MB2WC
1630 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1631 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1634 class wxMBConv_win32
: public wxMBConv
1639 m_CodePage
= CP_ACP
;
1643 wxMBConv_win32(const wxChar
* name
)
1645 m_CodePage
= wxCharsetToCodepage(name
);
1648 wxMBConv_win32(wxFontEncoding encoding
)
1650 m_CodePage
= wxEncodingToCodepage(encoding
);
1654 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
        // note that we have to use MB_ERR_INVALID_CHARS flag as without it
        // the behaviour is not compatible with the Unix version (using iconv)
        // and breaks the library itself, e.g. wxTextInputStream::NextChar()
        // wouldn't work if reading an incomplete MB char didn't result in an
1662 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1663 // an error (tested under Windows Server 2003) and apparently it is
1664 // done on purpose, i.e. the function accepts any input in this case
1665 // and although I'd prefer to return error on ill-formed output, our
1666 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
        // explicitly ill-formed according to RFC 2152) either, so we don't
1668 // even have any fallback here...
1670 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1671 // Win XP or newer and if it is specified on older versions, conversion
1672 // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1673 // fails. So we can only use the flag on newer Windows versions.
1674 // Additionally, the flag is not supported by UTF7, symbol and CJK
1675 // encodings. See here:
1676 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1677 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1679 if ( m_CodePage
!= CP_UTF7
&& m_CodePage
!= CP_SYMBOL
&&
1680 m_CodePage
< 50000 &&
1681 IsAtLeastWin2kSP4() )
1683 flags
= MB_ERR_INVALID_CHARS
;
1685 else if ( m_CodePage
== CP_UTF8
)
1687 // Avoid round-trip in the special case of UTF-8 by using our
1688 // own UTF-8 conversion code:
1689 return wxMBConvUTF8().MB2WC(buf
, psz
, n
);
1692 const size_t len
= ::MultiByteToWideChar
1694 m_CodePage
, // code page
1695 flags
, // flags: fall on error
1696 psz
, // input string
1697 -1, // its length (NUL-terminated)
1698 buf
, // output string
1699 buf
? n
: 0 // size of output buffer
1703 // function totally failed
1707 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1708 // check if we succeeded, by doing a double trip:
1709 if ( !flags
&& buf
)
1711 const size_t mbLen
= strlen(psz
);
1712 wxCharBuffer
mbBuf(mbLen
);
1713 if ( ::WideCharToMultiByte
1720 mbLen
+ 1, // size in bytes, not length
1724 strcmp(mbBuf
, psz
) != 0 )
1726 // we didn't obtain the same thing we started from, hence
1727 // the conversion was lossy and we consider that it failed
1732 // note that it returns count of written chars for buf != NULL and size
1733 // of the needed buffer for buf == NULL so in either case the length of
1734 // the string (which never includes the terminating NUL) is one less
// Convert the NUL-terminated wide string pwz to the multibyte encoding of
// m_CodePage by calling ::WideCharToMultiByte().
//
// buf is the output buffer and n its size in bytes; when buf is NULL a size
// of 0 is passed to the API instead of n.
//
// Lossy conversions are detected in one of two ways:
//  - if CanUseNoBestFit() is true and m_CodePage is below 50000, the
//    WC_NO_BEST_FIT_CHARS flag is passed together with a pointer to usedDef,
//    the flag the API fills in when it had to use the default character;
//  - otherwise the result is converted back with MB2WC() into a temporary
//    wxWCharBuffer and compared with the input using wcscmp().
//
// NOTE(review): the return statements are not visible in this listing; the
// trailing comment suggests the result is len - 1 on success -- confirm.
1738 size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
1741 we have a problem here: by default, WideCharToMultiByte() may
1742 replace characters unrepresentable in the target code page with bad
1743 quality approximations such as turning "1/2" symbol (U+00BD) into
1744 "1" for the code pages which don't have it and we, obviously, want
1745 to avoid this at any price
1747 the trouble is that this function does it _silently_, i.e. it won't
1748 even tell us whether it did or not... Win98/2000 and higher provide
1749 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1750 we have to resort to a round trip, i.e. check that converting back
1751 results in the same string -- this is, of course, expensive but
1752 otherwise we simply can't be sure to not garble the data.
1755 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1756 // it doesn't work with CJK encodings (which we test for rather roughly
1757 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1759 BOOL usedDef
wxDUMMY_INITIALIZE(false);
1762 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
1764 // it's our lucky day
1765 flags
= WC_NO_BEST_FIT_CHARS
;
1766 pUsedDef
= &usedDef
;
1768 else // old system or unsupported encoding
1774 const size_t len
= ::WideCharToMultiByte
1776 m_CodePage
, // code page
1777 flags
, // either none or no best fit
1778 pwz
, // input string
1779 -1, // it is (wide) NUL-terminated
1780 buf
, // output buffer
1781 buf
? n
: 0, // and its size
1782 NULL
, // default "replacement" char
1783 pUsedDef
// [out] was it used?
1788 // function totally failed
1792 // if we were really converting, check if we succeeded
1797 // check if the conversion failed, i.e. if any replacements
1802 else // we must resort to double tripping...
1804 wxWCharBuffer
wcBuf(n
);
1805 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
1806 wcscmp(wcBuf
, pwz
) != 0 )
1808 // we didn't obtain the same thing we started from, hence
1809 // the conversion was lossy and we consider that it failed
1815 // see the comment above for the reason of "len - 1"
// Report whether this converter is usable: true unless m_CodePage holds
// the value -1.
1819 bool IsOk() const { return m_CodePage
!= -1; }
// Tell whether the running system is recent enough for WC2MB() to rely on
// the WC_NO_BEST_FIT_CHARS flag.
//
// The answer is computed once from wxGetOsVersion() and cached in the
// function-local static s_isWin98Or2k (-1 means not determined yet); the
// function returns true only when the cached value is 1.
1822 static bool CanUseNoBestFit()
1824 static int s_isWin98Or2k
= -1;
1826 if ( s_isWin98Or2k
== -1 )
1829 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
// NOTE(review): the case labels of this switch are not visible in this
// listing; presumably the first assignment is the Win9x branch (4.10 or
// later) and the second the NT branch (5.0 or later) -- confirm.
1832 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
1836 s_isWin98Or2k
= verMaj
>= 5;
1840 // unknown, be conservative by default
1844 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
1847 return s_isWin98Or2k
== 1;
// Tell whether the running system is Windows 2000 SP4 or anything newer.
//
// The answer is computed once and cached in the function-local static
// s_isAtLeastWin2kSP4 (-1 means not determined yet); the function returns
// true only when the cached value is 1.
1850 static bool IsAtLeastWin2kSP4()
1855 static int s_isAtLeastWin2kSP4
= -1;
1857 if ( s_isAtLeastWin2kSP4
== -1 )
1859 OSVERSIONINFOEX ver
;
// zero the structure and store its size in it before querying the version
1861 memset(&ver
, 0, sizeof(ver
));
1862 ver
.dwOSVersionInfoSize
= sizeof(ver
);
// NOTE(review): the return value of GetVersionEx() is not checked here
1863 GetVersionEx((OSVERSIONINFO
*)&ver
);
// accepted: major version above 5, version 5.x with x above 0, or
// version 5.0 with service pack major number 4 or higher
1865 s_isAtLeastWin2kSP4
=
1866 ((ver
.dwMajorVersion
> 5) || // Vista+
1867 (ver
.dwMajorVersion
== 5 && ver
.dwMinorVersion
> 0) || // XP/2003
1868 (ver
.dwMajorVersion
== 5 && ver
.dwMinorVersion
== 0 &&
1869 ver
.wServicePackMajor
>= 4)) // 2000 SP4+
1873 return s_isAtLeastWin2kSP4
== 1;
1880 #endif // wxHAVE_WIN32_MB2WC
1882 // ============================================================================
1883 // Cocoa conversion classes
1884 // ============================================================================
1886 #if defined(__WXCOCOA__)
1888 // RN: There is no UTF-32 support in either Core Foundation or
1889 // Cocoa. Strangely enough, Core Foundation uses UTF-32
1890 // internally quite a bit - it's just not public (yet).
1892 #include <CoreFoundation/CFString.h>
1893 #include <CoreFoundation/CFStringEncodingExt.h>
// Map a wxFontEncoding value to the corresponding Core Foundation
// CFStringEncoding constant.
//
// The local enc starts out as kCFStringEncodingInvalidId.
// wxFONTENCODING_DEFAULT is mapped to CFStringGetSystemEncoding(); every
// other supported value is translated by the switch below.
//
// NOTE(review): the break statements, the default label and the final
// return are not visible in this listing; presumably enc is returned and
// stays kCFStringEncodingInvalidId for unlisted encodings -- confirm.
1895 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
1897 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
1898 if ( encoding
== wxFONTENCODING_DEFAULT
)
1900 enc
= CFStringGetSystemEncoding();
1902 else switch( encoding
)
// ISO 8859 family
1904 case wxFONTENCODING_ISO8859_1
:
1905 enc
= kCFStringEncodingISOLatin1
;
1907 case wxFONTENCODING_ISO8859_2
:
1908 enc
= kCFStringEncodingISOLatin2
;
1910 case wxFONTENCODING_ISO8859_3
:
1911 enc
= kCFStringEncodingISOLatin3
;
1913 case wxFONTENCODING_ISO8859_4
:
1914 enc
= kCFStringEncodingISOLatin4
;
1916 case wxFONTENCODING_ISO8859_5
:
1917 enc
= kCFStringEncodingISOLatinCyrillic
;
1919 case wxFONTENCODING_ISO8859_6
:
1920 enc
= kCFStringEncodingISOLatinArabic
;
1922 case wxFONTENCODING_ISO8859_7
:
1923 enc
= kCFStringEncodingISOLatinGreek
;
1925 case wxFONTENCODING_ISO8859_8
:
1926 enc
= kCFStringEncodingISOLatinHebrew
;
1928 case wxFONTENCODING_ISO8859_9
:
1929 enc
= kCFStringEncodingISOLatin5
;
1931 case wxFONTENCODING_ISO8859_10
:
1932 enc
= kCFStringEncodingISOLatin6
;
1934 case wxFONTENCODING_ISO8859_11
:
1935 enc
= kCFStringEncodingISOLatinThai
;
1937 case wxFONTENCODING_ISO8859_13
:
1938 enc
= kCFStringEncodingISOLatin7
;
1940 case wxFONTENCODING_ISO8859_14
:
1941 enc
= kCFStringEncodingISOLatin8
;
1943 case wxFONTENCODING_ISO8859_15
:
1944 enc
= kCFStringEncodingISOLatin9
;
1947 case wxFONTENCODING_KOI8
:
1948 enc
= kCFStringEncodingKOI8_R
;
1950 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
1951 enc
= kCFStringEncodingDOSRussian
;
1954 // case wxFONTENCODING_BULGARIAN :
// DOS and Windows code pages
1958 case wxFONTENCODING_CP437
:
1959 enc
=kCFStringEncodingDOSLatinUS
;
1961 case wxFONTENCODING_CP850
:
1962 enc
= kCFStringEncodingDOSLatin1
;
1964 case wxFONTENCODING_CP852
:
1965 enc
= kCFStringEncodingDOSLatin2
;
1967 case wxFONTENCODING_CP855
:
1968 enc
= kCFStringEncodingDOSCyrillic
;
// same CFString encoding as wxFONTENCODING_ALTERNATIVE above
1970 case wxFONTENCODING_CP866
:
1971 enc
=kCFStringEncodingDOSRussian
;
1973 case wxFONTENCODING_CP874
:
1974 enc
= kCFStringEncodingDOSThai
;
1976 case wxFONTENCODING_CP932
:
1977 enc
= kCFStringEncodingDOSJapanese
;
1979 case wxFONTENCODING_CP936
:
1980 enc
=kCFStringEncodingDOSChineseSimplif
;
1982 case wxFONTENCODING_CP949
:
1983 enc
= kCFStringEncodingDOSKorean
;
1985 case wxFONTENCODING_CP950
:
1986 enc
= kCFStringEncodingDOSChineseTrad
;
1988 case wxFONTENCODING_CP1250
:
1989 enc
= kCFStringEncodingWindowsLatin2
;
1991 case wxFONTENCODING_CP1251
:
1992 enc
=kCFStringEncodingWindowsCyrillic
;
1994 case wxFONTENCODING_CP1252
:
1995 enc
=kCFStringEncodingWindowsLatin1
;
1997 case wxFONTENCODING_CP1253
:
1998 enc
= kCFStringEncodingWindowsGreek
;
2000 case wxFONTENCODING_CP1254
:
2001 enc
= kCFStringEncodingWindowsLatin5
;
2003 case wxFONTENCODING_CP1255
:
2004 enc
=kCFStringEncodingWindowsHebrew
;
2006 case wxFONTENCODING_CP1256
:
2007 enc
=kCFStringEncodingWindowsArabic
;
2009 case wxFONTENCODING_CP1257
:
2010 enc
= kCFStringEncodingWindowsBalticRim
;
2012 // This only really encodes to UTF7 (if that) evidently
2013 // case wxFONTENCODING_UTF7 :
2014 // enc = kCFStringEncodingNonLossyASCII ;
// Unicode and EUC encodings
2016 case wxFONTENCODING_UTF8
:
2017 enc
= kCFStringEncodingUTF8
;
2019 case wxFONTENCODING_EUC_JP
:
2020 enc
= kCFStringEncodingEUC_JP
;
2022 case wxFONTENCODING_UTF16
:
2023 enc
= kCFStringEncodingUnicode
;
// classic Mac OS encodings
2025 case wxFONTENCODING_MACROMAN
:
2026 enc
= kCFStringEncodingMacRoman
;
2028 case wxFONTENCODING_MACJAPANESE
:
2029 enc
= kCFStringEncodingMacJapanese
;
2031 case wxFONTENCODING_MACCHINESETRAD
:
2032 enc
= kCFStringEncodingMacChineseTrad
;
2034 case wxFONTENCODING_MACKOREAN
:
2035 enc
= kCFStringEncodingMacKorean
;
2037 case wxFONTENCODING_MACARABIC
:
2038 enc
= kCFStringEncodingMacArabic
;
2040 case wxFONTENCODING_MACHEBREW
:
2041 enc
= kCFStringEncodingMacHebrew
;
2043 case wxFONTENCODING_MACGREEK
:
2044 enc
= kCFStringEncodingMacGreek
;
2046 case wxFONTENCODING_MACCYRILLIC
:
2047 enc
= kCFStringEncodingMacCyrillic
;
2049 case wxFONTENCODING_MACDEVANAGARI
:
2050 enc
= kCFStringEncodingMacDevanagari
;
2052 case wxFONTENCODING_MACGURMUKHI
:
2053 enc
= kCFStringEncodingMacGurmukhi
;
2055 case wxFONTENCODING_MACGUJARATI
:
2056 enc
= kCFStringEncodingMacGujarati
;
2058 case wxFONTENCODING_MACORIYA
:
2059 enc
= kCFStringEncodingMacOriya
;
2061 case wxFONTENCODING_MACBENGALI
:
2062 enc
= kCFStringEncodingMacBengali
;
2064 case wxFONTENCODING_MACTAMIL
:
2065 enc
= kCFStringEncodingMacTamil
;
2067 case wxFONTENCODING_MACTELUGU
:
2068 enc
= kCFStringEncodingMacTelugu
;
2070 case wxFONTENCODING_MACKANNADA
:
2071 enc
= kCFStringEncodingMacKannada
;
2073 case wxFONTENCODING_MACMALAJALAM
:
2074 enc
= kCFStringEncodingMacMalayalam
;
2076 case wxFONTENCODING_MACSINHALESE
:
2077 enc
= kCFStringEncodingMacSinhalese
;
2079 case wxFONTENCODING_MACBURMESE
:
2080 enc
= kCFStringEncodingMacBurmese
;
2082 case wxFONTENCODING_MACKHMER
:
2083 enc
= kCFStringEncodingMacKhmer
;
2085 case wxFONTENCODING_MACTHAI
:
2086 enc
= kCFStringEncodingMacThai
;
2088 case wxFONTENCODING_MACLAOTIAN
:
2089 enc
= kCFStringEncodingMacLaotian
;
2091 case wxFONTENCODING_MACGEORGIAN
:
2092 enc
= kCFStringEncodingMacGeorgian
;
2094 case wxFONTENCODING_MACARMENIAN
:
2095 enc
= kCFStringEncodingMacArmenian
;
2097 case wxFONTENCODING_MACCHINESESIMP
:
2098 enc
= kCFStringEncodingMacChineseSimp
;
2100 case wxFONTENCODING_MACTIBETAN
:
2101 enc
= kCFStringEncodingMacTibetan
;
2103 case wxFONTENCODING_MACMONGOLIAN
:
2104 enc
= kCFStringEncodingMacMongolian
;
2106 case wxFONTENCODING_MACETHIOPIC
:
2107 enc
= kCFStringEncodingMacEthiopic
;
2109 case wxFONTENCODING_MACCENTRALEUR
:
2110 enc
= kCFStringEncodingMacCentralEurRoman
;
2112 case wxFONTENCODING_MACVIATNAMESE
:
2113 enc
= kCFStringEncodingMacVietnamese
;
2115 case wxFONTENCODING_MACARABICEXT
:
2116 enc
= kCFStringEncodingMacExtArabic
;
2118 case wxFONTENCODING_MACSYMBOL
:
2119 enc
= kCFStringEncodingMacSymbol
;
2121 case wxFONTENCODING_MACDINGBATS
:
2122 enc
= kCFStringEncodingMacDingbats
;
2124 case wxFONTENCODING_MACTURKISH
:
2125 enc
= kCFStringEncodingMacTurkish
;
2127 case wxFONTENCODING_MACCROATIAN
:
2128 enc
= kCFStringEncodingMacCroatian
;
2130 case wxFONTENCODING_MACICELANDIC
:
2131 enc
= kCFStringEncodingMacIcelandic
;
2133 case wxFONTENCODING_MACROMANIAN
:
2134 enc
= kCFStringEncodingMacRomanian
;
2136 case wxFONTENCODING_MACCELTIC
:
2137 enc
= kCFStringEncodingMacCeltic
;
2139 case wxFONTENCODING_MACGAELIC
:
2140 enc
= kCFStringEncodingMacGaelic
;
2142 // case wxFONTENCODING_MACKEYBOARD :
2143 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2146 // because gcc is picky
// wxMBConv implementation for the Cocoa port, built on Core Foundation
// CFString: the multibyte side uses the CFStringEncoding kept in
// m_encoding, the wide side goes through UniChar buffers and, when
// SIZEOF_WCHAR_T is 4, through wxMBConvUTF16.
2152 class wxMBConv_cocoa
: public wxMBConv
// default constructor: use the system encoding
2157 Init(CFStringGetSystemEncoding()) ;
// construct from a charset name, resolved non-interactively through
// wxFontMapperBase and then mapped with wxCFStringEncFromFontEnc()
2161 wxMBConv_cocoa(const wxChar
* name
)
2163 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
// construct from a wxFontEncoding value
2167 wxMBConv_cocoa(wxFontEncoding encoding
)
2169 Init( wxCFStringEncFromFontEnc(encoding
) );
// common part of all constructors: just remember the encoding
2176 void Init( CFStringEncoding encoding
)
2178 m_encoding
= encoding
;
// Convert the multibyte string szUnConv to wide characters in szOut, which
// has room for nOutSize characters. A CFString is created from the input
// bytes and its UTF-16 characters are then extracted.
2181 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2185 CFStringRef theString
= CFStringCreateWithBytes (
2186 NULL
, //the allocator
2187 (const UInt8
*)szUnConv
,
2190 false //no BOM/external representation
2193 wxASSERT(theString
);
2195 size_t nOutLength
= CFStringGetLength(theString
);
2199 CFRelease(theString
);
// NOTE(review): the range below is nOutSize characters long although the
// string holds nOutLength characters, and the buffer of nOutSize elements
// is later written at index nOutLength. If no guard exists in the lines
// missing from this listing, this can exceed the string or the buffer
// whenever the two values differ -- confirm against the full source.
2203 CFRange theRange
= { 0, nOutSize
};
2205 #if SIZEOF_WCHAR_T == 4
2206 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2209 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2211 CFRelease(theString
);
2213 szUniCharBuffer
[nOutLength
] = '\0' ;
2215 #if SIZEOF_WCHAR_T == 4
// widen the UTF-16 data to 4 byte wchar_t and free the temporary buffer
2216 wxMBConvUTF16 converter
;
2217 converter
.MB2WC(szOut
, (const char*)szUniCharBuffer
, nOutSize
) ;
2218 delete[] szUniCharBuffer
;
// Convert the wide string szUnConv to the multibyte encoding m_encoding,
// writing at most nOutSize bytes to szOut. The function returns
// nRealOutSize - 1.
2224 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2228 size_t nRealOutSize
;
2229 size_t nBufSize
= wxWcslen(szUnConv
);
2230 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
2232 #if SIZEOF_WCHAR_T == 4
// wchar_t is wider than UniChar here: build a temporary UTF-16 copy and
// turn the byte count returned by the converter into a UniChar count
2233 wxMBConvUTF16 converter
;
2234 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2235 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1] ;
2236 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
)) ;
2237 nBufSize
/= sizeof(UniChar
);
2240 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2244 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
2247 wxASSERT(theString
);
2249 //Note that CER puts a BOM when converting to unicode
2250 //so we check and use getchars instead in that case
2251 if (m_encoding
== kCFStringEncodingUnicode
)
2254 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2256 nRealOutSize
= CFStringGetLength(theString
) + 1;
2262 CFRangeMake(0, CFStringGetLength(theString
)),
2264 0, //what to put in characters that can't be converted -
2265 //0 tells CFString to return NULL if it meets such a character
2266 false, //not an external representation
2269 (CFIndex
*) &nRealOutSize
2273 CFRelease(theString
);
2275 #if SIZEOF_WCHAR_T == 4
2276 delete[] szUniBuffer
;
2279 return nRealOutSize
- 1;
// the converter is usable if the encoding is valid and available in
// Core Foundation
2284 return m_encoding
!= kCFStringEncodingInvalidId
&&
2285 CFStringIsEncodingAvailable(m_encoding
);
// the CFString encoding of the multibyte side, set by Init()
2289 CFStringEncoding m_encoding
;
2292 #endif // defined(__WXCOCOA__)
2294 // ============================================================================
2295 // Mac conversion classes
2296 // ============================================================================
2298 #if defined(__WXMAC__) && defined(TARGET_CARBON)
// wxMBConv implementation for the Carbon port, built on the Text Encoding
// Converter (TEC): one TECObjectRef converts from the multibyte encoding to
// 16 bit Unicode and another one converts back. When SIZEOF_WCHAR_T is 4
// the UTF-16 data is additionally passed through wxMBConvUTF16.
2300 class wxMBConv_mac
: public wxMBConv
// default constructor: use the system encoding
2305 Init(CFStringGetSystemEncoding()) ;
// construct from a charset name, resolved non-interactively through
// wxFontMapperBase and then mapped with wxMacGetSystemEncFromFontEnc()
2309 wxMBConv_mac(const wxChar
* name
)
2311 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
// construct from a wxFontEncoding value
2315 wxMBConv_mac(wxFontEncoding encoding
)
2317 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
// release both converters; the returned status values are not inspected
2322 OSStatus status
= noErr
;
2323 status
= TECDisposeConverter(m_MB2WC_converter
);
2324 status
= TECDisposeConverter(m_WC2MB_converter
);
// common part of all constructors: remember the encodings and create the
// two converters
// NOTE(review): the status returned by TECCreateConverter() is stored but
// no check of it is visible in this listing.
2328 void Init( TextEncodingBase encoding
)
2330 OSStatus status
= noErr
;
2331 m_char_encoding
= encoding
;
2332 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
2334 status
= TECCreateConverter(&m_MB2WC_converter
,
2336 m_unicode_encoding
);
2337 status
= TECCreateConverter(&m_WC2MB_converter
,
// Convert the NUL-terminated multibyte string psz to wide characters.
// The output goes to buf when it is not NULL and to the temporary tbuf
// otherwise.
2342 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2344 OSStatus status
= noErr
;
2345 ByteCount byteOutLen
;
2346 ByteCount byteInLen
= strlen(psz
) ;
2347 wchar_t *tbuf
= NULL
;
2348 UniChar
* ubuf
= NULL
;
// NOTE(review): the condition guarding the next two statements is not
// visible in this listing; presumably they only run when buf is NULL.
2353 //apple specs say at least 32
2354 n
= wxMax( 32 , byteInLen
) ;
2355 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
2357 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
2358 #if SIZEOF_WCHAR_T == 4
// separate UTF-16 buffer with 2 extra bytes for the terminator
2359 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2361 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2363 status
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2364 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2365 #if SIZEOF_WCHAR_T == 4
2366 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2367 // is not properly terminated we get random characters at the end
2368 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2369 wxMBConvUTF16 converter
;
2370 res
= converter
.MB2WC( (buf
? buf
: tbuf
) , (const char*)ubuf
, n
) ;
// without the UTF-16 step the result is the number of UniChars produced
2373 res
= byteOutLen
/ sizeof( UniChar
) ;
// NOTE(review): the statements freeing ubuf and tbuf and the body of the
// following test are not visible in this listing -- confirm they exist.
2378 if ( buf
&& res
< n
)
// Convert the NUL-terminated wide string psz to the multibyte encoding.
// The output goes to buf when it is not NULL and to the temporary tbuf
// otherwise; when writing to buf the result is converted back with MB2WC()
// and compared with the input to detect lossy conversions.
2384 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2386 OSStatus status
= noErr
;
2387 ByteCount byteOutLen
;
2388 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
// NOTE(review): as in MB2WC(), the guarding condition is not visible here;
// the buffer size allows 8 bytes per input character plus SIZEOF_WCHAR_T.
2394 //apple specs say at least 32
2395 n
= wxMax( 32 , ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2396 tbuf
= (char*) malloc( n
) ;
2399 ByteCount byteBufferLen
= n
;
2400 UniChar
* ubuf
= NULL
;
2401 #if SIZEOF_WCHAR_T == 4
// build a temporary UTF-16 copy of the input; byteInLen becomes the size
// reported by the UTF-16 converter
2402 wxMBConvUTF16 converter
;
2403 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2404 byteInLen
= unicharlen
;
2405 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2406 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2408 ubuf
= (UniChar
*) psz
;
2410 status
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2411 (TextPtr
) (buf
? buf
: tbuf
) , byteBufferLen
, &byteOutLen
);
2412 #if SIZEOF_WCHAR_T == 4
2418 size_t res
= byteOutLen
;
2419 if ( buf
&& res
< n
)
2423 //we need to double-trip to verify it didn't insert any ? in place
2424 //of bogus characters
2425 wxWCharBuffer
wcBuf(n
);
2426 size_t pszlen
= wxWcslen(psz
);
2427 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
2428 wxWcslen(wcBuf
) != pszlen
||
2429 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2431 // we didn't obtain the same thing we started from, hence
2432 // the conversion was lossy and we consider that it failed
// the converter is usable only if both TEC converters were created
2441 { return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
; }
// converters for the two directions, created in Init()
2444 TECObjectRef m_MB2WC_converter
;
2445 TECObjectRef m_WC2MB_converter
;
// the multibyte encoding and the 16 bit Unicode encoding used with it
2447 TextEncodingBase m_char_encoding
;
2448 TextEncodingBase m_unicode_encoding
;
2451 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2453 // ============================================================================
2454 // wxEncodingConverter based conversion classes
2455 // ============================================================================
// wxMBConv implementation based on wxEncodingConverter: m2w converts from
// the encoding m_enc to wxFONTENCODING_UNICODE and w2m converts back.
2459 class wxMBConv_wxwin
: public wxMBConv
// initialize both converters; m_ok records whether both succeeded
2464 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2465 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2469 // temporarily just use wxEncodingConverter stuff,
2470 // so that it works while a better implementation is built
2471 wxMBConv_wxwin(const wxChar
* name
)
// NOTE(review): the condition choosing between the two assignments below
// is not visible in this listing; presumably the name lookup is used when
// name is not NULL and wxFONTENCODING_SYSTEM otherwise -- confirm.
2474 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2476 m_enc
= wxFONTENCODING_SYSTEM
;
// construct from a wxFontEncoding value
2481 wxMBConv_wxwin(wxFontEncoding enc
)
// Convert psz to wide characters with m2w; the buffer size argument is
// unused.
2488 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2490 size_t inbuf
= strlen(psz
);
2493 if (!m2w
.Convert(psz
,buf
))
// Convert psz to multibyte characters with w2m; the buffer size argument
// is unused.
2499 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2501 const size_t inbuf
= wxWcslen(psz
);
2504 if (!w2m
.Convert(psz
,buf
))
// true if both converters were initialized successfully
2511 bool IsOk() const { return m_ok
; }
// the multibyte encoding and the converters for the two directions
2514 wxFontEncoding m_enc
;
2515 wxEncodingConverter m2w
, w2m
;
2517 // were we initialized successfully?
2520 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2523 // make the constructors available for unit testing
// Create a wxMBConv_wxwin for the given charset name and test it with
// IsOk().
// NOTE(review): what happens when IsOk() fails and the final return are not
// visible in this listing; presumably the object is deleted and NULL is
// returned -- confirm.
2524 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_wxwin( const wxChar
* name
)
2526 wxMBConv_wxwin
* result
= new wxMBConv_wxwin( name
);
2527 if ( !result
->IsOk() )
2535 #endif // wxUSE_FONTMAP
2537 // ============================================================================
2538 // wxCSConv implementation
2539 // ============================================================================
2541 void wxCSConv::Init()
// Construct a converter for the charset with the given name.
// m_encoding is set either from wxFontMapperBase::GetEncodingFromName() or
// to wxFONTENCODING_SYSTEM.
// NOTE(review): the condition selecting between the two assignments and
// the remaining statements are not visible in this listing.
2548 wxCSConv::wxCSConv(const wxChar
*charset
)
2558 m_encoding
= wxFontMapperBase::GetEncodingFromName(charset
);
2560 m_encoding
= wxFONTENCODING_SYSTEM
;
// Construct a converter for the given encoding.
// wxFONTENCODING_MAX and wxFONTENCODING_DEFAULT are rejected with a debug
// failure message and replaced by wxFONTENCODING_SYSTEM before the value
// is stored in m_encoding.
2564 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2566 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2568 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2570 encoding
= wxFONTENCODING_SYSTEM
;
2575 m_encoding
= encoding
;
2578 wxCSConv::~wxCSConv()
// Copy constructor: copy the charset name with SetName() and the encoding
// value from conv.
2583 wxCSConv::wxCSConv(const wxCSConv
& conv
)
2588 SetName(conv
.m_name
);
2589 m_encoding
= conv
.m_encoding
;
// Assignment: copy the charset name with SetName() and the encoding value
// from conv.
// NOTE(review): no reset of m_deferred or m_convReal is visible in this
// listing; if the missing lines do not handle it, a converter that was
// already created for the old charset could remain in use -- confirm.
2592 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2596 SetName(conv
.m_name
);
2597 m_encoding
= conv
.m_encoding
;
2602 void wxCSConv::Clear()
// Store a private copy of the charset name, made with wxStrdup(), in
// m_name.
// NOTE(review): any guard against a NULL charset is not visible in this
// listing.
2611 void wxCSConv::SetName(const wxChar
*charset
)
2615 m_name
= wxStrdup(charset
);
2621 #include "wx/hashmap.h"
// Hash map type keyed by wxFontEncoding with wxString values, and the
// file-level cache of that type used by wxCSConv::DoCreate(): it stores the
// charset name that produced a working iconv converter for an encoding,
// or an empty string to remember a failure.
2623 WX_DECLARE_HASH_MAP( wxFontEncoding
, wxString
, wxIntegerHash
, wxIntegerEqual
,
2624 wxEncodingNameCache
);
2626 static wxEncodingNameCache gs_nameCache
;
// Create the real conversion object for the charset described by m_name
// and m_encoding.
//
// The candidates are tried in the order listed in the comment below: the
// OS facilities (iconv, Win32, Mac or Cocoa converters), then the built-in
// UTF converters, then wxMBConv_wxwin. If nothing works an error is logged.
//
// NOTE(review): most return statements and the IsOk() checks on the
// candidate objects are not visible in this listing.
2629 wxMBConv
*wxCSConv::DoCreate() const
2632 wxLogTrace(TRACE_STRCONV
,
2633 wxT("creating conversion for %s"),
2635 : wxFontMapperBase::GetEncodingName(m_encoding
).c_str()));
2636 #endif // wxUSE_FONTMAP
2638 // check for the special case of ASCII or ISO8859-1 charset: as we have
2639 // special knowledge of it anyhow, we don't need to create a special
2640 // conversion object
2641 if ( m_encoding
== wxFONTENCODING_ISO8859_1
||
2642 m_encoding
== wxFONTENCODING_DEFAULT
)
2644 // don't convert at all
2648 // we trust OS to do conversion better than we can so try external
2649 // conversion methods first
2651 // the full order is:
2652 // 1. OS conversion (iconv() under Unix or Win32 API)
2653 // 2. hard coded conversions for UTF
2654 // 3. wxEncodingConverter as fall back
2660 #endif // !wxUSE_FONTMAP
// step 1a: iconv, first with the explicit name, then with the names known
// for the encoding
2662 wxString
name(m_name
);
2663 wxFontEncoding
encoding(m_encoding
);
2665 if ( !name
.empty() )
2667 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
2675 wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2676 #endif // wxUSE_FONTMAP
// look in the cache before trying every known name of the encoding
2680 const wxEncodingNameCache::iterator it
= gs_nameCache
.find(encoding
);
2681 if ( it
!= gs_nameCache
.end() )
2683 if ( it
->second
.empty() )
2686 wxMBConv_iconv
*conv
= new wxMBConv_iconv(it
->second
);
2693 const wxChar
** names
= wxFontMapperBase::GetAllEncodingNames(encoding
);
2695 for ( ; *names
; ++names
)
2697 wxMBConv_iconv
*conv
= new wxMBConv_iconv(*names
);
2700 gs_nameCache
[encoding
] = *names
;
2707 gs_nameCache
[encoding
] = _T(""); // cache the failure
2709 #endif // wxUSE_FONTMAP
2711 #endif // HAVE_ICONV
// step 1b: platform specific converters
2713 #ifdef wxHAVE_WIN32_MB2WC
2716 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2717 : new wxMBConv_win32(m_encoding
);
2726 #endif // wxHAVE_WIN32_MB2WC
2727 #if defined(__WXMAC__)
2729 // leave UTF16 and UTF32 to the built-ins of wx
2730 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
2731 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
2735 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
2736 : new wxMBConv_mac(m_encoding
);
2738 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
2747 #if defined(__WXCOCOA__)
2749 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
2753 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
2754 : new wxMBConv_cocoa(m_encoding
);
2756 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
// step 2: built-in UTF converters, selected by the encoding value
2766 wxFontEncoding enc
= m_encoding
;
2768 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2770 // use "false" to suppress interactive dialogs -- we can be called from
2771 // anywhere and popping up a dialog from here is the last thing we want to
2773 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
2775 #endif // wxUSE_FONTMAP
2779 case wxFONTENCODING_UTF7
:
2780 return new wxMBConvUTF7
;
2782 case wxFONTENCODING_UTF8
:
2783 return new wxMBConvUTF8
;
2785 case wxFONTENCODING_UTF16BE
:
2786 return new wxMBConvUTF16BE
;
2788 case wxFONTENCODING_UTF16LE
:
2789 return new wxMBConvUTF16LE
;
2791 case wxFONTENCODING_UTF32BE
:
2792 return new wxMBConvUTF32BE
;
2794 case wxFONTENCODING_UTF32LE
:
2795 return new wxMBConvUTF32LE
;
2798 // nothing to do but put here to suppress gcc warnings
// step 3: fall back to the wxEncodingConverter based class
2805 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2806 : new wxMBConv_wxwin(m_encoding
);
2812 #endif // wxUSE_FONTMAP
2814 // NB: This is a hack to prevent deadlock. What could otherwise happen
2815 // in Unicode build: wxConvLocal creation ends up being here
2816 // because of some failure and logs the error. But wxLog will try to
2817 // attach timestamp, for which it will need wxConvLocal (to convert
2818 // time to char* and then wchar_t*), but that fails, tries to log
2819 // error, but wxLog has a (already locked) critical section that
2820 // guards static buffer.
2821 static bool alreadyLoggingError
= false;
2822 if (!alreadyLoggingError
)
2824 alreadyLoggingError
= true;
2825 wxLogError(_("Cannot convert from the charset '%s'!"),
2829 wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str()
2830 #else // !wxUSE_FONTMAP
2831 wxString::Format(_("encoding %s"), m_encoding
).c_str()
2832 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2834 alreadyLoggingError
= false;
// Create the real conversion object on first use: m_convReal is set from
// DoCreate() and m_deferred is cleared. The method is const, so the
// members are modified through a cast of the this pointer.
// NOTE(review): the test of m_deferred guarding this code is not visible in
// this listing.
2840 void wxCSConv::CreateConvIfNeeded() const
2844 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2847 // if we have neither the name nor the encoding, use the default
2848 // encoding for this system
2849 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
2851 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
2853 #endif // wxUSE_INTL
2855 self
->m_convReal
= DoCreate();
2856 self
->m_deferred
= false;
// Convert the multibyte string psz to wide characters in buf.
// The real converter is created if needed and the call is forwarded to it;
// the fallback code copies every byte of psz, including the terminating
// NUL, to buf as an unsigned char value.
// NOTE(review): the conditions selecting the fallback and guarding the copy
// loop, and the fallback return value, are not visible in this listing.
2860 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2862 CreateConvIfNeeded();
2865 return m_convReal
->MB2WC(buf
, psz
, n
);
2868 size_t len
= strlen(psz
);
2872 for (size_t c
= 0; c
<= len
; c
++)
2873 buf
[c
] = (unsigned char)(psz
[c
]);
// Convert the wide string psz to multibyte characters in buf.
// The real converter is created if needed and the call is forwarded to it;
// the fallback code copies the characters of psz, including the
// terminating NUL, to buf with a cast to char.
// NOTE(review): the conditions selecting the fallback, the body of the
// second loop and the fallback return value are not visible in this
// listing.
2879 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2881 CreateConvIfNeeded();
2884 return m_convReal
->WC2MB(buf
, psz
, n
);
2887 const size_t len
= wxWcslen(psz
);
2890 for (size_t c
= 0; c
<= len
; c
++)
2894 buf
[c
] = (char)psz
[c
];
2899 for (size_t c
= 0; c
<= len
; c
++)
2909 // ----------------------------------------------------------------------------
2911 // ----------------------------------------------------------------------------
// File-level converter objects and the exported references and pointer
// bound to them. The type of wxConvLibcObj depends on the platform
// (wxMBConv_win32, wxMBConv_mac or wxMBConvLibc); the preprocessor lines
// making that choice are only partly visible in this listing.
2914 static wxMBConv_win32 wxConvLibcObj
;
2915 #elif defined(__WXMAC__) && !defined(__MACH__)
2916 static wxMBConv_mac wxConvLibcObj
;
2918 static wxMBConvLibc wxConvLibcObj
;
// converters for the system encoding, ISO 8859-1, UTF-7 and UTF-8
2921 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
2922 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
2923 static wxMBConvUTF7 wxConvUTF7Obj
;
2924 static wxMBConvUTF8 wxConvUTF8Obj
;
// exported names referring to the static objects above
2926 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
2927 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
2928 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
2929 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
2930 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
// the current converter initially points at the libc one
2931 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
2932 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
= &
2940 #else // !wxUSE_WCHAR_T
2942 // stand-ins in absence of wchar_t
2943 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
2948 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T