1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // ============================================================================
17 // ============================================================================
19 // ----------------------------------------------------------------------------
21 // ----------------------------------------------------------------------------
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
35 #include "wx/strconv.h"
40 #include "wx/msw/private.h"
41 #include "wx/msw/missing.h"
52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54 #endif // __WIN32__ but !__WXMICROWIN__
62 #include "wx/thread.h"
65 #include "wx/encconv.h"
66 #include "wx/fontmap.h"
71 #include <ATSUnicode.h>
72 #include <TextCommon.h>
73 #include <TextEncodingConverter.h>
76 #include "wx/mac/private.h" // includes mac headers
79 #define TRACE_STRCONV _T("strconv")
81 #if SIZEOF_WCHAR_T == 2
85 // ============================================================================
87 // ============================================================================
89 // ----------------------------------------------------------------------------
90 // UTF-16 en/decoding to/from UCS-4
91 // ----------------------------------------------------------------------------
94 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
99 *output
= (wxUint16
) input
;
102 else if (input
>=0x110000)
110 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
111 *output
= (wxUint16
) ((input
&0x3ff)+0xdc00);
117 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
119 if ((*input
<0xd800) || (*input
>0xdfff))
124 else if ((input
[1]<0xdc00) || (input
[1]>0xdfff))
131 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
137 // ----------------------------------------------------------------------------
139 // ----------------------------------------------------------------------------
141 wxMBConv::~wxMBConv()
143 // nothing to do here (necessary for Darwin linking probably)
146 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
150 // calculate the length of the buffer needed first
151 size_t nLen
= MB2WC(NULL
, psz
, 0);
152 if ( nLen
!= (size_t)-1 )
154 // now do the actual conversion
155 wxWCharBuffer
buf(nLen
);
156 nLen
= MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NULL
157 if ( nLen
!= (size_t)-1 )
164 wxWCharBuffer
buf((wchar_t *)NULL
);
169 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
173 size_t nLen
= WC2MB(NULL
, pwz
, 0);
174 if ( nLen
!= (size_t)-1 )
176 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
177 nLen
= WC2MB(buf
.data(), pwz
, nLen
+ 4);
178 if ( nLen
!= (size_t)-1 )
185 wxCharBuffer
buf((char *)NULL
);
190 const wxWCharBuffer
wxMBConv::cMB2WC(const char *szString
, size_t nStringLen
, size_t* pOutSize
) const
192 wxASSERT(pOutSize
!= NULL
);
194 const char* szEnd
= szString
+ nStringLen
+ 1;
195 const char* szPos
= szString
;
196 const char* szStart
= szPos
;
198 size_t nActualLength
= 0;
199 size_t nCurrentSize
= nStringLen
; //try normal size first (should never resize?)
201 wxWCharBuffer
theBuffer(nCurrentSize
);
203 //Convert the string until the length() is reached, continuing the
204 //loop every time a null character is reached
205 while(szPos
!= szEnd
)
207 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
209 //Get the length of the current (sub)string
210 size_t nLen
= MB2WC(NULL
, szPos
, 0);
212 //Invalid conversion?
213 if( nLen
== (size_t)-1 )
216 theBuffer
.data()[0u] = wxT('\0');
221 //Increase the actual length (+1 for current null character)
222 nActualLength
+= nLen
+ 1;
//if the buffer is too small, realloc the buffer
225 if (nActualLength
> (nCurrentSize
+1))
227 wxWCharBuffer
theNewBuffer(nCurrentSize
<< 1);
228 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
* sizeof(wchar_t));
229 theBuffer
= theNewBuffer
;
233 //Convert the current (sub)string
234 if ( MB2WC(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
237 theBuffer
.data()[0u] = wxT('\0');
241 //Increment to next (sub)string
242 //Note that we have to use strlen instead of nLen here
243 //because XX2XX gives us the size of the output buffer,
244 //which is not necessarily the length of the string
245 szPos
+= strlen(szPos
) + 1;
248 //success - return actual length and the buffer
249 *pOutSize
= nActualLength
;
253 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *szString
, size_t nStringLen
, size_t* pOutSize
) const
255 wxASSERT(pOutSize
!= NULL
);
257 const wchar_t* szEnd
= szString
+ nStringLen
+ 1;
258 const wchar_t* szPos
= szString
;
259 const wchar_t* szStart
= szPos
;
261 size_t nActualLength
= 0;
262 size_t nCurrentSize
= nStringLen
<< 2; //try * 4 first
264 wxCharBuffer
theBuffer(nCurrentSize
);
266 //Convert the string until the length() is reached, continuing the
267 //loop every time a null character is reached
268 while(szPos
!= szEnd
)
270 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
272 //Get the length of the current (sub)string
273 size_t nLen
= WC2MB(NULL
, szPos
, 0);
275 //Invalid conversion?
276 if( nLen
== (size_t)-1 )
279 theBuffer
.data()[0u] = wxT('\0');
283 //Increase the actual length (+1 for current null character)
284 nActualLength
+= nLen
+ 1;
//if the buffer is too small, realloc the buffer
287 if (nActualLength
> (nCurrentSize
+1))
289 wxCharBuffer
theNewBuffer(nCurrentSize
<< 1);
290 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
);
291 theBuffer
= theNewBuffer
;
295 //Convert the current (sub)string
296 if(WC2MB(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
299 theBuffer
.data()[0u] = wxT('\0');
303 //Increment to next (sub)string
304 //Note that we have to use wxWcslen instead of nLen here
305 //because XX2XX gives us the size of the output buffer,
306 //which is not necessarily the length of the string
307 szPos
+= wxWcslen(szPos
) + 1;
310 //success - return actual length and the buffer
311 *pOutSize
= nActualLength
;
315 // ----------------------------------------------------------------------------
317 // ----------------------------------------------------------------------------
319 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
321 return wxMB2WC(buf
, psz
, n
);
324 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
326 return wxWC2MB(buf
, psz
, n
);
331 // ----------------------------------------------------------------------------
332 // wxConvBrokenFileNames
333 // ----------------------------------------------------------------------------
335 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar
*charset
)
337 if ( !charset
|| wxStricmp(charset
, _T("UTF-8")) == 0
338 || wxStricmp(charset
, _T("UTF8")) == 0 )
339 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
341 m_conv
= new wxCSConv(charset
);
345 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf
,
347 size_t outputSize
) const
349 return m_conv
->MB2WC( outputBuf
, psz
, outputSize
);
353 wxConvBrokenFileNames::WC2MB(char *outputBuf
,
355 size_t outputSize
) const
357 return m_conv
->WC2MB( outputBuf
, psz
, outputSize
);
362 // ----------------------------------------------------------------------------
364 // ----------------------------------------------------------------------------
366 // Implementation (C) 2004 Fredrik Roubert
369 // BASE64 decoding table
371 static const unsigned char utf7unb64
[] =
373 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
374 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
375 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
376 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
377 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
378 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
379 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
380 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
381 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
382 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
383 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
384 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
385 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
386 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
387 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
388 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
389 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
390 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
391 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
392 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
393 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
394 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
395 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
396 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
397 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
398 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
399 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
400 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
401 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
402 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
403 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
404 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
407 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
411 while (*psz
&& ((!buf
) || (len
< n
)))
413 unsigned char cc
= *psz
++;
421 else if (*psz
== '-')
431 // BASE64 encoded string
435 for (lsb
= false, d
= 0, l
= 0;
436 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff; psz
++)
440 for (l
+= 6; l
>= 8; lsb
= !lsb
)
442 c
= (unsigned char)((d
>> (l
-= 8)) % 256);
451 *buf
= (wchar_t)(c
<< 8);
458 if (buf
&& (len
< n
))
464 // BASE64 encoding table
466 static const unsigned char utf7enb64
[] =
468 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
469 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
470 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
471 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
472 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
473 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
474 'w', 'x', 'y', 'z', '0', '1', '2', '3',
475 '4', '5', '6', '7', '8', '9', '+', '/'
479 // UTF-7 encoding table
481 // 0 - Set D (directly encoded characters)
482 // 1 - Set O (optional direct characters)
483 // 2 - whitespace characters (optional)
484 // 3 - special characters
486 static const unsigned char utf7encode
[128] =
488 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
489 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
490 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
492 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
494 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
498 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
504 while (*psz
&& ((!buf
) || (len
< n
)))
507 if (cc
< 0x80 && utf7encode
[cc
] < 1)
515 else if (((wxUint32
)cc
) > 0xffff)
517 // no surrogate pair generation (yet?)
528 // BASE64 encode string
529 unsigned int lsb
, d
, l
;
530 for (d
= 0, l
= 0; /*nothing*/; psz
++)
532 for (lsb
= 0; lsb
< 2; lsb
++)
535 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
537 for (l
+= 8; l
>= 6; )
541 *buf
++ = utf7enb64
[(d
>> l
) % 64];
546 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
552 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
561 if (buf
&& (len
< n
))
566 // ----------------------------------------------------------------------------
568 // ----------------------------------------------------------------------------
570 static wxUint32 utf8_max
[]=
571 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
// boundaries of the private use area we use to (temporarily) remap
// characters which are invalid in a UTF-8 encoded string
575 const wxUint32 wxUnicodePUA
= 0x100000;
576 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
578 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
582 while (*psz
&& ((!buf
) || (len
< n
)))
584 const char *opsz
= psz
;
585 bool invalid
= false;
586 unsigned char cc
= *psz
++, fc
= cc
;
588 for (cnt
= 0; fc
& 0x80; cnt
++)
597 // escape the escape character for octal escapes
598 if ((m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
599 && cc
== '\\' && (!buf
|| len
< n
))
611 // invalid UTF-8 sequence
616 unsigned ocnt
= cnt
- 1;
617 wxUint32 res
= cc
& (0x3f >> cnt
);
621 if ((cc
& 0xC0) != 0x80)
623 // invalid UTF-8 sequence
628 res
= (res
<< 6) | (cc
& 0x3f);
630 if (invalid
|| res
<= utf8_max
[ocnt
])
632 // illegal UTF-8 encoding
635 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
636 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
638 // if one of our PUA characters turns up externally
639 // it must also be treated as an illegal sequence
640 // (a bit like you have to escape an escape character)
// cast is ok because wchar_t == wxUint16 if WC_UTF16
647 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
648 if (pa
== (size_t)-1)
660 *buf
++ = (wchar_t)res
;
662 #endif // WC_UTF16/!WC_UTF16
667 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
669 while (opsz
< psz
&& (!buf
|| len
< n
))
// cast is ok because wchar_t == wxUint16 if WC_UTF16
673 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
674 wxASSERT(pa
!= (size_t)-1);
681 *buf
++ = (wchar_t)(wxUnicodePUA
+ (unsigned char)*opsz
);
687 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
689 while (opsz
< psz
&& (!buf
|| len
< n
))
691 if ( buf
&& len
+ 3 < n
)
693 unsigned char on
= *opsz
;
695 *buf
++ = (wchar_t)( L
'0' + on
/ 0100 );
696 *buf
++ = (wchar_t)( L
'0' + (on
% 0100) / 010 );
697 *buf
++ = (wchar_t)( L
'0' + on
% 010 );
703 else // MAP_INVALID_UTF8_NOT
710 if (buf
&& (len
< n
))
715 static inline bool isoctal(wchar_t wch
)
717 return L
'0' <= wch
&& wch
<= L
'7';
720 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
724 while (*psz
&& ((!buf
) || (len
< n
)))
728 // cast is ok for WC_UTF16
729 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
730 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
732 cc
=(*psz
++) & 0x7fffffff;
735 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
736 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
739 *buf
++ = (char)(cc
- wxUnicodePUA
);
742 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
743 && cc
== L
'\\' && psz
[0] == L
'\\' )
750 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
752 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
756 *buf
++ = (char) ((psz
[0] - L
'0')*0100 +
757 (psz
[1] - L
'0')*010 +
767 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
781 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
783 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
795 // ----------------------------------------------------------------------------
797 // ----------------------------------------------------------------------------
799 #ifdef WORDS_BIGENDIAN
800 #define wxMBConvUTF16straight wxMBConvUTF16BE
801 #define wxMBConvUTF16swap wxMBConvUTF16LE
803 #define wxMBConvUTF16swap wxMBConvUTF16BE
804 #define wxMBConvUTF16straight wxMBConvUTF16LE
810 // copy 16bit MB to 16bit String
811 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
815 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
818 *buf
++ = *(wxUint16
*)psz
;
821 psz
+= sizeof(wxUint16
);
823 if (buf
&& len
<n
) *buf
=0;
829 // copy 16bit String to 16bit MB
830 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
834 while (*psz
&& (!buf
|| len
< n
))
838 *(wxUint16
*)buf
= *psz
;
839 buf
+= sizeof(wxUint16
);
841 len
+= sizeof(wxUint16
);
844 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
850 // swap 16bit MB to 16bit String
851 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
855 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
859 ((char *)buf
)[0] = psz
[1];
860 ((char *)buf
)[1] = psz
[0];
864 psz
+= sizeof(wxUint16
);
866 if (buf
&& len
<n
) *buf
=0;
// swap 16bit String to 16bit MB
873 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
877 while (*psz
&& (!buf
|| len
< n
))
881 *buf
++ = ((char*)psz
)[1];
882 *buf
++ = ((char*)psz
)[0];
884 len
+= sizeof(wxUint16
);
887 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
896 // copy 16bit MB to 32bit String
897 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
901 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
904 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
905 if (pa
== (size_t)-1)
909 *buf
++ = (wchar_t)cc
;
911 psz
+= pa
* sizeof(wxUint16
);
913 if (buf
&& len
<n
) *buf
=0;
919 // copy 32bit String to 16bit MB
920 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
924 while (*psz
&& (!buf
|| len
< n
))
927 size_t pa
=encode_utf16(*psz
, cc
);
929 if (pa
== (size_t)-1)
934 *(wxUint16
*)buf
= cc
[0];
935 buf
+= sizeof(wxUint16
);
938 *(wxUint16
*)buf
= cc
[1];
939 buf
+= sizeof(wxUint16
);
943 len
+= pa
*sizeof(wxUint16
);
946 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
952 // swap 16bit MB to 32bit String
953 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
957 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
961 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
962 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
964 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
965 if (pa
== (size_t)-1)
969 *buf
++ = (wchar_t)cc
;
972 psz
+= pa
* sizeof(wxUint16
);
974 if (buf
&& len
<n
) *buf
=0;
980 // swap 32bit String to 16bit MB
981 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
985 while (*psz
&& (!buf
|| len
< n
))
988 size_t pa
=encode_utf16(*psz
, cc
);
990 if (pa
== (size_t)-1)
995 *buf
++ = ((char*)cc
)[1];
996 *buf
++ = ((char*)cc
)[0];
999 *buf
++ = ((char*)cc
)[3];
1000 *buf
++ = ((char*)cc
)[2];
1004 len
+= pa
*sizeof(wxUint16
);
1007 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
1015 // ----------------------------------------------------------------------------
1017 // ----------------------------------------------------------------------------
1019 #ifdef WORDS_BIGENDIAN
1020 #define wxMBConvUTF32straight wxMBConvUTF32BE
1021 #define wxMBConvUTF32swap wxMBConvUTF32LE
1023 #define wxMBConvUTF32swap wxMBConvUTF32BE
1024 #define wxMBConvUTF32straight wxMBConvUTF32LE
1028 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1029 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
1034 // copy 32bit MB to 16bit String
1035 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1039 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1043 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
1044 if (pa
== (size_t)-1)
1054 psz
+= sizeof(wxUint32
);
1056 if (buf
&& len
<n
) *buf
=0;
1062 // copy 16bit String to 32bit MB
1063 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1067 while (*psz
&& (!buf
|| len
< n
))
1071 // cast is ok for WC_UTF16
1072 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
1073 if (pa
== (size_t)-1)
1078 *(wxUint32
*)buf
= cc
;
1079 buf
+= sizeof(wxUint32
);
1081 len
+= sizeof(wxUint32
);
1085 if (buf
&& len
<=n
-sizeof(wxUint32
))
1093 // swap 32bit MB to 16bit String
1094 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1098 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1101 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
1102 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
1107 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
1108 if (pa
== (size_t)-1)
1118 psz
+= sizeof(wxUint32
);
1128 // swap 16bit String to 32bit MB
1129 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1133 while (*psz
&& (!buf
|| len
< n
))
1137 // cast is ok for WC_UTF16
1138 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
1139 if (pa
== (size_t)-1)
1149 len
+= sizeof(wxUint32
);
1153 if (buf
&& len
<=n
-sizeof(wxUint32
))
1162 // copy 32bit MB to 32bit String
1163 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1167 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1170 *buf
++ = (wchar_t)(*(wxUint32
*)psz
);
1172 psz
+= sizeof(wxUint32
);
1182 // copy 32bit String to 32bit MB
1183 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1187 while (*psz
&& (!buf
|| len
< n
))
1191 *(wxUint32
*)buf
= *psz
;
1192 buf
+= sizeof(wxUint32
);
1195 len
+= sizeof(wxUint32
);
1199 if (buf
&& len
<=n
-sizeof(wxUint32
))
1206 // swap 32bit MB to 32bit String
1207 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1211 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1215 ((char *)buf
)[0] = psz
[3];
1216 ((char *)buf
)[1] = psz
[2];
1217 ((char *)buf
)[2] = psz
[1];
1218 ((char *)buf
)[3] = psz
[0];
1222 psz
+= sizeof(wxUint32
);
1232 // swap 32bit String to 32bit MB
1233 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1237 while (*psz
&& (!buf
|| len
< n
))
1241 *buf
++ = ((char *)psz
)[3];
1242 *buf
++ = ((char *)psz
)[2];
1243 *buf
++ = ((char *)psz
)[1];
1244 *buf
++ = ((char *)psz
)[0];
1246 len
+= sizeof(wxUint32
);
1250 if (buf
&& len
<=n
-sizeof(wxUint32
))
1260 // ============================================================================
1261 // The classes doing conversion using the iconv_xxx() functions
1262 // ============================================================================
1266 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1267 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1268 // (unless there's yet another bug in glibc) the only case when iconv()
1269 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1270 // left in the input buffer -- when _real_ error occurs,
1271 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1273 // [This bug does not appear in glibc 2.2.]
1274 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1275 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1276 (errno != E2BIG || bufLeft != 0))
1278 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1281 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1283 #define ICONV_T_INVALID ((iconv_t)-1)
1285 #if SIZEOF_WCHAR_T == 4
1286 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1287 #define WC_ENC wxFONTENCODING_UTF32
1288 #elif SIZEOF_WCHAR_T == 2
1289 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1290 #define WC_ENC wxFONTENCODING_UTF16
1291 #else // sizeof(wchar_t) != 2 nor 4
1292 // does this ever happen?
1293 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1296 // ----------------------------------------------------------------------------
1297 // wxMBConv_iconv: encapsulates an iconv character set
1298 // ----------------------------------------------------------------------------
1300 class wxMBConv_iconv
: public wxMBConv
1303 wxMBConv_iconv(const wxChar
*name
);
1304 virtual ~wxMBConv_iconv();
1306 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1307 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1310 { return (m2w
!= ICONV_T_INVALID
) && (w2m
!= ICONV_T_INVALID
); }
1313 // the iconv handlers used to translate from multibyte to wide char and in
1314 // the other direction
1318 // guards access to m2w and w2m objects
1319 wxMutex m_iconvMutex
;
1323 // the name (for iconv_open()) of a wide char charset -- if none is
1324 // available on this machine, it will remain NULL
1325 static wxString ms_wcCharsetName
;
1327 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1328 // different endian-ness than the native one
1329 static bool ms_wcNeedsSwap
;
1332 // make the constructor available for unit testing
1333 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_iconv( const wxChar
* name
)
1335 wxMBConv_iconv
* result
= new wxMBConv_iconv( name
);
1336 if ( !result
->IsOk() )
1344 wxString
wxMBConv_iconv::ms_wcCharsetName
;
1345 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1347 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1349 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1350 // names for the charsets
1351 const wxCharBuffer
cname(wxString(name
).ToAscii());
1353 // check for charset that represents wchar_t:
1354 if ( ms_wcCharsetName
.empty() )
1356 wxLogTrace(TRACE_STRCONV
, _T("Looking for wide char codeset:"));
1359 const wxChar
**names
= wxFontMapperBase::GetAllEncodingNames(WC_ENC
);
1360 #else // !wxUSE_FONTMAP
1361 static const wxChar
*names
[] =
1363 #if SIZEOF_WCHAR_T == 4
1365 #elif SIZEOF_WCHAR_T = 2
1370 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1372 for ( ; *names
&& ms_wcCharsetName
.empty(); ++names
)
1374 const wxString
nameCS(*names
);
1376 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1377 wxString
nameXE(nameCS
);
1378 #ifdef WORDS_BIGENDIAN
1380 #else // little endian
1384 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1387 m2w
= iconv_open(nameXE
.ToAscii(), cname
);
1388 if ( m2w
== ICONV_T_INVALID
)
1390 // try charset w/o bytesex info (e.g. "UCS4")
1391 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1393 m2w
= iconv_open(nameCS
.ToAscii(), cname
);
1395 // and check for bytesex ourselves:
1396 if ( m2w
!= ICONV_T_INVALID
)
1398 char buf
[2], *bufPtr
;
1399 wchar_t wbuf
[2], *wbufPtr
;
1407 outsz
= SIZEOF_WCHAR_T
* 2;
1411 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1412 (char**)&wbufPtr
, &outsz
);
1414 if (ICONV_FAILED(res
, insz
))
1416 wxLogLastError(wxT("iconv"));
1417 wxLogError(_("Conversion to charset '%s' doesn't work."),
1420 else // ok, can convert to this encoding, remember it
1422 ms_wcCharsetName
= nameCS
;
1423 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1427 else // use charset not requiring byte swapping
1429 ms_wcCharsetName
= nameXE
;
1433 wxLogTrace(TRACE_STRCONV
,
1434 wxT("iconv wchar_t charset is \"%s\"%s"),
1435 ms_wcCharsetName
.empty() ? _T("<none>")
1436 : ms_wcCharsetName
.c_str(),
1437 ms_wcNeedsSwap
? _T(" (needs swap)")
1440 else // we already have ms_wcCharsetName
1442 m2w
= iconv_open(ms_wcCharsetName
.ToAscii(), cname
);
1445 if ( ms_wcCharsetName
.empty() )
1447 w2m
= ICONV_T_INVALID
;
1451 w2m
= iconv_open(cname
, ms_wcCharsetName
.ToAscii());
1452 if ( w2m
== ICONV_T_INVALID
)
1454 wxLogTrace(TRACE_STRCONV
,
1455 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1456 ms_wcCharsetName
.c_str(), cname
.data());
1461 wxMBConv_iconv::~wxMBConv_iconv()
1463 if ( m2w
!= ICONV_T_INVALID
)
1465 if ( w2m
!= ICONV_T_INVALID
)
1469 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
// NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
// Unfortunately there are a couple of global wxCSConv objects such as
1474 // wxConvLocal that are used all over wx code, so we have to make sure
1475 // the handle is used by at most one thread at the time. Otherwise
1476 // only a few wx classes would be safe to use from non-main threads
1477 // as MB<->WC conversion would fail "randomly".
1478 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1481 size_t inbuf
= strlen(psz
);
1482 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1484 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1485 wchar_t *bufPtr
= buf
;
1486 const char *pszPtr
= psz
;
1490 // have destination buffer, convert there
1492 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1493 (char**)&bufPtr
, &outbuf
);
1494 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1498 // convert to native endianness
1499 for ( unsigned i
= 0; i
< res
; i
++ )
1500 buf
[n
] = WC_BSWAP(buf
[i
]);
1503 // NB: iconv was given only strlen(psz) characters on input, and so
1504 // it couldn't convert the trailing zero. Let's do it ourselves
1505 // if there's some room left for it in the output buffer.
1511 // no destination buffer... convert using temp buffer
1512 // to calculate destination buffer requirement
1517 outbuf
= 8*SIZEOF_WCHAR_T
;
1520 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1521 (char**)&bufPtr
, &outbuf
);
1523 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1524 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1527 if (ICONV_FAILED(cres
, inbuf
))
1529 //VS: it is ok if iconv fails, hence trace only
1530 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1537 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1540 // NB: explained in MB2WC
1541 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1544 size_t inlen
= wxWcslen(psz
);
1545 size_t inbuf
= inlen
* SIZEOF_WCHAR_T
;
1549 wchar_t *tmpbuf
= 0;
1553 // need to copy to temp buffer to switch endianness
1554 // (doing WC_BSWAP twice on the original buffer won't help, as it
1555 // could be in read-only memory, or be accessed in some other thread)
1556 tmpbuf
= (wchar_t *)malloc(inbuf
+ SIZEOF_WCHAR_T
);
1557 for ( size_t i
= 0; i
< inlen
; i
++ )
1558 tmpbuf
[n
] = WC_BSWAP(psz
[i
]);
1559 tmpbuf
[inlen
] = L
'\0';
1565 // have destination buffer, convert there
1566 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1570 // NB: iconv was given only wcslen(psz) characters on input, and so
1571 // it couldn't convert the trailing zero. Let's do it ourselves
1572 // if there's some room left for it in the output buffer.
1578 // no destination buffer... convert using temp buffer
1579 // to calculate destination buffer requirement
1583 buf
= tbuf
; outbuf
= 16;
1585 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1588 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1596 if (ICONV_FAILED(cres
, inbuf
))
1598 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1605 #endif // HAVE_ICONV
1608 // ============================================================================
1609 // Win32 conversion classes
1610 // ============================================================================
1612 #ifdef wxHAVE_WIN32_MB2WC
1616 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1617 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1620 class wxMBConv_win32
: public wxMBConv
1625 m_CodePage
= CP_ACP
;
1629 wxMBConv_win32(const wxChar
* name
)
1631 m_CodePage
= wxCharsetToCodepage(name
);
1634 wxMBConv_win32(wxFontEncoding encoding
)
1636 m_CodePage
= wxEncodingToCodepage(encoding
);
1640 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
// note that we have to use the MB_ERR_INVALID_CHARS flag as without it
1643 // the behaviour is not compatible with the Unix version (using iconv)
1644 // and break the library itself, e.g. wxTextInputStream::NextChar()
1645 // wouldn't work if reading an incomplete MB char didn't result in an
1648 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1649 // an error (tested under Windows Server 2003) and apparently it is
1650 // done on purpose, i.e. the function accepts any input in this case
1651 // and although I'd prefer to return error on ill-formed output, our
1652 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
// explicitly ill-formed according to RFC 2152) either, so we don't
1654 // even have any fallback here...
1656 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1657 // Win XP or newer and if it is specified on older versions, conversion
1658 // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1659 // fails. So we can only use the flag on newer Windows versions.
1660 // Additionally, the flag is not supported by UTF7, symbol and CJK
1661 // encodings. See here:
1662 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1663 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1665 if ( m_CodePage
!= CP_UTF7
&& m_CodePage
!= CP_SYMBOL
&&
1666 m_CodePage
< 50000 &&
1667 IsAtLeastWin2kSP4() )
1669 flags
= MB_ERR_INVALID_CHARS
;
1671 else if ( m_CodePage
== CP_UTF8
)
1673 // Avoid round-trip in the special case of UTF-8 by using our
1674 // own UTF-8 conversion code:
1675 return wxMBConvUTF8().MB2WC(buf
, psz
, n
);
1678 const size_t len
= ::MultiByteToWideChar
1680 m_CodePage
, // code page
1681 flags
, // flags: fall on error
1682 psz
, // input string
1683 -1, // its length (NUL-terminated)
1684 buf
, // output string
1685 buf
? n
: 0 // size of output buffer
1689 // function totally failed
1693 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1694 // check if we succeeded, by doing a double trip:
1695 if ( !flags
&& buf
)
1697 const size_t mbLen
= strlen(psz
);
1698 wxCharBuffer
mbBuf(mbLen
);
1699 if ( ::WideCharToMultiByte
1706 mbLen
+ 1, // size in bytes, not length
1710 strcmp(mbBuf
, psz
) != 0 )
1712 // we didn't obtain the same thing we started from, hence
1713 // the conversion was lossy and we consider that it failed
1718 // note that it returns count of written chars for buf != NULL and size
1719 // of the needed buffer for buf == NULL so in either case the length of
1720 // the string (which never includes the terminating NUL) is one less
1724 size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
1727 we have a problem here: by default, WideCharToMultiByte() may
1728 replace characters unrepresentable in the target code page with bad
1729 quality approximations such as turning "1/2" symbol (U+00BD) into
1730 "1" for the code pages which don't have it and we, obviously, want
1731 to avoid this at any price
1733 the trouble is that this function does it _silently_, i.e. it won't
1734 even tell us whether it did or not... Win98/2000 and higher provide
1735 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1736 we have to resort to a round trip, i.e. check that converting back
1737 results in the same string -- this is, of course, expensive but
1738 otherwise we simply can't be sure to not garble the data.
1741 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1742 // it doesn't work with CJK encodings (which we test for rather roughly
1743 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1745 BOOL usedDef
wxDUMMY_INITIALIZE(false);
1748 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
1750 // it's our lucky day
1751 flags
= WC_NO_BEST_FIT_CHARS
;
1752 pUsedDef
= &usedDef
;
1754 else // old system or unsupported encoding
1760 const size_t len
= ::WideCharToMultiByte
1762 m_CodePage
, // code page
1763 flags
, // either none or no best fit
1764 pwz
, // input string
1765 -1, // it is (wide) NUL-terminated
1766 buf
, // output buffer
1767 buf
? n
: 0, // and its size
1768 NULL
, // default "replacement" char
1769 pUsedDef
// [out] was it used?
1774 // function totally failed
1778 // if we were really converting, check if we succeeded
1783 // check if the conversion failed, i.e. if any replacements
1788 else // we must resort to double tripping...
1790 wxWCharBuffer
wcBuf(n
);
1791 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
1792 wcscmp(wcBuf
, pwz
) != 0 )
1794 // we didn't obtain the same thing we started from, hence
1795 // the conversion was lossy and we consider that it failed
1801 // see the comment above for the reason of "len - 1"
// The converter is considered usable unless m_CodePage holds -1.
1805 bool IsOk() const { return m_CodePage
!= -1; }
1808 static bool CanUseNoBestFit()
1810 static int s_isWin98Or2k
= -1;
1812 if ( s_isWin98Or2k
== -1 )
1815 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
1818 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
1822 s_isWin98Or2k
= verMaj
>= 5;
1826 // unknown, be conservative by default
1830 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
1833 return s_isWin98Or2k
== 1;
1836 static bool IsAtLeastWin2kSP4()
1841 static int s_isAtLeastWin2kSP4
= -1;
1843 if ( s_isAtLeastWin2kSP4
== -1 )
1845 OSVERSIONINFOEX ver
;
1847 memset(&ver
, 0, sizeof(ver
));
1848 ver
.dwOSVersionInfoSize
= sizeof(ver
);
1849 GetVersionEx((OSVERSIONINFO
*)&ver
);
1851 s_isAtLeastWin2kSP4
=
1852 ((ver
.dwMajorVersion
> 5) || // Vista+
1853 (ver
.dwMajorVersion
== 5 && ver
.dwMinorVersion
> 0) || // XP/2003
1854 (ver
.dwMajorVersion
== 5 && ver
.dwMinorVersion
== 0 &&
1855 ver
.wServicePackMajor
>= 4)) // 2000 SP4+
1859 return s_isAtLeastWin2kSP4
== 1;
1866 #endif // wxHAVE_WIN32_MB2WC
1868 // ============================================================================
1869 // Cocoa conversion classes
1870 // ============================================================================
1872 #if defined(__WXCOCOA__)
1874 // RN: There is no UTF-32 support in either Core Foundation or
1875 // Cocoa. Strangely enough, Core Foundation uses UTF-32
1876 // internally quite a bit - it's just not public (yet).
1878 #include <CoreFoundation/CFString.h>
1879 #include <CoreFoundation/CFStringEncodingExt.h>
1881 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
1883 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
1884 if ( encoding
== wxFONTENCODING_DEFAULT
)
1886 enc
= CFStringGetSystemEncoding();
1888 else switch( encoding
)
1890 case wxFONTENCODING_ISO8859_1
:
1891 enc
= kCFStringEncodingISOLatin1
;
1893 case wxFONTENCODING_ISO8859_2
:
1894 enc
= kCFStringEncodingISOLatin2
;
1896 case wxFONTENCODING_ISO8859_3
:
1897 enc
= kCFStringEncodingISOLatin3
;
1899 case wxFONTENCODING_ISO8859_4
:
1900 enc
= kCFStringEncodingISOLatin4
;
1902 case wxFONTENCODING_ISO8859_5
:
1903 enc
= kCFStringEncodingISOLatinCyrillic
;
1905 case wxFONTENCODING_ISO8859_6
:
1906 enc
= kCFStringEncodingISOLatinArabic
;
1908 case wxFONTENCODING_ISO8859_7
:
1909 enc
= kCFStringEncodingISOLatinGreek
;
1911 case wxFONTENCODING_ISO8859_8
:
1912 enc
= kCFStringEncodingISOLatinHebrew
;
1914 case wxFONTENCODING_ISO8859_9
:
1915 enc
= kCFStringEncodingISOLatin5
;
1917 case wxFONTENCODING_ISO8859_10
:
1918 enc
= kCFStringEncodingISOLatin6
;
1920 case wxFONTENCODING_ISO8859_11
:
1921 enc
= kCFStringEncodingISOLatinThai
;
1923 case wxFONTENCODING_ISO8859_13
:
1924 enc
= kCFStringEncodingISOLatin7
;
1926 case wxFONTENCODING_ISO8859_14
:
1927 enc
= kCFStringEncodingISOLatin8
;
1929 case wxFONTENCODING_ISO8859_15
:
1930 enc
= kCFStringEncodingISOLatin9
;
1933 case wxFONTENCODING_KOI8
:
1934 enc
= kCFStringEncodingKOI8_R
;
1936 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
1937 enc
= kCFStringEncodingDOSRussian
;
1940 // case wxFONTENCODING_BULGARIAN :
1944 case wxFONTENCODING_CP437
:
1945 enc
=kCFStringEncodingDOSLatinUS
;
1947 case wxFONTENCODING_CP850
:
1948 enc
= kCFStringEncodingDOSLatin1
;
1950 case wxFONTENCODING_CP852
:
1951 enc
= kCFStringEncodingDOSLatin2
;
1953 case wxFONTENCODING_CP855
:
1954 enc
= kCFStringEncodingDOSCyrillic
;
1956 case wxFONTENCODING_CP866
:
1957 enc
=kCFStringEncodingDOSRussian
;
1959 case wxFONTENCODING_CP874
:
1960 enc
= kCFStringEncodingDOSThai
;
1962 case wxFONTENCODING_CP932
:
1963 enc
= kCFStringEncodingDOSJapanese
;
1965 case wxFONTENCODING_CP936
:
1966 enc
=kCFStringEncodingDOSChineseSimplif
;
1968 case wxFONTENCODING_CP949
:
1969 enc
= kCFStringEncodingDOSKorean
;
1971 case wxFONTENCODING_CP950
:
1972 enc
= kCFStringEncodingDOSChineseTrad
;
1974 case wxFONTENCODING_CP1250
:
1975 enc
= kCFStringEncodingWindowsLatin2
;
1977 case wxFONTENCODING_CP1251
:
1978 enc
=kCFStringEncodingWindowsCyrillic
;
1980 case wxFONTENCODING_CP1252
:
1981 enc
=kCFStringEncodingWindowsLatin1
;
1983 case wxFONTENCODING_CP1253
:
1984 enc
= kCFStringEncodingWindowsGreek
;
1986 case wxFONTENCODING_CP1254
:
1987 enc
= kCFStringEncodingWindowsLatin5
;
1989 case wxFONTENCODING_CP1255
:
1990 enc
=kCFStringEncodingWindowsHebrew
;
1992 case wxFONTENCODING_CP1256
:
1993 enc
=kCFStringEncodingWindowsArabic
;
1995 case wxFONTENCODING_CP1257
:
1996 enc
= kCFStringEncodingWindowsBalticRim
;
1998 // This only really encodes to UTF7 (if that) evidently
1999 // case wxFONTENCODING_UTF7 :
2000 // enc = kCFStringEncodingNonLossyASCII ;
2002 case wxFONTENCODING_UTF8
:
2003 enc
= kCFStringEncodingUTF8
;
2005 case wxFONTENCODING_EUC_JP
:
2006 enc
= kCFStringEncodingEUC_JP
;
2008 case wxFONTENCODING_UTF16
:
2009 enc
= kCFStringEncodingUnicode
;
2011 case wxFONTENCODING_MACROMAN
:
2012 enc
= kCFStringEncodingMacRoman
;
2014 case wxFONTENCODING_MACJAPANESE
:
2015 enc
= kCFStringEncodingMacJapanese
;
2017 case wxFONTENCODING_MACCHINESETRAD
:
2018 enc
= kCFStringEncodingMacChineseTrad
;
2020 case wxFONTENCODING_MACKOREAN
:
2021 enc
= kCFStringEncodingMacKorean
;
2023 case wxFONTENCODING_MACARABIC
:
2024 enc
= kCFStringEncodingMacArabic
;
2026 case wxFONTENCODING_MACHEBREW
:
2027 enc
= kCFStringEncodingMacHebrew
;
2029 case wxFONTENCODING_MACGREEK
:
2030 enc
= kCFStringEncodingMacGreek
;
2032 case wxFONTENCODING_MACCYRILLIC
:
2033 enc
= kCFStringEncodingMacCyrillic
;
2035 case wxFONTENCODING_MACDEVANAGARI
:
2036 enc
= kCFStringEncodingMacDevanagari
;
2038 case wxFONTENCODING_MACGURMUKHI
:
2039 enc
= kCFStringEncodingMacGurmukhi
;
2041 case wxFONTENCODING_MACGUJARATI
:
2042 enc
= kCFStringEncodingMacGujarati
;
2044 case wxFONTENCODING_MACORIYA
:
2045 enc
= kCFStringEncodingMacOriya
;
2047 case wxFONTENCODING_MACBENGALI
:
2048 enc
= kCFStringEncodingMacBengali
;
2050 case wxFONTENCODING_MACTAMIL
:
2051 enc
= kCFStringEncodingMacTamil
;
2053 case wxFONTENCODING_MACTELUGU
:
2054 enc
= kCFStringEncodingMacTelugu
;
2056 case wxFONTENCODING_MACKANNADA
:
2057 enc
= kCFStringEncodingMacKannada
;
2059 case wxFONTENCODING_MACMALAJALAM
:
2060 enc
= kCFStringEncodingMacMalayalam
;
2062 case wxFONTENCODING_MACSINHALESE
:
2063 enc
= kCFStringEncodingMacSinhalese
;
2065 case wxFONTENCODING_MACBURMESE
:
2066 enc
= kCFStringEncodingMacBurmese
;
2068 case wxFONTENCODING_MACKHMER
:
2069 enc
= kCFStringEncodingMacKhmer
;
2071 case wxFONTENCODING_MACTHAI
:
2072 enc
= kCFStringEncodingMacThai
;
2074 case wxFONTENCODING_MACLAOTIAN
:
2075 enc
= kCFStringEncodingMacLaotian
;
2077 case wxFONTENCODING_MACGEORGIAN
:
2078 enc
= kCFStringEncodingMacGeorgian
;
2080 case wxFONTENCODING_MACARMENIAN
:
2081 enc
= kCFStringEncodingMacArmenian
;
2083 case wxFONTENCODING_MACCHINESESIMP
:
2084 enc
= kCFStringEncodingMacChineseSimp
;
2086 case wxFONTENCODING_MACTIBETAN
:
2087 enc
= kCFStringEncodingMacTibetan
;
2089 case wxFONTENCODING_MACMONGOLIAN
:
2090 enc
= kCFStringEncodingMacMongolian
;
2092 case wxFONTENCODING_MACETHIOPIC
:
2093 enc
= kCFStringEncodingMacEthiopic
;
2095 case wxFONTENCODING_MACCENTRALEUR
:
2096 enc
= kCFStringEncodingMacCentralEurRoman
;
2098 case wxFONTENCODING_MACVIATNAMESE
:
2099 enc
= kCFStringEncodingMacVietnamese
;
2101 case wxFONTENCODING_MACARABICEXT
:
2102 enc
= kCFStringEncodingMacExtArabic
;
2104 case wxFONTENCODING_MACSYMBOL
:
2105 enc
= kCFStringEncodingMacSymbol
;
2107 case wxFONTENCODING_MACDINGBATS
:
2108 enc
= kCFStringEncodingMacDingbats
;
2110 case wxFONTENCODING_MACTURKISH
:
2111 enc
= kCFStringEncodingMacTurkish
;
2113 case wxFONTENCODING_MACCROATIAN
:
2114 enc
= kCFStringEncodingMacCroatian
;
2116 case wxFONTENCODING_MACICELANDIC
:
2117 enc
= kCFStringEncodingMacIcelandic
;
2119 case wxFONTENCODING_MACROMANIAN
:
2120 enc
= kCFStringEncodingMacRomanian
;
2122 case wxFONTENCODING_MACCELTIC
:
2123 enc
= kCFStringEncodingMacCeltic
;
2125 case wxFONTENCODING_MACGAELIC
:
2126 enc
= kCFStringEncodingMacGaelic
;
2128 // case wxFONTENCODING_MACKEYBOARD :
2129 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2132 // because gcc is picky
2138 class wxMBConv_cocoa
: public wxMBConv
2143 Init(CFStringGetSystemEncoding()) ;
2147 wxMBConv_cocoa(const wxChar
* name
)
2149 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2153 wxMBConv_cocoa(wxFontEncoding encoding
)
2155 Init( wxCFStringEncFromFontEnc(encoding
) );
2162 void Init( CFStringEncoding encoding
)
2164 m_encoding
= encoding
;
2167 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2171 CFStringRef theString
= CFStringCreateWithBytes (
2172 NULL
, //the allocator
2173 (const UInt8
*)szUnConv
,
2176 false //no BOM/external representation
2179 wxASSERT(theString
);
2181 size_t nOutLength
= CFStringGetLength(theString
);
2185 CFRelease(theString
);
2189 CFRange theRange
= { 0, nOutSize
};
2191 #if SIZEOF_WCHAR_T == 4
2192 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2195 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2197 CFRelease(theString
);
2199 szUniCharBuffer
[nOutLength
] = '\0' ;
2201 #if SIZEOF_WCHAR_T == 4
2202 wxMBConvUTF16 converter
;
2203 converter
.MB2WC(szOut
, (const char*)szUniCharBuffer
, nOutSize
) ;
2204 delete[] szUniCharBuffer
;
2210 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2214 size_t nRealOutSize
;
2215 size_t nBufSize
= wxWcslen(szUnConv
);
2216 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
2218 #if SIZEOF_WCHAR_T == 4
2219 wxMBConvUTF16 converter
;
2220 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2221 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1] ;
2222 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
)) ;
2223 nBufSize
/= sizeof(UniChar
);
2226 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2230 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
2233 wxASSERT(theString
);
2235 //Note that CER puts a BOM when converting to unicode
2236 //so we check and use getchars instead in that case
2237 if (m_encoding
== kCFStringEncodingUnicode
)
2240 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2242 nRealOutSize
= CFStringGetLength(theString
) + 1;
2248 CFRangeMake(0, CFStringGetLength(theString
)),
2250 0, //what to put in characters that can't be converted -
2251 //0 tells CFString to return NULL if it meets such a character
2252 false, //not an external representation
2255 (CFIndex
*) &nRealOutSize
2259 CFRelease(theString
);
2261 #if SIZEOF_WCHAR_T == 4
2262 delete[] szUniBuffer
;
2265 return nRealOutSize
- 1;
2270 return m_encoding
!= kCFStringEncodingInvalidId
&&
2271 CFStringIsEncodingAvailable(m_encoding
);
2275 CFStringEncoding m_encoding
;
2278 #endif // defined(__WXCOCOA__)
2280 // ============================================================================
2281 // Mac conversion classes
2282 // ============================================================================
2284 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2286 class wxMBConv_mac
: public wxMBConv
2291 Init(CFStringGetSystemEncoding()) ;
2295 wxMBConv_mac(const wxChar
* name
)
2297 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2301 wxMBConv_mac(wxFontEncoding encoding
)
2303 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
2308 OSStatus status
= noErr
;
2309 status
= TECDisposeConverter(m_MB2WC_converter
);
2310 status
= TECDisposeConverter(m_WC2MB_converter
);
2314 void Init( TextEncodingBase encoding
)
2316 OSStatus status
= noErr
;
2317 m_char_encoding
= encoding
;
2318 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
2320 status
= TECCreateConverter(&m_MB2WC_converter
,
2322 m_unicode_encoding
);
2323 status
= TECCreateConverter(&m_WC2MB_converter
,
2328 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2330 OSStatus status
= noErr
;
2331 ByteCount byteOutLen
;
2332 ByteCount byteInLen
= strlen(psz
) ;
2333 wchar_t *tbuf
= NULL
;
2334 UniChar
* ubuf
= NULL
;
2339 //apple specs say at least 32
2340 n
= wxMax( 32 , byteInLen
) ;
2341 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
2343 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
2344 #if SIZEOF_WCHAR_T == 4
2345 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2347 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2349 status
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2350 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2351 #if SIZEOF_WCHAR_T == 4
2352 // we have to terminate here, because n might be larger for the trailing zero, and if the UniChar
2353 // buffer is not properly terminated we get random characters at the end
2354 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2355 wxMBConvUTF16 converter
;
2356 res
= converter
.MB2WC( (buf
? buf
: tbuf
) , (const char*)ubuf
, n
) ;
2359 res
= byteOutLen
/ sizeof( UniChar
) ;
2364 if ( buf
&& res
< n
)
2370 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2372 OSStatus status
= noErr
;
2373 ByteCount byteOutLen
;
2374 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2380 //apple specs say at least 32
2381 n
= wxMax( 32 , ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2382 tbuf
= (char*) malloc( n
) ;
2385 ByteCount byteBufferLen
= n
;
2386 UniChar
* ubuf
= NULL
;
2387 #if SIZEOF_WCHAR_T == 4
2388 wxMBConvUTF16 converter
;
2389 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2390 byteInLen
= unicharlen
;
2391 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2392 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2394 ubuf
= (UniChar
*) psz
;
2396 status
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2397 (TextPtr
) (buf
? buf
: tbuf
) , byteBufferLen
, &byteOutLen
);
2398 #if SIZEOF_WCHAR_T == 4
2404 size_t res
= byteOutLen
;
2405 if ( buf
&& res
< n
)
2409 //we need to double-trip to verify it didn't insert any ? in place
2410 //of bogus characters
2411 wxWCharBuffer
wcBuf(n
);
2412 size_t pszlen
= wxWcslen(psz
);
2413 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
2414 wxWcslen(wcBuf
) != pszlen
||
2415 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2417 // we didn't obtain the same thing we started from, hence
2418 // the conversion was lossy and we consider that it failed
2427 { return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
; }
2430 TECObjectRef m_MB2WC_converter
;
2431 TECObjectRef m_WC2MB_converter
;
2433 TextEncodingBase m_char_encoding
;
2434 TextEncodingBase m_unicode_encoding
;
2437 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2439 // ============================================================================
2440 // wxEncodingConverter based conversion classes
2441 // ============================================================================
2445 class wxMBConv_wxwin
: public wxMBConv
2450 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2451 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2455 // temporarily just use wxEncodingConverter stuff,
2456 // so that it works while a better implementation is built
2457 wxMBConv_wxwin(const wxChar
* name
)
2460 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2462 m_enc
= wxFONTENCODING_SYSTEM
;
2467 wxMBConv_wxwin(wxFontEncoding enc
)
2474 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2476 size_t inbuf
= strlen(psz
);
2479 if (!m2w
.Convert(psz
,buf
))
2485 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2487 const size_t inbuf
= wxWcslen(psz
);
2490 if (!w2m
.Convert(psz
,buf
))
// Returns m_ok, which the initialisation code sets from the results of
// m2w.Init() and w2m.Init(): true only if both converters were set up.
2497 bool IsOk() const { return m_ok
; }
2500 wxFontEncoding m_enc
;
2501 wxEncodingConverter m2w
, w2m
;
2503 // were we initialized successfully?
2506 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2509 // make the constructors available for unit testing
2510 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_wxwin( const wxChar
* name
)
2512 wxMBConv_wxwin
* result
= new wxMBConv_wxwin( name
);
2513 if ( !result
->IsOk() )
2521 #endif // wxUSE_FONTMAP
2523 // ============================================================================
2524 // wxCSConv implementation
2525 // ============================================================================
2527 void wxCSConv::Init()
2534 wxCSConv::wxCSConv(const wxChar
*charset
)
2544 m_encoding
= wxFontMapperBase::GetEncodingFromName(charset
);
2546 m_encoding
= wxFONTENCODING_SYSTEM
;
2550 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2552 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2554 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2556 encoding
= wxFONTENCODING_SYSTEM
;
2561 m_encoding
= encoding
;
2564 wxCSConv::~wxCSConv()
2569 wxCSConv::wxCSConv(const wxCSConv
& conv
)
2574 SetName(conv
.m_name
);
2575 m_encoding
= conv
.m_encoding
;
2578 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2582 SetName(conv
.m_name
);
2583 m_encoding
= conv
.m_encoding
;
2588 void wxCSConv::Clear()
2597 void wxCSConv::SetName(const wxChar
*charset
)
2601 m_name
= wxStrdup(charset
);
2607 #include "wx/hashmap.h"
2609 WX_DECLARE_HASH_MAP( wxFontEncoding
, wxString
, wxIntegerHash
, wxIntegerEqual
,
2610 wxEncodingNameCache
);
2612 static wxEncodingNameCache gs_nameCache
;
2615 wxMBConv
*wxCSConv::DoCreate() const
2618 wxLogTrace(TRACE_STRCONV
,
2619 wxT("creating conversion for %s"),
2621 : wxFontMapperBase::GetEncodingName(m_encoding
).c_str()));
2622 #endif // wxUSE_FONTMAP
2624 // check for the special case of ASCII or ISO8859-1 charset: as we have
2625 // special knowledge of it anyhow, we don't need to create a special
2626 // conversion object
2627 if ( m_encoding
== wxFONTENCODING_ISO8859_1
||
2628 m_encoding
== wxFONTENCODING_DEFAULT
)
2630 // don't convert at all
2634 // we trust OS to do conversion better than we can so try external
2635 // conversion methods first
2637 // the full order is:
2638 // 1. OS conversion (iconv() under Unix or Win32 API)
2639 // 2. hard coded conversions for UTF
2640 // 3. wxEncodingConverter as fall back
2646 #endif // !wxUSE_FONTMAP
2648 wxString
name(m_name
);
2649 wxFontEncoding
encoding(m_encoding
);
2651 if ( !name
.empty() )
2653 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
2661 wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2662 #endif // wxUSE_FONTMAP
2666 const wxEncodingNameCache::iterator it
= gs_nameCache
.find(encoding
);
2667 if ( it
!= gs_nameCache
.end() )
2669 if ( it
->second
.empty() )
2672 wxMBConv_iconv
*conv
= new wxMBConv_iconv(it
->second
);
2679 const wxChar
** names
= wxFontMapperBase::GetAllEncodingNames(encoding
);
2681 for ( ; *names
; ++names
)
2683 wxMBConv_iconv
*conv
= new wxMBConv_iconv(*names
);
2686 gs_nameCache
[encoding
] = *names
;
2693 gs_nameCache
[encoding
] = _T(""); // cache the failure
2695 #endif // wxUSE_FONTMAP
2697 #endif // HAVE_ICONV
2699 #ifdef wxHAVE_WIN32_MB2WC
2702 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2703 : new wxMBConv_win32(m_encoding
);
2712 #endif // wxHAVE_WIN32_MB2WC
2713 #if defined(__WXMAC__)
2715 // leave UTF16 and UTF32 to the built-ins of wx
2716 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
2717 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
2721 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
2722 : new wxMBConv_mac(m_encoding
);
2724 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
2733 #if defined(__WXCOCOA__)
2735 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
2739 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
2740 : new wxMBConv_cocoa(m_encoding
);
2742 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
2752 wxFontEncoding enc
= m_encoding
;
2754 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2756 // use "false" to suppress interactive dialogs -- we can be called from
2757 // anywhere and popping up a dialog from here is the last thing we want to
2759 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
2761 #endif // wxUSE_FONTMAP
2765 case wxFONTENCODING_UTF7
:
2766 return new wxMBConvUTF7
;
2768 case wxFONTENCODING_UTF8
:
2769 return new wxMBConvUTF8
;
2771 case wxFONTENCODING_UTF16BE
:
2772 return new wxMBConvUTF16BE
;
2774 case wxFONTENCODING_UTF16LE
:
2775 return new wxMBConvUTF16LE
;
2777 case wxFONTENCODING_UTF32BE
:
2778 return new wxMBConvUTF32BE
;
2780 case wxFONTENCODING_UTF32LE
:
2781 return new wxMBConvUTF32LE
;
2784 // nothing to do but put here to suppress gcc warnings
2791 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2792 : new wxMBConv_wxwin(m_encoding
);
2798 #endif // wxUSE_FONTMAP
2800 // NB: This is a hack to prevent deadlock. What could otherwise happen
2801 // in Unicode build: wxConvLocal creation ends up being here
2802 // because of some failure and logs the error. But wxLog will try to
2803 // attach a timestamp, for which it will need wxConvLocal (to convert
2804 // time to char* and then wchar_t*), but that fails and tries to log
2805 // an error, but wxLog has an (already locked) critical section that
2806 // guards a static buffer.
2807 static bool alreadyLoggingError
= false;
2808 if (!alreadyLoggingError
)
2810 alreadyLoggingError
= true;
2811 wxLogError(_("Cannot convert from the charset '%s'!"),
2815 wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str()
2816 #else // !wxUSE_FONTMAP
2817 wxString::Format(_("encoding %s"), m_encoding
).c_str()
2818 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2820 alreadyLoggingError
= false;
2826 void wxCSConv::CreateConvIfNeeded() const
2830 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2833 // if we have neither the name nor the encoding, use the default
2834 // encoding for this system
2835 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
2837 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
2839 #endif // wxUSE_INTL
2841 self
->m_convReal
= DoCreate();
2842 self
->m_deferred
= false;
2846 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2848 CreateConvIfNeeded();
2851 return m_convReal
->MB2WC(buf
, psz
, n
);
2854 size_t len
= strlen(psz
);
2858 for (size_t c
= 0; c
<= len
; c
++)
2859 buf
[c
] = (unsigned char)(psz
[c
]);
2865 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2867 CreateConvIfNeeded();
2870 return m_convReal
->WC2MB(buf
, psz
, n
);
2873 const size_t len
= wxWcslen(psz
);
2876 for (size_t c
= 0; c
<= len
; c
++)
2880 buf
[c
] = (char)psz
[c
];
2885 for (size_t c
= 0; c
<= len
; c
++)
2895 // ----------------------------------------------------------------------------
2897 // ----------------------------------------------------------------------------
2900 static wxMBConv_win32 wxConvLibcObj
;
2901 #elif defined(__WXMAC__) && !defined(__MACH__)
2902 static wxMBConv_mac wxConvLibcObj
;
2904 static wxMBConvLibc wxConvLibcObj
;
2907 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
2908 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
2909 static wxMBConvUTF7 wxConvUTF7Obj
;
2910 static wxMBConvUTF8 wxConvUTF8Obj
;
2912 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
2913 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
2914 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
2915 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
2916 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
2917 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
2918 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
= &
2926 #else // !wxUSE_WCHAR_T
2928 // stand-ins in absence of wchar_t
2929 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
2934 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T