1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
8 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
9 // (c) 2000-2003 Vadim Zeitlin
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ============================================================================
15 // ============================================================================
17 // ----------------------------------------------------------------------------
19 // ----------------------------------------------------------------------------
21 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
22 #pragma implementation "strconv.h"
25 // For compilers that support precompilation, includes "wx.h".
26 #include "wx/wxprec.h"
37 #include "wx/strconv.h"
42 #include "wx/msw/private.h"
46 #include "wx/msw/missing.h"
57 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
58 #define wxHAVE_WIN32_MB2WC
59 #endif // __WIN32__ but !__WXMICROWIN__
61 // ----------------------------------------------------------------------------
63 // ----------------------------------------------------------------------------
73 #include "wx/encconv.h"
74 #include "wx/fontmap.h"
78 #include <ATSUnicode.h>
79 #include <TextCommon.h>
80 #include <TextEncodingConverter.h>
82 #include "wx/mac/private.h" // includes mac headers
84 // ----------------------------------------------------------------------------
86 // ----------------------------------------------------------------------------
88 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
89 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
91 #if SIZEOF_WCHAR_T == 4
92 #define WC_NAME "UCS4"
93 #define WC_BSWAP BSWAP_UCS4
94 #ifdef WORDS_BIGENDIAN
95 #define WC_NAME_BEST "UCS-4BE"
97 #define WC_NAME_BEST "UCS-4LE"
99 #elif SIZEOF_WCHAR_T == 2
100 #define WC_NAME "UTF16"
101 #define WC_BSWAP BSWAP_UTF16
103 #ifdef WORDS_BIGENDIAN
104 #define WC_NAME_BEST "UTF-16BE"
106 #define WC_NAME_BEST "UTF-16LE"
108 #else // sizeof(wchar_t) != 2 nor 4
109 // does this ever happen?
110 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
113 // ============================================================================
115 // ============================================================================
117 // ----------------------------------------------------------------------------
118 // UTF-16 en/decoding to/from UCS-4
119 // ----------------------------------------------------------------------------
122 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
127 *output
= (wxUint16
) input
;
130 else if (input
>=0x110000)
138 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
139 *output
= (wxUint16
) ((input
&0x3ff)+0xdc00);
145 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
147 if ((*input
<0xd800) || (*input
>0xdfff))
152 else if ((input
[1]<0xdc00) || (input
[1]>=0xdfff))
159 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
165 // ----------------------------------------------------------------------------
167 // ----------------------------------------------------------------------------
169 wxMBConv::~wxMBConv()
171 // nothing to do here (necessary for Darwin linking probably)
174 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
178 // calculate the length of the buffer needed first
179 size_t nLen
= MB2WC(NULL
, psz
, 0);
180 if ( nLen
!= (size_t)-1 )
182 // now do the actual conversion
183 wxWCharBuffer
buf(nLen
);
184 nLen
= MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NULL
185 if ( nLen
!= (size_t)-1 )
192 wxWCharBuffer
buf((wchar_t *)NULL
);
197 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
201 size_t nLen
= WC2MB(NULL
, pwz
, 0);
202 if ( nLen
!= (size_t)-1 )
204 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
205 nLen
= WC2MB(buf
.data(), pwz
, nLen
+ 4);
206 if ( nLen
!= (size_t)-1 )
213 wxCharBuffer
buf((char *)NULL
);
218 // ----------------------------------------------------------------------------
220 // ----------------------------------------------------------------------------
222 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
224 return wxMB2WC(buf
, psz
, n
);
227 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
229 return wxWC2MB(buf
, psz
, n
);
232 // ----------------------------------------------------------------------------
234 // ----------------------------------------------------------------------------
237 static char utf7_setD
[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
238 "abcdefghijklmnopqrstuvwxyz"
239 "0123456789'(),-./:?";
240 static char utf7_setO
[]="!\"#$%&*;<=>@[]^_`{|}";
241 static char utf7_setB
[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
242 "abcdefghijklmnopqrstuvwxyz"
246 // TODO: write actual implementations of UTF-7 here
247 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf
),
248 const char * WXUNUSED(psz
),
249 size_t WXUNUSED(n
)) const
254 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf
),
255 const wchar_t * WXUNUSED(psz
),
256 size_t WXUNUSED(n
)) const
261 // ----------------------------------------------------------------------------
263 // ----------------------------------------------------------------------------
265 static wxUint32 utf8_max
[]=
266 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
268 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
272 while (*psz
&& ((!buf
) || (len
< n
)))
274 unsigned char cc
= *psz
++, fc
= cc
;
276 for (cnt
= 0; fc
& 0x80; cnt
++)
290 // invalid UTF-8 sequence
295 unsigned ocnt
= cnt
- 1;
296 wxUint32 res
= cc
& (0x3f >> cnt
);
300 if ((cc
& 0xC0) != 0x80)
302 // invalid UTF-8 sequence
305 res
= (res
<< 6) | (cc
& 0x3f);
307 if (res
<= utf8_max
[ocnt
])
309 // illegal UTF-8 encoding
313 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
314 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
315 if (pa
== (size_t)-1)
324 #endif // WC_UTF16/!WC_UTF16
328 if (buf
&& (len
< n
))
333 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
337 while (*psz
&& ((!buf
) || (len
< n
)))
341 // cast is ok for WC_UTF16
342 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
343 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
345 cc
=(*psz
++) & 0x7fffffff;
348 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
362 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
364 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
369 if (buf
&& (len
<n
)) *buf
= 0;
377 // ----------------------------------------------------------------------------
379 // ----------------------------------------------------------------------------
381 #ifdef WORDS_BIGENDIAN
382 #define wxMBConvUTF16straight wxMBConvUTF16BE
383 #define wxMBConvUTF16swap wxMBConvUTF16LE
385 #define wxMBConvUTF16swap wxMBConvUTF16BE
386 #define wxMBConvUTF16straight wxMBConvUTF16LE
392 // copy 16bit MB to 16bit String
393 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
397 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
400 *buf
++ = *(wxUint16
*)psz
;
403 psz
+= sizeof(wxUint16
);
405 if (buf
&& len
<n
) *buf
=0;
411 // copy 16bit String to 16bit MB
412 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
416 while (*psz
&& (!buf
|| len
< n
))
420 *(wxUint16
*)buf
= *psz
;
421 buf
+= sizeof(wxUint16
);
423 len
+= sizeof(wxUint16
);
426 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
432 // swap 16bit MB to 16bit String
433 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
437 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
441 ((char *)buf
)[0] = psz
[1];
442 ((char *)buf
)[1] = psz
[0];
446 psz
+= sizeof(wxUint16
);
448 if (buf
&& len
<n
) *buf
=0;
// swap 16bit String to 16bit MB
455 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
459 while (*psz
&& (!buf
|| len
< n
))
463 *buf
++ = ((char*)psz
)[1];
464 *buf
++ = ((char*)psz
)[0];
466 len
+= sizeof(wxUint16
);
469 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
478 // copy 16bit MB to 32bit String
479 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
483 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
486 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
487 if (pa
== (size_t)-1)
493 psz
+= pa
* sizeof(wxUint16
);
495 if (buf
&& len
<n
) *buf
=0;
501 // copy 32bit String to 16bit MB
502 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
506 while (*psz
&& (!buf
|| len
< n
))
509 size_t pa
=encode_utf16(*psz
, cc
);
511 if (pa
== (size_t)-1)
516 *(wxUint16
*)buf
= cc
[0];
517 buf
+= sizeof(wxUint16
);
520 *(wxUint16
*)buf
= cc
[1];
521 buf
+= sizeof(wxUint16
);
525 len
+= pa
*sizeof(wxUint16
);
528 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
534 // swap 16bit MB to 32bit String
535 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
539 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
543 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
544 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
546 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
547 if (pa
== (size_t)-1)
554 psz
+= pa
* sizeof(wxUint16
);
556 if (buf
&& len
<n
) *buf
=0;
562 // swap 32bit String to 16bit MB
563 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
567 while (*psz
&& (!buf
|| len
< n
))
570 size_t pa
=encode_utf16(*psz
, cc
);
572 if (pa
== (size_t)-1)
577 *buf
++ = ((char*)cc
)[1];
578 *buf
++ = ((char*)cc
)[0];
581 *buf
++ = ((char*)cc
)[3];
582 *buf
++ = ((char*)cc
)[2];
586 len
+= pa
*sizeof(wxUint16
);
589 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
597 // ----------------------------------------------------------------------------
599 // ----------------------------------------------------------------------------
601 #ifdef WORDS_BIGENDIAN
602 #define wxMBConvUTF32straight wxMBConvUTF32BE
603 #define wxMBConvUTF32swap wxMBConvUTF32LE
605 #define wxMBConvUTF32swap wxMBConvUTF32BE
606 #define wxMBConvUTF32straight wxMBConvUTF32LE
610 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
611 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
616 // copy 32bit MB to 16bit String
617 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
621 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
625 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
626 if (pa
== (size_t)-1)
636 psz
+= sizeof(wxUint32
);
638 if (buf
&& len
<n
) *buf
=0;
644 // copy 16bit String to 32bit MB
645 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
649 while (*psz
&& (!buf
|| len
< n
))
653 // cast is ok for WC_UTF16
654 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
655 if (pa
== (size_t)-1)
660 *(wxUint32
*)buf
= cc
;
661 buf
+= sizeof(wxUint32
);
663 len
+= sizeof(wxUint32
);
667 if (buf
&& len
<=n
-sizeof(wxUint32
))
675 // swap 32bit MB to 16bit String
676 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
680 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
683 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
684 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
689 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
690 if (pa
== (size_t)-1)
700 psz
+= sizeof(wxUint32
);
710 // swap 16bit String to 32bit MB
711 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
715 while (*psz
&& (!buf
|| len
< n
))
719 // cast is ok for WC_UTF16
720 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
721 if (pa
== (size_t)-1)
731 len
+= sizeof(wxUint32
);
735 if (buf
&& len
<=n
-sizeof(wxUint32
))
744 // copy 32bit MB to 32bit String
745 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
749 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
752 *buf
++ = *(wxUint32
*)psz
;
754 psz
+= sizeof(wxUint32
);
764 // copy 32bit String to 32bit MB
765 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
769 while (*psz
&& (!buf
|| len
< n
))
773 *(wxUint32
*)buf
= *psz
;
774 buf
+= sizeof(wxUint32
);
777 len
+= sizeof(wxUint32
);
781 if (buf
&& len
<=n
-sizeof(wxUint32
))
788 // swap 32bit MB to 32bit String
789 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
793 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
797 ((char *)buf
)[0] = psz
[3];
798 ((char *)buf
)[1] = psz
[2];
799 ((char *)buf
)[2] = psz
[1];
800 ((char *)buf
)[3] = psz
[0];
804 psz
+= sizeof(wxUint32
);
814 // swap 32bit String to 32bit MB
815 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
819 while (*psz
&& (!buf
|| len
< n
))
823 *buf
++ = ((char *)psz
)[3];
824 *buf
++ = ((char *)psz
)[2];
825 *buf
++ = ((char *)psz
)[1];
826 *buf
++ = ((char *)psz
)[0];
828 len
+= sizeof(wxUint32
);
832 if (buf
&& len
<=n
-sizeof(wxUint32
))
842 // ============================================================================
843 // The classes doing conversion using the iconv_xxx() functions
844 // ============================================================================
848 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
849 // if output buffer is _exactly_ as big as needed. Such case is (unless there's
850 // yet another bug in glibc) the only case when iconv() returns with (size_t)-1
851 // (which means error) and says there are 0 bytes left in the input buffer --
852 // when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
853 // this alternative test for iconv() failure.
854 // [This bug does not appear in glibc 2.2.]
855 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
856 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
857 (errno != E2BIG || bufLeft != 0))
859 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
862 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
864 // ----------------------------------------------------------------------------
865 // wxMBConv_iconv: encapsulates an iconv character set
866 // ----------------------------------------------------------------------------
868 class wxMBConv_iconv
: public wxMBConv
871 wxMBConv_iconv(const wxChar
*name
);
872 virtual ~wxMBConv_iconv();
874 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
875 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
878 { return (m2w
!= (iconv_t
)-1) && (w2m
!= (iconv_t
)-1); }
881 // the iconv handlers used to translate from multibyte to wide char and in
882 // the other direction
887 // the name (for iconv_open()) of a wide char charset -- if none is
888 // available on this machine, it will remain NULL
889 static const char *ms_wcCharsetName
;
891 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
892 // different endian-ness than the native one
893 static bool ms_wcNeedsSwap
;
896 const char *wxMBConv_iconv::ms_wcCharsetName
= NULL
;
897 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
899 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
901 // Do it the hard way
903 for (size_t i
= 0; i
< wxStrlen(name
)+1; i
++)
904 cname
[i
] = (char) name
[i
];
906 // check for charset that represents wchar_t:
907 if (ms_wcCharsetName
== NULL
)
909 ms_wcNeedsSwap
= false;
911 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
912 ms_wcCharsetName
= WC_NAME_BEST
;
913 m2w
= iconv_open(ms_wcCharsetName
, cname
);
915 if (m2w
== (iconv_t
)-1)
917 // try charset w/o bytesex info (e.g. "UCS4")
918 // and check for bytesex ourselves:
919 ms_wcCharsetName
= WC_NAME
;
920 m2w
= iconv_open(ms_wcCharsetName
, cname
);
922 // last bet, try if it knows WCHAR_T pseudo-charset
923 if (m2w
== (iconv_t
)-1)
925 ms_wcCharsetName
= "WCHAR_T";
926 m2w
= iconv_open(ms_wcCharsetName
, cname
);
929 if (m2w
!= (iconv_t
)-1)
931 char buf
[2], *bufPtr
;
932 wchar_t wbuf
[2], *wbufPtr
;
940 outsz
= SIZEOF_WCHAR_T
* 2;
944 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
945 (char**)&wbufPtr
, &outsz
);
947 if (ICONV_FAILED(res
, insz
))
949 ms_wcCharsetName
= NULL
;
950 wxLogLastError(wxT("iconv"));
951 wxLogError(_("Conversion to charset '%s' doesn't work."), name
);
955 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
960 ms_wcCharsetName
= NULL
;
962 // VS: we must not output an error here, since wxWidgets will safely
963 // fall back to using wxEncodingConverter.
964 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name
);
968 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName
, ms_wcNeedsSwap
);
970 else // we already have ms_wcCharsetName
972 m2w
= iconv_open(ms_wcCharsetName
, cname
);
975 // NB: don't ever pass NULL to iconv_open(), it may crash!
976 if ( ms_wcCharsetName
)
978 w2m
= iconv_open( cname
, ms_wcCharsetName
);
986 wxMBConv_iconv::~wxMBConv_iconv()
988 if ( m2w
!= (iconv_t
)-1 )
990 if ( w2m
!= (iconv_t
)-1 )
994 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
996 size_t inbuf
= strlen(psz
);
997 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
999 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1000 wchar_t *bufPtr
= buf
;
1001 const char *pszPtr
= psz
;
1005 // have destination buffer, convert there
1007 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1008 (char**)&bufPtr
, &outbuf
);
1009 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1013 // convert to native endianness
1014 WC_BSWAP(buf
/* _not_ bufPtr */, res
)
1017 // NB: iconv was given only strlen(psz) characters on input, and so
1018 // it couldn't convert the trailing zero. Let's do it ourselves
1019 // if there's some room left for it in the output buffer.
1025 // no destination buffer... convert using temp buffer
1026 // to calculate destination buffer requirement
1031 outbuf
= 8*SIZEOF_WCHAR_T
;
1034 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1035 (char**)&bufPtr
, &outbuf
);
1037 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1038 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1041 if (ICONV_FAILED(cres
, inbuf
))
1043 //VS: it is ok if iconv fails, hence trace only
1044 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1051 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1053 size_t inbuf
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
1057 wchar_t *tmpbuf
= 0;
1061 // need to copy to temp buffer to switch endianness
1062 // this absolutely doesn't rock!
1063 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1064 // could be in read-only memory, or be accessed in some other thread)
1065 tmpbuf
=(wchar_t*)malloc((inbuf
+1)*SIZEOF_WCHAR_T
);
1066 memcpy(tmpbuf
,psz
,(inbuf
+1)*SIZEOF_WCHAR_T
);
1067 WC_BSWAP(tmpbuf
, inbuf
)
1073 // have destination buffer, convert there
1074 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1078 // NB: iconv was given only wcslen(psz) characters on input, and so
1079 // it couldn't convert the trailing zero. Let's do it ourselves
1080 // if there's some room left for it in the output buffer.
1086 // no destination buffer... convert using temp buffer
1087 // to calculate destination buffer requirement
1091 buf
= tbuf
; outbuf
= 16;
1093 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1096 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1104 if (ICONV_FAILED(cres
, inbuf
))
1106 //VS: it is ok if iconv fails, hence trace only
1107 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1114 #endif // HAVE_ICONV
1117 // ============================================================================
1118 // Win32 conversion classes
1119 // ============================================================================
1121 #ifdef wxHAVE_WIN32_MB2WC
1125 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1126 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1129 class wxMBConv_win32
: public wxMBConv
1134 m_CodePage
= CP_ACP
;
1138 wxMBConv_win32(const wxChar
* name
)
1140 m_CodePage
= wxCharsetToCodepage(name
);
1143 wxMBConv_win32(wxFontEncoding encoding
)
1145 m_CodePage
= wxEncodingToCodepage(encoding
);
1149 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
// note that we have to use the MB_ERR_INVALID_CHARS flag, as without it
1152 // the behaviour is not compatible with the Unix version (using iconv)
1153 // and break the library itself, e.g. wxTextInputStream::NextChar()
1154 // wouldn't work if reading an incomplete MB char didn't result in an
1156 const size_t len
= ::MultiByteToWideChar
1158 m_CodePage
, // code page
1159 MB_ERR_INVALID_CHARS
, // flags: fall on error
1160 psz
, // input string
1161 -1, // its length (NUL-terminated)
1162 buf
, // output string
1163 buf
? n
: 0 // size of output buffer
1166 // note that it returns count of written chars for buf != NULL and size
1167 // of the needed buffer for buf == NULL so in either case the length of
1168 // the string (which never includes the terminating NUL) is one less
1169 return len
? len
- 1 : (size_t)-1;
1172 size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
1175 we have a problem here: by default, WideCharToMultiByte() may
1176 replace characters unrepresentable in the target code page with bad
1177 quality approximations such as turning "1/2" symbol (U+00BD) into
1178 "1" for the code pages which don't have it and we, obviously, want
1179 to avoid this at any price
1181 the trouble is that this function does it _silently_, i.e. it won't
1182 even tell us whether it did or not... Win98/2000 and higher provide
1183 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1184 we have to resort to a round trip, i.e. check that converting back
1185 results in the same string -- this is, of course, expensive but
1186 otherwise we simply can't be sure to not garble the data.
1189 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1190 // it doesn't work with CJK encodings (which we test for rather roughly
1191 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1193 BOOL usedDef
wxDUMMY_INITIALIZE(false);
1196 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
1198 // it's our lucky day
1199 flags
= WC_NO_BEST_FIT_CHARS
;
1200 pUsedDef
= &usedDef
;
1202 else // old system or unsupported encoding
1208 const size_t len
= ::WideCharToMultiByte
1210 m_CodePage
, // code page
1211 flags
, // either none or no best fit
1212 pwz
, // input string
1213 -1, // it is (wide) NUL-terminated
1214 buf
, // output buffer
1215 buf
? n
: 0, // and its size
1216 NULL
, // default "replacement" char
1217 pUsedDef
// [out] was it used?
1222 // function totally failed
1226 // if we were really converting, check if we succeeded
1231 // check if the conversion failed, i.e. if any replacements
1236 else // we must resort to double tripping...
1238 wxWCharBuffer
wcBuf(n
);
1239 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
1240 wcscmp(wcBuf
, pwz
) != 0 )
1242 // we didn't obtain the same thing we started from, hence
1243 // the conversion was lossy and we consider that it failed
1249 // see the comment above for the reason of "len - 1"
1253 bool IsOk() const { return m_CodePage
!= -1; }
1256 static bool CanUseNoBestFit()
1258 static int s_isWin98Or2k
= -1;
1260 if ( s_isWin98Or2k
== -1 )
1263 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
1266 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
1270 s_isWin98Or2k
= verMaj
>= 5;
// unknown, be conservative by default
1278 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
1281 return s_isWin98Or2k
== 1;
1287 #endif // wxHAVE_WIN32_MB2WC
1289 // ============================================================================
1290 // Cocoa conversion classes
1291 // ============================================================================
1293 #if defined(__WXCOCOA__)
// RN: There is no UTF-32 support in either Core Foundation or
// Cocoa. Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1299 #include <CoreFoundation/CFString.h>
1300 #include <CoreFoundation/CFStringEncodingExt.h>
1302 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
1304 CFStringEncoding enc
= 0 ;
1305 if ( encoding
== wxFONTENCODING_DEFAULT
)
1308 encoding
= wxFont::GetDefaultEncoding() ;
1310 encoding
= wxLocale::GetSystemEncoding() ;
1313 else switch( encoding
)
1315 case wxFONTENCODING_ISO8859_1
:
1316 enc
= kCFStringEncodingISOLatin1
;
1318 case wxFONTENCODING_ISO8859_2
:
1319 enc
= kCFStringEncodingISOLatin2
;
1321 case wxFONTENCODING_ISO8859_3
:
1322 enc
= kCFStringEncodingISOLatin3
;
1324 case wxFONTENCODING_ISO8859_4
:
1325 enc
= kCFStringEncodingISOLatin4
;
1327 case wxFONTENCODING_ISO8859_5
:
1328 enc
= kCFStringEncodingISOLatinCyrillic
;
1330 case wxFONTENCODING_ISO8859_6
:
1331 enc
= kCFStringEncodingISOLatinArabic
;
1333 case wxFONTENCODING_ISO8859_7
:
1334 enc
= kCFStringEncodingISOLatinGreek
;
1336 case wxFONTENCODING_ISO8859_8
:
1337 enc
= kCFStringEncodingISOLatinHebrew
;
1339 case wxFONTENCODING_ISO8859_9
:
1340 enc
= kCFStringEncodingISOLatin5
;
1342 case wxFONTENCODING_ISO8859_10
:
1343 enc
= kCFStringEncodingISOLatin6
;
1345 case wxFONTENCODING_ISO8859_11
:
1346 enc
= kCFStringEncodingISOLatinThai
;
1348 case wxFONTENCODING_ISO8859_13
:
1349 enc
= kCFStringEncodingISOLatin7
;
1351 case wxFONTENCODING_ISO8859_14
:
1352 enc
= kCFStringEncodingISOLatin8
;
1354 case wxFONTENCODING_ISO8859_15
:
1355 enc
= kCFStringEncodingISOLatin9
;
1358 case wxFONTENCODING_KOI8
:
1359 enc
= kCFStringEncodingKOI8_R
;
1361 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
1362 enc
= kCFStringEncodingDOSRussian
;
1365 // case wxFONTENCODING_BULGARIAN :
1369 case wxFONTENCODING_CP437
:
1370 enc
=kCFStringEncodingDOSLatinUS
;
1372 case wxFONTENCODING_CP850
:
1373 enc
= kCFStringEncodingDOSLatin1
;
1375 case wxFONTENCODING_CP852
:
1376 enc
= kCFStringEncodingDOSLatin2
;
1378 case wxFONTENCODING_CP855
:
1379 enc
= kCFStringEncodingDOSCyrillic
;
1381 case wxFONTENCODING_CP866
:
1382 enc
=kCFStringEncodingDOSRussian
;
1384 case wxFONTENCODING_CP874
:
1385 enc
= kCFStringEncodingDOSThai
;
1387 case wxFONTENCODING_CP932
:
1388 enc
= kCFStringEncodingDOSJapanese
;
1390 case wxFONTENCODING_CP936
:
1391 enc
=kCFStringEncodingDOSChineseSimplif
;
1393 case wxFONTENCODING_CP949
:
1394 enc
= kCFStringEncodingDOSKorean
;
1396 case wxFONTENCODING_CP950
:
1397 enc
= kCFStringEncodingDOSChineseTrad
;
1400 case wxFONTENCODING_CP1250
:
1401 enc
= kCFStringEncodingWindowsLatin2
;
1403 case wxFONTENCODING_CP1251
:
1404 enc
=kCFStringEncodingWindowsCyrillic
;
1406 case wxFONTENCODING_CP1252
:
1407 enc
=kCFStringEncodingWindowsLatin1
;
1409 case wxFONTENCODING_CP1253
:
1410 enc
= kCFStringEncodingWindowsGreek
;
1412 case wxFONTENCODING_CP1254
:
1413 enc
= kCFStringEncodingWindowsLatin5
;
1415 case wxFONTENCODING_CP1255
:
1416 enc
=kCFStringEncodingWindowsHebrew
;
1418 case wxFONTENCODING_CP1256
:
1419 enc
=kCFStringEncodingWindowsArabic
;
1421 case wxFONTENCODING_CP1257
:
1422 enc
= kCFStringEncodingWindowsBalticRim
;
1424 case wxFONTENCODING_UTF7
:
1425 enc
= kCFStringEncodingNonLossyASCII
;
1427 case wxFONTENCODING_UTF8
:
1428 enc
= kCFStringEncodingUTF8
;
1430 case wxFONTENCODING_EUC_JP
:
1431 enc
= kCFStringEncodingEUC_JP
;
1433 case wxFONTENCODING_UTF16
:
1434 enc
= kCFStringEncodingUnicode
;
1436 case wxFONTENCODING_MACROMAN
:
1437 enc
= kCFStringEncodingMacRoman
;
1439 case wxFONTENCODING_MACJAPANESE
:
1440 enc
= kCFStringEncodingMacJapanese
;
1442 case wxFONTENCODING_MACCHINESETRAD
:
1443 enc
= kCFStringEncodingMacChineseTrad
;
1445 case wxFONTENCODING_MACKOREAN
:
1446 enc
= kCFStringEncodingMacKorean
;
1448 case wxFONTENCODING_MACARABIC
:
1449 enc
= kCFStringEncodingMacArabic
;
1451 case wxFONTENCODING_MACHEBREW
:
1452 enc
= kCFStringEncodingMacHebrew
;
1454 case wxFONTENCODING_MACGREEK
:
1455 enc
= kCFStringEncodingMacGreek
;
1457 case wxFONTENCODING_MACCYRILLIC
:
1458 enc
= kCFStringEncodingMacCyrillic
;
1460 case wxFONTENCODING_MACDEVANAGARI
:
1461 enc
= kCFStringEncodingMacDevanagari
;
1463 case wxFONTENCODING_MACGURMUKHI
:
1464 enc
= kCFStringEncodingMacGurmukhi
;
1466 case wxFONTENCODING_MACGUJARATI
:
1467 enc
= kCFStringEncodingMacGujarati
;
1469 case wxFONTENCODING_MACORIYA
:
1470 enc
= kCFStringEncodingMacOriya
;
1472 case wxFONTENCODING_MACBENGALI
:
1473 enc
= kCFStringEncodingMacBengali
;
1475 case wxFONTENCODING_MACTAMIL
:
1476 enc
= kCFStringEncodingMacTamil
;
1478 case wxFONTENCODING_MACTELUGU
:
1479 enc
= kCFStringEncodingMacTelugu
;
1481 case wxFONTENCODING_MACKANNADA
:
1482 enc
= kCFStringEncodingMacKannada
;
1484 case wxFONTENCODING_MACMALAJALAM
:
1485 enc
= kCFStringEncodingMacMalayalam
;
1487 case wxFONTENCODING_MACSINHALESE
:
1488 enc
= kCFStringEncodingMacSinhalese
;
1490 case wxFONTENCODING_MACBURMESE
:
1491 enc
= kCFStringEncodingMacBurmese
;
1493 case wxFONTENCODING_MACKHMER
:
1494 enc
= kCFStringEncodingMacKhmer
;
1496 case wxFONTENCODING_MACTHAI
:
1497 enc
= kCFStringEncodingMacThai
;
1499 case wxFONTENCODING_MACLAOTIAN
:
1500 enc
= kCFStringEncodingMacLaotian
;
1502 case wxFONTENCODING_MACGEORGIAN
:
1503 enc
= kCFStringEncodingMacGeorgian
;
1505 case wxFONTENCODING_MACARMENIAN
:
1506 enc
= kCFStringEncodingMacArmenian
;
1508 case wxFONTENCODING_MACCHINESESIMP
:
1509 enc
= kCFStringEncodingMacChineseSimp
;
1511 case wxFONTENCODING_MACTIBETAN
:
1512 enc
= kCFStringEncodingMacTibetan
;
1514 case wxFONTENCODING_MACMONGOLIAN
:
1515 enc
= kCFStringEncodingMacMongolian
;
1517 case wxFONTENCODING_MACETHIOPIC
:
1518 enc
= kCFStringEncodingMacEthiopic
;
1520 case wxFONTENCODING_MACCENTRALEUR
:
1521 enc
= kCFStringEncodingMacCentralEurRoman
;
1523 case wxFONTENCODING_MACVIATNAMESE
:
1524 enc
= kCFStringEncodingMacVietnamese
;
1526 case wxFONTENCODING_MACARABICEXT
:
1527 enc
= kCFStringEncodingMacExtArabic
;
1529 case wxFONTENCODING_MACSYMBOL
:
1530 enc
= kCFStringEncodingMacSymbol
;
1532 case wxFONTENCODING_MACDINGBATS
:
1533 enc
= kCFStringEncodingMacDingbats
;
1535 case wxFONTENCODING_MACTURKISH
:
1536 enc
= kCFStringEncodingMacTurkish
;
1538 case wxFONTENCODING_MACCROATIAN
:
1539 enc
= kCFStringEncodingMacCroatian
;
1541 case wxFONTENCODING_MACICELANDIC
:
1542 enc
= kCFStringEncodingMacIcelandic
;
1544 case wxFONTENCODING_MACROMANIAN
:
1545 enc
= kCFStringEncodingMacRomanian
;
1547 case wxFONTENCODING_MACCELTIC
:
1548 enc
= kCFStringEncodingMacCeltic
;
1550 case wxFONTENCODING_MACGAELIC
:
1551 enc
= kCFStringEncodingMacGaelic
;
1553 // case wxFONTENCODING_MACKEYBOARD :
1554 // enc = kCFStringEncodingMacKeyboardGlyphs ;
1557 // because gcc is picky
1563 wxFontEncoding
wxFontEncFromCFStringEnc(CFStringEncoding encoding
)
1565 wxFontEncoding enc
= wxFONTENCODING_DEFAULT
;
1569 case kCFStringEncodingISOLatin1
:
1570 enc
= wxFONTENCODING_ISO8859_1
;
1572 case kCFStringEncodingISOLatin2
:
1573 enc
= wxFONTENCODING_ISO8859_2
;
1575 case kCFStringEncodingISOLatin3
:
1576 enc
= wxFONTENCODING_ISO8859_3
;
1578 case kCFStringEncodingISOLatin4
:
1579 enc
= wxFONTENCODING_ISO8859_4
;
1581 case kCFStringEncodingISOLatinCyrillic
:
1582 enc
= wxFONTENCODING_ISO8859_5
;
1584 case kCFStringEncodingISOLatinArabic
:
1585 enc
= wxFONTENCODING_ISO8859_6
;
1587 case kCFStringEncodingISOLatinGreek
:
1588 enc
= wxFONTENCODING_ISO8859_7
;
1590 case kCFStringEncodingISOLatinHebrew
:
1591 enc
= wxFONTENCODING_ISO8859_8
;
1593 case kCFStringEncodingISOLatin5
:
1594 enc
= wxFONTENCODING_ISO8859_9
;
1596 case kCFStringEncodingISOLatin6
:
1597 enc
= wxFONTENCODING_ISO8859_10
;
1599 case kCFStringEncodingISOLatin7
:
1600 enc
= wxFONTENCODING_ISO8859_13
;
1602 case kCFStringEncodingISOLatin8
:
1603 enc
= wxFONTENCODING_ISO8859_14
;
1605 case kCFStringEncodingISOLatin9
:
1606 enc
=wxFONTENCODING_ISO8859_15
;
1609 case kCFStringEncodingKOI8_R
:
1610 enc
= wxFONTENCODING_KOI8
;
1614 // enc = wxFONTENCODING_BULGARIAN;
1617 case kCFStringEncodingDOSLatinUS
:
1618 enc
= wxFONTENCODING_CP437
;
1620 case kCFStringEncodingDOSLatin1
:
1621 enc
= wxFONTENCODING_CP850
;
1623 case kCFStringEncodingDOSLatin2
:
1624 enc
=wxFONTENCODING_CP852
;
1626 case kCFStringEncodingDOSCyrillic
:
1627 enc
= wxFONTENCODING_CP855
;
1629 case kCFStringEncodingDOSRussian
:
1630 enc
= wxFONTENCODING_CP866
;
1632 case kCFStringEncodingDOSThai
:
1633 enc
=wxFONTENCODING_CP874
;
1635 case kCFStringEncodingDOSJapanese
:
1636 enc
= wxFONTENCODING_CP932
;
1638 case kCFStringEncodingDOSChineseSimplif
:
1639 enc
= wxFONTENCODING_CP936
;
1641 case kCFStringEncodingDOSKorean
:
1642 enc
= wxFONTENCODING_CP949
;
1644 case kCFStringEncodingDOSChineseTrad
:
1645 enc
= wxFONTENCODING_CP950
;
1648 case kCFStringEncodingWindowsLatin2
:
1649 enc
= wxFONTENCODING_CP1250
;
1651 case kCFStringEncodingWindowsCyrillic
:
1652 enc
= wxFONTENCODING_CP1251
;
1654 case kCFStringEncodingWindowsLatin1
:
1655 enc
= wxFONTENCODING_CP1252
;
1657 case kCFStringEncodingWindowsGreek
:
1658 enc
= wxFONTENCODING_CP1253
;
1660 case kCFStringEncodingWindowsLatin5
:
1661 enc
= wxFONTENCODING_CP1254
;
1663 case kCFStringEncodingWindowsHebrew
:
1664 enc
= wxFONTENCODING_CP1255
;
1666 case kCFStringEncodingWindowsArabic
:
1667 enc
= wxFONTENCODING_CP1256
;
1669 case kCFStringEncodingWindowsBalticRim
:
1670 enc
=wxFONTENCODING_CP1257
;
1672 case kCFStringEncodingEUC_JP
:
1673 enc
= wxFONTENCODING_EUC_JP
;
1675 case kCFStringEncodingUnicode
:
1676 enc
= wxFONTENCODING_UTF16
;
1678 case kCFStringEncodingMacRoman
:
1679 enc
= wxFONTENCODING_MACROMAN
;
1681 case kCFStringEncodingMacJapanese
:
1682 enc
= wxFONTENCODING_MACJAPANESE
;
1684 case kCFStringEncodingMacChineseTrad
:
1685 enc
= wxFONTENCODING_MACCHINESETRAD
;
1687 case kCFStringEncodingMacKorean
:
1688 enc
= wxFONTENCODING_MACKOREAN
;
1690 case kCFStringEncodingMacArabic
:
1691 enc
=wxFONTENCODING_MACARABIC
;
1693 case kCFStringEncodingMacHebrew
:
1694 enc
= wxFONTENCODING_MACHEBREW
;
1696 case kCFStringEncodingMacGreek
:
1697 enc
= wxFONTENCODING_MACGREEK
;
1699 case kCFStringEncodingMacCyrillic
:
1700 enc
= wxFONTENCODING_MACCYRILLIC
;
1702 case kCFStringEncodingMacDevanagari
:
1703 enc
= wxFONTENCODING_MACDEVANAGARI
;
1705 case kCFStringEncodingMacGurmukhi
:
1706 enc
= wxFONTENCODING_MACGURMUKHI
;
1708 case kCFStringEncodingMacGujarati
:
1709 enc
= wxFONTENCODING_MACGUJARATI
;
1711 case kCFStringEncodingMacOriya
:
1712 enc
=wxFONTENCODING_MACORIYA
;
1714 case kCFStringEncodingMacBengali
:
1715 enc
=wxFONTENCODING_MACBENGALI
;
1717 case kCFStringEncodingMacTamil
:
1718 enc
= wxFONTENCODING_MACTAMIL
;
1720 case kCFStringEncodingMacTelugu
:
1721 enc
= wxFONTENCODING_MACTELUGU
;
1723 case kCFStringEncodingMacKannada
:
1724 enc
= wxFONTENCODING_MACKANNADA
;
1726 case kCFStringEncodingMacMalayalam
:
1727 enc
= wxFONTENCODING_MACMALAJALAM
;
1729 case kCFStringEncodingMacSinhalese
:
1730 enc
= wxFONTENCODING_MACSINHALESE
;
1732 case kCFStringEncodingMacBurmese
:
1733 enc
= wxFONTENCODING_MACBURMESE
;
1735 case kCFStringEncodingMacKhmer
:
1736 enc
= wxFONTENCODING_MACKHMER
;
1738 case kCFStringEncodingMacThai
:
1739 enc
= wxFONTENCODING_MACTHAI
;
1741 case kCFStringEncodingMacLaotian
:
1742 enc
= wxFONTENCODING_MACLAOTIAN
;
1744 case kCFStringEncodingMacGeorgian
:
1745 enc
= wxFONTENCODING_MACGEORGIAN
;
1747 case kCFStringEncodingMacArmenian
:
1748 enc
= wxFONTENCODING_MACARMENIAN
;
1750 case kCFStringEncodingMacChineseSimp
:
1751 enc
= wxFONTENCODING_MACCHINESESIMP
;
1753 case kCFStringEncodingMacTibetan
:
1754 enc
= wxFONTENCODING_MACTIBETAN
;
1756 case kCFStringEncodingMacMongolian
:
1757 enc
= wxFONTENCODING_MACMONGOLIAN
;
1759 case kCFStringEncodingMacEthiopic
:
1760 enc
= wxFONTENCODING_MACETHIOPIC
;
1762 case kCFStringEncodingMacCentralEurRoman
:
1763 enc
= wxFONTENCODING_MACCENTRALEUR
;
1765 case kCFStringEncodingMacVietnamese
:
1766 enc
= wxFONTENCODING_MACVIATNAMESE
;
1768 case kCFStringEncodingMacExtArabic
:
1769 enc
= wxFONTENCODING_MACARABICEXT
;
1771 case kCFStringEncodingMacSymbol
:
1772 enc
= wxFONTENCODING_MACSYMBOL
;
1774 case kCFStringEncodingMacDingbats
:
1775 enc
= wxFONTENCODING_MACDINGBATS
;
1777 case kCFStringEncodingMacTurkish
:
1778 enc
= wxFONTENCODING_MACTURKISH
;
1780 case kCFStringEncodingMacCroatian
:
1781 enc
= wxFONTENCODING_MACCROATIAN
;
1783 case kCFStringEncodingMacIcelandic
:
1784 enc
= wxFONTENCODING_MACICELANDIC
;
1786 case kCFStringEncodingMacRomanian
:
1787 enc
= wxFONTENCODING_MACROMANIAN
;
1789 case kCFStringEncodingMacCeltic
:
1790 enc
= wxFONTENCODING_MACCELTIC
;
1792 case kCFStringEncodingMacGaelic
:
1793 enc
= wxFONTENCODING_MACGAELIC
;
1795 // case kCFStringEncodingMacKeyboardGlyphs :
1796 // enc = wxFONTENCODING_MACKEYBOARD ;
// wxMBConv implementation for wxCocoa: converts between a multibyte encoding
// (m_char_encoding) and wchar_t by going through CoreFoundation CFString and
// CFData objects.
1802 class wxMBConv_cocoa
: public wxMBConv
// default ctor: use the encoding returned by CFStringGetSystemEncoding()
1807 Init(CFStringGetSystemEncoding()) ;
// ctor taking a charset name: wxFontMapper translates the name into a
// wxFontEncoding which wxCFStringEncFromFontEnc() maps to a CFStringEncoding
1810 wxMBConv_cocoa(const wxChar
* name
)
1812 Init( wxCFStringEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name
, false) ) ) ;
// ctor taking a wxFontEncoding directly
1815 wxMBConv_cocoa(wxFontEncoding encoding
)
1817 Init( wxCFStringEncFromFontEnc(encoding
) );
// common part of all ctors: remember the multibyte encoding, the wide side
// is always kCFStringEncodingUnicode
1824 void Init( CFStringEncoding encoding
)
1826 m_char_encoding
= encoding
;
1827 m_unicode_encoding
= kCFStringEncodingUnicode
;
// Convert the multibyte string szUnConv to wide characters in szOut, where
// nOutSize is the size of szOut.
1830 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
// input size in bytes, counting the terminating NUL
1834 size_t nBufSize
= strlen(szUnConv
) + 1;
1835 size_t nRealOutSize
;
// unless replaced below, the UniChar data is written straight into szOut
1837 UniChar
* szUniCharBuffer
= (UniChar
*) szOut
;
1838 wchar_t* szConvBuffer
= szOut
;
// no output buffer supplied but a size given: work in a scratch buffer
1840 if (szConvBuffer
== NULL
&& nOutSize
!= 0)
1842 szConvBuffer
= new wchar_t[nOutSize
] ;
// with 4 byte wchar_t the UniChar data needs its own staging buffer
// NOTE(review): it is sized from nOutSize, not from the decoded length --
// confirm that callers always pass a large enough nOutSize
1845 #if SIZEOF_WCHAR_T == 4
1846 szUniCharBuffer
= new UniChar
[nOutSize
];
// wrap the input bytes in a CFData ("NoCopy" creation function)
// NOTE(review): no release of theData is visible in this function -- check
// for a leak
1849 CFDataRef theData
= CFDataCreateWithBytesNoCopy (
1851 (const UInt8
*)szUnConv
,
// decode the data into a CFString
1858 CFStringRef theString
= CFStringCreateFromExternalRepresentation (
1864 wxASSERT(theString
);
// early return: report the decoded length (terminator excluded) without
// extracting any characters
1868 nRealOutSize
= CFStringGetLength(theString
) + 1;
1869 CFRelease(theString
);
1870 return nRealOutSize
- 1;
// extract all UniChars of the decoded string
1873 CFRange theRange
= { 0, CFStringGetLength(theString
) };
1875 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
// length plus one for the terminator which is appended below
1878 nRealOutSize
= (CFStringGetLength(theString
) + 1);
1880 CFRelease(theString
);
1882 szUniCharBuffer
[nRealOutSize
-1] = '\0' ;
// widen the staged UTF-16 data to wchar_t and free the staging buffer
1884 #if SIZEOF_WCHAR_T == 4
1885 wxMBConvUTF16 converter
;
1886 converter
.MB2WC(szConvBuffer
, (const char*)szUniCharBuffer
, nRealOutSize
) ;
1887 delete[] szUniCharBuffer
;
// the scratch buffer was only needed when the caller did not supply one
1889 if ( szOut
== NULL
)
1890 delete [] szConvBuffer
;
// NOTE(review): this value counts the terminator while the early return
// above does not -- confirm which one is intended
1892 return nRealOutSize
;
// Convert the wide string szUnConv to the multibyte encoding in szOut, where
// nOutSize is the size of szOut.
1895 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
// input length in wide characters, counting the terminating NUL
1897 size_t nBufSize
= wxWcslen(szUnConv
) + 1;
1898 size_t nRealOutSize
;
1899 char* szBuffer
= szOut
;
// unless replaced below, the wide input is used directly as UniChar data
1900 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
// scratch output buffer of two bytes per input character plus one
1905 nRealOutSize
= ((nBufSize
- 1) << 1)+1 ;
1906 szBuffer
= new char[ nRealOutSize
] ;
1909 nRealOutSize
= nOutSize
;
// with 4 byte wchar_t first convert the input to UTF-16: query the needed
// size in bytes, allocate, convert, then turn nBufSize into a UniChar count
1911 #if SIZEOF_WCHAR_T == 4
1912 wxMBConvUTF16BE converter
;
1913 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
1914 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1] ;
1915 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
)) ;
1916 nBufSize
/= sizeof(UniChar
);
// wrap the UniChar data in a CFString ("NoCopy" creation function)
1920 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
1927 wxASSERT(theString
);
1929 //Note that CER puts a BOM when converting to unicode
1930 //so we may want to check and use getchars instead in that case
// NOTE(review): per the comment below the result is NULL when a character
// cannot be converted, but no NULL check on theData is visible before it
// is used -- confirm
1931 CFDataRef theData
= CFStringCreateExternalRepresentation(
1935 0 //what to put in characters that can't be converted -
1936 //0 tells CFString to return NULL if it meets such a character
1942 CFRelease(theString
);
// size in bytes of the encoded result
1944 nRealOutSize
= CFDataGetLength(theData
);
// no output buffer supplied: return without copying out the encoded bytes
1946 if ( szOut
== NULL
)
1951 //TODO: This gets flagged as a non-malloced address by the debugger...
1952 //#if SIZEOF_WCHAR_T == 4
1953 // delete[] szUniBuffer;
1956 return nRealOutSize
- 1;
// copy the encoded bytes into the output buffer
1959 CFRange theRange
= {0, CFDataGetLength(theData
) };
1960 CFDataGetBytes(theData
, theRange
, (UInt8
*) szBuffer
);
1964 //TODO: This gets flagged as a non-malloced address by the debugger...
1965 //#if SIZEOF_WCHAR_T == 4
1966 // delete[] szUniBuffer;
1968 return nRealOutSize
- 1;
1973 //TODO: check for invalid en/de/coding
// encoding of the multibyte side, set by Init()
1978 CFStringEncoding m_char_encoding
;
// encoding of the wide side, always kCFStringEncodingUnicode
1979 CFStringEncoding m_unicode_encoding
;
1982 #endif // defined(__WXCOCOA__)
1984 // ============================================================================
1985 // Mac conversion classes
1986 // ============================================================================
1988 #if defined(__WXMAC__) && defined(TARGET_CARBON)
// wxMBConv implementation for Carbon builds of wxMac: converts between a
// multibyte encoding and wchar_t using a pair of Text Encoding Converter
// (TEC) objects, one for each direction.
1990 class wxMBConv_mac
: public wxMBConv
// default ctor: use the encoding returned by CFStringGetSystemEncoding()
1995 Init(CFStringGetSystemEncoding()) ;
// ctor taking a charset name: wxFontMapper translates the name into a
// wxFontEncoding which wxMacGetSystemEncFromFontEnc() maps to a Mac encoding
1998 wxMBConv_mac(const wxChar
* name
)
2000 Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name
, false) ) ) ;
// ctor taking a wxFontEncoding directly
2003 wxMBConv_mac(wxFontEncoding encoding
)
2005 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
// dispose of both converter objects; the status is stored but not examined
// in the lines shown here
2010 OSStatus status
= noErr
;
2011 status
= TECDisposeConverter(m_MB2WC_converter
);
2012 status
= TECDisposeConverter(m_WC2MB_converter
);
// common part of all ctors: remember the multibyte encoding, build the
// 16 bit Unicode text encoding and create one converter per direction
2016 void Init( TextEncodingBase encoding
)
2018 OSStatus status
= noErr
;
2019 m_char_encoding
= encoding
;
2020 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
2022 status
= TECCreateConverter(&m_MB2WC_converter
,
2024 m_unicode_encoding
);
2025 status
= TECCreateConverter(&m_WC2MB_converter
,
// Convert the multibyte string psz to wide characters in buf; n is the
// size of buf in wide characters.
2030 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2032 OSStatus status
= noErr
;
2033 ByteCount byteOutLen
;
// input length in bytes, terminator excluded
2034 ByteCount byteInLen
= strlen(psz
) ;
2035 wchar_t *tbuf
= NULL
;
2036 UniChar
* ubuf
= NULL
;
// scratch output buffer of n wide characters, used in place of buf
// wherever "buf ? buf : tbuf" appears below
2042 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
// room available for the UniChar output of the converter
2044 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
// with 4 byte wchar_t the UniChars go to a staging buffer (2 extra bytes
// for the terminator), otherwise directly into the output buffer
2045 #if SIZEOF_WCHAR_T == 4
2046 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2048 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
// run the conversion: byteInLen is updated by the call, byteOutLen receives
// the number of bytes produced
2050 status
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2051 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2052 #if SIZEOF_WCHAR_T == 4
2053 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2054 // is not properly terminated we get random characters at the end
2055 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
// widen the staged UTF-16 data into the real output buffer
2056 wxMBConvUTF16BE converter
;
2057 res
= converter
.MB2WC( (buf
? buf
: tbuf
) , (const char*)ubuf
, n
) ;
// UniChar and wchar_t have the same size: the result is the UniChar count
2060 res
= byteOutLen
/ sizeof( UniChar
) ;
// only reached with a caller buffer that still has room after the result
2065 if ( buf
&& res
< n
)
// Convert the wide string psz to the multibyte encoding in buf; n is the
// size of buf in bytes.
2071 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2073 OSStatus status
= noErr
;
2074 ByteCount byteOutLen
;
// input length in bytes, terminator excluded
2075 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
// scratch output buffer of n bytes, used in place of buf wherever
// "buf ? buf : tbuf" appears below
2083 tbuf
= (char*) malloc( n
) ;
2086 ByteCount byteBufferLen
= n
;
2087 UniChar
* ubuf
= NULL
;
// with 4 byte wchar_t the input is first converted to UTF-16: query the
// size in bytes, allocate 2 extra bytes for the terminator, then convert
2088 #if SIZEOF_WCHAR_T == 4
2089 wxMBConvUTF16BE converter
;
2090 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2091 byteInLen
= unicharlen
;
2092 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2093 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
// otherwise the wide input already is UniChar data
2095 ubuf
= (UniChar
*) psz
;
// run the conversion: byteOutLen receives the number of bytes produced
2097 status
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2098 (TextPtr
) (buf
? buf
: tbuf
) , byteBufferLen
, &byteOutLen
);
2099 #if SIZEOF_WCHAR_T == 4
// the result is the number of bytes produced by the converter
2105 size_t res
= byteOutLen
;
// only reached with a caller buffer that still has room after the result
2106 if ( buf
&& res
< n
)
// the object is usable only if both converters exist
2113 { return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
; }
// converter objects created in Init(), one per direction
2116 TECObjectRef m_MB2WC_converter
;
2117 TECObjectRef m_WC2MB_converter
;
// multibyte encoding passed to Init() and the 16 bit Unicode encoding
2119 TextEncodingBase m_char_encoding
;
2120 TextEncodingBase m_unicode_encoding
;
2123 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2125 // ============================================================================
2126 // wxEncodingConverter based conversion classes
2127 // ============================================================================
2131 class wxMBConv_wxwin
: public wxMBConv
2136 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2137 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2141 // temporarily just use wxEncodingConverter stuff,
2142 // so that it works while a better implementation is built
2143 wxMBConv_wxwin(const wxChar
* name
)
2146 m_enc
= wxFontMapper::Get()->CharsetToEncoding(name
, false);
2148 m_enc
= wxFONTENCODING_SYSTEM
;
2153 wxMBConv_wxwin(wxFontEncoding enc
)
2160 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2162 size_t inbuf
= strlen(psz
);
2164 m2w
.Convert(psz
,buf
);
2168 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2170 const size_t inbuf
= wxWcslen(psz
);
2172 w2m
.Convert(psz
,buf
);
2177 bool IsOk() const { return m_ok
; }
2180 wxFontEncoding m_enc
;
2181 wxEncodingConverter m2w
, w2m
;
2183 // were we initialized successfully?
2186 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2189 #endif // wxUSE_FONTMAP
2191 // ============================================================================
2192 // wxCSConv implementation
2193 // ============================================================================
// NOTE(review): presumably the initialization helper shared by the wxCSConv
// constructors; its body is not visible here -- confirm what it resets
2195 void wxCSConv::Init()
// Construct a converter for the charset with the given name.
2202 wxCSConv::wxCSConv(const wxChar
*charset
)
// the encoding stays wxFONTENCODING_SYSTEM when the converter is built from
// a name
2211 m_encoding
= wxFONTENCODING_SYSTEM
;
2214 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2216 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2218 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2220 encoding
= wxFONTENCODING_SYSTEM
;
2225 m_encoding
= encoding
;
// Destructor.
// NOTE(review): the body is not visible here -- confirm that it releases the
// name and the real converter
2228 wxCSConv::~wxCSConv()
// Copy constructor: take over the name and the encoding of conv.
2233 wxCSConv::wxCSConv(const wxCSConv
& conv
)
// SetName() stores its own copy made with wxStrdup()
2238 SetName(conv
.m_name
);
2239 m_encoding
= conv
.m_encoding
;
2242 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2246 SetName(conv
.m_name
);
2247 m_encoding
= conv
.m_encoding
;
// NOTE(review): presumably frees the name and the real converter; the body
// is not visible here -- confirm
2252 void wxCSConv::Clear()
// Set the name of the charset used by this converter.
2261 void wxCSConv::SetName(const wxChar
*charset
)
// keep a private copy made with wxStrdup()
// NOTE(review): no release of a previous m_name is visible here -- confirm
// that callers clear the object first
2265 m_name
= wxStrdup(charset
);
2270 wxMBConv
*wxCSConv::DoCreate() const
2272 // check for the special case of ASCII or ISO8859-1 charset: as we have
2273 // special knowledge of it anyhow, we don't need to create a special
2274 // conversion object
2275 if ( m_encoding
== wxFONTENCODING_ISO8859_1
)
2277 // don't convert at all
2281 // we trust OS to do conversion better than we can so try external
2282 // conversion methods first
2284 // the full order is:
2285 // 1. OS conversion (iconv() under Unix or Win32 API)
2286 // 2. hard coded conversions for UTF
2287 // 3. wxEncodingConverter as fall back
2293 #endif // !wxUSE_FONTMAP
2295 wxString
name(m_name
);
2299 name
= wxFontMapper::Get()->GetEncodingName(m_encoding
);
2300 #endif // wxUSE_FONTMAP
2302 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
2308 #endif // HAVE_ICONV
2310 #ifdef wxHAVE_WIN32_MB2WC
2313 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2314 : new wxMBConv_win32(m_encoding
);
2323 #endif // wxHAVE_WIN32_MB2WC
2324 #if defined(__WXMAC__)
2326 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
) )
2329 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
2330 : new wxMBConv_mac(m_encoding
);
2338 #if defined(__WXCOCOA__)
2340 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
2343 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
2344 : new wxMBConv_cocoa(m_encoding
);
2353 wxFontEncoding enc
= m_encoding
;
2355 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2357 // use "false" to suppress interactive dialogs -- we can be called from
2358 // anywhere and popping up a dialog from here is the last thing we want to
2360 enc
= wxFontMapper::Get()->CharsetToEncoding(m_name
, false);
2362 #endif // wxUSE_FONTMAP
2366 case wxFONTENCODING_UTF7
:
2367 return new wxMBConvUTF7
;
2369 case wxFONTENCODING_UTF8
:
2370 return new wxMBConvUTF8
;
2372 case wxFONTENCODING_UTF16BE
:
2373 return new wxMBConvUTF16BE
;
2375 case wxFONTENCODING_UTF16LE
:
2376 return new wxMBConvUTF16LE
;
2378 case wxFONTENCODING_UTF32BE
:
2379 return new wxMBConvUTF32BE
;
2381 case wxFONTENCODING_UTF32LE
:
2382 return new wxMBConvUTF32LE
;
2385 // nothing to do but put here to suppress gcc warnings
2392 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2393 : new wxMBConv_wxwin(m_encoding
);
2399 #endif // wxUSE_FONTMAP
2401 // NB: This is a hack to prevent deadlock. What could otherwise happen
2402 // in Unicode build: wxConvLocal creation ends up being here
2403 // because of some failure and logs the error. But wxLog will try to
2404 // attach timestamp, for which it will need wxConvLocal (to convert
2405 // time to char* and then wchar_t*), but that fails, tries to log
2406 // error, but wxLog has a (already locked) critical section that
2407 // guards static buffer.
2408 static bool alreadyLoggingError
= false;
2409 if (!alreadyLoggingError
)
2411 alreadyLoggingError
= true;
2412 wxLogError(_("Cannot convert from the charset '%s'!"),
2416 wxFontMapper::GetEncodingDescription(m_encoding
).c_str()
2417 #else // !wxUSE_FONTMAP
2418 wxString::Format(_("encoding %s"), m_encoding
).c_str()
2419 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2421 alreadyLoggingError
= false;
2427 void wxCSConv::CreateConvIfNeeded() const
2431 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2434 // if we don't have neither the name nor the encoding, use the default
2435 // encoding for this system
2436 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
2438 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
2440 #endif // wxUSE_INTL
2442 self
->m_convReal
= DoCreate();
2443 self
->m_deferred
= false;
2447 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2449 CreateConvIfNeeded();
2452 return m_convReal
->MB2WC(buf
, psz
, n
);
2455 size_t len
= strlen(psz
);
2459 for (size_t c
= 0; c
<= len
; c
++)
2460 buf
[c
] = (unsigned char)(psz
[c
]);
2466 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2468 CreateConvIfNeeded();
2471 return m_convReal
->WC2MB(buf
, psz
, n
);
2474 const size_t len
= wxWcslen(psz
);
2477 for (size_t c
= 0; c
<= len
; c
++)
2481 buf
[c
] = (char)psz
[c
];
2486 for (size_t c
= 0; c
<= len
; c
++)
2496 // ----------------------------------------------------------------------------
2498 // ----------------------------------------------------------------------------
// the object behind wxConvLibc: its class depends on the platform, only one
// of the definitions below is compiled
2501 static wxMBConv_win32 wxConvLibcObj
;
2502 #elif defined(__WXMAC__) && !defined(__MACH__)
2503 static wxMBConv_mac wxConvLibcObj
;
2505 static wxMBConvLibc wxConvLibcObj
;
// the objects behind the other predefined converters
2508 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
2509 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
2510 static wxMBConvUTF7 wxConvUTF7Obj
;
2511 static wxMBConvUTF8 wxConvUTF8Obj
;
// the exported globals are references bound to the static objects above
2514 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
2515 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
2516 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
2517 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
2518 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
// wxConvCurrent is a pointer, initially pointing to the libc converter
2519 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
2521 #else // !wxUSE_WCHAR_T
2523 // stand-ins in absence of wchar_t
2524 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
2529 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T