1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
8 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
9 // (c) 2000-2003 Vadim Zeitlin
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ============================================================================
15 // ============================================================================
17 // ----------------------------------------------------------------------------
19 // ----------------------------------------------------------------------------
21 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
22 #pragma implementation "strconv.h"
25 // For compilers that support precompilation, includes "wx.h".
26 #include "wx/wxprec.h"
37 #include "wx/strconv.h"
42 #include "wx/msw/private.h"
46 #include "wx/msw/missing.h"
57 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
58 #define wxHAVE_WIN32_MB2WC
59 #endif // __WIN32__ but !__WXMICROWIN__
61 // ----------------------------------------------------------------------------
63 // ----------------------------------------------------------------------------
73 #include "wx/encconv.h"
74 #include "wx/fontmap.h"
78 #include <ATSUnicode.h>
79 #include <TextCommon.h>
80 #include <TextEncodingConverter.h>
82 #include "wx/mac/private.h" // includes mac headers
84 // ----------------------------------------------------------------------------
86 // ----------------------------------------------------------------------------
88 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
89 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
91 #if SIZEOF_WCHAR_T == 4
92 #define WC_NAME "UCS4"
93 #define WC_BSWAP BSWAP_UCS4
94 #ifdef WORDS_BIGENDIAN
95 #define WC_NAME_BEST "UCS-4BE"
97 #define WC_NAME_BEST "UCS-4LE"
99 #elif SIZEOF_WCHAR_T == 2
100 #define WC_NAME "UTF16"
101 #define WC_BSWAP BSWAP_UTF16
103 #ifdef WORDS_BIGENDIAN
104 #define WC_NAME_BEST "UTF-16BE"
106 #define WC_NAME_BEST "UTF-16LE"
108 #else // sizeof(wchar_t) != 2 nor 4
109 // does this ever happen?
110 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
113 // ============================================================================
115 // ============================================================================
117 // ----------------------------------------------------------------------------
118 // UTF-16 en/decoding to/from UCS-4
119 // ----------------------------------------------------------------------------
// Encodes the code point `input` as UTF-16 code units stored through `output`.
//
// The visible code handles three cases: a value stored as a single 16-bit
// unit, a value >= 0x110000 (outside the Unicode range) and a value split into
// a surrogate pair: high unit (input >> 10) + 0xd7c0, low unit
// (input & 0x3ff) + 0xdc00.
//
// NOTE(review): the return statements are not visible here; callers in this
// file compare the result against (size_t)-1 and otherwise use it as the
// number of 16-bit units produced -- confirm against the full function.
122 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
127 *output
= (wxUint16
) input
;
130 else if (input
>=0x110000)
138 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
139 *output
= (wxUint16
) ((input
&0x3ff)+0xdc00);
145 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
147 if ((*input
<0xd800) || (*input
>0xdfff))
152 else if ((input
[1]<0xdc00) || (input
[1]>=0xdfff))
159 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
165 // ----------------------------------------------------------------------------
167 // ----------------------------------------------------------------------------
// Converts the multibyte string psz to a wide character buffer.
//
// The conversion is done in two MB2WC() passes: the first one, with a NULL
// destination, only computes the required length, the second one fills a
// buffer of that size. (size_t)-1 from either pass is treated as failure.
//
// NOTE(review): the return statements are not visible here; the buffer built
// from a NULL pointer at the end looks like the failure result -- confirm.
169 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
173 // calculate the length of the buffer needed first
174 size_t nLen
= MB2WC(NULL
, psz
, 0);
175 if ( nLen
!= (size_t)-1 )
177 // now do the actual conversion
178 wxWCharBuffer
buf(nLen
);
179 nLen
= MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NULL
180 if ( nLen
!= (size_t)-1 )
187 wxWCharBuffer
buf((wchar_t *)NULL
);
// Converts the wide character string pwz to a multibyte buffer.
//
// Mirrors cMB2WC(): a first WC2MB() call with a NULL destination computes the
// length, a second one converts into a buffer allocated with 3 extra bytes
// and is told it may write nLen + 4 bytes, so that the terminator may be as
// wide as a wxUint32. (size_t)-1 from either call is treated as failure.
//
// NOTE(review): the return statements are not visible here; the buffer built
// from a NULL pointer at the end looks like the failure result -- confirm.
192 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
196 size_t nLen
= WC2MB(NULL
, pwz
, 0);
197 if ( nLen
!= (size_t)-1 )
199 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
200 nLen
= WC2MB(buf
.data(), pwz
, nLen
+ 4);
201 if ( nLen
!= (size_t)-1 )
208 wxCharBuffer
buf((char *)NULL
);
213 // ----------------------------------------------------------------------------
215 // ----------------------------------------------------------------------------
// Multibyte to wide char conversion: forwards buf, psz and n unchanged to
// wxMB2WC() and returns its result.
217 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
219 return wxMB2WC(buf
, psz
, n
);
// Wide char to multibyte conversion: forwards buf, psz and n unchanged to
// wxWC2MB() and returns its result.
222 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
224 return wxWC2MB(buf
, psz
, n
);
227 // ----------------------------------------------------------------------------
229 // ----------------------------------------------------------------------------
232 static char utf7_setD
[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
233 "abcdefghijklmnopqrstuvwxyz"
234 "0123456789'(),-./:?";
235 static char utf7_setO
[]="!\"#$%&*;<=>@[]^_`{|}";
236 static char utf7_setB
[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
237 "abcdefghijklmnopqrstuvwxyz"
241 // TODO: write actual implementations of UTF-7 here
242 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf
),
243 const char * WXUNUSED(psz
),
244 size_t WXUNUSED(n
)) const
249 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf
),
250 const wchar_t * WXUNUSED(psz
),
251 size_t WXUNUSED(n
)) const
256 // ----------------------------------------------------------------------------
258 // ----------------------------------------------------------------------------
260 static wxUint32 utf8_max
[]=
261 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
263 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
267 while (*psz
&& ((!buf
) || (len
< n
)))
269 unsigned char cc
= *psz
++, fc
= cc
;
271 for (cnt
= 0; fc
& 0x80; cnt
++)
285 // invalid UTF-8 sequence
290 unsigned ocnt
= cnt
- 1;
291 wxUint32 res
= cc
& (0x3f >> cnt
);
295 if ((cc
& 0xC0) != 0x80)
297 // invalid UTF-8 sequence
300 res
= (res
<< 6) | (cc
& 0x3f);
302 if (res
<= utf8_max
[ocnt
])
304 // illegal UTF-8 encoding
308 // cast is ok because wchar_t == wxUint16 if WC_UTF16
309 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
310 if (pa
== (size_t)-1)
319 #endif // WC_UTF16/!WC_UTF16
323 if (buf
&& (len
< n
))
328 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
332 while (*psz
&& ((!buf
) || (len
< n
)))
336 // cast is ok for WC_UTF16
337 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
338 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
340 cc
=(*psz
++) & 0x7fffffff;
343 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
357 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
359 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
364 if (buf
&& (len
<n
)) *buf
= 0;
372 // ----------------------------------------------------------------------------
374 // ----------------------------------------------------------------------------
376 #ifdef WORDS_BIGENDIAN
377 #define wxMBConvUTF16straight wxMBConvUTF16BE
378 #define wxMBConvUTF16swap wxMBConvUTF16LE
380 #define wxMBConvUTF16swap wxMBConvUTF16BE
381 #define wxMBConvUTF16straight wxMBConvUTF16LE
387 // copy 16bit MB to 16bit String
388 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
392 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
395 *buf
++ = *(wxUint16
*)psz
;
398 psz
+= sizeof(wxUint16
);
400 if (buf
&& len
<n
) *buf
=0;
406 // copy 16bit String to 16bit MB
407 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
411 while (*psz
&& (!buf
|| len
< n
))
415 *(wxUint16
*)buf
= *psz
;
416 buf
+= sizeof(wxUint16
);
418 len
+= sizeof(wxUint16
);
421 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
427 // swap 16bit MB to 16bit String
428 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
432 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
436 ((char *)buf
)[0] = psz
[1];
437 ((char *)buf
)[1] = psz
[0];
441 psz
+= sizeof(wxUint16
);
443 if (buf
&& len
<n
) *buf
=0;
449 // swap 16bit MB to 16bit String
450 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
454 while (*psz
&& (!buf
|| len
< n
))
458 *buf
++ = ((char*)psz
)[1];
459 *buf
++ = ((char*)psz
)[0];
461 len
+= sizeof(wxUint16
);
464 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
473 // copy 16bit MB to 32bit String
474 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
478 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
481 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
482 if (pa
== (size_t)-1)
488 psz
+= pa
* sizeof(wxUint16
);
490 if (buf
&& len
<n
) *buf
=0;
496 // copy 32bit String to 16bit MB
497 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
501 while (*psz
&& (!buf
|| len
< n
))
504 size_t pa
=encode_utf16(*psz
, cc
);
506 if (pa
== (size_t)-1)
511 *(wxUint16
*)buf
= cc
[0];
512 buf
+= sizeof(wxUint16
);
515 *(wxUint16
*)buf
= cc
[1];
516 buf
+= sizeof(wxUint16
);
520 len
+= pa
*sizeof(wxUint16
);
523 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
529 // swap 16bit MB to 32bit String
530 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
534 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
538 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
539 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
541 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
542 if (pa
== (size_t)-1)
549 psz
+= pa
* sizeof(wxUint16
);
551 if (buf
&& len
<n
) *buf
=0;
557 // swap 32bit String to 16bit MB
558 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
562 while (*psz
&& (!buf
|| len
< n
))
565 size_t pa
=encode_utf16(*psz
, cc
);
567 if (pa
== (size_t)-1)
572 *buf
++ = ((char*)cc
)[1];
573 *buf
++ = ((char*)cc
)[0];
576 *buf
++ = ((char*)cc
)[3];
577 *buf
++ = ((char*)cc
)[2];
581 len
+= pa
*sizeof(wxUint16
);
584 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
592 // ----------------------------------------------------------------------------
594 // ----------------------------------------------------------------------------
596 #ifdef WORDS_BIGENDIAN
597 #define wxMBConvUTF32straight wxMBConvUTF32BE
598 #define wxMBConvUTF32swap wxMBConvUTF32LE
600 #define wxMBConvUTF32swap wxMBConvUTF32BE
601 #define wxMBConvUTF32straight wxMBConvUTF32LE
605 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
606 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
611 // copy 32bit MB to 16bit String
612 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
616 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
620 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
621 if (pa
== (size_t)-1)
631 psz
+= sizeof(wxUint32
);
633 if (buf
&& len
<n
) *buf
=0;
639 // copy 16bit String to 32bit MB
640 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
644 while (*psz
&& (!buf
|| len
< n
))
648 // cast is ok for WC_UTF16
649 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
650 if (pa
== (size_t)-1)
655 *(wxUint32
*)buf
= cc
;
656 buf
+= sizeof(wxUint32
);
658 len
+= sizeof(wxUint32
);
662 if (buf
&& len
<=n
-sizeof(wxUint32
))
670 // swap 32bit MB to 16bit String
671 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
675 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
678 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
679 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
684 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
685 if (pa
== (size_t)-1)
695 psz
+= sizeof(wxUint32
);
705 // swap 16bit String to 32bit MB
706 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
710 while (*psz
&& (!buf
|| len
< n
))
714 // cast is ok for WC_UTF16
715 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
716 if (pa
== (size_t)-1)
726 len
+= sizeof(wxUint32
);
730 if (buf
&& len
<=n
-sizeof(wxUint32
))
739 // copy 32bit MB to 32bit String
740 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
744 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
747 *buf
++ = *(wxUint32
*)psz
;
749 psz
+= sizeof(wxUint32
);
759 // copy 32bit String to 32bit MB
760 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
764 while (*psz
&& (!buf
|| len
< n
))
768 *(wxUint32
*)buf
= *psz
;
769 buf
+= sizeof(wxUint32
);
772 len
+= sizeof(wxUint32
);
776 if (buf
&& len
<=n
-sizeof(wxUint32
))
783 // swap 32bit MB to 32bit String
784 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
788 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
792 ((char *)buf
)[0] = psz
[3];
793 ((char *)buf
)[1] = psz
[2];
794 ((char *)buf
)[2] = psz
[1];
795 ((char *)buf
)[3] = psz
[0];
799 psz
+= sizeof(wxUint32
);
809 // swap 32bit String to 32bit MB
810 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
814 while (*psz
&& (!buf
|| len
< n
))
818 *buf
++ = ((char *)psz
)[3];
819 *buf
++ = ((char *)psz
)[2];
820 *buf
++ = ((char *)psz
)[1];
821 *buf
++ = ((char *)psz
)[0];
823 len
+= sizeof(wxUint32
);
827 if (buf
&& len
<=n
-sizeof(wxUint32
))
837 // ============================================================================
838 // The classes doing conversion using the iconv_xxx() functions
839 // ============================================================================
843 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
844 // if output buffer is _exactly_ as big as needed. Such case is (unless there's
845 // yet another bug in glibc) the only case when iconv() returns with (size_t)-1
846 // (which means error) and says there are 0 bytes left in the input buffer --
847 // when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
848 // this alternative test for iconv() failure.
849 // [This bug does not appear in glibc 2.2.]
850 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
851 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
852 (errno != E2BIG || bufLeft != 0))
854 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
857 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
859 // ----------------------------------------------------------------------------
860 // wxMBConv_iconv: encapsulates an iconv character set
861 // ----------------------------------------------------------------------------
863 class wxMBConv_iconv
: public wxMBConv
866 wxMBConv_iconv(const wxChar
*name
);
867 virtual ~wxMBConv_iconv();
869 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
870 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
873 { return (m2w
!= (iconv_t
)-1) && (w2m
!= (iconv_t
)-1); }
876 // the iconv handlers used to translate from multibyte to wide char and in
877 // the other direction
882 // the name (for iconv_open()) of a wide char charset -- if none is
883 // available on this machine, it will remain NULL
884 static const char *ms_wcCharsetName
;
886 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
887 // different endian-ness than the native one
888 static bool ms_wcNeedsSwap
;
891 const char *wxMBConv_iconv::ms_wcCharsetName
= NULL
;
892 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
894 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
896 // Do it the hard way
898 for (size_t i
= 0; i
< wxStrlen(name
)+1; i
++)
899 cname
[i
] = (char) name
[i
];
901 // check for charset that represents wchar_t:
902 if (ms_wcCharsetName
== NULL
)
904 ms_wcNeedsSwap
= false;
906 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
907 ms_wcCharsetName
= WC_NAME_BEST
;
908 m2w
= iconv_open(ms_wcCharsetName
, cname
);
910 if (m2w
== (iconv_t
)-1)
912 // try charset w/o bytesex info (e.g. "UCS4")
913 // and check for bytesex ourselves:
914 ms_wcCharsetName
= WC_NAME
;
915 m2w
= iconv_open(ms_wcCharsetName
, cname
);
917 // last bet, try if it knows WCHAR_T pseudo-charset
918 if (m2w
== (iconv_t
)-1)
920 ms_wcCharsetName
= "WCHAR_T";
921 m2w
= iconv_open(ms_wcCharsetName
, cname
);
924 if (m2w
!= (iconv_t
)-1)
926 char buf
[2], *bufPtr
;
927 wchar_t wbuf
[2], *wbufPtr
;
935 outsz
= SIZEOF_WCHAR_T
* 2;
939 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
940 (char**)&wbufPtr
, &outsz
);
942 if (ICONV_FAILED(res
, insz
))
944 ms_wcCharsetName
= NULL
;
945 wxLogLastError(wxT("iconv"));
946 wxLogError(_("Conversion to charset '%s' doesn't work."), name
);
950 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
955 ms_wcCharsetName
= NULL
;
957 // VS: we must not output an error here, since wxWidgets will safely
958 // fall back to using wxEncodingConverter.
959 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name
);
963 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName
, ms_wcNeedsSwap
);
965 else // we already have ms_wcCharsetName
967 m2w
= iconv_open(ms_wcCharsetName
, cname
);
970 // NB: don't ever pass NULL to iconv_open(), it may crash!
971 if ( ms_wcCharsetName
)
973 w2m
= iconv_open( cname
, ms_wcCharsetName
);
981 wxMBConv_iconv::~wxMBConv_iconv()
983 if ( m2w
!= (iconv_t
)-1 )
985 if ( w2m
!= (iconv_t
)-1 )
989 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
991 size_t inbuf
= strlen(psz
);
992 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
994 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
995 wchar_t *bufPtr
= buf
;
996 const char *pszPtr
= psz
;
1000 // have destination buffer, convert there
1002 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1003 (char**)&bufPtr
, &outbuf
);
1004 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1008 // convert to native endianness
1009 WC_BSWAP(buf
/* _not_ bufPtr */, res
)
1012 // NB: iconv was given only strlen(psz) characters on input, and so
1013 // it couldn't convert the trailing zero. Let's do it ourselves
1014 // if there's some room left for it in the output buffer.
1020 // no destination buffer... convert using temp buffer
1021 // to calculate destination buffer requirement
1026 outbuf
= 8*SIZEOF_WCHAR_T
;
1029 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1030 (char**)&bufPtr
, &outbuf
);
1032 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1033 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1036 if (ICONV_FAILED(cres
, inbuf
))
1038 //VS: it is ok if iconv fails, hence trace only
1039 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1046 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1048 size_t inbuf
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
1052 wchar_t *tmpbuf
= 0;
1056 // need to copy to temp buffer to switch endianness
1057 // this absolutely doesn't rock!
1058 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1059 // could be in read-only memory, or be accessed in some other thread)
1060 tmpbuf
=(wchar_t*)malloc((inbuf
+1)*SIZEOF_WCHAR_T
);
1061 memcpy(tmpbuf
,psz
,(inbuf
+1)*SIZEOF_WCHAR_T
);
1062 WC_BSWAP(tmpbuf
, inbuf
)
1068 // have destination buffer, convert there
1069 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1073 // NB: iconv was given only wcslen(psz) characters on input, and so
1074 // it couldn't convert the trailing zero. Let's do it ourselves
1075 // if there's some room left for it in the output buffer.
1081 // no destination buffer... convert using temp buffer
1082 // to calculate destination buffer requirement
1086 buf
= tbuf
; outbuf
= 16;
1088 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1091 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1099 if (ICONV_FAILED(cres
, inbuf
))
1101 //VS: it is ok if iconv fails, hence trace only
1102 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1109 #endif // HAVE_ICONV
1112 // ============================================================================
1113 // Win32 conversion classes
1114 // ============================================================================
1116 #ifdef wxHAVE_WIN32_MB2WC
1120 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1121 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1124 class wxMBConv_win32
: public wxMBConv
1129 m_CodePage
= CP_ACP
;
1133 wxMBConv_win32(const wxChar
* name
)
1135 m_CodePage
= wxCharsetToCodepage(name
);
1138 wxMBConv_win32(wxFontEncoding encoding
)
1140 m_CodePage
= wxEncodingToCodepage(encoding
);
1144 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1146 // note that we have to use MB_ERR_INVALID_CHARS flag as without it
1147 // the behaviour is not compatible with the Unix version (using iconv)
1148 // and breaks the library itself, e.g. wxTextInputStream::NextChar()
1149 // wouldn't work if reading an incomplete MB char didn't result in an
1151 const size_t len
= ::MultiByteToWideChar
1153 m_CodePage
, // code page
1154 MB_ERR_INVALID_CHARS
, // flags: fall on error
1155 psz
, // input string
1156 -1, // its length (NUL-terminated)
1157 buf
, // output string
1158 buf
? n
: 0 // size of output buffer
1161 // note that it returns count of written chars for buf != NULL and size
1162 // of the needed buffer for buf == NULL so in either case the length of
1163 // the string (which never includes the terminating NUL) is one less
1164 return len
? len
- 1 : (size_t)-1;
1167 size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
1170 we have a problem here: by default, WideCharToMultiByte() may
1171 replace characters unrepresentable in the target code page with bad
1172 quality approximations such as turning "1/2" symbol (U+00BD) into
1173 "1" for the code pages which don't have it and we, obviously, want
1174 to avoid this at any price
1176 the trouble is that this function does it _silently_, i.e. it won't
1177 even tell us whether it did or not... Win98/2000 and higher provide
1178 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1179 we have to resort to a round trip, i.e. check that converting back
1180 results in the same string -- this is, of course, expensive but
1181 otherwise we simply can't be sure to not garble the data.
1184 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1185 // it doesn't work with CJK encodings (which we test for rather roughly
1186 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1188 BOOL usedDef
wxDUMMY_INITIALIZE(false);
1191 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
1193 // it's our lucky day
1194 flags
= WC_NO_BEST_FIT_CHARS
;
1195 pUsedDef
= &usedDef
;
1197 else // old system or unsupported encoding
1203 const size_t len
= ::WideCharToMultiByte
1205 m_CodePage
, // code page
1206 flags
, // either none or no best fit
1207 pwz
, // input string
1208 -1, // it is (wide) NUL-terminated
1209 buf
, // output buffer
1210 buf
? n
: 0, // and its size
1211 NULL
, // default "replacement" char
1212 pUsedDef
// [out] was it used?
1217 // function totally failed
1221 // if we were really converting, check if we succeeded
1226 // check if the conversion failed, i.e. if any replacements
1231 else // we must resort to double tripping...
1233 wxWCharBuffer
wcBuf(n
);
1234 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
1235 wcscmp(wcBuf
, pwz
) != 0 )
1237 // we didn't obtain the same thing we started from, hence
1238 // the conversion was lossy and we consider that it failed
1244 // see the comment above for the reason of "len - 1"
1248 bool IsOk() const { return m_CodePage
!= -1; }
1251 static bool CanUseNoBestFit()
1253 static int s_isWin98Or2k
= -1;
1255 if ( s_isWin98Or2k
== -1 )
1258 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
1261 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
1265 s_isWin98Or2k
= verMaj
>= 5;
1269 // unknown, be conservative by default
1273 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
1276 return s_isWin98Or2k
== 1;
1282 #endif // wxHAVE_WIN32_MB2WC
1284 // ============================================================================
1285 // Cocoa conversion classes
1286 // ============================================================================
1288 #if defined(__WXCOCOA__)
1290 // RN: There is no UTF-32 support in either Core Foundation or
1291 // Cocoa. Strangely enough, Core Foundation uses
1292 // UTF-32 internally quite a bit - it's just not public (yet).
1294 #include <CoreFoundation/CFString.h>
1295 #include <CoreFoundation/CFStringEncodingExt.h>
1297 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
1299 CFStringEncoding enc
= 0 ;
1300 if ( encoding
== wxFONTENCODING_DEFAULT
)
1303 encoding
= wxFont::GetDefaultEncoding() ;
1305 encoding
= wxLocale::GetSystemEncoding() ;
1308 else switch( encoding
)
1310 case wxFONTENCODING_ISO8859_1
:
1311 enc
= kCFStringEncodingISOLatin1
;
1313 case wxFONTENCODING_ISO8859_2
:
1314 enc
= kCFStringEncodingISOLatin2
;
1316 case wxFONTENCODING_ISO8859_3
:
1317 enc
= kCFStringEncodingISOLatin3
;
1319 case wxFONTENCODING_ISO8859_4
:
1320 enc
= kCFStringEncodingISOLatin4
;
1322 case wxFONTENCODING_ISO8859_5
:
1323 enc
= kCFStringEncodingISOLatinCyrillic
;
1325 case wxFONTENCODING_ISO8859_6
:
1326 enc
= kCFStringEncodingISOLatinArabic
;
1328 case wxFONTENCODING_ISO8859_7
:
1329 enc
= kCFStringEncodingISOLatinGreek
;
1331 case wxFONTENCODING_ISO8859_8
:
1332 enc
= kCFStringEncodingISOLatinHebrew
;
1334 case wxFONTENCODING_ISO8859_9
:
1335 enc
= kCFStringEncodingISOLatin5
;
1337 case wxFONTENCODING_ISO8859_10
:
1338 enc
= kCFStringEncodingISOLatin6
;
1340 case wxFONTENCODING_ISO8859_11
:
1341 enc
= kCFStringEncodingISOLatinThai
;
1343 case wxFONTENCODING_ISO8859_13
:
1344 enc
= kCFStringEncodingISOLatin7
;
1346 case wxFONTENCODING_ISO8859_14
:
1347 enc
= kCFStringEncodingISOLatin8
;
1349 case wxFONTENCODING_ISO8859_15
:
1350 enc
= kCFStringEncodingISOLatin9
;
1353 case wxFONTENCODING_KOI8
:
1354 enc
= kCFStringEncodingKOI8_R
;
1356 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
1357 enc
= kCFStringEncodingDOSRussian
;
1360 // case wxFONTENCODING_BULGARIAN :
1364 case wxFONTENCODING_CP437
:
1365 enc
=kCFStringEncodingDOSLatinUS
;
1367 case wxFONTENCODING_CP850
:
1368 enc
= kCFStringEncodingDOSLatin1
;
1370 case wxFONTENCODING_CP852
:
1371 enc
= kCFStringEncodingDOSLatin2
;
1373 case wxFONTENCODING_CP855
:
1374 enc
= kCFStringEncodingDOSCyrillic
;
1376 case wxFONTENCODING_CP866
:
1377 enc
=kCFStringEncodingDOSRussian
;
1379 case wxFONTENCODING_CP874
:
1380 enc
= kCFStringEncodingDOSThai
;
1382 case wxFONTENCODING_CP932
:
1383 enc
= kCFStringEncodingDOSJapanese
;
1385 case wxFONTENCODING_CP936
:
1386 enc
=kCFStringEncodingDOSChineseSimplif
;
1388 case wxFONTENCODING_CP949
:
1389 enc
= kCFStringEncodingDOSKorean
;
1391 case wxFONTENCODING_CP950
:
1392 enc
= kCFStringEncodingDOSChineseTrad
;
1395 case wxFONTENCODING_CP1250
:
1396 enc
= kCFStringEncodingWindowsLatin2
;
1398 case wxFONTENCODING_CP1251
:
1399 enc
=kCFStringEncodingWindowsCyrillic
;
1401 case wxFONTENCODING_CP1252
:
1402 enc
=kCFStringEncodingWindowsLatin1
;
1404 case wxFONTENCODING_CP1253
:
1405 enc
= kCFStringEncodingWindowsGreek
;
1407 case wxFONTENCODING_CP1254
:
1408 enc
= kCFStringEncodingWindowsLatin5
;
1410 case wxFONTENCODING_CP1255
:
1411 enc
=kCFStringEncodingWindowsHebrew
;
1413 case wxFONTENCODING_CP1256
:
1414 enc
=kCFStringEncodingWindowsArabic
;
1416 case wxFONTENCODING_CP1257
:
1417 enc
= kCFStringEncodingWindowsBalticRim
;
1419 case wxFONTENCODING_UTF7
:
1420 enc
= kCFStringEncodingNonLossyASCII
;
1422 case wxFONTENCODING_UTF8
:
1423 enc
= kCFStringEncodingUTF8
;
1425 case wxFONTENCODING_EUC_JP
:
1426 enc
= kCFStringEncodingEUC_JP
;
1428 case wxFONTENCODING_UTF16
:
1429 enc
= kCFStringEncodingUnicode
;
1431 case wxFONTENCODING_MACROMAN
:
1432 enc
= kCFStringEncodingMacRoman
;
1434 case wxFONTENCODING_MACJAPANESE
:
1435 enc
= kCFStringEncodingMacJapanese
;
1437 case wxFONTENCODING_MACCHINESETRAD
:
1438 enc
= kCFStringEncodingMacChineseTrad
;
1440 case wxFONTENCODING_MACKOREAN
:
1441 enc
= kCFStringEncodingMacKorean
;
1443 case wxFONTENCODING_MACARABIC
:
1444 enc
= kCFStringEncodingMacArabic
;
1446 case wxFONTENCODING_MACHEBREW
:
1447 enc
= kCFStringEncodingMacHebrew
;
1449 case wxFONTENCODING_MACGREEK
:
1450 enc
= kCFStringEncodingMacGreek
;
1452 case wxFONTENCODING_MACCYRILLIC
:
1453 enc
= kCFStringEncodingMacCyrillic
;
1455 case wxFONTENCODING_MACDEVANAGARI
:
1456 enc
= kCFStringEncodingMacDevanagari
;
1458 case wxFONTENCODING_MACGURMUKHI
:
1459 enc
= kCFStringEncodingMacGurmukhi
;
1461 case wxFONTENCODING_MACGUJARATI
:
1462 enc
= kCFStringEncodingMacGujarati
;
1464 case wxFONTENCODING_MACORIYA
:
1465 enc
= kCFStringEncodingMacOriya
;
1467 case wxFONTENCODING_MACBENGALI
:
1468 enc
= kCFStringEncodingMacBengali
;
1470 case wxFONTENCODING_MACTAMIL
:
1471 enc
= kCFStringEncodingMacTamil
;
1473 case wxFONTENCODING_MACTELUGU
:
1474 enc
= kCFStringEncodingMacTelugu
;
1476 case wxFONTENCODING_MACKANNADA
:
1477 enc
= kCFStringEncodingMacKannada
;
1479 case wxFONTENCODING_MACMALAJALAM
:
1480 enc
= kCFStringEncodingMacMalayalam
;
1482 case wxFONTENCODING_MACSINHALESE
:
1483 enc
= kCFStringEncodingMacSinhalese
;
1485 case wxFONTENCODING_MACBURMESE
:
1486 enc
= kCFStringEncodingMacBurmese
;
1488 case wxFONTENCODING_MACKHMER
:
1489 enc
= kCFStringEncodingMacKhmer
;
1491 case wxFONTENCODING_MACTHAI
:
1492 enc
= kCFStringEncodingMacThai
;
1494 case wxFONTENCODING_MACLAOTIAN
:
1495 enc
= kCFStringEncodingMacLaotian
;
1497 case wxFONTENCODING_MACGEORGIAN
:
1498 enc
= kCFStringEncodingMacGeorgian
;
1500 case wxFONTENCODING_MACARMENIAN
:
1501 enc
= kCFStringEncodingMacArmenian
;
1503 case wxFONTENCODING_MACCHINESESIMP
:
1504 enc
= kCFStringEncodingMacChineseSimp
;
1506 case wxFONTENCODING_MACTIBETAN
:
1507 enc
= kCFStringEncodingMacTibetan
;
1509 case wxFONTENCODING_MACMONGOLIAN
:
1510 enc
= kCFStringEncodingMacMongolian
;
1512 case wxFONTENCODING_MACETHIOPIC
:
1513 enc
= kCFStringEncodingMacEthiopic
;
1515 case wxFONTENCODING_MACCENTRALEUR
:
1516 enc
= kCFStringEncodingMacCentralEurRoman
;
1518 case wxFONTENCODING_MACVIATNAMESE
:
1519 enc
= kCFStringEncodingMacVietnamese
;
1521 case wxFONTENCODING_MACARABICEXT
:
1522 enc
= kCFStringEncodingMacExtArabic
;
1524 case wxFONTENCODING_MACSYMBOL
:
1525 enc
= kCFStringEncodingMacSymbol
;
1527 case wxFONTENCODING_MACDINGBATS
:
1528 enc
= kCFStringEncodingMacDingbats
;
1530 case wxFONTENCODING_MACTURKISH
:
1531 enc
= kCFStringEncodingMacTurkish
;
1533 case wxFONTENCODING_MACCROATIAN
:
1534 enc
= kCFStringEncodingMacCroatian
;
1536 case wxFONTENCODING_MACICELANDIC
:
1537 enc
= kCFStringEncodingMacIcelandic
;
1539 case wxFONTENCODING_MACROMANIAN
:
1540 enc
= kCFStringEncodingMacRomanian
;
1542 case wxFONTENCODING_MACCELTIC
:
1543 enc
= kCFStringEncodingMacCeltic
;
1545 case wxFONTENCODING_MACGAELIC
:
1546 enc
= kCFStringEncodingMacGaelic
;
1548 // case wxFONTENCODING_MACKEYBOARD :
1549 // enc = kCFStringEncodingMacKeyboardGlyphs ;
1552 // because gcc is picky
1558 wxFontEncoding
wxFontEncFromCFStringEnc(CFStringEncoding encoding
)
1560 wxFontEncoding enc
= wxFONTENCODING_DEFAULT
;
1564 case kCFStringEncodingISOLatin1
:
1565 enc
= wxFONTENCODING_ISO8859_1
;
1567 case kCFStringEncodingISOLatin2
:
1568 enc
= wxFONTENCODING_ISO8859_2
;
1570 case kCFStringEncodingISOLatin3
:
1571 enc
= wxFONTENCODING_ISO8859_3
;
1573 case kCFStringEncodingISOLatin4
:
1574 enc
= wxFONTENCODING_ISO8859_4
;
1576 case kCFStringEncodingISOLatinCyrillic
:
1577 enc
= wxFONTENCODING_ISO8859_5
;
1579 case kCFStringEncodingISOLatinArabic
:
1580 enc
= wxFONTENCODING_ISO8859_6
;
1582 case kCFStringEncodingISOLatinGreek
:
1583 enc
= wxFONTENCODING_ISO8859_7
;
1585 case kCFStringEncodingISOLatinHebrew
:
1586 enc
= wxFONTENCODING_ISO8859_8
;
1588 case kCFStringEncodingISOLatin5
:
1589 enc
= wxFONTENCODING_ISO8859_9
;
1591 case kCFStringEncodingISOLatin6
:
1592 enc
= wxFONTENCODING_ISO8859_10
;
1594 case kCFStringEncodingISOLatin7
:
1595 enc
= wxFONTENCODING_ISO8859_13
;
1597 case kCFStringEncodingISOLatin8
:
1598 enc
= wxFONTENCODING_ISO8859_14
;
1600 case kCFStringEncodingISOLatin9
:
1601 enc
=wxFONTENCODING_ISO8859_15
;
1604 case kCFStringEncodingKOI8_R
:
1605 enc
= wxFONTENCODING_KOI8
;
1609 // enc = wxFONTENCODING_BULGARIAN;
1612 case kCFStringEncodingDOSLatinUS
:
1613 enc
= wxFONTENCODING_CP437
;
1615 case kCFStringEncodingDOSLatin1
:
1616 enc
= wxFONTENCODING_CP850
;
1618 case kCFStringEncodingDOSLatin2
:
1619 enc
=wxFONTENCODING_CP852
;
1621 case kCFStringEncodingDOSCyrillic
:
1622 enc
= wxFONTENCODING_CP855
;
1624 case kCFStringEncodingDOSRussian
:
1625 enc
= wxFONTENCODING_CP866
;
1627 case kCFStringEncodingDOSThai
:
1628 enc
=wxFONTENCODING_CP874
;
1630 case kCFStringEncodingDOSJapanese
:
1631 enc
= wxFONTENCODING_CP932
;
1633 case kCFStringEncodingDOSChineseSimplif
:
1634 enc
= wxFONTENCODING_CP936
;
1636 case kCFStringEncodingDOSKorean
:
1637 enc
= wxFONTENCODING_CP949
;
1639 case kCFStringEncodingDOSChineseTrad
:
1640 enc
= wxFONTENCODING_CP950
;
1643 case kCFStringEncodingWindowsLatin2
:
1644 enc
= wxFONTENCODING_CP1250
;
1646 case kCFStringEncodingWindowsCyrillic
:
1647 enc
= wxFONTENCODING_CP1251
;
1649 case kCFStringEncodingWindowsLatin1
:
1650 enc
= wxFONTENCODING_CP1252
;
1652 case kCFStringEncodingWindowsGreek
:
1653 enc
= wxFONTENCODING_CP1253
;
1655 case kCFStringEncodingWindowsLatin5
:
1656 enc
= wxFONTENCODING_CP1254
;
1658 case kCFStringEncodingWindowsHebrew
:
1659 enc
= wxFONTENCODING_CP1255
;
1661 case kCFStringEncodingWindowsArabic
:
1662 enc
= wxFONTENCODING_CP1256
;
1664 case kCFStringEncodingWindowsBalticRim
:
1665 enc
=wxFONTENCODING_CP1257
;
1667 case kCFStringEncodingEUC_JP
:
1668 enc
= wxFONTENCODING_EUC_JP
;
1670 case kCFStringEncodingUnicode
:
1671 enc
= wxFONTENCODING_UTF16
;
1673 case kCFStringEncodingMacRoman
:
1674 enc
= wxFONTENCODING_MACROMAN
;
1676 case kCFStringEncodingMacJapanese
:
1677 enc
= wxFONTENCODING_MACJAPANESE
;
1679 case kCFStringEncodingMacChineseTrad
:
1680 enc
= wxFONTENCODING_MACCHINESETRAD
;
1682 case kCFStringEncodingMacKorean
:
1683 enc
= wxFONTENCODING_MACKOREAN
;
1685 case kCFStringEncodingMacArabic
:
1686 enc
=wxFONTENCODING_MACARABIC
;
1688 case kCFStringEncodingMacHebrew
:
1689 enc
= wxFONTENCODING_MACHEBREW
;
1691 case kCFStringEncodingMacGreek
:
1692 enc
= wxFONTENCODING_MACGREEK
;
1694 case kCFStringEncodingMacCyrillic
:
1695 enc
= wxFONTENCODING_MACCYRILLIC
;
1697 case kCFStringEncodingMacDevanagari
:
1698 enc
= wxFONTENCODING_MACDEVANAGARI
;
1700 case kCFStringEncodingMacGurmukhi
:
1701 enc
= wxFONTENCODING_MACGURMUKHI
;
1703 case kCFStringEncodingMacGujarati
:
1704 enc
= wxFONTENCODING_MACGUJARATI
;
1706 case kCFStringEncodingMacOriya
:
1707 enc
=wxFONTENCODING_MACORIYA
;
1709 case kCFStringEncodingMacBengali
:
1710 enc
=wxFONTENCODING_MACBENGALI
;
1712 case kCFStringEncodingMacTamil
:
1713 enc
= wxFONTENCODING_MACTAMIL
;
1715 case kCFStringEncodingMacTelugu
:
1716 enc
= wxFONTENCODING_MACTELUGU
;
1718 case kCFStringEncodingMacKannada
:
1719 enc
= wxFONTENCODING_MACKANNADA
;
1721 case kCFStringEncodingMacMalayalam
:
1722 enc
= wxFONTENCODING_MACMALAJALAM
;
1724 case kCFStringEncodingMacSinhalese
:
1725 enc
= wxFONTENCODING_MACSINHALESE
;
1727 case kCFStringEncodingMacBurmese
:
1728 enc
= wxFONTENCODING_MACBURMESE
;
1730 case kCFStringEncodingMacKhmer
:
1731 enc
= wxFONTENCODING_MACKHMER
;
1733 case kCFStringEncodingMacThai
:
1734 enc
= wxFONTENCODING_MACTHAI
;
1736 case kCFStringEncodingMacLaotian
:
1737 enc
= wxFONTENCODING_MACLAOTIAN
;
1739 case kCFStringEncodingMacGeorgian
:
1740 enc
= wxFONTENCODING_MACGEORGIAN
;
1742 case kCFStringEncodingMacArmenian
:
1743 enc
= wxFONTENCODING_MACARMENIAN
;
1745 case kCFStringEncodingMacChineseSimp
:
1746 enc
= wxFONTENCODING_MACCHINESESIMP
;
1748 case kCFStringEncodingMacTibetan
:
1749 enc
= wxFONTENCODING_MACTIBETAN
;
1751 case kCFStringEncodingMacMongolian
:
1752 enc
= wxFONTENCODING_MACMONGOLIAN
;
1754 case kCFStringEncodingMacEthiopic
:
1755 enc
= wxFONTENCODING_MACETHIOPIC
;
1757 case kCFStringEncodingMacCentralEurRoman
:
1758 enc
= wxFONTENCODING_MACCENTRALEUR
;
1760 case kCFStringEncodingMacVietnamese
:
1761 enc
= wxFONTENCODING_MACVIATNAMESE
;
1763 case kCFStringEncodingMacExtArabic
:
1764 enc
= wxFONTENCODING_MACARABICEXT
;
1766 case kCFStringEncodingMacSymbol
:
1767 enc
= wxFONTENCODING_MACSYMBOL
;
1769 case kCFStringEncodingMacDingbats
:
1770 enc
= wxFONTENCODING_MACDINGBATS
;
1772 case kCFStringEncodingMacTurkish
:
1773 enc
= wxFONTENCODING_MACTURKISH
;
1775 case kCFStringEncodingMacCroatian
:
1776 enc
= wxFONTENCODING_MACCROATIAN
;
1778 case kCFStringEncodingMacIcelandic
:
1779 enc
= wxFONTENCODING_MACICELANDIC
;
1781 case kCFStringEncodingMacRomanian
:
1782 enc
= wxFONTENCODING_MACROMANIAN
;
1784 case kCFStringEncodingMacCeltic
:
1785 enc
= wxFONTENCODING_MACCELTIC
;
1787 case kCFStringEncodingMacGaelic
:
1788 enc
= wxFONTENCODING_MACGAELIC
;
1790 // case kCFStringEncodingMacKeyboardGlyphs :
1791 // enc = wxFONTENCODING_MACKEYBOARD ;
// wxMBConv implementation for wxCocoa: both directions go through a
// temporary CFString built from the input and then read back out in the
// target representation.
// m_char_encoding is the CFStringEncoding of the multibyte side; Init()
// always sets m_unicode_encoding to kCFStringEncodingUnicode.
1797 class wxMBConv_cocoa
: public wxMBConv
// Initialise with the encoding reported by CFStringGetSystemEncoding().
1802 Init(CFStringGetSystemEncoding()) ;
// Construct from a charset name: the name is mapped to a wxFontEncoding by
// wxFontMapper::CharsetToEncoding(name, false) and from there to a
// CFStringEncoding by wxCFStringEncFromFontEnc().
1805 wxMBConv_cocoa(const wxChar
* name
)
1807 Init( wxCFStringEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name
, false) ) ) ;
// Construct from a wxFontEncoding value.
1810 wxMBConv_cocoa(wxFontEncoding encoding
)
1812 Init( wxCFStringEncFromFontEnc(encoding
) );
// Record the multibyte encoding; the Unicode side is fixed.
1819 void Init( CFStringEncoding encoding
)
1821 m_char_encoding
= encoding
;
1822 m_unicode_encoding
= kCFStringEncodingUnicode
;
// Multibyte -> wide conversion.
//   szOut     destination buffer; compared against NULL below
//   szUnConv  NUL-terminated input (its length is taken with strlen)
//   nOutSize  element count used for the temporary buffers allocated below
// NOTE(review): one return path yields nRealOutSize - 1 and the final one
// yields nRealOutSize (string length + 1) -- confirm which value callers
// expect.
1825 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
1829 size_t nBufSize
= strlen(szUnConv
) + 1;
1830 size_t nRealOutSize
;
// by default the UniChar data is written straight into the caller's buffer
1832 UniChar
* szUniCharBuffer
= (UniChar
*) szOut
;
1833 wchar_t* szConvBuffer
= szOut
;
// no caller buffer but a non-zero size: work in a scratch buffer instead
1835 if (szConvBuffer
== NULL
&& nOutSize
!= 0)
1837 szConvBuffer
= new wchar_t[nOutSize
] ;
// with 4-byte wchar_t the UTF-16 data needs its own intermediate buffer
1840 #if SIZEOF_WCHAR_T == 4
1841 szUniCharBuffer
= new UniChar
[nOutSize
];
// wrap the input bytes in a CFData without copying them
// NOTE(review): no CFRelease(theData) appears in this function -- confirm
// the CFData object is released somewhere.
1844 CFDataRef theData
= CFDataCreateWithBytesNoCopy (
1846 (const UInt8
*)szUnConv
,
1853 CFStringRef theString
= CFStringCreateFromExternalRepresentation (
1859 wxASSERT(theString
);
// early-return path: report the string length only
// NOTE(review): szUniCharBuffer allocated above is not deleted before this
// return -- confirm it cannot leak here.
1863 nRealOutSize
= CFStringGetLength(theString
) + 1;
1864 CFRelease(theString
);
1865 return nRealOutSize
- 1;
// copy every UTF-16 unit of the string into szUniCharBuffer
// NOTE(review): the copy length is the full string length and is not
// compared with nOutSize here -- confirm the buffer is always big enough.
1868 CFRange theRange
= { 0, CFStringGetLength(theString
) };
1870 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
1873 nRealOutSize
= (CFStringGetLength(theString
) + 1);
1875 CFRelease(theString
);
// CFStringGetCharacters was given only the string's characters, so terminate explicitly
1877 szUniCharBuffer
[nRealOutSize
-1] = '\0' ;
// 4-byte wchar_t: widen the UTF-16 data via wxMBConvUTF16, then drop the
// intermediate buffer
1879 #if SIZEOF_WCHAR_T == 4
1880 wxMBConvUTF16 converter
;
1881 converter
.MB2WC(szConvBuffer
, (const char*)szUniCharBuffer
, nRealOutSize
) ;
1882 delete[] szUniCharBuffer
;
// the scratch buffer was only needed when the caller passed no buffer
1884 if ( szOut
== NULL
)
1885 delete [] szConvBuffer
;
1887 return nRealOutSize
;
// Wide -> multibyte conversion.
//   szOut     destination buffer; compared against NULL below
//   szUnConv  NUL-terminated wide input (length taken with wxWcslen)
//   nOutSize  assigned to nRealOutSize on one path below
// Both visible return statements yield nRealOutSize - 1.
1890 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
1892 size_t nBufSize
= wxWcslen(szUnConv
) + 1;
1893 size_t nRealOutSize
;
1894 char* szBuffer
= szOut
;
// by default the wide input is reinterpreted as UniChar data in place
1895 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
// scratch output buffer sized at two bytes per input character plus one
// NOTE(review): no delete[] of szBuffer appears in this function -- confirm
// this allocation is released.
1900 nRealOutSize
= ((nBufSize
- 1) << 1)+1 ;
1901 szBuffer
= new char[ nRealOutSize
] ;
1904 nRealOutSize
= nOutSize
;
// 4-byte wchar_t: first convert the input to UTF-16BE in a new UniChar
// buffer; nBufSize becomes the number of UniChar units
1906 #if SIZEOF_WCHAR_T == 4
1907 wxMBConvUTF16BE converter
;
1908 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
1909 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1] ;
1910 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
)) ;
1911 nBufSize
/= sizeof(UniChar
);
1915 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
1922 wxASSERT(theString
);
1924 //Note that CER puts a BOM when converting to unicode
1925 //so we may want to check and use getchars instead in that case
// NOTE(review): per the comment below, a loss byte of 0 makes this call
// return NULL on unconvertible characters, yet theData is passed to
// CFDataGetLength() without a visible NULL check -- confirm.
1926 CFDataRef theData
= CFStringCreateExternalRepresentation(
1930 0 //what to put in characters that can't be converted -
1931 //0 tells CFString to return NULL if it meets such a character
1937 CFRelease(theString
);
1939 nRealOutSize
= CFDataGetLength(theData
);
// no caller buffer: only the size is reported
1941 if ( szOut
== NULL
)
// NOTE(review): the delete[] of szUniBuffer is commented out on both return
// paths, so the UniChar buffer allocated in the SIZEOF_WCHAR_T == 4 branch
// is never freed.
1946 //TODO: This gets flagged as a non-malloced address by the debugger...
1947 //#if SIZEOF_WCHAR_T == 4
1948 // delete[] szUniBuffer;
1951 return nRealOutSize
- 1;
// copy the whole encoded representation into the output buffer
// NOTE(review): the copy length is the CFData length and is not compared
// with nOutSize here -- confirm the output buffer is large enough.
1954 CFRange theRange
= {0, CFDataGetLength(theData
) };
1955 CFDataGetBytes(theData
, theRange
, (UInt8
*) szBuffer
);
1959 //TODO: This gets flagged as a non-malloced address by the debugger...
1960 //#if SIZEOF_WCHAR_T == 4
1961 // delete[] szUniBuffer;
1963 return nRealOutSize
- 1;
1968 //TODO: check for invalid en/de/coding
// encoding of the multibyte side, as passed to Init()
1973 CFStringEncoding m_char_encoding
;
// always kCFStringEncodingUnicode (see Init())
1974 CFStringEncoding m_unicode_encoding
;
1977 #endif // defined(__WXCOCOA__)
1979 // ============================================================================
1980 // Mac conversion classes
1981 // ============================================================================
1983 #if defined(__WXMAC__) && defined(TARGET_CARBON)
// wxMBConv implementation for Carbon built on the Text Encoding Converter
// (TEC) API. Init() creates two converter objects, m_MB2WC_converter and
// m_WC2MB_converter, which MB2WC() and WC2MB() pass to TECConvertText().
// When wchar_t is 4 bytes wide the UTF-16 (UniChar) data is additionally
// passed through wxMBConvUTF16BE.
1985 class wxMBConv_mac
: public wxMBConv
// Initialise with the encoding reported by CFStringGetSystemEncoding().
1990 Init(CFStringGetSystemEncoding()) ;
// Construct from a charset name: mapped to a wxFontEncoding by
// wxFontMapper::CharsetToEncoding(name, false), then to a system encoding by
// wxMacGetSystemEncFromFontEnc().
1993 wxMBConv_mac(const wxChar
* name
)
1995 Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name
, false) ) ) ;
// Construct from a wxFontEncoding value.
1998 wxMBConv_mac(wxFontEncoding encoding
)
2000 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
// Dispose of both TEC converters (presumably the destructor body -- confirm).
// The OSStatus results are stored in status; no check of them is visible.
2005 OSStatus status
= noErr
;
2006 status
= TECDisposeConverter(m_MB2WC_converter
);
2007 status
= TECDisposeConverter(m_WC2MB_converter
);
// Record the multibyte encoding, derive the 16-bit Unicode text encoding and
// create one TEC converter per direction.
// NOTE(review): the status returned by TECCreateConverter is stored but no
// check is visible here; the NULL test on the converters further down is
// the only visible success check.
2011 void Init( TextEncodingBase encoding
)
2013 OSStatus status
= noErr
;
2014 m_char_encoding
= encoding
;
2015 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
2017 status
= TECCreateConverter(&m_MB2WC_converter
,
2019 m_unicode_encoding
);
2020 status
= TECCreateConverter(&m_WC2MB_converter
,
// Multibyte -> wide conversion.
//   buf  destination buffer; where it is NULL the code below uses tbuf
//   psz  NUL-terminated input (length taken with strlen)
//   n    element count used to size tbuf and the UniChar byte capacity
2025 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2027 OSStatus status
= noErr
;
2028 ByteCount byteOutLen
;
2029 ByteCount byteInLen
= strlen(psz
) ;
2030 wchar_t *tbuf
= NULL
;
2031 UniChar
* ubuf
= NULL
;
// temporary output buffer of n wide characters
2037 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
// capacity handed to TECConvertText, in bytes of UTF-16
2039 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
// 4-byte wchar_t: convert into a separate UniChar buffer (2 spare bytes for
// the terminator written below); otherwise convert directly into buf/tbuf
2040 #if SIZEOF_WCHAR_T == 4
2041 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2043 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
// NOTE(review): status is assigned but no check of it is visible.
2045 status
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2046 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2047 #if SIZEOF_WCHAR_T == 4
2048 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2049 // is not properly terminated we get random characters at the end
2050 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
// widen the UTF-16 data into the real output buffer
2051 wxMBConvUTF16BE converter
;
2052 res
= converter
.MB2WC( (buf
? buf
: tbuf
) , (const char*)ubuf
, n
) ;
// 2-byte wchar_t: the result is simply the number of UniChar units produced
2055 res
= byteOutLen
/ sizeof( UniChar
) ;
2060 if ( buf
&& res
< n
)
// Wide -> multibyte conversion.
//   buf  destination buffer; where it is NULL the code below uses tbuf
//   psz  NUL-terminated wide input (length taken with wxWcslen)
//   n    capacity in bytes (used to size tbuf and passed to TECConvertText)
2066 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2068 OSStatus status
= noErr
;
2069 ByteCount byteOutLen
;
2070 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
// temporary output buffer of n bytes
2078 tbuf
= (char*) malloc( n
) ;
2081 ByteCount byteBufferLen
= n
;
2082 UniChar
* ubuf
= NULL
;
// 4-byte wchar_t: first narrow the input to UTF-16BE in a malloc'ed buffer
// (byteInLen becomes its byte length); otherwise use the input in place
2083 #if SIZEOF_WCHAR_T == 4
2084 wxMBConvUTF16BE converter
;
2085 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2086 byteInLen
= unicharlen
;
2087 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2088 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2090 ubuf
= (UniChar
*) psz
;
// NOTE(review): status is assigned but no check of it is visible.
2092 status
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2093 (TextPtr
) (buf
? buf
: tbuf
) , byteBufferLen
, &byteOutLen
);
2094 #if SIZEOF_WCHAR_T == 4
// result is the number of bytes TECConvertText reported as written
2100 size_t res
= byteOutLen
;
2101 if ( buf
&& res
< n
)
// usable only if both converters exist (presumably the IsOk() body -- confirm)
2108 { return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
; }
// TEC converter objects, one per direction, created in Init()
2111 TECObjectRef m_MB2WC_converter
;
2112 TECObjectRef m_WC2MB_converter
;
// multibyte-side encoding and the 16-bit Unicode encoding derived in Init()
2114 TextEncodingBase m_char_encoding
;
2115 TextEncodingBase m_unicode_encoding
;
2118 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2120 // ============================================================================
2121 // wxEncodingConverter based conversion classes
2122 // ============================================================================
// wxMBConv implementation built on wxEncodingConverter: m2w converts from
// m_enc to wxFONTENCODING_UNICODE and w2m converts back.
1// (see the numbered lines below; the class is used as the last-resort
// converter according to the comments in wxCSConv::DoCreate())
2126 class wxMBConv_wxwin
: public wxMBConv
// set up both converters; m_ok is true only if both Init() calls succeed
2131 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2132 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2136 // temporarily just use wxEncodingConverter stuff,
2137 // so that it works while a better implementation is built
// Construct from a charset name: m_enc comes either from
// wxFontMapper::CharsetToEncoding(name, false) or is wxFONTENCODING_SYSTEM.
2138 wxMBConv_wxwin(const wxChar
* name
)
2141 m_enc
= wxFontMapper::Get()->CharsetToEncoding(name
, false);
2143 m_enc
= wxFONTENCODING_SYSTEM
;
// Construct from a wxFontEncoding value.
2148 wxMBConv_wxwin(wxFontEncoding enc
)
// Multibyte -> wide conversion via m2w.
// The size parameter n is unused (WXUNUSED), so the amount written to buf is
// not limited by this method.
2155 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2157 size_t inbuf
= strlen(psz
);
2159 m2w
.Convert(psz
,buf
);
// Wide -> multibyte conversion via w2m.
// As in MB2WC(), the size parameter n is unused (WXUNUSED).
2163 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2165 const size_t inbuf
= wxWcslen(psz
);
2167 w2m
.Convert(psz
,buf
);
// reports the result of the converter initialisation stored in m_ok
2172 bool IsOk() const { return m_ok
; }
// encoding of the multibyte side
2175 wxFontEncoding m_enc
;
// converters for the two directions (multibyte->wide, wide->multibyte)
2176 wxEncodingConverter m2w
, w2m
;
2178 // were we initialized successfully?
2181 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2184 #endif // wxUSE_FONTMAP
2186 // ============================================================================
2187 // wxCSConv implementation
2188 // ============================================================================
2190 void wxCSConv::Init()
// Construct a converter identified by a charset name.
// m_encoding is set to wxFONTENCODING_SYSTEM by this constructor.
2197 wxCSConv::wxCSConv(const wxChar
*charset
)
2206 m_encoding
= wxFONTENCODING_SYSTEM
;
// Construct a converter identified by a wxFontEncoding value.
// wxFONTENCODING_MAX and wxFONTENCODING_DEFAULT are not accepted: they
// trigger wxFAIL_MSG and are replaced by wxFONTENCODING_SYSTEM before the
// value is stored in m_encoding.
2209 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2211 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2213 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2215 encoding
= wxFONTENCODING_SYSTEM
;
2220 m_encoding
= encoding
;
2223 wxCSConv::~wxCSConv()
// Copy constructor: takes over the charset name (through SetName(), which
// stores its own wxStrdup'ed copy) and the encoding value of conv.
// NOTE(review): conv.m_convReal is not copied here -- presumably the real
// converter is created again on demand by CreateConvIfNeeded(); confirm.
2228 wxCSConv::wxCSConv(const wxCSConv
& conv
)
2233 SetName(conv
.m_name
);
2234 m_encoding
= conv
.m_encoding
;
// Assignment: takes over the charset name (through SetName()) and the
// encoding value of conv.
// NOTE(review): no self-assignment check is visible. If this object's name
// is released before SetName(conv.m_name) runs, `x = x` would lose or reuse
// the released name -- confirm and guard with `this != &conv` if needed.
2237 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2241 SetName(conv
.m_name
);
2242 m_encoding
= conv
.m_encoding
;
2247 void wxCSConv::Clear()
// Store a private copy (wxStrdup) of the charset name in m_name.
// NOTE(review): no release of a previously stored m_name is visible here --
// presumably callers only invoke this while m_name is empty; confirm.
2256 void wxCSConv::SetName(const wxChar
*charset
)
2260 m_name
= wxStrdup(charset
);
// Create the real conversion object for the charset name (m_name) or the
// encoding (m_encoding) of this wxCSConv, trying the back ends in the order
// given in the comments below and logging an error if none applies.
2265 wxMBConv
*wxCSConv::DoCreate() const
2267 // check for the special case of ASCII or ISO8859-1 charset: as we have
2268 // special knowledge of it anyhow, we don't need to create a special
2269 // conversion object
2270 if ( m_encoding
== wxFONTENCODING_ISO8859_1
)
2272 // don't convert at all
2276 // we trust OS to do conversion better than we can so try external
2277 // conversion methods first
2279 // the full order is:
2280 // 1. OS conversion (iconv() under Unix or Win32 API)
2281 // 2. hard coded conversions for UTF
2282 // 3. wxEncodingConverter as fall back
2288 #endif // !wxUSE_FONTMAP
// iconv back end: it is addressed by name, so start from m_name and, with
// wxUSE_FONTMAP, ask wxFontMapper for the name of m_encoding
2290 wxString
name(m_name
);
2294 name
= wxFontMapper::Get()->GetEncodingName(m_encoding
);
2295 #endif // wxUSE_FONTMAP
2297 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
2303 #endif // HAVE_ICONV
// Win32 back end: constructed from the name if there is one, else from the
// encoding value
2305 #ifdef wxHAVE_WIN32_MB2WC
2308 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2309 : new wxMBConv_win32(m_encoding
);
2318 #endif // wxHAVE_WIN32_MB2WC
// Mac back end: used for any named charset, or for encoding values below
// wxFONTENCODING_UTF16BE
2319 #if defined(__WXMAC__)
2321 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
) )
2324 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
2325 : new wxMBConv_mac(m_encoding
);
// Cocoa back end: used for any named charset, or for encoding values up to
// and including wxFONTENCODING_UTF16
2333 #if defined(__WXCOCOA__)
2335 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
2338 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
2339 : new wxMBConv_cocoa(m_encoding
);
// hard coded UTF conversions: work out the encoding value to test, resolving
// the name through wxFontMapper when only a name was given
2348 wxFontEncoding enc
= m_encoding
;
2350 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2352 // use "false" to suppress interactive dialogs -- we can be called from
2353 // anywhere and popping up a dialog from here is the last thing we want to
2355 enc
= wxFontMapper::Get()->CharsetToEncoding(m_name
, false);
2357 #endif // wxUSE_FONTMAP
2361 case wxFONTENCODING_UTF7
:
2362 return new wxMBConvUTF7
;
2364 case wxFONTENCODING_UTF8
:
2365 return new wxMBConvUTF8
;
2367 case wxFONTENCODING_UTF16BE
:
2368 return new wxMBConvUTF16BE
;
2370 case wxFONTENCODING_UTF16LE
:
2371 return new wxMBConvUTF16LE
;
2373 case wxFONTENCODING_UTF32BE
:
2374 return new wxMBConvUTF32BE
;
2376 case wxFONTENCODING_UTF32LE
:
2377 return new wxMBConvUTF32LE
;
2380 // nothing to do but put here to suppress gcc warnings
// fall back: wxEncodingConverter based conversion
2387 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2388 : new wxMBConv_wxwin(m_encoding
);
2394 #endif // wxUSE_FONTMAP
2396 // NB: This is a hack to prevent deadlock. What could otherwise happen
2397 // in Unicode build: wxConvLocal creation ends up being here
2398 // because of some failure and logs the error. But wxLog will try to
2399 // attach timestamp, for which it will need wxConvLocal (to convert
2400 // time to char* and then wchar_t*), but that fails, tries to log
2401 // error, but wxLog has a (already locked) critical section that
2402 // guards static buffer.
// the flag is a plain function-local static that is set around the
// wxLogError() call and cleared again afterwards
2403 static bool alreadyLoggingError
= false;
2404 if (!alreadyLoggingError
)
2406 alreadyLoggingError
= true;
2407 wxLogError(_("Cannot convert from the charset '%s'!"),
2411 wxFontMapper::GetEncodingDescription(m_encoding
).c_str()
2412 #else // !wxUSE_FONTMAP
// NOTE(review): m_encoding is a wxFontEncoding value but is formatted with
// "%s" below; this looks like it should be "%d" (or the value converted to a
// string first) -- confirm and fix.
2413 wxString::Format(_("encoding %s"), m_encoding
).c_str()
2414 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2416 alreadyLoggingError
= false;
// Create the real converter (m_convReal) through DoCreate() and clear the
// m_deferred flag. The method is const, so the object is modified through a
// non-const alias of `this`.
// NOTE(review): presumably all of this only runs while m_deferred is set --
// the guarding condition is not visible here; confirm.
2422 void wxCSConv::CreateConvIfNeeded() const
2426 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2429 // if we have neither the name nor the encoding, use the default
2430 // encoding for this system
2431 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
// m_name owns the wxStrdup'ed copy of the system encoding name
2433 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
2435 #endif // wxUSE_INTL
2437 self
->m_convReal
= DoCreate();
2438 self
->m_deferred
= false;
2442 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2444 CreateConvIfNeeded();
2447 return m_convReal
->MB2WC(buf
, psz
, n
);
2450 size_t len
= strlen(psz
);
2454 for (size_t c
= 0; c
<= len
; c
++)
2455 buf
[c
] = (unsigned char)(psz
[c
]);
2461 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2463 CreateConvIfNeeded();
2466 return m_convReal
->WC2MB(buf
, psz
, n
);
2469 const size_t len
= wxWcslen(psz
);
2472 for (size_t c
= 0; c
<= len
; c
++)
2476 buf
[c
] = (char)psz
[c
];
2481 for (size_t c
= 0; c
<= len
; c
++)
2491 // ----------------------------------------------------------------------------
2493 // ----------------------------------------------------------------------------
// File-local converter instances backing the exported wxConv* objects.
// The type of wxConvLibcObj depends on the platform branch taken:
// wxMBConv_win32, wxMBConv_mac (__WXMAC__ without __MACH__) or wxMBConvLibc.
2496 static wxMBConv_win32 wxConvLibcObj
;
2497 #elif defined(__WXMAC__) && !defined(__MACH__)
2498 static wxMBConv_mac wxConvLibcObj
;
2500 static wxMBConvLibc wxConvLibcObj
;
// converter for the system encoding and a fixed ISO8859-1 converter
2503 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
2504 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
2505 static wxMBConvUTF7 wxConvUTF7Obj
;
2506 static wxMBConvUTF8 wxConvUTF8Obj
;
// exported references bound to the static instances above
2509 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
2510 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
2511 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
2512 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
2513 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
// exported pointer, initially pointing at the libc converter
2514 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
2516 #else // !wxUSE_WCHAR_T
2518 // stand-ins in absence of wchar_t
2519 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
2524 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T