1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
8 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
9 // (c) 2000-2003 Vadim Zeitlin
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ============================================================================
15 // ============================================================================
17 // ----------------------------------------------------------------------------
19 // ----------------------------------------------------------------------------
21 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
22 #pragma implementation "strconv.h"
25 // For compilers that support precompilation, includes "wx.h".
26 #include "wx/wxprec.h"
37 #include "wx/strconv.h"
42 #include "wx/msw/private.h"
53 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
54 #define wxHAVE_WIN32_MB2WC
55 #endif // __WIN32__ but !__WXMICROWIN__
57 // ----------------------------------------------------------------------------
59 // ----------------------------------------------------------------------------
69 #include "wx/encconv.h"
70 #include "wx/fontmap.h"
72 // ----------------------------------------------------------------------------
74 // ----------------------------------------------------------------------------
// In-place byte swapping of the first `len` elements of the array `str`.
//
// Both macros expand to a compound statement, so they are used without a
// trailing semicolon. The arguments are parenthesised so that expressions such
// as "ptr + 1" are accepted, `len` is evaluated exactly once, and the counter
// is a size_t so that it cannot wrap before reaching a size_t length.
// NB: `str` is still evaluated once per access, so it must be free of side
//     effects.
#define BSWAP_UCS4(str, len) { const size_t _n = (size_t)(len); for (size_t _c = 0; _c < _n; _c++) (str)[_c]=wxUINT32_SWAP_ALWAYS((str)[_c]); }
#define BSWAP_UTF16(str, len) { const size_t _n = (size_t)(len); for (size_t _c = 0; _c < _n; _c++) (str)[_c]=wxUINT16_SWAP_ALWAYS((str)[_c]); }
79 #if SIZEOF_WCHAR_T == 4
80 #define WC_NAME "UCS4"
81 #define WC_BSWAP BSWAP_UCS4
82 #ifdef WORDS_BIGENDIAN
83 #define WC_NAME_BEST "UCS-4BE"
85 #define WC_NAME_BEST "UCS-4LE"
87 #elif SIZEOF_WCHAR_T == 2
88 #define WC_NAME "UTF16"
89 #define WC_BSWAP BSWAP_UTF16
91 #ifdef WORDS_BIGENDIAN
92 #define WC_NAME_BEST "UTF-16BE"
94 #define WC_NAME_BEST "UTF-16LE"
96 #else // sizeof(wchar_t) != 2 nor 4
97 // does this ever happen?
98 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
101 // ============================================================================
103 // ============================================================================
105 // ----------------------------------------------------------------------------
106 // UTF-16 en/decoding to/from UCS-4
107 // ----------------------------------------------------------------------------
// Encodes the single UCS-4 value `input` as UTF-16 code unit(s) stored through
// `output`.
//
// Callers in this file compare the result against (size_t)-1 to detect failure
// and otherwise use it as the number of 16-bit units produced.
// NOTE(review): the condition guarding the single-unit store is not visible in
// this excerpt -- presumably "input <= 0xffff"; confirm against the full file.
110 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
// single unit case: the value is stored unchanged, truncated to 16 bits
115 *output
= (wxUint16
) input
;
// values from 0x110000 up are outside of the range UTF-16 can represent
118 else if (input
>=0x110000)
// high surrogate: 0xd7c0 == 0xd800 - (0x10000 >> 10), i.e. the 0x10000 offset
// is folded into the surrogate base instead of being subtracted from input
126 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
// low surrogate: the 10 least significant bits on top of 0xdc00
127 *output
= (wxUint16
) ((input
&0x3ff)+0xdc00);
// Decodes one character from the UTF-16 sequence starting at `input`.
//
// On success the decoded UCS-4 value is stored in `output` and the number of
// 16-bit units consumed (1 or 2) is returned. If the sequence is malformed
// (lead surrogate not followed by a trail surrogate, or a trail surrogate in
// the lead position), `output` receives the offending unit unchanged and
// (size_t)-1 is returned so that the caller can decide how to resynchronise.
//
// `input` must be terminated: input[1] is only read when input[0] is a
// surrogate and hence not the terminating 0.
static size_t decode_utf16(const wxUint16* input, wxUint32& output)
{
    const wxUint16 lead = input[0];

    if ((lead < 0xd800) || (lead > 0xdfff))
    {
        // not a surrogate at all: the unit is the character
        output = lead;
        return 1;
    }

    // a valid pair is a lead in 0xd800..0xdbff followed by a trail in
    // 0xdc00..0xdfff, both bounds inclusive (0xdfff used to be rejected)
    if ((lead > 0xdbff) || (input[1] < 0xdc00) || (input[1] > 0xdfff))
    {
        output = lead;
        return (size_t)-1;
    }

    // 0xd7c0 == 0xd800 - (0x10000 >> 10): adds the 0x10000 offset back
    output = ((wxUint32)(lead - 0xd7c0) << 10) + (wxUint32)(input[1] - 0xdc00);
    return 2;
}
153 // ----------------------------------------------------------------------------
155 // ----------------------------------------------------------------------------
157 wxMBConv::~wxMBConv()
159 // nothing to do here
162 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
166 // calculate the length of the buffer needed first
167 size_t nLen
= MB2WC(NULL
, psz
, 0);
168 if ( nLen
!= (size_t)-1 )
170 // now do the actual conversion
171 wxWCharBuffer
buf(nLen
);
172 MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NUL
178 wxWCharBuffer
buf((wchar_t *)NULL
);
183 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
187 size_t nLen
= WC2MB(NULL
, pwz
, 0);
188 if ( nLen
!= (size_t)-1 )
190 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
191 WC2MB(buf
.data(), pwz
, nLen
+ 4);
197 wxCharBuffer
buf((char *)NULL
);
202 // ----------------------------------------------------------------------------
204 // ----------------------------------------------------------------------------
206 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
208 return wxMB2WC(buf
, psz
, n
);
211 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
213 return wxWC2MB(buf
, psz
, n
);
216 // ----------------------------------------------------------------------------
218 // ----------------------------------------------------------------------------
221 static char utf7_setD
[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
222 "abcdefghijklmnopqrstuvwxyz"
223 "0123456789'(),-./:?";
224 static char utf7_setO
[]="!\"#$%&*;<=>@[]^_`{|}";
225 static char utf7_setB
[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
226 "abcdefghijklmnopqrstuvwxyz"
230 // TODO: write actual implementations of UTF-7 here
231 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf
),
232 const char * WXUNUSED(psz
),
233 size_t WXUNUSED(n
)) const
238 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf
),
239 const wchar_t * WXUNUSED(psz
),
240 size_t WXUNUSED(n
)) const
245 // ----------------------------------------------------------------------------
247 // ----------------------------------------------------------------------------
249 static wxUint32 utf8_max
[]=
250 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
252 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
256 while (*psz
&& ((!buf
) || (len
< n
)))
258 unsigned char cc
= *psz
++, fc
= cc
;
260 for (cnt
= 0; fc
& 0x80; cnt
++)
274 // invalid UTF-8 sequence
279 unsigned ocnt
= cnt
- 1;
280 wxUint32 res
= cc
& (0x3f >> cnt
);
284 if ((cc
& 0xC0) != 0x80)
286 // invalid UTF-8 sequence
289 res
= (res
<< 6) | (cc
& 0x3f);
291 if (res
<= utf8_max
[ocnt
])
293 // illegal UTF-8 encoding
297 // cast is ok because wchar_t == wxUint16 if WC_UTF16
298 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
299 if (pa
== (size_t)-1)
308 #endif // WC_UTF16/!WC_UTF16
312 if (buf
&& (len
< n
))
317 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
321 while (*psz
&& ((!buf
) || (len
< n
)))
325 // cast is ok for WC_UTF16
326 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
327 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
329 cc
=(*psz
++) & 0x7fffffff;
332 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
346 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
348 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
353 if (buf
&& (len
<n
)) *buf
= 0;
361 // ----------------------------------------------------------------------------
363 // ----------------------------------------------------------------------------
365 #ifdef WORDS_BIGENDIAN
366 #define wxMBConvUTF16straight wxMBConvUTF16BE
367 #define wxMBConvUTF16swap wxMBConvUTF16LE
369 #define wxMBConvUTF16swap wxMBConvUTF16BE
370 #define wxMBConvUTF16straight wxMBConvUTF16LE
376 // copy 16bit MB to 16bit String
377 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
381 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
384 *buf
++ = *(wxUint16
*)psz
;
387 psz
+= sizeof(wxUint16
);
389 if (buf
&& len
<n
) *buf
=0;
395 // copy 16bit String to 16bit MB
396 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
400 while (*psz
&& (!buf
|| len
< n
))
404 *(wxUint16
*)buf
= *psz
;
405 buf
+= sizeof(wxUint16
);
407 len
+= sizeof(wxUint16
);
410 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
416 // swap 16bit MB to 16bit String
417 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
421 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
425 ((char *)buf
)[0] = psz
[1];
426 ((char *)buf
)[1] = psz
[0];
430 psz
+= sizeof(wxUint16
);
432 if (buf
&& len
<n
) *buf
=0;
438 // swap 16bit String to 16bit MB
439 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
443 while (*psz
&& (!buf
|| len
< n
))
447 *buf
++ = ((char*)psz
)[1];
448 *buf
++ = ((char*)psz
)[0];
450 len
+= sizeof(wxUint16
);
453 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
462 // copy 16bit MB to 32bit String
463 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
467 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
470 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
471 if (pa
== (size_t)-1)
477 psz
+= pa
* sizeof(wxUint16
);
479 if (buf
&& len
<n
) *buf
=0;
485 // copy 32bit String to 16bit MB
486 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
490 while (*psz
&& (!buf
|| len
< n
))
493 size_t pa
=encode_utf16(*psz
, cc
);
495 if (pa
== (size_t)-1)
500 *(wxUint16
*)buf
= cc
[0];
501 buf
+= sizeof(wxUint16
);
504 *(wxUint16
*)buf
= cc
[1];
505 buf
+= sizeof(wxUint16
);
509 len
+= pa
*sizeof(wxUint16
);
512 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
518 // swap 16bit MB to 32bit String
519 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
523 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
527 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
528 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
530 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
531 if (pa
== (size_t)-1)
538 psz
+= pa
* sizeof(wxUint16
);
540 if (buf
&& len
<n
) *buf
=0;
546 // swap 32bit String to 16bit MB
547 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
551 while (*psz
&& (!buf
|| len
< n
))
554 size_t pa
=encode_utf16(*psz
, cc
);
556 if (pa
== (size_t)-1)
561 *buf
++ = ((char*)cc
)[1];
562 *buf
++ = ((char*)cc
)[0];
565 *buf
++ = ((char*)cc
)[3];
566 *buf
++ = ((char*)cc
)[2];
570 len
+= pa
*sizeof(wxUint16
);
573 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
581 // ----------------------------------------------------------------------------
583 // ----------------------------------------------------------------------------
585 #ifdef WORDS_BIGENDIAN
586 #define wxMBConvUTF32straight wxMBConvUTF32BE
587 #define wxMBConvUTF32swap wxMBConvUTF32LE
589 #define wxMBConvUTF32swap wxMBConvUTF32BE
590 #define wxMBConvUTF32straight wxMBConvUTF32LE
594 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
595 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
600 // copy 32bit MB to 16bit String
601 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
605 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
609 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
610 if (pa
== (size_t)-1)
620 psz
+= sizeof(wxUint32
);
622 if (buf
&& len
<n
) *buf
=0;
628 // copy 16bit String to 32bit MB
629 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
633 while (*psz
&& (!buf
|| len
< n
))
637 // cast is ok for WC_UTF16
638 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
639 if (pa
== (size_t)-1)
644 *(wxUint32
*)buf
= cc
;
645 buf
+= sizeof(wxUint32
);
647 len
+= sizeof(wxUint32
);
651 if (buf
&& len
<=n
-sizeof(wxUint32
))
659 // swap 32bit MB to 16bit String
660 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
664 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
667 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
668 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
673 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
674 if (pa
== (size_t)-1)
684 psz
+= sizeof(wxUint32
);
694 // swap 16bit String to 32bit MB
695 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
699 while (*psz
&& (!buf
|| len
< n
))
703 // cast is ok for WC_UTF16
704 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
705 if (pa
== (size_t)-1)
715 len
+= sizeof(wxUint32
);
719 if (buf
&& len
<=n
-sizeof(wxUint32
))
728 // copy 32bit MB to 32bit String
729 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
733 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
736 *buf
++ = *(wxUint32
*)psz
;
738 psz
+= sizeof(wxUint32
);
748 // copy 32bit String to 32bit MB
749 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
753 while (*psz
&& (!buf
|| len
< n
))
757 *(wxUint32
*)buf
= *psz
;
758 buf
+= sizeof(wxUint32
);
761 len
+= sizeof(wxUint32
);
765 if (buf
&& len
<=n
-sizeof(wxUint32
))
772 // swap 32bit MB to 32bit String
773 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
777 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
781 ((char *)buf
)[0] = psz
[3];
782 ((char *)buf
)[1] = psz
[2];
783 ((char *)buf
)[2] = psz
[1];
784 ((char *)buf
)[3] = psz
[0];
788 psz
+= sizeof(wxUint32
);
798 // swap 32bit String to 32bit MB
799 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
803 while (*psz
&& (!buf
|| len
< n
))
807 *buf
++ = ((char *)psz
)[3];
808 *buf
++ = ((char *)psz
)[2];
809 *buf
++ = ((char *)psz
)[1];
810 *buf
++ = ((char *)psz
)[0];
812 len
+= sizeof(wxUint32
);
816 if (buf
&& len
<=n
-sizeof(wxUint32
))
826 // ============================================================================
827 // The classes doing conversion using the iconv_xxx() functions
828 // ============================================================================
832 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
833 // if output buffer is _exactly_ as big as needed. Such case is (unless there's
834 // yet another bug in glibc) the only case when iconv() returns with (size_t)-1
835 // (which means error) and says there are 0 bytes left in the input buffer --
836 // when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
837 // this alternative test for iconv() failure.
838 // [This bug does not appear in glibc 2.2.]
839 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
840 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
841 (errno != E2BIG || bufLeft != 0))
843 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
846 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
848 // ----------------------------------------------------------------------------
849 // wxMBConv_iconv: encapsulates an iconv character set
850 // ----------------------------------------------------------------------------
852 class wxMBConv_iconv
: public wxMBConv
855 wxMBConv_iconv(const wxChar
*name
);
856 virtual ~wxMBConv_iconv();
858 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
859 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
862 { return (m2w
!= (iconv_t
)-1) && (w2m
!= (iconv_t
)-1); }
865 // the iconv handlers used to translate from multibyte to wide char and in
866 // the other direction
871 // the name (for iconv_open()) of a wide char charset -- if none is
872 // available on this machine, it will remain NULL
873 static const char *ms_wcCharsetName
;
875 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
876 // different endian-ness than the native one
877 static bool ms_wcNeedsSwap
;
880 const char *wxMBConv_iconv::ms_wcCharsetName
= NULL
;
881 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
883 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
885 // Do it the hard way
887 for (size_t i
= 0; i
< wxStrlen(name
)+1; i
++)
888 cname
[i
] = (char) name
[i
];
890 // check for charset that represents wchar_t:
891 if (ms_wcCharsetName
== NULL
)
893 ms_wcNeedsSwap
= false;
895 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
896 ms_wcCharsetName
= WC_NAME_BEST
;
897 m2w
= iconv_open(ms_wcCharsetName
, cname
);
899 if (m2w
== (iconv_t
)-1)
901 // try charset w/o bytesex info (e.g. "UCS4")
902 // and check for bytesex ourselves:
903 ms_wcCharsetName
= WC_NAME
;
904 m2w
= iconv_open(ms_wcCharsetName
, cname
);
906 // last bet, try if it knows WCHAR_T pseudo-charset
907 if (m2w
== (iconv_t
)-1)
909 ms_wcCharsetName
= "WCHAR_T";
910 m2w
= iconv_open(ms_wcCharsetName
, cname
);
913 if (m2w
!= (iconv_t
)-1)
915 char buf
[2], *bufPtr
;
916 wchar_t wbuf
[2], *wbufPtr
;
924 outsz
= SIZEOF_WCHAR_T
* 2;
928 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
929 (char**)&wbufPtr
, &outsz
);
931 if (ICONV_FAILED(res
, insz
))
933 ms_wcCharsetName
= NULL
;
934 wxLogLastError(wxT("iconv"));
935 wxLogError(_("Conversion to charset '%s' doesn't work."), name
);
939 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
944 ms_wcCharsetName
= NULL
;
946 // VS: we must not output an error here, since wxWindows will safely
947 // fall back to using wxEncodingConverter.
948 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name
);
952 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName
, ms_wcNeedsSwap
);
954 else // we already have ms_wcCharsetName
956 m2w
= iconv_open(ms_wcCharsetName
, cname
);
959 // NB: don't ever pass NULL to iconv_open(), it may crash!
960 if ( ms_wcCharsetName
)
962 w2m
= iconv_open( cname
, ms_wcCharsetName
);
970 wxMBConv_iconv::~wxMBConv_iconv()
972 if ( m2w
!= (iconv_t
)-1 )
974 if ( w2m
!= (iconv_t
)-1 )
978 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
980 size_t inbuf
= strlen(psz
);
981 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
983 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
984 wchar_t *bufPtr
= buf
;
985 const char *pszPtr
= psz
;
989 // have destination buffer, convert there
991 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
992 (char**)&bufPtr
, &outbuf
);
993 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
997 // convert to native endianness
998 WC_BSWAP(buf
/* _not_ bufPtr */, res
)
1001 // NB: iconv was given only strlen(psz) characters on input, and so
1002 // it couldn't convert the trailing zero. Let's do it ourselves
1003 // if there's some room left for it in the output buffer.
1009 // no destination buffer... convert using temp buffer
1010 // to calculate destination buffer requirement
1015 outbuf
= 8*SIZEOF_WCHAR_T
;
1018 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1019 (char**)&bufPtr
, &outbuf
);
1021 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1022 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1025 if (ICONV_FAILED(cres
, inbuf
))
1027 //VS: it is ok if iconv fails, hence trace only
1028 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1035 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1037 size_t inbuf
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
1041 wchar_t *tmpbuf
= 0;
1045 // need to copy to temp buffer to switch endianness
1046 // this absolutely doesn't rock!
1047 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1048 // could be in read-only memory, or be accessed in some other thread)
1049 tmpbuf
=(wchar_t*)malloc((inbuf
+1)*SIZEOF_WCHAR_T
);
1050 memcpy(tmpbuf
,psz
,(inbuf
+1)*SIZEOF_WCHAR_T
);
1051 WC_BSWAP(tmpbuf
, inbuf
)
1057 // have destination buffer, convert there
1058 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1062 // NB: iconv was given only wcslen(psz) characters on input, and so
1063 // it couldn't convert the trailing zero. Let's do it ourselves
1064 // if there's some room left for it in the output buffer.
1070 // no destination buffer... convert using temp buffer
1071 // to calculate destination buffer requirement
1075 buf
= tbuf
; outbuf
= 16;
1077 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1080 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1088 if (ICONV_FAILED(cres
, inbuf
))
1090 //VS: it is ok if iconv fails, hence trace only
1091 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1098 #endif // HAVE_ICONV
1101 // ============================================================================
1102 // Win32 conversion classes
1103 // ============================================================================
1105 #ifdef wxHAVE_WIN32_MB2WC
1108 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1109 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1111 class wxMBConv_win32
: public wxMBConv
1116 m_CodePage
= CP_ACP
;
1119 wxMBConv_win32(const wxChar
* name
)
1121 m_CodePage
= wxCharsetToCodepage(name
);
1124 wxMBConv_win32(wxFontEncoding encoding
)
1126 m_CodePage
= wxEncodingToCodepage(encoding
);
1129 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1131 const size_t len
= ::MultiByteToWideChar
1133 m_CodePage
, // code page
1135 psz
, // input string
1136 -1, // its length (NUL-terminated)
1137 buf
, // output string
1138 buf
? n
: 0 // size of output buffer
1141 // note that it returns count of written chars for buf != NULL and size
1142 // of the needed buffer for buf == NULL so in either case the length of
1143 // the string (which never includes the terminating NUL) is one less
1144 return len
? len
- 1 : (size_t)-1;
1147 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1149 const size_t len
= ::WideCharToMultiByte
1151 m_CodePage
, // code page
1153 psz
, // input string
1154 -1, // it is (wide) NUL-terminated
1155 buf
, // output buffer
1156 buf
? n
: 0, // and its size
1157 NULL
, // default "replacement" char
1158 NULL
// [out] was it used?
1161 // see the comment above for the reason of "len - 1"
1162 return len
? len
- 1 : (size_t)-1;
1166 { return m_CodePage
!= -1; }
1172 #endif // wxHAVE_WIN32_MB2WC
1175 // ============================================================================
1176 // wxEncodingConverter based conversion classes
1177 // ============================================================================
1181 class wxMBConv_wxwin
: public wxMBConv
1186 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
1187 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
1191 // temporarily just use wxEncodingConverter stuff,
1192 // so that it works while a better implementation is built
1193 wxMBConv_wxwin(const wxChar
* name
)
1196 m_enc
= wxFontMapper::Get()->CharsetToEncoding(name
, false);
1198 m_enc
= wxFONTENCODING_SYSTEM
;
1203 wxMBConv_wxwin(wxFontEncoding enc
)
1210 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
1212 size_t inbuf
= strlen(psz
);
1214 m2w
.Convert(psz
,buf
);
1218 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
1220 const size_t inbuf
= wxWcslen(psz
);
1222 w2m
.Convert(psz
,buf
);
1227 bool IsOk() const { return m_ok
; }
1230 wxFontEncoding m_enc
;
1231 wxEncodingConverter m2w
, w2m
;
1233 // were we initialized successfully?
1236 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
1239 #endif // wxUSE_FONTMAP
1241 // ============================================================================
1242 // wxCSConv implementation
1243 // ============================================================================
1245 void wxCSConv::Init()
1252 wxCSConv::wxCSConv(const wxChar
*charset
)
1261 m_encoding
= wxFONTENCODING_SYSTEM
;
1264 wxCSConv::wxCSConv(wxFontEncoding encoding
)
1266 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
1268 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
1270 encoding
= wxFONTENCODING_SYSTEM
;
1275 m_encoding
= encoding
;
1278 wxCSConv::~wxCSConv()
1283 wxCSConv::wxCSConv(const wxCSConv
& conv
)
1288 SetName(conv
.m_name
);
1289 m_encoding
= conv
.m_encoding
;
1292 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
1296 SetName(conv
.m_name
);
1297 m_encoding
= conv
.m_encoding
;
1302 void wxCSConv::Clear()
1311 void wxCSConv::SetName(const wxChar
*charset
)
1315 m_name
= wxStrdup(charset
);
1320 wxMBConv
*wxCSConv::DoCreate() const
1322 // check for the special case of ASCII or ISO8859-1 charset: as we have
1323 // special knowledge of it anyhow, we don't need to create a special
1324 // conversion object
1325 if ( m_encoding
== wxFONTENCODING_ISO8859_1
)
1327 // don't convert at all
1331 // we trust OS to do conversion better than we can so try external
1332 // conversion methods first
1334 // the full order is:
1335 // 1. OS conversion (iconv() under Unix or Win32 API)
1336 // 2. hard coded conversions for UTF
1337 // 3. wxEncodingConverter as fall back
1343 #endif // !wxUSE_FONTMAP
1345 wxString
name(m_name
);
1349 name
= wxFontMapper::Get()->GetEncodingName(m_encoding
);
1350 #endif // wxUSE_FONTMAP
1352 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
1358 #endif // HAVE_ICONV
1360 #ifdef wxHAVE_WIN32_MB2WC
1362 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
1363 : new wxMBConv_win32(m_encoding
);
1369 #endif // wxHAVE_WIN32_MB2WC
1372 wxFontEncoding enc
= m_encoding
;
1374 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
1376 // use "false" to suppress interactive dialogs -- we can be called from
1377 // anywhere and popping up a dialog from here is the last thing we want to
1379 enc
= wxFontMapper::Get()->CharsetToEncoding(m_name
, false);
1381 #endif // wxUSE_FONTMAP
1385 case wxFONTENCODING_UTF7
:
1386 return new wxMBConvUTF7
;
1388 case wxFONTENCODING_UTF8
:
1389 return new wxMBConvUTF8
;
1391 case wxFONTENCODING_UTF16BE
:
1392 return new wxMBConvUTF16BE
;
1394 case wxFONTENCODING_UTF16LE
:
1395 return new wxMBConvUTF16LE
;
1397 case wxFONTENCODING_UTF32BE
:
1398 return new wxMBConvUTF32BE
;
1400 case wxFONTENCODING_UTF32LE
:
1401 return new wxMBConvUTF32LE
;
1404 // nothing to do but put here to suppress gcc warnings
1411 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
1412 : new wxMBConv_wxwin(m_encoding
);
1418 #endif // wxUSE_FONTMAP
1420 // NB: This is a hack to prevent deadlock. What could otherwise happen
1421 // in Unicode build: wxConvLocal creation ends up being here
1422 // because of some failure and logs the error. But wxLog will try to
1423 // attach timestamp, for which it will need wxConvLocal (to convert
1424 // time to char* and then wchar_t*), but that fails, tries to log
1425 // error, but wxLog has a (already locked) critical section that
1426 // guards static buffer.
1427 static bool alreadyLoggingError
= false;
1428 if (!alreadyLoggingError
)
1430 alreadyLoggingError
= true;
1431 wxLogError(_("Cannot convert from the charset '%s'!"),
1435 wxFontMapper::GetEncodingDescription(m_encoding
).c_str()
1436 #else // !wxUSE_FONTMAP
1437 wxString::Format(_("encoding %s"), m_encoding
).c_str()
1438 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1440 alreadyLoggingError
= false;
1446 void wxCSConv::CreateConvIfNeeded() const
1450 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
1453 // if we have neither the name nor the encoding, use the default
1454 // encoding for this system
1455 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
1457 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
1459 #endif // wxUSE_INTL
1461 self
->m_convReal
= DoCreate();
1462 self
->m_deferred
= false;
1466 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1468 CreateConvIfNeeded();
1471 return m_convReal
->MB2WC(buf
, psz
, n
);
1474 size_t len
= strlen(psz
);
1478 for (size_t c
= 0; c
<= len
; c
++)
1479 buf
[c
] = (unsigned char)(psz
[c
]);
1485 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1487 CreateConvIfNeeded();
1490 return m_convReal
->WC2MB(buf
, psz
, n
);
1493 const size_t len
= wxWcslen(psz
);
1496 for (size_t c
= 0; c
<= len
; c
++)
1505 for (size_t c
= 0; c
<= len
; c
++)
1515 // ----------------------------------------------------------------------------
1517 // ----------------------------------------------------------------------------
1520 static wxMBConv_win32 wxConvLibcObj
;
1522 static wxMBConvLibc wxConvLibcObj
;
1525 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
1526 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
1527 static wxMBConvUTF7 wxConvUTF7Obj
;
1528 static wxMBConvUTF8 wxConvUTF8Obj
;
1531 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
1532 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
1533 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
1534 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
1535 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
1536 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
1538 #else // !wxUSE_WCHAR_T
1540 // stand-ins in absence of wchar_t
1541 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
1546 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T