1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
8 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
9 // (c) 2000-2003 Vadim Zeitlin
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ============================================================================
15 // ============================================================================
17 // ----------------------------------------------------------------------------
19 // ----------------------------------------------------------------------------
21 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
22 #pragma implementation "strconv.h"
25 // For compilers that support precompilation, includes "wx.h".
26 #include "wx/wxprec.h"
38 #include "wx/msw/private.h"
49 #include "wx/module.h"
50 #include "wx/strconv.h"
52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54 #endif // __WIN32__ but !__WXMICROWIN__
56 // ----------------------------------------------------------------------------
58 // ----------------------------------------------------------------------------
61 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
;
62 WXDLLIMPEXP_DATA_BASE(wxCSConv
) wxConvLocal((const wxChar
*)NULL
);
63 WXDLLIMPEXP_DATA_BASE(wxCSConv
) wxConvISO8859_1(_T("iso-8859-1"));
65 // stand-ins in absence of wchar_t
66 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
71 #endif // wxUSE_WCHAR_T
73 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibc
;
75 class wxStrConvModule
: public wxModule
78 wxStrConvModule() : wxModule() { }
79 virtual bool OnInit() { return true; }
84 wxConvISO8859_1
.Clear();
88 DECLARE_DYNAMIC_CLASS(wxStrConvModule
)
91 IMPLEMENT_DYNAMIC_CLASS(wxStrConvModule
, wxModule
)
94 // ----------------------------------------------------------------------------
96 // ----------------------------------------------------------------------------
108 #include "wx/encconv.h"
109 #include "wx/fontmap.h"
111 // ----------------------------------------------------------------------------
113 // ----------------------------------------------------------------------------
115 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
116 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
118 // under Unix SIZEOF_WCHAR_T is defined by configure, but under other platforms
119 // it might be not defined - assume the most common value
120 #ifndef SIZEOF_WCHAR_T
121 #define SIZEOF_WCHAR_T 2
122 #endif // !defined(SIZEOF_WCHAR_T)
124 #if SIZEOF_WCHAR_T == 4
125 #define WC_NAME "UCS4"
126 #define WC_BSWAP BSWAP_UCS4
127 #ifdef WORDS_BIGENDIAN
128 #define WC_NAME_BEST "UCS-4BE"
130 #define WC_NAME_BEST "UCS-4LE"
132 #elif SIZEOF_WCHAR_T == 2
133 #define WC_NAME "UTF16"
134 #define WC_BSWAP BSWAP_UTF16
136 #ifdef WORDS_BIGENDIAN
137 #define WC_NAME_BEST "UTF-16BE"
139 #define WC_NAME_BEST "UTF-16LE"
141 #else // sizeof(wchar_t) != 2 nor 4
142 // I don't know what to do about this
143 #error "Weird sizeof(wchar_t): please report your platform details to wx-users mailing list"
146 // ============================================================================
148 // ============================================================================
150 // ----------------------------------------------------------------------------
151 // UTF-16 en/decoding to/from UCS-4
152 // ----------------------------------------------------------------------------
155 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
159 if (output
) *output
++ = (wxUint16
) input
;
162 else if (input
>=0x110000)
170 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
171 *output
++ = (wxUint16
) ((input
&0x3ff)+0xdc00);
177 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
179 if ((*input
<0xd800) || (*input
>0xdfff))
184 else if ((input
[1]<0xdc00) || (input
[1]>=0xdfff))
191 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
197 // ----------------------------------------------------------------------------
199 // ----------------------------------------------------------------------------
201 #define IGNORE_LIBC 0
203 wxMBConv::~wxMBConv()
205 // nothing to do here
208 size_t wxMBConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
213 for (size_t i
= 0; i
< strlen( psz
)+1; i
++)
214 buf
[i
] = (wchar_t) psz
[i
];
215 return strlen( psz
);
219 return strlen( psz
);
222 return wxMB2WC(buf
, psz
, n
);
226 size_t wxMBConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
231 for (size_t i
= 0; i
< wxStrlen( psz
)+1; i
++)
232 buf
[i
] = (char) psz
[i
];
233 return wxStrlen( psz
);
237 return wxStrlen( psz
);
240 return wxWC2MB(buf
, psz
, n
);
244 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
248 // calculate the length of the buffer needed first
249 size_t nLen
= MB2WC(NULL
, psz
, 0);
250 if ( nLen
!= (size_t)-1 )
252 // now do the actual conversion
253 wxWCharBuffer
buf(nLen
);
254 MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NUL
260 wxWCharBuffer
buf((wchar_t *)NULL
);
265 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
269 size_t nLen
= WC2MB(NULL
, pwz
, 0);
270 if ( nLen
!= (size_t)-1 )
272 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
273 WC2MB(buf
.data(), pwz
, nLen
+ 4);
279 wxCharBuffer
buf((char *)NULL
);
284 // ----------------------------------------------------------------------------
286 // ----------------------------------------------------------------------------
288 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
) wxConvUTF7
;
291 static char utf7_setD
[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
292 "abcdefghijklmnopqrstuvwxyz"
293 "0123456789'(),-./:?";
294 static char utf7_setO
[]="!\"#$%&*;<=>@[]^_`{|}";
295 static char utf7_setB
[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
296 "abcdefghijklmnopqrstuvwxyz"
300 // TODO: write actual implementations of UTF-7 here
301 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf
),
302 const char * WXUNUSED(psz
),
303 size_t WXUNUSED(n
)) const
308 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf
),
309 const wchar_t * WXUNUSED(psz
),
310 size_t WXUNUSED(n
)) const
315 // ----------------------------------------------------------------------------
317 // ----------------------------------------------------------------------------
319 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
) wxConvUTF8
;
321 static wxUint32 utf8_max
[]=
322 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
324 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
328 while (*psz
&& ((!buf
) || (len
< n
)))
330 unsigned char cc
= *psz
++, fc
= cc
;
332 for (cnt
= 0; fc
& 0x80; cnt
++)
346 // invalid UTF-8 sequence
351 unsigned ocnt
= cnt
- 1;
352 wxUint32 res
= cc
& (0x3f >> cnt
);
356 if ((cc
& 0xC0) != 0x80)
358 // invalid UTF-8 sequence
361 res
= (res
<< 6) | (cc
& 0x3f);
363 if (res
<= utf8_max
[ocnt
])
365 // illegal UTF-8 encoding
369 size_t pa
= encode_utf16(res
, buf
);
370 if (pa
== (size_t)-1)
379 #endif // WC_UTF16/!WC_UTF16
383 if (buf
&& (len
< n
))
388 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
392 while (*psz
&& ((!buf
) || (len
< n
)))
396 size_t pa
= decode_utf16(psz
, cc
);
397 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
399 cc
=(*psz
++) & 0x7fffffff;
402 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
416 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
418 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
423 if (buf
&& (len
<n
)) *buf
= 0;
431 // ----------------------------------------------------------------------------
433 // ----------------------------------------------------------------------------
435 #ifdef WORDS_BIGENDIAN
436 #define wxMBConvUTF16straight wxMBConvUTF16BE
437 #define wxMBConvUTF16swap wxMBConvUTF16LE
439 #define wxMBConvUTF16swap wxMBConvUTF16BE
440 #define wxMBConvUTF16straight wxMBConvUTF16LE
444 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF16LE
) wxConvUTF16LE
;
445 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF16BE
) wxConvUTF16BE
;
454 // copy 16bit MB to 16bit String
455 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
459 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
462 *buf
++ = *(wxUint16
*)psz
;
465 psz
+= sizeof(wxUint16
);
467 if (buf
&& len
<n
) *buf
=0;
473 // copy 16bit String to 16bit MB
474 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
478 while (*psz
&& (!buf
|| len
< n
))
482 *(wxUint16
*)buf
= *psz
;
483 buf
+= sizeof(wxUint16
);
485 len
+= sizeof(wxUint16
);
488 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
494 // swap 16bit MB to 16bit String
495 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
499 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
503 ((char *)buf
)[0] = psz
[1];
504 ((char *)buf
)[1] = psz
[0];
508 psz
+= sizeof(wxUint16
);
510 if (buf
&& len
<n
) *buf
=0;
516 // swap 16bit MB to 16bit String
517 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
521 while (*psz
&& (!buf
|| len
< n
))
525 *buf
++ = ((char*)psz
)[1];
526 *buf
++ = ((char*)psz
)[0];
528 len
+= sizeof(wxUint16
);
531 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
540 // copy 16bit MB to 32bit String
541 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
545 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
548 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
549 if (pa
== (size_t)-1)
555 psz
+= pa
* sizeof(wxUint16
);
557 if (buf
&& len
<n
) *buf
=0;
563 // copy 32bit String to 16bit MB
564 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
568 while (*psz
&& (!buf
|| len
< n
))
571 size_t pa
=encode_utf16(*psz
, cc
);
573 if (pa
== (size_t)-1)
578 *(wxUint16
*)buf
= cc
[0];
579 buf
+= sizeof(wxUint16
);
582 *(wxUint16
*)buf
= cc
[1];
583 buf
+= sizeof(wxUint16
);
587 len
+= pa
*sizeof(wxUint16
);
590 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
596 // swap 16bit MB to 32bit String
597 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
601 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
605 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
606 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
608 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
609 if (pa
== (size_t)-1)
616 psz
+= pa
* sizeof(wxUint16
);
618 if (buf
&& len
<n
) *buf
=0;
624 // swap 32bit String to 16bit MB
625 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
629 while (*psz
&& (!buf
|| len
< n
))
632 size_t pa
=encode_utf16(*psz
, cc
);
634 if (pa
== (size_t)-1)
639 *buf
++ = ((char*)cc
)[1];
640 *buf
++ = ((char*)cc
)[0];
643 *buf
++ = ((char*)cc
)[3];
644 *buf
++ = ((char*)cc
)[2];
648 len
+= pa
*sizeof(wxUint16
);
651 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
659 // ----------------------------------------------------------------------------
661 // ----------------------------------------------------------------------------
663 #ifdef WORDS_BIGENDIAN
664 #define wxMBConvUTF32straight wxMBConvUTF32BE
665 #define wxMBConvUTF32swap wxMBConvUTF32LE
667 #define wxMBConvUTF32swap wxMBConvUTF32BE
668 #define wxMBConvUTF32straight wxMBConvUTF32LE
672 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
673 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
678 // copy 32bit MB to 16bit String
679 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
683 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
687 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
688 if (pa
== (size_t)-1)
698 psz
+= sizeof(wxUint32
);
700 if (buf
&& len
<n
) *buf
=0;
706 // copy 16bit String to 32bit MB
707 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
711 while (*psz
&& (!buf
|| len
< n
))
715 size_t pa
=decode_utf16(psz
, cc
);
716 if (pa
== (size_t)-1)
721 *(wxUint32
*)buf
= cc
;
722 buf
+= sizeof(wxUint32
);
724 len
+= sizeof(wxUint32
);
727 if (buf
&& len
<=n
-sizeof(wxUint32
)) *(wxUint32
*)buf
=0;
734 // swap 32bit MB to 16bit String
735 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
739 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
742 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
743 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
748 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
749 if (pa
== (size_t)-1)
759 psz
+= sizeof(wxUint32
);
761 if (buf
&& len
<n
) *buf
=0;
767 // swap 16bit String to 32bit MB
768 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
772 while (*psz
&& (!buf
|| len
< n
))
776 size_t pa
=decode_utf16(psz
, *(wxUint32
*)cc
);
777 if (pa
== (size_t)-1)
787 len
+= sizeof(wxUint32
);
790 if (buf
&& len
<=n
-sizeof(wxUint32
)) *(wxUint32
*)buf
=0;
798 // copy 32bit MB to 32bit String
799 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
803 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
806 *buf
++ = *(wxUint32
*)psz
;
808 psz
+= sizeof(wxUint32
);
810 if (buf
&& len
<n
) *buf
=0;
816 // copy 32bit String to 32bit MB
817 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
821 while (*psz
&& (!buf
|| len
< n
))
825 *(wxUint32
*)buf
= *psz
;
826 buf
+= sizeof(wxUint32
);
829 len
+= sizeof(wxUint32
);
833 if (buf
&& len
<=n
-sizeof(wxUint32
)) *(wxUint32
*)buf
=0;
839 // swap 32bit MB to 32bit String
840 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
844 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
848 ((char *)buf
)[0] = psz
[3];
849 ((char *)buf
)[1] = psz
[2];
850 ((char *)buf
)[2] = psz
[1];
851 ((char *)buf
)[3] = psz
[0];
855 psz
+= sizeof(wxUint32
);
857 if (buf
&& len
<n
) *buf
=0;
863 // swap 32bit String to 32bit MB
864 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
868 while (*psz
&& (!buf
|| len
< n
))
872 *buf
++ = ((char *)psz
)[3];
873 *buf
++ = ((char *)psz
)[2];
874 *buf
++ = ((char *)psz
)[1];
875 *buf
++ = ((char *)psz
)[0];
877 len
+= sizeof(wxUint32
);
880 if (buf
&& len
<=n
-sizeof(wxUint32
)) *(wxUint32
*)buf
=0;
889 // ============================================================================
890 // The classes doing conversion using the iconv_xxx() functions
891 // ============================================================================
895 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
896 // if output buffer is _exactly_ as big as needed. Such case is (unless there's
897 // yet another bug in glibc) the only case when iconv() returns with (size_t)-1
898 // (which means error) and says there are 0 bytes left in the input buffer --
899 // when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
900 // this alternative test for iconv() failure.
901 // [This bug does not appear in glibc 2.2.]
902 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
903 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
904 (errno != E2BIG || bufLeft != 0))
906 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
909 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
911 // ----------------------------------------------------------------------------
912 // wxMBConv_iconv: encapsulates an iconv character set
913 // ----------------------------------------------------------------------------
915 class wxMBConv_iconv
: public wxMBConv
918 wxMBConv_iconv(const wxChar
*name
);
919 virtual ~wxMBConv_iconv();
921 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
);
922 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
);
925 { return (m2w
!= (iconv_t
)-1) && (w2m
!= (iconv_t
)-1); }
928 // the iconv handlers used to translate from multibyte to wide char and in
929 // the other direction
934 // the name (for iconv_open()) of a wide char charset -- if none is
935 // available on this machine, it will remain NULL
936 static const char *ms_wcCharsetName
;
938 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
939 // different endian-ness than the native one
940 static bool ms_wcNeedsSwap
;
943 const char *wxMBConv_iconv::ms_wcCharsetName
= NULL
;
944 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
946 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
948 // Do it the hard way
950 for (size_t i
= 0; i
< wxStrlen(name
)+1; i
++)
951 cname
[i
] = (char) name
[i
];
953 // check for charset that represents wchar_t:
954 if (ms_wcCharsetName
== NULL
)
956 ms_wcNeedsSwap
= false;
958 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
959 ms_wcCharsetName
= WC_NAME_BEST
;
960 m2w
= iconv_open(ms_wcCharsetName
, cname
);
962 if (m2w
== (iconv_t
)-1)
964 // try charset w/o bytesex info (e.g. "UCS4")
965 // and check for bytesex ourselves:
966 ms_wcCharsetName
= WC_NAME
;
967 m2w
= iconv_open(ms_wcCharsetName
, cname
);
969 // last bet, try if it knows WCHAR_T pseudo-charset
970 if (m2w
== (iconv_t
)-1)
972 ms_wcCharsetName
= "WCHAR_T";
973 m2w
= iconv_open(ms_wcCharsetName
, cname
);
976 if (m2w
!= (iconv_t
)-1)
978 char buf
[2], *bufPtr
;
979 wchar_t wbuf
[2], *wbufPtr
;
987 outsz
= SIZEOF_WCHAR_T
* 2;
991 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
992 (char**)&wbufPtr
, &outsz
);
994 if (ICONV_FAILED(res
, insz
))
996 ms_wcCharsetName
= NULL
;
997 wxLogLastError(wxT("iconv"));
998 wxLogError(_("Conversion to charset '%s' doesn't work."), name
);
1002 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1007 ms_wcCharsetName
= NULL
;
1009 // VS: we must not output an error here, since wxWindows will safely
1010 // fall back to using wxEncodingConverter.
1011 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name
);
1015 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName
, ms_wcNeedsSwap
);
1017 else // we already have ms_wcCharsetName
1019 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1022 // NB: don't ever pass NULL to iconv_open(), it may crash!
1023 if ( ms_wcCharsetName
)
1025 w2m
= iconv_open( cname
, ms_wcCharsetName
);
1033 wxMBConv_iconv::~wxMBConv_iconv()
1035 if ( m2w
!= (iconv_t
)-1 )
1037 if ( w2m
!= (iconv_t
)-1 )
1041 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
)
1043 size_t inbuf
= strlen(psz
);
1044 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1046 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1047 wchar_t *bufPtr
= buf
;
1048 const char *pszPtr
= psz
;
1052 // have destination buffer, convert there
1054 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1055 (char**)&bufPtr
, &outbuf
);
1056 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1060 // convert to native endianness
1061 WC_BSWAP(buf
/* _not_ bufPtr */, res
)
1064 // NB: iconv was given only strlen(psz) characters on input, and so
1065 // it couldn't convert the trailing zero. Let's do it ourselves
1066 // if there's some room left for it in the output buffer.
1072 // no destination buffer... convert using temp buffer
1073 // to calculate destination buffer requirement
1078 outbuf
= 8*SIZEOF_WCHAR_T
;
1081 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1082 (char**)&bufPtr
, &outbuf
);
1084 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1085 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1088 if (ICONV_FAILED(cres
, inbuf
))
1090 //VS: it is ok if iconv fails, hence trace only
1091 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1098 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
)
1100 size_t inbuf
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
1104 wchar_t *tmpbuf
= 0;
1108 // need to copy to temp buffer to switch endianness
1109 // this absolutely doesn't rock!
1110 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1111 // could be in read-only memory, or be accessed in some other thread)
1112 tmpbuf
=(wchar_t*)malloc((inbuf
+1)*SIZEOF_WCHAR_T
);
1113 memcpy(tmpbuf
,psz
,(inbuf
+1)*SIZEOF_WCHAR_T
);
1114 WC_BSWAP(tmpbuf
, inbuf
)
1120 // have destination buffer, convert there
1121 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1125 // NB: iconv was given only wcslen(psz) characters on input, and so
1126 // it couldn't convert the trailing zero. Let's do it ourselves
1127 // if there's some room left for it in the output buffer.
1133 // no destination buffer... convert using temp buffer
1134 // to calculate destination buffer requirement
1138 buf
= tbuf
; outbuf
= 16;
1140 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1143 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1151 if (ICONV_FAILED(cres
, inbuf
))
1153 //VS: it is ok if iconv fails, hence trace only
1154 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1161 #endif // HAVE_ICONV
1164 // ============================================================================
1165 // Win32 conversion classes
1166 // ============================================================================
1168 #ifdef wxHAVE_WIN32_MB2WC
1171 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1172 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1174 class wxMBConv_win32
: public wxMBConv
1177 wxMBConv_win32(const wxChar
* name
)
1179 m_CodePage
= wxCharsetToCodepage(name
);
1182 wxMBConv_win32(wxFontEncoding encoding
)
1184 m_CodePage
= wxEncodingToCodepage(encoding
);
1187 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
)
1189 const size_t len
= ::MultiByteToWideChar
1191 m_CodePage
, // code page
1193 psz
, // input string
1194 -1, // its length (NUL-terminated)
1195 buf
, // output string
1196 buf
? n
: 0 // size of output buffer
1199 // note that it returns # of written chars for buf != NULL and *size*
1200 // of the needed buffer for buf == NULL
1201 return len
? (buf
? len
: len
- 1) : (size_t)-1;
1204 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
)
1206 const size_t len
= ::WideCharToMultiByte
1208 m_CodePage
, // code page
1210 psz
, // input string
1211 -1, // it is (wide) NUL-terminated
1212 buf
, // output buffer
1213 buf
? n
: 0, // and its size
1214 NULL
, // default "replacement" char
1215 NULL
// [out] was it used?
1218 // see the comment above!
1219 return len
? (buf
? len
: len
- 1) : (size_t)-1;
1223 { return m_CodePage
!= -1; }
1229 #endif // wxHAVE_WIN32_MB2WC
1232 // ============================================================================
1233 // wxEncodingConverter based conversion classes
1234 // ============================================================================
1238 class wxMBConv_wxwin
: public wxMBConv
1243 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
1244 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
1248 // temporarily just use wxEncodingConverter stuff,
1249 // so that it works while a better implementation is built
1250 wxMBConv_wxwin(const wxChar
* name
)
1253 m_enc
= wxFontMapper::Get()->CharsetToEncoding(name
, false);
1255 m_enc
= wxFONTENCODING_SYSTEM
;
1260 wxMBConv_wxwin(wxFontEncoding enc
)
1267 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
))
1269 size_t inbuf
= strlen(psz
);
1271 m2w
.Convert(psz
,buf
);
1275 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
))
1277 const size_t inbuf
= wxWcslen(psz
);
1279 w2m
.Convert(psz
,buf
);
1284 bool IsOk() const { return m_ok
; }
1287 wxFontEncoding m_enc
;
1288 wxEncodingConverter m2w
, w2m
;
1290 // were we initialized successfully?
1293 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
1296 #endif // wxUSE_FONTMAP
1298 // ============================================================================
1299 // wxCSConv implementation
1300 // ============================================================================
1302 void wxCSConv::Init()
1309 // find a valid value for the encoding
1310 void wxCSConv::SetEncoding()
1313 m_encoding
= wxLocale::GetSystemEncoding();
1315 m_encoding
= wxFONTENCODING_SYSTEM
;
1319 wxCSConv::wxCSConv(const wxChar
*charset
)
1326 m_encoding
= wxFONTENCODING_SYSTEM
;
1330 else // no charset specified
1336 wxCSConv::wxCSConv(wxFontEncoding encoding
)
1338 if ( encoding
== wxFONTENCODING_MAX
||
1339 encoding
== wxFONTENCODING_DEFAULT
)
1341 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
1343 encoding
= wxFONTENCODING_SYSTEM
;
1348 if ( encoding
== wxFONTENCODING_SYSTEM
)
1352 else // have valid encoding, use it
1354 m_encoding
= encoding
;
1358 wxCSConv::~wxCSConv()
1363 wxCSConv::wxCSConv(const wxCSConv
& conv
)
1368 SetName(conv
.m_name
);
1369 m_encoding
= conv
.m_encoding
;
1372 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
1376 SetName(conv
.m_name
);
1377 m_encoding
= conv
.m_encoding
;
1382 void wxCSConv::Clear()
1391 void wxCSConv::SetName(const wxChar
*charset
)
1395 m_name
= wxStrdup(charset
);
1400 static inline bool DoesntNeedConv(wxFontEncoding enc
)
1402 return enc
== wxFONTENCODING_DEFAULT
||
1403 enc
== wxFONTENCODING_SYSTEM
||
1404 enc
== wxFONTENCODING_ISO8859_1
;
1407 wxMBConv
*wxCSConv::DoCreate() const
1410 wxFontMapper
* const fontMapper
= wxFontMapper::Get();
1412 wxFontEncoding encFromName
= m_name
? fontMapper
->CharsetToEncoding(m_name
)
1413 : wxFONTENCODING_SYSTEM
;
1414 #endif // wxUSE_FONTMAP
1416 // check for the special case of ASCII charset
1417 if ( (!m_name
&& DoesntNeedConv(m_encoding
))
1419 || (m_name
&& DoesntNeedConv(encFromName
))
1420 #endif // wxUSE_FONTMAP
1423 // don't convert at all
1427 // we trust OS to do conversion better than we can so try external
1428 // conversion methods first
1430 // the full order is:
1431 // 1. OS conversion (iconv() under Unix or Win32 API)
1432 // 2. hard coded conversions for UTF
1433 // 3. wxEncodingConverter as fall back
1439 wxMBConv_iconv
*conv
= new wxMBConv_iconv(m_name
);
1445 #endif // HAVE_ICONV
1447 #ifdef wxHAVE_WIN32_MB2WC
1449 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
1450 : new wxMBConv_win32(m_encoding
);
1456 #endif // wxHAVE_WIN32_MB2WC
1459 wxFontEncoding enc
= m_encoding
;
1461 if ( enc
== wxFONTENCODING_SYSTEM
)
1463 #endif // wxUSE_FONTMAP
1467 case wxFONTENCODING_UTF7
:
1468 return new wxMBConvUTF7
;
1470 case wxFONTENCODING_UTF8
:
1471 return new wxMBConvUTF8
;
1473 case wxFONTENCODING_UTF16
:
1474 return new wxMBConvUTF16
;
1476 case wxFONTENCODING_UTF16BE
:
1477 return new wxMBConvUTF16BE
;
1479 case wxFONTENCODING_UTF16LE
:
1480 return new wxMBConvUTF16LE
;
1482 case wxFONTENCODING_UTF32
:
1483 return new wxMBConvUTF32
;
1485 case wxFONTENCODING_UTF32BE
:
1486 return new wxMBConvUTF32BE
;
1488 case wxFONTENCODING_UTF32LE
:
1489 return new wxMBConvUTF32LE
;
1492 // nothing to do but put here to suppress gcc warnings
1499 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
1500 : new wxMBConv_wxwin(m_encoding
);
1506 #endif // wxUSE_FONTMAP
1508 wxLogError(_("Cannot convert from the charset '%s'!"),
1512 wxFontMapper::GetEncodingDescription(m_encoding
).c_str()
1513 #else // !wxUSE_FONTMAP
1514 wxString::Format(_("encoding %s"), m_encoding
).c_str()
1515 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1521 void wxCSConv::CreateConvIfNeeded() const
1525 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
1526 self
->m_convReal
= DoCreate();
1527 self
->m_deferred
= false;
1531 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1533 CreateConvIfNeeded();
1536 return m_convReal
->MB2WC(buf
, psz
, n
);
1539 size_t len
= strlen(psz
);
1543 for (size_t c
= 0; c
<= len
; c
++)
1544 buf
[c
] = (unsigned char)(psz
[c
]);
1550 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1552 CreateConvIfNeeded();
1555 return m_convReal
->WC2MB(buf
, psz
, n
);
1558 const size_t len
= wxWcslen(psz
);
1561 for (size_t c
= 0; c
<= len
; c
++)
1562 buf
[c
] = (psz
[c
] > 0xff) ? '?' : psz
[c
];
1568 #endif // wxUSE_WCHAR_T