1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
8 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
9 // (c) 2000-2003 Vadim Zeitlin
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ============================================================================
15 // ============================================================================
17 // ----------------------------------------------------------------------------
19 // ----------------------------------------------------------------------------
21 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
22 #pragma implementation "strconv.h"
25 // For compilers that support precompilation, includes "wx.h".
26 #include "wx/wxprec.h"
37 #include "wx/strconv.h"
42 #include "wx/msw/private.h"
53 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
54 #define wxHAVE_WIN32_MB2WC
55 #endif // __WIN32__ but !__WXMICROWIN__
57 // ----------------------------------------------------------------------------
59 // ----------------------------------------------------------------------------
69 #include "wx/encconv.h"
70 #include "wx/fontmap.h"
72 // ----------------------------------------------------------------------------
74 // ----------------------------------------------------------------------------
76 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
77 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
79 #if SIZEOF_WCHAR_T == 4
80 #define WC_NAME "UCS4"
81 #define WC_BSWAP BSWAP_UCS4
82 #ifdef WORDS_BIGENDIAN
83 #define WC_NAME_BEST "UCS-4BE"
85 #define WC_NAME_BEST "UCS-4LE"
87 #elif SIZEOF_WCHAR_T == 2
88 #define WC_NAME "UTF16"
89 #define WC_BSWAP BSWAP_UTF16
91 #ifdef WORDS_BIGENDIAN
92 #define WC_NAME_BEST "UTF-16BE"
94 #define WC_NAME_BEST "UTF-16LE"
96 #else // sizeof(wchar_t) != 2 nor 4
97 // does this ever happen?
98 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
101 // ============================================================================
103 // ============================================================================
105 // ----------------------------------------------------------------------------
106 // UTF-16 en/decoding to/from UCS-4
107 // ----------------------------------------------------------------------------
110 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
114 if (output
) *output
++ = (wxUint16
) input
;
117 else if (input
>=0x110000)
125 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
126 *output
++ = (wxUint16
) ((input
&0x3ff)+0xdc00);
132 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
134 if ((*input
<0xd800) || (*input
>0xdfff))
139 else if ((input
[1]<0xdc00) || (input
[1]>=0xdfff))
146 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
152 // ----------------------------------------------------------------------------
154 // ----------------------------------------------------------------------------
156 wxMBConv::~wxMBConv()
158 // nothing to do here
161 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
165 // calculate the length of the buffer needed first
166 size_t nLen
= MB2WC(NULL
, psz
, 0);
167 if ( nLen
!= (size_t)-1 )
169 // now do the actual conversion
170 wxWCharBuffer
buf(nLen
);
171 MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NUL
177 wxWCharBuffer
buf((wchar_t *)NULL
);
182 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
186 size_t nLen
= WC2MB(NULL
, pwz
, 0);
187 if ( nLen
!= (size_t)-1 )
189 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
190 WC2MB(buf
.data(), pwz
, nLen
+ 4);
196 wxCharBuffer
buf((char *)NULL
);
201 // ----------------------------------------------------------------------------
203 // ----------------------------------------------------------------------------
205 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
207 return wxMB2WC(buf
, psz
, n
);
210 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
212 return wxWC2MB(buf
, psz
, n
);
215 // ----------------------------------------------------------------------------
217 // ----------------------------------------------------------------------------
220 static char utf7_setD
[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
221 "abcdefghijklmnopqrstuvwxyz"
222 "0123456789'(),-./:?";
223 static char utf7_setO
[]="!\"#$%&*;<=>@[]^_`{|}";
224 static char utf7_setB
[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
225 "abcdefghijklmnopqrstuvwxyz"
229 // TODO: write actual implementations of UTF-7 here
230 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf
),
231 const char * WXUNUSED(psz
),
232 size_t WXUNUSED(n
)) const
237 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf
),
238 const wchar_t * WXUNUSED(psz
),
239 size_t WXUNUSED(n
)) const
244 // ----------------------------------------------------------------------------
246 // ----------------------------------------------------------------------------
248 static wxUint32 utf8_max
[]=
249 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
251 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
255 while (*psz
&& ((!buf
) || (len
< n
)))
257 unsigned char cc
= *psz
++, fc
= cc
;
259 for (cnt
= 0; fc
& 0x80; cnt
++)
273 // invalid UTF-8 sequence
278 unsigned ocnt
= cnt
- 1;
279 wxUint32 res
= cc
& (0x3f >> cnt
);
283 if ((cc
& 0xC0) != 0x80)
285 // invalid UTF-8 sequence
288 res
= (res
<< 6) | (cc
& 0x3f);
290 if (res
<= utf8_max
[ocnt
])
292 // illegal UTF-8 encoding
296 size_t pa
= encode_utf16(res
, buf
);
297 if (pa
== (size_t)-1)
306 #endif // WC_UTF16/!WC_UTF16
310 if (buf
&& (len
< n
))
315 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
319 while (*psz
&& ((!buf
) || (len
< n
)))
323 size_t pa
= decode_utf16(psz
, cc
);
324 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
326 cc
=(*psz
++) & 0x7fffffff;
329 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
343 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
345 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
350 if (buf
&& (len
<n
)) *buf
= 0;
358 // ----------------------------------------------------------------------------
360 // ----------------------------------------------------------------------------
362 #ifdef WORDS_BIGENDIAN
363 #define wxMBConvUTF16straight wxMBConvUTF16BE
364 #define wxMBConvUTF16swap wxMBConvUTF16LE
366 #define wxMBConvUTF16swap wxMBConvUTF16BE
367 #define wxMBConvUTF16straight wxMBConvUTF16LE
373 // copy 16bit MB to 16bit String
374 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
378 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
381 *buf
++ = *(wxUint16
*)psz
;
384 psz
+= sizeof(wxUint16
);
386 if (buf
&& len
<n
) *buf
=0;
392 // copy 16bit String to 16bit MB
393 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
397 while (*psz
&& (!buf
|| len
< n
))
401 *(wxUint16
*)buf
= *psz
;
402 buf
+= sizeof(wxUint16
);
404 len
+= sizeof(wxUint16
);
407 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
413 // swap 16bit MB to 16bit String
414 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
418 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
422 ((char *)buf
)[0] = psz
[1];
423 ((char *)buf
)[1] = psz
[0];
427 psz
+= sizeof(wxUint16
);
429 if (buf
&& len
<n
) *buf
=0;
435 // swap 16bit String to 16bit MB
436 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
440 while (*psz
&& (!buf
|| len
< n
))
444 *buf
++ = ((char*)psz
)[1];
445 *buf
++ = ((char*)psz
)[0];
447 len
+= sizeof(wxUint16
);
450 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
459 // copy 16bit MB to 32bit String
460 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
464 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
467 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
468 if (pa
== (size_t)-1)
474 psz
+= pa
* sizeof(wxUint16
);
476 if (buf
&& len
<n
) *buf
=0;
482 // copy 32bit String to 16bit MB
483 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
487 while (*psz
&& (!buf
|| len
< n
))
490 size_t pa
=encode_utf16(*psz
, cc
);
492 if (pa
== (size_t)-1)
497 *(wxUint16
*)buf
= cc
[0];
498 buf
+= sizeof(wxUint16
);
501 *(wxUint16
*)buf
= cc
[1];
502 buf
+= sizeof(wxUint16
);
506 len
+= pa
*sizeof(wxUint16
);
509 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
515 // swap 16bit MB to 32bit String
516 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
520 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
524 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
525 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
527 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
528 if (pa
== (size_t)-1)
535 psz
+= pa
* sizeof(wxUint16
);
537 if (buf
&& len
<n
) *buf
=0;
543 // swap 32bit String to 16bit MB
544 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
548 while (*psz
&& (!buf
|| len
< n
))
551 size_t pa
=encode_utf16(*psz
, cc
);
553 if (pa
== (size_t)-1)
558 *buf
++ = ((char*)cc
)[1];
559 *buf
++ = ((char*)cc
)[0];
562 *buf
++ = ((char*)cc
)[3];
563 *buf
++ = ((char*)cc
)[2];
567 len
+= pa
*sizeof(wxUint16
);
570 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
578 // ----------------------------------------------------------------------------
580 // ----------------------------------------------------------------------------
582 #ifdef WORDS_BIGENDIAN
583 #define wxMBConvUTF32straight wxMBConvUTF32BE
584 #define wxMBConvUTF32swap wxMBConvUTF32LE
586 #define wxMBConvUTF32swap wxMBConvUTF32BE
587 #define wxMBConvUTF32straight wxMBConvUTF32LE
591 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
592 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
597 // copy 32bit MB to 16bit String
598 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
602 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
606 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
607 if (pa
== (size_t)-1)
617 psz
+= sizeof(wxUint32
);
619 if (buf
&& len
<n
) *buf
=0;
625 // copy 16bit String to 32bit MB
626 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
630 while (*psz
&& (!buf
|| len
< n
))
634 size_t pa
=decode_utf16(psz
, cc
);
635 if (pa
== (size_t)-1)
640 *(wxUint32
*)buf
= cc
;
641 buf
+= sizeof(wxUint32
);
643 len
+= sizeof(wxUint32
);
646 if (buf
&& len
<=n
-sizeof(wxUint32
)) *(wxUint32
*)buf
=0;
653 // swap 32bit MB to 16bit String
654 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
658 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
661 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
662 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
667 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
668 if (pa
== (size_t)-1)
678 psz
+= sizeof(wxUint32
);
680 if (buf
&& len
<n
) *buf
=0;
686 // swap 16bit String to 32bit MB
687 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
691 while (*psz
&& (!buf
|| len
< n
))
695 size_t pa
=decode_utf16(psz
, *(wxUint32
*)cc
);
696 if (pa
== (size_t)-1)
706 len
+= sizeof(wxUint32
);
709 if (buf
&& len
<=n
-sizeof(wxUint32
)) *(wxUint32
*)buf
=0;
717 // copy 32bit MB to 32bit String
718 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
722 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
725 *buf
++ = *(wxUint32
*)psz
;
727 psz
+= sizeof(wxUint32
);
729 if (buf
&& len
<n
) *buf
=0;
735 // copy 32bit String to 32bit MB
736 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
740 while (*psz
&& (!buf
|| len
< n
))
744 *(wxUint32
*)buf
= *psz
;
745 buf
+= sizeof(wxUint32
);
748 len
+= sizeof(wxUint32
);
752 if (buf
&& len
<=n
-sizeof(wxUint32
)) *(wxUint32
*)buf
=0;
758 // swap 32bit MB to 32bit String
759 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
763 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
767 ((char *)buf
)[0] = psz
[3];
768 ((char *)buf
)[1] = psz
[2];
769 ((char *)buf
)[2] = psz
[1];
770 ((char *)buf
)[3] = psz
[0];
774 psz
+= sizeof(wxUint32
);
776 if (buf
&& len
<n
) *buf
=0;
782 // swap 32bit String to 32bit MB
783 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
787 while (*psz
&& (!buf
|| len
< n
))
791 *buf
++ = ((char *)psz
)[3];
792 *buf
++ = ((char *)psz
)[2];
793 *buf
++ = ((char *)psz
)[1];
794 *buf
++ = ((char *)psz
)[0];
796 len
+= sizeof(wxUint32
);
799 if (buf
&& len
<=n
-sizeof(wxUint32
)) *(wxUint32
*)buf
=0;
808 // ============================================================================
809 // The classes doing conversion using the iconv_xxx() functions
810 // ============================================================================
814 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
815 // if the output buffer is _exactly_ as big as needed. Such a case is (unless there's
816 // yet another bug in glibc) the only case when iconv() returns (size_t)-1
817 // (which means error) and says there are 0 bytes left in the input buffer --
818 // when a _real_ error occurs, the bytes-left-in-input-buffer count is non-zero. Hence,
819 // this alternative test for iconv() failure.
820 // [This bug does not appear in glibc 2.2.]
821 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
822 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
823 (errno != E2BIG || bufLeft != 0))
825 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
828 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
830 // ----------------------------------------------------------------------------
831 // wxMBConv_iconv: encapsulates an iconv character set
832 // ----------------------------------------------------------------------------
834 class wxMBConv_iconv
: public wxMBConv
837 wxMBConv_iconv(const wxChar
*name
);
838 virtual ~wxMBConv_iconv();
840 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
841 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
844 { return (m2w
!= (iconv_t
)-1) && (w2m
!= (iconv_t
)-1); }
847 // the iconv handlers used to translate from multibyte to wide char and in
848 // the other direction
853 // the name (for iconv_open()) of a wide char charset -- if none is
854 // available on this machine, it will remain NULL
855 static const char *ms_wcCharsetName
;
857 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
858 // different endian-ness than the native one
859 static bool ms_wcNeedsSwap
;
862 const char *wxMBConv_iconv::ms_wcCharsetName
= NULL
;
863 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
865 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
867 // Do it the hard way
869 for (size_t i
= 0; i
< wxStrlen(name
)+1; i
++)
870 cname
[i
] = (char) name
[i
];
872 // check for charset that represents wchar_t:
873 if (ms_wcCharsetName
== NULL
)
875 ms_wcNeedsSwap
= false;
877 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
878 ms_wcCharsetName
= WC_NAME_BEST
;
879 m2w
= iconv_open(ms_wcCharsetName
, cname
);
881 if (m2w
== (iconv_t
)-1)
883 // try charset w/o bytesex info (e.g. "UCS4")
884 // and check for bytesex ourselves:
885 ms_wcCharsetName
= WC_NAME
;
886 m2w
= iconv_open(ms_wcCharsetName
, cname
);
888 // last bet, try if it knows WCHAR_T pseudo-charset
889 if (m2w
== (iconv_t
)-1)
891 ms_wcCharsetName
= "WCHAR_T";
892 m2w
= iconv_open(ms_wcCharsetName
, cname
);
895 if (m2w
!= (iconv_t
)-1)
897 char buf
[2], *bufPtr
;
898 wchar_t wbuf
[2], *wbufPtr
;
906 outsz
= SIZEOF_WCHAR_T
* 2;
910 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
911 (char**)&wbufPtr
, &outsz
);
913 if (ICONV_FAILED(res
, insz
))
915 ms_wcCharsetName
= NULL
;
916 wxLogLastError(wxT("iconv"));
917 wxLogError(_("Conversion to charset '%s' doesn't work."), name
);
921 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
926 ms_wcCharsetName
= NULL
;
928 // VS: we must not output an error here, since wxWindows will safely
929 // fall back to using wxEncodingConverter.
930 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name
);
934 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName
, ms_wcNeedsSwap
);
936 else // we already have ms_wcCharsetName
938 m2w
= iconv_open(ms_wcCharsetName
, cname
);
941 // NB: don't ever pass NULL to iconv_open(), it may crash!
942 if ( ms_wcCharsetName
)
944 w2m
= iconv_open( cname
, ms_wcCharsetName
);
952 wxMBConv_iconv::~wxMBConv_iconv()
954 if ( m2w
!= (iconv_t
)-1 )
956 if ( w2m
!= (iconv_t
)-1 )
960 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
962 size_t inbuf
= strlen(psz
);
963 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
965 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
966 wchar_t *bufPtr
= buf
;
967 const char *pszPtr
= psz
;
971 // have destination buffer, convert there
973 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
974 (char**)&bufPtr
, &outbuf
);
975 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
979 // convert to native endianness
980 WC_BSWAP(buf
/* _not_ bufPtr */, res
)
983 // NB: iconv was given only strlen(psz) characters on input, and so
984 // it couldn't convert the trailing zero. Let's do it ourselves
985 // if there's some room left for it in the output buffer.
991 // no destination buffer... convert using temp buffer
992 // to calculate destination buffer requirement
997 outbuf
= 8*SIZEOF_WCHAR_T
;
1000 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1001 (char**)&bufPtr
, &outbuf
);
1003 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1004 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1007 if (ICONV_FAILED(cres
, inbuf
))
1009 //VS: it is ok if iconv fails, hence trace only
1010 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1017 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1019 size_t inbuf
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
1023 wchar_t *tmpbuf
= 0;
1027 // need to copy to temp buffer to switch endianness
1028 // this absolutely doesn't rock!
1029 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1030 // could be in read-only memory, or be accessed in some other thread)
1031 tmpbuf
=(wchar_t*)malloc((inbuf
+1)*SIZEOF_WCHAR_T
);
1032 memcpy(tmpbuf
,psz
,(inbuf
+1)*SIZEOF_WCHAR_T
);
1033 WC_BSWAP(tmpbuf
, inbuf
)
1039 // have destination buffer, convert there
1040 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1044 // NB: iconv was given only wcslen(psz) characters on input, and so
1045 // it couldn't convert the trailing zero. Let's do it ourselves
1046 // if there's some room left for it in the output buffer.
1052 // no destination buffer... convert using temp buffer
1053 // to calculate destination buffer requirement
1057 buf
= tbuf
; outbuf
= 16;
1059 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1062 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1070 if (ICONV_FAILED(cres
, inbuf
))
1072 //VS: it is ok if iconv fails, hence trace only
1073 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1080 #endif // HAVE_ICONV
1083 // ============================================================================
1084 // Win32 conversion classes
1085 // ============================================================================
1087 #ifdef wxHAVE_WIN32_MB2WC
1090 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1091 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1093 class wxMBConv_win32
: public wxMBConv
1098 m_CodePage
= CP_ACP
;
1101 wxMBConv_win32(const wxChar
* name
)
1103 m_CodePage
= wxCharsetToCodepage(name
);
1106 wxMBConv_win32(wxFontEncoding encoding
)
1108 m_CodePage
= wxEncodingToCodepage(encoding
);
1111 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1113 const size_t len
= ::MultiByteToWideChar
1115 m_CodePage
, // code page
1117 psz
, // input string
1118 -1, // its length (NUL-terminated)
1119 buf
, // output string
1120 buf
? n
: 0 // size of output buffer
1123 // note that it returns # of written chars for buf != NULL and *size*
1124 // of the needed buffer for buf == NULL
1125 return len
? (buf
? len
: len
- 1) : (size_t)-1;
1128 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1130 const size_t len
= ::WideCharToMultiByte
1132 m_CodePage
, // code page
1134 psz
, // input string
1135 -1, // it is (wide) NUL-terminated
1136 buf
, // output buffer
1137 buf
? n
: 0, // and its size
1138 NULL
, // default "replacement" char
1139 NULL
// [out] was it used?
1142 // see the comment above!
1143 return len
? (buf
? len
: len
- 1) : (size_t)-1;
1147 { return m_CodePage
!= -1; }
1153 #endif // wxHAVE_WIN32_MB2WC
1156 // ============================================================================
1157 // wxEncodingConverter based conversion classes
1158 // ============================================================================
1162 class wxMBConv_wxwin
: public wxMBConv
1167 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
1168 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
1172 // temporarily just use wxEncodingConverter stuff,
1173 // so that it works while a better implementation is built
1174 wxMBConv_wxwin(const wxChar
* name
)
1177 m_enc
= wxFontMapper::Get()->CharsetToEncoding(name
, false);
1179 m_enc
= wxFONTENCODING_SYSTEM
;
1184 wxMBConv_wxwin(wxFontEncoding enc
)
1191 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
1193 size_t inbuf
= strlen(psz
);
1195 m2w
.Convert(psz
,buf
);
1199 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
1201 const size_t inbuf
= wxWcslen(psz
);
1203 w2m
.Convert(psz
,buf
);
1208 bool IsOk() const { return m_ok
; }
1211 wxFontEncoding m_enc
;
1212 wxEncodingConverter m2w
, w2m
;
1214 // were we initialized successfully?
1217 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
1220 #endif // wxUSE_FONTMAP
1222 // ============================================================================
1223 // wxCSConv implementation
1224 // ============================================================================
1226 void wxCSConv::Init()
1233 // find a valid value for the encoding
1234 void wxCSConv::SetEncoding()
1237 m_encoding
= wxLocale::GetSystemEncoding();
1239 m_encoding
= wxFONTENCODING_SYSTEM
;
1243 wxCSConv::wxCSConv(const wxChar
*charset
)
1250 m_encoding
= wxFONTENCODING_SYSTEM
;
1254 else // no charset specified
1260 wxCSConv::wxCSConv(wxFontEncoding encoding
)
1262 if ( encoding
== wxFONTENCODING_MAX
||
1263 encoding
== wxFONTENCODING_DEFAULT
)
1265 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
1267 encoding
= wxFONTENCODING_SYSTEM
;
1272 if ( encoding
== wxFONTENCODING_SYSTEM
)
1276 else // have valid encoding, use it
1278 m_encoding
= encoding
;
1282 wxCSConv::~wxCSConv()
1287 wxCSConv::wxCSConv(const wxCSConv
& conv
)
1292 SetName(conv
.m_name
);
1293 m_encoding
= conv
.m_encoding
;
1296 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
1300 SetName(conv
.m_name
);
1301 m_encoding
= conv
.m_encoding
;
1306 void wxCSConv::Clear()
1315 void wxCSConv::SetName(const wxChar
*charset
)
1319 m_name
= wxStrdup(charset
);
1324 static inline bool DoesntNeedConv(wxFontEncoding enc
)
1326 return enc
== wxFONTENCODING_DEFAULT
||
1327 enc
== wxFONTENCODING_SYSTEM
||
1328 enc
== wxFONTENCODING_ISO8859_1
;
1331 wxMBConv
*wxCSConv::DoCreate() const
1334 wxFontMapper
* const fontMapper
= wxFontMapper::Get();
1336 wxFontEncoding encFromName
= m_name
? fontMapper
->CharsetToEncoding(m_name
)
1337 : wxFONTENCODING_SYSTEM
;
1338 #endif // wxUSE_FONTMAP
1340 // check for the special case of ASCII charset
1341 if ( (!m_name
&& DoesntNeedConv(m_encoding
))
1343 || (m_name
&& DoesntNeedConv(encFromName
))
1344 #endif // wxUSE_FONTMAP
1347 // don't convert at all
1351 // we trust OS to do conversion better than we can so try external
1352 // conversion methods first
1354 // the full order is:
1355 // 1. OS conversion (iconv() under Unix or Win32 API)
1356 // 2. hard coded conversions for UTF
1357 // 3. wxEncodingConverter as fall back
1363 wxMBConv_iconv
*conv
= new wxMBConv_iconv(m_name
);
1369 #endif // HAVE_ICONV
1371 #ifdef wxHAVE_WIN32_MB2WC
1373 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
1374 : new wxMBConv_win32(m_encoding
);
1380 #endif // wxHAVE_WIN32_MB2WC
1383 wxFontEncoding enc
= m_encoding
;
1385 if ( enc
== wxFONTENCODING_SYSTEM
)
1387 #endif // wxUSE_FONTMAP
1391 case wxFONTENCODING_UTF7
:
1392 return new wxMBConvUTF7
;
1394 case wxFONTENCODING_UTF8
:
1395 return new wxMBConvUTF8
;
1397 case wxFONTENCODING_UTF16
:
1398 return new wxMBConvUTF16
;
1400 case wxFONTENCODING_UTF16BE
:
1401 return new wxMBConvUTF16BE
;
1403 case wxFONTENCODING_UTF16LE
:
1404 return new wxMBConvUTF16LE
;
1406 case wxFONTENCODING_UTF32
:
1407 return new wxMBConvUTF32
;
1409 case wxFONTENCODING_UTF32BE
:
1410 return new wxMBConvUTF32BE
;
1412 case wxFONTENCODING_UTF32LE
:
1413 return new wxMBConvUTF32LE
;
1416 // nothing to do but put here to suppress gcc warnings
1423 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
1424 : new wxMBConv_wxwin(m_encoding
);
1430 #endif // wxUSE_FONTMAP
1432 wxLogError(_("Cannot convert from the charset '%s'!"),
1436 wxFontMapper::GetEncodingDescription(m_encoding
).c_str()
1437 #else // !wxUSE_FONTMAP
1438 wxString::Format(_("encoding %s"), m_encoding
).c_str()
1439 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1445 void wxCSConv::CreateConvIfNeeded() const
1449 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
1450 self
->m_convReal
= DoCreate();
1451 self
->m_deferred
= false;
1455 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1457 CreateConvIfNeeded();
1460 return m_convReal
->MB2WC(buf
, psz
, n
);
1463 size_t len
= strlen(psz
);
1467 for (size_t c
= 0; c
<= len
; c
++)
1468 buf
[c
] = (unsigned char)(psz
[c
]);
1474 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1476 CreateConvIfNeeded();
1479 return m_convReal
->WC2MB(buf
, psz
, n
);
1482 const size_t len
= wxWcslen(psz
);
1485 for (size_t c
= 0; c
<= len
; c
++)
1486 buf
[c
] = (psz
[c
] > 0xff) ? '?' : psz
[c
];
1492 // ----------------------------------------------------------------------------
1494 // ----------------------------------------------------------------------------
1497 static wxMBConv_win32 wxConvLibcObj
;
1499 static wxMBConvSystem wxConvLibcObj
;
1502 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
1503 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
1504 static wxMBConvUTF7 wxConvUTF7Obj
;
1505 static wxMBConvUTF8 wxConvUTF8Obj
;
1508 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
1509 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
1510 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
1511 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
1512 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
1513 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
1515 #else // !wxUSE_WCHAR_T
1517 // stand-ins in absence of wchar_t
1518 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
1523 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T