]>
git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
ffce9eb4187cefa75222f4bebad621bf99eac817
1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
8 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
9 // Licence: wxWindows licence
10 /////////////////////////////////////////////////////////////////////////////
12 // ============================================================================
14 // ============================================================================
16 // ----------------------------------------------------------------------------
18 // ----------------------------------------------------------------------------
20 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
21 #pragma implementation "strconv.h"
24 // For compilers that support precompilation, includes "wx.h".
25 #include "wx/wxprec.h"
37 #include "wx/msw/private.h"
48 #include "wx/module.h"
49 #include "wx/strconv.h"
51 // ----------------------------------------------------------------------------
53 // ----------------------------------------------------------------------------
// Global converter objects: wxConvLibc is a plain wxMBConv, wxConvLocal is a
// wxCSConv constructed from a NULL charset name, and wxConvISO8859_1 is a
// wxCSConv for "iso-8859-1".
56 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
;
57 WXDLLIMPEXP_DATA_BASE(wxCSConv
) wxConvLocal((const wxChar
*)NULL
);
58 WXDLLIMPEXP_DATA_BASE(wxCSConv
) wxConvISO8859_1(_T("iso-8859-1"));
60 // stand-ins in absence of wchar_t
// NOTE(review): the #if/#else matching the #endif below is not visible here;
// this second wxConvLibc definition presumably belongs to the branch compiled
// when wxUSE_WCHAR_T is off -- confirm.
61 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
66 #endif // wxUSE_WCHAR_T
// wxConvCurrent starts out pointing at wxConvLibc.
68 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibc
;
// wxModule subclass tied to the global converters: OnInit() always succeeds,
// and one of its members calls wxConvISO8859_1.Clear().
// NOTE(review): the member containing the Clear() call is not visible here;
// presumably it is OnExit() and also clears wxConvLocal -- confirm.
70 class wxStrConvModule
: public wxModule
73 wxStrConvModule() : wxModule() { }
74 virtual bool OnInit() { return TRUE
; }
79 wxConvISO8859_1
.Clear();
83 DECLARE_DYNAMIC_CLASS(wxStrConvModule
)
86 IMPLEMENT_DYNAMIC_CLASS(wxStrConvModule
, wxModule
)
89 // ----------------------------------------------------------------------------
91 // ----------------------------------------------------------------------------
103 #include "wx/encconv.h"
104 #include "wx/fontmap.h"
106 // ----------------------------------------------------------------------------
108 // ----------------------------------------------------------------------------
// Byte-swap the first `len` elements of `str` in place, element by element,
// using wxUINT32_SWAP_ALWAYS (UCS-4) or wxUINT16_SWAP_ALWAYS (UTF-16).
// Each macro expands to a brace-enclosed block that declares a local `_c`;
// `str` and `len` are pasted unparenthesised and evaluated repeatedly, so pass
// only simple, side-effect-free expressions.
110 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
111 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
113 // Under Unix SIZEOF_WCHAR_T is defined by configure, but on other platforms
114 // it might not be defined - assume the most common value.
115 #ifndef SIZEOF_WCHAR_T
116 #define SIZEOF_WCHAR_T 2
117 #endif // !defined(SIZEOF_WCHAR_T)
// Depending on the size of wchar_t, select:
//   WC_NAME      - charset name without byte-order suffix ("UCS4" / "UTF16"),
//   WC_NAME_BEST - charset name with explicit byte order, chosen by
//                  WORDS_BIGENDIAN,
//   WC_BSWAP     - the matching in-place byte-swap macro.
// Any other wchar_t size is a compile-time error.
// NOTE(review): the #else/#endif lines of the WORDS_BIGENDIAN tests and the
// closing #endif of the size test are not visible here.
119 #if SIZEOF_WCHAR_T == 4
120 #define WC_NAME "UCS4"
121 #define WC_BSWAP BSWAP_UCS4
122 #ifdef WORDS_BIGENDIAN
123 #define WC_NAME_BEST "UCS-4BE"
125 #define WC_NAME_BEST "UCS-4LE"
127 #elif SIZEOF_WCHAR_T == 2
128 #define WC_NAME "UTF16"
129 #define WC_BSWAP BSWAP_UTF16
131 #ifdef WORDS_BIGENDIAN
132 #define WC_NAME_BEST "UTF-16BE"
134 #define WC_NAME_BEST "UTF-16LE"
136 #else // sizeof(wchar_t) != 2 nor 4
137 // I don't know what to do about this
138 #error "Weird sizeof(wchar_t): please report your platform details to wx-users mailing list"
141 // ============================================================================
143 // ============================================================================
145 // ----------------------------------------------------------------------------
146 // UTF-16 en/decoding to/from UCS-4
147 // ----------------------------------------------------------------------------
// Encodes the UCS-4 value `input` as UTF-16 into `output`.
// Visible behaviour: one branch stores `input` as a single 16-bit unit (only
// when `output` is non-NULL); values >= 0x110000 take a separate branch; the
// remaining values are written as a surrogate pair, (input >> 10) + 0xd7c0
// followed by (input & 0x3ff) + 0xdc00. 0xd7c0 equals 0xd800 - (0x10000 >> 10),
// which folds the 0x10000 offset into the leading-surrogate base.
// Callers in this file compare the result against (size_t)-1 to detect failure
// and otherwise use it as the number of 16-bit units produced.
// NOTE(review): the condition selecting the single-unit branch, the NULL check
// guarding the pair writes and the return statements are not visible here.
150 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
154 if (output
) *output
++ = (wxUint16
) input
;
157 else if (input
>=0x110000)
165 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
166 *output
++ = (wxUint16
) ((input
&0x3ff)+0xdc00);
// Decodes one code point from the UTF-16 sequence at `input` into `output`.
// A first unit outside 0xd800..0xdfff is handled by the first branch; otherwise
// input[1] is range-checked and, if accepted, the pair is combined as
// ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00).
// Callers in this file use the result as the number of 16-bit units consumed
// and treat (size_t)-1 as failure.
// NOTE(review): the check on input[1] uses `>= 0xdfff`, which also rejects
// 0xdfff itself. Trailing surrogates span 0xdc00..0xdfff inclusive (U+10FFFF
// ends in 0xdfff), so this looks like an off-by-one that should be `> 0xdfff`
// -- confirm.
// NOTE(review): a first unit in 0xdc00..0xdfff (a lone trailing surrogate) is
// not excluded by the first test and would be combined as if it were a leading
// surrogate -- confirm whether that is intended.
172 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
174 if ((*input
<0xd800) || (*input
>0xdfff))
179 else if ((input
[1]<0xdc00) || (input
[1]>=0xdfff))
186 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
192 // ----------------------------------------------------------------------------
194 // ----------------------------------------------------------------------------
// IGNORE_LIBC is fixed at 0.
// NOTE(review): presumably it selects between the hand-written copy loops and
// the wxMB2WC()/wxWC2MB() calls in wxMBConv::MB2WC/WC2MB below; the #if lines
// that would test it are not visible here -- confirm.
196 #define IGNORE_LIBC 0
// Destructor; nothing to release.
198 wxMBConv::~wxMBConv()
200 // nothing to do here
// Converts the multibyte string `psz` to wide characters in `buf`.
// Two strategies are visible: a plain element-wise copy that casts each char
// to wchar_t (including the terminating NUL, since the loop runs to
// strlen(psz)+1) and returns strlen(psz), and a call to wxMB2WC(buf, psz, n).
// NOTE(review): the conditions choosing between these paths are not visible
// here (presumably IGNORE_LIBC and a buf != NULL test) -- confirm.
// NOTE(review): the copy loop never consults `n`, so it can write past a
// buffer of n elements, and it re-evaluates strlen(psz) on every iteration.
// The (wchar_t) cast of a plain char sign-extends bytes >= 0x80 where char is
// signed; wxCSConv::MB2WC goes through unsigned char instead.
203 size_t wxMBConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
208 for (size_t i
= 0; i
< strlen( psz
)+1; i
++)
209 buf
[i
] = (wchar_t) psz
[i
];
210 return strlen( psz
);
214 return strlen( psz
);
217 return wxMB2WC(buf
, psz
, n
);
// Converts the wide string `psz` to multibyte characters in `buf`.
// Two strategies are visible: a plain element-wise copy that truncates each
// wchar_t to char (including the terminating NUL) and returns wxStrlen(psz),
// and a call to wxWC2MB(buf, psz, n).
// NOTE(review): the conditions choosing between these paths are not visible
// here (presumably IGNORE_LIBC and a buf != NULL test) -- confirm.
// NOTE(review): the copy loop never consults `n`, silently drops the high bits
// of characters above the range of char, and re-evaluates wxStrlen(psz) on
// every iteration.
221 size_t wxMBConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
226 for (size_t i
= 0; i
< wxStrlen( psz
)+1; i
++)
227 buf
[i
] = (char) psz
[i
];
228 return wxStrlen( psz
);
232 return wxStrlen( psz
);
235 return wxWC2MB(buf
, psz
, n
);
// Converts `psz` to a newly allocated wide-character buffer in two passes:
// MB2WC(NULL, psz, 0) yields the required length, then the conversion is run
// into a wxWCharBuffer of that length, passing nLen + 1 so that the trailing
// NUL is converted too. If the length query returns (size_t)-1, a buffer built
// from a NULL pointer is used instead.
// NOTE(review): the result of the second MB2WC() call is not checked in the
// visible lines; the return statements are not visible here.
239 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
243 // calculate the length of the buffer needed first
244 size_t nLen
= MB2WC(NULL
, psz
, 0);
245 if ( nLen
!= (size_t)-1 )
247 // now do the actual conversion
248 wxWCharBuffer
buf(nLen
);
249 MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NUL
255 wxWCharBuffer
buf((wchar_t *)NULL
);
// Converts `pwz` to a newly allocated multibyte buffer in two passes:
// WC2MB(NULL, pwz, 0) yields the required length, then the conversion is run
// into a wxCharBuffer constructed with nLen + 3 and told it has nLen + 4 bytes
// available, so that a terminator as wide as a wxUint32 fits (needed by the
// UTF-16/UTF-32 converters, whose "multibyte" terminator is 2 or 4 bytes).
// If the length query returns (size_t)-1, a buffer built from a NULL pointer
// is used instead.
// NOTE(review): this relies on wxCharBuffer(len) allocating len + 1 bytes --
// confirm. The result of the second WC2MB() call is not checked in the
// visible lines; the return statements are not visible here.
260 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
264 size_t nLen
= WC2MB(NULL
, pwz
, 0);
265 if ( nLen
!= (size_t)-1 )
267 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
268 WC2MB(buf
.data(), pwz
, nLen
+ 4);
274 wxCharBuffer
buf((char *)NULL
);
279 // ----------------------------------------------------------------------------
281 // ----------------------------------------------------------------------------
// Global UTF-7 converter object.
283 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
) wxConvUTF7
;
// Character tables for UTF-7: utf7_setD holds letters, digits and
// '(),-./:? ; utf7_setO holds further punctuation; utf7_setB starts with the
// letters.
// NOTE(review): these presumably correspond to RFC 2152's "Set D", "Set O" and
// the modified base64 alphabet; the final line of utf7_setB is not visible
// here -- confirm.
286 static char utf7_setD
[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
287 "abcdefghijklmnopqrstuvwxyz"
288 "0123456789'(),-./:?";
289 static char utf7_setO
[]="!\"#$%&*;<=>@[]^_`{|}";
290 static char utf7_setB
[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
291 "abcdefghijklmnopqrstuvwxyz"
295 // TODO: write actual implementations of UTF-7 here
// Both conversion functions are placeholders: every parameter is marked
// WXUNUSED. NOTE(review): the bodies are not visible here.
296 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf
),
297 const char * WXUNUSED(psz
),
298 size_t WXUNUSED(n
)) const
303 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf
),
304 const wchar_t * WXUNUSED(psz
),
305 size_t WXUNUSED(n
)) const
310 // ----------------------------------------------------------------------------
312 // ----------------------------------------------------------------------------
// Global UTF-8 converter object.
314 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
) wxConvUTF8
;
// utf8_max[k] is the largest value encodable in k + 1 UTF-8 bytes (1 byte:
// 0x7f, 2 bytes: 0x7ff, ... 6 bytes: 0x7fffffff). The final 0xffffffff entry
// guarantees that the search loop in WC2MB (`cc > utf8_max[cnt]`) terminates.
// MB2WC uses the table to reject overlong encodings.
316 static wxUint32 utf8_max
[]=
317 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
// Decodes the UTF-8 string `psz` into wide characters. With buf == NULL the
// loop runs over the whole input (length-query mode); otherwise it stops once
// `len` reaches `n`.
// NOTE(review): many lines of this function (declarations of len/cnt, the
// error returns, the stores into buf) are not visible here; the comments below
// describe only the visible statements.
319 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
323 while (*psz
&& ((!buf
) || (len
< n
)))
325 unsigned char cc
= *psz
++, fc
= cc
;
// count the leading 1 bits of the first byte
327 for (cnt
= 0; fc
& 0x80; cnt
++)
341 // invalid UTF-8 sequence
// ocnt = number of continuation bytes; res starts with the payload bits of the
// lead byte
346 unsigned ocnt
= cnt
- 1;
347 wxUint32 res
= cc
& (0x3f >> cnt
);
// every continuation byte must look like 10xxxxxx
351 if ((cc
& 0xC0) != 0x80)
353 // invalid UTF-8 sequence
356 res
= (res
<< 6) | (cc
& 0x3f);
// a value that would have fitted in fewer bytes is an overlong encoding
358 if (res
<= utf8_max
[ocnt
])
360 // illegal UTF-8 encoding
// 16-bit wchar_t: re-encode the code point as UTF-16
364 size_t pa
= encode_utf16(res
, buf
);
365 if (pa
== (size_t)-1)
374 #endif // WC_UTF16/!WC_UTF16
// NUL-terminate only if there is room left
378 if (buf
&& (len
< n
))
// Encodes the wide string `psz` as UTF-8. With buf == NULL the loop runs over
// the whole input (length-query mode); otherwise it stops once `len` reaches
// `n`.
// NOTE(review): several lines (declarations, the #if selecting the UTF-16 or
// the direct path, the length accounting) are not visible here.
383 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
387 while (*psz
&& ((!buf
) || (len
< n
)))
// UTF-16 path: decode a (possibly surrogate) sequence; on failure skip one
// unit
391 size_t pa
= decode_utf16(psz
, cc
);
392 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
// direct path: take the wide character itself, limited to 31 bits
394 cc
=(*psz
++) & 0x7fffffff;
// cnt = number of continuation bytes needed for cc
397 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
// lead byte: cnt + 1 high bits set, followed by the top payload bits.
// NOTE(review): `-128 >> cnt` right-shifts a negative int, which is
// implementation-defined before C++20; it relies on an arithmetic shift.
411 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
// continuation byte: 10xxxxxx
413 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
// NUL-terminate only if there is room left
418 if (buf
&& (len
<n
)) *buf
= 0;
426 // ----------------------------------------------------------------------------
428 // ----------------------------------------------------------------------------
// Map the "straight" (native byte order) and "swap" (opposite byte order)
// implementation names onto the public LE/BE classes according to
// WORDS_BIGENDIAN, then define the two global converter objects.
// NOTE(review): the #else/#endif lines are not visible here.
430 #ifdef WORDS_BIGENDIAN
431 #define wxMBConvUTF16straight wxMBConvUTF16BE
432 #define wxMBConvUTF16swap wxMBConvUTF16LE
434 #define wxMBConvUTF16swap wxMBConvUTF16BE
435 #define wxMBConvUTF16straight wxMBConvUTF16LE
439 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF16LE
) wxConvUTF16LE
;
440 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF16BE
) wxConvUTF16BE
;
449 // copy 16bit MB to 16bit String
// Native-order UTF-16 bytes -> 16-bit wchar_t: each 16-bit unit is copied
// unchanged until a zero unit is read or, when buf is non-NULL, `len` reaches
// `n`. The result is NUL-terminated only if room is left.
// NOTE(review): psz is read through a wxUint16* cast, which assumes the input
// is suitably aligned. The #if selecting this 16-bit-wchar_t variant and the
// return statement are not visible here.
450 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
454 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
457 *buf
++ = *(wxUint16
*)psz
;
460 psz
+= sizeof(wxUint16
);
462 if (buf
&& len
<n
) *buf
=0;
468 // copy 16bit String to 16bit MB
// 16-bit wchar_t -> native-order UTF-16 bytes: each unit is stored unchanged
// and `len` (a byte count) advances by sizeof(wxUint16). A 16-bit zero
// terminator is appended only if it still fits within `n` bytes.
// NOTE(review): `n - sizeof(wxUint16)` is unsigned arithmetic; for
// n < sizeof(wxUint16) it wraps to a huge value, so the terminator would be
// written although there is no room -- confirm callers never pass such an n
// with a non-NULL buf.
// NOTE(review): the loop guard `len < n` admits a 2-byte store when only one
// byte remains.
469 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
473 while (*psz
&& (!buf
|| len
< n
))
477 *(wxUint16
*)buf
= *psz
;
478 buf
+= sizeof(wxUint16
);
480 len
+= sizeof(wxUint16
);
483 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
489 // swap 16bit MB to 16bit String
// Opposite-order UTF-16 bytes -> 16-bit wchar_t: the two bytes of each unit
// are exchanged while copying. Stops at a zero unit or, when buf is non-NULL,
// once `len` reaches `n`; NUL-terminates only if room is left.
// NOTE(review): the advance of buf/len and the return statement are not
// visible here.
490 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
494 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
498 ((char *)buf
)[0] = psz
[1];
499 ((char *)buf
)[1] = psz
[0];
503 psz
+= sizeof(wxUint16
);
505 if (buf
&& len
<n
) *buf
=0;
511 // swap 16bit String to 16bit MB
// 16-bit wchar_t -> opposite-order UTF-16 bytes: the two bytes of each unit
// are emitted in reverse order and `len` (a byte count) advances by
// sizeof(wxUint16). A 16-bit zero terminator is appended only if it fits.
// NOTE(review): `n - sizeof(wxUint16)` is unsigned arithmetic; for
// n < sizeof(wxUint16) it wraps to a huge value, so the terminator would be
// written although there is no room -- confirm callers never pass such an n
// with a non-NULL buf.
512 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
516 while (*psz
&& (!buf
|| len
< n
))
520 *buf
++ = ((char*)psz
)[1];
521 *buf
++ = ((char*)psz
)[0];
523 len
+= sizeof(wxUint16
);
526 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
535 // copy 16bit MB to 32bit String
// Native-order UTF-16 bytes -> 32-bit wchar_t: each step decodes one code
// point with decode_utf16(), treats (size_t)-1 as an error, and advances psz
// by the number of 16-bit units consumed. NUL-terminates only if room is left.
// NOTE(review): the store into buf, the error return and the final return are
// not visible here.
536 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
540 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
543 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
544 if (pa
== (size_t)-1)
550 psz
+= pa
* sizeof(wxUint16
);
552 if (buf
&& len
<n
) *buf
=0;
558 // copy 32bit String to 16bit MB
// 32-bit wchar_t -> native-order UTF-16 bytes: each character is encoded with
// encode_utf16() into the local pair `cc`; cc[0] is always stored and cc[1]
// presumably only when a surrogate pair was produced (the guarding test is not
// visible here). `len` counts bytes and advances by pa * sizeof(wxUint16).
// NOTE(review): `n - sizeof(wxUint16)` is unsigned arithmetic; for
// n < sizeof(wxUint16) it wraps to a huge value, so the terminator would be
// written although there is no room -- confirm.
// NOTE(review): the loop guard `len < n` does not ensure that a 4-byte
// surrogate pair still fits in the remaining space.
559 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
563 while (*psz
&& (!buf
|| len
< n
))
566 size_t pa
=encode_utf16(*psz
, cc
);
568 if (pa
== (size_t)-1)
573 *(wxUint16
*)buf
= cc
[0];
574 buf
+= sizeof(wxUint16
);
577 *(wxUint16
*)buf
= cc
[1];
578 buf
+= sizeof(wxUint16
);
582 len
+= pa
*sizeof(wxUint16
);
585 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
591 // swap 16bit MB to 32bit String
// Opposite-order UTF-16 bytes -> 32-bit wchar_t: the next two 16-bit units are
// byte-swapped into the local `tmp` before decode_utf16() is applied, so the
// decoder always sees native byte order. psz then advances by the number of
// units actually consumed.
// NOTE(review): psz[2] and psz[3] are read on every iteration, even when the
// current unit is not a surrogate; for the last unit of a properly terminated
// string these are the bytes of the 16-bit terminator.
592 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
596 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
600 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
601 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
603 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
604 if (pa
== (size_t)-1)
611 psz
+= pa
* sizeof(wxUint16
);
613 if (buf
&& len
<n
) *buf
=0;
619 // swap 32bit String to 16bit MB
// 32-bit wchar_t -> opposite-order UTF-16 bytes: each character is encoded
// with encode_utf16() into the local pair `cc`, whose units are then emitted
// with their two bytes exchanged (bytes 1,0 for the first unit and 3,2 for the
// second). `len` counts bytes and advances by pa * sizeof(wxUint16).
// NOTE(review): the test guarding the second unit is not visible here.
// NOTE(review): `n - sizeof(wxUint16)` is unsigned arithmetic; for
// n < sizeof(wxUint16) it wraps to a huge value, so the terminator would be
// written although there is no room -- confirm.
620 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
624 while (*psz
&& (!buf
|| len
< n
))
627 size_t pa
=encode_utf16(*psz
, cc
);
629 if (pa
== (size_t)-1)
634 *buf
++ = ((char*)cc
)[1];
635 *buf
++ = ((char*)cc
)[0];
638 *buf
++ = ((char*)cc
)[3];
639 *buf
++ = ((char*)cc
)[2];
643 len
+= pa
*sizeof(wxUint16
);
646 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
654 // ----------------------------------------------------------------------------
656 // ----------------------------------------------------------------------------
// Map the "straight" (native byte order) and "swap" (opposite byte order)
// implementation names onto the public LE/BE classes according to
// WORDS_BIGENDIAN, then define the two global converter objects.
// NOTE(review): the #else/#endif lines are not visible here.
658 #ifdef WORDS_BIGENDIAN
659 #define wxMBConvUTF32straight wxMBConvUTF32BE
660 #define wxMBConvUTF32swap wxMBConvUTF32LE
662 #define wxMBConvUTF32swap wxMBConvUTF32BE
663 #define wxMBConvUTF32straight wxMBConvUTF32LE
667 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
668 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
673 // copy 32bit MB to 16bit String
// Native-order UTF-32 bytes -> 16-bit wchar_t: each 32-bit value is encoded
// with encode_utf16() into the local `cc`; (size_t)-1 is treated as an error.
// psz advances by one 32-bit unit per step. NUL-terminates only if room is
// left.
// NOTE(review): the stores into buf, the length accounting and the returns are
// not visible here. psz is read through a wxUint32* cast, which assumes
// suitable alignment.
674 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
678 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
682 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
683 if (pa
== (size_t)-1)
693 psz
+= sizeof(wxUint32
);
695 if (buf
&& len
<n
) *buf
=0;
701 // copy 16bit String to 32bit MB
// 16-bit wchar_t -> native-order UTF-32 bytes: each step decodes one code
// point with decode_utf16() and stores it as a 32-bit value; `len` counts
// bytes and advances by sizeof(wxUint32). A 32-bit zero terminator is appended
// only if it fits.
// NOTE(review): the advance of psz and the returns are not visible here.
// NOTE(review): `n - sizeof(wxUint32)` is unsigned arithmetic; for
// n < sizeof(wxUint32) it wraps to a huge value, so the terminator would be
// written although there is no room -- confirm.
702 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
706 while (*psz
&& (!buf
|| len
< n
))
710 size_t pa
=decode_utf16(psz
, cc
);
711 if (pa
== (size_t)-1)
716 *(wxUint32
*)buf
= cc
;
717 buf
+= sizeof(wxUint32
);
719 len
+= sizeof(wxUint32
);
722 if (buf
&& len
<=n
-sizeof(wxUint32
)) *(wxUint32
*)buf
=0;
729 // swap 32bit MB to 16bit String
// Opposite-order UTF-32 bytes -> 16-bit wchar_t: the four bytes of each value
// are reversed into the local `tmp`, which is then encoded with
// encode_utf16(); (size_t)-1 is treated as an error. psz advances by one
// 32-bit unit per step.
// NOTE(review): the stores into buf, the length accounting and the returns are
// not visible here.
730 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
734 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
737 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
738 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
743 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
744 if (pa
== (size_t)-1)
754 psz
+= sizeof(wxUint32
);
756 if (buf
&& len
<n
) *buf
=0;
762 // swap 16bit String to 32bit MB
// 16-bit wchar_t -> opposite-order UTF-32 bytes: each step decodes one code
// point with decode_utf16() directly into the local byte array `cc` (viewed as
// a wxUint32); `len` counts bytes and advances by sizeof(wxUint32).
// NOTE(review): the byte-reversed stores into buf, the advance of psz and the
// returns are not visible here.
// NOTE(review): `n - sizeof(wxUint32)` is unsigned arithmetic; for
// n < sizeof(wxUint32) it wraps to a huge value, so the terminator would be
// written although there is no room -- confirm.
763 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
767 while (*psz
&& (!buf
|| len
< n
))
771 size_t pa
=decode_utf16(psz
, *(wxUint32
*)cc
);
772 if (pa
== (size_t)-1)
782 len
+= sizeof(wxUint32
);
785 if (buf
&& len
<=n
-sizeof(wxUint32
)) *(wxUint32
*)buf
=0;
793 // copy 32bit MB to 32bit String
// Native-order UTF-32 bytes -> 32-bit wchar_t: each 32-bit value is copied
// unchanged until a zero value is read or, when buf is non-NULL, `len` reaches
// `n`. NUL-terminates only if room is left.
// NOTE(review): psz is read through a wxUint32* cast, which assumes suitable
// alignment. The length accounting and the return are not visible here.
794 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
798 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
801 *buf
++ = *(wxUint32
*)psz
;
803 psz
+= sizeof(wxUint32
);
805 if (buf
&& len
<n
) *buf
=0;
811 // copy 32bit String to 32bit MB
// 32-bit wchar_t -> native-order UTF-32 bytes: each character is stored
// unchanged as a 32-bit value; `len` counts bytes and advances by
// sizeof(wxUint32). A 32-bit zero terminator is appended only if it fits.
// NOTE(review): `n - sizeof(wxUint32)` is unsigned arithmetic; for
// n < sizeof(wxUint32) it wraps to a huge value, so the terminator would be
// written although there is no room -- confirm.
// NOTE(review): the loop guard `len < n` admits a 4-byte store when fewer than
// four bytes remain.
812 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
816 while (*psz
&& (!buf
|| len
< n
))
820 *(wxUint32
*)buf
= *psz
;
821 buf
+= sizeof(wxUint32
);
824 len
+= sizeof(wxUint32
);
828 if (buf
&& len
<=n
-sizeof(wxUint32
)) *(wxUint32
*)buf
=0;
834 // swap 32bit MB to 32bit String
// Opposite-order UTF-32 bytes -> 32-bit wchar_t: the four bytes of each value
// are written into *buf in reverse order. Stops at a zero value or, when buf
// is non-NULL, once `len` reaches `n`; NUL-terminates only if room is left.
// NOTE(review): the advance of buf/len and the return are not visible here.
835 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
839 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
843 ((char *)buf
)[0] = psz
[3];
844 ((char *)buf
)[1] = psz
[2];
845 ((char *)buf
)[2] = psz
[1];
846 ((char *)buf
)[3] = psz
[0];
850 psz
+= sizeof(wxUint32
);
852 if (buf
&& len
<n
) *buf
=0;
858 // swap 32bit String to 32bit MB
// 32-bit wchar_t -> opposite-order UTF-32 bytes: the four bytes of each
// character are emitted in reverse order; `len` counts bytes and advances by
// sizeof(wxUint32). A 32-bit zero terminator is appended only if it fits.
// NOTE(review): the advance of psz and the return are not visible here.
// NOTE(review): `n - sizeof(wxUint32)` is unsigned arithmetic; for
// n < sizeof(wxUint32) it wraps to a huge value, so the terminator would be
// written although there is no room -- confirm.
859 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
863 while (*psz
&& (!buf
|| len
< n
))
867 *buf
++ = ((char *)psz
)[3];
868 *buf
++ = ((char *)psz
)[2];
869 *buf
++ = ((char *)psz
)[1];
870 *buf
++ = ((char *)psz
)[0];
872 len
+= sizeof(wxUint32
);
875 if (buf
&& len
<=n
-sizeof(wxUint32
)) *(wxUint32
*)buf
=0;
884 // ============================================================================
885 // wxCharacterSet and derived classes
886 // ============================================================================
888 // ----------------------------------------------------------------------------
889 // wxCharacterSet is the ABC for the classes below
890 // ----------------------------------------------------------------------------
// Interface implemented by every backend below: the two conversion directions
// plus usable(), which the factory wxGetCharacterSet() checks before handing a
// backend out. The destructor is virtual so backends can be destroyed through
// a wxCharacterSet pointer.
// NOTE(review): the class header line is not visible here.
896 virtual ~wxCharacterSet() {}
898 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) = 0;
899 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) = 0;
900 virtual bool usable() const = 0;
903 // ----------------------------------------------------------------------------
904 // ID_CharSet: implementation of wxCharacterSet using an existing wxMBConv
905 // ----------------------------------------------------------------------------
// Adapter exposing an existing wxMBConv through the wxCharacterSet interface.
// Both conversions forward to the wrapped converter and yield (size_t)-1 when
// it is NULL; usable() is true exactly when the wrapped pointer is non-NULL.
// NOTE(review): the declaration of `work` is not visible here; the pointer is
// stored as given, so presumably it is not owned -- confirm.
907 class ID_CharSet
: public wxCharacterSet
910 ID_CharSet(wxMBConv
*cnv
) : work(cnv
) {}
912 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
)
913 { return work
? work
->MB2WC(buf
,psz
,n
) : (size_t)-1; }
915 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
)
916 { return work
? work
->WC2MB(buf
,psz
,n
) : (size_t)-1; }
919 { return work
!=NULL
; }
925 // ============================================================================
926 // The classes doing conversion using the iconv_xxx() functions
927 // ============================================================================
931 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
932 // if output buffer is _exactly_ as big as needed. Such case is (unless there's
933 // yet another bug in glibc) the only case when iconv() returns with (size_t)-1
934 // (which means error) and says there are 0 bytes left in the input buffer --
935 // when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
936 // this alternative test for iconv() failure.
937 // [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft): on glibc <= 2.1 an E2BIG result with no input
// bytes left is not counted as a failure; elsewhere any (size_t)-1 is.
// NOTE(review): the #else/#endif lines are not visible here.
938 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
939 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
940 (errno != E2BIG || bufLeft != 0))
942 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
// Casts an input-pointer argument to the `ICONV_CONST char **` type used for
// iconv()'s second parameter.
945 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
947 // ----------------------------------------------------------------------------
948 // IC_CharSet: encapsulates an iconv character set
949 // ----------------------------------------------------------------------------
// wxCharacterSet backend built on iconv. It keeps one conversion descriptor
// per direction (m2w, w2m) and is usable only when both were opened
// successfully. The wide-character charset name and the byte-swap flag are
// static and therefore shared by all instances.
// NOTE(review): the declarations of m2w/w2m and the access specifiers are not
// visible here.
951 class IC_CharSet
: public wxCharacterSet
954 IC_CharSet(const wxChar
*name
);
955 virtual ~IC_CharSet();
957 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
);
958 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
);
961 { return (m2w
!= (iconv_t
)-1) && (w2m
!= (iconv_t
)-1); }
964 // the iconv handlers used to translate from multibyte to wide char and in
965 // the other direction
970 // the name (for iconv_open()) of a wide char charset - if none is
971 // available on this machine, it will remain NULL
972 static const char *ms_wcCharsetName
;
974 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
975 // different endian-ness than the native one
976 static bool ms_wcNeedsSwap
;
// Static state shared by all IC_CharSet objects: no wide-character charset
// name is known until the first constructor run probes for one, and no byte
// swapping is assumed until that probe says otherwise.
979 const char *IC_CharSet::ms_wcCharsetName
= NULL
;
980 bool IC_CharSet::ms_wcNeedsSwap
= FALSE
;
// Opens the iconv descriptors for charset `name`.
// On the first construction (ms_wcCharsetName still NULL) it also discovers
// how iconv names the wchar_t encoding, trying in order WC_NAME_BEST (explicit
// byte order), WC_NAME (no byte order) and "WCHAR_T". For a candidate opened
// without explicit byte order, a probe conversion decides whether the output
// must be byte-swapped. Later constructions reuse the discovered name.
// NOTE(review): many lines (declaration of cname, the probe setup, the nesting
// of the tests) are not visible here; the comments describe only the visible
// statements.
982 IC_CharSet::IC_CharSet(const wxChar
*name
)
984 // Do it the hard way
// Narrow `name` to char by truncating each character, including the NUL.
// NOTE(review): the size of `cname` is not visible here; nothing visible
// bounds this copy, and wxStrlen(name) is re-evaluated every iteration.
986 for (size_t i
= 0; i
< wxStrlen(name
)+1; i
++)
987 cname
[i
] = (char) name
[i
];
989 // check for charset that represents wchar_t:
990 if (ms_wcCharsetName
== NULL
)
992 ms_wcNeedsSwap
= FALSE
;
994 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
995 ms_wcCharsetName
= WC_NAME_BEST
;
996 m2w
= iconv_open(ms_wcCharsetName
, cname
);
998 if (m2w
== (iconv_t
)-1)
1000 // try charset w/o bytesex info (e.g. "UCS4")
1001 // and check for bytesex ourselves:
1002 ms_wcCharsetName
= WC_NAME
;
1003 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1005 // last bet, try if it knows WCHAR_T pseudo-charset
1006 if (m2w
== (iconv_t
)-1)
1008 ms_wcCharsetName
= "WCHAR_T";
1009 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1012 if (m2w
!= (iconv_t
)-1)
// probe: convert a short sample and compare it with the expected value
1014 char buf
[2], *bufPtr
;
1015 wchar_t wbuf
[2], *wbufPtr
;
1023 outsz
= SIZEOF_WCHAR_T
* 2;
1027 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1028 (char**)&wbufPtr
, &outsz
);
1030 if (ICONV_FAILED(res
, insz
))
1032 ms_wcCharsetName
= NULL
;
1033 wxLogLastError(wxT("iconv"));
1034 wxLogError(_("Conversion to charset '%s' doesn't work."), name
);
// a converted sample that differs from the plain cast means that the wide
// output has the opposite byte order
1038 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1043 ms_wcCharsetName
= NULL
;
1045 // VS: we must not output an error here, since wxWindows will safely
1046 // fall back to using wxEncodingConverter.
1047 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name
);
// NOTE(review): ms_wcCharsetName may be NULL at this point and is passed to a
// '%s' format -- confirm that this is safe for wxLogTrace.
1051 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName
, ms_wcNeedsSwap
);
1053 else // we already have ms_wcCharsetName
1055 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1058 // NB: don't ever pass NULL to iconv_open(), it may crash!
1059 if ( ms_wcCharsetName
)
1061 w2m
= iconv_open( cname
, ms_wcCharsetName
);
// Destructor: acts on each descriptor only if it was opened successfully.
// NOTE(review): the statements under the two tests are not visible here;
// presumably they call iconv_close() -- confirm.
1069 IC_CharSet::~IC_CharSet()
1071 if ( m2w
!= (iconv_t
)-1 )
1073 if ( w2m
!= (iconv_t
)-1 )
// Multibyte -> wide conversion through iconv.
// With a destination buffer the input is converted in one call and the number
// of wide characters produced is derived from the output space left over.
// Without one, the input is converted repeatedly into an 8-character scratch
// buffer, only to count the characters, for as long as iconv reports E2BIG.
// Failures are traced, not reported as errors.
// NOTE(review): the buf != NULL test, the byte-swap test, the scratch buffer
// declaration and the returns are not visible here.
1077 size_t IC_CharSet::MB2WC(wchar_t *buf
, const char *psz
, size_t n
)
// iconv works in bytes: the input length excludes the NUL, and the output
// capacity is n wide characters
1079 size_t inbuf
= strlen(psz
);
1080 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1082 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1083 wchar_t *bufPtr
= buf
;
1084 const char *pszPtr
= psz
;
1088 // have destination buffer, convert there
1090 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1091 (char**)&bufPtr
, &outbuf
);
// characters written = capacity - characters of space still free
1092 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1096 // convert to native endianness
1097 WC_BSWAP(buf
/* _not_ bufPtr */, res
)
1100 // NB: iconv was given only strlen(psz) characters on input, and so
1101 // it couldn't convert the trailing zero. Let's do it ourselves
1102 // if there's some room left for it in the output buffer.
1108 // no destination buffer... convert using temp buffer
1109 // to calculate destination buffer requirement
1114 outbuf
= 8*SIZEOF_WCHAR_T
;
1117 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1118 (char**)&bufPtr
, &outbuf
);
1120 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1121 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1124 if (ICONV_FAILED(cres
, inbuf
))
1126 //VS: it is ok if iconv fails, hence trace only
1127 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
// Wide -> multibyte conversion through iconv.
// If the wide charset needs byte swapping, the input is first copied into a
// temporary buffer and swapped there. With a destination buffer the input is
// then converted in one call; without one it is converted repeatedly into a
// 16-byte scratch buffer, only to measure the output, for as long as iconv
// reports E2BIG. Failures are traced, not reported as errors.
// NOTE(review): the byte-swap test, the redirection of psz to tmpbuf, the
// free() of tmpbuf and the returns are not visible here.
1134 size_t IC_CharSet::WC2MB(char *buf
, const wchar_t *psz
, size_t n
)
// input size in BYTES (iconv's unit), excluding the terminating NUL
1136 size_t inbuf
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
1140 wchar_t *tmpbuf
= 0;
1144 // need to copy to temp buffer to switch endianness
1145 // this absolutely doesn't rock!
1146 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1147 // could be in read-only memory, or be accessed in some other thread)
// NOTE(review): `inbuf` is already a byte count, yet it is multiplied by
// SIZEOF_WCHAR_T again for malloc/memcpy and passed to WC_BSWAP as an element
// count. The memcpy therefore appears to read (inbuf+1)*SIZEOF_WCHAR_T bytes
// from a source holding only inbuf + SIZEOF_WCHAR_T, i.e. past the end of psz,
// and the swap touches SIZEOF_WCHAR_T times too many elements. The character
// count wxWcslen(psz) looks like the intended quantity -- confirm.
// NOTE(review): the malloc() result is not checked in the visible lines.
1148 tmpbuf
=(wchar_t*)malloc((inbuf
+1)*SIZEOF_WCHAR_T
);
1149 memcpy(tmpbuf
,psz
,(inbuf
+1)*SIZEOF_WCHAR_T
);
1150 WC_BSWAP(tmpbuf
, inbuf
)
1156 // have destination buffer, convert there
1157 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1161 // NB: iconv was given only wcslen(psz) characters on input, and so
1162 // it couldn't convert the trailing zero. Let's do it ourselves
1163 // if there's some room left for it in the output buffer.
1169 // no destination buffer... convert using temp buffer
1170 // to calculate destination buffer requirement
1174 buf
= tbuf
; outbuf
= 16;
1176 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1179 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1187 if (ICONV_FAILED(cres
, inbuf
))
1189 //VS: it is ok if iconv fails, hence trace only
1190 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1197 #endif // HAVE_ICONV
1199 // ============================================================================
1200 // Win32 conversion classes
1201 // ============================================================================
1203 #if defined(__WIN32__) && !defined(__WXMICROWIN__) && !defined(__WXUNIVERSAL__)
// Helpers defined elsewhere that map a charset name or a wxFontEncoding to a
// code page number.
1206 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1207 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
// wxCharacterSet backend using the Win32 MultiByteToWideChar() and
// WideCharToMultiByte() functions with a fixed code page. It is usable when
// the code page lookup did not yield -1.
// NOTE(review): the flags arguments, the declaration of m_CodePage and the
// header of usable() are not visible here.
1209 class CP_CharSet
: public wxCharacterSet
1212 CP_CharSet(const wxChar
* name
)
1214 m_CodePage
= wxCharsetToCodepage(name
);
1217 CP_CharSet(wxFontEncoding encoding
)
1219 m_CodePage
= wxEncodingToCodepage(encoding
);
1222 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
)
1224 const size_t len
= ::MultiByteToWideChar
1226 m_CodePage
, // code page
1228 psz
, // input string
1229 -1, // its length (NUL-terminated)
1230 buf
, // output string
1231 buf
? n
: 0 // size of output buffer
1234 // note that it returns # of written chars for buf != NULL and *size*
1235 // of the needed buffer for buf == NULL
// A result of 0 signals failure and is mapped to (size_t)-1.
// NOTE(review): according to the MultiByteToWideChar documentation, with an
// input length of -1 the returned count includes the terminating NUL in both
// modes, so the buf != NULL branch presumably reports one character more than
// the buf == NULL branch -- confirm whether it should subtract 1 as well.
1236 return len
? (buf
? len
: len
- 1) : (size_t)-1;
1239 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
)
1241 const size_t len
= ::WideCharToMultiByte
1243 m_CodePage
, // code page
1245 psz
, // input string
1246 -1, // it is (wide) NUL-terminated
1247 buf
, // output buffer
1248 buf
? n
: 0, // and its size
1249 NULL
, // default "replacement" char
1250 NULL
// [out] was it used?
1253 // see the comment above!
1254 return len
? (buf
? len
: len
- 1) : (size_t)-1;
1258 { return m_CodePage
!= -1; }
1263 #endif // defined(__WIN32__) && !defined(__WXMICROWIN__) && !defined(__WXUNIVERSAL__)
1265 // ============================================================================
1266 // wxEncodingConverter based conversion classes
1267 // ============================================================================
// wxCharacterSet backend built on wxEncodingConverter: one converter per
// direction between m_enc and wxFONTENCODING_UNICODE. It is usable only when
// both converters were initialised successfully.
// NOTE(review): the header of the member that sets m_ok, the constructor
// bodies, the declaration of m_ok and the returns of MB2WC/WC2MB are not
// visible here.
1271 class EC_CharSet
: public wxCharacterSet
1276 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
1277 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
1281 // temporarily just use wxEncodingConverter stuff,
1282 // so that it works while a better implementation is built
1283 EC_CharSet(const wxChar
* name
)
// the encoding is looked up from the charset name (second argument FALSE);
// a separate path falls back to wxFONTENCODING_SYSTEM
1286 m_enc
= wxFontMapper::Get()->CharsetToEncoding(name
, FALSE
);
1288 m_enc
= wxFONTENCODING_SYSTEM
;
1293 EC_CharSet(wxFontEncoding enc
)
// NOTE(review): `n` is unused in both conversions, so Convert() writes the
// whole converted string regardless of the capacity of `buf` -- confirm that
// callers always size the buffer from a prior length query.
1300 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
))
1302 size_t inbuf
= strlen(psz
);
1304 m2w
.Convert(psz
,buf
);
1308 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
))
1310 const size_t inbuf
= wxWcslen(psz
);
1312 w2m
.Convert(psz
,buf
);
1317 bool usable() const { return m_ok
; }
1320 wxFontEncoding m_enc
;
1321 wxEncodingConverter m2w
, w2m
;
1323 // were we initialized successfully?
1326 DECLARE_NO_COPY_CLASS(EC_CharSet
)
1329 #endif // wxUSE_FONTMAP
1331 // ----------------------------------------------------------------------------
1332 // the function creating the wxCharacterSet for the specified charset on the
1333 // current system, trying all possibilities
1335 // it uses the name if it is given or encoding if name == NULL
1336 // ----------------------------------------------------------------------------
// Factory returning a heap-allocated wxCharacterSet for the charset given by
// `name`, or by `encoding` when no name is given.
// Order visible below: the default/ASCII case is handled first ("don't convert
// at all"); the UTF-8/16/32 names and encodings map to ID_CharSet wrappers
// around the built-in converters; otherwise IC_CharSet (iconv), CP_CharSet
// (Win32) and EC_CharSet (wxEncodingConverter) are tried in turn, each kept
// only if usable(). If nothing works an error is logged.
// NOTE(review): many lines (returns, deletes of unusable candidates, #if
// lines, the guard protecting wxStricmp() against a NULL name) are not visible
// here.
1338 static wxCharacterSet
*
1339 wxGetCharacterSet(const wxChar
*name
, wxFontEncoding encoding
)
1341 // check for the special case of ASCII charset
1342 if ( (!name
&& encoding
== wxFONTENCODING_DEFAULT
)
1344 || (name
&& wxFontMapper::Get()->
1345 CharsetToEncoding(name
) == wxFONTENCODING_DEFAULT
)
1346 #endif // wxUSE_FONTMAP
1349 // don't convert at all
1353 wxCharacterSet
*cset
= NULL
;
1357 if((wxStricmp(name
, wxT("UTF8")) == 0) ||
1358 (wxStricmp(name
, wxT("UTF-8")) == 0) ||
1359 encoding
== wxFONTENCODING_UTF8
)
1361 cset
= new ID_CharSet(&wxConvUTF8
);
// unqualified UTF-16 means native byte order
1363 else if((wxStricmp(name
, wxT("UTF16")) == 0) ||
1364 (wxStricmp(name
, wxT("UTF-16")) == 0) ||
1365 encoding
== wxFONTENCODING_UTF16
)
1367 #ifdef WORDS_BIGENDIAN
1368 cset
= new ID_CharSet(&wxConvUTF16BE
);
1370 cset
= new ID_CharSet(&wxConvUTF16LE
);
1373 else if((wxStricmp(name
, wxT("UTF16BE")) == 0) ||
1374 (wxStricmp(name
, wxT("UTF-16BE")) == 0) ||
1375 encoding
== wxFONTENCODING_UTF16BE
)
1377 cset
= new ID_CharSet(&wxConvUTF16BE
);
1379 else if((wxStricmp(name
, wxT("UTF16LE")) == 0) ||
1380 (wxStricmp(name
, wxT("UTF-16LE")) == 0) ||
1381 encoding
== wxFONTENCODING_UTF16LE
)
1383 cset
= new ID_CharSet(&wxConvUTF16LE
);
// unqualified UTF-32/UCS-4 means native byte order
1385 else if((wxStricmp(name
, wxT("UTF32")) == 0) ||
1386 (wxStricmp(name
, wxT("UTF-32")) == 0) ||
1387 (wxStricmp(name
, wxT("UCS4")) == 0) ||
1388 (wxStricmp(name
, wxT("UCS-4")) == 0) ||
1389 encoding
== wxFONTENCODING_UTF32
)
1391 #ifdef WORDS_BIGENDIAN
1392 cset
= new ID_CharSet(&wxConvUTF32BE
);
1394 cset
= new ID_CharSet(&wxConvUTF32LE
);
1397 else if((wxStricmp(name
, wxT("UTF32BE")) == 0) ||
1398 (wxStricmp(name
, wxT("UTF-32BE")) == 0) ||
1399 (wxStricmp(name
, wxT("UCS4BE")) == 0) ||
1400 (wxStricmp(name
, wxT("UCS-4BE")) == 0) ||
1401 encoding
== wxFONTENCODING_UTF32BE
)
1403 cset
= new ID_CharSet(&wxConvUTF32BE
);
// NOTE(review): this little-endian branch tests wxFONTENCODING_UTF32, which
// the unqualified UTF-32 branch above has already consumed, so the test can
// never be true here and wxFONTENCODING_UTF32LE is not matched by encoding
// anywhere. By analogy with the UTF16LE and UTF32BE branches this looks like a
// copy/paste slip for wxFONTENCODING_UTF32LE -- confirm.
1405 else if((wxStricmp(name
, wxT("UTF32LE")) == 0) ||
1406 (wxStricmp(name
, wxT("UTF-32LE")) == 0) ||
1407 (wxStricmp(name
, wxT("UCS4LE")) == 0) ||
1408 (wxStricmp(name
, wxT("UCS-4LE")) == 0) ||
1409 encoding
== wxFONTENCODING_UTF32
)
1411 cset
= new ID_CharSet(&wxConvUTF32LE
);
// no built-in converter matched: try iconv by name
1416 cset
= new IC_CharSet(name
);
1418 #endif // HAVE_ICONV
1421 // it can only be NULL in this case
1424 #endif // !HAVE_ICONV
1426 if ( cset
->usable() )
// next candidate: Win32 code page conversion
1433 #if defined(__WIN32__) && !defined(__WXMICROWIN__) && !defined(__WXUNIVERSAL__)
1434 cset
= name
? new CP_CharSet(name
) : new CP_CharSet(encoding
);
1435 if ( cset
->usable() )
1440 #endif // defined(__WIN32__) && !defined(__WXMICROWIN__) && !defined(__WXUNIVERSAL__)
// last candidate: wxEncodingConverter
1443 cset
= name
? new EC_CharSet(name
) : new EC_CharSet(encoding
);
1444 if ( cset
->usable() )
1449 #endif // wxUSE_FONTMAP
1451 wxLogError(_("Cannot convert from encoding '%s'!"),
1455 wxFontMapper::GetEncodingDescription(encoding
).c_str()
1456 #else // !wxUSE_FONTMAP
// NOTE(review): `encoding` is an enum value formatted with "%s" here, which
// looks like a format/argument mismatch -- confirm.
1457 wxString::Format(_T("%s"), encoding
).c_str()
1458 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1464 // ============================================================================
1465 // wxCSConv implementation
1466 // ============================================================================
// Puts the object into its empty state: no charset name and no backend.
// NOTE(review): any further member initialised here (e.g. a "deferred" flag)
// is not visible in these lines.
1468 void wxCSConv::Init()
1470 m_name
= (wxChar
*)NULL
;
1471 m_cset
= (wxCharacterSet
*) NULL
;
// Constructs a converter from a charset name; the encoding is left at
// wxFONTENCODING_DEFAULT.
// NOTE(review): the calls that initialise the object and record `charset` are
// not visible here (presumably Init() and SetName()) -- confirm.
1475 wxCSConv::wxCSConv(const wxChar
*charset
)
1478 m_encoding
= wxFONTENCODING_DEFAULT
;
// Constructs a converter from a font encoding, which is stored in m_encoding.
// NOTE(review): the remaining lines of the body are not visible here
// (presumably a call to Init()) -- confirm.
1483 wxCSConv::wxCSConv(wxFontEncoding encoding
)
1487 m_encoding
= encoding
;
// Destructor. NOTE(review): the body is not visible here; presumably it calls
// Clear() to release m_name and m_cset -- confirm.
1490 wxCSConv::~wxCSConv()
// Copy constructor: takes over the source's charset name (through SetName())
// and its encoding. The backend m_cset is not copied in the visible lines.
// NOTE(review): the base-class initialiser and any call to Init() are not
// visible here.
1495 wxCSConv::wxCSConv(const wxCSConv
& conv
)
1500 SetName(conv
.m_name
);
1501 m_encoding
= conv
.m_encoding
;
// Assignment: takes over the source's charset name (through SetName()) and its
// encoding.
// NOTE(review): the lines before SetName() are not visible here; if they call
// Clear(), then self-assignment would hand an already released m_name to
// SetName() -- confirm.
1504 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
1508 SetName(conv
.m_name
);
1509 m_encoding
= conv
.m_encoding
;
// NOTE(review): the body is not visible here; presumably it releases m_name
// and m_cset and resets them to NULL -- confirm.
1514 void wxCSConv::Clear()
// Stores a private copy of `charset` in m_name (made with wxStrdup()).
// NOTE(review): a NULL check and any upper-casing or "deferred" bookkeeping
// around this statement are not visible here; nothing visible releases a
// previously stored name -- confirm.
1523 void wxCSConv::SetName(const wxChar
*charset
)
1527 m_name
= wxStrdup(charset
);
// Creates the backend m_cset on demand. If neither a name nor a specific
// encoding was given, the system encoding name is obtained from wxLocale first
// (only when wxUSE_INTL is enabled); then wxGetCharacterSet() selects the
// backend.
// NOTE(review): the guard that makes this run only once, and the use made of a
// non-empty system encoding name, are not visible here.
1532 void wxCSConv::LoadNow()
1536 // it would probably be better to make GetSystemEncodingName() always
1537 // available (i.e. even when wxUSE_INTL == 0)?
1539 if ( !m_name
&& m_encoding
== wxFONTENCODING_DEFAULT
)
1541 wxString name
= wxLocale::GetSystemEncodingName();
1542 if ( !name
.empty() )
1547 #endif // wxUSE_INTL
1549 // wxGetCharacterSet() complains about NULL name
1550 m_cset
= wxGetCharacterSet(m_name
, m_encoding
);
// Multibyte -> wide conversion. The backend is created lazily (hence the cast
// removing const) and used when available; the other visible path copies each
// byte, widened through unsigned char so that bytes >= 0x80 map to 0x80..0xff,
// including the terminating NUL (the loop runs to c == len).
// NOTE(review): the m_cset and buf tests and the final return are not visible
// here. The fallback loop does not consult `n`.
1555 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1557 ((wxCSConv
*)this)->LoadNow(); // discard constness
1560 return m_cset
->MB2WC(buf
, psz
, n
);
1563 size_t len
= strlen(psz
);
1567 for (size_t c
= 0; c
<= len
; c
++)
1568 buf
[c
] = (unsigned char)(psz
[c
]);
// Wide -> multibyte conversion. The backend is created lazily (hence the cast
// removing const) and used when available; the other visible path copies each
// character, replacing anything above 0xff with '?', including the terminating
// NUL (the loop runs to c == len).
// NOTE(review): the m_cset and buf tests and the final return are not visible
// here. The fallback loop does not consult `n`.
1574 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1576 ((wxCSConv
*)this)->LoadNow(); // discard constness
1579 return m_cset
->WC2MB(buf
, psz
, n
);
1582 const size_t len
= wxWcslen(psz
);
1585 for (size_t c
= 0; c
<= len
; c
++)
1586 buf
[c
] = (psz
[c
] > 0xff) ? '?' : psz
[c
];
1592 #endif // wxUSE_WCHAR_T