1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // ============================================================================
17 // ============================================================================
19 // ----------------------------------------------------------------------------
21 // ----------------------------------------------------------------------------
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
35 #include "wx/strconv.h"
40 #include "wx/msw/private.h"
41 #include "wx/msw/missing.h"
52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54 #endif // __WIN32__ but !__WXMICROWIN__
62 #include "wx/thread.h"
65 #include "wx/encconv.h"
66 #include "wx/fontmap.h"
71 #include <ATSUnicode.h>
72 #include <TextCommon.h>
73 #include <TextEncodingConverter.h>
76 #include "wx/mac/private.h" // includes mac headers
79 #define TRACE_STRCONV _T("strconv")
81 #if SIZEOF_WCHAR_T == 2
85 // ============================================================================
87 // ============================================================================
89 // ----------------------------------------------------------------------------
90 // UTF-16 en/decoding to/from UCS-4
91 // ----------------------------------------------------------------------------
94 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
99 *output
= (wxUint16
) input
;
102 else if (input
>=0x110000)
110 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
111 *output
= (wxUint16
) ((input
&0x3ff)+0xdc00);
117 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
119 if ((*input
<0xd800) || (*input
>0xdfff))
124 else if ((input
[1]<0xdc00) || (input
[1]>0xdfff))
131 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
137 // ----------------------------------------------------------------------------
139 // ----------------------------------------------------------------------------
141 wxMBConv::~wxMBConv()
143 // nothing to do here (necessary for Darwin linking probably)
146 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
150 // calculate the length of the buffer needed first
151 size_t nLen
= MB2WC(NULL
, psz
, 0);
152 if ( nLen
!= (size_t)-1 )
154 // now do the actual conversion
155 wxWCharBuffer
buf(nLen
);
156 nLen
= MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NULL
157 if ( nLen
!= (size_t)-1 )
164 wxWCharBuffer
buf((wchar_t *)NULL
);
169 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
173 size_t nLen
= WC2MB(NULL
, pwz
, 0);
174 if ( nLen
!= (size_t)-1 )
176 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
177 nLen
= WC2MB(buf
.data(), pwz
, nLen
+ 4);
178 if ( nLen
!= (size_t)-1 )
185 wxCharBuffer
buf((char *)NULL
);
191 wxMBConv::cMB2WC(const char *in
, size_t inLen
, size_t *outLen
) const
193 // the currently accumulated wide characters
196 // the current length of wbuf
199 // we need to know the representation of L'\0' for this conversion
201 const char * const nul
= GetMBNul(&nulLen
);
202 if ( nulLen
== (size_t)-1 || nulLen
== 0 )
203 return wxWCharBuffer();
205 // make a copy of the input string unless it is already properly
209 // now we can compute the input size if we were not given it: notice that
210 // in this case the string must be properly NUL-terminated, of course, as
211 // otherwise we have no way of knowing how long it is
212 if ( inLen
== (size_t)-1 )
214 // not the most efficient algorithm but it shouldn't matter as normally
215 // there are not many NULs in the string and so normally memcmp()
216 // should stop on the first character
218 while ( memcmp(p
, nul
, nulLen
) != 0 )
221 inLen
= p
- in
+ nulLen
;
223 else // we already have the size
225 // check if it's not already NUL-terminated too to avoid the copy
226 if ( inLen
< nulLen
|| memcmp(in
+ inLen
- nulLen
, nul
, nulLen
) != 0 )
228 // make a copy in order to properly NUL-terminate the string
229 bufTmp
= wxCharBuffer(inLen
+ nulLen
- 1 /* 1 will be added */);
230 memcpy(bufTmp
.data(), in
, inLen
);
231 memcpy(bufTmp
.data() + inLen
, nul
, nulLen
);
238 for ( const char * const inEnd
= in
+ inLen
;; )
240 // try to convert the current chunk if anything left
241 size_t lenChunk
= in
< inEnd
? MB2WC(NULL
, in
, 0) : 0;
244 // nothing left in the input string, conversion succeeded
247 // we shouldn't include the last NUL in the result length
248 *outLen
= lenBuf
? lenBuf
- 1 : 0;
254 if ( lenChunk
== (size_t)-1 )
257 const size_t lenBufNew
= lenBuf
+ lenChunk
;
258 if ( !wbuf
.extend(lenBufNew
) )
261 lenChunk
= MB2WC(wbuf
.data() + lenBuf
, in
, lenChunk
+ 1 /* for NUL */);
262 if ( lenChunk
== (size_t)-1 )
265 // +1 for the embedded NUL (if something follows)
266 lenBuf
= lenBufNew
+ 1;
268 // advance the input pointer past the end of this chunk
269 while ( memcmp(in
, nul
, nulLen
) != 0 )
272 in
+= nulLen
; // skipping over its terminator as well
279 return wxWCharBuffer();
283 wxMBConv::cWC2MB(const wchar_t *in
, size_t inLen
, size_t *outLen
) const
285 // the currently accumulated multibyte characters
288 // the current length of buf
291 // make a copy of the input string unless it is already properly
294 // if we don't know its length we have no choice but to assume that it is,
295 // indeed, properly terminated
296 wxWCharBuffer bufTmp
;
297 if ( inLen
== (size_t)-1 )
299 inLen
= wxWcslen(in
) + 1;
301 else if ( inLen
!= 0 && in
[inLen
- 1] != L
'\0' )
303 // make a copy in order to properly NUL-terminate the string
304 bufTmp
= wxWCharBuffer(inLen
);
305 memcpy(bufTmp
.data(), in
, inLen
*sizeof(wchar_t));
311 for ( const wchar_t * const inEnd
= in
+ inLen
;; )
313 // try to convert the current chunk, if anything left
314 size_t lenChunk
= in
< inEnd
? WC2MB(NULL
, in
, 0) : 0;
317 // nothing left in the input string, conversion succeeded
319 *outLen
= lenBuf
? lenBuf
- 1 : lenBuf
;
324 if ( lenChunk
== (size_t)-1 )
327 const size_t lenBufNew
= lenBuf
+ lenChunk
;
328 if ( !buf
.extend(lenBufNew
) )
331 lenChunk
= WC2MB(buf
.data() + lenBuf
, in
, lenChunk
+ 1 /* for NUL */);
332 if ( lenChunk
== (size_t)-1 )
335 // chunk successfully converted, go to the next one
336 in
+= wxWcslen(in
) + 1 /* skip NUL too */;
337 lenBuf
= lenBufNew
+ 1;
344 return wxCharBuffer();
347 // ----------------------------------------------------------------------------
349 // ----------------------------------------------------------------------------
351 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
353 return wxMB2WC(buf
, psz
, n
);
356 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
358 return wxWC2MB(buf
, psz
, n
);
361 // ----------------------------------------------------------------------------
362 // wxConvBrokenFileNames
363 // ----------------------------------------------------------------------------
367 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar
*charset
)
369 if ( !charset
|| wxStricmp(charset
, _T("UTF-8")) == 0
370 || wxStricmp(charset
, _T("UTF8")) == 0 )
371 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
373 m_conv
= new wxCSConv(charset
);
378 // ----------------------------------------------------------------------------
380 // ----------------------------------------------------------------------------
382 // Implementation (C) 2004 Fredrik Roubert
385 // BASE64 decoding table
387 static const unsigned char utf7unb64
[] =
389 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
390 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
391 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
392 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
393 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
394 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
395 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
396 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
397 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
398 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
399 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
400 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
401 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
402 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
403 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
404 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
405 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
406 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
407 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
408 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
409 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
410 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
411 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
412 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
413 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
414 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
415 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
416 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
417 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
418 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
419 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
420 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
423 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
427 while ( *psz
&& (!buf
|| (len
< n
)) )
429 unsigned char cc
= *psz
++;
437 else if (*psz
== '-')
445 else // start of BASE64 encoded string
449 for ( ok
= lsb
= false, d
= 0, l
= 0;
450 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff;
455 for (l
+= 6; l
>= 8; lsb
= !lsb
)
457 unsigned char c
= (unsigned char)((d
>> (l
-= 8)) % 256);
467 *buf
= (wchar_t)(c
<< 8);
476 // in valid UTF7 we should have valid characters after '+'
485 if ( buf
&& (len
< n
) )
492 // BASE64 encoding table
494 static const unsigned char utf7enb64
[] =
496 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
497 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
498 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
499 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
500 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
501 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
502 'w', 'x', 'y', 'z', '0', '1', '2', '3',
503 '4', '5', '6', '7', '8', '9', '+', '/'
507 // UTF-7 encoding table
509 // 0 - Set D (directly encoded characters)
510 // 1 - Set O (optional direct characters)
511 // 2 - whitespace characters (optional)
512 // 3 - special characters
514 static const unsigned char utf7encode
[128] =
516 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
517 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
518 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
519 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
520 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
522 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
526 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
530 while (*psz
&& ((!buf
) || (len
< n
)))
533 if (cc
< 0x80 && utf7encode
[cc
] < 1)
541 else if (((wxUint32
)cc
) > 0xffff)
543 // no surrogate pair generation (yet?)
554 // BASE64 encode string
555 unsigned int lsb
, d
, l
;
556 for (d
= 0, l
= 0; /*nothing*/; psz
++)
558 for (lsb
= 0; lsb
< 2; lsb
++)
561 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
563 for (l
+= 8; l
>= 6; )
567 *buf
++ = utf7enb64
[(d
>> l
) % 64];
572 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
578 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
587 if (buf
&& (len
< n
))
592 // ----------------------------------------------------------------------------
594 // ----------------------------------------------------------------------------
596 static wxUint32 utf8_max
[]=
597 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
599 // boundaries of the private use area we use to (temporarily) remap
600 // characters that are invalid in a UTF-8 encoded string
601 const wxUint32 wxUnicodePUA
= 0x100000;
602 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
604 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
608 while (*psz
&& ((!buf
) || (len
< n
)))
610 const char *opsz
= psz
;
611 bool invalid
= false;
612 unsigned char cc
= *psz
++, fc
= cc
;
614 for (cnt
= 0; fc
& 0x80; cnt
++)
623 // escape the escape character for octal escapes
624 if ((m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
625 && cc
== '\\' && (!buf
|| len
< n
))
637 // invalid UTF-8 sequence
642 unsigned ocnt
= cnt
- 1;
643 wxUint32 res
= cc
& (0x3f >> cnt
);
647 if ((cc
& 0xC0) != 0x80)
649 // invalid UTF-8 sequence
654 res
= (res
<< 6) | (cc
& 0x3f);
656 if (invalid
|| res
<= utf8_max
[ocnt
])
658 // illegal UTF-8 encoding
661 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
662 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
664 // if one of our PUA characters turns up externally
665 // it must also be treated as an illegal sequence
666 // (a bit like you have to escape an escape character)
672 // cast is ok because wchar_t == wxUint16 if WC_UTF16
673 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
674 if (pa
== (size_t)-1)
686 *buf
++ = (wchar_t)res
;
688 #endif // WC_UTF16/!WC_UTF16
693 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
695 while (opsz
< psz
&& (!buf
|| len
< n
))
698 // cast is ok because wchar_t == wxUint16 if WC_UTF16
699 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
700 wxASSERT(pa
!= (size_t)-1);
707 *buf
++ = (wchar_t)(wxUnicodePUA
+ (unsigned char)*opsz
);
713 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
715 while (opsz
< psz
&& (!buf
|| len
< n
))
717 if ( buf
&& len
+ 3 < n
)
719 unsigned char on
= *opsz
;
721 *buf
++ = (wchar_t)( L
'0' + on
/ 0100 );
722 *buf
++ = (wchar_t)( L
'0' + (on
% 0100) / 010 );
723 *buf
++ = (wchar_t)( L
'0' + on
% 010 );
729 else // MAP_INVALID_UTF8_NOT
736 if (buf
&& (len
< n
))
741 static inline bool isoctal(wchar_t wch
)
743 return L
'0' <= wch
&& wch
<= L
'7';
746 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
750 while (*psz
&& ((!buf
) || (len
< n
)))
754 // cast is ok for WC_UTF16
755 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
756 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
758 cc
=(*psz
++) & 0x7fffffff;
761 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
762 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
765 *buf
++ = (char)(cc
- wxUnicodePUA
);
768 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
769 && cc
== L
'\\' && psz
[0] == L
'\\' )
776 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
778 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
782 *buf
++ = (char) ((psz
[0] - L
'0')*0100 +
783 (psz
[1] - L
'0')*010 +
793 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
807 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
809 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
821 // ----------------------------------------------------------------------------
823 // ----------------------------------------------------------------------------
825 #ifdef WORDS_BIGENDIAN
826 #define wxMBConvUTF16straight wxMBConvUTF16BE
827 #define wxMBConvUTF16swap wxMBConvUTF16LE
829 #define wxMBConvUTF16swap wxMBConvUTF16BE
830 #define wxMBConvUTF16straight wxMBConvUTF16LE
836 // copy 16bit MB to 16bit String
837 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
841 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
844 *buf
++ = *(wxUint16
*)psz
;
847 psz
+= sizeof(wxUint16
);
849 if (buf
&& len
<n
) *buf
=0;
855 // copy 16bit String to 16bit MB
856 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
860 while (*psz
&& (!buf
|| len
< n
))
864 *(wxUint16
*)buf
= *psz
;
865 buf
+= sizeof(wxUint16
);
867 len
+= sizeof(wxUint16
);
870 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
876 // swap 16bit MB to 16bit String
877 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
881 // UTF16 string must be terminated by 2 NULs as single NULs may occur
883 while ( (psz
[0] || psz
[1]) && (!buf
|| len
< n
) )
887 ((char *)buf
)[0] = psz
[1];
888 ((char *)buf
)[1] = psz
[0];
895 if ( buf
&& len
< n
)
902 // swap 16bit String to 16bit MB
903 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
907 while ( *psz
&& (!buf
|| len
< n
) )
911 *buf
++ = ((char*)psz
)[1];
912 *buf
++ = ((char*)psz
)[0];
918 if ( buf
&& len
< n
)
928 // copy 16bit MB to 32bit String
929 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
933 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
936 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
937 if (pa
== (size_t)-1)
941 *buf
++ = (wchar_t)cc
;
943 psz
+= pa
* sizeof(wxUint16
);
945 if (buf
&& len
<n
) *buf
=0;
951 // copy 32bit String to 16bit MB
952 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
956 while (*psz
&& (!buf
|| len
< n
))
959 size_t pa
=encode_utf16(*psz
, cc
);
961 if (pa
== (size_t)-1)
966 *(wxUint16
*)buf
= cc
[0];
967 buf
+= sizeof(wxUint16
);
970 *(wxUint16
*)buf
= cc
[1];
971 buf
+= sizeof(wxUint16
);
975 len
+= pa
*sizeof(wxUint16
);
978 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
984 // swap 16bit MB to 32bit String
985 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
989 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
993 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
994 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
996 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
997 if (pa
== (size_t)-1)
1001 *buf
++ = (wchar_t)cc
;
1004 psz
+= pa
* sizeof(wxUint16
);
1006 if (buf
&& len
<n
) *buf
=0;
1012 // swap 32bit String to 16bit MB
1013 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1017 while (*psz
&& (!buf
|| len
< n
))
1020 size_t pa
=encode_utf16(*psz
, cc
);
1022 if (pa
== (size_t)-1)
1027 *buf
++ = ((char*)cc
)[1];
1028 *buf
++ = ((char*)cc
)[0];
1031 *buf
++ = ((char*)cc
)[3];
1032 *buf
++ = ((char*)cc
)[2];
1036 len
+= pa
*sizeof(wxUint16
);
1039 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
1047 // ----------------------------------------------------------------------------
1049 // ----------------------------------------------------------------------------
1051 #ifdef WORDS_BIGENDIAN
1052 #define wxMBConvUTF32straight wxMBConvUTF32BE
1053 #define wxMBConvUTF32swap wxMBConvUTF32LE
1055 #define wxMBConvUTF32swap wxMBConvUTF32BE
1056 #define wxMBConvUTF32straight wxMBConvUTF32LE
1060 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1061 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
1066 // copy 32bit MB to 16bit String
1067 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1071 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1075 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
1076 if (pa
== (size_t)-1)
1086 psz
+= sizeof(wxUint32
);
1088 if (buf
&& len
<n
) *buf
=0;
1094 // copy 16bit String to 32bit MB
1095 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1099 while (*psz
&& (!buf
|| len
< n
))
1103 // cast is ok for WC_UTF16
1104 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
1105 if (pa
== (size_t)-1)
1110 *(wxUint32
*)buf
= cc
;
1111 buf
+= sizeof(wxUint32
);
1113 len
+= sizeof(wxUint32
);
1117 if (buf
&& len
<=n
-sizeof(wxUint32
))
1125 // swap 32bit MB to 16bit String
1126 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1130 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1133 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
1134 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
1139 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
1140 if (pa
== (size_t)-1)
1150 psz
+= sizeof(wxUint32
);
1160 // swap 16bit String to 32bit MB
1161 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1165 while (*psz
&& (!buf
|| len
< n
))
1169 // cast is ok for WC_UTF16
1170 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
1171 if (pa
== (size_t)-1)
1181 len
+= sizeof(wxUint32
);
1185 if (buf
&& len
<=n
-sizeof(wxUint32
))
1194 // copy 32bit MB to 32bit String
1195 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1199 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1202 *buf
++ = (wchar_t)(*(wxUint32
*)psz
);
1204 psz
+= sizeof(wxUint32
);
1214 // copy 32bit String to 32bit MB
1215 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1219 while (*psz
&& (!buf
|| len
< n
))
1223 *(wxUint32
*)buf
= *psz
;
1224 buf
+= sizeof(wxUint32
);
1227 len
+= sizeof(wxUint32
);
1231 if (buf
&& len
<=n
-sizeof(wxUint32
))
1238 // swap 32bit MB to 32bit String
1239 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1243 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1247 ((char *)buf
)[0] = psz
[3];
1248 ((char *)buf
)[1] = psz
[2];
1249 ((char *)buf
)[2] = psz
[1];
1250 ((char *)buf
)[3] = psz
[0];
1254 psz
+= sizeof(wxUint32
);
1264 // swap 32bit String to 32bit MB
1265 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1269 while (*psz
&& (!buf
|| len
< n
))
1273 *buf
++ = ((char *)psz
)[3];
1274 *buf
++ = ((char *)psz
)[2];
1275 *buf
++ = ((char *)psz
)[1];
1276 *buf
++ = ((char *)psz
)[0];
1278 len
+= sizeof(wxUint32
);
1282 if (buf
&& len
<=n
-sizeof(wxUint32
))
1292 // ============================================================================
1293 // The classes doing conversion using the iconv_xxx() functions
1294 // ============================================================================
1298 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1299 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1300 // (unless there's yet another bug in glibc) the only case when iconv()
1301 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1302 // left in the input buffer -- when _real_ error occurs,
1303 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1305 // [This bug does not appear in glibc 2.2.]
1306 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1307 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1308 (errno != E2BIG || bufLeft != 0))
1310 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1313 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1315 #define ICONV_T_INVALID ((iconv_t)-1)
1317 #if SIZEOF_WCHAR_T == 4
1318 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1319 #define WC_ENC wxFONTENCODING_UTF32
1320 #elif SIZEOF_WCHAR_T == 2
1321 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1322 #define WC_ENC wxFONTENCODING_UTF16
1323 #else // sizeof(wchar_t) != 2 nor 4
1324 // does this ever happen?
1325 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1328 // ----------------------------------------------------------------------------
1329 // wxMBConv_iconv: encapsulates an iconv character set
1330 // ----------------------------------------------------------------------------
1332 class wxMBConv_iconv
: public wxMBConv
1335 wxMBConv_iconv(const wxChar
*name
);
1336 virtual ~wxMBConv_iconv();
1338 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1339 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1342 { return (m2w
!= ICONV_T_INVALID
) && (w2m
!= ICONV_T_INVALID
); }
1345 // the iconv handlers used to translate from multibyte to wide char and in
1346 // the other direction
1350 // guards access to m2w and w2m objects
1351 wxMutex m_iconvMutex
;
1355 virtual const char *GetMBNul(size_t *nulLen
) const;
1357 // the name (for iconv_open()) of a wide char charset -- if none is
1358 // available on this machine, it will remain empty
1359 static wxString ms_wcCharsetName
;
1361 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1362 // different endian-ness than the native one
1363 static bool ms_wcNeedsSwap
;
1365 // NUL representation
1370 // make the constructor available for unit testing
1371 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_iconv( const wxChar
* name
)
1373 wxMBConv_iconv
* result
= new wxMBConv_iconv( name
);
1374 if ( !result
->IsOk() )
1382 wxString
wxMBConv_iconv::ms_wcCharsetName
;
1383 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1385 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1387 m_nulLen
= (size_t)-2;
1389 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1390 // names for the charsets
1391 const wxCharBuffer
cname(wxString(name
).ToAscii());
1393 // check for charset that represents wchar_t:
1394 if ( ms_wcCharsetName
.empty() )
1396 wxLogTrace(TRACE_STRCONV
, _T("Looking for wide char codeset:"));
1399 const wxChar
**names
= wxFontMapperBase::GetAllEncodingNames(WC_ENC
);
1400 #else // !wxUSE_FONTMAP
1401 static const wxChar
*names
[] =
1403 #if SIZEOF_WCHAR_T == 4
1405 #elif SIZEOF_WCHAR_T == 2
1410 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1412 for ( ; *names
&& ms_wcCharsetName
.empty(); ++names
)
1414 const wxString
nameCS(*names
);
1416 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1417 wxString
nameXE(nameCS
);
1418 #ifdef WORDS_BIGENDIAN
1420 #else // little endian
1424 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1427 m2w
= iconv_open(nameXE
.ToAscii(), cname
);
1428 if ( m2w
== ICONV_T_INVALID
)
1430 // try charset w/o bytesex info (e.g. "UCS4")
1431 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1433 m2w
= iconv_open(nameCS
.ToAscii(), cname
);
1435 // and check for bytesex ourselves:
1436 if ( m2w
!= ICONV_T_INVALID
)
1438 char buf
[2], *bufPtr
;
1439 wchar_t wbuf
[2], *wbufPtr
;
1447 outsz
= SIZEOF_WCHAR_T
* 2;
1451 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1452 (char**)&wbufPtr
, &outsz
);
1454 if (ICONV_FAILED(res
, insz
))
1456 wxLogLastError(wxT("iconv"));
1457 wxLogError(_("Conversion to charset '%s' doesn't work."),
1460 else // ok, can convert to this encoding, remember it
1462 ms_wcCharsetName
= nameCS
;
1463 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1467 else // use charset not requiring byte swapping
1469 ms_wcCharsetName
= nameXE
;
1473 wxLogTrace(TRACE_STRCONV
,
1474 wxT("iconv wchar_t charset is \"%s\"%s"),
1475 ms_wcCharsetName
.empty() ? _T("<none>")
1476 : ms_wcCharsetName
.c_str(),
1477 ms_wcNeedsSwap
? _T(" (needs swap)")
1480 else // we already have ms_wcCharsetName
1482 m2w
= iconv_open(ms_wcCharsetName
.ToAscii(), cname
);
1485 if ( ms_wcCharsetName
.empty() )
1487 w2m
= ICONV_T_INVALID
;
1491 w2m
= iconv_open(cname
, ms_wcCharsetName
.ToAscii());
1492 if ( w2m
== ICONV_T_INVALID
)
1494 wxLogTrace(TRACE_STRCONV
,
1495 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1496 ms_wcCharsetName
.c_str(), cname
.data());
1501 wxMBConv_iconv::~wxMBConv_iconv()
1503 if ( m2w
!= ICONV_T_INVALID
)
1505 if ( w2m
!= ICONV_T_INVALID
)
1509 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1512 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1513 // Unfortunately there are a couple of global wxCSConv objects such as
1514 // wxConvLocal that are used all over wx code, so we have to make sure
1515 // the handle is used by at most one thread at a time. Otherwise
1516 // only a few wx classes would be safe to use from non-main threads
1517 // as MB<->WC conversion would fail "randomly".
1518 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1521 size_t inbuf
= strlen(psz
);
1522 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1524 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1525 wchar_t *bufPtr
= buf
;
1526 const char *pszPtr
= psz
;
1530 // have destination buffer, convert there
1532 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1533 (char**)&bufPtr
, &outbuf
);
1534 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1538 // convert to native endianness
1539 for ( unsigned i = 0; i < res; i++ )
1540 buf[i] = WC_BSWAP(buf[i]); // swap each converted character in place (index i, not the size argument n)
1543 // NB: iconv was given only strlen(psz) characters on input, and so
1544 // it couldn't convert the trailing zero. Let's do it ourselves
1545 // if there's some room left for it in the output buffer.
1551 // no destination buffer... convert using temp buffer
1552 // to calculate destination buffer requirement
1557 outbuf
= 8*SIZEOF_WCHAR_T
;
1560 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1561 (char**)&bufPtr
, &outbuf
);
1563 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1564 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1567 if (ICONV_FAILED(cres
, inbuf
))
1569 //VS: it is ok if iconv fails, hence trace only
1570 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1577 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1580 // NB: explained in MB2WC
1581 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1584 size_t inlen
= wxWcslen(psz
);
1585 size_t inbuf
= inlen
* SIZEOF_WCHAR_T
;
1589 wchar_t *tmpbuf
= 0;
1593 // need to copy to temp buffer to switch endianness
1594 // (doing WC_BSWAP twice on the original buffer won't help, as it
1595 // could be in read-only memory, or be accessed in some other thread)
1596 tmpbuf
= (wchar_t *)malloc(inbuf
+ SIZEOF_WCHAR_T
);
1597 for ( size_t i = 0; i < inlen; i++ )
1598 tmpbuf[i] = WC_BSWAP(psz[i]); // fill slot i of the byte-swapped copy (index i, not the size argument n)
1599 tmpbuf
[inlen
] = L
'\0';
1605 // have destination buffer, convert there
1606 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1610 // NB: iconv was given only wcslen(psz) characters on input, and so
1611 // it couldn't convert the trailing zero. Let's do it ourselves
1612 // if there's some room left for it in the output buffer.
1618 // no destination buffer... convert using temp buffer
1619 // to calculate destination buffer requirement
1623 buf
= tbuf
; outbuf
= 16;
1625 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1628 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1636 if (ICONV_FAILED(cres
, inbuf
))
1638 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1645 const char *wxMBConv_iconv::GetMBNul(size_t *nulLen
) const
1647 if ( m_nulLen
== (size_t)-2 )
1649 wxMBConv_iconv
* const self
= wxConstCast(this, wxMBConv_iconv
);
1652 // NB: explained in MB2WC
1653 wxMutexLocker
lock(self
->m_iconvMutex
);
1657 outLen
= WXSIZEOF(m_nulBuf
);
1658 self
->m_nulLen
= iconv(w2m
, ICONV_CHAR_CAST(L
""), &inLen
,
1659 (char **)&self
->m_nulBuf
, &outLen
);
1666 #endif // HAVE_ICONV
1669 // ============================================================================
1670 // Win32 conversion classes
1671 // ============================================================================
1673 #ifdef wxHAVE_WIN32_MB2WC
1677 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1678 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1681 class wxMBConv_win32
: public wxMBConv
1686 m_CodePage
= CP_ACP
;
1687 m_nulLen
= (size_t)-2;
1691 wxMBConv_win32(const wxChar
* name
)
1693 m_CodePage
= wxCharsetToCodepage(name
);
1694 m_nulLen
= (size_t)-2;
1697 wxMBConv_win32(wxFontEncoding encoding
)
1699 m_CodePage
= wxEncodingToCodepage(encoding
);
1700 m_nulLen
= (size_t)-2;
1702 #endif // wxUSE_FONTMAP
1704 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1706 // note that we have to use MB_ERR_INVALID_CHARS flag as without it
1707 // the behaviour is not compatible with the Unix version (using iconv)
1708 // and breaks the library itself, e.g. wxTextInputStream::NextChar()
1709 // wouldn't work if reading an incomplete MB char didn't result in an
1712 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1713 // an error (tested under Windows Server 2003) and apparently it is
1714 // done on purpose, i.e. the function accepts any input in this case
1715 // and although I'd prefer to return an error on ill-formed input, our
1716 // own wxMBConvUTF7 doesn't detect errors (e.g. a lone "+" which is
1717 // explicitly ill-formed according to RFC 2152) either, so we don't
1718 // even have any fallback here...
1720 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1721 // Win XP or newer and if it is specified on older versions, conversion
1722 // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1723 // fails. So we can only use the flag on newer Windows versions.
1724 // Additionally, the flag is not supported by UTF7, symbol and CJK
1725 // encodings. See here:
1726 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1727 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1729 if ( m_CodePage
!= CP_UTF7
&& m_CodePage
!= CP_SYMBOL
&&
1730 m_CodePage
< 50000 &&
1731 IsAtLeastWin2kSP4() )
1733 flags
= MB_ERR_INVALID_CHARS
;
1735 else if ( m_CodePage
== CP_UTF8
)
1737 // Avoid round-trip in the special case of UTF-8 by using our
1738 // own UTF-8 conversion code:
1739 return wxMBConvUTF8().MB2WC(buf
, psz
, n
);
1742 const size_t len
= ::MultiByteToWideChar
1744 m_CodePage
, // code page
1745 flags
, // flags: fall on error
1746 psz
, // input string
1747 -1, // its length (NUL-terminated)
1748 buf
, // output string
1749 buf
? n
: 0 // size of output buffer
1753 // function totally failed
1757 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1758 // check if we succeeded, by doing a double trip:
1759 if ( !flags
&& buf
)
1761 const size_t mbLen
= strlen(psz
);
1762 wxCharBuffer
mbBuf(mbLen
);
1763 if ( ::WideCharToMultiByte
1770 mbLen
+ 1, // size in bytes, not length
1774 strcmp(mbBuf
, psz
) != 0 )
1776 // we didn't obtain the same thing we started from, hence
1777 // the conversion was lossy and we consider that it failed
1782 // note that it returns count of written chars for buf != NULL and size
1783 // of the needed buffer for buf == NULL so in either case the length of
1784 // the string (which never includes the terminating NUL) is one less
1788 size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
1791 we have a problem here: by default, WideCharToMultiByte() may
1792 replace characters unrepresentable in the target code page with bad
1793 quality approximations such as turning "1/2" symbol (U+00BD) into
1794 "1" for the code pages which don't have it and we, obviously, want
1795 to avoid this at any price
1797 the trouble is that this function does it _silently_, i.e. it won't
1798 even tell us whether it did or not... Win98/2000 and higher provide
1799 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1800 we have to resort to a round trip, i.e. check that converting back
1801 results in the same string -- this is, of course, expensive but
1802 otherwise we simply can't be sure to not garble the data.
1805 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1806 // it doesn't work with CJK encodings (which we test for rather roughly
1807 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1809 BOOL usedDef
wxDUMMY_INITIALIZE(false);
1812 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
1814 // it's our lucky day
1815 flags
= WC_NO_BEST_FIT_CHARS
;
1816 pUsedDef
= &usedDef
;
1818 else // old system or unsupported encoding
1824 const size_t len
= ::WideCharToMultiByte
1826 m_CodePage
, // code page
1827 flags
, // either none or no best fit
1828 pwz
, // input string
1829 -1, // it is (wide) NUL-terminated
1830 buf
, // output buffer
1831 buf
? n
: 0, // and its size
1832 NULL
, // default "replacement" char
1833 pUsedDef
// [out] was it used?
1838 // function totally failed
1842 // if we were really converting, check if we succeeded
1847 // check if the conversion failed, i.e. if any replacements
1852 else // we must resort to double tripping...
1854 wxWCharBuffer
wcBuf(n
);
1855 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
1856 wcscmp(wcBuf
, pwz
) != 0 )
1858 // we didn't obtain the same thing we started from, hence
1859 // the conversion was lossy and we consider that it failed
1865 // see the comment above for the reason of "len - 1"
1869 bool IsOk() const { return m_CodePage
!= -1; }
1872 static bool CanUseNoBestFit()
1874 static int s_isWin98Or2k
= -1;
1876 if ( s_isWin98Or2k
== -1 )
1879 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
1882 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
1886 s_isWin98Or2k
= verMaj
>= 5;
1890 // unknown, be conservative by default
1894 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
1897 return s_isWin98Or2k
== 1;
1900 static bool IsAtLeastWin2kSP4()
1905 static int s_isAtLeastWin2kSP4
= -1;
1907 if ( s_isAtLeastWin2kSP4
== -1 )
1909 OSVERSIONINFOEX ver
;
1911 memset(&ver
, 0, sizeof(ver
));
1912 ver
.dwOSVersionInfoSize
= sizeof(ver
);
1913 GetVersionEx((OSVERSIONINFO
*)&ver
);
1915 s_isAtLeastWin2kSP4
=
1916 ((ver
.dwMajorVersion
> 5) || // Vista+
1917 (ver
.dwMajorVersion
== 5 && ver
.dwMinorVersion
> 0) || // XP/2003
1918 (ver
.dwMajorVersion
== 5 && ver
.dwMinorVersion
== 0 &&
1919 ver
.wServicePackMajor
>= 4)) // 2000 SP4+
1923 return s_isAtLeastWin2kSP4
== 1;
1927 virtual const char *GetMBNul(size_t *nulLen
) const
1929 if ( m_nulLen
== (size_t)-2 )
1931 wxMBConv_win32
* const self
= wxConstCast(this, wxMBConv_win32
);
1933 self
->m_nulLen
= ::WideCharToMultiByte
1935 m_CodePage
, // code page
1937 L
"", // input string
1938 1, // translate just NUL
1939 self
->m_nulBuf
, // output buffer
1940 WXSIZEOF(m_nulBuf
), // and its size
1941 NULL
, // "replacement" char
1942 NULL
// [out] was it used?
1945 if ( m_nulLen
== 0 )
1946 self
->m_nulLen
= (size_t)-1;
1958 #endif // wxHAVE_WIN32_MB2WC
1960 // ============================================================================
1961 // Cocoa conversion classes
1962 // ============================================================================
1964 #if defined(__WXCOCOA__)
1966 // RN: There is no UTF-32 support in either Core Foundation or
1967 // Cocoa. Strangely enough, Core Foundation uses UTF-32
1968 // internally quite a bit - it's just not public (yet).
1970 #include <CoreFoundation/CFString.h>
1971 #include <CoreFoundation/CFStringEncodingExt.h>
1973 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
1975 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
1976 if ( encoding
== wxFONTENCODING_DEFAULT
)
1978 enc
= CFStringGetSystemEncoding();
1980 else switch( encoding
)
1982 case wxFONTENCODING_ISO8859_1
:
1983 enc
= kCFStringEncodingISOLatin1
;
1985 case wxFONTENCODING_ISO8859_2
:
1986 enc
= kCFStringEncodingISOLatin2
;
1988 case wxFONTENCODING_ISO8859_3
:
1989 enc
= kCFStringEncodingISOLatin3
;
1991 case wxFONTENCODING_ISO8859_4
:
1992 enc
= kCFStringEncodingISOLatin4
;
1994 case wxFONTENCODING_ISO8859_5
:
1995 enc
= kCFStringEncodingISOLatinCyrillic
;
1997 case wxFONTENCODING_ISO8859_6
:
1998 enc
= kCFStringEncodingISOLatinArabic
;
2000 case wxFONTENCODING_ISO8859_7
:
2001 enc
= kCFStringEncodingISOLatinGreek
;
2003 case wxFONTENCODING_ISO8859_8
:
2004 enc
= kCFStringEncodingISOLatinHebrew
;
2006 case wxFONTENCODING_ISO8859_9
:
2007 enc
= kCFStringEncodingISOLatin5
;
2009 case wxFONTENCODING_ISO8859_10
:
2010 enc
= kCFStringEncodingISOLatin6
;
2012 case wxFONTENCODING_ISO8859_11
:
2013 enc
= kCFStringEncodingISOLatinThai
;
2015 case wxFONTENCODING_ISO8859_13
:
2016 enc
= kCFStringEncodingISOLatin7
;
2018 case wxFONTENCODING_ISO8859_14
:
2019 enc
= kCFStringEncodingISOLatin8
;
2021 case wxFONTENCODING_ISO8859_15
:
2022 enc
= kCFStringEncodingISOLatin9
;
2025 case wxFONTENCODING_KOI8
:
2026 enc
= kCFStringEncodingKOI8_R
;
2028 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
2029 enc
= kCFStringEncodingDOSRussian
;
2032 // case wxFONTENCODING_BULGARIAN :
2036 case wxFONTENCODING_CP437
:
2037 enc
=kCFStringEncodingDOSLatinUS
;
2039 case wxFONTENCODING_CP850
:
2040 enc
= kCFStringEncodingDOSLatin1
;
2042 case wxFONTENCODING_CP852
:
2043 enc
= kCFStringEncodingDOSLatin2
;
2045 case wxFONTENCODING_CP855
:
2046 enc
= kCFStringEncodingDOSCyrillic
;
2048 case wxFONTENCODING_CP866
:
2049 enc
=kCFStringEncodingDOSRussian
;
2051 case wxFONTENCODING_CP874
:
2052 enc
= kCFStringEncodingDOSThai
;
2054 case wxFONTENCODING_CP932
:
2055 enc
= kCFStringEncodingDOSJapanese
;
2057 case wxFONTENCODING_CP936
:
2058 enc
=kCFStringEncodingDOSChineseSimplif
;
2060 case wxFONTENCODING_CP949
:
2061 enc
= kCFStringEncodingDOSKorean
;
2063 case wxFONTENCODING_CP950
:
2064 enc
= kCFStringEncodingDOSChineseTrad
;
2066 case wxFONTENCODING_CP1250
:
2067 enc
= kCFStringEncodingWindowsLatin2
;
2069 case wxFONTENCODING_CP1251
:
2070 enc
=kCFStringEncodingWindowsCyrillic
;
2072 case wxFONTENCODING_CP1252
:
2073 enc
=kCFStringEncodingWindowsLatin1
;
2075 case wxFONTENCODING_CP1253
:
2076 enc
= kCFStringEncodingWindowsGreek
;
2078 case wxFONTENCODING_CP1254
:
2079 enc
= kCFStringEncodingWindowsLatin5
;
2081 case wxFONTENCODING_CP1255
:
2082 enc
=kCFStringEncodingWindowsHebrew
;
2084 case wxFONTENCODING_CP1256
:
2085 enc
=kCFStringEncodingWindowsArabic
;
2087 case wxFONTENCODING_CP1257
:
2088 enc
= kCFStringEncodingWindowsBalticRim
;
2090 // This only really encodes to UTF7 (if that) evidently
2091 // case wxFONTENCODING_UTF7 :
2092 // enc = kCFStringEncodingNonLossyASCII ;
2094 case wxFONTENCODING_UTF8
:
2095 enc
= kCFStringEncodingUTF8
;
2097 case wxFONTENCODING_EUC_JP
:
2098 enc
= kCFStringEncodingEUC_JP
;
2100 case wxFONTENCODING_UTF16
:
2101 enc
= kCFStringEncodingUnicode
;
2103 case wxFONTENCODING_MACROMAN
:
2104 enc
= kCFStringEncodingMacRoman
;
2106 case wxFONTENCODING_MACJAPANESE
:
2107 enc
= kCFStringEncodingMacJapanese
;
2109 case wxFONTENCODING_MACCHINESETRAD
:
2110 enc
= kCFStringEncodingMacChineseTrad
;
2112 case wxFONTENCODING_MACKOREAN
:
2113 enc
= kCFStringEncodingMacKorean
;
2115 case wxFONTENCODING_MACARABIC
:
2116 enc
= kCFStringEncodingMacArabic
;
2118 case wxFONTENCODING_MACHEBREW
:
2119 enc
= kCFStringEncodingMacHebrew
;
2121 case wxFONTENCODING_MACGREEK
:
2122 enc
= kCFStringEncodingMacGreek
;
2124 case wxFONTENCODING_MACCYRILLIC
:
2125 enc
= kCFStringEncodingMacCyrillic
;
2127 case wxFONTENCODING_MACDEVANAGARI
:
2128 enc
= kCFStringEncodingMacDevanagari
;
2130 case wxFONTENCODING_MACGURMUKHI
:
2131 enc
= kCFStringEncodingMacGurmukhi
;
2133 case wxFONTENCODING_MACGUJARATI
:
2134 enc
= kCFStringEncodingMacGujarati
;
2136 case wxFONTENCODING_MACORIYA
:
2137 enc
= kCFStringEncodingMacOriya
;
2139 case wxFONTENCODING_MACBENGALI
:
2140 enc
= kCFStringEncodingMacBengali
;
2142 case wxFONTENCODING_MACTAMIL
:
2143 enc
= kCFStringEncodingMacTamil
;
2145 case wxFONTENCODING_MACTELUGU
:
2146 enc
= kCFStringEncodingMacTelugu
;
2148 case wxFONTENCODING_MACKANNADA
:
2149 enc
= kCFStringEncodingMacKannada
;
2151 case wxFONTENCODING_MACMALAJALAM
:
2152 enc
= kCFStringEncodingMacMalayalam
;
2154 case wxFONTENCODING_MACSINHALESE
:
2155 enc
= kCFStringEncodingMacSinhalese
;
2157 case wxFONTENCODING_MACBURMESE
:
2158 enc
= kCFStringEncodingMacBurmese
;
2160 case wxFONTENCODING_MACKHMER
:
2161 enc
= kCFStringEncodingMacKhmer
;
2163 case wxFONTENCODING_MACTHAI
:
2164 enc
= kCFStringEncodingMacThai
;
2166 case wxFONTENCODING_MACLAOTIAN
:
2167 enc
= kCFStringEncodingMacLaotian
;
2169 case wxFONTENCODING_MACGEORGIAN
:
2170 enc
= kCFStringEncodingMacGeorgian
;
2172 case wxFONTENCODING_MACARMENIAN
:
2173 enc
= kCFStringEncodingMacArmenian
;
2175 case wxFONTENCODING_MACCHINESESIMP
:
2176 enc
= kCFStringEncodingMacChineseSimp
;
2178 case wxFONTENCODING_MACTIBETAN
:
2179 enc
= kCFStringEncodingMacTibetan
;
2181 case wxFONTENCODING_MACMONGOLIAN
:
2182 enc
= kCFStringEncodingMacMongolian
;
2184 case wxFONTENCODING_MACETHIOPIC
:
2185 enc
= kCFStringEncodingMacEthiopic
;
2187 case wxFONTENCODING_MACCENTRALEUR
:
2188 enc
= kCFStringEncodingMacCentralEurRoman
;
2190 case wxFONTENCODING_MACVIATNAMESE
:
2191 enc
= kCFStringEncodingMacVietnamese
;
2193 case wxFONTENCODING_MACARABICEXT
:
2194 enc
= kCFStringEncodingMacExtArabic
;
2196 case wxFONTENCODING_MACSYMBOL
:
2197 enc
= kCFStringEncodingMacSymbol
;
2199 case wxFONTENCODING_MACDINGBATS
:
2200 enc
= kCFStringEncodingMacDingbats
;
2202 case wxFONTENCODING_MACTURKISH
:
2203 enc
= kCFStringEncodingMacTurkish
;
2205 case wxFONTENCODING_MACCROATIAN
:
2206 enc
= kCFStringEncodingMacCroatian
;
2208 case wxFONTENCODING_MACICELANDIC
:
2209 enc
= kCFStringEncodingMacIcelandic
;
2211 case wxFONTENCODING_MACROMANIAN
:
2212 enc
= kCFStringEncodingMacRomanian
;
2214 case wxFONTENCODING_MACCELTIC
:
2215 enc
= kCFStringEncodingMacCeltic
;
2217 case wxFONTENCODING_MACGAELIC
:
2218 enc
= kCFStringEncodingMacGaelic
;
2220 // case wxFONTENCODING_MACKEYBOARD :
2221 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2224 // because gcc is picky
2230 class wxMBConv_cocoa
: public wxMBConv
2235 Init(CFStringGetSystemEncoding()) ;
2239 wxMBConv_cocoa(const wxChar
* name
)
2241 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2245 wxMBConv_cocoa(wxFontEncoding encoding
)
2247 Init( wxCFStringEncFromFontEnc(encoding
) );
2254 void Init( CFStringEncoding encoding
)
2256 m_encoding
= encoding
;
2259 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2263 CFStringRef theString
= CFStringCreateWithBytes (
2264 NULL
, //the allocator
2265 (const UInt8
*)szUnConv
,
2268 false //no BOM/external representation
2271 wxASSERT(theString
);
2273 size_t nOutLength
= CFStringGetLength(theString
);
2277 CFRelease(theString
);
2281 CFRange theRange
= { 0, nOutSize
};
2283 #if SIZEOF_WCHAR_T == 4
2284 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2287 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2289 CFRelease(theString
);
2291 szUniCharBuffer
[nOutLength
] = '\0' ;
2293 #if SIZEOF_WCHAR_T == 4
2294 wxMBConvUTF16 converter
;
2295 converter
.MB2WC(szOut
, (const char*)szUniCharBuffer
, nOutSize
) ;
2296 delete[] szUniCharBuffer
;
2302 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2306 size_t nRealOutSize
;
2307 size_t nBufSize
= wxWcslen(szUnConv
);
2308 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
2310 #if SIZEOF_WCHAR_T == 4
2311 wxMBConvUTF16 converter
;
2312 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2313 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1] ;
2314 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
)) ;
2315 nBufSize
/= sizeof(UniChar
);
2318 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2322 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
2325 wxASSERT(theString
);
2327 //Note that CER puts a BOM when converting to unicode
2328 //so we check and use getchars instead in that case
2329 if (m_encoding
== kCFStringEncodingUnicode
)
2332 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2334 nRealOutSize
= CFStringGetLength(theString
) + 1;
2340 CFRangeMake(0, CFStringGetLength(theString
)),
2342 0, //what to put in characters that can't be converted -
2343 //0 tells CFString to return NULL if it meets such a character
2344 false, //not an external representation
2347 (CFIndex
*) &nRealOutSize
2351 CFRelease(theString
);
2353 #if SIZEOF_WCHAR_T == 4
2354 delete[] szUniBuffer
;
2357 return nRealOutSize
- 1;
2362 return m_encoding
!= kCFStringEncodingInvalidId
&&
2363 CFStringIsEncodingAvailable(m_encoding
);
2367 CFStringEncoding m_encoding
;
2370 #endif // defined(__WXCOCOA__)
2372 // ============================================================================
2373 // Mac conversion classes
2374 // ============================================================================
2376 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2378 class wxMBConv_mac
: public wxMBConv
2383 Init(CFStringGetSystemEncoding()) ;
2387 wxMBConv_mac(const wxChar
* name
)
2389 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2393 wxMBConv_mac(wxFontEncoding encoding
)
2395 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
2400 OSStatus status
= noErr
;
2401 status
= TECDisposeConverter(m_MB2WC_converter
);
2402 status
= TECDisposeConverter(m_WC2MB_converter
);
2406 void Init( TextEncodingBase encoding
)
2408 OSStatus status
= noErr
;
2409 m_char_encoding
= encoding
;
2410 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
2412 status
= TECCreateConverter(&m_MB2WC_converter
,
2414 m_unicode_encoding
);
2415 status
= TECCreateConverter(&m_WC2MB_converter
,
2420 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2422 OSStatus status
= noErr
;
2423 ByteCount byteOutLen
;
2424 ByteCount byteInLen
= strlen(psz
) ;
2425 wchar_t *tbuf
= NULL
;
2426 UniChar
* ubuf
= NULL
;
2431 //apple specs say at least 32
2432 n
= wxMax( 32 , byteInLen
) ;
2433 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
2435 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
2436 #if SIZEOF_WCHAR_T == 4
2437 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2439 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2441 status
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2442 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2443 #if SIZEOF_WCHAR_T == 4
2444 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2445 // is not properly terminated we get random characters at the end
2446 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2447 wxMBConvUTF16 converter
;
2448 res
= converter
.MB2WC( (buf
? buf
: tbuf
) , (const char*)ubuf
, n
) ;
2451 res
= byteOutLen
/ sizeof( UniChar
) ;
2456 if ( buf
&& res
< n
)
2462 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2464 OSStatus status
= noErr
;
2465 ByteCount byteOutLen
;
2466 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2472 //apple specs say at least 32
2473 n
= wxMax( 32 , ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2474 tbuf
= (char*) malloc( n
) ;
2477 ByteCount byteBufferLen
= n
;
2478 UniChar
* ubuf
= NULL
;
2479 #if SIZEOF_WCHAR_T == 4
2480 wxMBConvUTF16 converter
;
2481 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2482 byteInLen
= unicharlen
;
2483 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2484 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2486 ubuf
= (UniChar
*) psz
;
2488 status
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2489 (TextPtr
) (buf
? buf
: tbuf
) , byteBufferLen
, &byteOutLen
);
2490 #if SIZEOF_WCHAR_T == 4
2496 size_t res
= byteOutLen
;
2497 if ( buf
&& res
< n
)
2501 //we need to double-trip to verify it didn't insert any ? in place
2502 //of bogus characters
2503 wxWCharBuffer
wcBuf(n
);
2504 size_t pszlen
= wxWcslen(psz
);
2505 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
2506 wxWcslen(wcBuf
) != pszlen
||
2507 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2509 // we didn't obtain the same thing we started from, hence
2510 // the conversion was lossy and we consider that it failed
2519 { return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
; }
2522 TECObjectRef m_MB2WC_converter
;
2523 TECObjectRef m_WC2MB_converter
;
2525 TextEncodingBase m_char_encoding
;
2526 TextEncodingBase m_unicode_encoding
;
2529 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2531 // ============================================================================
2532 // wxEncodingConverter based conversion classes
2533 // ============================================================================
2537 class wxMBConv_wxwin
: public wxMBConv
2542 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2543 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2547 // temporarily just use wxEncodingConverter stuff,
2548 // so that it works while a better implementation is built
2549 wxMBConv_wxwin(const wxChar
* name
)
2552 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2554 m_enc
= wxFONTENCODING_SYSTEM
;
2559 wxMBConv_wxwin(wxFontEncoding enc
)
2566 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2568 size_t inbuf
= strlen(psz
);
2571 if (!m2w
.Convert(psz
,buf
))
2577 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2579 const size_t inbuf
= wxWcslen(psz
);
2582 if (!w2m
.Convert(psz
,buf
))
2589 bool IsOk() const { return m_ok
; }
2592 wxFontEncoding m_enc
;
2593 wxEncodingConverter m2w
, w2m
;
2596 virtual const char *GetMBNul(size_t *nulLen
) const
2600 case wxFONTENCODING_UTF16BE
:
2601 case wxFONTENCODING_UTF16LE
:
2605 case wxFONTENCODING_UTF32BE
:
2606 case wxFONTENCODING_UTF32LE
:
2616 // were we initialized successfully?
2619 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2622 // make the constructors available for unit testing
2623 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_wxwin( const wxChar
* name
)
2625 wxMBConv_wxwin
* result
= new wxMBConv_wxwin( name
);
2626 if ( !result
->IsOk() )
2634 #endif // wxUSE_FONTMAP
2636 // ============================================================================
2637 // wxCSConv implementation
2638 // ============================================================================
2640 void wxCSConv::Init()
2647 wxCSConv::wxCSConv(const wxChar
*charset
)
2657 m_encoding
= wxFontMapperBase::GetEncodingFromName(charset
);
2659 m_encoding
= wxFONTENCODING_SYSTEM
;
2663 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2665 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2667 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2669 encoding
= wxFONTENCODING_SYSTEM
;
2674 m_encoding
= encoding
;
2677 wxCSConv::~wxCSConv()
2682 wxCSConv::wxCSConv(const wxCSConv
& conv
)
2687 SetName(conv
.m_name
);
2688 m_encoding
= conv
.m_encoding
;
2691 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2695 SetName(conv
.m_name
);
2696 m_encoding
= conv
.m_encoding
;
2701 void wxCSConv::Clear()
2710 void wxCSConv::SetName(const wxChar
*charset
)
2714 m_name
= wxStrdup(charset
);
2720 #include "wx/hashmap.h"
2722 WX_DECLARE_HASH_MAP( wxFontEncoding
, wxString
, wxIntegerHash
, wxIntegerEqual
,
2723 wxEncodingNameCache
);
2725 static wxEncodingNameCache gs_nameCache
;
2728 wxMBConv
*wxCSConv::DoCreate() const
2731 wxLogTrace(TRACE_STRCONV
,
2732 wxT("creating conversion for %s"),
2734 : wxFontMapperBase::GetEncodingName(m_encoding
).c_str()));
2735 #endif // wxUSE_FONTMAP
2737 // check for the special case of ASCII or ISO8859-1 charset: as we have
2738 // special knowledge of it anyhow, we don't need to create a special
2739 // conversion object
2740 if ( m_encoding
== wxFONTENCODING_ISO8859_1
||
2741 m_encoding
== wxFONTENCODING_DEFAULT
)
2743 // don't convert at all
2747 // we trust OS to do conversion better than we can so try external
2748 // conversion methods first
2750 // the full order is:
2751 // 1. OS conversion (iconv() under Unix or Win32 API)
2752 // 2. hard coded conversions for UTF
2753 // 3. wxEncodingConverter as fall back
2759 #endif // !wxUSE_FONTMAP
2761 wxString
name(m_name
);
2762 wxFontEncoding
encoding(m_encoding
);
2764 if ( !name
.empty() )
2766 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
2774 wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2775 #endif // wxUSE_FONTMAP
2779 const wxEncodingNameCache::iterator it
= gs_nameCache
.find(encoding
);
2780 if ( it
!= gs_nameCache
.end() )
2782 if ( it
->second
.empty() )
2785 wxMBConv_iconv
*conv
= new wxMBConv_iconv(it
->second
);
2792 const wxChar
** names
= wxFontMapperBase::GetAllEncodingNames(encoding
);
2794 for ( ; *names
; ++names
)
2796 wxMBConv_iconv
*conv
= new wxMBConv_iconv(*names
);
2799 gs_nameCache
[encoding
] = *names
;
2806 gs_nameCache
[encoding
] = _T(""); // cache the failure
2808 #endif // wxUSE_FONTMAP
2810 #endif // HAVE_ICONV
2812 #ifdef wxHAVE_WIN32_MB2WC
2815 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2816 : new wxMBConv_win32(m_encoding
);
2825 #endif // wxHAVE_WIN32_MB2WC
2826 #if defined(__WXMAC__)
2828 // leave UTF16 and UTF32 to the built-ins of wx
2829 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
2830 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
2834 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
2835 : new wxMBConv_mac(m_encoding
);
2837 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
2846 #if defined(__WXCOCOA__)
2848 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
2852 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
2853 : new wxMBConv_cocoa(m_encoding
);
2855 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
2865 wxFontEncoding enc
= m_encoding
;
2867 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2869 // use "false" to suppress interactive dialogs -- we can be called from
2870 // anywhere and popping up a dialog from here is the last thing we want to
2872 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
2874 #endif // wxUSE_FONTMAP
2878 case wxFONTENCODING_UTF7
:
2879 return new wxMBConvUTF7
;
2881 case wxFONTENCODING_UTF8
:
2882 return new wxMBConvUTF8
;
2884 case wxFONTENCODING_UTF16BE
:
2885 return new wxMBConvUTF16BE
;
2887 case wxFONTENCODING_UTF16LE
:
2888 return new wxMBConvUTF16LE
;
2890 case wxFONTENCODING_UTF32BE
:
2891 return new wxMBConvUTF32BE
;
2893 case wxFONTENCODING_UTF32LE
:
2894 return new wxMBConvUTF32LE
;
2897 // nothing to do but put here to suppress gcc warnings
2904 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2905 : new wxMBConv_wxwin(m_encoding
);
2911 #endif // wxUSE_FONTMAP
2913 // NB: This is a hack to prevent a deadlock. What could otherwise happen
2914 // in a Unicode build: wxConvLocal creation ends up here
2915 // because of some failure and logs the error. But wxLog will try to
2916 // attach a timestamp, for which it needs wxConvLocal (to convert the
2917 // time to char* and then wchar_t*); that fails too and tries to log an
2918 // error, but wxLog has an (already locked) critical section that
2919 // guards a static buffer.
2920 static bool alreadyLoggingError
= false;
2921 if (!alreadyLoggingError
)
2923 alreadyLoggingError
= true;
2924 wxLogError(_("Cannot convert from the charset '%s'!"),
2928 wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str()
2929 #else // !wxUSE_FONTMAP
2930 wxString::Format(_("encoding %s"), m_encoding
).c_str()
2931 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2933 alreadyLoggingError
= false;
2939 void wxCSConv::CreateConvIfNeeded() const
2943 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2946 // if we have neither the name nor the encoding, use the default
2947 // encoding for this system
2948 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
2950 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
2952 #endif // wxUSE_INTL
2954 self
->m_convReal
= DoCreate();
2955 self
->m_deferred
= false;
2959 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2961 CreateConvIfNeeded();
2964 return m_convReal
->MB2WC(buf
, psz
, n
);
2967 size_t len
= strlen(psz
);
2971 for (size_t c
= 0; c
<= len
; c
++)
2972 buf
[c
] = (unsigned char)(psz
[c
]);
2978 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2980 CreateConvIfNeeded();
2983 return m_convReal
->WC2MB(buf
, psz
, n
);
2986 const size_t len
= wxWcslen(psz
);
2989 for (size_t c
= 0; c
<= len
; c
++)
2993 buf
[c
] = (char)psz
[c
];
2998 for (size_t c
= 0; c
<= len
; c
++)
3008 const char *wxCSConv::GetMBNul(size_t *nulLen
) const
3010 CreateConvIfNeeded();
3014 // cast needed just to call private function of m_convReal
3015 return ((wxCSConv
*)m_convReal
)->GetMBNul(nulLen
);
3022 // ----------------------------------------------------------------------------
3024 // ----------------------------------------------------------------------------
3027 static wxMBConv_win32 wxConvLibcObj
;
3028 #elif defined(__WXMAC__) && !defined(__MACH__)
3029 static wxMBConv_mac wxConvLibcObj
;
3031 static wxMBConvLibc wxConvLibcObj
;
3034 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
3035 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
3036 static wxMBConvUTF7 wxConvUTF7Obj
;
3037 static wxMBConvUTF8 wxConvUTF8Obj
;
3039 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
3040 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
3041 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
3042 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
3043 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
3044 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
3045 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
= &
3053 #else // !wxUSE_WCHAR_T
3055 // stand-ins in absence of wchar_t
3056 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
3061 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T