1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // ============================================================================
17 // ============================================================================
19 // ----------------------------------------------------------------------------
21 // ----------------------------------------------------------------------------
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
35 #include "wx/strconv.h"
40 #include "wx/msw/private.h"
41 #include "wx/msw/missing.h"
52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54 #endif // __WIN32__ but !__WXMICROWIN__
62 #include "wx/thread.h"
65 #include "wx/encconv.h"
66 #include "wx/fontmap.h"
71 #include <ATSUnicode.h>
72 #include <TextCommon.h>
73 #include <TextEncodingConverter.h>
76 #include "wx/mac/private.h" // includes mac headers
79 #define TRACE_STRCONV _T("strconv")
81 #if SIZEOF_WCHAR_T == 2
85 // ============================================================================
87 // ============================================================================
89 // ----------------------------------------------------------------------------
90 // UTF-16 en/decoding to/from UCS-4
91 // ----------------------------------------------------------------------------
94 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
99 *output
= (wxUint16
) input
;
102 else if (input
>=0x110000)
110 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
111 *output
= (wxUint16
) ((input
&0x3ff)+0xdc00);
117 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
119 if ((*input
<0xd800) || (*input
>0xdfff))
124 else if ((input
[1]<0xdc00) || (input
[1]>0xdfff))
131 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
137 // ----------------------------------------------------------------------------
139 // ----------------------------------------------------------------------------
141 wxMBConv::~wxMBConv()
143 // nothing to do here (necessary for Darwin linking probably)
146 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
150 // calculate the length of the buffer needed first
151 size_t nLen
= MB2WC(NULL
, psz
, 0);
152 if ( nLen
!= (size_t)-1 )
154 // now do the actual conversion
155 wxWCharBuffer
buf(nLen
);
156 nLen
= MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NULL
157 if ( nLen
!= (size_t)-1 )
164 wxWCharBuffer
buf((wchar_t *)NULL
);
169 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
173 size_t nLen
= WC2MB(NULL
, pwz
, 0);
174 if ( nLen
!= (size_t)-1 )
176 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
177 nLen
= WC2MB(buf
.data(), pwz
, nLen
+ 4);
178 if ( nLen
!= (size_t)-1 )
185 wxCharBuffer
buf((char *)NULL
);
190 // helper of cMB2WC(): check if n bytes at this location are all NUL
191 static bool NotAllNULs(const char *p
, size_t n
)
193 while ( n
&& *p
++ == '\0' )
200 wxMBConv::cMB2WC(const char *in
, size_t inLen
, size_t *outLen
) const
202 // the currently accumulated wide characters
205 // the current length of wbuf
208 // the number of NULs terminating this string
209 size_t nulLen
wxDUMMY_INITIALIZE(0);
211 // make a copy of the input string unless it is already properly
215 // if we were not given the input size we just have to assume that the
216 // string is properly terminated as we have no way of knowing how long it
217 // is anyhow, but if we do have the size check whether there are enough
219 if ( inLen
!= (size_t)-1 )
221 // we need to know how to find the end of this string
222 nulLen
= GetMinMBCharWidth();
223 if ( nulLen
== (size_t)-1 )
226 // if there are enough NULs we can avoid the copy
227 if ( inLen
< nulLen
|| NotAllNULs(in
+ inLen
- nulLen
, nulLen
) )
229 // make a copy in order to properly NUL-terminate the string
230 bufTmp
= wxCharBuffer(inLen
+ nulLen
- 1 /* 1 will be added */);
231 char * const p
= bufTmp
.data();
232 memcpy(p
, in
, inLen
);
233 for ( char *s
= p
+ inLen
; s
< p
+ inLen
+ nulLen
; s
++ )
242 for ( const char * const inEnd
= in
+ inLen
;; )
244 // try to convert the current chunk
245 lenChunk
= MB2WC(NULL
, in
, 0);
248 // nothing left in the input string, conversion succeeded
252 if ( lenChunk
== (size_t)-1 )
255 // if we already have a previous chunk, leave the NUL separating it
260 const size_t lenBufNew
= lenBuf
+ lenChunk
;
261 if ( !wbuf
.extend(lenBufNew
) )
263 lenChunk
= (size_t)-1;
267 lenChunk
= MB2WC(wbuf
.data() + lenBuf
, in
, lenChunk
+ 1 /* for NUL */);
268 if ( lenChunk
== (size_t)-1 )
273 if ( inLen
== (size_t)-1 )
275 // convert only one chunk in this case, as we suppose that the
276 // string is NUL-terminated and so inEnd is not used at all
280 // advance the input pointer past the end of this chunk
281 while ( NotAllNULs(in
, nulLen
) )
283 // notice that we must skip over multiple bytes here as we suppose
284 // that if NUL takes 2 or 4 bytes, then all the other characters do
285 // too and so if advanced by a single byte we might erroneously
286 // detect sequences of NUL bytes in the middle of the input
290 in
+= nulLen
; // skipping over its terminator as well
292 // note that ">=" (and not just "==") is needed here as the terminator
293 // we skipped just above could be inside or just after the buffer
294 // delimited by inEnd
299 if ( lenChunk
== (size_t)-1 )
313 wxMBConv::cWC2MB(const wchar_t *in
, size_t inLen
, size_t *outLen
) const
315 // the currently accumulated multibyte characters
318 // the current length of buf
321 // make a copy of the input string unless it is already properly
324 // if we don't know its length we have no choice but to assume that it is,
325 // indeed, properly terminated
326 wxWCharBuffer bufTmp
;
327 if ( inLen
== (size_t)-1 )
329 inLen
= wxWcslen(in
) + 1;
331 else if ( inLen
!= 0 && in
[inLen
- 1] != L
'\0' )
333 // make a copy in order to properly NUL-terminate the string
334 bufTmp
= wxWCharBuffer(inLen
);
335 memcpy(bufTmp
.data(), in
, inLen
*sizeof(wchar_t));
341 for ( const wchar_t * const inEnd
= in
+ inLen
;; )
343 // try to convert the current chunk, if anything left
344 size_t lenChunk
= in
< inEnd
? WC2MB(NULL
, in
, 0) : 0;
347 // nothing left in the input string, conversion succeeded
349 *outLen
= lenBuf
? lenBuf
- 1 : lenBuf
;
354 if ( lenChunk
== (size_t)-1 )
357 const size_t lenBufNew
= lenBuf
+ lenChunk
;
358 if ( !buf
.extend(lenBufNew
) )
361 lenChunk
= WC2MB(buf
.data() + lenBuf
, in
, lenChunk
+ 1 /* for NUL */);
362 if ( lenChunk
== (size_t)-1 )
365 // chunk successfully converted, go to the next one
366 in
+= wxWcslen(in
) + 1 /* skip NUL too */;
367 lenBuf
= lenBufNew
+ 1;
374 return wxCharBuffer();
377 // ----------------------------------------------------------------------------
379 // ----------------------------------------------------------------------------
381 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
383 return wxMB2WC(buf
, psz
, n
);
386 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
388 return wxWC2MB(buf
, psz
, n
);
391 // ----------------------------------------------------------------------------
392 // wxConvBrokenFileNames
393 // ----------------------------------------------------------------------------
397 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar
*charset
)
399 if ( !charset
|| wxStricmp(charset
, _T("UTF-8")) == 0
400 || wxStricmp(charset
, _T("UTF8")) == 0 )
401 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
403 m_conv
= new wxCSConv(charset
);
408 // ----------------------------------------------------------------------------
410 // ----------------------------------------------------------------------------
412 // Implementation (C) 2004 Fredrik Roubert
415 // BASE64 decoding table
417 static const unsigned char utf7unb64
[] =
419 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
420 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
421 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
422 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
423 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
424 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
425 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
426 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
427 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
428 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
429 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
430 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
431 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
432 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
433 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
434 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
435 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
436 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
437 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
438 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
439 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
440 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
441 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
442 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
443 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
444 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
445 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
446 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
447 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
448 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
449 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
450 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
453 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
457 while ( *psz
&& (!buf
|| (len
< n
)) )
459 unsigned char cc
= *psz
++;
467 else if (*psz
== '-')
475 else // start of BASE64 encoded string
479 for ( ok
= lsb
= false, d
= 0, l
= 0;
480 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff;
485 for (l
+= 6; l
>= 8; lsb
= !lsb
)
487 unsigned char c
= (unsigned char)((d
>> (l
-= 8)) % 256);
497 *buf
= (wchar_t)(c
<< 8);
506 // in valid UTF7 we should have valid characters after '+'
515 if ( buf
&& (len
< n
) )
522 // BASE64 encoding table
524 static const unsigned char utf7enb64
[] =
526 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
527 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
528 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
529 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
530 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
531 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
532 'w', 'x', 'y', 'z', '0', '1', '2', '3',
533 '4', '5', '6', '7', '8', '9', '+', '/'
537 // UTF-7 encoding table
539 // 0 - Set D (directly encoded characters)
540 // 1 - Set O (optional direct characters)
541 // 2 - whitespace characters (optional)
542 // 3 - special characters
544 static const unsigned char utf7encode
[128] =
546 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
547 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
548 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
549 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
550 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
551 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
552 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
553 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
556 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
560 while (*psz
&& ((!buf
) || (len
< n
)))
563 if (cc
< 0x80 && utf7encode
[cc
] < 1)
571 else if (((wxUint32
)cc
) > 0xffff)
573 // no surrogate pair generation (yet?)
584 // BASE64 encode string
585 unsigned int lsb
, d
, l
;
586 for (d
= 0, l
= 0; /*nothing*/; psz
++)
588 for (lsb
= 0; lsb
< 2; lsb
++)
591 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
593 for (l
+= 8; l
>= 6; )
597 *buf
++ = utf7enb64
[(d
>> l
) % 64];
602 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
608 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
617 if (buf
&& (len
< n
))
622 // ----------------------------------------------------------------------------
624 // ----------------------------------------------------------------------------
626 static wxUint32 utf8_max
[]=
627 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
629 // boundaries of the private use area we use to (temporarily) remap bytes
630 // that are invalid in a UTF-8 encoded string
631 const wxUint32 wxUnicodePUA
= 0x100000;
632 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
634 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
638 while (*psz
&& ((!buf
) || (len
< n
)))
640 const char *opsz
= psz
;
641 bool invalid
= false;
642 unsigned char cc
= *psz
++, fc
= cc
;
644 for (cnt
= 0; fc
& 0x80; cnt
++)
653 // escape the escape character for octal escapes
654 if ((m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
655 && cc
== '\\' && (!buf
|| len
< n
))
667 // invalid UTF-8 sequence
672 unsigned ocnt
= cnt
- 1;
673 wxUint32 res
= cc
& (0x3f >> cnt
);
677 if ((cc
& 0xC0) != 0x80)
679 // invalid UTF-8 sequence
684 res
= (res
<< 6) | (cc
& 0x3f);
686 if (invalid
|| res
<= utf8_max
[ocnt
])
688 // illegal UTF-8 encoding
691 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
692 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
694 // if one of our PUA characters turns up externally
695 // it must also be treated as an illegal sequence
696 // (a bit like you have to escape an escape character)
702 // cast is ok because wchar_t == wxUint16 if WC_UTF16
703 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
704 if (pa
== (size_t)-1)
716 *buf
++ = (wchar_t)res
;
718 #endif // WC_UTF16/!WC_UTF16
723 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
725 while (opsz
< psz
&& (!buf
|| len
< n
))
728 // cast is ok because wchar_t == wxUint16 if WC_UTF16
729 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
730 wxASSERT(pa
!= (size_t)-1);
737 *buf
++ = (wchar_t)(wxUnicodePUA
+ (unsigned char)*opsz
);
743 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
745 while (opsz
< psz
&& (!buf
|| len
< n
))
747 if ( buf
&& len
+ 3 < n
)
749 unsigned char on
= *opsz
;
751 *buf
++ = (wchar_t)( L
'0' + on
/ 0100 );
752 *buf
++ = (wchar_t)( L
'0' + (on
% 0100) / 010 );
753 *buf
++ = (wchar_t)( L
'0' + on
% 010 );
759 else // MAP_INVALID_UTF8_NOT
766 if (buf
&& (len
< n
))
771 static inline bool isoctal(wchar_t wch
)
773 return L
'0' <= wch
&& wch
<= L
'7';
776 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
780 while (*psz
&& ((!buf
) || (len
< n
)))
784 // cast is ok for WC_UTF16
785 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
786 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
788 cc
=(*psz
++) & 0x7fffffff;
791 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
792 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
795 *buf
++ = (char)(cc
- wxUnicodePUA
);
798 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
799 && cc
== L
'\\' && psz
[0] == L
'\\' )
806 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
808 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
812 *buf
++ = (char) ((psz
[0] - L
'0')*0100 +
813 (psz
[1] - L
'0')*010 +
823 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
837 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
839 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
851 // ----------------------------------------------------------------------------
853 // ----------------------------------------------------------------------------
855 #ifdef WORDS_BIGENDIAN
856 #define wxMBConvUTF16straight wxMBConvUTF16BE
857 #define wxMBConvUTF16swap wxMBConvUTF16LE
859 #define wxMBConvUTF16swap wxMBConvUTF16BE
860 #define wxMBConvUTF16straight wxMBConvUTF16LE
866 // copy 16bit MB to 16bit String
867 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
871 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
874 *buf
++ = *(wxUint16
*)psz
;
877 psz
+= sizeof(wxUint16
);
879 if (buf
&& len
<n
) *buf
=0;
885 // copy 16bit String to 16bit MB
// Copies each wide character to the output as one native-order 16-bit unit.
// len counts output bytes and is compared against n. When buf is NULL nothing
// is stored and only len is advanced.
886 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
890 while (*psz
&& (!buf
|| len
< n
))
894 *(wxUint16
*)buf
= *psz
;
895 buf
+= sizeof(wxUint16
);
897 len
+= sizeof(wxUint16
);
// Write the 16-bit terminator only if it still fits. The test is written as
// an addition because n is unsigned: "n - sizeof(wxUint16)" wraps around for
// n < sizeof(wxUint16) and would let the store run past the buffer.
900 if (buf
&& len
+sizeof(wxUint16
)<=n
) *(wxUint16
*)buf
=0;
906 // swap 16bit MB to 16bit String
907 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
911 // UTF16 string must be terminated by 2 NULs as single NULs may occur
913 while ( (psz
[0] || psz
[1]) && (!buf
|| len
< n
) )
917 ((char *)buf
)[0] = psz
[1];
918 ((char *)buf
)[1] = psz
[0];
925 if ( buf
&& len
< n
)
932 // swap 16bit String to 16bit MB
933 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
937 while ( *psz
&& (!buf
|| len
< n
) )
941 *buf
++ = ((char*)psz
)[1];
942 *buf
++ = ((char*)psz
)[0];
948 if ( buf
&& len
< n
)
958 // copy 16bit MB to 32bit String
959 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
963 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
966 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
967 if (pa
== (size_t)-1)
971 *buf
++ = (wchar_t)cc
;
973 psz
+= pa
* sizeof(wxUint16
);
975 if (buf
&& len
<n
) *buf
=0;
981 // copy 32bit String to 16bit MB
// Encodes each 32-bit wide character with encode_utf16() and stores the
// resulting one or two 16-bit units in native order. len counts output bytes
// and is compared against n.
982 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
986 while (*psz
&& (!buf
|| len
< n
))
989 size_t pa
=encode_utf16(*psz
, cc
);
991 if (pa
== (size_t)-1)
996 *(wxUint16
*)buf
= cc
[0];
997 buf
+= sizeof(wxUint16
);
1000 *(wxUint16
*)buf
= cc
[1];
1001 buf
+= sizeof(wxUint16
);
1005 len
+= pa
*sizeof(wxUint16
);
// Write the 16-bit terminator only if it still fits. The test is written as
// an addition because n is unsigned: "n - sizeof(wxUint16)" wraps around for
// n < sizeof(wxUint16) and would let the store run past the buffer.
1008 if (buf
&& len
+sizeof(wxUint16
)<=n
) *(wxUint16
*)buf
=0;
1014 // swap 16bit MB to 32bit String
1015 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1019 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
1023 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
1024 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
1026 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
1027 if (pa
== (size_t)-1)
1031 *buf
++ = (wchar_t)cc
;
1034 psz
+= pa
* sizeof(wxUint16
);
1036 if (buf
&& len
<n
) *buf
=0;
1042 // swap 32bit String to 16bit MB
// Encodes each 32-bit wide character with encode_utf16() and stores the
// resulting 16-bit units with their two bytes exchanged. len counts output
// bytes and is compared against n.
1043 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1047 while (*psz
&& (!buf
|| len
< n
))
1050 size_t pa
=encode_utf16(*psz
, cc
);
1052 if (pa
== (size_t)-1)
1057 *buf
++ = ((char*)cc
)[1];
1058 *buf
++ = ((char*)cc
)[0];
1061 *buf
++ = ((char*)cc
)[3];
1062 *buf
++ = ((char*)cc
)[2];
1066 len
+= pa
*sizeof(wxUint16
);
// Write the 16-bit terminator only if it still fits. The test is written as
// an addition because n is unsigned: "n - sizeof(wxUint16)" wraps around for
// n < sizeof(wxUint16) and would let the store run past the buffer.
1069 if (buf
&& len
+sizeof(wxUint16
)<=n
) *(wxUint16
*)buf
=0;
1077 // ----------------------------------------------------------------------------
1079 // ----------------------------------------------------------------------------
1081 #ifdef WORDS_BIGENDIAN
1082 #define wxMBConvUTF32straight wxMBConvUTF32BE
1083 #define wxMBConvUTF32swap wxMBConvUTF32LE
1085 #define wxMBConvUTF32swap wxMBConvUTF32BE
1086 #define wxMBConvUTF32straight wxMBConvUTF32LE
1090 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1091 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
1096 // copy 32bit MB to 16bit String
1097 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1101 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1105 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
1106 if (pa
== (size_t)-1)
1116 psz
+= sizeof(wxUint32
);
1118 if (buf
&& len
<n
) *buf
=0;
1124 // copy 16bit String to 32bit MB
// Decodes the UTF-16 wide string with decode_utf16() and stores each code
// point as one native-order 32-bit unit. len counts output bytes and is
// compared against n.
1125 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1129 while (*psz
&& (!buf
|| len
< n
))
1133 // cast is ok for WC_UTF16
1134 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
1135 if (pa
== (size_t)-1)
1140 *(wxUint32
*)buf
= cc
;
1141 buf
+= sizeof(wxUint32
);
1143 len
+= sizeof(wxUint32
);
// Terminator guard: only taken when a whole 32-bit unit still fits. Written
// as an addition because n is unsigned: "n - sizeof(wxUint32)" wraps around
// for n < sizeof(wxUint32) and would make the guard pass for tiny buffers.
1147 if (buf
&& len
+sizeof(wxUint32
)<=n
)
1155 // swap 32bit MB to 16bit String
1156 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1160 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1163 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
1164 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
1169 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
1170 if (pa
== (size_t)-1)
1180 psz
+= sizeof(wxUint32
);
1190 // swap 16bit String to 32bit MB
// Decodes the UTF-16 wide string with decode_utf16() into a temporary 32-bit
// value (cc) for byte-swapped UTF-32 output. len counts output bytes and is
// compared against n.
1191 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1195 while (*psz
&& (!buf
|| len
< n
))
1199 // cast is ok for WC_UTF16
1200 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
1201 if (pa
== (size_t)-1)
1211 len
+= sizeof(wxUint32
);
// Terminator guard: only taken when a whole 32-bit unit still fits. Written
// as an addition because n is unsigned: "n - sizeof(wxUint32)" wraps around
// for n < sizeof(wxUint32) and would make the guard pass for tiny buffers.
1215 if (buf
&& len
+sizeof(wxUint32
)<=n
)
1224 // copy 32bit MB to 32bit String
1225 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1229 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1232 *buf
++ = (wchar_t)(*(wxUint32
*)psz
);
1234 psz
+= sizeof(wxUint32
);
1244 // copy 32bit String to 32bit MB
// Copies each 32-bit wide character to the output as one native-order 32-bit
// unit. len counts output bytes and is compared against n.
1245 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1249 while (*psz
&& (!buf
|| len
< n
))
1253 *(wxUint32
*)buf
= *psz
;
1254 buf
+= sizeof(wxUint32
);
1257 len
+= sizeof(wxUint32
);
// Terminator guard: only taken when a whole 32-bit unit still fits. Written
// as an addition because n is unsigned: "n - sizeof(wxUint32)" wraps around
// for n < sizeof(wxUint32) and would make the guard pass for tiny buffers.
1261 if (buf
&& len
+sizeof(wxUint32
)<=n
)
1268 // swap 32bit MB to 32bit String
1269 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1273 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1277 ((char *)buf
)[0] = psz
[3];
1278 ((char *)buf
)[1] = psz
[2];
1279 ((char *)buf
)[2] = psz
[1];
1280 ((char *)buf
)[3] = psz
[0];
1284 psz
+= sizeof(wxUint32
);
1294 // swap 32bit String to 32bit MB
// Copies each 32-bit wide character to the output with its four bytes in
// reversed order. len counts output bytes and is compared against n.
1295 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1299 while (*psz
&& (!buf
|| len
< n
))
1303 *buf
++ = ((char *)psz
)[3];
1304 *buf
++ = ((char *)psz
)[2];
1305 *buf
++ = ((char *)psz
)[1];
1306 *buf
++ = ((char *)psz
)[0];
1308 len
+= sizeof(wxUint32
);
// Terminator guard: only taken when a whole 32-bit unit still fits. Written
// as an addition because n is unsigned: "n - sizeof(wxUint32)" wraps around
// for n < sizeof(wxUint32) and would make the guard pass for tiny buffers.
1312 if (buf
&& len
+sizeof(wxUint32
)<=n
)
1322 // ============================================================================
1323 // The classes doing conversion using the iconv_xxx() functions
1324 // ============================================================================
1328 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1329 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1330 // (unless there's yet another bug in glibc) the only case when iconv()
1331 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1332 // left in the input buffer -- when _real_ error occurs,
1333 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1335 // [This bug does not appear in glibc 2.2.]
1336 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1337 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1338 (errno != E2BIG || bufLeft != 0))
1340 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1343 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1345 #define ICONV_T_INVALID ((iconv_t)-1)
1347 #if SIZEOF_WCHAR_T == 4
1348 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1349 #define WC_ENC wxFONTENCODING_UTF32
1350 #elif SIZEOF_WCHAR_T == 2
1351 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1352 #define WC_ENC wxFONTENCODING_UTF16
1353 #else // sizeof(wchar_t) != 2 nor 4
1354 // does this ever happen?
1355 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1358 // ----------------------------------------------------------------------------
1359 // wxMBConv_iconv: encapsulates an iconv character set
1360 // ----------------------------------------------------------------------------
1362 class wxMBConv_iconv
: public wxMBConv
1365 wxMBConv_iconv(const wxChar
*name
);
1366 virtual ~wxMBConv_iconv();
1368 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1369 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1372 { return (m2w
!= ICONV_T_INVALID
) && (w2m
!= ICONV_T_INVALID
); }
1375 // the iconv handlers used to translate from multibyte to wide char and in
1376 // the other direction
1380 // guards access to m2w and w2m objects
1381 wxMutex m_iconvMutex
;
1385 // classify this encoding as explained in wxMBConv::GetMinMBCharWidth()
1387 virtual size_t GetMinMBCharWidth() const;
1389 // the name (for iconv_open()) of a wide char charset -- if none is
1390 // available on this machine, it will remain empty
1391 static wxString ms_wcCharsetName
;
1393 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1394 // different endian-ness than the native one
1395 static bool ms_wcNeedsSwap
;
1397 // cached result of GetMinMBCharWidth(); set to 0 meaning "unknown"
1399 size_t m_minMBCharWidth
;
1402 // make the constructor available for unit testing
1403 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_iconv( const wxChar
* name
)
1405 wxMBConv_iconv
* result
= new wxMBConv_iconv( name
);
1406 if ( !result
->IsOk() )
1414 wxString
wxMBConv_iconv::ms_wcCharsetName
;
1415 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
// Opens the iconv handles for converting between the charset called name and
// wchar_t. The first time through (ms_wcCharsetName still empty) it also
// searches for an iconv charset name that represents wchar_t: for each
// candidate it first tries a name with explicit byte order and, failing that,
// the plain name, in which case a test conversion decides whether the result
// needs byte swapping (ms_wcNeedsSwap). w2m is left invalid when no wide char
// charset was found.
1417 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1419 m_minMBCharWidth
= 0;
1421 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1422 // names for the charsets
1423 const wxCharBuffer
cname(wxString(name
).ToAscii());
1425 // check for charset that represents wchar_t:
1426 if ( ms_wcCharsetName
.empty() )
1428 wxLogTrace(TRACE_STRCONV
, _T("Looking for wide char codeset:"));
1431 const wxChar
**names
= wxFontMapperBase::GetAllEncodingNames(WC_ENC
);
1432 #else // !wxUSE_FONTMAP
1433 static const wxChar
*names
[] =
1435 #if SIZEOF_WCHAR_T == 4
// this must be a comparison: "=" is not valid in a preprocessor expression
1437 #elif SIZEOF_WCHAR_T == 2
1442 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1444 for ( ; *names
&& ms_wcCharsetName
.empty(); ++names
)
1446 const wxString
nameCS(*names
);
1448 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1449 wxString
nameXE(nameCS
);
1450 #ifdef WORDS_BIGENDIAN
1452 #else // little endian
1456 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1459 m2w
= iconv_open(nameXE
.ToAscii(), cname
);
1460 if ( m2w
== ICONV_T_INVALID
)
1462 // try charset w/o bytesex info (e.g. "UCS4")
1463 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1465 m2w
= iconv_open(nameCS
.ToAscii(), cname
);
1467 // and check for bytesex ourselves:
1468 if ( m2w
!= ICONV_T_INVALID
)
1470 char buf
[2], *bufPtr
;
1471 wchar_t wbuf
[2], *wbufPtr
;
1479 outsz
= SIZEOF_WCHAR_T
* 2;
1483 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1484 (char**)&wbufPtr
, &outsz
);
1486 if (ICONV_FAILED(res
, insz
))
1488 wxLogLastError(wxT("iconv"));
1489 wxLogError(_("Conversion to charset '%s' doesn't work."),
1492 else // ok, can convert to this encoding, remember it
1494 ms_wcCharsetName
= nameCS
;
1495 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1499 else // use charset not requiring byte swapping
1501 ms_wcCharsetName
= nameXE
;
1505 wxLogTrace(TRACE_STRCONV
,
1506 wxT("iconv wchar_t charset is \"%s\"%s"),
1507 ms_wcCharsetName
.empty() ? _T("<none>")
1508 : ms_wcCharsetName
.c_str(),
1509 ms_wcNeedsSwap
? _T(" (needs swap)")
1512 else // we already have ms_wcCharsetName
1514 m2w
= iconv_open(ms_wcCharsetName
.ToAscii(), cname
);
1517 if ( ms_wcCharsetName
.empty() )
1519 w2m
= ICONV_T_INVALID
;
1523 w2m
= iconv_open(cname
, ms_wcCharsetName
.ToAscii());
1524 if ( w2m
== ICONV_T_INVALID
)
1526 wxLogTrace(TRACE_STRCONV
,
1527 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1528 ms_wcCharsetName
.c_str(), cname
.data());
1533 wxMBConv_iconv::~wxMBConv_iconv()
1535 if ( m2w
!= ICONV_T_INVALID
)
1537 if ( w2m
!= ICONV_T_INVALID
)
// Converts the NUL-terminated multibyte string psz to wide characters using
// the m2w iconv handle while holding m_iconvMutex. With a destination buffer
// the text is converted directly into buf (room for n wide characters) and
// byte-swapped in place if needed; without one it is converted into a small
// temporary buffer in chunks of 8 wide characters just to count the length
// needed.
1541 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1544 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1545 // Unfortunately there are a couple of global wxCSConv objects such as
1546 // wxConvLocal that are used all over wx code, so we have to make sure
1547 // the handle is used by at most one thread at the time. Otherwise
1548 // only a few wx classes would be safe to use from non-main threads
1549 // as MB<->WC conversion would fail "randomly".
1550 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1553 size_t inbuf
= strlen(psz
);
1554 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1556 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1557 wchar_t *bufPtr
= buf
;
1558 const char *pszPtr
= psz
;
1562 // have destination buffer, convert there
1564 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1565 (char**)&bufPtr
, &outbuf
);
1566 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1570 // convert to native endianness
1571 for ( unsigned i
= 0; i
< res
; i
++ )
// swap each converted character in place; the index must be i, as storing to
// buf[n] would leave the text unswapped and write past the converted data
1572 buf
[i
] = WC_BSWAP(buf
[i
]);
1575 // NB: iconv was given only strlen(psz) characters on input, and so
1576 // it couldn't convert the trailing zero. Let's do it ourselves
1577 // if there's some room left for it in the output buffer.
1583 // no destination buffer... convert using temp buffer
1584 // to calculate destination buffer requirement
1589 outbuf
= 8*SIZEOF_WCHAR_T
;
1592 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1593 (char**)&bufPtr
, &outbuf
);
1595 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1596 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1599 if (ICONV_FAILED(cres
, inbuf
))
1601 //VS: it is ok if iconv fails, hence trace only
1602 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
// Converts the NUL-terminated wide string psz to multibyte text using the w2m
// iconv handle while holding m_iconvMutex. When byte swapping is required the
// input is first copied, swapped, into a malloc()ed temporary buffer. With a
// destination buffer the text is converted directly into buf; without one it
// is converted into a 16-byte scratch buffer in a loop just to measure the
// length needed.
1609 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1612 // NB: explained in MB2WC
1613 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1616 size_t inlen
= wxWcslen(psz
);
1617 size_t inbuf
= inlen
* SIZEOF_WCHAR_T
;
1621 wchar_t *tmpbuf
= 0;
1625 // need to copy to temp buffer to switch endianness
1626 // (doing WC_BSWAP twice on the original buffer won't help, as it
1627 // could be in read-only memory, or be accessed in some other thread)
1628 tmpbuf
= (wchar_t *)malloc(inbuf
+ SIZEOF_WCHAR_T
);
1629 for ( size_t i
= 0; i
< inlen
; i
++ )
// fill the copy element by element; the index must be i, as n is the size of
// the output buffer and has nothing to do with tmpbuf (inlen + 1 elements)
1630 tmpbuf
[i
] = WC_BSWAP(psz
[i
]);
1631 tmpbuf
[inlen
] = L
'\0';
1637 // have destination buffer, convert there
1638 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1642 // NB: iconv was given only wcslen(psz) characters on input, and so
1643 // it couldn't convert the trailing zero. Let's do it ourselves
1644 // if there's some room left for it in the output buffer.
1650 // no destination buffer... convert using temp buffer
1651 // to calculate destination buffer requirement
1655 buf
= tbuf
; outbuf
= 16;
1657 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1660 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1668 if (ICONV_FAILED(cres
, inbuf
))
1670 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1677 size_t wxMBConv_iconv::GetMinMBCharWidth() const
1679 if ( m_minMBCharWidth
== 0 )
1681 wxMBConv_iconv
* const self
= wxConstCast(this, wxMBConv_iconv
);
1684 // NB: explained in MB2WC
1685 wxMutexLocker
lock(self
->m_iconvMutex
);
1688 wchar_t *wnul
= L
"";
1689 char buf
[8]; // should be enough for NUL in any encoding
1690 size_t inLen
= sizeof(wchar_t),
1691 outLen
= WXSIZEOF(buf
);
1692 char *in
= (char *)wnul
;
1694 if ( iconv(w2m
, ICONV_CHAR_CAST(&in
), &inLen
, &out
, &outLen
) == (size_t)-1 )
1696 self
->m_minMBCharWidth
= (size_t)-1;
1700 self
->m_minMBCharWidth
= out
- buf
;
1704 return m_minMBCharWidth
;
1707 #endif // HAVE_ICONV
1710 // ============================================================================
1711 // Win32 conversion classes
1712 // ============================================================================
1714 #ifdef wxHAVE_WIN32_MB2WC
// Code page lookup helpers defined outside of this section. The constructors
// of wxMBConv_win32 store their result in m_CodePage and IsOk() treats a
// value of -1 as "no usable code page".
1718 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1719 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
// wxMBConv implementation which converts between wide characters and the
// Windows code page stored in m_CodePage.
1722 class wxMBConv_win32
: public wxMBConv
// default initialization: use the CP_ACP code page; the cached NUL width
// starts at 0 and is filled in lazily by GetMinMBCharWidth()
1727 m_CodePage
= CP_ACP
;
1728 m_minMBCharWidth
= 0;
// create the converter for a charset name; the name is mapped to a code page
// by wxCharsetToCodepage()
1732 wxMBConv_win32(const wxChar
* name
)
1734 m_CodePage
= wxCharsetToCodepage(name
);
1735 m_minMBCharWidth
= 0;
// create the converter for a wxFontEncoding value; it is mapped to a code
// page by wxEncodingToCodepage()
1738 wxMBConv_win32(wxFontEncoding encoding
)
1740 m_CodePage
= wxEncodingToCodepage(encoding
);
1741 m_minMBCharWidth
= 0;
1743 #endif // wxUSE_FONTMAP
// Convert the NUL-terminated multibyte string psz, encoded in m_CodePage, to
// wide characters with ::MultiByteToWideChar().
//
// buf may be NULL, in which case 0 is passed as the output size and only the
// needed length is computed; otherwise n is passed as the size of buf.
// UTF-8 input is delegated to wxMBConvUTF8 when MB_ERR_INVALID_CHARS can't
// be used.
1745 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1747 // note that we have to use the MB_ERR_INVALID_CHARS flag as without it
1748 // the behaviour is not compatible with the Unix version (using iconv)
1749 // and breaks the library itself, e.g. wxTextInputStream::NextChar()
1750 // wouldn't work if reading an incomplete MB char didn't result in an
// error
1753 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1754 // an error (tested under Windows Server 2003) and apparently it is
1755 // done on purpose, i.e. the function accepts any input in this case
1756 // and although I'd prefer to return error on ill-formed output, our
1757 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1758 // explicitly ill-formed according to RFC 2152) neither so we don't
1759 // even have any fallback here...
1761 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1762 // Win XP or newer and if it is specified on older versions, conversion
1763 // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1764 // fails. So we can only use the flag on newer Windows versions.
1765 // Additionally, the flag is not supported by UTF7, symbol and CJK
1766 // encodings. See here:
1767 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1768 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
// code pages >= 50000 are excluded together with UTF-7 and the symbol one
1770 if ( m_CodePage
!= CP_UTF7
&& m_CodePage
!= CP_SYMBOL
&&
1771 m_CodePage
< 50000 &&
1772 IsAtLeastWin2kSP4() )
1774 flags
= MB_ERR_INVALID_CHARS
;
1776 else if ( m_CodePage
== CP_UTF8
)
1778 // Avoid round-trip in the special case of UTF-8 by using our
1779 // own UTF-8 conversion code:
1780 return wxMBConvUTF8().MB2WC(buf
, psz
, n
);
1783 const size_t len
= ::MultiByteToWideChar
1785 m_CodePage
, // code page
1786 flags
, // flags: fail on error
1787 psz
, // input string
1788 -1, // its length (NUL-terminated)
1789 buf
, // output string
1790 buf
? n
: 0 // size of output buffer
1794 // function totally failed
1798 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1799 // check if we succeeded, by doing a double trip:
1800 if ( !flags
&& buf
)
// convert the result back and compare it with the original input
1802 const size_t mbLen
= strlen(psz
);
1803 wxCharBuffer
mbBuf(mbLen
);
1804 if ( ::WideCharToMultiByte
1811 mbLen
+ 1, // size in bytes, not length
1815 strcmp(mbBuf
, psz
) != 0 )
1817 // we didn't obtain the same thing we started from, hence
1818 // the conversion was lossy and we consider that it failed
1823 // note that it returns count of written chars for buf != NULL and size
1824 // of the needed buffer for buf == NULL so in either case the length of
1825 // the string (which never includes the terminating NUL) is one less
// Convert the NUL-terminated wide string pwz to the m_CodePage code page with
// ::WideCharToMultiByte().
//
// buf may be NULL, in which case 0 is passed as the output size; otherwise n
// is passed as the size of buf. When WC_NO_BEST_FIT_CHARS can't be used, the
// result is converted back with MB2WC() and compared with pwz to detect lossy
// conversions.
1829 size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
1832 we have a problem here: by default, WideCharToMultiByte() may
1833 replace characters unrepresentable in the target code page with bad
1834 quality approximations such as turning "1/2" symbol (U+00BD) into
1835 "1" for the code pages which don't have it and we, obviously, want
1836 to avoid this at any price
1838 the trouble is that this function does it _silently_, i.e. it won't
1839 even tell us whether it did or not... Win98/2000 and higher provide
1840 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1841 we have to resort to a round trip, i.e. check that converting back
1842 results in the same string -- this is, of course, expensive but
1843 otherwise we simply can't be sure to not garble the data.
1846 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1847 // it doesn't work with CJK encodings (which we test for rather roughly
1848 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
// supporting it
1850 BOOL usedDef
wxDUMMY_INITIALIZE(false);
1853 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
1855 // it's our lucky day
1856 flags
= WC_NO_BEST_FIT_CHARS
;
1857 pUsedDef
= &usedDef
;
1859 else // old system or unsupported encoding
1865 const size_t len
= ::WideCharToMultiByte
1867 m_CodePage
, // code page
1868 flags
, // either none or no best fit
1869 pwz
, // input string
1870 -1, // it is (wide) NUL-terminated
1871 buf
, // output buffer
1872 buf
? n
: 0, // and its size
1873 NULL
, // default "replacement" char
1874 pUsedDef
// [out] was it used?
1879 // function totally failed
1883 // if we were really converting, check if we succeeded
1888 // check if the conversion failed, i.e. if any replacements
1893 else // we must resort to double tripping...
1895 wxWCharBuffer
wcBuf(n
);
1896 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
1897 wcscmp(wcBuf
, pwz
) != 0 )
1899 // we didn't obtain the same thing we started from, hence
1900 // the conversion was lossy and we consider that it failed
1906 // see the comment above for the reason of "len - 1"
1910 bool IsOk() const { return m_CodePage
!= -1; }
// Tell whether WC_NO_BEST_FIT_CHARS may be passed to WideCharToMultiByte().
//
// The answer depends on the OS family and version returned by
// wxGetOsVersion(); it is computed on the first call and cached in a
// function-level static (-1 = not determined yet, 1 = yes).
1913 static bool CanUseNoBestFit()
1915 static int s_isWin98Or2k
= -1;
1917 if ( s_isWin98Or2k
== -1 )
1920 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
// requires version 4.10 or later here
1923 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
// requires major version 5 or later here
1927 s_isWin98Or2k
= verMaj
>= 5;
1931 // unknown, be conservative by default
1935 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
1938 return s_isWin98Or2k
== 1;
// Tell whether the running system is Windows 2000 SP4 or newer.
//
// The version is queried with GetVersionEx() on the first call and the
// result cached in a function-level static (-1 = not determined yet).
// The structure is zeroed first and the return value of GetVersionEx() is
// not checked.
1941 static bool IsAtLeastWin2kSP4()
1946 static int s_isAtLeastWin2kSP4
= -1;
1948 if ( s_isAtLeastWin2kSP4
== -1 )
1950 OSVERSIONINFOEX ver
;
1952 memset(&ver
, 0, sizeof(ver
));
1953 ver
.dwOSVersionInfoSize
= sizeof(ver
);
1954 GetVersionEx((OSVERSIONINFO
*)&ver
);
// true for major > 5, for 5.x with x > 0 and for 5.0 with service pack >= 4
1956 s_isAtLeastWin2kSP4
=
1957 ((ver
.dwMajorVersion
> 5) || // Vista+
1958 (ver
.dwMajorVersion
== 5 && ver
.dwMinorVersion
> 0) || // XP/2003
1959 (ver
.dwMajorVersion
== 5 && ver
.dwMinorVersion
== 0 &&
1960 ver
.wServicePackMajor
>= 4)) // 2000 SP4+
1964 return s_isAtLeastWin2kSP4
== 1;
// Return the cached width of NUL in the m_CodePage code page.
//
// On the first call (m_minMBCharWidth == 0) the width is obtained by asking
// ::WideCharToMultiByte() to translate just the NUL of an empty wide string
// without providing an output buffer; the cache is then set either to that
// length or to (size_t)-1.
1968 virtual size_t GetMinMBCharWidth() const
1970 if ( m_minMBCharWidth
== 0 )
1972 int len
= ::WideCharToMultiByte
1974 m_CodePage
, // code page
1976 L
"", // input string
1977 1, // translate just the NUL
1978 NULL
, // output buffer
1980 NULL
, // no replacement char
1981 NULL
// [out] don't care if it was used
// the cache is logically const, so cast constness away to update it
1984 wxMBConv_win32
* const self
= wxConstCast(this, wxMBConv_win32
);
1988 wxLogDebug(_T("Unexpected NUL length %d"), len
);
// (size_t)-1 marks the width as unavailable
1992 self
->m_minMBCharWidth
= (size_t)-1;
1998 self
->m_minMBCharWidth
= len
;
2003 return m_minMBCharWidth
;
2006 // the code page we're working with
2009 // cached result of GetMinMBCharWidth(), set to 0 initially meaning
// that it has not been computed yet and to (size_t)-1 if it couldn't be
// determined
2011 size_t m_minMBCharWidth
;
2014 #endif // wxHAVE_WIN32_MB2WC
2016 // ============================================================================
2017 // Cocoa conversion classes
2018 // ============================================================================
2020 #if defined(__WXCOCOA__)
2022 // RN: There is no UTF-32 support in either Core Foundation or
2023 // Cocoa. Strangely enough, internally Core Foundation uses
2024 // UTF 32 internally quite a bit - its just not public (yet).
2026 #include <CoreFoundation/CFString.h>
2027 #include <CoreFoundation/CFStringEncodingExt.h>
2029 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
2031 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
2032 if ( encoding
== wxFONTENCODING_DEFAULT
)
2034 enc
= CFStringGetSystemEncoding();
2036 else switch( encoding
)
2038 case wxFONTENCODING_ISO8859_1
:
2039 enc
= kCFStringEncodingISOLatin1
;
2041 case wxFONTENCODING_ISO8859_2
:
2042 enc
= kCFStringEncodingISOLatin2
;
2044 case wxFONTENCODING_ISO8859_3
:
2045 enc
= kCFStringEncodingISOLatin3
;
2047 case wxFONTENCODING_ISO8859_4
:
2048 enc
= kCFStringEncodingISOLatin4
;
2050 case wxFONTENCODING_ISO8859_5
:
2051 enc
= kCFStringEncodingISOLatinCyrillic
;
2053 case wxFONTENCODING_ISO8859_6
:
2054 enc
= kCFStringEncodingISOLatinArabic
;
2056 case wxFONTENCODING_ISO8859_7
:
2057 enc
= kCFStringEncodingISOLatinGreek
;
2059 case wxFONTENCODING_ISO8859_8
:
2060 enc
= kCFStringEncodingISOLatinHebrew
;
2062 case wxFONTENCODING_ISO8859_9
:
2063 enc
= kCFStringEncodingISOLatin5
;
2065 case wxFONTENCODING_ISO8859_10
:
2066 enc
= kCFStringEncodingISOLatin6
;
2068 case wxFONTENCODING_ISO8859_11
:
2069 enc
= kCFStringEncodingISOLatinThai
;
2071 case wxFONTENCODING_ISO8859_13
:
2072 enc
= kCFStringEncodingISOLatin7
;
2074 case wxFONTENCODING_ISO8859_14
:
2075 enc
= kCFStringEncodingISOLatin8
;
2077 case wxFONTENCODING_ISO8859_15
:
2078 enc
= kCFStringEncodingISOLatin9
;
2081 case wxFONTENCODING_KOI8
:
2082 enc
= kCFStringEncodingKOI8_R
;
2084 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
2085 enc
= kCFStringEncodingDOSRussian
;
2088 // case wxFONTENCODING_BULGARIAN :
2092 case wxFONTENCODING_CP437
:
2093 enc
=kCFStringEncodingDOSLatinUS
;
2095 case wxFONTENCODING_CP850
:
2096 enc
= kCFStringEncodingDOSLatin1
;
2098 case wxFONTENCODING_CP852
:
2099 enc
= kCFStringEncodingDOSLatin2
;
2101 case wxFONTENCODING_CP855
:
2102 enc
= kCFStringEncodingDOSCyrillic
;
2104 case wxFONTENCODING_CP866
:
2105 enc
=kCFStringEncodingDOSRussian
;
2107 case wxFONTENCODING_CP874
:
2108 enc
= kCFStringEncodingDOSThai
;
2110 case wxFONTENCODING_CP932
:
2111 enc
= kCFStringEncodingDOSJapanese
;
2113 case wxFONTENCODING_CP936
:
2114 enc
=kCFStringEncodingDOSChineseSimplif
;
2116 case wxFONTENCODING_CP949
:
2117 enc
= kCFStringEncodingDOSKorean
;
2119 case wxFONTENCODING_CP950
:
2120 enc
= kCFStringEncodingDOSChineseTrad
;
2122 case wxFONTENCODING_CP1250
:
2123 enc
= kCFStringEncodingWindowsLatin2
;
2125 case wxFONTENCODING_CP1251
:
2126 enc
=kCFStringEncodingWindowsCyrillic
;
2128 case wxFONTENCODING_CP1252
:
2129 enc
=kCFStringEncodingWindowsLatin1
;
2131 case wxFONTENCODING_CP1253
:
2132 enc
= kCFStringEncodingWindowsGreek
;
2134 case wxFONTENCODING_CP1254
:
2135 enc
= kCFStringEncodingWindowsLatin5
;
2137 case wxFONTENCODING_CP1255
:
2138 enc
=kCFStringEncodingWindowsHebrew
;
2140 case wxFONTENCODING_CP1256
:
2141 enc
=kCFStringEncodingWindowsArabic
;
2143 case wxFONTENCODING_CP1257
:
2144 enc
= kCFStringEncodingWindowsBalticRim
;
2146 // This only really encodes to UTF7 (if that) evidently
2147 // case wxFONTENCODING_UTF7 :
2148 // enc = kCFStringEncodingNonLossyASCII ;
2150 case wxFONTENCODING_UTF8
:
2151 enc
= kCFStringEncodingUTF8
;
2153 case wxFONTENCODING_EUC_JP
:
2154 enc
= kCFStringEncodingEUC_JP
;
2156 case wxFONTENCODING_UTF16
:
2157 enc
= kCFStringEncodingUnicode
;
2159 case wxFONTENCODING_MACROMAN
:
2160 enc
= kCFStringEncodingMacRoman
;
2162 case wxFONTENCODING_MACJAPANESE
:
2163 enc
= kCFStringEncodingMacJapanese
;
2165 case wxFONTENCODING_MACCHINESETRAD
:
2166 enc
= kCFStringEncodingMacChineseTrad
;
2168 case wxFONTENCODING_MACKOREAN
:
2169 enc
= kCFStringEncodingMacKorean
;
2171 case wxFONTENCODING_MACARABIC
:
2172 enc
= kCFStringEncodingMacArabic
;
2174 case wxFONTENCODING_MACHEBREW
:
2175 enc
= kCFStringEncodingMacHebrew
;
2177 case wxFONTENCODING_MACGREEK
:
2178 enc
= kCFStringEncodingMacGreek
;
2180 case wxFONTENCODING_MACCYRILLIC
:
2181 enc
= kCFStringEncodingMacCyrillic
;
2183 case wxFONTENCODING_MACDEVANAGARI
:
2184 enc
= kCFStringEncodingMacDevanagari
;
2186 case wxFONTENCODING_MACGURMUKHI
:
2187 enc
= kCFStringEncodingMacGurmukhi
;
2189 case wxFONTENCODING_MACGUJARATI
:
2190 enc
= kCFStringEncodingMacGujarati
;
2192 case wxFONTENCODING_MACORIYA
:
2193 enc
= kCFStringEncodingMacOriya
;
2195 case wxFONTENCODING_MACBENGALI
:
2196 enc
= kCFStringEncodingMacBengali
;
2198 case wxFONTENCODING_MACTAMIL
:
2199 enc
= kCFStringEncodingMacTamil
;
2201 case wxFONTENCODING_MACTELUGU
:
2202 enc
= kCFStringEncodingMacTelugu
;
2204 case wxFONTENCODING_MACKANNADA
:
2205 enc
= kCFStringEncodingMacKannada
;
2207 case wxFONTENCODING_MACMALAJALAM
:
2208 enc
= kCFStringEncodingMacMalayalam
;
2210 case wxFONTENCODING_MACSINHALESE
:
2211 enc
= kCFStringEncodingMacSinhalese
;
2213 case wxFONTENCODING_MACBURMESE
:
2214 enc
= kCFStringEncodingMacBurmese
;
2216 case wxFONTENCODING_MACKHMER
:
2217 enc
= kCFStringEncodingMacKhmer
;
2219 case wxFONTENCODING_MACTHAI
:
2220 enc
= kCFStringEncodingMacThai
;
2222 case wxFONTENCODING_MACLAOTIAN
:
2223 enc
= kCFStringEncodingMacLaotian
;
2225 case wxFONTENCODING_MACGEORGIAN
:
2226 enc
= kCFStringEncodingMacGeorgian
;
2228 case wxFONTENCODING_MACARMENIAN
:
2229 enc
= kCFStringEncodingMacArmenian
;
2231 case wxFONTENCODING_MACCHINESESIMP
:
2232 enc
= kCFStringEncodingMacChineseSimp
;
2234 case wxFONTENCODING_MACTIBETAN
:
2235 enc
= kCFStringEncodingMacTibetan
;
2237 case wxFONTENCODING_MACMONGOLIAN
:
2238 enc
= kCFStringEncodingMacMongolian
;
2240 case wxFONTENCODING_MACETHIOPIC
:
2241 enc
= kCFStringEncodingMacEthiopic
;
2243 case wxFONTENCODING_MACCENTRALEUR
:
2244 enc
= kCFStringEncodingMacCentralEurRoman
;
2246 case wxFONTENCODING_MACVIATNAMESE
:
2247 enc
= kCFStringEncodingMacVietnamese
;
2249 case wxFONTENCODING_MACARABICEXT
:
2250 enc
= kCFStringEncodingMacExtArabic
;
2252 case wxFONTENCODING_MACSYMBOL
:
2253 enc
= kCFStringEncodingMacSymbol
;
2255 case wxFONTENCODING_MACDINGBATS
:
2256 enc
= kCFStringEncodingMacDingbats
;
2258 case wxFONTENCODING_MACTURKISH
:
2259 enc
= kCFStringEncodingMacTurkish
;
2261 case wxFONTENCODING_MACCROATIAN
:
2262 enc
= kCFStringEncodingMacCroatian
;
2264 case wxFONTENCODING_MACICELANDIC
:
2265 enc
= kCFStringEncodingMacIcelandic
;
2267 case wxFONTENCODING_MACROMANIAN
:
2268 enc
= kCFStringEncodingMacRomanian
;
2270 case wxFONTENCODING_MACCELTIC
:
2271 enc
= kCFStringEncodingMacCeltic
;
2273 case wxFONTENCODING_MACGAELIC
:
2274 enc
= kCFStringEncodingMacGaelic
;
2276 // case wxFONTENCODING_MACKEYBOARD :
2277 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2280 // because gcc is picky
// wxMBConv implementation using Core Foundation strings; the encoding to
// convert from or to is kept in m_encoding.
2286 class wxMBConv_cocoa
: public wxMBConv
// default initialization: use the system encoding
2291 Init(CFStringGetSystemEncoding()) ;
// create the converter for a charset name: the name is first mapped to a
// wxFontEncoding by the font mapper (non interactively) and then to a
// CFStringEncoding
2295 wxMBConv_cocoa(const wxChar
* name
)
2297 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
// create the converter for a wxFontEncoding value
2301 wxMBConv_cocoa(wxFontEncoding encoding
)
2303 Init( wxCFStringEncFromFontEnc(encoding
) );
// common part of all constructors: just remember the encoding
2310 void Init( CFStringEncoding encoding
)
2312 m_encoding
= encoding
;
2315 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2319 CFStringRef theString
= CFStringCreateWithBytes (
2320 NULL
, //the allocator
2321 (const UInt8
*)szUnConv
,
2324 false //no BOM/external representation
2327 wxASSERT(theString
);
2329 size_t nOutLength
= CFStringGetLength(theString
);
2333 CFRelease(theString
);
2337 CFRange theRange
= { 0, nOutSize
};
2339 #if SIZEOF_WCHAR_T == 4
2340 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2343 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2345 CFRelease(theString
);
2347 szUniCharBuffer
[nOutLength
] = '\0' ;
2349 #if SIZEOF_WCHAR_T == 4
2350 wxMBConvUTF16 converter
;
2351 converter
.MB2WC(szOut
, (const char*)szUniCharBuffer
, nOutSize
) ;
2352 delete[] szUniCharBuffer
;
// Convert the NUL-terminated wide string szUnConv to m_encoding.
//
// The input is wrapped (without copying) in a CFString, after a conversion
// to UTF-16 when wchar_t is 4 bytes wide, and the bytes are then extracted
// from it. The value returned is nRealOutSize - 1.
// NOTE(review): theString is only checked with wxASSERT() before being used.
2358 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2362 size_t nRealOutSize
;
2363 size_t nBufSize
= wxWcslen(szUnConv
);
2364 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
// with 4 byte wchar_t the input must be converted to UTF-16 first: ask the
// converter for the size in bytes, allocate the buffer and convert into it
2366 #if SIZEOF_WCHAR_T == 4
2367 wxMBConvUTF16 converter
;
2368 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2369 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1] ;
2370 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
)) ;
// from now on nBufSize counts UniChars and not bytes
2371 nBufSize
/= sizeof(UniChar
);
2374 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2378 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
2381 wxASSERT(theString
);
2383 //Note that CER puts a BOM when converting to unicode
2384 //so we check and use getchars instead in that case
2385 if (m_encoding
== kCFStringEncodingUnicode
)
2388 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2390 nRealOutSize
= CFStringGetLength(theString
) + 1;
2396 CFRangeMake(0, CFStringGetLength(theString
)),
2398 0, //what to put in characters that can't be converted -
2399 //0 tells CFString to return NULL if it meets such a character
2400 false, //not an external representation
2403 (CFIndex
*) &nRealOutSize
2407 CFRelease(theString
);
// the temporary UTF-16 buffer is owned by us, not by the CFString
2409 #if SIZEOF_WCHAR_T == 4
2410 delete[] szUniBuffer
;
2413 return nRealOutSize
- 1;
// the converter is usable only if the encoding is valid and Core Foundation
// reports it as available
2418 return m_encoding
!= kCFStringEncodingInvalidId
&&
2419 CFStringIsEncodingAvailable(m_encoding
);
// the encoding we convert from or to, set by Init()
2423 CFStringEncoding m_encoding
;
2426 #endif // defined(__WXCOCOA__)
2428 // ============================================================================
2429 // Mac conversion classes
2430 // ============================================================================
2432 #if defined(__WXMAC__) && defined(TARGET_CARBON)
// wxMBConv implementation using the Text Encoding Converter (TEC) API: one
// TEC converter object is kept for each conversion direction.
2434 class wxMBConv_mac
: public wxMBConv
// default initialization: use the system encoding
2439 Init(CFStringGetSystemEncoding()) ;
// create the converter for a charset name: the name is first mapped to a
// wxFontEncoding by the font mapper (non interactively)
2443 wxMBConv_mac(const wxChar
* name
)
2445 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
// create the converter for a wxFontEncoding value
2449 wxMBConv_mac(wxFontEncoding encoding
)
2451 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
// dispose of both TEC converters; the returned status is stored but not
// examined here
2456 OSStatus status
= noErr
;
2457 status
= TECDisposeConverter(m_MB2WC_converter
);
2458 status
= TECDisposeConverter(m_WC2MB_converter
);
// common part of all constructors: remember the encodings and create the
// converters for both directions; the returned status is stored but not
// examined here
2462 void Init( TextEncodingBase encoding
)
2464 OSStatus status
= noErr
;
2465 m_char_encoding
= encoding
;
2466 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
2468 status
= TECCreateConverter(&m_MB2WC_converter
,
2470 m_unicode_encoding
);
2471 status
= TECCreateConverter(&m_WC2MB_converter
,
// Convert the NUL-terminated string psz to wide characters using the
// m_MB2WC_converter TEC object.
//
// The text is converted to UniChars first; when wchar_t is 4 bytes wide this
// is done in a separately allocated buffer whose contents are then converted
// to wchar_t with wxMBConvUTF16. A temporary buffer (tbuf) stands in for buf
// where buf itself is not used.
// NOTE(review): the status returned by TECConvertText() is stored but not
// examined in the visible code.
2476 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2478 OSStatus status
= noErr
;
2479 ByteCount byteOutLen
;
2480 ByteCount byteInLen
= strlen(psz
) ;
2481 wchar_t *tbuf
= NULL
;
2482 UniChar
* ubuf
= NULL
;
2487 //apple specs say at least 32
2488 n
= wxMax( 32 , byteInLen
) ;
2489 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
// size in bytes of the UniChar output buffer
2491 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
2492 #if SIZEOF_WCHAR_T == 4
// 2 extra bytes leave room for the terminating UniChar written below
2493 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2495 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2497 status
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2498 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2499 #if SIZEOF_WCHAR_T == 4
2500 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2501 // is not properly terminated we get random characters at the end
2502 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2503 wxMBConvUTF16 converter
;
2504 res
= converter
.MB2WC( (buf
? buf
: tbuf
) , (const char*)ubuf
, n
) ;
// without the UTF-16 step the result is the number of UniChars produced
2507 res
= byteOutLen
/ sizeof( UniChar
) ;
2512 if ( buf
&& res
< n
)
// Convert the NUL-terminated wide string psz to the multibyte encoding using
// the m_WC2MB_converter TEC object.
//
// When wchar_t is 4 bytes wide the input is converted to UTF-16 in a
// separately allocated buffer first. A temporary buffer (tbuf) stands in for
// buf where buf itself is not used. The result is converted back with
// MB2WC() and compared with the input to detect lossy conversions.
// NOTE(review): the status returned by TECConvertText() is stored but not
// examined in the visible code.
2518 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2520 OSStatus status
= noErr
;
2521 ByteCount byteOutLen
;
2522 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2528 //apple specs say at least 32
2529 n
= wxMax( 32 , ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2530 tbuf
= (char*) malloc( n
) ;
2533 ByteCount byteBufferLen
= n
;
2534 UniChar
* ubuf
= NULL
;
2535 #if SIZEOF_WCHAR_T == 4
// get the size in bytes of the UTF-16 form and convert into a buffer with
// room for the terminating UniChar
2536 wxMBConvUTF16 converter
;
2537 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2538 byteInLen
= unicharlen
;
2539 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2540 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2542 ubuf
= (UniChar
*) psz
;
2544 status
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2545 (TextPtr
) (buf
? buf
: tbuf
) , byteBufferLen
, &byteOutLen
);
2546 #if SIZEOF_WCHAR_T == 4
2552 size_t res
= byteOutLen
;
2553 if ( buf
&& res
< n
)
2557 //we need to double-trip to verify it didn't insert any ? in place
2558 //of bogus characters
2559 wxWCharBuffer
wcBuf(n
);
2560 size_t pszlen
= wxWcslen(psz
);
2561 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
2562 wxWcslen(wcBuf
) != pszlen
||
2563 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2565 // we didn't obtain the same thing we started from, hence
2566 // the conversion was lossy and we consider that it failed
// the converter is usable only if both TEC converter objects are non-NULL
2575 { return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
; }
// TEC converter objects, one per direction, created in Init()
2578 TECObjectRef m_MB2WC_converter
;
2579 TECObjectRef m_WC2MB_converter
;
// the multibyte encoding passed to Init() and the 16 bit Unicode encoding
// created there
2581 TextEncodingBase m_char_encoding
;
2582 TextEncodingBase m_unicode_encoding
;
2585 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2587 // ============================================================================
2588 // wxEncodingConverter based conversion classes
2589 // ============================================================================
// wxMBConv implementation based on wxEncodingConverter: m2w converts from
// m_enc to Unicode and w2m from Unicode to m_enc.
2593 class wxMBConv_wxwin
: public wxMBConv
// initialize both converters; the object is valid (m_ok) only if both
// directions could be set up
2598 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2599 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2603 // temporarily just use wxEncodingConverter stuff,
2604 // so that it works while a better implementation is built
2605 wxMBConv_wxwin(const wxChar
* name
)
// map the charset name to an encoding without showing any dialogs
2608 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2610 m_enc
= wxFONTENCODING_SYSTEM
;
2615 wxMBConv_wxwin(wxFontEncoding enc
)
// convert psz to wide characters using m2w; the size of the output buffer
// is not used by this implementation
2622 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2624 size_t inbuf
= strlen(psz
);
2627 if (!m2w
.Convert(psz
,buf
))
// convert psz to the multibyte encoding using w2m; the size of the output
// buffer is not used by this implementation
2633 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2635 const size_t inbuf
= wxWcslen(psz
);
2638 if (!w2m
.Convert(psz
,buf
))
2645 bool IsOk() const { return m_ok
; }
2648 wxFontEncoding m_enc
;
2649 wxEncodingConverter m2w
, w2m
;
// the UTF-16 and UTF-32 encodings are handled separately from all the
// others here
2652 virtual size_t GetMinMBCharWidth() const
2656 case wxFONTENCODING_UTF16BE
:
2657 case wxFONTENCODING_UTF16LE
:
2660 case wxFONTENCODING_UTF32BE
:
2661 case wxFONTENCODING_UTF32LE
:
2669 // were we initialized successfully?
2672 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2675 // make the constructors available for unit testing
2676 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_wxwin( const wxChar
* name
)
2678 wxMBConv_wxwin
* result
= new wxMBConv_wxwin( name
);
2679 if ( !result
->IsOk() )
2687 #endif // wxUSE_FONTMAP
2689 // ============================================================================
2690 // wxCSConv implementation
2691 // ============================================================================
2693 void wxCSConv::Init()
// Create the converter for the given charset name.
2700 wxCSConv::wxCSConv(const wxChar
*charset
)
// the encoding is looked up from the name
2710 m_encoding
= wxFontMapperBase::GetEncodingFromName(charset
);
// alternative initialization: fall back to the system encoding
2712 m_encoding
= wxFONTENCODING_SYSTEM
;
2716 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2718 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2720 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2722 encoding
= wxFONTENCODING_SYSTEM
;
2727 m_encoding
= encoding
;
2730 wxCSConv::~wxCSConv()
// Copy constructor: takes over the charset name (via SetName()) and the
// encoding of the other converter.
2735 wxCSConv::wxCSConv(const wxCSConv
& conv
)
2740 SetName(conv
.m_name
);
2741 m_encoding
= conv
.m_encoding
;
2744 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2748 SetName(conv
.m_name
);
2749 m_encoding
= conv
.m_encoding
;
2754 void wxCSConv::Clear()
// Remember the charset name: m_name is set to a copy of charset made with
// wxStrdup().
2763 void wxCSConv::SetName(const wxChar
*charset
)
2767 m_name
= wxStrdup(charset
);
2773 #include "wx/hashmap.h"
// Map from a font encoding to a charset name, used by wxCSConv::DoCreate()
// to remember which of the names of an encoding was stored for it; an empty
// string is stored to cache a failure.
2775 WX_DECLARE_HASH_MAP( wxFontEncoding
, wxString
, wxIntegerHash
, wxIntegerEqual
,
2776 wxEncodingNameCache
);
2778 static wxEncodingNameCache gs_nameCache
;
// Create the real converter object for m_name and/or m_encoding.
//
// The platform specific converters (iconv, Win32, Mac, Cocoa) are tried
// first, then the built in UTF converters and finally wxMBConv_wxwin; an
// error is logged if nothing could be created.
2781 wxMBConv
*wxCSConv::DoCreate() const
2784 wxLogTrace(TRACE_STRCONV
,
2785 wxT("creating conversion for %s"),
2787 : wxFontMapperBase::GetEncodingName(m_encoding
).c_str()));
2788 #endif // wxUSE_FONTMAP
2790 // check for the special case of ASCII or ISO8859-1 charset: as we have
2791 // special knowledge of it anyhow, we don't need to create a special
2792 // conversion object
2793 if ( m_encoding
== wxFONTENCODING_ISO8859_1
||
2794 m_encoding
== wxFONTENCODING_DEFAULT
)
2796 // don't convert at all
2800 // we trust OS to do conversion better than we can so try external
2801 // conversion methods first
2803 // the full order is:
2804 // 1. OS conversion (iconv() under Unix or Win32 API)
2805 // 2. hard coded conversions for UTF
2806 // 3. wxEncodingConverter as fall back
2812 #endif // !wxUSE_FONTMAP
2814 wxString
name(m_name
);
2815 wxFontEncoding
encoding(m_encoding
);
// if we have a charset name, try it with iconv directly
2817 if ( !name
.empty() )
2819 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
2827 wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2828 #endif // wxUSE_FONTMAP
// check the cache of names recorded for this encoding first
2832 const wxEncodingNameCache::iterator it
= gs_nameCache
.find(encoding
);
2833 if ( it
!= gs_nameCache
.end() )
2835 if ( it
->second
.empty() )
2838 wxMBConv_iconv
*conv
= new wxMBConv_iconv(it
->second
);
// otherwise go through all the names of this encoding in turn
2845 const wxChar
** names
= wxFontMapperBase::GetAllEncodingNames(encoding
);
2847 for ( ; *names
; ++names
)
2849 wxMBConv_iconv
*conv
= new wxMBConv_iconv(*names
);
2852 gs_nameCache
[encoding
] = *names
;
2859 gs_nameCache
[encoding
] = _T(""); // cache the failure
2861 #endif // wxUSE_FONTMAP
2863 #endif // HAVE_ICONV
2865 #ifdef wxHAVE_WIN32_MB2WC
2868 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2869 : new wxMBConv_win32(m_encoding
);
2878 #endif // wxHAVE_WIN32_MB2WC
2879 #if defined(__WXMAC__)
2881 // leave UTF16 and UTF32 to the built-ins of wx
2882 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
2883 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
2887 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
2888 : new wxMBConv_mac(m_encoding
);
2890 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
2899 #if defined(__WXCOCOA__)
2901 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
2905 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
2906 : new wxMBConv_cocoa(m_encoding
);
2908 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
// step 2: the built in converters for the UTF encodings
2918 wxFontEncoding enc
= m_encoding
;
2920 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2922 // use "false" to suppress interactive dialogs -- we can be called from
2923 // anywhere and popping up a dialog from here is the last thing we want to
// do
2925 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
2927 #endif // wxUSE_FONTMAP
2931 case wxFONTENCODING_UTF7
:
2932 return new wxMBConvUTF7
;
2934 case wxFONTENCODING_UTF8
:
2935 return new wxMBConvUTF8
;
2937 case wxFONTENCODING_UTF16BE
:
2938 return new wxMBConvUTF16BE
;
2940 case wxFONTENCODING_UTF16LE
:
2941 return new wxMBConvUTF16LE
;
2943 case wxFONTENCODING_UTF32BE
:
2944 return new wxMBConvUTF32BE
;
2946 case wxFONTENCODING_UTF32LE
:
2947 return new wxMBConvUTF32LE
;
2950 // nothing to do but put here to suppress gcc warnings
// step 3: wxEncodingConverter based fall back
2957 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2958 : new wxMBConv_wxwin(m_encoding
);
2964 #endif // wxUSE_FONTMAP
2966 // NB: This is a hack to prevent deadlock. What could otherwise happen
2967 // in Unicode build: wxConvLocal creation ends up being here
2968 // because of some failure and logs the error. But wxLog will try to
2969 // attach timestamp, for which it will need wxConvLocal (to convert
2970 // time to char* and then wchar_t*), but that fails, tries to log
2971 // error, but wxLog has a (already locked) critical section that
2972 // guards static buffer.
2973 static bool alreadyLoggingError
= false;
2974 if (!alreadyLoggingError
)
2976 alreadyLoggingError
= true;
2977 wxLogError(_("Cannot convert from the charset '%s'!"),
2981 wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str()
2982 #else // !wxUSE_FONTMAP
// NOTE(review): m_encoding is an enum value but is formatted with %s here,
// this looks like a format/argument mismatch -- confirm and fix separately
2983 wxString::Format(_("encoding %s"), m_encoding
).c_str()
2984 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2986 alreadyLoggingError
= false;
2992 void wxCSConv::CreateConvIfNeeded() const
2996 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2999 // if we don't have neither the name nor the encoding, use the default
3000 // encoding for this system
3001 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
3003 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
3005 #endif // wxUSE_INTL
3007 self
->m_convReal
= DoCreate();
3008 self
->m_deferred
= false;
3012 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
3014 CreateConvIfNeeded();
3017 return m_convReal
->MB2WC(buf
, psz
, n
);
3020 size_t len
= strlen(psz
);
3024 for (size_t c
= 0; c
<= len
; c
++)
3025 buf
[c
] = (unsigned char)(psz
[c
]);
3031 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
3033 CreateConvIfNeeded();
3036 return m_convReal
->WC2MB(buf
, psz
, n
);
3039 const size_t len
= wxWcslen(psz
);
3042 for (size_t c
= 0; c
<= len
; c
++)
3046 buf
[c
] = (char)psz
[c
];
3051 for (size_t c
= 0; c
<= len
; c
++)
// Return the NUL width of the real converter, creating it first if this was
// not done yet.
3061 size_t wxCSConv::GetMinMBCharWidth() const
3063 CreateConvIfNeeded();
3067 // cast needed just to call private function of m_convReal
// NOTE(review): m_convReal is cast to wxCSConv* whatever its real type is
3068 return ((wxCSConv
*)m_convReal
)->GetMinMBCharWidth();
3074 // ----------------------------------------------------------------------------
3076 // ----------------------------------------------------------------------------
// The global converter objects: the type of the object used for wxConvLibc
// depends on the platform.
3079 static wxMBConv_win32 wxConvLibcObj
;
3080 #elif defined(__WXMAC__) && !defined(__MACH__)
3081 static wxMBConv_mac wxConvLibcObj
;
3083 static wxMBConvLibc wxConvLibcObj
;
// converters for the system encoding, ISO8859-1, UTF-7 and UTF-8
3086 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
3087 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
3088 static wxMBConvUTF7 wxConvUTF7Obj
;
3089 static wxMBConvUTF8 wxConvUTF8Obj
;
// exported references and pointers to the static objects above
3091 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
3092 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
3093 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
3094 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
3095 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
// the converter used by default is the libc one
3096 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
3097 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
= &
3105 #else // !wxUSE_WCHAR_T
3107 // stand-ins in absence of wchar_t
3108 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
3113 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T