1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // For compilers that support precompilation, includes "wx.h".
16 #include "wx/wxprec.h"
20 #include "wx/msw/missing.h"
25 #include "wx/hashmap.h"
28 #include "wx/strconv.h"
33 #include "wx/msw/private.h"
44 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
45 #define wxHAVE_WIN32_MB2WC
54 #include "wx/thread.h"
57 #include "wx/encconv.h"
58 #include "wx/fontmap.h"
62 #include <ATSUnicode.h>
63 #include <TextCommon.h>
64 #include <TextEncodingConverter.h>
67 // includes Mac headers
68 #include "wx/mac/private.h"
72 #define TRACE_STRCONV _T("strconv")
74 // WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
76 #if SIZEOF_WCHAR_T == 2
81 // ============================================================================
83 // ============================================================================
85 // helper function of cMB2WC(): check if n bytes at this location are all NUL
86 static bool NotAllNULs(const char *p
, size_t n
)
88 while ( n
&& *p
++ == '\0' )
94 // ----------------------------------------------------------------------------
95 // UTF-16 en/decoding to/from UCS-4 with surrogates handling
96 // ----------------------------------------------------------------------------
98 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
103 *output
= (wxUint16
) input
;
107 else if (input
>= 0x110000)
109 return wxCONV_FAILED
;
115 *output
++ = (wxUint16
) ((input
>> 10) + 0xd7c0);
116 *output
= (wxUint16
) ((input
& 0x3ff) + 0xdc00);
123 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
125 if ((*input
< 0xd800) || (*input
> 0xdfff))
130 else if ((input
[1] < 0xdc00) || (input
[1] > 0xdfff))
133 return wxCONV_FAILED
;
137 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
143 typedef wchar_t wxDecodeSurrogate_t
;
145 typedef wxUint16 wxDecodeSurrogate_t
;
146 #endif // WC_UTF16/!WC_UTF16
148 // returns the next UTF-32 character from the wchar_t buffer and advances the
149 // pointer to the character after this one
151 // if an invalid character is found, *pSrc is set to NULL, the caller must
153 static wxUint32
wxDecodeSurrogate(const wxDecodeSurrogate_t
**pSrc
)
157 n
= decode_utf16(wx_reinterpret_cast(const wxUint16
*, *pSrc
), out
);
158 if ( n
== wxCONV_FAILED
)
166 // ----------------------------------------------------------------------------
168 // ----------------------------------------------------------------------------
171 wxMBConv::ToWChar(wchar_t *dst
, size_t dstLen
,
172 const char *src
, size_t srcLen
) const
174 // although new conversion classes are supposed to implement this function
175 // directly, the existing ones only implement the old MB2WC() and so, to
176 // avoid having to rewrite all conversion classes at once, we provide a
177 // default (but not efficient) implementation of this one in terms of the
178 // old function by copying the input to ensure that it's NUL-terminated and
179 // then using MB2WC() to convert it
181 // the number of chars [which would be] written to dst [if it were not NULL]
182 size_t dstWritten
= 0;
184 // the number of NULs terminating this string
185 size_t nulLen
= 0; // not really needed, but just to avoid warnings
187 // if we were not given the input size we just have to assume that the
188 // string is properly terminated as we have no way of knowing how long it
189 // is anyhow, but if we do have the size check whether there are enough
193 if ( srcLen
!= wxNO_LEN
)
195 // we need to know how to find the end of this string
196 nulLen
= GetMBNulLen();
197 if ( nulLen
== wxCONV_FAILED
)
198 return wxCONV_FAILED
;
200 // if there are enough NULs we can avoid the copy
201 if ( srcLen
< nulLen
|| NotAllNULs(src
+ srcLen
- nulLen
, nulLen
) )
203 // make a copy in order to properly NUL-terminate the string
204 bufTmp
= wxCharBuffer(srcLen
+ nulLen
- 1 /* 1 will be added */);
205 char * const p
= bufTmp
.data();
206 memcpy(p
, src
, srcLen
);
207 for ( char *s
= p
+ srcLen
; s
< p
+ srcLen
+ nulLen
; s
++ )
213 srcEnd
= src
+ srcLen
;
215 else // quit after the first loop iteration
222 // try to convert the current chunk
223 size_t lenChunk
= MB2WC(NULL
, src
, 0);
224 if ( lenChunk
== wxCONV_FAILED
)
225 return wxCONV_FAILED
;
227 lenChunk
++; // for the L'\0' at the end of this chunk
229 dstWritten
+= lenChunk
;
233 // nothing left in the input string, conversion succeeded
239 if ( dstWritten
> dstLen
)
240 return wxCONV_FAILED
;
242 if ( MB2WC(dst
, src
, lenChunk
) == wxCONV_FAILED
)
243 return wxCONV_FAILED
;
250 // we convert just one chunk in this case as this is the entire
255 // advance the input pointer past the end of this chunk
256 while ( NotAllNULs(src
, nulLen
) )
258 // notice that we must skip over multiple bytes here as we suppose
259 // that if NUL takes 2 or 4 bytes, then all the other characters do
260 // too and so if advanced by a single byte we might erroneously
261 // detect sequences of NUL bytes in the middle of the input
265 src
+= nulLen
; // skipping over its terminator as well
267 // note that ">=" (and not just "==") is needed here as the terminator
268 // we skipped just above could be inside or just after the buffer
269 // delimited by srcEnd
278 wxMBConv::FromWChar(char *dst
, size_t dstLen
,
279 const wchar_t *src
, size_t srcLen
) const
281 // the number of chars [which would be] written to dst [if it were not NULL]
282 size_t dstWritten
= 0;
284 // make a copy of the input string unless it is already properly
287 // if we don't know its length we have no choice but to assume that it is,
288 // indeed, properly terminated
289 wxWCharBuffer bufTmp
;
290 if ( srcLen
== wxNO_LEN
)
292 srcLen
= wxWcslen(src
) + 1;
294 else if ( srcLen
!= 0 && src
[srcLen
- 1] != L
'\0' )
296 // make a copy in order to properly NUL-terminate the string
297 bufTmp
= wxWCharBuffer(srcLen
);
298 memcpy(bufTmp
.data(), src
, srcLen
* sizeof(wchar_t));
302 const size_t lenNul
= GetMBNulLen();
303 for ( const wchar_t * const srcEnd
= src
+ srcLen
;
305 src
+= wxWcslen(src
) + 1 /* skip L'\0' too */ )
307 // try to convert the current chunk
308 size_t lenChunk
= WC2MB(NULL
, src
, 0);
310 if ( lenChunk
== wxCONV_FAILED
)
311 return wxCONV_FAILED
;
314 dstWritten
+= lenChunk
;
318 if ( dstWritten
> dstLen
)
319 return wxCONV_FAILED
;
321 if ( WC2MB(dst
, src
, lenChunk
) == wxCONV_FAILED
)
322 return wxCONV_FAILED
;
331 size_t wxMBConv::MB2WC(wchar_t *outBuff
, const char *inBuff
, size_t outLen
) const
333 size_t rc
= ToWChar(outBuff
, outLen
, inBuff
);
334 if ( rc
!= wxCONV_FAILED
)
336 // ToWChar() returns the buffer length, i.e. including the trailing
337 // NUL, while this method doesn't take it into account
344 size_t wxMBConv::WC2MB(char *outBuff
, const wchar_t *inBuff
, size_t outLen
) const
346 size_t rc
= FromWChar(outBuff
, outLen
, inBuff
);
347 if ( rc
!= wxCONV_FAILED
)
355 wxMBConv::~wxMBConv()
357 // nothing to do here (necessary for Darwin linking probably)
360 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
364 // calculate the length of the buffer needed first
365 const size_t nLen
= MB2WC(NULL
, psz
, 0);
366 if ( nLen
!= wxCONV_FAILED
)
368 // now do the actual conversion
369 wxWCharBuffer
buf(nLen
/* +1 added implicitly */);
371 // +1 for the trailing NUL
372 if ( MB2WC(buf
.data(), psz
, nLen
+ 1) != wxCONV_FAILED
)
377 return wxWCharBuffer();
380 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
384 const size_t nLen
= WC2MB(NULL
, pwz
, 0);
385 if ( nLen
!= wxCONV_FAILED
)
387 // extra space for trailing NUL(s)
388 static const size_t extraLen
= GetMaxMBNulLen();
390 wxCharBuffer
buf(nLen
+ extraLen
- 1);
391 if ( WC2MB(buf
.data(), pwz
, nLen
+ extraLen
) != wxCONV_FAILED
)
396 return wxCharBuffer();
400 wxMBConv::cMB2WC(const char *inBuff
, size_t inLen
, size_t *outLen
) const
402 const size_t dstLen
= ToWChar(NULL
, 0, inBuff
, inLen
);
403 if ( dstLen
!= wxCONV_FAILED
)
405 wxWCharBuffer
wbuf(dstLen
- 1);
406 if ( ToWChar(wbuf
.data(), dstLen
, inBuff
, inLen
) != wxCONV_FAILED
)
411 if ( wbuf
[dstLen
- 1] == L
'\0' )
422 return wxWCharBuffer();
426 wxMBConv::cWC2MB(const wchar_t *inBuff
, size_t inLen
, size_t *outLen
) const
428 size_t dstLen
= FromWChar(NULL
, 0, inBuff
, inLen
);
429 if ( dstLen
!= wxCONV_FAILED
)
431 // special case of empty input: can't allocate 0 size buffer below as
432 // wxCharBuffer insists on NUL-terminating it
433 wxCharBuffer
buf(dstLen
? dstLen
- 1 : 1);
434 if ( FromWChar(buf
.data(), dstLen
, inBuff
, inLen
) != wxCONV_FAILED
)
440 const size_t nulLen
= GetMBNulLen();
441 if ( dstLen
>= nulLen
&&
442 !NotAllNULs(buf
.data() + dstLen
- nulLen
, nulLen
) )
444 // in this case the output is NUL-terminated and we're not
445 // supposed to count NUL
457 return wxCharBuffer();
460 // ----------------------------------------------------------------------------
462 // ----------------------------------------------------------------------------
464 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
466 return wxMB2WC(buf
, psz
, n
);
469 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
471 return wxWC2MB(buf
, psz
, n
);
474 // ----------------------------------------------------------------------------
475 // wxConvBrokenFileNames
476 // ----------------------------------------------------------------------------
480 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar
*charset
)
482 if ( !charset
|| wxStricmp(charset
, _T("UTF-8")) == 0
483 || wxStricmp(charset
, _T("UTF8")) == 0 )
484 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
486 m_conv
= new wxCSConv(charset
);
491 // ----------------------------------------------------------------------------
493 // ----------------------------------------------------------------------------
495 // Implementation (C) 2004 Fredrik Roubert
498 // BASE64 decoding table
500 static const unsigned char utf7unb64
[] =
502 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
503 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
504 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
505 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
506 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
507 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
508 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
509 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
510 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
511 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
512 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
513 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
514 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
515 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
516 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
517 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
518 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
519 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
520 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
521 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
522 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
523 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
524 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
525 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
526 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
527 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
528 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
529 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
530 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
531 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
532 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
533 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
536 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
540 while ( *psz
&& (!buf
|| (len
< n
)) )
542 unsigned char cc
= *psz
++;
550 else if (*psz
== '-')
558 else // start of BASE64 encoded string
562 for ( ok
= lsb
= false, d
= 0, l
= 0;
563 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff;
568 for (l
+= 6; l
>= 8; lsb
= !lsb
)
570 unsigned char c
= (unsigned char)((d
>> (l
-= 8)) % 256);
580 *buf
= (wchar_t)(c
<< 8);
589 // in valid UTF7 we should have valid characters after '+'
590 return wxCONV_FAILED
;
598 if ( buf
&& (len
< n
) )
605 // BASE64 encoding table
607 static const unsigned char utf7enb64
[] =
609 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
610 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
611 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
612 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
613 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
614 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
615 'w', 'x', 'y', 'z', '0', '1', '2', '3',
616 '4', '5', '6', '7', '8', '9', '+', '/'
620 // UTF-7 encoding table
622 // 0 - Set D (directly encoded characters)
623 // 1 - Set O (optional direct characters)
624 // 2 - whitespace characters (optional)
625 // 3 - special characters
627 static const unsigned char utf7encode
[128] =
629 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
630 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
631 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
632 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
633 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
634 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
635 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
636 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
639 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
643 while (*psz
&& ((!buf
) || (len
< n
)))
646 if (cc
< 0x80 && utf7encode
[cc
] < 1)
655 else if (((wxUint32
)cc
) > 0xffff)
657 // no surrogate pair generation (yet?)
658 return wxCONV_FAILED
;
669 // BASE64 encode string
670 unsigned int lsb
, d
, l
;
671 for (d
= 0, l
= 0; /*nothing*/; psz
++)
673 for (lsb
= 0; lsb
< 2; lsb
++)
676 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
678 for (l
+= 8; l
>= 6; )
682 *buf
++ = utf7enb64
[(d
>> l
) % 64];
688 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
695 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
707 if (buf
&& (len
< n
))
713 // ----------------------------------------------------------------------------
715 // ----------------------------------------------------------------------------
// Upper bounds used by the UTF-8 conversion code: entry i (for i = 0..5) is
// the largest value that fits in a UTF-8 sequence of i + 1 bytes. MB2WC()
// compares a decoded value against the entry for the next-shorter length to
// reject illegal encodings, and WC2MB() scans the table to find how many
// bytes a character needs.
// NOTE(review): the final 0xffffffff entry looks like a sentinel which
// guarantees that the scan in WC2MB() terminates -- confirm.
717 static wxUint32 utf8_max
[]=
718 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
720 // boundaries of the private use area we use to (temporarily) remap
721 // characters which are invalid in a UTF-8 encoded string
722 const wxUint32 wxUnicodePUA
= 0x100000;
723 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
725 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
729 while (*psz
&& ((!buf
) || (len
< n
)))
731 const char *opsz
= psz
;
732 bool invalid
= false;
733 unsigned char cc
= *psz
++, fc
= cc
;
735 for (cnt
= 0; fc
& 0x80; cnt
++)
745 // escape the escape character for octal escapes
746 if ((m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
747 && cc
== '\\' && (!buf
|| len
< n
))
759 // invalid UTF-8 sequence
764 unsigned ocnt
= cnt
- 1;
765 wxUint32 res
= cc
& (0x3f >> cnt
);
769 if ((cc
& 0xC0) != 0x80)
771 // invalid UTF-8 sequence
777 res
= (res
<< 6) | (cc
& 0x3f);
780 if (invalid
|| res
<= utf8_max
[ocnt
])
782 // illegal UTF-8 encoding
785 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
786 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
788 // if one of our PUA characters turns up externally
789 // it must also be treated as an illegal sequence
790 // (a bit like you have to escape an escape character)
796 // cast is ok because wchar_t == wxUint16 if WC_UTF16
797 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
798 if (pa
== wxCONV_FAILED
)
810 *buf
++ = (wchar_t)res
;
812 #endif // WC_UTF16/!WC_UTF16
818 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
820 while (opsz
< psz
&& (!buf
|| len
< n
))
823 // cast is ok because wchar_t == wxUint16 if WC_UTF16
824 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
825 wxASSERT(pa
!= wxCONV_FAILED
);
832 *buf
++ = (wchar_t)(wxUnicodePUA
+ (unsigned char)*opsz
);
838 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
840 while (opsz
< psz
&& (!buf
|| len
< n
))
842 if ( buf
&& len
+ 3 < n
)
844 unsigned char on
= *opsz
;
846 *buf
++ = (wchar_t)( L
'0' + on
/ 0100 );
847 *buf
++ = (wchar_t)( L
'0' + (on
% 0100) / 010 );
848 *buf
++ = (wchar_t)( L
'0' + on
% 010 );
855 else // MAP_INVALID_UTF8_NOT
857 return wxCONV_FAILED
;
863 if (buf
&& (len
< n
))
869 static inline bool isoctal(wchar_t wch
)
871 return L
'0' <= wch
&& wch
<= L
'7';
874 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
878 while (*psz
&& ((!buf
) || (len
< n
)))
883 // cast is ok for WC_UTF16
884 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
885 psz
+= (pa
== wxCONV_FAILED
) ? 1 : pa
;
887 cc
= (*psz
++) & 0x7fffffff;
890 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
891 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
894 *buf
++ = (char)(cc
- wxUnicodePUA
);
897 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
898 && cc
== L
'\\' && psz
[0] == L
'\\' )
905 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
907 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
911 *buf
++ = (char) ((psz
[0] - L
'0') * 0100 +
912 (psz
[1] - L
'0') * 010 +
922 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++)
938 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
940 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
946 if (buf
&& (len
< n
))
952 // ============================================================================
954 // ============================================================================
956 #ifdef WORDS_BIGENDIAN
957 #define wxMBConvUTF16straight wxMBConvUTF16BE
958 #define wxMBConvUTF16swap wxMBConvUTF16LE
960 #define wxMBConvUTF16swap wxMBConvUTF16BE
961 #define wxMBConvUTF16straight wxMBConvUTF16LE
965 size_t wxMBConvUTF16Base::GetLength(const char *src
, size_t srcLen
)
967 if ( srcLen
== wxNO_LEN
)
969 // count the number of bytes in input, including the trailing NULs
970 const wxUint16
*inBuff
= wx_reinterpret_cast(const wxUint16
*, src
);
971 for ( srcLen
= 1; *inBuff
++; srcLen
++ )
974 srcLen
*= BYTES_PER_CHAR
;
976 else // we already have the length
978 // we can only convert an entire number of UTF-16 characters
979 if ( srcLen
% BYTES_PER_CHAR
)
980 return wxCONV_FAILED
;
986 // case when in-memory representation is UTF-16 too
989 // ----------------------------------------------------------------------------
990 // conversions without endianness change
991 // ----------------------------------------------------------------------------
994 wxMBConvUTF16straight::ToWChar(wchar_t *dst
, size_t dstLen
,
995 const char *src
, size_t srcLen
) const
997 // set up the scene for using memcpy() (which is presumably more efficient
998 // than copying the bytes one by one)
999 srcLen
= GetLength(src
, srcLen
);
1000 if ( srcLen
== wxNO_LEN
)
1001 return wxCONV_FAILED
;
1003 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1006 if ( dstLen
< inLen
)
1007 return wxCONV_FAILED
;
1009 memcpy(dst
, src
, srcLen
);
1016 wxMBConvUTF16straight::FromWChar(char *dst
, size_t dstLen
,
1017 const wchar_t *src
, size_t srcLen
) const
1019 if ( srcLen
== wxNO_LEN
)
1020 srcLen
= wxWcslen(src
) + 1;
1022 srcLen
*= BYTES_PER_CHAR
;
1026 if ( dstLen
< srcLen
)
1027 return wxCONV_FAILED
;
1029 memcpy(dst
, src
, srcLen
);
1035 // ----------------------------------------------------------------------------
1036 // endian-reversing conversions
1037 // ----------------------------------------------------------------------------
1040 wxMBConvUTF16swap::ToWChar(wchar_t *dst
, size_t dstLen
,
1041 const char *src
, size_t srcLen
) const
1043 srcLen
= GetLength(src
, srcLen
);
1044 if ( srcLen
== wxNO_LEN
)
1045 return wxCONV_FAILED
;
1047 srcLen
/= BYTES_PER_CHAR
;
1051 if ( dstLen
< srcLen
)
1052 return wxCONV_FAILED
;
1054 const wxUint16
*inBuff
= wx_reinterpret_cast(const wxUint16
*, src
);
1055 for ( size_t n
= 0; n
< srcLen
; n
++, inBuff
++ )
1057 *dst
++ = wxUINT16_SWAP_ALWAYS(*inBuff
);
1065 wxMBConvUTF16swap::FromWChar(char *dst
, size_t dstLen
,
1066 const wchar_t *src
, size_t srcLen
) const
1068 if ( srcLen
== wxNO_LEN
)
1069 srcLen
= wxWcslen(src
) + 1;
1071 srcLen
*= BYTES_PER_CHAR
;
1075 if ( dstLen
< srcLen
)
1076 return wxCONV_FAILED
;
1078 wxUint16
*outBuff
= wx_reinterpret_cast(wxUint16
*, dst
);
1079 for ( size_t n
= 0; n
< srcLen
; n
+= BYTES_PER_CHAR
, src
++ )
1081 *outBuff
++ = wxUINT16_SWAP_ALWAYS(*src
);
1088 #else // !WC_UTF16: wchar_t is UTF-32
1090 // ----------------------------------------------------------------------------
1091 // conversions without endianness change
1092 // ----------------------------------------------------------------------------
1095 wxMBConvUTF16straight::ToWChar(wchar_t *dst
, size_t dstLen
,
1096 const char *src
, size_t srcLen
) const
1098 srcLen
= GetLength(src
, srcLen
);
1099 if ( srcLen
== wxNO_LEN
)
1100 return wxCONV_FAILED
;
1102 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1105 // optimization: return maximal space which could be needed for this
1106 // string even if the real size could be smaller if the buffer contains
1112 const wxUint16
*inBuff
= wx_reinterpret_cast(const wxUint16
*, src
);
1113 for ( const wxUint16
* const inEnd
= inBuff
+ inLen
; inBuff
< inEnd
; )
1115 const wxUint32 ch
= wxDecodeSurrogate(&inBuff
);
1117 return wxCONV_FAILED
;
1119 if ( ++outLen
> dstLen
)
1120 return wxCONV_FAILED
;
1130 wxMBConvUTF16straight::FromWChar(char *dst
, size_t dstLen
,
1131 const wchar_t *src
, size_t srcLen
) const
1133 if ( srcLen
== wxNO_LEN
)
1134 srcLen
= wxWcslen(src
) + 1;
1137 wxUint16
*outBuff
= wx_reinterpret_cast(wxUint16
*, dst
);
1138 for ( size_t n
= 0; n
< srcLen
; n
++ )
1141 const size_t numChars
= encode_utf16(*src
++, cc
);
1142 if ( numChars
== wxCONV_FAILED
)
1143 return wxCONV_FAILED
;
1145 outLen
+= numChars
* BYTES_PER_CHAR
;
1148 if ( outLen
> dstLen
)
1149 return wxCONV_FAILED
;
1152 if ( numChars
== 2 )
1154 // second character of a surrogate
1163 // ----------------------------------------------------------------------------
1164 // endian-reversing conversions
1165 // ----------------------------------------------------------------------------
1168 wxMBConvUTF16swap::ToWChar(wchar_t *dst
, size_t dstLen
,
1169 const char *src
, size_t srcLen
) const
1171 srcLen
= GetLength(src
, srcLen
);
1172 if ( srcLen
== wxNO_LEN
)
1173 return wxCONV_FAILED
;
1175 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1178 // optimization: return maximal space which could be needed for this
1179 // string even if the real size could be smaller if the buffer contains
1185 const wxUint16
*inBuff
= wx_reinterpret_cast(const wxUint16
*, src
);
1186 for ( const wxUint16
* const inEnd
= inBuff
+ inLen
; inBuff
< inEnd
; )
1191 tmp
[0] = wxUINT16_SWAP_ALWAYS(*inBuff
);
1193 tmp
[1] = wxUINT16_SWAP_ALWAYS(*inBuff
);
1195 const size_t numChars
= decode_utf16(tmp
, ch
);
1196 if ( numChars
== wxCONV_FAILED
)
1197 return wxCONV_FAILED
;
1199 if ( numChars
== 2 )
1202 if ( ++outLen
> dstLen
)
1203 return wxCONV_FAILED
;
1213 wxMBConvUTF16swap::FromWChar(char *dst
, size_t dstLen
,
1214 const wchar_t *src
, size_t srcLen
) const
1216 if ( srcLen
== wxNO_LEN
)
1217 srcLen
= wxWcslen(src
) + 1;
1220 wxUint16
*outBuff
= wx_reinterpret_cast(wxUint16
*, dst
);
1221 for ( const wchar_t *srcEnd
= src
+ srcLen
; src
< srcEnd
; src
++ )
1224 const size_t numChars
= encode_utf16(*src
, cc
);
1225 if ( numChars
== wxCONV_FAILED
)
1226 return wxCONV_FAILED
;
1228 outLen
+= numChars
* BYTES_PER_CHAR
;
1231 if ( outLen
> dstLen
)
1232 return wxCONV_FAILED
;
1234 *outBuff
++ = wxUINT16_SWAP_ALWAYS(cc
[0]);
1235 if ( numChars
== 2 )
1237 // second character of a surrogate
1238 *outBuff
++ = wxUINT16_SWAP_ALWAYS(cc
[1]);
1246 #endif // WC_UTF16/!WC_UTF16
1249 // ============================================================================
1251 // ============================================================================
1253 #ifdef WORDS_BIGENDIAN
1254 #define wxMBConvUTF32straight wxMBConvUTF32BE
1255 #define wxMBConvUTF32swap wxMBConvUTF32LE
1257 #define wxMBConvUTF32swap wxMBConvUTF32BE
1258 #define wxMBConvUTF32straight wxMBConvUTF32LE
1262 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1263 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
1266 size_t wxMBConvUTF32Base::GetLength(const char *src
, size_t srcLen
)
1268 if ( srcLen
== wxNO_LEN
)
1270 // count the number of bytes in input, including the trailing NULs
1271 const wxUint32
*inBuff
= wx_reinterpret_cast(const wxUint32
*, src
);
1272 for ( srcLen
= 1; *inBuff
++; srcLen
++ )
1275 srcLen
*= BYTES_PER_CHAR
;
1277 else // we already have the length
1279 // we can only convert an entire number of UTF-32 characters
1280 if ( srcLen
% BYTES_PER_CHAR
)
1281 return wxCONV_FAILED
;
1287 // case when in-memory representation is UTF-16
1290 // ----------------------------------------------------------------------------
1291 // conversions without endianness change
1292 // ----------------------------------------------------------------------------
1295 wxMBConvUTF32straight::ToWChar(wchar_t *dst
, size_t dstLen
,
1296 const char *src
, size_t srcLen
) const
1298 srcLen
= GetLength(src
, srcLen
);
1299 if ( srcLen
== wxNO_LEN
)
1300 return wxCONV_FAILED
;
1302 const wxUint32
*inBuff
= wx_reinterpret_cast(const wxUint32
*, src
);
1303 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1305 for ( size_t n
= 0; n
< inLen
; n
++ )
1308 const size_t numChars
= encode_utf16(*inBuff
++, cc
);
1309 if ( numChars
== wxCONV_FAILED
)
1310 return wxCONV_FAILED
;
1315 if ( outLen
> dstLen
)
1316 return wxCONV_FAILED
;
1319 if ( numChars
== 2 )
1321 // second character of a surrogate
1331 wxMBConvUTF32straight::FromWChar(char *dst
, size_t dstLen
,
1332 const wchar_t *src
, size_t srcLen
) const
1334 if ( srcLen
== wxNO_LEN
)
1335 srcLen
= wxWcslen(src
) + 1;
1339 // optimization: return maximal space which could be needed for this
1340 // string instead of the exact amount which could be less if there are
1341 // any surrogates in the input
1343 // we consider that surrogates are rare enough to make it worthwhile to
1344 // avoid running the loop below at the cost of slightly extra memory
1346 return srcLen
* BYTES_PER_CHAR
;
1349 wxUint32
*outBuff
= wx_reinterpret_cast(wxUint32
*, dst
);
1351 for ( const wchar_t * const srcEnd
= src
+ srcLen
; src
< srcEnd
; )
1353 const wxUint32 ch
= wxDecodeSurrogate(&src
);
1355 return wxCONV_FAILED
;
1357 outLen
+= BYTES_PER_CHAR
;
1359 if ( outLen
> dstLen
)
1360 return wxCONV_FAILED
;
1368 // ----------------------------------------------------------------------------
1369 // endian-reversing conversions
1370 // ----------------------------------------------------------------------------
1373 wxMBConvUTF32swap::ToWChar(wchar_t *dst
, size_t dstLen
,
1374 const char *src
, size_t srcLen
) const
1376 srcLen
= GetLength(src
, srcLen
);
1377 if ( srcLen
== wxNO_LEN
)
1378 return wxCONV_FAILED
;
1380 const wxUint32
*inBuff
= wx_reinterpret_cast(const wxUint32
*, src
);
1381 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1383 for ( size_t n
= 0; n
< inLen
; n
++, inBuff
++ )
1386 const size_t numChars
= encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff
), cc
);
1387 if ( numChars
== wxCONV_FAILED
)
1388 return wxCONV_FAILED
;
1393 if ( outLen
> dstLen
)
1394 return wxCONV_FAILED
;
1397 if ( numChars
== 2 )
1399 // second character of a surrogate
1409 wxMBConvUTF32swap::FromWChar(char *dst
, size_t dstLen
,
1410 const wchar_t *src
, size_t srcLen
) const
1412 if ( srcLen
== wxNO_LEN
)
1413 srcLen
= wxWcslen(src
) + 1;
1417 // optimization: return maximal space which could be needed for this
1418 // string instead of the exact amount which could be less if there are
1419 // any surrogates in the input
1421 // we consider that surrogates are rare enough to make it worthwhile to
1422 // avoid running the loop below at the cost of slightly extra memory
1424 return srcLen
*BYTES_PER_CHAR
;
1427 wxUint32
*outBuff
= wx_reinterpret_cast(wxUint32
*, dst
);
1429 for ( const wchar_t * const srcEnd
= src
+ srcLen
; src
< srcEnd
; )
1431 const wxUint32 ch
= wxDecodeSurrogate(&src
);
1433 return wxCONV_FAILED
;
1435 outLen
+= BYTES_PER_CHAR
;
1437 if ( outLen
> dstLen
)
1438 return wxCONV_FAILED
;
1440 *outBuff
++ = wxUINT32_SWAP_ALWAYS(ch
);
1446 #else // !WC_UTF16: wchar_t is UTF-32
1448 // ----------------------------------------------------------------------------
1449 // conversions without endianness change
1450 // ----------------------------------------------------------------------------
1453 wxMBConvUTF32straight::ToWChar(wchar_t *dst
, size_t dstLen
,
1454 const char *src
, size_t srcLen
) const
1456 // use memcpy() as it should be much faster than hand-written loop
1457 srcLen
= GetLength(src
, srcLen
);
1458 if ( srcLen
== wxNO_LEN
)
1459 return wxCONV_FAILED
;
1461 const size_t inLen
= srcLen
/BYTES_PER_CHAR
;
1464 if ( dstLen
< inLen
)
1465 return wxCONV_FAILED
;
1467 memcpy(dst
, src
, srcLen
);
1474 wxMBConvUTF32straight::FromWChar(char *dst
, size_t dstLen
,
1475 const wchar_t *src
, size_t srcLen
) const
1477 if ( srcLen
== wxNO_LEN
)
1478 srcLen
= wxWcslen(src
) + 1;
1480 srcLen
*= BYTES_PER_CHAR
;
1484 if ( dstLen
< srcLen
)
1485 return wxCONV_FAILED
;
1487 memcpy(dst
, src
, srcLen
);
1493 // ----------------------------------------------------------------------------
1494 // endian-reversing conversions
1495 // ----------------------------------------------------------------------------
1498 wxMBConvUTF32swap::ToWChar(wchar_t *dst
, size_t dstLen
,
1499 const char *src
, size_t srcLen
) const
1501 srcLen
= GetLength(src
, srcLen
);
1502 if ( srcLen
== wxNO_LEN
)
1503 return wxCONV_FAILED
;
1505 srcLen
/= BYTES_PER_CHAR
;
1509 if ( dstLen
< srcLen
)
1510 return wxCONV_FAILED
;
1512 const wxUint32
*inBuff
= wx_reinterpret_cast(const wxUint32
*, src
);
1513 for ( size_t n
= 0; n
< srcLen
; n
++, inBuff
++ )
1515 *dst
++ = wxUINT32_SWAP_ALWAYS(*inBuff
);
1523 wxMBConvUTF32swap::FromWChar(char *dst
, size_t dstLen
,
1524 const wchar_t *src
, size_t srcLen
) const
1526 if ( srcLen
== wxNO_LEN
)
1527 srcLen
= wxWcslen(src
) + 1;
1529 srcLen
*= BYTES_PER_CHAR
;
1533 if ( dstLen
< srcLen
)
1534 return wxCONV_FAILED
;
1536 wxUint32
*outBuff
= wx_reinterpret_cast(wxUint32
*, dst
);
1537 for ( size_t n
= 0; n
< srcLen
; n
+= BYTES_PER_CHAR
, src
++ )
1539 *outBuff
++ = wxUINT32_SWAP_ALWAYS(*src
);
1546 #endif // WC_UTF16/!WC_UTF16
1549 // ============================================================================
1550 // The classes doing conversion using the iconv_xxx() functions
1551 // ============================================================================
1555 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1556 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1557 // (unless there's yet another bug in glibc) the only case when iconv()
1558 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1559 // left in the input buffer -- when _real_ error occurs,
1560 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1562 // [This bug does not appear in glibc 2.2.]
1563 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1564 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1565 (errno != E2BIG || bufLeft != 0))
1567 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1570 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1572 #define ICONV_T_INVALID ((iconv_t)-1)
1574 #if SIZEOF_WCHAR_T == 4
1575 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1576 #define WC_ENC wxFONTENCODING_UTF32
1577 #elif SIZEOF_WCHAR_T == 2
1578 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1579 #define WC_ENC wxFONTENCODING_UTF16
1580 #else // sizeof(wchar_t) != 2 nor 4
1581 // does this ever happen?
1582 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1585 // ----------------------------------------------------------------------------
1586 // wxMBConv_iconv: encapsulates an iconv character set
1587 // ----------------------------------------------------------------------------
// wxMBConv implementation backed by a pair of iconv handles: m2w converts
// from the multibyte charset to wide chars and w2m does the reverse (see the
// validity check comparing both against ICONV_T_INVALID below).
// NOTE(review): access specifiers, braces and the declarations of m2w, w2m
// and m_name are not visible in this excerpt.
1589 class wxMBConv_iconv
: public wxMBConv
1592 wxMBConv_iconv(const wxChar
*name
);
1593 virtual ~wxMBConv_iconv();
// legacy NUL-terminated conversion entry points
1595 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1596 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1598 // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
1599 virtual size_t GetMBNulLen() const;
// Clone() builds a new converter from the stored name and copies the cached
// NUL length so the copy does not have to recompute it
1601 virtual wxMBConv
*Clone() const
1603 wxMBConv_iconv
*p
= new wxMBConv_iconv(m_name
);
1604 p
->m_minMBCharWidth
= m_minMBCharWidth
;
// the converter is usable only if both directions could be opened
1609 { return (m2w
!= ICONV_T_INVALID
) && (w2m
!= ICONV_T_INVALID
); }
1612 // the iconv handlers used to translate from multibyte
1613 // to wide char and in the other direction
1618 // guards access to m2w and w2m objects
1619 wxMutex m_iconvMutex
;
1623 // the name (for iconv_open()) of a wide char charset -- if none is
1624 // available on this machine, it will remain NULL
1625 static wxString ms_wcCharsetName
;
1627 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1628 // different endian-ness than the native one
1629 static bool ms_wcNeedsSwap
;
1632 // name of the encoding handled by this conversion
1635 // cached result of GetMBNulLen(); set to 0 meaning "unknown"
1637 size_t m_minMBCharWidth
;
1640 // make the constructor available for unit testing
// Factory wrapper: allocates a wxMBConv_iconv for the given charset name and
// checks it with IsOk().
// NOTE(review): what happens when IsOk() fails and the final return are not
// visible in this excerpt.
1641 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_iconv( const wxChar
* name
)
1643 wxMBConv_iconv
* result
= new wxMBConv_iconv( name
);
1644 if ( !result
->IsOk() )
// Definitions of the class-wide state shared by all wxMBConv_iconv objects:
// the iconv name of the wchar_t charset (empty until the constructor has
// found one) and whether its byte order differs from the native one.
1653 wxString
wxMBConv_iconv::ms_wcCharsetName
;
1654 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1656 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1659 m_minMBCharWidth
= 0;
1661 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1662 // names for the charsets
1663 const wxCharBuffer
cname(wxString(name
).ToAscii());
1665 // check for charset that represents wchar_t:
1666 if ( ms_wcCharsetName
.empty() )
1668 wxLogTrace(TRACE_STRCONV
, _T("Looking for wide char codeset:"));
1671 const wxChar
**names
= wxFontMapperBase::GetAllEncodingNames(WC_ENC
);
1672 #else // !wxUSE_FONTMAP
1673 static const wxChar
*names_static
[] =
1675 #if SIZEOF_WCHAR_T == 4
1677 #elif SIZEOF_WCHAR_T = 2
1682 const wxChar
**names
= names_static
;
1683 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1685 for ( ; *names
&& ms_wcCharsetName
.empty(); ++names
)
1687 const wxString
nameCS(*names
);
1689 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1690 wxString
nameXE(nameCS
);
1692 #ifdef WORDS_BIGENDIAN
1694 #else // little endian
1698 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1701 m2w
= iconv_open(nameXE
.ToAscii(), cname
);
1702 if ( m2w
== ICONV_T_INVALID
)
1704 // try charset w/o bytesex info (e.g. "UCS4")
1705 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1707 m2w
= iconv_open(nameCS
.ToAscii(), cname
);
1709 // and check for bytesex ourselves:
1710 if ( m2w
!= ICONV_T_INVALID
)
1712 char buf
[2], *bufPtr
;
1713 wchar_t wbuf
[2], *wbufPtr
;
1721 outsz
= SIZEOF_WCHAR_T
* 2;
1726 m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1727 (char**)&wbufPtr
, &outsz
);
1729 if (ICONV_FAILED(res
, insz
))
1731 wxLogLastError(wxT("iconv"));
1732 wxLogError(_("Conversion to charset '%s' doesn't work."),
1735 else // ok, can convert to this encoding, remember it
1737 ms_wcCharsetName
= nameCS
;
1738 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1742 else // use charset not requiring byte swapping
1744 ms_wcCharsetName
= nameXE
;
1748 wxLogTrace(TRACE_STRCONV
,
1749 wxT("iconv wchar_t charset is \"%s\"%s"),
1750 ms_wcCharsetName
.empty() ? _T("<none>")
1751 : ms_wcCharsetName
.c_str(),
1752 ms_wcNeedsSwap
? _T(" (needs swap)")
1755 else // we already have ms_wcCharsetName
1757 m2w
= iconv_open(ms_wcCharsetName
.ToAscii(), cname
);
1760 if ( ms_wcCharsetName
.empty() )
1762 w2m
= ICONV_T_INVALID
;
1766 w2m
= iconv_open(cname
, ms_wcCharsetName
.ToAscii());
1767 if ( w2m
== ICONV_T_INVALID
)
1769 wxLogTrace(TRACE_STRCONV
,
1770 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1771 ms_wcCharsetName
.c_str(), cname
.data());
// Destructor: acts on each of the two iconv handles only if it is valid
// (i.e. not ICONV_T_INVALID).
// NOTE(review): the statements executed for each handle are not visible in
// this excerpt -- presumably they release the handle; confirm in the full file.
1776 wxMBConv_iconv::~wxMBConv_iconv()
1778 if ( m2w
!= ICONV_T_INVALID
)
1780 if ( w2m
!= ICONV_T_INVALID
)
1784 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1786 // find the string length: notice that must be done differently for
1787 // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1789 const size_t nulLen
= GetMBNulLen();
1793 return wxCONV_FAILED
;
1796 inbuf
= strlen(psz
); // arguably more optimized than our version
1801 // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1802 // they also have to start at character boundary and not span two
1803 // adjacent characters
1805 for ( p
= psz
; NotAllNULs(p
, nulLen
); p
+= nulLen
)
1812 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1813 // Unfortunately there are a couple of global wxCSConv objects such as
1814 // wxConvLocal that are used all over wx code, so we have to make sure
1815 // the handle is used by at most one thread at the time. Otherwise
1816 // only a few wx classes would be safe to use from non-main threads
1817 // as MB<->WC conversion would fail "randomly".
1818 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1819 #endif // wxUSE_THREADS
1821 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1823 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1824 wchar_t *bufPtr
= buf
;
1825 const char *pszPtr
= psz
;
1829 // have destination buffer, convert there
1831 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1832 (char**)&bufPtr
, &outbuf
);
1833 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1837 // convert to native endianness
1838 for ( unsigned i
= 0; i
< res
; i
++ )
1839 buf
[n
] = WC_BSWAP(buf
[i
]);
1842 // NUL-terminate the string if there is any space left
1848 // no destination buffer... convert using temp buffer
1849 // to calculate destination buffer requirement
1856 outbuf
= 8 * SIZEOF_WCHAR_T
;
1859 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1860 (char**)&bufPtr
, &outbuf
);
1862 res
+= 8 - (outbuf
/ SIZEOF_WCHAR_T
);
1864 while ((cres
== (size_t)-1) && (errno
== E2BIG
));
1867 if (ICONV_FAILED(cres
, inbuf
))
1869 //VS: it is ok if iconv fails, hence trace only
1870 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1871 return wxCONV_FAILED
;
1877 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1880 // NB: explained in MB2WC
1881 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1884 size_t inlen
= wxWcslen(psz
);
1885 size_t inbuf
= inlen
* SIZEOF_WCHAR_T
;
1889 wchar_t *tmpbuf
= 0;
1893 // need to copy to temp buffer to switch endianness
1894 // (doing WC_BSWAP twice on the original buffer won't help, as it
1895 // could be in read-only memory, or be accessed in some other thread)
1896 tmpbuf
= (wchar_t *)malloc(inbuf
+ SIZEOF_WCHAR_T
);
1897 for ( size_t i
= 0; i
< inlen
; i
++ )
1898 tmpbuf
[n
] = WC_BSWAP(psz
[i
]);
1900 tmpbuf
[inlen
] = L
'\0';
1906 // have destination buffer, convert there
1907 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1911 // NB: iconv was given only wcslen(psz) characters on input, and so
1912 // it couldn't convert the trailing zero. Let's do it ourselves
1913 // if there's some room left for it in the output buffer.
1919 // no destination buffer: convert using temp buffer
1920 // to calculate destination buffer requirement
1928 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1932 while ((cres
== (size_t)-1) && (errno
== E2BIG
));
1940 if (ICONV_FAILED(cres
, inbuf
))
1942 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1943 return wxCONV_FAILED
;
1949 size_t wxMBConv_iconv::GetMBNulLen() const
1951 if ( m_minMBCharWidth
== 0 )
1953 wxMBConv_iconv
* const self
= wxConstCast(this, wxMBConv_iconv
);
1956 // NB: explained in MB2WC
1957 wxMutexLocker
lock(self
->m_iconvMutex
);
1960 wchar_t *wnul
= L
"";
1961 char buf
[8]; // should be enough for NUL in any encoding
1962 size_t inLen
= sizeof(wchar_t),
1963 outLen
= WXSIZEOF(buf
);
1964 char *inBuff
= (char *)wnul
;
1965 char *outBuff
= buf
;
1966 if ( iconv(w2m
, ICONV_CHAR_CAST(&inBuff
), &inLen
, &outBuff
, &outLen
) == (size_t)-1 )
1968 self
->m_minMBCharWidth
= (size_t)-1;
1972 self
->m_minMBCharWidth
= outBuff
- buf
;
1976 return m_minMBCharWidth
;
1979 #endif // HAVE_ICONV
1982 // ============================================================================
1983 // Win32 conversion classes
1984 // ============================================================================
1986 #ifdef wxHAVE_WIN32_MB2WC
1990 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1991 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
// wxMBConv implementation using the Win32 MultiByteToWideChar() and
// WideCharToMultiByte() functions for the code page stored in m_CodePage.
// NOTE(review): braces, access specifiers, several declarations (e.g. flags,
// pUsedDef, m_CodePage) and some return statements are not visible in this
// excerpt.
1994 class wxMBConv_win32
: public wxMBConv
// default constructor: use the ANSI code page, NUL length not yet computed
1999 m_CodePage
= CP_ACP
;
2000 m_minMBCharWidth
= 0;
// copy constructor: copies both the code page and the cached NUL length
2003 wxMBConv_win32(const wxMBConv_win32
& conv
)
2006 m_CodePage
= conv
.m_CodePage
;
2007 m_minMBCharWidth
= conv
.m_minMBCharWidth
;
// construct from a charset name, mapped through wxCharsetToCodepage()
2011 wxMBConv_win32(const wxChar
* name
)
2013 m_CodePage
= wxCharsetToCodepage(name
);
2014 m_minMBCharWidth
= 0;
// construct from a wxFontEncoding, mapped through wxEncodingToCodepage()
2017 wxMBConv_win32(wxFontEncoding encoding
)
2019 m_CodePage
= wxEncodingToCodepage(encoding
);
2020 m_minMBCharWidth
= 0;
2022 #endif // wxUSE_FONTMAP
// Multibyte -> wide conversion. UTF-8 and UTF-7 are delegated to wxConvUTF8
// and wxConvUTF7; other code pages go through MultiByteToWideChar(), with a
// round trip through WideCharToMultiByte() when MB_ERR_INVALID_CHARS was not
// used.
2024 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2026 // note that we have to use MB_ERR_INVALID_CHARS flag as without it
2027 // the behaviour is not compatible with the Unix version (using iconv)
2028 // and break the library itself, e.g. wxTextInputStream::NextChar()
2029 // wouldn't work if reading an incomplete MB char didn't result in an
2032 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
2033 // Win XP or newer and it is not supported for UTF-[78] so we always
2034 // use our own conversions in this case. See
2035 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2036 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
2037 if ( m_CodePage
== CP_UTF8
)
2039 return wxConvUTF8
.MB2WC(buf
, psz
, n
);
2042 if ( m_CodePage
== CP_UTF7
)
2044 return wxConvUTF7
.MB2WC(buf
, psz
, n
);
// strict error checking is requested only for code pages below 50000 other
// than CP_SYMBOL, and only when IsAtLeastWin2kSP4() says it is available
2048 if ( (m_CodePage
< 50000 && m_CodePage
!= CP_SYMBOL
) &&
2049 IsAtLeastWin2kSP4() )
2051 flags
= MB_ERR_INVALID_CHARS
;
2054 const size_t len
= ::MultiByteToWideChar
2056 m_CodePage
, // code page
2057 flags
, // flags: fall on error
2058 psz
, // input string
2059 -1, // its length (NUL-terminated)
2060 buf
, // output string
2061 buf
? n
: 0 // size of output buffer
2065 // function totally failed
2066 return wxCONV_FAILED
;
2069 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2070 // check if we succeeded, by doing a double trip:
2071 if ( !flags
&& buf
)
2073 const size_t mbLen
= strlen(psz
);
2074 wxCharBuffer
mbBuf(mbLen
);
2075 if ( ::WideCharToMultiByte
2082 mbLen
+ 1, // size in bytes, not length
2086 strcmp(mbBuf
, psz
) != 0 )
2088 // we didn't obtain the same thing we started from, hence
2089 // the conversion was lossy and we consider that it failed
2090 return wxCONV_FAILED
;
2094 // note that it returns count of written chars for buf != NULL and size
2095 // of the needed buffer for buf == NULL so in either case the length of
2096 // the string (which never includes the terminating NUL) is one less
// Wide -> multibyte conversion. Uses WC_NO_BEST_FIT_CHARS plus the "default
// char used" out-parameter when possible, otherwise converts back with
// MB2WC() and compares to detect lossy conversions.
2100 virtual size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
2103 we have a problem here: by default, WideCharToMultiByte() may
2104 replace characters unrepresentable in the target code page with bad
2105 quality approximations such as turning "1/2" symbol (U+00BD) into
2106 "1" for the code pages which don't have it and we, obviously, want
2107 to avoid this at any price
2109 the trouble is that this function does it _silently_, i.e. it won't
2110 even tell us whether it did or not... Win98/2000 and higher provide
2111 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2112 we have to resort to a round trip, i.e. check that converting back
2113 results in the same string -- this is, of course, expensive but
2114 otherwise we simply can't be sure to not garble the data.
2117 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2118 // it doesn't work with CJK encodings (which we test for rather roughly
2119 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2121 BOOL usedDef
wxDUMMY_INITIALIZE(false);
2124 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
2126 // it's our lucky day
2127 flags
= WC_NO_BEST_FIT_CHARS
;
2128 pUsedDef
= &usedDef
;
2130 else // old system or unsupported encoding
2136 const size_t len
= ::WideCharToMultiByte
2138 m_CodePage
, // code page
2139 flags
, // either none or no best fit
2140 pwz
, // input string
2141 -1, // it is (wide) NUL-terminated
2142 buf
, // output buffer
2143 buf
? n
: 0, // and its size
2144 NULL
, // default "replacement" char
2145 pUsedDef
// [out] was it used?
2150 // function totally failed
2151 return wxCONV_FAILED
;
2154 // if we were really converting, check if we succeeded
2159 // check if the conversion failed, i.e. if any replacements
2162 return wxCONV_FAILED
;
2164 else // we must resort to double tripping...
2166 wxWCharBuffer
wcBuf(n
);
2167 if ( MB2WC(wcBuf
.data(), buf
, n
) == wxCONV_FAILED
||
2168 wcscmp(wcBuf
, pwz
) != 0 )
2170 // we didn't obtain the same thing we started from, hence
2171 // the conversion was lossy and we consider that it failed
2172 return wxCONV_FAILED
;
2177 // see the comment above for the reason of "len - 1"
// Returns the cached NUL length for this code page, computing it on first
// use by asking WideCharToMultiByte() to translate a single wide NUL.
// (size_t)-1 is stored for unexpected results.
2181 virtual size_t GetMBNulLen() const
2183 if ( m_minMBCharWidth
== 0 )
2185 int len
= ::WideCharToMultiByte
2187 m_CodePage
, // code page
2189 L
"", // input string
2190 1, // translate just the NUL
2191 NULL
, // output buffer
2193 NULL
, // no replacement char
2194 NULL
// [out] don't care if it was used
// the cache is updated through a non-const alias of this object
2197 wxMBConv_win32
* const self
= wxConstCast(this, wxMBConv_win32
);
2201 wxLogDebug(_T("Unexpected NUL length %d"), len
);
2202 self
->m_minMBCharWidth
= (size_t)-1;
2206 self
->m_minMBCharWidth
= (size_t)-1;
2212 self
->m_minMBCharWidth
= len
;
2217 return m_minMBCharWidth
;
2220 virtual wxMBConv
*Clone() const { return new wxMBConv_win32(*this); }
// a code page of -1 marks an unusable converter
2222 bool IsOk() const { return m_CodePage
!= -1; }
// Tells whether WC_NO_BEST_FIT_CHARS may be used; the answer is derived from
// wxGetOsVersion() once and cached in a function-local static.
2225 static bool CanUseNoBestFit()
2227 static int s_isWin98Or2k
= -1;
2229 if ( s_isWin98Or2k
== -1 )
2232 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
2234 case wxOS_WINDOWS_9X
:
2235 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
2238 case wxOS_WINDOWS_NT
:
2239 s_isWin98Or2k
= verMaj
>= 5;
2243 // unknown: be conservative by default
2248 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
2251 return s_isWin98Or2k
== 1;
// Tells whether the OS is Windows 2000 SP4 or later, based on
// GetVersionEx(); the result is cached in a function-local static.
2254 static bool IsAtLeastWin2kSP4()
2259 static int s_isAtLeastWin2kSP4
= -1;
2261 if ( s_isAtLeastWin2kSP4
== -1 )
2263 OSVERSIONINFOEX ver
;
2265 memset(&ver
, 0, sizeof(ver
));
2266 ver
.dwOSVersionInfoSize
= sizeof(ver
);
2267 GetVersionEx((OSVERSIONINFO
*)&ver
);
2269 s_isAtLeastWin2kSP4
=
2270 ((ver
.dwMajorVersion
> 5) || // Vista+
2271 (ver
.dwMajorVersion
== 5 && ver
.dwMinorVersion
> 0) || // XP/2003
2272 (ver
.dwMajorVersion
== 5 && ver
.dwMinorVersion
== 0 &&
2273 ver
.wServicePackMajor
>= 4)) // 2000 SP4+
2277 return s_isAtLeastWin2kSP4
== 1;
2282 // the code page we're working with
2285 // cached result of GetMBNulLen(), set to 0 initially meaning
2287 size_t m_minMBCharWidth
;
2290 #endif // wxHAVE_WIN32_MB2WC
2292 // ============================================================================
2293 // Cocoa conversion classes
2294 // ============================================================================
2296 #if defined(__WXCOCOA__)
2298 // RN: There is no UTF-32 support in either Core Foundation or Cocoa.
2299 // Strangely enough, Core Foundation uses
2300 // UTF-32 internally quite a bit - it's just not public (yet).
2302 #include <CoreFoundation/CFString.h>
2303 #include <CoreFoundation/CFStringEncodingExt.h>
2305 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
2307 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
2311 case wxFONTENCODING_DEFAULT
:
2312 enc
= CFStringGetSystemEncoding();
2315 case wxFONTENCODING_ISO8859_1
:
2316 enc
= kCFStringEncodingISOLatin1
;
2318 case wxFONTENCODING_ISO8859_2
:
2319 enc
= kCFStringEncodingISOLatin2
;
2321 case wxFONTENCODING_ISO8859_3
:
2322 enc
= kCFStringEncodingISOLatin3
;
2324 case wxFONTENCODING_ISO8859_4
:
2325 enc
= kCFStringEncodingISOLatin4
;
2327 case wxFONTENCODING_ISO8859_5
:
2328 enc
= kCFStringEncodingISOLatinCyrillic
;
2330 case wxFONTENCODING_ISO8859_6
:
2331 enc
= kCFStringEncodingISOLatinArabic
;
2333 case wxFONTENCODING_ISO8859_7
:
2334 enc
= kCFStringEncodingISOLatinGreek
;
2336 case wxFONTENCODING_ISO8859_8
:
2337 enc
= kCFStringEncodingISOLatinHebrew
;
2339 case wxFONTENCODING_ISO8859_9
:
2340 enc
= kCFStringEncodingISOLatin5
;
2342 case wxFONTENCODING_ISO8859_10
:
2343 enc
= kCFStringEncodingISOLatin6
;
2345 case wxFONTENCODING_ISO8859_11
:
2346 enc
= kCFStringEncodingISOLatinThai
;
2348 case wxFONTENCODING_ISO8859_13
:
2349 enc
= kCFStringEncodingISOLatin7
;
2351 case wxFONTENCODING_ISO8859_14
:
2352 enc
= kCFStringEncodingISOLatin8
;
2354 case wxFONTENCODING_ISO8859_15
:
2355 enc
= kCFStringEncodingISOLatin9
;
2358 case wxFONTENCODING_KOI8
:
2359 enc
= kCFStringEncodingKOI8_R
;
2361 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
2362 enc
= kCFStringEncodingDOSRussian
;
2365 // case wxFONTENCODING_BULGARIAN :
2369 case wxFONTENCODING_CP437
:
2370 enc
= kCFStringEncodingDOSLatinUS
;
2372 case wxFONTENCODING_CP850
:
2373 enc
= kCFStringEncodingDOSLatin1
;
2375 case wxFONTENCODING_CP852
:
2376 enc
= kCFStringEncodingDOSLatin2
;
2378 case wxFONTENCODING_CP855
:
2379 enc
= kCFStringEncodingDOSCyrillic
;
2381 case wxFONTENCODING_CP866
:
2382 enc
= kCFStringEncodingDOSRussian
;
2384 case wxFONTENCODING_CP874
:
2385 enc
= kCFStringEncodingDOSThai
;
2387 case wxFONTENCODING_CP932
:
2388 enc
= kCFStringEncodingDOSJapanese
;
2390 case wxFONTENCODING_CP936
:
2391 enc
= kCFStringEncodingDOSChineseSimplif
;
2393 case wxFONTENCODING_CP949
:
2394 enc
= kCFStringEncodingDOSKorean
;
2396 case wxFONTENCODING_CP950
:
2397 enc
= kCFStringEncodingDOSChineseTrad
;
2399 case wxFONTENCODING_CP1250
:
2400 enc
= kCFStringEncodingWindowsLatin2
;
2402 case wxFONTENCODING_CP1251
:
2403 enc
= kCFStringEncodingWindowsCyrillic
;
2405 case wxFONTENCODING_CP1252
:
2406 enc
= kCFStringEncodingWindowsLatin1
;
2408 case wxFONTENCODING_CP1253
:
2409 enc
= kCFStringEncodingWindowsGreek
;
2411 case wxFONTENCODING_CP1254
:
2412 enc
= kCFStringEncodingWindowsLatin5
;
2414 case wxFONTENCODING_CP1255
:
2415 enc
= kCFStringEncodingWindowsHebrew
;
2417 case wxFONTENCODING_CP1256
:
2418 enc
= kCFStringEncodingWindowsArabic
;
2420 case wxFONTENCODING_CP1257
:
2421 enc
= kCFStringEncodingWindowsBalticRim
;
2423 // This only really encodes to UTF7 (if that) evidently
2424 // case wxFONTENCODING_UTF7 :
2425 // enc = kCFStringEncodingNonLossyASCII ;
2427 case wxFONTENCODING_UTF8
:
2428 enc
= kCFStringEncodingUTF8
;
2430 case wxFONTENCODING_EUC_JP
:
2431 enc
= kCFStringEncodingEUC_JP
;
2433 case wxFONTENCODING_UTF16
:
2434 enc
= kCFStringEncodingUnicode
;
2436 case wxFONTENCODING_MACROMAN
:
2437 enc
= kCFStringEncodingMacRoman
;
2439 case wxFONTENCODING_MACJAPANESE
:
2440 enc
= kCFStringEncodingMacJapanese
;
2442 case wxFONTENCODING_MACCHINESETRAD
:
2443 enc
= kCFStringEncodingMacChineseTrad
;
2445 case wxFONTENCODING_MACKOREAN
:
2446 enc
= kCFStringEncodingMacKorean
;
2448 case wxFONTENCODING_MACARABIC
:
2449 enc
= kCFStringEncodingMacArabic
;
2451 case wxFONTENCODING_MACHEBREW
:
2452 enc
= kCFStringEncodingMacHebrew
;
2454 case wxFONTENCODING_MACGREEK
:
2455 enc
= kCFStringEncodingMacGreek
;
2457 case wxFONTENCODING_MACCYRILLIC
:
2458 enc
= kCFStringEncodingMacCyrillic
;
2460 case wxFONTENCODING_MACDEVANAGARI
:
2461 enc
= kCFStringEncodingMacDevanagari
;
2463 case wxFONTENCODING_MACGURMUKHI
:
2464 enc
= kCFStringEncodingMacGurmukhi
;
2466 case wxFONTENCODING_MACGUJARATI
:
2467 enc
= kCFStringEncodingMacGujarati
;
2469 case wxFONTENCODING_MACORIYA
:
2470 enc
= kCFStringEncodingMacOriya
;
2472 case wxFONTENCODING_MACBENGALI
:
2473 enc
= kCFStringEncodingMacBengali
;
2475 case wxFONTENCODING_MACTAMIL
:
2476 enc
= kCFStringEncodingMacTamil
;
2478 case wxFONTENCODING_MACTELUGU
:
2479 enc
= kCFStringEncodingMacTelugu
;
2481 case wxFONTENCODING_MACKANNADA
:
2482 enc
= kCFStringEncodingMacKannada
;
2484 case wxFONTENCODING_MACMALAJALAM
:
2485 enc
= kCFStringEncodingMacMalayalam
;
2487 case wxFONTENCODING_MACSINHALESE
:
2488 enc
= kCFStringEncodingMacSinhalese
;
2490 case wxFONTENCODING_MACBURMESE
:
2491 enc
= kCFStringEncodingMacBurmese
;
2493 case wxFONTENCODING_MACKHMER
:
2494 enc
= kCFStringEncodingMacKhmer
;
2496 case wxFONTENCODING_MACTHAI
:
2497 enc
= kCFStringEncodingMacThai
;
2499 case wxFONTENCODING_MACLAOTIAN
:
2500 enc
= kCFStringEncodingMacLaotian
;
2502 case wxFONTENCODING_MACGEORGIAN
:
2503 enc
= kCFStringEncodingMacGeorgian
;
2505 case wxFONTENCODING_MACARMENIAN
:
2506 enc
= kCFStringEncodingMacArmenian
;
2508 case wxFONTENCODING_MACCHINESESIMP
:
2509 enc
= kCFStringEncodingMacChineseSimp
;
2511 case wxFONTENCODING_MACTIBETAN
:
2512 enc
= kCFStringEncodingMacTibetan
;
2514 case wxFONTENCODING_MACMONGOLIAN
:
2515 enc
= kCFStringEncodingMacMongolian
;
2517 case wxFONTENCODING_MACETHIOPIC
:
2518 enc
= kCFStringEncodingMacEthiopic
;
2520 case wxFONTENCODING_MACCENTRALEUR
:
2521 enc
= kCFStringEncodingMacCentralEurRoman
;
2523 case wxFONTENCODING_MACVIATNAMESE
:
2524 enc
= kCFStringEncodingMacVietnamese
;
2526 case wxFONTENCODING_MACARABICEXT
:
2527 enc
= kCFStringEncodingMacExtArabic
;
2529 case wxFONTENCODING_MACSYMBOL
:
2530 enc
= kCFStringEncodingMacSymbol
;
2532 case wxFONTENCODING_MACDINGBATS
:
2533 enc
= kCFStringEncodingMacDingbats
;
2535 case wxFONTENCODING_MACTURKISH
:
2536 enc
= kCFStringEncodingMacTurkish
;
2538 case wxFONTENCODING_MACCROATIAN
:
2539 enc
= kCFStringEncodingMacCroatian
;
2541 case wxFONTENCODING_MACICELANDIC
:
2542 enc
= kCFStringEncodingMacIcelandic
;
2544 case wxFONTENCODING_MACROMANIAN
:
2545 enc
= kCFStringEncodingMacRomanian
;
2547 case wxFONTENCODING_MACCELTIC
:
2548 enc
= kCFStringEncodingMacCeltic
;
2550 case wxFONTENCODING_MACGAELIC
:
2551 enc
= kCFStringEncodingMacGaelic
;
2553 // case wxFONTENCODING_MACKEYBOARD :
2554 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2558 // because gcc is picky
// wxMBConv implementation for wxCocoa built on Core Foundation's CFString:
// text is converted through a temporary CFString in the encoding stored in
// m_encoding.
// NOTE(review): braces, access specifiers, the #else/#endif halves of the
// SIZEOF_WCHAR_T conditionals and several statements are not visible in this
// excerpt.
2565 class wxMBConv_cocoa
: public wxMBConv
// default constructor: use the system encoding
2570 Init(CFStringGetSystemEncoding()) ;
// copy constructor: share the same encoding id
2573 wxMBConv_cocoa(const wxMBConv_cocoa
& conv
)
2575 m_encoding
= conv
.m_encoding
;
// construct from a charset name: name -> wxFontEncoding -> CFStringEncoding
2579 wxMBConv_cocoa(const wxChar
* name
)
2581 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2585 wxMBConv_cocoa(wxFontEncoding encoding
)
2587 Init( wxCFStringEncFromFontEnc(encoding
) );
2590 virtual ~wxMBConv_cocoa()
// Init() just records the encoding to use
2594 void Init( CFStringEncoding encoding
)
2596 m_encoding
= encoding
;
// Multibyte -> wide: builds a CFString from the input bytes and extracts its
// UniChar contents; when wchar_t is 4 bytes the UniChar data goes through a
// temporary buffer and wxMBConvUTF16.
2599 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2603 CFStringRef theString
= CFStringCreateWithBytes (
2604 NULL
, //the allocator
2605 (const UInt8
*)szUnConv
,
2608 false //no BOM/external representation
2611 wxASSERT(theString
);
2613 size_t nOutLength
= CFStringGetLength(theString
);
2617 CFRelease(theString
);
2621 CFRange theRange
= { 0, nOutSize
};
2623 #if SIZEOF_WCHAR_T == 4
2624 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2627 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2629 CFRelease(theString
);
// NOTE(review): the index is the string length while the buffer above was
// sized with nOutSize -- confirm callers guarantee nOutLength < nOutSize
2631 szUniCharBuffer
[nOutLength
] = '\0';
2633 #if SIZEOF_WCHAR_T == 4
2634 wxMBConvUTF16 converter
;
2635 converter
.MB2WC( szOut
, (const char*)szUniCharBuffer
, nOutSize
);
2636 delete [] szUniCharBuffer
;
// Wide -> multibyte: wraps the (possibly UTF-16 converted) input in a
// non-copying CFString and extracts it in m_encoding.
2642 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2646 size_t nRealOutSize
;
2647 size_t nBufSize
= wxWcslen(szUnConv
);
2648 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
// with 4-byte wchar_t the input is first converted to UTF-16 in a
// heap-allocated buffer
2650 #if SIZEOF_WCHAR_T == 4
2651 wxMBConvUTF16 converter
;
2652 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2653 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1];
2654 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
));
2655 nBufSize
/= sizeof(UniChar
);
2658 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2662 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
2665 wxASSERT(theString
);
2667 //Note that CER puts a BOM when converting to unicode
2668 //so we check and use getchars instead in that case
2669 if (m_encoding
== kCFStringEncodingUnicode
)
2672 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2674 nRealOutSize
= CFStringGetLength(theString
) + 1;
2680 CFRangeMake(0, CFStringGetLength(theString
)),
2682 0, //what to put in characters that can't be converted -
2683 //0 tells CFString to return NULL if it meets such a character
2684 false, //not an external representation
2687 (CFIndex
*) &nRealOutSize
2691 CFRelease(theString
);
2693 #if SIZEOF_WCHAR_T == 4
2694 delete[] szUniBuffer
;
// the value returned is one less than nRealOutSize
2697 return nRealOutSize
- 1;
2700 virtual wxMBConv
*Clone() const { return new wxMBConv_cocoa(*this); }
// usable only for a valid encoding id that Core Foundation reports available
2704 return m_encoding
!= kCFStringEncodingInvalidId
&&
2705 CFStringIsEncodingAvailable(m_encoding
);
2709 CFStringEncoding m_encoding
;
2712 #endif // defined(__WXCOCOA__)
2714 // ============================================================================
2715 // Mac conversion classes
2716 // ============================================================================
2718 #if defined(__WXMAC__) && defined(TARGET_CARBON)
// wxMBConv implementation for Carbon based on Text Encoding Converter (TEC)
// objects: m_MB2WC_converter and m_WC2MB_converter convert between
// m_char_encoding and 16-bit Unicode (m_unicode_encoding).
// NOTE(review): braces, access specifiers, the #else/#endif halves of the
// SIZEOF_WCHAR_T conditionals, several declarations (e.g. res, tbuf) and the
// buffer clean-up/return statements are not visible in this excerpt.
2720 class wxMBConv_mac
: public wxMBConv
// default constructor: use the system encoding
2725 Init(CFStringGetSystemEncoding()) ;
// copy constructor: re-initialises from the other object's char encoding
2728 wxMBConv_mac(const wxMBConv_mac
& conv
)
2730 Init(conv
.m_char_encoding
);
2734 wxMBConv_mac(const wxChar
* name
)
2736 Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) );
2740 wxMBConv_mac(wxFontEncoding encoding
)
2742 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
// destructor: disposes of whichever converters were created
2745 virtual ~wxMBConv_mac()
2747 OSStatus status
= noErr
;
2748 if (m_MB2WC_converter
)
2749 status
= TECDisposeConverter(m_MB2WC_converter
);
2750 if (m_WC2MB_converter
)
2751 status
= TECDisposeConverter(m_WC2MB_converter
);
// Init() resets both converters to NULL and records the two text encodings
2754 void Init( TextEncodingBase encoding
,TextEncodingVariant encodingVariant
= kTextEncodingDefaultVariant
,
2755 TextEncodingFormat encodingFormat
= kTextEncodingDefaultFormat
)
2757 m_MB2WC_converter
= NULL
;
2758 m_WC2MB_converter
= NULL
;
2759 m_char_encoding
= CreateTextEncoding(encoding
, encodingVariant
, encodingFormat
) ;
2760 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
, 0, kUnicode16BitFormat
) ;
// creates both TEC converters the first time they are needed (only when
// neither exists yet); the members are mutable so this can be const
2763 virtual void CreateIfNeeded() const
2765 if ( m_MB2WC_converter
== NULL
&& m_WC2MB_converter
== NULL
)
2767 OSStatus status
= noErr
;
2768 status
= TECCreateConverter(&m_MB2WC_converter
,
2770 m_unicode_encoding
);
2771 wxASSERT_MSG( status
== noErr
, _("Unable to create TextEncodingConverter")) ;
2772 status
= TECCreateConverter(&m_WC2MB_converter
,
2775 wxASSERT_MSG( status
== noErr
, _("Unable to create TextEncodingConverter")) ;
// Multibyte -> wide: converts psz (including its terminating NUL, see the
// "+ 1" below) to UniChar with TECConvertText(); with 4-byte wchar_t the
// UniChar data is then widened through wxMBConvUTF16.
2779 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2782 OSStatus status
= noErr
;
2783 ByteCount byteOutLen
;
2784 ByteCount byteInLen
= strlen(psz
) + 1;
2785 wchar_t *tbuf
= NULL
;
2786 UniChar
* ubuf
= NULL
;
2791 // Apple specs say at least 32
2792 n
= wxMax( 32, byteInLen
) ;
2793 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
2796 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
2798 #if SIZEOF_WCHAR_T == 4
2799 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2801 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2804 status
= TECConvertText(
2805 m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2806 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2808 #if SIZEOF_WCHAR_T == 4
2809 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2810 // is not properly terminated we get random characters at the end
2811 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2812 wxMBConvUTF16 converter
;
2813 res
= converter
.MB2WC( (buf
? buf
: tbuf
), (const char*)ubuf
, n
) ;
2816 res
= byteOutLen
/ sizeof( UniChar
) ;
2822 if ( buf
&& res
< n
)
// Wide -> multibyte: converts psz to UniChar if needed, runs it through
// TECConvertText() and then converts the result back with MB2WC() to detect
// lossy conversions.
2828 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2831 OSStatus status
= noErr
;
2832 ByteCount byteOutLen
;
2833 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2839 // Apple specs say at least 32
2840 n
= wxMax( 32, ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2841 tbuf
= (char*) malloc( n
) ;
2844 ByteCount byteBufferLen
= n
;
2845 UniChar
* ubuf
= NULL
;
2847 #if SIZEOF_WCHAR_T == 4
2848 wxMBConvUTF16 converter
;
2849 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2850 byteInLen
= unicharlen
;
2851 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2852 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2854 ubuf
= (UniChar
*) psz
;
2857 status
= TECConvertText(
2858 m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2859 (TextPtr
) (buf
? buf
: tbuf
), byteBufferLen
, &byteOutLen
);
2861 #if SIZEOF_WCHAR_T == 4
2868 size_t res
= byteOutLen
;
2869 if ( buf
&& res
< n
)
2873 //we need to double-trip to verify it didn't insert any ? in place
2874 //of bogus characters
2875 wxWCharBuffer
wcBuf(n
);
2876 size_t pszlen
= wxWcslen(psz
);
2877 if ( MB2WC(wcBuf
.data(), buf
, n
) == wxCONV_FAILED
||
2878 wxWcslen(wcBuf
) != pszlen
||
2879 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2881 // we didn't obtain the same thing we started from, hence
2882 // the conversion was lossy and we consider that it failed
2883 return wxCONV_FAILED
;
2890 virtual wxMBConv
*Clone() const { return new wxMBConv_mac(*this); }
// usable only once both converters exist
2895 return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
;
// converters are created lazily, hence mutable
2899 mutable TECObjectRef m_MB2WC_converter
;
2900 mutable TECObjectRef m_WC2MB_converter
;
2902 TextEncodingBase m_char_encoding
;
2903 TextEncodingBase m_unicode_encoding
;
2906 // MB is decomposed (D) normalized UTF8
2908 class wxMBConv_macUTF8D
: public wxMBConv_mac
2913 Init( kTextEncodingUnicodeDefault
, kUnicodeNoSubset
, kUnicodeUTF8Format
) ;
2918 virtual ~wxMBConv_macUTF8D()
2921 DisposeUnicodeToTextInfo(&m_uni
);
2922 if (m_uniBack
!=NULL
)
2923 DisposeUnicodeToTextInfo(&m_uniBack
);
2926 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2929 OSStatus status
= noErr
;
2930 ByteCount byteOutLen
;
2931 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2937 // Apple specs say at least 32
2938 n
= wxMax( 32, ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2939 tbuf
= (char*) malloc( n
) ;
2942 ByteCount byteBufferLen
= n
;
2943 UniChar
* ubuf
= NULL
;
2945 #if SIZEOF_WCHAR_T == 4
2946 wxMBConvUTF16 converter
;
2947 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2948 byteInLen
= unicharlen
;
2949 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2950 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2952 ubuf
= (UniChar
*) psz
;
2955 // ubuf is a non-decomposed UniChar buffer
2957 ByteCount dcubuflen
= byteInLen
* 2 + 2 ;
2958 ByteCount dcubufread
, dcubufwritten
;
2959 UniChar
*dcubuf
= (UniChar
*) malloc( dcubuflen
) ;
2961 ConvertFromUnicodeToText( m_uni
, byteInLen
, ubuf
,
2962 kUnicodeDefaultDirectionMask
, 0, NULL
, NULL
, NULL
, dcubuflen
, &dcubufread
, &dcubufwritten
, dcubuf
) ;
2964 // we now convert that decomposed buffer into UTF8
2966 status
= TECConvertText(
2967 m_WC2MB_converter
, (ConstTextPtr
) dcubuf
, dcubufwritten
, &dcubufread
,
2968 (TextPtr
) (buf
? buf
: tbuf
), byteBufferLen
, &byteOutLen
);
2972 #if SIZEOF_WCHAR_T == 4
2979 size_t res
= byteOutLen
;
2980 if ( buf
&& res
< n
)
2983 // don't test for round-trip fidelity yet, we cannot guarantee it yet
2989 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2992 OSStatus status
= noErr
;
2993 ByteCount byteOutLen
;
2994 ByteCount byteInLen
= strlen(psz
) + 1;
2995 wchar_t *tbuf
= NULL
;
2996 UniChar
* ubuf
= NULL
;
3001 // Apple specs say at least 32
3002 n
= wxMax( 32, byteInLen
) ;
3003 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
3006 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
3008 #if SIZEOF_WCHAR_T == 4
3009 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
3011 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
3014 ByteCount dcubuflen
= byteBufferLen
* 2 + 2 ;
3015 ByteCount dcubufread
, dcubufwritten
;
3016 UniChar
*dcubuf
= (UniChar
*) malloc( dcubuflen
) ;
3018 status
= TECConvertText(
3019 m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
3020 (TextPtr
) dcubuf
, dcubuflen
, &byteOutLen
);
3021 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
3022 // is not properly terminated we get random characters at the end
3023 dcubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
3025 // now from the decomposed UniChar to properly composed uniChar
3026 ConvertFromUnicodeToText( m_uniBack
, byteOutLen
, dcubuf
,
3027 kUnicodeDefaultDirectionMask
, 0, NULL
, NULL
, NULL
, dcubuflen
, &dcubufread
, &dcubufwritten
, ubuf
) ;
3030 byteOutLen
= dcubufwritten
;
3031 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
3034 #if SIZEOF_WCHAR_T == 4
3035 wxMBConvUTF16 converter
;
3036 res
= converter
.MB2WC( (buf
? buf
: tbuf
), (const char*)ubuf
, n
) ;
3039 res
= byteOutLen
/ sizeof( UniChar
) ;
3045 if ( buf
&& res
< n
)
3051 virtual void CreateIfNeeded() const
3053 wxMBConv_mac::CreateIfNeeded() ;
3054 if ( m_uni
== NULL
)
3056 m_map
.unicodeEncoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
3057 kUnicodeNoSubset
, kTextEncodingDefaultFormat
);
3058 m_map
.otherEncoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
3059 kUnicodeCanonicalDecompVariant
, kTextEncodingDefaultFormat
);
3060 m_map
.mappingVersion
= kUnicodeUseLatestMapping
;
3062 OSStatus err
= CreateUnicodeToTextInfo(&m_map
, &m_uni
);
3063 wxASSERT_MSG( err
== noErr
, _(" Couldn't create the UnicodeConverter")) ;
3065 m_map
.unicodeEncoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
3066 kUnicodeNoSubset
, kTextEncodingDefaultFormat
);
3067 m_map
.otherEncoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
3068 kUnicodeCanonicalCompVariant
, kTextEncodingDefaultFormat
);
3069 m_map
.mappingVersion
= kUnicodeUseLatestMapping
;
3070 err
= CreateUnicodeToTextInfo(&m_map
, &m_uniBack
);
3071 wxASSERT_MSG( err
== noErr
, _(" Couldn't create the UnicodeConverter")) ;
3075 mutable UnicodeToTextInfo m_uni
;
3076 mutable UnicodeToTextInfo m_uniBack
;
3077 mutable UnicodeMapping m_map
;
3079 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
3081 // ============================================================================
3082 // wxEncodingConverter based conversion classes
3083 // ============================================================================
3087 class wxMBConv_wxwin
: public wxMBConv
3092 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
3093 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
3097 // temporarily just use wxEncodingConverter stuff,
3098 // so that it works while a better implementation is built
3099 wxMBConv_wxwin(const wxChar
* name
)
3102 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
3104 m_enc
= wxFONTENCODING_SYSTEM
;
3109 wxMBConv_wxwin(wxFontEncoding enc
)
3116 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
3118 size_t inbuf
= strlen(psz
);
3121 if (!m2w
.Convert(psz
, buf
))
3122 return wxCONV_FAILED
;
3127 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
3129 const size_t inbuf
= wxWcslen(psz
);
3132 if (!w2m
.Convert(psz
, buf
))
3133 return wxCONV_FAILED
;
3139 virtual size_t GetMBNulLen() const
3143 case wxFONTENCODING_UTF16BE
:
3144 case wxFONTENCODING_UTF16LE
:
3147 case wxFONTENCODING_UTF32BE
:
3148 case wxFONTENCODING_UTF32LE
:
3156 virtual wxMBConv
*Clone() const { return new wxMBConv_wxwin(m_enc
); }
3158 bool IsOk() const { return m_ok
; }
3161 wxFontEncoding m_enc
;
3162 wxEncodingConverter m2w
, w2m
;
3165 // were we initialized successfully?
3168 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
3171 // make the constructors available for unit testing
3172 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_wxwin( const wxChar
* name
)
3174 wxMBConv_wxwin
* result
= new wxMBConv_wxwin( name
);
3175 if ( !result
->IsOk() )
3184 #endif // wxUSE_FONTMAP
3186 // ============================================================================
3187 // wxCSConv implementation
3188 // ============================================================================
3190 void wxCSConv::Init()
3197 wxCSConv::wxCSConv(const wxChar
*charset
)
3207 m_encoding
= wxFontMapperBase::GetEncodingFromName(charset
);
3209 m_encoding
= wxFONTENCODING_SYSTEM
;
3213 wxCSConv::wxCSConv(wxFontEncoding encoding
)
3215 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
3217 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
3219 encoding
= wxFONTENCODING_SYSTEM
;
3224 m_encoding
= encoding
;
3227 wxCSConv::~wxCSConv()
3232 wxCSConv::wxCSConv(const wxCSConv
& conv
)
3237 SetName(conv
.m_name
);
3238 m_encoding
= conv
.m_encoding
;
3241 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
3245 SetName(conv
.m_name
);
3246 m_encoding
= conv
.m_encoding
;
3251 void wxCSConv::Clear()
3260 void wxCSConv::SetName(const wxChar
*charset
)
3264 m_name
= wxStrdup(charset
);
3271 WX_DECLARE_HASH_MAP( wxFontEncoding
, wxString
, wxIntegerHash
, wxIntegerEqual
,
3272 wxEncodingNameCache
);
3274 static wxEncodingNameCache gs_nameCache
;
3277 wxMBConv
*wxCSConv::DoCreate() const
3280 wxLogTrace(TRACE_STRCONV
,
3281 wxT("creating conversion for %s"),
3283 : wxFontMapperBase::GetEncodingName(m_encoding
).c_str()));
3284 #endif // wxUSE_FONTMAP
3286 // check for the special case of ASCII or ISO8859-1 charset: as we have
3287 // special knowledge of it anyhow, we don't need to create a special
3288 // conversion object
3289 if ( m_encoding
== wxFONTENCODING_ISO8859_1
||
3290 m_encoding
== wxFONTENCODING_DEFAULT
)
3292 // don't convert at all
3296 // we trust OS to do conversion better than we can so try external
3297 // conversion methods first
3299 // the full order is:
3300 // 1. OS conversion (iconv() under Unix or Win32 API)
3301 // 2. hard coded conversions for UTF
3302 // 3. wxEncodingConverter as fall back
3308 #endif // !wxUSE_FONTMAP
3310 wxString
name(m_name
);
3312 wxFontEncoding
encoding(m_encoding
);
3315 if ( !name
.empty() )
3317 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
3325 wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
3326 #endif // wxUSE_FONTMAP
3330 const wxEncodingNameCache::iterator it
= gs_nameCache
.find(encoding
);
3331 if ( it
!= gs_nameCache
.end() )
3333 if ( it
->second
.empty() )
3336 wxMBConv_iconv
*conv
= new wxMBConv_iconv(it
->second
);
3343 const wxChar
** names
= wxFontMapperBase::GetAllEncodingNames(encoding
);
3344 // CS : in case this does not return valid names (eg for MacRoman) encoding
3345 // got a 'failure' entry in the cache all the same, although it just has to
3346 // be created using a different method, so only store failed iconv creation
3347 // attempts (or perhaps we shouldn't do this at all?)
3348 if ( names
[0] != NULL
)
3350 for ( ; *names
; ++names
)
3352 wxMBConv_iconv
*conv
= new wxMBConv_iconv(*names
);
3355 gs_nameCache
[encoding
] = *names
;
3362 gs_nameCache
[encoding
] = _T(""); // cache the failure
3365 #endif // wxUSE_FONTMAP
3367 #endif // HAVE_ICONV
3369 #ifdef wxHAVE_WIN32_MB2WC
3372 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
3373 : new wxMBConv_win32(m_encoding
);
3382 #endif // wxHAVE_WIN32_MB2WC
3384 #if defined(__WXMAC__)
3386 // leave UTF16 and UTF32 to the built-ins of wx
3387 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
3388 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
3391 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
3392 : new wxMBConv_mac(m_encoding
);
3394 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
3404 #if defined(__WXCOCOA__)
3406 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
3409 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
3410 : new wxMBConv_cocoa(m_encoding
);
3412 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
3423 wxFontEncoding enc
= m_encoding
;
3425 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
3427 // use "false" to suppress interactive dialogs -- we can be called from
3428 // anywhere and popping up a dialog from here is the last thing we want to
3430 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
3432 #endif // wxUSE_FONTMAP
3436 case wxFONTENCODING_UTF7
:
3437 return new wxMBConvUTF7
;
3439 case wxFONTENCODING_UTF8
:
3440 return new wxMBConvUTF8
;
3442 case wxFONTENCODING_UTF16BE
:
3443 return new wxMBConvUTF16BE
;
3445 case wxFONTENCODING_UTF16LE
:
3446 return new wxMBConvUTF16LE
;
3448 case wxFONTENCODING_UTF32BE
:
3449 return new wxMBConvUTF32BE
;
3451 case wxFONTENCODING_UTF32LE
:
3452 return new wxMBConvUTF32LE
;
3455 // nothing to do but put here to suppress gcc warnings
3462 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
3463 : new wxMBConv_wxwin(m_encoding
);
3469 #endif // wxUSE_FONTMAP
3471 // NB: This is a hack to prevent deadlock. What could otherwise happen
3472 // in Unicode build: wxConvLocal creation ends up being here
3473 // because of some failure and logs the error. But wxLog will try to
3474 // attach a timestamp, for which it will need wxConvLocal (to convert
3475 // time to char* and then wchar_t*), but that fails, tries to log the
3476 // error, but wxLog has an (already locked) critical section that
3477 // guards the static buffer.
3478 static bool alreadyLoggingError
= false;
3479 if (!alreadyLoggingError
)
3481 alreadyLoggingError
= true;
3482 wxLogError(_("Cannot convert from the charset '%s'!"),
3486 wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str()
3487 #else // !wxUSE_FONTMAP
3488 wxString::Format(_("encoding %i"), m_encoding
).c_str()
3489 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
3492 alreadyLoggingError
= false;
3498 void wxCSConv::CreateConvIfNeeded() const
3502 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
3504 // if we have neither the name nor the encoding, use the default
3505 // encoding for this system
3506 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
3509 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
3511 // fallback to some reasonable default:
3512 self
->m_encoding
= wxFONTENCODING_ISO8859_1
;
3513 #endif // wxUSE_INTL
3516 self
->m_convReal
= DoCreate();
3517 self
->m_deferred
= false;
3521 bool wxCSConv::IsOk() const
3523 CreateConvIfNeeded();
3525 // special case: no convReal created for wxFONTENCODING_ISO8859_1
3526 if ( m_encoding
== wxFONTENCODING_ISO8859_1
)
3527 return true; // always ok as we do it ourselves
3529 // m_convReal->IsOk() is called at its own creation, so we know it must
3530 // be ok if m_convReal is non-NULL
3531 return m_convReal
!= NULL
;
3534 size_t wxCSConv::ToWChar(wchar_t *dst
, size_t dstLen
,
3535 const char *src
, size_t srcLen
) const
3537 CreateConvIfNeeded();
3540 return m_convReal
->ToWChar(dst
, dstLen
, src
, srcLen
);
3543 return wxMBConv::ToWChar(dst
, dstLen
, src
, srcLen
);
3546 size_t wxCSConv::FromWChar(char *dst
, size_t dstLen
,
3547 const wchar_t *src
, size_t srcLen
) const
3549 CreateConvIfNeeded();
3552 return m_convReal
->FromWChar(dst
, dstLen
, src
, srcLen
);
3555 return wxMBConv::FromWChar(dst
, dstLen
, src
, srcLen
);
3558 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
3560 CreateConvIfNeeded();
3563 return m_convReal
->MB2WC(buf
, psz
, n
);
3566 size_t len
= strlen(psz
);
3570 for (size_t c
= 0; c
<= len
; c
++)
3571 buf
[c
] = (unsigned char)(psz
[c
]);
3577 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
3579 CreateConvIfNeeded();
3582 return m_convReal
->WC2MB(buf
, psz
, n
);
3585 const size_t len
= wxWcslen(psz
);
3588 for (size_t c
= 0; c
<= len
; c
++)
3591 return wxCONV_FAILED
;
3593 buf
[c
] = (char)psz
[c
];
3598 for (size_t c
= 0; c
<= len
; c
++)
3601 return wxCONV_FAILED
;
3608 size_t wxCSConv::GetMBNulLen() const
3610 CreateConvIfNeeded();
3614 return m_convReal
->GetMBNulLen();
3620 // ----------------------------------------------------------------------------
3622 // ----------------------------------------------------------------------------
3625 static wxMBConv_win32 wxConvLibcObj
;
3626 #elif defined(__WXMAC__) && !defined(__MACH__)
3627 static wxMBConv_mac wxConvLibcObj
;
3629 static wxMBConvLibc wxConvLibcObj
;
3632 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
3633 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
3634 static wxMBConvUTF7 wxConvUTF7Obj
;
3635 static wxMBConvUTF8 wxConvUTF8Obj
;
3636 #if defined(__WXMAC__) && defined(TARGET_CARBON)
3637 static wxMBConv_macUTF8D wxConvMacUTF8DObj
;
3639 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
3640 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
3641 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
3642 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
3643 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
3644 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
3645 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvUI
= &wxConvLocal
;
3646 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
= &
3648 #if defined(__WXMAC__) && defined(TARGET_CARBON)
3657 #else // !wxUSE_WCHAR_T
3659 // stand-ins in absence of wchar_t
3660 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
3665 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T