1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // For compilers that support precompilation, includes "wx.h".
16 #include "wx/wxprec.h"
22 #include "wx/hashmap.h"
25 #include "wx/strconv.h"
30 #include "wx/msw/private.h"
31 #include "wx/msw/missing.h"
42 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
43 #define wxHAVE_WIN32_MB2WC
52 #include "wx/thread.h"
55 #include "wx/encconv.h"
56 #include "wx/fontmap.h"
60 #include <ATSUnicode.h>
61 #include <TextCommon.h>
62 #include <TextEncodingConverter.h>
65 // includes Mac headers
66 #include "wx/mac/private.h"
70 #define TRACE_STRCONV _T("strconv")
72 // WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise wchar_t is
// supposed to be 4 bytes wide and to hold UTF-32
74 #if SIZEOF_WCHAR_T == 2
79 // ============================================================================
81 // ============================================================================
83 // helper function used by ToWChar() and cWC2MB() below: examines up to n bytes
// starting at p, the loop condition stops at the first byte which is not '\0'
// or when n reaches 0
//
// NOTE(review): the loop body and the return statement are not visible in
// this excerpt; judging by the name and by cWC2MB(), which treats
// !NotAllNULs() as "the output is NUL-terminated", it presumably returns true
// if at least one of the n bytes is not NUL -- confirm against the full source
84 static bool NotAllNULs(const char *p
, size_t n
)
86 while ( n
&& *p
++ == '\0' )
92 // ----------------------------------------------------------------------------
93 // UTF-16 en/decoding to/from UCS-4 with surrogates handling
94 // ----------------------------------------------------------------------------
// Encodes the UCS-4 value input as UTF-16 code units stored through output.
//
// Three cases are visible below: input is stored unchanged as a single 16-bit
// unit, input >= 0x110000 (i.e. beyond the last Unicode code point 0x10ffff)
// is rejected with wxCONV_FAILED, and otherwise input is split into a
// surrogate pair written to output[0] and output[1].
//
// NOTE(review): the condition of the first branch, any NULL check on output
// and the success return values are not visible in this excerpt; callers
// compare the result with 2 for "second character of a surrogate", so it is
// presumably the number of 16-bit units produced -- confirm against the full
// source
96 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
// single unit case: the value is stored as is
101 *output
= (wxUint16
) input
;
// values from 0x110000 upwards can't be represented in UTF-16
105 else if (input
>= 0x110000)
107 return wxCONV_FAILED
;
// high surrogate: 0xd7c0 == 0xd800 - (0x10000 >> 10), so adding it to the
// shifted value both removes the 0x10000 offset and adds the 0xd800 base
113 *output
++ = (wxUint16
) ((input
>> 10) + 0xd7c0);
// low surrogate: the lowest 10 bits on top of the 0xdc00 base
114 *output
= (wxUint16
) ((input
& 0x3ff) + 0xdc00);
// Decodes one UCS-4 value from the UTF-16 code unit(s) at input into output.
//
// A first unit outside of 0xd800..0xdfff is not a surrogate. Otherwise the
// second unit is examined: if it is not in 0xdc00..0xdfff, wxCONV_FAILED is
// returned, else both units are combined into a single value.
//
// NOTE(review): the success return values are not visible in this excerpt;
// wxMBConvUTF8::WC2MB() advances its input pointer by the result, so it is
// presumably the number of 16-bit units consumed -- confirm.
//
// NOTE(review): the visible checks treat any first unit in 0xd800..0xdfff as
// the start of a pair, so a low surrogate (0xdc00..0xdfff) in the leading
// position is not rejected here -- confirm whether this is intended.
121 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
// is the first unit outside of the surrogate range?
123 if ((*input
< 0xd800) || (*input
> 0xdfff))
// the unit following a surrogate must be in 0xdc00..0xdfff
128 else if ((input
[1] < 0xdc00) || (input
[1] > 0xdfff))
131 return wxCONV_FAILED
;
// inverse of the computation in encode_utf16(): subtracting 0xd7c0 removes
// the 0xd800 base and re-adds the 0x10000 offset in a single step
135 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
141 typedef wchar_t wxDecodeSurrogate_t
;
143 typedef wxUint16 wxDecodeSurrogate_t
;
144 #endif // WC_UTF16/!WC_UTF16
146 // returns the next UTF-32 character from the wchar_t buffer and advances the
147 // pointer to the character after this one
//
149 // if an invalid character is found, *pSrc is set to NULL, the caller must
// check for this
//
// wxDecodeSurrogate_t is either wchar_t or wxUint16 (see the typedefs just
// above), in both cases the buffer is handed to decode_utf16() as an array of
// 16-bit units
151 static wxUint32
wxDecodeSurrogate(const wxDecodeSurrogate_t
**pSrc
)
// n receives the result of decode_utf16() and out the decoded value
155 n
= decode_utf16(wx_reinterpret_cast(const wxUint16
*, *pSrc
), out
);
// decode_utf16() rejected the sequence at *pSrc
156 if ( n
== wxCONV_FAILED
)
164 // ----------------------------------------------------------------------------
166 // ----------------------------------------------------------------------------
169 wxMBConv::ToWChar(wchar_t *dst
, size_t dstLen
,
170 const char *src
, size_t srcLen
) const
172 // although new conversion classes are supposed to implement this function
173 // directly, the existing ones only implement the old MB2WC() and so, to
174 // avoid having to rewrite all conversion classes at once, we provide a
175 // default (but not efficient) implementation of this one in terms of the
176 // old function by copying the input to ensure that it's NUL-terminated and
177 // then using MB2WC() to convert it
179 // the number of chars [which would be] written to dst [if it were not NULL]
180 size_t dstWritten
= 0;
182 // the number of NULs terminating this string
183 size_t nulLen
= 0; // not really needed, but just to avoid warnings
185 // if we were not given the input size we just have to assume that the
186 // string is properly terminated as we have no way of knowing how long it
187 // is anyhow, but if we do have the size check whether there are enough
191 if ( srcLen
!= wxNO_LEN
)
193 // we need to know how to find the end of this string
194 nulLen
= GetMBNulLen();
195 if ( nulLen
== wxCONV_FAILED
)
196 return wxCONV_FAILED
;
198 // if there are enough NULs we can avoid the copy
199 if ( srcLen
< nulLen
|| NotAllNULs(src
+ srcLen
- nulLen
, nulLen
) )
201 // make a copy in order to properly NUL-terminate the string
202 bufTmp
= wxCharBuffer(srcLen
+ nulLen
- 1 /* 1 will be added */);
203 char * const p
= bufTmp
.data();
204 memcpy(p
, src
, srcLen
);
205 for ( char *s
= p
+ srcLen
; s
< p
+ srcLen
+ nulLen
; s
++ )
211 srcEnd
= src
+ srcLen
;
213 else // quit after the first loop iteration
220 // try to convert the current chunk
221 size_t lenChunk
= MB2WC(NULL
, src
, 0);
222 if ( lenChunk
== wxCONV_FAILED
)
223 return wxCONV_FAILED
;
225 lenChunk
++; // for the L'\0' at the end of this chunk
227 dstWritten
+= lenChunk
;
231 // nothing left in the input string, conversion succeeded
237 if ( dstWritten
> dstLen
)
238 return wxCONV_FAILED
;
240 if ( MB2WC(dst
, src
, lenChunk
) == wxCONV_FAILED
)
241 return wxCONV_FAILED
;
248 // we convert just one chunk in this case as this is the entire
253 // advance the input pointer past the end of this chunk
254 while ( NotAllNULs(src
, nulLen
) )
256 // notice that we must skip over multiple bytes here as we suppose
257 // that if NUL takes 2 or 4 bytes, then all the other characters do
258 // too and so if advanced by a single byte we might erroneously
259 // detect sequences of NUL bytes in the middle of the input
263 src
+= nulLen
; // skipping over its terminator as well
265 // note that ">=" (and not just "==") is needed here as the terminator
266 // we skipped just above could be inside or just after the buffer
267 // delimited by srcEnd
276 wxMBConv::FromWChar(char *dst
, size_t dstLen
,
277 const wchar_t *src
, size_t srcLen
) const
279 // the number of chars [which would be] written to dst [if it were not NULL]
280 size_t dstWritten
= 0;
282 // make a copy of the input string unless it is already properly
285 // if we don't know its length we have no choice but to assume that it is,
286 // indeed, properly terminated
287 wxWCharBuffer bufTmp
;
288 if ( srcLen
== wxNO_LEN
)
290 srcLen
= wxWcslen(src
) + 1;
292 else if ( srcLen
!= 0 && src
[srcLen
- 1] != L
'\0' )
294 // make a copy in order to properly NUL-terminate the string
295 bufTmp
= wxWCharBuffer(srcLen
);
296 memcpy(bufTmp
.data(), src
, srcLen
* sizeof(wchar_t));
300 const size_t lenNul
= GetMBNulLen();
301 for ( const wchar_t * const srcEnd
= src
+ srcLen
;
303 src
+= wxWcslen(src
) + 1 /* skip L'\0' too */ )
305 // try to convert the current chunk
306 size_t lenChunk
= WC2MB(NULL
, src
, 0);
308 if ( lenChunk
== wxCONV_FAILED
)
309 return wxCONV_FAILED
;
312 dstWritten
+= lenChunk
;
316 if ( dstWritten
> dstLen
)
317 return wxCONV_FAILED
;
319 if ( WC2MB(dst
, src
, lenChunk
) == wxCONV_FAILED
)
320 return wxCONV_FAILED
;
// Default implementation of the old-style conversion function in terms of
// ToWChar(): converts the multibyte string inBuff into outBuff, outLen being
// passed to ToWChar() as the size of the destination buffer.
//
// No source length is given to ToWChar() here, so its default value for
// srcLen applies (presumably wxNO_LEN, i.e. NUL-terminated input -- confirm
// in the class declaration).
//
// NOTE(review): the default ToWChar() is itself implemented using MB2WC(), so
// a derived class apparently has to override at least one of the two.
329 size_t wxMBConv::MB2WC(wchar_t *outBuff
, const char *inBuff
, size_t outLen
) const
331 size_t rc
= ToWChar(outBuff
, outLen
, inBuff
);
// only adjust the result if the conversion succeeded
332 if ( rc
!= wxCONV_FAILED
)
334 // ToWChar() returns the buffer length, i.e. including the trailing
335 // NUL, while this method doesn't take it into account
// Default implementation of the old-style conversion function in terms of
// FromWChar(): converts the wide string inBuff into outBuff, outLen being
// passed to FromWChar() as the size of the destination buffer.
//
// No source length is given to FromWChar() here, so its default value for
// srcLen applies (presumably wxNO_LEN, i.e. NUL-terminated input -- confirm
// in the class declaration).
//
// NOTE(review): what is done with rc on success is not visible in this
// excerpt; by analogy with MB2WC() the trailing NUL(s) counted by FromWChar()
// are presumably excluded from the returned length -- confirm.
342 size_t wxMBConv::WC2MB(char *outBuff
, const wchar_t *inBuff
, size_t outLen
) const
344 size_t rc
= FromWChar(outBuff
, outLen
, inBuff
);
// only adjust the result if the conversion succeeded
345 if ( rc
!= wxCONV_FAILED
)
353 wxMBConv::~wxMBConv()
355 // nothing to do here (necessary for Darwin linking probably)
// Converts the multibyte string psz to a newly allocated wide character
// buffer using two calls to MB2WC(): the first one, with a NULL destination,
// only computes the length and the second one does the conversion.
//
// An empty wxWCharBuffer is returned on the failure path visible at the end
// of the function.
//
// NOTE(review): the successful return and any check of psz itself are not
// visible in this excerpt.
358 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
362 // calculate the length of the buffer needed first
363 const size_t nLen
= MB2WC(NULL
, psz
, 0);
364 if ( nLen
!= wxCONV_FAILED
)
366 // now do the actual conversion
367 wxWCharBuffer
buf(nLen
/* +1 added implicitly */);
369 // +1 for the trailing NUL
370 if ( MB2WC(buf
.data(), psz
, nLen
+ 1) != wxCONV_FAILED
)
// conversion failed: return an empty buffer
375 return wxWCharBuffer();
// Converts the wide string pwz to a newly allocated multibyte buffer using
// two calls to WC2MB(): the first one, with a NULL destination, only computes
// the length and the second one does the conversion.
//
// An empty wxCharBuffer is returned on the failure path visible at the end of
// the function.
//
// NOTE(review): the successful return and any check of pwz itself are not
// visible in this excerpt.
378 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
// calculate the length of the buffer needed first
382 const size_t nLen
= WC2MB(NULL
, pwz
, 0);
383 if ( nLen
!= wxCONV_FAILED
)
385 // extra space for trailing NUL(s)
// (static: GetMaxMBNulLen() is only called the first time we get here)
386 static const size_t extraLen
= GetMaxMBNulLen();
// -1 as wxCharBuffer adds one byte for the terminator itself (compare with
// the "1 will be added" remark in ToWChar())
388 wxCharBuffer
buf(nLen
+ extraLen
- 1);
389 if ( WC2MB(buf
.data(), pwz
, nLen
+ extraLen
) != wxCONV_FAILED
)
// conversion failed: return an empty buffer
394 return wxCharBuffer();
398 wxMBConv::cMB2WC(const char *inBuff
, size_t inLen
, size_t *outLen
) const
400 const size_t dstLen
= ToWChar(NULL
, 0, inBuff
, inLen
);
401 if ( dstLen
!= wxCONV_FAILED
)
403 wxWCharBuffer
wbuf(dstLen
- 1);
404 if ( ToWChar(wbuf
.data(), dstLen
, inBuff
, inLen
) != wxCONV_FAILED
)
409 if ( wbuf
[dstLen
- 1] == L
'\0' )
420 return wxWCharBuffer();
424 wxMBConv::cWC2MB(const wchar_t *inBuff
, size_t inLen
, size_t *outLen
) const
426 size_t dstLen
= FromWChar(NULL
, 0, inBuff
, inLen
);
427 if ( dstLen
!= wxCONV_FAILED
)
429 // special case of empty input: can't allocate 0 size buffer below as
430 // wxCharBuffer insists on NUL-terminating it
431 wxCharBuffer
buf(dstLen
? dstLen
- 1 : 1);
432 if ( FromWChar(buf
.data(), dstLen
, inBuff
, inLen
) != wxCONV_FAILED
)
438 const size_t nulLen
= GetMBNulLen();
439 if ( dstLen
>= nulLen
&&
440 !NotAllNULs(buf
.data() + dstLen
- nulLen
, nulLen
) )
442 // in this case the output is NUL-terminated and we're not
443 // supposed to count NUL
455 return wxCharBuffer();
458 // ----------------------------------------------------------------------------
460 // ----------------------------------------------------------------------------
// Multibyte to wide character conversion: forwards all the arguments
// unchanged to wxMB2WC() and returns its result.
//
// NOTE(review): wxMB2WC() is defined outside of this excerpt; given the class
// name it presumably wraps the C library conversion function -- confirm.
462 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
464 return wxMB2WC(buf
, psz
, n
);
// Wide character to multibyte conversion: forwards all the arguments
// unchanged to wxWC2MB() and returns its result.
//
// NOTE(review): wxWC2MB() is defined outside of this excerpt; given the class
// name it presumably wraps the C library conversion function -- confirm.
467 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
469 return wxWC2MB(buf
, psz
, n
);
472 // ----------------------------------------------------------------------------
473 // wxConvBrokenFileNames
474 // ----------------------------------------------------------------------------
// Creates the converter m_conv which is used for the given charset.
//
// If charset is NULL or compares equal to "UTF-8" or "UTF8" using
// wxStricmp(), a wxMBConvUTF8 with the MAP_INVALID_UTF8_TO_OCTAL option is
// created, otherwise a wxCSConv for this charset is used.
//
// NOTE(review): m_conv is allocated with new here, the code releasing it is
// not visible in this excerpt.
478 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar
*charset
)
// no charset specified or one of the two accepted spellings of UTF-8
480 if ( !charset
|| wxStricmp(charset
, _T("UTF-8")) == 0
481 || wxStricmp(charset
, _T("UTF8")) == 0 )
482 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
// any other charset is handled by wxCSConv
484 m_conv
= new wxCSConv(charset
);
489 // ----------------------------------------------------------------------------
491 // ----------------------------------------------------------------------------
493 // Implementation (C) 2004 Fredrik Roubert
496 // BASE64 decoding table
498 static const unsigned char utf7unb64
[] =
500 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
501 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
502 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
503 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
504 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
505 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
506 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
507 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
508 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
509 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
510 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
511 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
512 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
513 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
514 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
515 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
516 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
517 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
518 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
519 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
520 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
521 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
522 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
523 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
524 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
525 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
526 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
527 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
528 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
529 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
530 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
531 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
534 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
538 while ( *psz
&& (!buf
|| (len
< n
)) )
540 unsigned char cc
= *psz
++;
548 else if (*psz
== '-')
556 else // start of BASE64 encoded string
560 for ( ok
= lsb
= false, d
= 0, l
= 0;
561 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff;
566 for (l
+= 6; l
>= 8; lsb
= !lsb
)
568 unsigned char c
= (unsigned char)((d
>> (l
-= 8)) % 256);
578 *buf
= (wchar_t)(c
<< 8);
587 // in valid UTF7 we should have valid characters after '+'
588 return wxCONV_FAILED
;
596 if ( buf
&& (len
< n
) )
603 // BASE64 encoding table
605 static const unsigned char utf7enb64
[] =
607 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
608 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
609 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
610 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
611 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
612 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
613 'w', 'x', 'y', 'z', '0', '1', '2', '3',
614 '4', '5', '6', '7', '8', '9', '+', '/'
618 // UTF-7 encoding table
620 // 0 - Set D (directly encoded characters)
621 // 1 - Set O (optional direct characters)
622 // 2 - whitespace characters (optional)
623 // 3 - special characters
625 static const unsigned char utf7encode
[128] =
627 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
628 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
629 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
630 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
631 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
632 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
633 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
634 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
637 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
641 while (*psz
&& ((!buf
) || (len
< n
)))
644 if (cc
< 0x80 && utf7encode
[cc
] < 1)
653 else if (((wxUint32
)cc
) > 0xffff)
655 // no surrogate pair generation (yet?)
656 return wxCONV_FAILED
;
667 // BASE64 encode string
668 unsigned int lsb
, d
, l
;
669 for (d
= 0, l
= 0; /*nothing*/; psz
++)
671 for (lsb
= 0; lsb
< 2; lsb
++)
674 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
676 for (l
+= 8; l
>= 6; )
680 *buf
++ = utf7enb64
[(d
>> l
) % 64];
686 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
693 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
705 if (buf
&& (len
< n
))
711 // ----------------------------------------------------------------------------
713 // ----------------------------------------------------------------------------
715 static wxUint32 utf8_max
[]=
716 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
718 // boundaries of the private use area we use to (temporarily) remap
719 // characters invalid in a UTF-8 encoded string
720 const wxUint32 wxUnicodePUA
= 0x100000;
721 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
723 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
727 while (*psz
&& ((!buf
) || (len
< n
)))
729 const char *opsz
= psz
;
730 bool invalid
= false;
731 unsigned char cc
= *psz
++, fc
= cc
;
733 for (cnt
= 0; fc
& 0x80; cnt
++)
743 // escape the escape character for octal escapes
744 if ((m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
745 && cc
== '\\' && (!buf
|| len
< n
))
757 // invalid UTF-8 sequence
762 unsigned ocnt
= cnt
- 1;
763 wxUint32 res
= cc
& (0x3f >> cnt
);
767 if ((cc
& 0xC0) != 0x80)
769 // invalid UTF-8 sequence
775 res
= (res
<< 6) | (cc
& 0x3f);
778 if (invalid
|| res
<= utf8_max
[ocnt
])
780 // illegal UTF-8 encoding
783 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
784 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
786 // if one of our PUA characters turns up externally
787 // it must also be treated as an illegal sequence
788 // (a bit like you have to escape an escape character)
794 // cast is ok because wchar_t == wxUint16 if WC_UTF16
795 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
796 if (pa
== wxCONV_FAILED
)
808 *buf
++ = (wchar_t)res
;
810 #endif // WC_UTF16/!WC_UTF16
816 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
818 while (opsz
< psz
&& (!buf
|| len
< n
))
821 // cast is ok because wchar_t == wxUint16 if WC_UTF16
822 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
823 wxASSERT(pa
!= wxCONV_FAILED
);
830 *buf
++ = (wchar_t)(wxUnicodePUA
+ (unsigned char)*opsz
);
836 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
838 while (opsz
< psz
&& (!buf
|| len
< n
))
840 if ( buf
&& len
+ 3 < n
)
842 unsigned char on
= *opsz
;
844 *buf
++ = (wchar_t)( L
'0' + on
/ 0100 );
845 *buf
++ = (wchar_t)( L
'0' + (on
% 0100) / 010 );
846 *buf
++ = (wchar_t)( L
'0' + on
% 010 );
853 else // MAP_INVALID_UTF8_NOT
855 return wxCONV_FAILED
;
861 if (buf
&& (len
< n
))
// returns true if wch is one of the octal digits L'0'..L'7'
//
// wxMBConvUTF8::WC2MB() below uses it to test the three characters at psz
// when the MAP_INVALID_UTF8_TO_OCTAL option is set
867 static inline bool isoctal(wchar_t wch
)
869 return L
'0' <= wch
&& wch
<= L
'7';
872 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
876 while (*psz
&& ((!buf
) || (len
< n
)))
881 // cast is ok for WC_UTF16
882 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
883 psz
+= (pa
== wxCONV_FAILED
) ? 1 : pa
;
885 cc
= (*psz
++) & 0x7fffffff;
888 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
889 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
892 *buf
++ = (char)(cc
- wxUnicodePUA
);
895 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
896 && cc
== L
'\\' && psz
[0] == L
'\\' )
903 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
905 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
909 *buf
++ = (char) ((psz
[0] - L
'0') * 0100 +
910 (psz
[1] - L
'0') * 010 +
920 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++)
936 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
938 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
944 if (buf
&& (len
< n
))
950 // ============================================================================
952 // ============================================================================
954 #ifdef WORDS_BIGENDIAN
955 #define wxMBConvUTF16straight wxMBConvUTF16BE
956 #define wxMBConvUTF16swap wxMBConvUTF16LE
958 #define wxMBConvUTF16swap wxMBConvUTF16BE
959 #define wxMBConvUTF16straight wxMBConvUTF16LE
// Computes or validates the length in bytes of the UTF-16 input src.
//
// If srcLen is wxNO_LEN, the input is scanned as 16-bit units up to and
// including the first zero unit and the count is converted to bytes.
// Otherwise srcLen must be a multiple of BYTES_PER_CHAR, wxCONV_FAILED is
// returned if it is not.
//
// NOTE(review): the final return statement is not visible in this excerpt;
// the ToWChar() callers divide the result by BYTES_PER_CHAR, so it is
// presumably srcLen in bytes. They also compare the result with wxNO_LEN
// rather than wxCONV_FAILED, so the two constants presumably have the same
// value -- confirm both.
963 size_t wxMBConvUTF16Base::GetLength(const char *src
, size_t srcLen
)
965 if ( srcLen
== wxNO_LEN
)
967 // count the number of bytes in input, including the trailing NULs
968 const wxUint16
*inBuff
= wx_reinterpret_cast(const wxUint16
*, src
);
// srcLen starts at 1 in order to account for the terminating zero unit
969 for ( srcLen
= 1; *inBuff
++; srcLen
++ )
// convert the number of 16-bit units to the number of bytes
972 srcLen
*= BYTES_PER_CHAR
;
974 else // we already have the length
976 // we can only convert an entire number of UTF-16 characters
977 if ( srcLen
% BYTES_PER_CHAR
)
978 return wxCONV_FAILED
;
984 // case when in-memory representation is UTF-16 too
987 // ----------------------------------------------------------------------------
988 // conversions without endianness change
989 // ----------------------------------------------------------------------------
992 wxMBConvUTF16straight::ToWChar(wchar_t *dst
, size_t dstLen
,
993 const char *src
, size_t srcLen
) const
995 // set up the scene for using memcpy() (which is presumably more efficient
996 // than copying the bytes one by one)
997 srcLen
= GetLength(src
, srcLen
);
998 if ( srcLen
== wxNO_LEN
)
999 return wxCONV_FAILED
;
1001 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1004 if ( dstLen
< inLen
)
1005 return wxCONV_FAILED
;
1007 memcpy(dst
, src
, srcLen
);
1014 wxMBConvUTF16straight::FromWChar(char *dst
, size_t dstLen
,
1015 const wchar_t *src
, size_t srcLen
) const
1017 if ( srcLen
== wxNO_LEN
)
1018 srcLen
= wxWcslen(src
) + 1;
1020 srcLen
*= BYTES_PER_CHAR
;
1024 if ( dstLen
< srcLen
)
1025 return wxCONV_FAILED
;
1027 memcpy(dst
, src
, srcLen
);
1033 // ----------------------------------------------------------------------------
1034 // endian-reversing conversions
1035 // ----------------------------------------------------------------------------
1038 wxMBConvUTF16swap::ToWChar(wchar_t *dst
, size_t dstLen
,
1039 const char *src
, size_t srcLen
) const
1041 srcLen
= GetLength(src
, srcLen
);
1042 if ( srcLen
== wxNO_LEN
)
1043 return wxCONV_FAILED
;
1045 srcLen
/= BYTES_PER_CHAR
;
1049 if ( dstLen
< srcLen
)
1050 return wxCONV_FAILED
;
1052 const wxUint16
*inBuff
= wx_reinterpret_cast(const wxUint16
*, src
);
1053 for ( size_t n
= 0; n
< srcLen
; n
++, inBuff
++ )
1055 *dst
++ = wxUINT16_SWAP_ALWAYS(*inBuff
);
1063 wxMBConvUTF16swap::FromWChar(char *dst
, size_t dstLen
,
1064 const wchar_t *src
, size_t srcLen
) const
1066 if ( srcLen
== wxNO_LEN
)
1067 srcLen
= wxWcslen(src
) + 1;
1069 srcLen
*= BYTES_PER_CHAR
;
1073 if ( dstLen
< srcLen
)
1074 return wxCONV_FAILED
;
1076 wxUint16
*outBuff
= wx_reinterpret_cast(wxUint16
*, dst
);
1077 for ( size_t n
= 0; n
< srcLen
; n
+= BYTES_PER_CHAR
, src
++ )
1079 *outBuff
++ = wxUINT16_SWAP_ALWAYS(*src
);
1086 #else // !WC_UTF16: wchar_t is UTF-32
1088 // ----------------------------------------------------------------------------
1089 // conversions without endianness change
1090 // ----------------------------------------------------------------------------
1093 wxMBConvUTF16straight::ToWChar(wchar_t *dst
, size_t dstLen
,
1094 const char *src
, size_t srcLen
) const
1096 srcLen
= GetLength(src
, srcLen
);
1097 if ( srcLen
== wxNO_LEN
)
1098 return wxCONV_FAILED
;
1100 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1103 // optimization: return maximal space which could be needed for this
1104 // string even if the real size could be smaller if the buffer contains
1110 const wxUint16
*inBuff
= wx_reinterpret_cast(const wxUint16
*, src
);
1111 for ( const wxUint16
* const inEnd
= inBuff
+ inLen
; inBuff
< inEnd
; )
1113 const wxUint32 ch
= wxDecodeSurrogate(&inBuff
);
1115 return wxCONV_FAILED
;
1117 if ( ++outLen
> dstLen
)
1118 return wxCONV_FAILED
;
1128 wxMBConvUTF16straight::FromWChar(char *dst
, size_t dstLen
,
1129 const wchar_t *src
, size_t srcLen
) const
1131 if ( srcLen
== wxNO_LEN
)
1132 srcLen
= wxWcslen(src
) + 1;
1135 wxUint16
*outBuff
= wx_reinterpret_cast(wxUint16
*, dst
);
1136 for ( size_t n
= 0; n
< srcLen
; n
++ )
1139 const size_t numChars
= encode_utf16(*src
++, cc
);
1140 if ( numChars
== wxCONV_FAILED
)
1141 return wxCONV_FAILED
;
1143 outLen
+= numChars
* BYTES_PER_CHAR
;
1146 if ( outLen
> dstLen
)
1147 return wxCONV_FAILED
;
1150 if ( numChars
== 2 )
1152 // second character of a surrogate
1161 // ----------------------------------------------------------------------------
1162 // endian-reversing conversions
1163 // ----------------------------------------------------------------------------
1166 wxMBConvUTF16swap::ToWChar(wchar_t *dst
, size_t dstLen
,
1167 const char *src
, size_t srcLen
) const
1169 srcLen
= GetLength(src
, srcLen
);
1170 if ( srcLen
== wxNO_LEN
)
1171 return wxCONV_FAILED
;
1173 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1176 // optimization: return maximal space which could be needed for this
1177 // string even if the real size could be smaller if the buffer contains
1183 const wxUint16
*inBuff
= wx_reinterpret_cast(const wxUint16
*, src
);
1184 for ( const wxUint16
* const inEnd
= inBuff
+ inLen
; inBuff
< inEnd
; )
1189 tmp
[0] = wxUINT16_SWAP_ALWAYS(*inBuff
);
1191 tmp
[1] = wxUINT16_SWAP_ALWAYS(*inBuff
);
1193 const size_t numChars
= decode_utf16(tmp
, ch
);
1194 if ( numChars
== wxCONV_FAILED
)
1195 return wxCONV_FAILED
;
1197 if ( numChars
== 2 )
1200 if ( ++outLen
> dstLen
)
1201 return wxCONV_FAILED
;
1211 wxMBConvUTF16swap::FromWChar(char *dst
, size_t dstLen
,
1212 const wchar_t *src
, size_t srcLen
) const
1214 if ( srcLen
== wxNO_LEN
)
1215 srcLen
= wxWcslen(src
) + 1;
1218 wxUint16
*outBuff
= wx_reinterpret_cast(wxUint16
*, dst
);
1219 for ( const wchar_t *srcEnd
= src
+ srcLen
; src
< srcEnd
; src
++ )
1222 const size_t numChars
= encode_utf16(*src
, cc
);
1223 if ( numChars
== wxCONV_FAILED
)
1224 return wxCONV_FAILED
;
1226 outLen
+= numChars
* BYTES_PER_CHAR
;
1229 if ( outLen
> dstLen
)
1230 return wxCONV_FAILED
;
1232 *outBuff
++ = wxUINT16_SWAP_ALWAYS(cc
[0]);
1233 if ( numChars
== 2 )
1235 // second character of a surrogate
1236 *outBuff
++ = wxUINT16_SWAP_ALWAYS(cc
[1]);
1244 #endif // WC_UTF16/!WC_UTF16
1247 // ============================================================================
1249 // ============================================================================
1251 #ifdef WORDS_BIGENDIAN
1252 #define wxMBConvUTF32straight wxMBConvUTF32BE
1253 #define wxMBConvUTF32swap wxMBConvUTF32LE
1255 #define wxMBConvUTF32swap wxMBConvUTF32BE
1256 #define wxMBConvUTF32straight wxMBConvUTF32LE
1260 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1261 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
// Computes or validates the length in bytes of the UTF-32 input src.
//
// If srcLen is wxNO_LEN, the input is scanned as 32-bit units up to and
// including the first zero unit and the count is converted to bytes.
// Otherwise srcLen must be a multiple of BYTES_PER_CHAR, wxCONV_FAILED is
// returned if it is not.
//
// NOTE(review): the final return statement is not visible in this excerpt;
// the ToWChar() callers divide the result by BYTES_PER_CHAR, so it is
// presumably srcLen in bytes. They also compare the result with wxNO_LEN
// rather than wxCONV_FAILED, so the two constants presumably have the same
// value -- confirm both.
1264 size_t wxMBConvUTF32Base::GetLength(const char *src
, size_t srcLen
)
1266 if ( srcLen
== wxNO_LEN
)
1268 // count the number of bytes in input, including the trailing NULs
1269 const wxUint32
*inBuff
= wx_reinterpret_cast(const wxUint32
*, src
);
// srcLen starts at 1 in order to account for the terminating zero unit
1270 for ( srcLen
= 1; *inBuff
++; srcLen
++ )
// convert the number of 32-bit units to the number of bytes
1273 srcLen
*= BYTES_PER_CHAR
;
1275 else // we already have the length
1277 // we can only convert an entire number of UTF-32 characters
1278 if ( srcLen
% BYTES_PER_CHAR
)
1279 return wxCONV_FAILED
;
1285 // case when in-memory representation is UTF-16
1288 // ----------------------------------------------------------------------------
1289 // conversions without endianness change
1290 // ----------------------------------------------------------------------------
1293 wxMBConvUTF32straight::ToWChar(wchar_t *dst
, size_t dstLen
,
1294 const char *src
, size_t srcLen
) const
1296 srcLen
= GetLength(src
, srcLen
);
1297 if ( srcLen
== wxNO_LEN
)
1298 return wxCONV_FAILED
;
1300 const wxUint32
*inBuff
= wx_reinterpret_cast(const wxUint32
*, src
);
1301 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1303 for ( size_t n
= 0; n
< inLen
; n
++ )
1306 const size_t numChars
= encode_utf16(*inBuff
++, cc
);
1307 if ( numChars
== wxCONV_FAILED
)
1308 return wxCONV_FAILED
;
1313 if ( outLen
> dstLen
)
1314 return wxCONV_FAILED
;
1317 if ( numChars
== 2 )
1319 // second character of a surrogate
1329 wxMBConvUTF32straight::FromWChar(char *dst
, size_t dstLen
,
1330 const wchar_t *src
, size_t srcLen
) const
1332 if ( srcLen
== wxNO_LEN
)
1333 srcLen
= wxWcslen(src
) + 1;
1337 // optimization: return maximal space which could be needed for this
1338 // string instead of the exact amount which could be less if there are
1339 // any surrogates in the input
1341 // we consider that surrogates are rare enough to make it worthwhile to
1342 // avoid running the loop below at the cost of slightly extra memory
1344 return srcLen
* BYTES_PER_CHAR
;
1347 wxUint32
*outBuff
= wx_reinterpret_cast(wxUint32
*, dst
);
1349 for ( const wchar_t * const srcEnd
= src
+ srcLen
; src
< srcEnd
; )
1351 const wxUint32 ch
= wxDecodeSurrogate(&src
);
1353 return wxCONV_FAILED
;
1355 outLen
+= BYTES_PER_CHAR
;
1357 if ( outLen
> dstLen
)
1358 return wxCONV_FAILED
;
1366 // ----------------------------------------------------------------------------
1367 // endian-reversing conversions
1368 // ----------------------------------------------------------------------------
1371 wxMBConvUTF32swap::ToWChar(wchar_t *dst
, size_t dstLen
,
1372 const char *src
, size_t srcLen
) const
1374 srcLen
= GetLength(src
, srcLen
);
1375 if ( srcLen
== wxNO_LEN
)
1376 return wxCONV_FAILED
;
1378 const wxUint32
*inBuff
= wx_reinterpret_cast(const wxUint32
*, src
);
1379 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1381 for ( size_t n
= 0; n
< inLen
; n
++, inBuff
++ )
1384 const size_t numChars
= encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff
), cc
);
1385 if ( numChars
== wxCONV_FAILED
)
1386 return wxCONV_FAILED
;
1391 if ( outLen
> dstLen
)
1392 return wxCONV_FAILED
;
1395 if ( numChars
== 2 )
1397 // second character of a surrogate
1407 wxMBConvUTF32swap::FromWChar(char *dst
, size_t dstLen
,
1408 const wchar_t *src
, size_t srcLen
) const
1410 if ( srcLen
== wxNO_LEN
)
1411 srcLen
= wxWcslen(src
) + 1;
1415 // optimization: return maximal space which could be needed for this
1416 // string instead of the exact amount which could be less if there are
1417 // any surrogates in the input
1419 // we consider that surrogates are rare enough to make it worthwhile to
1420 // avoid running the loop below at the cost of slightly extra memory
1422 return srcLen
*BYTES_PER_CHAR
;
1425 wxUint32
*outBuff
= wx_reinterpret_cast(wxUint32
*, dst
);
1427 for ( const wchar_t * const srcEnd
= src
+ srcLen
; src
< srcEnd
; )
1429 const wxUint32 ch
= wxDecodeSurrogate(&src
);
1431 return wxCONV_FAILED
;
1433 outLen
+= BYTES_PER_CHAR
;
1435 if ( outLen
> dstLen
)
1436 return wxCONV_FAILED
;
1438 *outBuff
++ = wxUINT32_SWAP_ALWAYS(ch
);
1444 #else // !WC_UTF16: wchar_t is UTF-32
1446 // ----------------------------------------------------------------------------
1447 // conversions without endianness change
1448 // ----------------------------------------------------------------------------
1451 wxMBConvUTF32straight::ToWChar(wchar_t *dst
, size_t dstLen
,
1452 const char *src
, size_t srcLen
) const
1454 // use memcpy() as it should be much faster than hand-written loop
1455 srcLen
= GetLength(src
, srcLen
);
1456 if ( srcLen
== wxNO_LEN
)
1457 return wxCONV_FAILED
;
1459 const size_t inLen
= srcLen
/BYTES_PER_CHAR
;
1462 if ( dstLen
< inLen
)
1463 return wxCONV_FAILED
;
1465 memcpy(dst
, src
, srcLen
);
1472 wxMBConvUTF32straight::FromWChar(char *dst
, size_t dstLen
,
1473 const wchar_t *src
, size_t srcLen
) const
1475 if ( srcLen
== wxNO_LEN
)
1476 srcLen
= wxWcslen(src
) + 1;
1478 srcLen
*= BYTES_PER_CHAR
;
1482 if ( dstLen
< srcLen
)
1483 return wxCONV_FAILED
;
1485 memcpy(dst
, src
, srcLen
);
1491 // ----------------------------------------------------------------------------
1492 // endian-reversing conversions
1493 // ----------------------------------------------------------------------------
1496 wxMBConvUTF32swap::ToWChar(wchar_t *dst
, size_t dstLen
,
1497 const char *src
, size_t srcLen
) const
1499 srcLen
= GetLength(src
, srcLen
);
1500 if ( srcLen
== wxNO_LEN
)
1501 return wxCONV_FAILED
;
1503 srcLen
/= BYTES_PER_CHAR
;
1507 if ( dstLen
< srcLen
)
1508 return wxCONV_FAILED
;
1510 const wxUint32
*inBuff
= wx_reinterpret_cast(const wxUint32
*, src
);
1511 for ( size_t n
= 0; n
< srcLen
; n
++, inBuff
++ )
1513 *dst
++ = wxUINT32_SWAP_ALWAYS(*inBuff
);
1521 wxMBConvUTF32swap::FromWChar(char *dst
, size_t dstLen
,
1522 const wchar_t *src
, size_t srcLen
) const
1524 if ( srcLen
== wxNO_LEN
)
1525 srcLen
= wxWcslen(src
) + 1;
1527 srcLen
*= BYTES_PER_CHAR
;
1531 if ( dstLen
< srcLen
)
1532 return wxCONV_FAILED
;
1534 wxUint32
*outBuff
= wx_reinterpret_cast(wxUint32
*, dst
);
1535 for ( size_t n
= 0; n
< srcLen
; n
+= BYTES_PER_CHAR
, src
++ )
1537 *outBuff
++ = wxUINT32_SWAP_ALWAYS(*src
);
1544 #endif // WC_UTF16/!WC_UTF16
1547 // ============================================================================
1548 // The classes doing conversion using the iconv_xxx() functions
1549 // ============================================================================
1553 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1554 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1555 // (unless there's yet another bug in glibc) the only case when iconv()
1556 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1557 // left in the input buffer -- when _real_ error occurs,
1558 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1560 // [This bug does not appear in glibc 2.2.]
1561 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1562 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1563 (errno != E2BIG || bufLeft != 0))
1565 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1568 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1570 #define ICONV_T_INVALID ((iconv_t)-1)
1572 #if SIZEOF_WCHAR_T == 4
1573 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1574 #define WC_ENC wxFONTENCODING_UTF32
1575 #elif SIZEOF_WCHAR_T == 2
1576 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1577 #define WC_ENC wxFONTENCODING_UTF16
1578 #else // sizeof(wchar_t) != 2 nor 4
1579 // does this ever happen?
1580 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1583 // ----------------------------------------------------------------------------
1584 // wxMBConv_iconv: encapsulates an iconv character set
1585 // ----------------------------------------------------------------------------
1587 class wxMBConv_iconv
: public wxMBConv
1590 wxMBConv_iconv(const wxChar
*name
);
1591 virtual ~wxMBConv_iconv();
1593 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1594 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1596 // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
1597 virtual size_t GetMBNulLen() const;
1599 virtual wxMBConv
*Clone() const
1601 wxMBConv_iconv
*p
= new wxMBConv_iconv(m_name
);
1602 p
->m_minMBCharWidth
= m_minMBCharWidth
;
1607 { return (m2w
!= ICONV_T_INVALID
) && (w2m
!= ICONV_T_INVALID
); }
1610 // the iconv handlers used to translate from multibyte
1611 // to wide char and in the other direction
1616 // guards access to m2w and w2m objects
1617 wxMutex m_iconvMutex
;
1621 // the name (for iconv_open()) of a wide char charset -- if none is
1622 // available on this machine, it will remain NULL
1623 static wxString ms_wcCharsetName
;
1625 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1626 // different endian-ness than the native one
1627 static bool ms_wcNeedsSwap
;
1630 // name of the encoding handled by this conversion
1633 // cached result of GetMBNulLen(); set to 0 meaning "unknown"
1635 size_t m_minMBCharWidth
;
1638 // make the constructor available for unit testing
1639 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_iconv( const wxChar
* name
)
1641 wxMBConv_iconv
* result
= new wxMBConv_iconv( name
);
1642 if ( !result
->IsOk() )
1651 wxString
wxMBConv_iconv::ms_wcCharsetName
;
1652 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1654 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1657 m_minMBCharWidth
= 0;
1659 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1660 // names for the charsets
1661 const wxCharBuffer
cname(wxString(name
).ToAscii());
1663 // check for charset that represents wchar_t:
1664 if ( ms_wcCharsetName
.empty() )
1666 wxLogTrace(TRACE_STRCONV
, _T("Looking for wide char codeset:"));
1669 const wxChar
**names
= wxFontMapperBase::GetAllEncodingNames(WC_ENC
);
1670 #else // !wxUSE_FONTMAP
1671 static const wxChar
*names_static
[] =
1673 #if SIZEOF_WCHAR_T == 4
1675 #elif SIZEOF_WCHAR_T = 2
1680 const wxChar
**names
= names_static
;
1681 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1683 for ( ; *names
&& ms_wcCharsetName
.empty(); ++names
)
1685 const wxString
nameCS(*names
);
1687 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1688 wxString
nameXE(nameCS
);
1690 #ifdef WORDS_BIGENDIAN
1692 #else // little endian
1696 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1699 m2w
= iconv_open(nameXE
.ToAscii(), cname
);
1700 if ( m2w
== ICONV_T_INVALID
)
1702 // try charset w/o bytesex info (e.g. "UCS4")
1703 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1705 m2w
= iconv_open(nameCS
.ToAscii(), cname
);
1707 // and check for bytesex ourselves:
1708 if ( m2w
!= ICONV_T_INVALID
)
1710 char buf
[2], *bufPtr
;
1711 wchar_t wbuf
[2], *wbufPtr
;
1719 outsz
= SIZEOF_WCHAR_T
* 2;
1724 m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1725 (char**)&wbufPtr
, &outsz
);
1727 if (ICONV_FAILED(res
, insz
))
1729 wxLogLastError(wxT("iconv"));
1730 wxLogError(_("Conversion to charset '%s' doesn't work."),
1733 else // ok, can convert to this encoding, remember it
1735 ms_wcCharsetName
= nameCS
;
1736 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1740 else // use charset not requiring byte swapping
1742 ms_wcCharsetName
= nameXE
;
1746 wxLogTrace(TRACE_STRCONV
,
1747 wxT("iconv wchar_t charset is \"%s\"%s"),
1748 ms_wcCharsetName
.empty() ? _T("<none>")
1749 : ms_wcCharsetName
.c_str(),
1750 ms_wcNeedsSwap
? _T(" (needs swap)")
1753 else // we already have ms_wcCharsetName
1755 m2w
= iconv_open(ms_wcCharsetName
.ToAscii(), cname
);
1758 if ( ms_wcCharsetName
.empty() )
1760 w2m
= ICONV_T_INVALID
;
1764 w2m
= iconv_open(cname
, ms_wcCharsetName
.ToAscii());
1765 if ( w2m
== ICONV_T_INVALID
)
1767 wxLogTrace(TRACE_STRCONV
,
1768 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1769 ms_wcCharsetName
.c_str(), cname
.data());
1774 wxMBConv_iconv::~wxMBConv_iconv()
1776 if ( m2w
!= ICONV_T_INVALID
)
1778 if ( w2m
!= ICONV_T_INVALID
)
1782 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1784 // find the string length: notice that must be done differently for
1785 // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1787 const size_t nulLen
= GetMBNulLen();
1791 return wxCONV_FAILED
;
1794 inbuf
= strlen(psz
); // arguably more optimized than our version
1799 // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1800 // they also have to start at character boundary and not span two
1801 // adjacent characters
1803 for ( p
= psz
; NotAllNULs(p
, nulLen
); p
+= nulLen
)
1810 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1811 // Unfortunately there are a couple of global wxCSConv objects such as
1812 // wxConvLocal that are used all over wx code, so we have to make sure
1813 // the handle is used by at most one thread at the time. Otherwise
1814 // only a few wx classes would be safe to use from non-main threads
1815 // as MB<->WC conversion would fail "randomly".
1816 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1817 #endif // wxUSE_THREADS
1819 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1821 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1822 wchar_t *bufPtr
= buf
;
1823 const char *pszPtr
= psz
;
1827 // have destination buffer, convert there
1829 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1830 (char**)&bufPtr
, &outbuf
);
1831 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1835 // convert to native endianness
1836 for ( unsigned i
= 0; i
< res
; i
++ )
1837 buf
[n
] = WC_BSWAP(buf
[i
]);
1840 // NUL-terminate the string if there is any space left
1846 // no destination buffer... convert using temp buffer
1847 // to calculate destination buffer requirement
1854 outbuf
= 8 * SIZEOF_WCHAR_T
;
1857 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1858 (char**)&bufPtr
, &outbuf
);
1860 res
+= 8 - (outbuf
/ SIZEOF_WCHAR_T
);
1862 while ((cres
== (size_t)-1) && (errno
== E2BIG
));
1865 if (ICONV_FAILED(cres
, inbuf
))
1867 //VS: it is ok if iconv fails, hence trace only
1868 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1869 return wxCONV_FAILED
;
1875 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1878 // NB: explained in MB2WC
1879 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1882 size_t inlen
= wxWcslen(psz
);
1883 size_t inbuf
= inlen
* SIZEOF_WCHAR_T
;
1887 wchar_t *tmpbuf
= 0;
1891 // need to copy to temp buffer to switch endianness
1892 // (doing WC_BSWAP twice on the original buffer won't help, as it
1893 // could be in read-only memory, or be accessed in some other thread)
1894 tmpbuf
= (wchar_t *)malloc(inbuf
+ SIZEOF_WCHAR_T
);
1895 for ( size_t i
= 0; i
< inlen
; i
++ )
1896 tmpbuf
[n
] = WC_BSWAP(psz
[i
]);
1898 tmpbuf
[inlen
] = L
'\0';
1904 // have destination buffer, convert there
1905 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1909 // NB: iconv was given only wcslen(psz) characters on input, and so
1910 // it couldn't convert the trailing zero. Let's do it ourselves
1911 // if there's some room left for it in the output buffer.
1917 // no destination buffer: convert using temp buffer
1918 // to calculate destination buffer requirement
1926 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1930 while ((cres
== (size_t)-1) && (errno
== E2BIG
));
1938 if (ICONV_FAILED(cres
, inbuf
))
1940 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1941 return wxCONV_FAILED
;
1947 size_t wxMBConv_iconv::GetMBNulLen() const
1949 if ( m_minMBCharWidth
== 0 )
1951 wxMBConv_iconv
* const self
= wxConstCast(this, wxMBConv_iconv
);
1954 // NB: explained in MB2WC
1955 wxMutexLocker
lock(self
->m_iconvMutex
);
1958 wchar_t *wnul
= L
"";
1959 char buf
[8]; // should be enough for NUL in any encoding
1960 size_t inLen
= sizeof(wchar_t),
1961 outLen
= WXSIZEOF(buf
);
1962 char *inBuff
= (char *)wnul
;
1963 char *outBuff
= buf
;
1964 if ( iconv(w2m
, ICONV_CHAR_CAST(&inBuff
), &inLen
, &outBuff
, &outLen
) == (size_t)-1 )
1966 self
->m_minMBCharWidth
= (size_t)-1;
1970 self
->m_minMBCharWidth
= outBuff
- buf
;
1974 return m_minMBCharWidth
;
1977 #endif // HAVE_ICONV
1980 // ============================================================================
1981 // Win32 conversion classes
1982 // ============================================================================
1984 #ifdef wxHAVE_WIN32_MB2WC
1988 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1989 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1992 class wxMBConv_win32
: public wxMBConv
1997 m_CodePage
= CP_ACP
;
1998 m_minMBCharWidth
= 0;
2001 wxMBConv_win32(const wxMBConv_win32
& conv
)
2004 m_CodePage
= conv
.m_CodePage
;
2005 m_minMBCharWidth
= conv
.m_minMBCharWidth
;
2009 wxMBConv_win32(const wxChar
* name
)
2011 m_CodePage
= wxCharsetToCodepage(name
);
2012 m_minMBCharWidth
= 0;
2015 wxMBConv_win32(wxFontEncoding encoding
)
2017 m_CodePage
= wxEncodingToCodepage(encoding
);
2018 m_minMBCharWidth
= 0;
2020 #endif // wxUSE_FONTMAP
2022 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2024 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
2025 // the behaviour is not compatible with the Unix version (using iconv)
2026 // and break the library itself, e.g. wxTextInputStream::NextChar()
2027 // wouldn't work if reading an incomplete MB char didn't result in an
2030 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
2031 // Win XP or newer and it is not supported for UTF-[78] so we always
2032 // use our own conversions in this case. See
2033 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2034 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
2035 if ( m_CodePage
== CP_UTF8
)
2037 return wxConvUTF8
.MB2WC(buf
, psz
, n
);
2040 if ( m_CodePage
== CP_UTF7
)
2042 return wxConvUTF7
.MB2WC(buf
, psz
, n
);
2046 if ( (m_CodePage
< 50000 && m_CodePage
!= CP_SYMBOL
) &&
2047 IsAtLeastWin2kSP4() )
2049 flags
= MB_ERR_INVALID_CHARS
;
2052 const size_t len
= ::MultiByteToWideChar
2054 m_CodePage
, // code page
2055 flags
, // flags: fall on error
2056 psz
, // input string
2057 -1, // its length (NUL-terminated)
2058 buf
, // output string
2059 buf
? n
: 0 // size of output buffer
2063 // function totally failed
2064 return wxCONV_FAILED
;
2067 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2068 // check if we succeeded, by doing a double trip:
2069 if ( !flags
&& buf
)
2071 const size_t mbLen
= strlen(psz
);
2072 wxCharBuffer
mbBuf(mbLen
);
2073 if ( ::WideCharToMultiByte
2080 mbLen
+ 1, // size in bytes, not length
2084 strcmp(mbBuf
, psz
) != 0 )
2086 // we didn't obtain the same thing we started from, hence
2087 // the conversion was lossy and we consider that it failed
2088 return wxCONV_FAILED
;
2092 // note that it returns count of written chars for buf != NULL and size
2093 // of the needed buffer for buf == NULL so in either case the length of
2094 // the string (which never includes the terminating NUL) is one less
2098 virtual size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
2101 we have a problem here: by default, WideCharToMultiByte() may
2102 replace characters unrepresentable in the target code page with bad
2103 quality approximations such as turning "1/2" symbol (U+00BD) into
2104 "1" for the code pages which don't have it and we, obviously, want
2105 to avoid this at any price
2107 the trouble is that this function does it _silently_, i.e. it won't
2108 even tell us whether it did or not... Win98/2000 and higher provide
2109 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2110 we have to resort to a round trip, i.e. check that converting back
2111 results in the same string -- this is, of course, expensive but
2112 otherwise we simply can't be sure to not garble the data.
2115 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2116 // it doesn't work with CJK encodings (which we test for rather roughly
2117 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2119 BOOL usedDef
wxDUMMY_INITIALIZE(false);
2122 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
2124 // it's our lucky day
2125 flags
= WC_NO_BEST_FIT_CHARS
;
2126 pUsedDef
= &usedDef
;
2128 else // old system or unsupported encoding
2134 const size_t len
= ::WideCharToMultiByte
2136 m_CodePage
, // code page
2137 flags
, // either none or no best fit
2138 pwz
, // input string
2139 -1, // it is (wide) NUL-terminated
2140 buf
, // output buffer
2141 buf
? n
: 0, // and its size
2142 NULL
, // default "replacement" char
2143 pUsedDef
// [out] was it used?
2148 // function totally failed
2149 return wxCONV_FAILED
;
2152 // if we were really converting, check if we succeeded
2157 // check if the conversion failed, i.e. if any replacements
2160 return wxCONV_FAILED
;
2162 else // we must resort to double tripping...
2164 wxWCharBuffer
wcBuf(n
);
2165 if ( MB2WC(wcBuf
.data(), buf
, n
) == wxCONV_FAILED
||
2166 wcscmp(wcBuf
, pwz
) != 0 )
2168 // we didn't obtain the same thing we started from, hence
2169 // the conversion was lossy and we consider that it failed
2170 return wxCONV_FAILED
;
2175 // see the comment above for the reason of "len - 1"
2179 virtual size_t GetMBNulLen() const
2181 if ( m_minMBCharWidth
== 0 )
2183 int len
= ::WideCharToMultiByte
2185 m_CodePage
, // code page
2187 L
"", // input string
2188 1, // translate just the NUL
2189 NULL
, // output buffer
2191 NULL
, // no replacement char
2192 NULL
// [out] don't care if it was used
2195 wxMBConv_win32
* const self
= wxConstCast(this, wxMBConv_win32
);
2199 wxLogDebug(_T("Unexpected NUL length %d"), len
);
2200 self
->m_minMBCharWidth
= (size_t)-1;
2204 self
->m_minMBCharWidth
= (size_t)-1;
2210 self
->m_minMBCharWidth
= len
;
2215 return m_minMBCharWidth
;
2218 virtual wxMBConv
*Clone() const { return new wxMBConv_win32(*this); }
2220 bool IsOk() const { return m_CodePage
!= -1; }
2223 static bool CanUseNoBestFit()
2225 static int s_isWin98Or2k
= -1;
2227 if ( s_isWin98Or2k
== -1 )
2230 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
2232 case wxOS_WINDOWS_9X
:
2233 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
2236 case wxOS_WINDOWS_NT
:
2237 s_isWin98Or2k
= verMaj
>= 5;
2241 // unknown: be conservative by default
2246 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
2249 return s_isWin98Or2k
== 1;
2252 static bool IsAtLeastWin2kSP4()
2257 static int s_isAtLeastWin2kSP4
= -1;
2259 if ( s_isAtLeastWin2kSP4
== -1 )
2261 OSVERSIONINFOEX ver
;
2263 memset(&ver
, 0, sizeof(ver
));
2264 ver
.dwOSVersionInfoSize
= sizeof(ver
);
2265 GetVersionEx((OSVERSIONINFO
*)&ver
);
2267 s_isAtLeastWin2kSP4
=
2268 ((ver
.dwMajorVersion
> 5) || // Vista+
2269 (ver
.dwMajorVersion
== 5 && ver
.dwMinorVersion
> 0) || // XP/2003
2270 (ver
.dwMajorVersion
== 5 && ver
.dwMinorVersion
== 0 &&
2271 ver
.wServicePackMajor
>= 4)) // 2000 SP4+
2275 return s_isAtLeastWin2kSP4
== 1;
2280 // the code page we're working with
2283 // cached result of GetMBNulLen(), set to 0 initially meaning
2285 size_t m_minMBCharWidth
;
2288 #endif // wxHAVE_WIN32_MB2WC
2290 // ============================================================================
2291 // Cocoa conversion classes
2292 // ============================================================================
2294 #if defined(__WXCOCOA__)
// RN: There is no UTF-32 support in either Core Foundation or Cocoa.
// Strangely enough, Core Foundation uses UTF-32 internally quite a bit -
// it's just not public (yet).
2300 #include <CoreFoundation/CFString.h>
2301 #include <CoreFoundation/CFStringEncodingExt.h>
2303 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
2305 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
2309 case wxFONTENCODING_DEFAULT
:
2310 enc
= CFStringGetSystemEncoding();
2313 case wxFONTENCODING_ISO8859_1
:
2314 enc
= kCFStringEncodingISOLatin1
;
2316 case wxFONTENCODING_ISO8859_2
:
2317 enc
= kCFStringEncodingISOLatin2
;
2319 case wxFONTENCODING_ISO8859_3
:
2320 enc
= kCFStringEncodingISOLatin3
;
2322 case wxFONTENCODING_ISO8859_4
:
2323 enc
= kCFStringEncodingISOLatin4
;
2325 case wxFONTENCODING_ISO8859_5
:
2326 enc
= kCFStringEncodingISOLatinCyrillic
;
2328 case wxFONTENCODING_ISO8859_6
:
2329 enc
= kCFStringEncodingISOLatinArabic
;
2331 case wxFONTENCODING_ISO8859_7
:
2332 enc
= kCFStringEncodingISOLatinGreek
;
2334 case wxFONTENCODING_ISO8859_8
:
2335 enc
= kCFStringEncodingISOLatinHebrew
;
2337 case wxFONTENCODING_ISO8859_9
:
2338 enc
= kCFStringEncodingISOLatin5
;
2340 case wxFONTENCODING_ISO8859_10
:
2341 enc
= kCFStringEncodingISOLatin6
;
2343 case wxFONTENCODING_ISO8859_11
:
2344 enc
= kCFStringEncodingISOLatinThai
;
2346 case wxFONTENCODING_ISO8859_13
:
2347 enc
= kCFStringEncodingISOLatin7
;
2349 case wxFONTENCODING_ISO8859_14
:
2350 enc
= kCFStringEncodingISOLatin8
;
2352 case wxFONTENCODING_ISO8859_15
:
2353 enc
= kCFStringEncodingISOLatin9
;
2356 case wxFONTENCODING_KOI8
:
2357 enc
= kCFStringEncodingKOI8_R
;
2359 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
2360 enc
= kCFStringEncodingDOSRussian
;
2363 // case wxFONTENCODING_BULGARIAN :
2367 case wxFONTENCODING_CP437
:
2368 enc
= kCFStringEncodingDOSLatinUS
;
2370 case wxFONTENCODING_CP850
:
2371 enc
= kCFStringEncodingDOSLatin1
;
2373 case wxFONTENCODING_CP852
:
2374 enc
= kCFStringEncodingDOSLatin2
;
2376 case wxFONTENCODING_CP855
:
2377 enc
= kCFStringEncodingDOSCyrillic
;
2379 case wxFONTENCODING_CP866
:
2380 enc
= kCFStringEncodingDOSRussian
;
2382 case wxFONTENCODING_CP874
:
2383 enc
= kCFStringEncodingDOSThai
;
2385 case wxFONTENCODING_CP932
:
2386 enc
= kCFStringEncodingDOSJapanese
;
2388 case wxFONTENCODING_CP936
:
2389 enc
= kCFStringEncodingDOSChineseSimplif
;
2391 case wxFONTENCODING_CP949
:
2392 enc
= kCFStringEncodingDOSKorean
;
2394 case wxFONTENCODING_CP950
:
2395 enc
= kCFStringEncodingDOSChineseTrad
;
2397 case wxFONTENCODING_CP1250
:
2398 enc
= kCFStringEncodingWindowsLatin2
;
2400 case wxFONTENCODING_CP1251
:
2401 enc
= kCFStringEncodingWindowsCyrillic
;
2403 case wxFONTENCODING_CP1252
:
2404 enc
= kCFStringEncodingWindowsLatin1
;
2406 case wxFONTENCODING_CP1253
:
2407 enc
= kCFStringEncodingWindowsGreek
;
2409 case wxFONTENCODING_CP1254
:
2410 enc
= kCFStringEncodingWindowsLatin5
;
2412 case wxFONTENCODING_CP1255
:
2413 enc
= kCFStringEncodingWindowsHebrew
;
2415 case wxFONTENCODING_CP1256
:
2416 enc
= kCFStringEncodingWindowsArabic
;
2418 case wxFONTENCODING_CP1257
:
2419 enc
= kCFStringEncodingWindowsBalticRim
;
2421 // This only really encodes to UTF7 (if that) evidently
2422 // case wxFONTENCODING_UTF7 :
2423 // enc = kCFStringEncodingNonLossyASCII ;
2425 case wxFONTENCODING_UTF8
:
2426 enc
= kCFStringEncodingUTF8
;
2428 case wxFONTENCODING_EUC_JP
:
2429 enc
= kCFStringEncodingEUC_JP
;
2431 case wxFONTENCODING_UTF16
:
2432 enc
= kCFStringEncodingUnicode
;
2434 case wxFONTENCODING_MACROMAN
:
2435 enc
= kCFStringEncodingMacRoman
;
2437 case wxFONTENCODING_MACJAPANESE
:
2438 enc
= kCFStringEncodingMacJapanese
;
2440 case wxFONTENCODING_MACCHINESETRAD
:
2441 enc
= kCFStringEncodingMacChineseTrad
;
2443 case wxFONTENCODING_MACKOREAN
:
2444 enc
= kCFStringEncodingMacKorean
;
2446 case wxFONTENCODING_MACARABIC
:
2447 enc
= kCFStringEncodingMacArabic
;
2449 case wxFONTENCODING_MACHEBREW
:
2450 enc
= kCFStringEncodingMacHebrew
;
2452 case wxFONTENCODING_MACGREEK
:
2453 enc
= kCFStringEncodingMacGreek
;
2455 case wxFONTENCODING_MACCYRILLIC
:
2456 enc
= kCFStringEncodingMacCyrillic
;
2458 case wxFONTENCODING_MACDEVANAGARI
:
2459 enc
= kCFStringEncodingMacDevanagari
;
2461 case wxFONTENCODING_MACGURMUKHI
:
2462 enc
= kCFStringEncodingMacGurmukhi
;
2464 case wxFONTENCODING_MACGUJARATI
:
2465 enc
= kCFStringEncodingMacGujarati
;
2467 case wxFONTENCODING_MACORIYA
:
2468 enc
= kCFStringEncodingMacOriya
;
2470 case wxFONTENCODING_MACBENGALI
:
2471 enc
= kCFStringEncodingMacBengali
;
2473 case wxFONTENCODING_MACTAMIL
:
2474 enc
= kCFStringEncodingMacTamil
;
2476 case wxFONTENCODING_MACTELUGU
:
2477 enc
= kCFStringEncodingMacTelugu
;
2479 case wxFONTENCODING_MACKANNADA
:
2480 enc
= kCFStringEncodingMacKannada
;
2482 case wxFONTENCODING_MACMALAJALAM
:
2483 enc
= kCFStringEncodingMacMalayalam
;
2485 case wxFONTENCODING_MACSINHALESE
:
2486 enc
= kCFStringEncodingMacSinhalese
;
2488 case wxFONTENCODING_MACBURMESE
:
2489 enc
= kCFStringEncodingMacBurmese
;
2491 case wxFONTENCODING_MACKHMER
:
2492 enc
= kCFStringEncodingMacKhmer
;
2494 case wxFONTENCODING_MACTHAI
:
2495 enc
= kCFStringEncodingMacThai
;
2497 case wxFONTENCODING_MACLAOTIAN
:
2498 enc
= kCFStringEncodingMacLaotian
;
2500 case wxFONTENCODING_MACGEORGIAN
:
2501 enc
= kCFStringEncodingMacGeorgian
;
2503 case wxFONTENCODING_MACARMENIAN
:
2504 enc
= kCFStringEncodingMacArmenian
;
2506 case wxFONTENCODING_MACCHINESESIMP
:
2507 enc
= kCFStringEncodingMacChineseSimp
;
2509 case wxFONTENCODING_MACTIBETAN
:
2510 enc
= kCFStringEncodingMacTibetan
;
2512 case wxFONTENCODING_MACMONGOLIAN
:
2513 enc
= kCFStringEncodingMacMongolian
;
2515 case wxFONTENCODING_MACETHIOPIC
:
2516 enc
= kCFStringEncodingMacEthiopic
;
2518 case wxFONTENCODING_MACCENTRALEUR
:
2519 enc
= kCFStringEncodingMacCentralEurRoman
;
2521 case wxFONTENCODING_MACVIATNAMESE
:
2522 enc
= kCFStringEncodingMacVietnamese
;
2524 case wxFONTENCODING_MACARABICEXT
:
2525 enc
= kCFStringEncodingMacExtArabic
;
2527 case wxFONTENCODING_MACSYMBOL
:
2528 enc
= kCFStringEncodingMacSymbol
;
2530 case wxFONTENCODING_MACDINGBATS
:
2531 enc
= kCFStringEncodingMacDingbats
;
2533 case wxFONTENCODING_MACTURKISH
:
2534 enc
= kCFStringEncodingMacTurkish
;
2536 case wxFONTENCODING_MACCROATIAN
:
2537 enc
= kCFStringEncodingMacCroatian
;
2539 case wxFONTENCODING_MACICELANDIC
:
2540 enc
= kCFStringEncodingMacIcelandic
;
2542 case wxFONTENCODING_MACROMANIAN
:
2543 enc
= kCFStringEncodingMacRomanian
;
2545 case wxFONTENCODING_MACCELTIC
:
2546 enc
= kCFStringEncodingMacCeltic
;
2548 case wxFONTENCODING_MACGAELIC
:
2549 enc
= kCFStringEncodingMacGaelic
;
2551 // case wxFONTENCODING_MACKEYBOARD :
2552 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2556 // because gcc is picky
2563 class wxMBConv_cocoa
: public wxMBConv
2568 Init(CFStringGetSystemEncoding()) ;
2571 wxMBConv_cocoa(const wxMBConv_cocoa
& conv
)
2573 m_encoding
= conv
.m_encoding
;
2577 wxMBConv_cocoa(const wxChar
* name
)
2579 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2583 wxMBConv_cocoa(wxFontEncoding encoding
)
2585 Init( wxCFStringEncFromFontEnc(encoding
) );
2592 void Init( CFStringEncoding encoding
)
2594 m_encoding
= encoding
;
2597 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2601 CFStringRef theString
= CFStringCreateWithBytes (
2602 NULL
, //the allocator
2603 (const UInt8
*)szUnConv
,
2606 false //no BOM/external representation
2609 wxASSERT(theString
);
2611 size_t nOutLength
= CFStringGetLength(theString
);
2615 CFRelease(theString
);
2619 CFRange theRange
= { 0, nOutSize
};
2621 #if SIZEOF_WCHAR_T == 4
2622 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2625 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2627 CFRelease(theString
);
2629 szUniCharBuffer
[nOutLength
] = '\0';
2631 #if SIZEOF_WCHAR_T == 4
2632 wxMBConvUTF16 converter
;
2633 converter
.MB2WC( szOut
, (const char*)szUniCharBuffer
, nOutSize
);
2634 delete [] szUniCharBuffer
;
2640 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2644 size_t nRealOutSize
;
2645 size_t nBufSize
= wxWcslen(szUnConv
);
2646 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
2648 #if SIZEOF_WCHAR_T == 4
2649 wxMBConvUTF16 converter
;
2650 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2651 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1];
2652 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
));
2653 nBufSize
/= sizeof(UniChar
);
2656 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2660 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
2663 wxASSERT(theString
);
2665 //Note that CER puts a BOM when converting to unicode
2666 //so we check and use getchars instead in that case
2667 if (m_encoding
== kCFStringEncodingUnicode
)
2670 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2672 nRealOutSize
= CFStringGetLength(theString
) + 1;
2678 CFRangeMake(0, CFStringGetLength(theString
)),
2680 0, //what to put in characters that can't be converted -
2681 //0 tells CFString to return NULL if it meets such a character
2682 false, //not an external representation
2685 (CFIndex
*) &nRealOutSize
2689 CFRelease(theString
);
2691 #if SIZEOF_WCHAR_T == 4
2692 delete[] szUniBuffer
;
2695 return nRealOutSize
- 1;
2698 virtual wxMBConv
*Clone() const { return new wxMBConv_cocoa(*this); }
2702 return m_encoding
!= kCFStringEncodingInvalidId
&&
2703 CFStringIsEncodingAvailable(m_encoding
);
2707 CFStringEncoding m_encoding
;
2710 #endif // defined(__WXCOCOA__)
2712 // ============================================================================
2713 // Mac conversion classes
2714 // ============================================================================
2716 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2718 class wxMBConv_mac
: public wxMBConv
2723 Init(CFStringGetSystemEncoding()) ;
2726 wxMBConv_mac(const wxMBConv_mac
& conv
)
2728 Init(conv
.m_char_encoding
);
2732 wxMBConv_mac(const wxChar
* name
)
2734 Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) );
2738 wxMBConv_mac(wxFontEncoding encoding
)
2740 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
2745 OSStatus status
= noErr
;
2746 if (m_MB2WC_converter
)
2747 status
= TECDisposeConverter(m_MB2WC_converter
);
2748 if (m_WC2MB_converter
)
2749 status
= TECDisposeConverter(m_WC2MB_converter
);
2752 void Init( TextEncodingBase encoding
,TextEncodingVariant encodingVariant
= kTextEncodingDefaultVariant
,
2753 TextEncodingFormat encodingFormat
= kTextEncodingDefaultFormat
)
2755 m_MB2WC_converter
= NULL
;
2756 m_WC2MB_converter
= NULL
;
2757 m_char_encoding
= CreateTextEncoding(encoding
, encodingVariant
, encodingFormat
) ;
2758 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
, 0, kUnicode16BitFormat
) ;
2761 virtual void CreateIfNeeded() const
2763 if ( m_MB2WC_converter
== NULL
&& m_WC2MB_converter
== NULL
)
2765 OSStatus status
= noErr
;
2766 status
= TECCreateConverter(&m_MB2WC_converter
,
2768 m_unicode_encoding
);
2769 wxASSERT_MSG( status
== noErr
, _("Unable to create TextEncodingConverter")) ;
2770 status
= TECCreateConverter(&m_WC2MB_converter
,
2773 wxASSERT_MSG( status
== noErr
, _("Unable to create TextEncodingConverter")) ;
2777 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2780 OSStatus status
= noErr
;
2781 ByteCount byteOutLen
;
2782 ByteCount byteInLen
= strlen(psz
) + 1;
2783 wchar_t *tbuf
= NULL
;
2784 UniChar
* ubuf
= NULL
;
2789 // Apple specs say at least 32
2790 n
= wxMax( 32, byteInLen
) ;
2791 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
2794 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
2796 #if SIZEOF_WCHAR_T == 4
2797 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2799 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2802 status
= TECConvertText(
2803 m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2804 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2806 #if SIZEOF_WCHAR_T == 4
2807 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2808 // is not properly terminated we get random characters at the end
2809 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2810 wxMBConvUTF16 converter
;
2811 res
= converter
.MB2WC( (buf
? buf
: tbuf
), (const char*)ubuf
, n
) ;
2814 res
= byteOutLen
/ sizeof( UniChar
) ;
2820 if ( buf
&& res
< n
)
2826 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2829 OSStatus status
= noErr
;
2830 ByteCount byteOutLen
;
2831 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2837 // Apple specs say at least 32
2838 n
= wxMax( 32, ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2839 tbuf
= (char*) malloc( n
) ;
2842 ByteCount byteBufferLen
= n
;
2843 UniChar
* ubuf
= NULL
;
2845 #if SIZEOF_WCHAR_T == 4
2846 wxMBConvUTF16 converter
;
2847 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2848 byteInLen
= unicharlen
;
2849 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2850 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2852 ubuf
= (UniChar
*) psz
;
2855 status
= TECConvertText(
2856 m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2857 (TextPtr
) (buf
? buf
: tbuf
), byteBufferLen
, &byteOutLen
);
2859 #if SIZEOF_WCHAR_T == 4
2866 size_t res
= byteOutLen
;
2867 if ( buf
&& res
< n
)
2871 //we need to double-trip to verify it didn't insert any ? in place
2872 //of bogus characters
2873 wxWCharBuffer
wcBuf(n
);
2874 size_t pszlen
= wxWcslen(psz
);
2875 if ( MB2WC(wcBuf
.data(), buf
, n
) == wxCONV_FAILED
||
2876 wxWcslen(wcBuf
) != pszlen
||
2877 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2879 // we didn't obtain the same thing we started from, hence
2880 // the conversion was lossy and we consider that it failed
2881 return wxCONV_FAILED
;
2888 virtual wxMBConv
*Clone() const { return new wxMBConv_mac(*this); }
2893 return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
;
2897 mutable TECObjectRef m_MB2WC_converter
;
2898 mutable TECObjectRef m_WC2MB_converter
;
2900 TextEncodingBase m_char_encoding
;
2901 TextEncodingBase m_unicode_encoding
;
2904 // MB is decomposed (D) normalized UTF8
2906 class wxMBConv_macUTF8D
: public wxMBConv_mac
2911 Init( kTextEncodingUnicodeDefault
, kUnicodeNoSubset
, kUnicodeUTF8Format
) ;
2916 ~wxMBConv_macUTF8D()
2919 DisposeUnicodeToTextInfo(&m_uni
);
2920 if (m_uniBack
!=NULL
)
2921 DisposeUnicodeToTextInfo(&m_uniBack
);
// Converts the wide string psz to decomposed UTF-8 in two stages: the text
// is first run through ConvertFromUnicodeToText() with m_uni, then the
// resulting UniChar buffer is encoded by TECConvertText() with
// m_WC2MB_converter.
//
// buf: destination; when it is NULL the output goes to the temporary tbuf
// psz: NUL-terminated source string
// n:   size of the destination in bytes, passed on as byteBufferLen
2924 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2927 OSStatus status
= noErr
;
2928 ByteCount byteOutLen
;
// size of the input in bytes, not counting the terminator
2929 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2935 // Apple specs say at least 32
// scratch size: 8 bytes per input character plus SIZEOF_WCHAR_T, but
// never less than 32
2936 n
= wxMax( 32, ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2937 tbuf
= (char*) malloc( n
) ;
2940 ByteCount byteBufferLen
= n
;
2941 UniChar
* ubuf
= NULL
;
// With a 4 byte wchar_t the input is first re-encoded as UTF-16 into a
// malloc()ed UniChar buffer; the call with a NULL destination is used to
// obtain the size of that buffer.
2943 #if SIZEOF_WCHAR_T == 4
2944 wxMBConvUTF16 converter
;
2945 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2946 byteInLen
= unicharlen
;
2947 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2948 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
// NOTE(review): presumably the branch for a wchar_t that already has the
// size of UniChar, where the input is used in place -- confirm
2950 ubuf
= (UniChar
*) psz
;
2953 // ubuf is a non-decomposed UniChar buffer
// buffer receiving the decomposed text: twice the input size plus 2 bytes
2955 ByteCount dcubuflen
= byteInLen
* 2 + 2 ;
2956 ByteCount dcubufread
, dcubufwritten
;
2957 UniChar
*dcubuf
= (UniChar
*) malloc( dcubuflen
) ;
// first stage: ubuf -> dcubuf through the m_uni mapping
2959 ConvertFromUnicodeToText( m_uni
, byteInLen
, ubuf
,
2960 kUnicodeDefaultDirectionMask
, 0, NULL
, NULL
, NULL
, dcubuflen
, &dcubufread
, &dcubufwritten
, dcubuf
) ;
2962 // we now convert that decomposed buffer into UTF8
// second stage: only the dcubufwritten bytes produced above are encoded
2964 status
= TECConvertText(
2965 m_WC2MB_converter
, (ConstTextPtr
) dcubuf
, dcubufwritten
, &dcubufread
,
2966 (TextPtr
) (buf
? buf
: tbuf
), byteBufferLen
, &byteOutLen
);
2970 #if SIZEOF_WCHAR_T == 4
// number of bytes written by TECConvertText()
2977 size_t res
= byteOutLen
;
2978 if ( buf
&& res
< n
)
2981 // don't test for round-trip fidelity yet, we cannot guarantee it yet
// Converts the UTF-8 string psz to wide characters in two stages:
// TECConvertText() with m_MB2WC_converter decodes it into the UniChar
// buffer dcubuf, then ConvertFromUnicodeToText() with m_uniBack turns the
// decomposed text into composed text in ubuf.
//
// buf: destination; when it is NULL the temporary tbuf is used instead
// psz: NUL-terminated source string
// n:   size of the destination in wide characters
2987 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2990 OSStatus status
= noErr
;
2991 ByteCount byteOutLen
;
// the input length includes the terminating NUL
2992 ByteCount byteInLen
= strlen(psz
) + 1;
2993 wchar_t *tbuf
= NULL
;
2994 UniChar
* ubuf
= NULL
;
2999 // Apple specs say at least 32
3000 n
= wxMax( 32, byteInLen
) ;
3001 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
3004 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
// With a 4 byte wchar_t the composed UniChar text goes to a separately
// allocated buffer and is widened further below.
3006 #if SIZEOF_WCHAR_T == 4
3007 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
// NOTE(review): presumably the branch for a wchar_t that already has the
// size of UniChar, where the destination is written directly -- confirm
3009 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
// buffer receiving the decoded, still decomposed, text
3012 ByteCount dcubuflen
= byteBufferLen
* 2 + 2 ;
3013 ByteCount dcubufread
, dcubufwritten
;
3014 UniChar
*dcubuf
= (UniChar
*) malloc( dcubuflen
) ;
// first stage: psz -> dcubuf; byteInLen is also passed as the output
// argument that follows the input length
3016 status
= TECConvertText(
3017 m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
3018 (TextPtr
) dcubuf
, dcubuflen
, &byteOutLen
);
3019 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
3020 // is not properly terminated we get random characters at the end
3021 dcubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
3023 // now from the decomposed UniChar to properly composed UniChar
// NOTE(review): the output size passed here is dcubuflen although the
// destination is ubuf, which is sized from the smaller byteBufferLen --
// confirm that the composed text can never exceed ubuf
3024 ConvertFromUnicodeToText( m_uniBack
, byteOutLen
, dcubuf
,
3025 kUnicodeDefaultDirectionMask
, 0, NULL
, NULL
, NULL
, dcubuflen
, &dcubufread
, &dcubufwritten
, ubuf
) ;
// continue with the size of the composed text and terminate it
3028 byteOutLen
= dcubufwritten
;
3029 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
// 4 byte wchar_t: widen the UTF-16 text in ubuf into buf (or tbuf);
// the other assignment to res counts the UniChars directly
3032 #if SIZEOF_WCHAR_T == 4
3033 wxMBConvUTF16 converter
;
3034 res
= converter
.MB2WC( (buf
? buf
: tbuf
), (const char*)ubuf
, n
) ;
3037 res
= byteOutLen
/ sizeof( UniChar
) ;
3043 if ( buf
&& res
< n
)
// Calls the base class CreateIfNeeded() and then, while m_uni is still
// NULL, creates the two UnicodeToTextInfo objects used by this class:
// m_uni maps default Unicode (no subset) to the canonical decomposition
// variant and m_uniBack maps it to the canonical composition variant.
// The result of each creation is checked with wxASSERT_MSG().
3049 virtual void CreateIfNeeded() const
3051 wxMBConv_mac::CreateIfNeeded() ;
3052 if ( m_uni
== NULL
)
// mapping used for decomposing: no subset -> canonical decomposition
3054 m_map
.unicodeEncoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
3055 kUnicodeNoSubset
, kTextEncodingDefaultFormat
);
3056 m_map
.otherEncoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
3057 kUnicodeCanonicalDecompVariant
, kTextEncodingDefaultFormat
);
3058 m_map
.mappingVersion
= kUnicodeUseLatestMapping
;
3060 OSStatus err
= CreateUnicodeToTextInfo(&m_map
, &m_uni
);
3061 wxASSERT_MSG( err
== noErr
, _(" Couldn't create the UnicodeConverter")) ;
// m_map is reused for the mapping used for composing:
// no subset -> canonical composition
3063 m_map
.unicodeEncoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
3064 kUnicodeNoSubset
, kTextEncodingDefaultFormat
);
3065 m_map
.otherEncoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
3066 kUnicodeCanonicalCompVariant
, kTextEncodingDefaultFormat
);
3067 m_map
.mappingVersion
= kUnicodeUseLatestMapping
;
3068 err
= CreateUnicodeToTextInfo(&m_map
, &m_uniBack
);
3069 wxASSERT_MSG( err
== noErr
, _(" Couldn't create the UnicodeConverter")) ;
// All three members are mutable because the const CreateIfNeeded() fills
// them in.
// converter to the canonical decomposition variant, used by WC2MB()
3073 mutable UnicodeToTextInfo m_uni
;
// converter to the canonical composition variant, used by MB2WC()
3074 mutable UnicodeToTextInfo m_uniBack
;
// mapping description handed to CreateUnicodeToTextInfo() for both of them
3075 mutable UnicodeMapping m_map
;
3077 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
3079 // ============================================================================
3080 // wxEncodingConverter based conversion classes
3081 // ============================================================================
// wxMBConv implementation built on a pair of wxEncodingConverter objects:
// m2w converts from m_enc to wxFONTENCODING_UNICODE and w2m converts back.
3085 class wxMBConv_wxwin
: public wxMBConv
// m_ok records whether both converters could be initialized
3090 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
3091 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
3095 // temporarily just use wxEncodingConverter stuff,
3096 // so that it works while a better implementation is built
3097 wxMBConv_wxwin(const wxChar
* name
)
// the encoding is looked up from the charset name through the font mapper
3100 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
// NOTE(review): presumably the fallback when no name is available -- confirm
3102 m_enc
= wxFONTENCODING_SYSTEM
;
3107 wxMBConv_wxwin(wxFontEncoding enc
)
// Multibyte to wide conversion through m2w; wxCONV_FAILED is returned when
// Convert() fails.
// NOTE(review): n is unused, so the size of buf is not checked here.
3114 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
3116 size_t inbuf
= strlen(psz
);
3119 if (!m2w
.Convert(psz
, buf
))
3120 return wxCONV_FAILED
;
// Wide to multibyte conversion through w2m; wxCONV_FAILED is returned when
// Convert() fails.
// NOTE(review): n is unused, so the size of buf is not checked here.
3125 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
3127 const size_t inbuf
= wxWcslen(psz
);
3130 if (!w2m
.Convert(psz
, buf
))
3131 return wxCONV_FAILED
;
// The UTF-16 and the UTF-32 encodings are each handled as a separate group
// of cases.
3137 virtual size_t GetMBNulLen() const
3141 case wxFONTENCODING_UTF16BE
:
3142 case wxFONTENCODING_UTF16LE
:
3145 case wxFONTENCODING_UTF32BE
:
3146 case wxFONTENCODING_UTF32LE
:
// a clone is a new converter constructed for the same encoding
3154 virtual wxMBConv
*Clone() const { return new wxMBConv_wxwin(m_enc
); }
// reports the value of m_ok
3156 bool IsOk() const { return m_ok
; }
// the encoding handled by this object
3159 wxFontEncoding m_enc
;
// converters from m_enc to Unicode (m2w) and from Unicode to m_enc (w2m)
3160 wxEncodingConverter m2w
, w2m
;
3163 // were we initialized successfully?
3166 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
3169 // make the constructors available for unit testing
// Allocates a wxMBConv_wxwin for the given charset name with new and checks
// it with IsOk().
3170 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_wxwin( const wxChar
* name
)
3172 wxMBConv_wxwin
* result
= new wxMBConv_wxwin( name
);
3173 if ( !result
->IsOk() )
3182 #endif // wxUSE_FONTMAP
3184 // ============================================================================
3185 // wxCSConv implementation
3186 // ============================================================================
3188 void wxCSConv::Init()
// Constructs the converter from a charset name.
3195 wxCSConv::wxCSConv(const wxChar
*charset
)
// the encoding is derived from the charset name
3205 m_encoding
= wxFontMapperBase::GetEncodingFromName(charset
);
// NOTE(review): presumably the alternative used when the font mapper is
// not compiled in -- confirm
3207 m_encoding
= wxFONTENCODING_SYSTEM
;
// Constructs the converter from a font encoding. wxFONTENCODING_MAX and
// wxFONTENCODING_DEFAULT are not accepted: they trigger wxFAIL_MSG() and
// are replaced by wxFONTENCODING_SYSTEM.
3211 wxCSConv::wxCSConv(wxFontEncoding encoding
)
3213 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
3215 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
3217 encoding
= wxFONTENCODING_SYSTEM
;
// store the (possibly corrected) encoding
3222 m_encoding
= encoding
;
3225 wxCSConv::~wxCSConv()
// Copy constructor: takes over the charset name, through SetName(), and the
// encoding of conv.
3230 wxCSConv::wxCSConv(const wxCSConv
& conv
)
3235 SetName(conv
.m_name
);
3236 m_encoding
= conv
.m_encoding
;
// Assignment: takes over the charset name, through SetName(), and the
// encoding of conv.
3239 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
3243 SetName(conv
.m_name
);
3244 m_encoding
= conv
.m_encoding
;
3249 void wxCSConv::Clear()
// Stores a copy of the charset name, made with wxStrdup(), in m_name.
3258 void wxCSConv::SetName(const wxChar
*charset
)
3262 m_name
= wxStrdup(charset
);
// Hash map from a font encoding to a charset name. wxCSConv::DoCreate()
// stores in it the name used for the wxMBConv_iconv object of an encoding,
// or an empty string to remember a failure.
3269 WX_DECLARE_HASH_MAP( wxFontEncoding
, wxString
, wxIntegerHash
, wxIntegerEqual
,
3270 wxEncodingNameCache
);
3272 static wxEncodingNameCache gs_nameCache
;
// Creates the conversion object for the current m_name / m_encoding. The
// candidates visible below are, in this order: wxMBConv_iconv,
// wxMBConv_win32, wxMBConv_mac, wxMBConv_cocoa, the built-in UTF-7, UTF-8,
// UTF-16 and UTF-32 converters and finally wxMBConv_wxwin; most of them are
// guarded by preprocessor conditions. When nothing could be created an
// error is logged. The objects are allocated with new;
// CreateConvIfNeeded() stores the result in m_convReal.
3275 wxMBConv
*wxCSConv::DoCreate() const
3278 wxLogTrace(TRACE_STRCONV
,
3279 wxT("creating conversion for %s"),
3281 : wxFontMapperBase::GetEncodingName(m_encoding
).c_str()));
3282 #endif // wxUSE_FONTMAP
3284 // check for the special case of ASCII or ISO8859-1 charset: as we have
3285 // special knowledge of it anyhow, we don't need to create a special
3286 // conversion object
3287 if ( m_encoding
== wxFONTENCODING_ISO8859_1
||
3288 m_encoding
== wxFONTENCODING_DEFAULT
)
3290 // don't convert at all
3294 // we trust OS to do conversion better than we can so try external
3295 // conversion methods first
3297 // the full order is:
3298 // 1. OS conversion (iconv() under Unix or Win32 API)
3299 // 2. hard coded conversions for UTF
3300 // 3. wxEncodingConverter as fall back
3306 #endif // !wxUSE_FONTMAP
// local copies of the name and the encoding used by the iconv attempts
3308 wxString
name(m_name
);
3310 wxFontEncoding
encoding(m_encoding
);
// first attempt: iconv with the charset name itself
3313 if ( !name
.empty() )
3315 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
3323 wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
3324 #endif // wxUSE_FONTMAP
// look for a name cached for this encoding by an earlier call; an empty
// cached name records an earlier failure
3328 const wxEncodingNameCache::iterator it
= gs_nameCache
.find(encoding
);
3329 if ( it
!= gs_nameCache
.end() )
3331 if ( it
->second
.empty() )
3334 wxMBConv_iconv
*conv
= new wxMBConv_iconv(it
->second
);
// nothing cached: go through all names known for this encoding
3341 const wxChar
** names
= wxFontMapperBase::GetAllEncodingNames(encoding
);
3342 // CS: if this does not return any valid names (e.g. for MacRoman), the
3343 // encoding used to get a 'failure' entry in the cache all the same, although
3344 // it just has to be created using a different method, so only store failed
3345 // iconv creation attempts (or perhaps we shouldn't do this at all?)
3346 if ( names
[0] != NULL
)
3348 for ( ; *names
; ++names
)
3350 wxMBConv_iconv
*conv
= new wxMBConv_iconv(*names
);
3353 gs_nameCache
[encoding
] = *names
;
3360 gs_nameCache
[encoding
] = _T(""); // cache the failure
3363 #endif // wxUSE_FONTMAP
3365 #endif // HAVE_ICONV
// Win32 conversion, created from the name if there is one and from the
// encoding otherwise
3367 #ifdef wxHAVE_WIN32_MB2WC
3370 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
3371 : new wxMBConv_win32(m_encoding
);
3380 #endif // wxHAVE_WIN32_MB2WC
3382 #if defined(__WXMAC__)
3384 // leave UTF16 and UTF32 to the built-ins of wx
3385 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
3386 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
3389 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
3390 : new wxMBConv_mac(m_encoding
);
3392 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
3402 #if defined(__WXCOCOA__)
3404 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
3407 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
3408 : new wxMBConv_cocoa(m_encoding
);
3410 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
// built-in converters: start from the stored encoding and, when it is
// wxFONTENCODING_SYSTEM and a name is set, derive it from the name
3421 wxFontEncoding enc
= m_encoding
;
3423 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
3425 // use "false" to suppress interactive dialogs -- we can be called from
3426 // anywhere and popping up a dialog from here is the last thing we want to
3428 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
3430 #endif // wxUSE_FONTMAP
3434 case wxFONTENCODING_UTF7
:
3435 return new wxMBConvUTF7
;
3437 case wxFONTENCODING_UTF8
:
3438 return new wxMBConvUTF8
;
3440 case wxFONTENCODING_UTF16BE
:
3441 return new wxMBConvUTF16BE
;
3443 case wxFONTENCODING_UTF16LE
:
3444 return new wxMBConvUTF16LE
;
3446 case wxFONTENCODING_UTF32BE
:
3447 return new wxMBConvUTF32BE
;
3449 case wxFONTENCODING_UTF32LE
:
3450 return new wxMBConvUTF32LE
;
3453 // nothing to do but put here to suppress gcc warnings
// fall back: the wxEncodingConverter based class
3460 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
3461 : new wxMBConv_wxwin(m_encoding
);
3467 #endif // wxUSE_FONTMAP
3469 // NB: This is a hack to prevent deadlock. What could otherwise happen
3470 // in Unicode build: wxConvLocal creation ends up being here
3471 // because of some failure and logs the error. But wxLog will try to
3472 // attach a timestamp, for which it will need wxConvLocal (to convert
3473 // time to char* and then wchar_t*), but that fails, tries to log the
3474 // error, but wxLog has an (already locked) critical section that
3475 // guards the static buffer.
// the flag is set while the error is being logged and cleared afterwards
3476 static bool alreadyLoggingError
= false;
3477 if (!alreadyLoggingError
)
3479 alreadyLoggingError
= true;
3480 wxLogError(_("Cannot convert from the charset '%s'!"),
3484 wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str()
3485 #else // !wxUSE_FONTMAP
3486 wxString::Format(_("encoding %i"), m_encoding
).c_str()
3487 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
3490 alreadyLoggingError
= false;
// Creates the real conversion object: the result of DoCreate() is stored in
// m_convReal and m_deferred is cleared. As the method is const, the members
// are modified through a non-const pointer to this object.
3496 void wxCSConv::CreateConvIfNeeded() const
3500 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
3503 // if we have neither the name nor the encoding, use the default
3504 // encoding for this system
3505 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
// the name becomes a copy of the system encoding name of the locale
3507 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
3509 #endif // wxUSE_INTL
3511 self
->m_convReal
= DoCreate();
3512 self
->m_deferred
= false;
// Multibyte to wide conversion. After making sure the real converter was
// created, the call is forwarded to m_convReal. The loop further below
// instead widens every byte of psz, read as unsigned char, one to one.
3516 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
3518 CreateConvIfNeeded();
3521 return m_convReal
->MB2WC(buf
, psz
, n
);
3524 size_t len
= strlen(psz
);
// c <= len: the terminating NUL is copied as well
// NOTE(review): this loop does not consult n -- confirm that buf is always
// large enough
3528 for (size_t c
= 0; c
<= len
; c
++)
3529 buf
[c
] = (unsigned char)(psz
[c
]);
// Wide to multibyte conversion. After making sure the real converter was
// created, the call is forwarded to m_convReal. The two loops further below
// instead walk over the characters of psz, including the terminating NUL
// (c <= len); both can return wxCONV_FAILED and the first one stores each
// character, cast to char, in buf.
3535 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
3537 CreateConvIfNeeded();
3540 return m_convReal
->WC2MB(buf
, psz
, n
);
3543 const size_t len
= wxWcslen(psz
);
3546 for (size_t c
= 0; c
<= len
; c
++)
3549 return wxCONV_FAILED
;
3551 buf
[c
] = (char)psz
[c
];
3556 for (size_t c
= 0; c
<= len
; c
++)
3559 return wxCONV_FAILED
;
// Makes sure the real converter was created and forwards the query to
// m_convReal.
3566 size_t wxCSConv::GetMBNulLen() const
3568 CreateConvIfNeeded();
3572 return m_convReal
->GetMBNulLen();
3578 // ----------------------------------------------------------------------------
3580 // ----------------------------------------------------------------------------
// File-local converter objects and the exported references and pointers
// bound to them. The type of wxConvLibcObj depends on the platform:
// wxMBConv_win32, wxMBConv_mac or wxMBConvLibc.
3583 static wxMBConv_win32 wxConvLibcObj
;
3584 #elif defined(__WXMAC__) && !defined(__MACH__)
3585 static wxMBConv_mac wxConvLibcObj
;
3587 static wxMBConvLibc wxConvLibcObj
;
// converters for the system encoding, ISO8859-1, UTF-7 and UTF-8
3590 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
3591 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
3592 static wxMBConvUTF7 wxConvUTF7Obj
;
3593 static wxMBConvUTF8 wxConvUTF8Obj
;
3594 #if defined(__WXMAC__) && defined(TARGET_CARBON)
3595 static wxMBConv_macUTF8D wxConvMacUTF8DObj
;
// exported references to the objects above
3597 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
3598 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
3599 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
3600 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
3601 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
// exported pointers: wxConvCurrent starts out pointing to the libc
// converter and wxConvUI to wxConvLocal
3602 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
3603 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvUI
= &wxConvLocal
;
3604 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
= &
3606 #if defined(__WXMAC__) && defined(TARGET_CARBON)
3615 #else // !wxUSE_WCHAR_T
3617 // stand-ins in absence of wchar_t
3618 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
3623 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T