1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // For compilers that support precompilation, includes "wx.h".
16 #include "wx/wxprec.h"
26 #include "wx/hashmap.h"
29 #include "wx/strconv.h"
41 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
42 #include "wx/msw/private.h"
43 #include "wx/msw/missing.h"
44 #define wxHAVE_WIN32_MB2WC
53 #include "wx/thread.h"
56 #include "wx/encconv.h"
57 #include "wx/fontmap.h"
61 #include <ATSUnicode.h>
62 #include <TextCommon.h>
63 #include <TextEncodingConverter.h>
66 // includes Mac headers
67 #include "wx/mac/private.h"
71 #define TRACE_STRCONV _T("strconv")
73 // WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
75 #if SIZEOF_WCHAR_T == 2
80 // ============================================================================
82 // ============================================================================
84 // helper function of cMB2WC(): check if n bytes at this location are all NUL
85 static bool NotAllNULs(const char *p
, size_t n
)
87 while ( n
&& *p
++ == '\0' )
93 // ----------------------------------------------------------------------------
94 // UTF-16 en/decoding to/from UCS-4 with surrogates handling
95 // ----------------------------------------------------------------------------
97 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
102 *output
= (wxUint16
) input
;
106 else if (input
>= 0x110000)
108 return wxCONV_FAILED
;
114 *output
++ = (wxUint16
) ((input
>> 10) + 0xd7c0);
115 *output
= (wxUint16
) ((input
& 0x3ff) + 0xdc00);
122 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
124 if ((*input
< 0xd800) || (*input
> 0xdfff))
129 else if ((input
[1] < 0xdc00) || (input
[1] > 0xdfff))
132 return wxCONV_FAILED
;
136 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
142 typedef wchar_t wxDecodeSurrogate_t
;
144 typedef wxUint16 wxDecodeSurrogate_t
;
145 #endif // WC_UTF16/!WC_UTF16
147 // returns the next UTF-32 character from the wchar_t buffer and advances the
148 // pointer to the character after this one
150 // if an invalid character is found, *pSrc is set to NULL, the caller must
152 static wxUint32
wxDecodeSurrogate(const wxDecodeSurrogate_t
**pSrc
)
156 n
= decode_utf16(wx_reinterpret_cast(const wxUint16
*, *pSrc
), out
);
157 if ( n
== wxCONV_FAILED
)
165 // ----------------------------------------------------------------------------
167 // ----------------------------------------------------------------------------
170 wxMBConv::ToWChar(wchar_t *dst
, size_t dstLen
,
171 const char *src
, size_t srcLen
) const
173 // although new conversion classes are supposed to implement this function
174 // directly, the existing ones only implement the old MB2WC() and so, to
175 // avoid having to rewrite all conversion classes at once, we provide a
176 // default (but not efficient) implementation of this one in terms of the
177 // old function by copying the input to ensure that it's NUL-terminated and
178 // then using MB2WC() to convert it
180 // the number of chars [which would be] written to dst [if it were not NULL]
181 size_t dstWritten
= 0;
183 // the number of NULs terminating this string
184 size_t nulLen
= 0; // not really needed, but just to avoid warnings
186 // if we were not given the input size we just have to assume that the
187 // string is properly terminated as we have no way of knowing how long it
188 // is anyhow, but if we do have the size check whether there are enough
192 if ( srcLen
!= wxNO_LEN
)
194 // we need to know how to find the end of this string
195 nulLen
= GetMBNulLen();
196 if ( nulLen
== wxCONV_FAILED
)
197 return wxCONV_FAILED
;
199 // if there are enough NULs we can avoid the copy
200 if ( srcLen
< nulLen
|| NotAllNULs(src
+ srcLen
- nulLen
, nulLen
) )
202 // make a copy in order to properly NUL-terminate the string
203 bufTmp
= wxCharBuffer(srcLen
+ nulLen
- 1 /* 1 will be added */);
204 char * const p
= bufTmp
.data();
205 memcpy(p
, src
, srcLen
);
206 for ( char *s
= p
+ srcLen
; s
< p
+ srcLen
+ nulLen
; s
++ )
212 srcEnd
= src
+ srcLen
;
214 else // quit after the first loop iteration
221 // try to convert the current chunk
222 size_t lenChunk
= MB2WC(NULL
, src
, 0);
223 if ( lenChunk
== wxCONV_FAILED
)
224 return wxCONV_FAILED
;
226 lenChunk
++; // for the L'\0' at the end of this chunk
228 dstWritten
+= lenChunk
;
232 // nothing left in the input string, conversion succeeded
238 if ( dstWritten
> dstLen
)
239 return wxCONV_FAILED
;
241 if ( MB2WC(dst
, src
, lenChunk
) == wxCONV_FAILED
)
242 return wxCONV_FAILED
;
249 // we convert just one chunk in this case as this is the entire
254 // advance the input pointer past the end of this chunk
255 while ( NotAllNULs(src
, nulLen
) )
257 // notice that we must skip over multiple bytes here as we suppose
258 // that if NUL takes 2 or 4 bytes, then all the other characters do
259 // too and so if advanced by a single byte we might erroneously
260 // detect sequences of NUL bytes in the middle of the input
264 src
+= nulLen
; // skipping over its terminator as well
266 // note that ">=" (and not just "==") is needed here as the terminator
267 // we skipped just above could be inside or just after the buffer
268 // delimited by srcEnd
277 wxMBConv::FromWChar(char *dst
, size_t dstLen
,
278 const wchar_t *src
, size_t srcLen
) const
280 // the number of chars [which would be] written to dst [if it were not NULL]
281 size_t dstWritten
= 0;
283 // make a copy of the input string unless it is already properly
286 // if we don't know its length we have no choice but to assume that it is,
287 // indeed, properly terminated
288 wxWCharBuffer bufTmp
;
289 if ( srcLen
== wxNO_LEN
)
291 srcLen
= wxWcslen(src
) + 1;
293 else if ( srcLen
!= 0 && src
[srcLen
- 1] != L
'\0' )
295 // make a copy in order to properly NUL-terminate the string
296 bufTmp
= wxWCharBuffer(srcLen
);
297 memcpy(bufTmp
.data(), src
, srcLen
* sizeof(wchar_t));
301 const size_t lenNul
= GetMBNulLen();
302 for ( const wchar_t * const srcEnd
= src
+ srcLen
;
304 src
+= wxWcslen(src
) + 1 /* skip L'\0' too */ )
306 // try to convert the current chunk
307 size_t lenChunk
= WC2MB(NULL
, src
, 0);
309 if ( lenChunk
== wxCONV_FAILED
)
310 return wxCONV_FAILED
;
313 dstWritten
+= lenChunk
;
317 if ( dstWritten
> dstLen
)
318 return wxCONV_FAILED
;
320 if ( WC2MB(dst
, src
, lenChunk
) == wxCONV_FAILED
)
321 return wxCONV_FAILED
;
330 size_t wxMBConv::MB2WC(wchar_t *outBuff
, const char *inBuff
, size_t outLen
) const
332 size_t rc
= ToWChar(outBuff
, outLen
, inBuff
);
333 if ( rc
!= wxCONV_FAILED
)
335 // ToWChar() returns the buffer length, i.e. including the trailing
336 // NUL, while this method doesn't take it into account
343 size_t wxMBConv::WC2MB(char *outBuff
, const wchar_t *inBuff
, size_t outLen
) const
345 size_t rc
= FromWChar(outBuff
, outLen
, inBuff
);
346 if ( rc
!= wxCONV_FAILED
)
354 wxMBConv::~wxMBConv()
356 // nothing to do here (necessary for Darwin linking probably)
359 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
363 // calculate the length of the buffer needed first
364 const size_t nLen
= MB2WC(NULL
, psz
, 0);
365 if ( nLen
!= wxCONV_FAILED
)
367 // now do the actual conversion
368 wxWCharBuffer
buf(nLen
/* +1 added implicitly */);
370 // +1 for the trailing NUL
371 if ( MB2WC(buf
.data(), psz
, nLen
+ 1) != wxCONV_FAILED
)
376 return wxWCharBuffer();
379 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
383 const size_t nLen
= WC2MB(NULL
, pwz
, 0);
384 if ( nLen
!= wxCONV_FAILED
)
386 // extra space for trailing NUL(s)
387 static const size_t extraLen
= GetMaxMBNulLen();
389 wxCharBuffer
buf(nLen
+ extraLen
- 1);
390 if ( WC2MB(buf
.data(), pwz
, nLen
+ extraLen
) != wxCONV_FAILED
)
395 return wxCharBuffer();
399 wxMBConv::cMB2WC(const char *inBuff
, size_t inLen
, size_t *outLen
) const
401 const size_t dstLen
= ToWChar(NULL
, 0, inBuff
, inLen
);
402 if ( dstLen
!= wxCONV_FAILED
)
404 wxWCharBuffer
wbuf(dstLen
- 1);
405 if ( ToWChar(wbuf
.data(), dstLen
, inBuff
, inLen
) != wxCONV_FAILED
)
410 if ( wbuf
[dstLen
- 1] == L
'\0' )
421 return wxWCharBuffer();
425 wxMBConv::cWC2MB(const wchar_t *inBuff
, size_t inLen
, size_t *outLen
) const
427 size_t dstLen
= FromWChar(NULL
, 0, inBuff
, inLen
);
428 if ( dstLen
!= wxCONV_FAILED
)
430 // special case of empty input: can't allocate 0 size buffer below as
431 // wxCharBuffer insists on NUL-terminating it
432 wxCharBuffer
buf(dstLen
? dstLen
- 1 : 1);
433 if ( FromWChar(buf
.data(), dstLen
, inBuff
, inLen
) != wxCONV_FAILED
)
439 const size_t nulLen
= GetMBNulLen();
440 if ( dstLen
>= nulLen
&&
441 !NotAllNULs(buf
.data() + dstLen
- nulLen
, nulLen
) )
443 // in this case the output is NUL-terminated and we're not
444 // supposed to count NUL
456 return wxCharBuffer();
459 // ----------------------------------------------------------------------------
461 // ----------------------------------------------------------------------------
463 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
465 return wxMB2WC(buf
, psz
, n
);
468 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
470 return wxWC2MB(buf
, psz
, n
);
473 // ----------------------------------------------------------------------------
474 // wxConvBrokenFileNames
475 // ----------------------------------------------------------------------------
479 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxString
& charset
)
481 if ( wxStricmp(charset
, _T("UTF-8")) == 0 ||
482 wxStricmp(charset
, _T("UTF8")) == 0 )
483 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA
);
485 m_conv
= new wxCSConv(charset
);
490 // ----------------------------------------------------------------------------
492 // ----------------------------------------------------------------------------
494 // Implementation (C) 2004 Fredrik Roubert
497 // BASE64 decoding table
499 static const unsigned char utf7unb64
[] =
501 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
502 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
503 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
504 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
505 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
506 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
507 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
508 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
509 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
510 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
511 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
512 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
513 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
514 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
515 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
516 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
517 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
518 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
519 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
520 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
521 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
522 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
523 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
524 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
525 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
526 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
527 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
528 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
529 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
530 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
531 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
532 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
535 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
539 while ( *psz
&& (!buf
|| (len
< n
)) )
541 unsigned char cc
= *psz
++;
549 else if (*psz
== '-')
557 else // start of BASE64 encoded string
561 for ( ok
= lsb
= false, d
= 0, l
= 0;
562 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff;
567 for (l
+= 6; l
>= 8; lsb
= !lsb
)
569 unsigned char c
= (unsigned char)((d
>> (l
-= 8)) % 256);
579 *buf
= (wchar_t)(c
<< 8);
588 // in valid UTF7 we should have valid characters after '+'
589 return wxCONV_FAILED
;
597 if ( buf
&& (len
< n
) )
604 // BASE64 encoding table
606 static const unsigned char utf7enb64
[] =
608 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
609 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
610 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
611 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
612 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
613 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
614 'w', 'x', 'y', 'z', '0', '1', '2', '3',
615 '4', '5', '6', '7', '8', '9', '+', '/'
619 // UTF-7 encoding table
621 // 0 - Set D (directly encoded characters)
622 // 1 - Set O (optional direct characters)
623 // 2 - whitespace characters (optional)
624 // 3 - special characters
626 static const unsigned char utf7encode
[128] =
628 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
629 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
630 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
631 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
632 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
633 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
634 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
635 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
638 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
642 while (*psz
&& ((!buf
) || (len
< n
)))
645 if (cc
< 0x80 && utf7encode
[cc
] < 1)
654 else if (((wxUint32
)cc
) > 0xffff)
656 // no surrogate pair generation (yet?)
657 return wxCONV_FAILED
;
668 // BASE64 encode string
669 unsigned int lsb
, d
, l
;
670 for (d
= 0, l
= 0; /*nothing*/; psz
++)
672 for (lsb
= 0; lsb
< 2; lsb
++)
675 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
677 for (l
+= 8; l
>= 6; )
681 *buf
++ = utf7enb64
[(d
>> l
) % 64];
687 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
694 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
706 if (buf
&& (len
< n
))
712 // ----------------------------------------------------------------------------
714 // ----------------------------------------------------------------------------
716 static wxUint32 utf8_max
[]=
717 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
719 // boundaries of the private use area we use to (temporarily) remap the
720 // bytes which are invalid in a UTF-8 encoded string
721 const wxUint32 wxUnicodePUA
= 0x100000;
722 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
724 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
728 while (*psz
&& ((!buf
) || (len
< n
)))
730 const char *opsz
= psz
;
731 bool invalid
= false;
732 unsigned char cc
= *psz
++, fc
= cc
;
734 for (cnt
= 0; fc
& 0x80; cnt
++)
744 // escape the escape character for octal escapes
745 if ((m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
746 && cc
== '\\' && (!buf
|| len
< n
))
758 // invalid UTF-8 sequence
763 unsigned ocnt
= cnt
- 1;
764 wxUint32 res
= cc
& (0x3f >> cnt
);
768 if ((cc
& 0xC0) != 0x80)
770 // invalid UTF-8 sequence
776 res
= (res
<< 6) | (cc
& 0x3f);
779 if (invalid
|| res
<= utf8_max
[ocnt
])
781 // illegal UTF-8 encoding
784 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
785 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
787 // if one of our PUA characters turns up externally
788 // it must also be treated as an illegal sequence
789 // (a bit like you have to escape an escape character)
795 // cast is ok because wchar_t == wxUint16 if WC_UTF16
796 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
797 if (pa
== wxCONV_FAILED
)
809 *buf
++ = (wchar_t)res
;
811 #endif // WC_UTF16/!WC_UTF16
817 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
819 while (opsz
< psz
&& (!buf
|| len
< n
))
822 // cast is ok because wchar_t == wxUint16 if WC_UTF16
823 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
824 wxASSERT(pa
!= wxCONV_FAILED
);
831 *buf
++ = (wchar_t)(wxUnicodePUA
+ (unsigned char)*opsz
);
837 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
839 while (opsz
< psz
&& (!buf
|| len
< n
))
841 if ( buf
&& len
+ 3 < n
)
843 unsigned char on
= *opsz
;
845 *buf
++ = (wchar_t)( L
'0' + on
/ 0100 );
846 *buf
++ = (wchar_t)( L
'0' + (on
% 0100) / 010 );
847 *buf
++ = (wchar_t)( L
'0' + on
% 010 );
854 else // MAP_INVALID_UTF8_NOT
856 return wxCONV_FAILED
;
862 if (buf
&& (len
< n
))
868 static inline bool isoctal(wchar_t wch
)
870 return L
'0' <= wch
&& wch
<= L
'7';
873 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
877 while (*psz
&& ((!buf
) || (len
< n
)))
882 // cast is ok for WC_UTF16
883 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
884 psz
+= (pa
== wxCONV_FAILED
) ? 1 : pa
;
886 cc
= (*psz
++) & 0x7fffffff;
889 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
890 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
893 *buf
++ = (char)(cc
- wxUnicodePUA
);
896 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
897 && cc
== L
'\\' && psz
[0] == L
'\\' )
904 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
906 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
910 *buf
++ = (char) ((psz
[0] - L
'0') * 0100 +
911 (psz
[1] - L
'0') * 010 +
921 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++)
937 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
939 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
945 if (buf
&& (len
< n
))
951 // ============================================================================
953 // ============================================================================
955 #ifdef WORDS_BIGENDIAN
956 #define wxMBConvUTF16straight wxMBConvUTF16BE
957 #define wxMBConvUTF16swap wxMBConvUTF16LE
959 #define wxMBConvUTF16swap wxMBConvUTF16BE
960 #define wxMBConvUTF16straight wxMBConvUTF16LE
964 size_t wxMBConvUTF16Base::GetLength(const char *src
, size_t srcLen
)
966 if ( srcLen
== wxNO_LEN
)
968 // count the number of bytes in input, including the trailing NULs
969 const wxUint16
*inBuff
= wx_reinterpret_cast(const wxUint16
*, src
);
970 for ( srcLen
= 1; *inBuff
++; srcLen
++ )
973 srcLen
*= BYTES_PER_CHAR
;
975 else // we already have the length
977 // we can only convert an entire number of UTF-16 characters
978 if ( srcLen
% BYTES_PER_CHAR
)
979 return wxCONV_FAILED
;
985 // case when in-memory representation is UTF-16 too
988 // ----------------------------------------------------------------------------
989 // conversions without endianness change
990 // ----------------------------------------------------------------------------
993 wxMBConvUTF16straight::ToWChar(wchar_t *dst
, size_t dstLen
,
994 const char *src
, size_t srcLen
) const
996 // set up the scene for using memcpy() (which is presumably more efficient
997 // than copying the bytes one by one)
998 srcLen
= GetLength(src
, srcLen
);
999 if ( srcLen
== wxNO_LEN
)
1000 return wxCONV_FAILED
;
1002 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1005 if ( dstLen
< inLen
)
1006 return wxCONV_FAILED
;
1008 memcpy(dst
, src
, srcLen
);
1015 wxMBConvUTF16straight::FromWChar(char *dst
, size_t dstLen
,
1016 const wchar_t *src
, size_t srcLen
) const
1018 if ( srcLen
== wxNO_LEN
)
1019 srcLen
= wxWcslen(src
) + 1;
1021 srcLen
*= BYTES_PER_CHAR
;
1025 if ( dstLen
< srcLen
)
1026 return wxCONV_FAILED
;
1028 memcpy(dst
, src
, srcLen
);
1034 // ----------------------------------------------------------------------------
1035 // endian-reversing conversions
1036 // ----------------------------------------------------------------------------
1039 wxMBConvUTF16swap::ToWChar(wchar_t *dst
, size_t dstLen
,
1040 const char *src
, size_t srcLen
) const
1042 srcLen
= GetLength(src
, srcLen
);
1043 if ( srcLen
== wxNO_LEN
)
1044 return wxCONV_FAILED
;
1046 srcLen
/= BYTES_PER_CHAR
;
1050 if ( dstLen
< srcLen
)
1051 return wxCONV_FAILED
;
1053 const wxUint16
*inBuff
= wx_reinterpret_cast(const wxUint16
*, src
);
1054 for ( size_t n
= 0; n
< srcLen
; n
++, inBuff
++ )
1056 *dst
++ = wxUINT16_SWAP_ALWAYS(*inBuff
);
1064 wxMBConvUTF16swap::FromWChar(char *dst
, size_t dstLen
,
1065 const wchar_t *src
, size_t srcLen
) const
1067 if ( srcLen
== wxNO_LEN
)
1068 srcLen
= wxWcslen(src
) + 1;
1070 srcLen
*= BYTES_PER_CHAR
;
1074 if ( dstLen
< srcLen
)
1075 return wxCONV_FAILED
;
1077 wxUint16
*outBuff
= wx_reinterpret_cast(wxUint16
*, dst
);
1078 for ( size_t n
= 0; n
< srcLen
; n
+= BYTES_PER_CHAR
, src
++ )
1080 *outBuff
++ = wxUINT16_SWAP_ALWAYS(*src
);
1087 #else // !WC_UTF16: wchar_t is UTF-32
1089 // ----------------------------------------------------------------------------
1090 // conversions without endianness change
1091 // ----------------------------------------------------------------------------
1094 wxMBConvUTF16straight::ToWChar(wchar_t *dst
, size_t dstLen
,
1095 const char *src
, size_t srcLen
) const
1097 srcLen
= GetLength(src
, srcLen
);
1098 if ( srcLen
== wxNO_LEN
)
1099 return wxCONV_FAILED
;
1101 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1104 // optimization: return maximal space which could be needed for this
1105 // string even if the real size could be smaller if the buffer contains
1111 const wxUint16
*inBuff
= wx_reinterpret_cast(const wxUint16
*, src
);
1112 for ( const wxUint16
* const inEnd
= inBuff
+ inLen
; inBuff
< inEnd
; )
1114 const wxUint32 ch
= wxDecodeSurrogate(&inBuff
);
1116 return wxCONV_FAILED
;
1118 if ( ++outLen
> dstLen
)
1119 return wxCONV_FAILED
;
1129 wxMBConvUTF16straight::FromWChar(char *dst
, size_t dstLen
,
1130 const wchar_t *src
, size_t srcLen
) const
1132 if ( srcLen
== wxNO_LEN
)
1133 srcLen
= wxWcslen(src
) + 1;
1136 wxUint16
*outBuff
= wx_reinterpret_cast(wxUint16
*, dst
);
1137 for ( size_t n
= 0; n
< srcLen
; n
++ )
1140 const size_t numChars
= encode_utf16(*src
++, cc
);
1141 if ( numChars
== wxCONV_FAILED
)
1142 return wxCONV_FAILED
;
1144 outLen
+= numChars
* BYTES_PER_CHAR
;
1147 if ( outLen
> dstLen
)
1148 return wxCONV_FAILED
;
1151 if ( numChars
== 2 )
1153 // second character of a surrogate
1162 // ----------------------------------------------------------------------------
1163 // endian-reversing conversions
1164 // ----------------------------------------------------------------------------
1167 wxMBConvUTF16swap::ToWChar(wchar_t *dst
, size_t dstLen
,
1168 const char *src
, size_t srcLen
) const
1170 srcLen
= GetLength(src
, srcLen
);
1171 if ( srcLen
== wxNO_LEN
)
1172 return wxCONV_FAILED
;
1174 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1177 // optimization: return maximal space which could be needed for this
1178 // string even if the real size could be smaller if the buffer contains
1184 const wxUint16
*inBuff
= wx_reinterpret_cast(const wxUint16
*, src
);
1185 for ( const wxUint16
* const inEnd
= inBuff
+ inLen
; inBuff
< inEnd
; )
1190 tmp
[0] = wxUINT16_SWAP_ALWAYS(*inBuff
);
1192 tmp
[1] = wxUINT16_SWAP_ALWAYS(*inBuff
);
1194 const size_t numChars
= decode_utf16(tmp
, ch
);
1195 if ( numChars
== wxCONV_FAILED
)
1196 return wxCONV_FAILED
;
1198 if ( numChars
== 2 )
1201 if ( ++outLen
> dstLen
)
1202 return wxCONV_FAILED
;
1212 wxMBConvUTF16swap::FromWChar(char *dst
, size_t dstLen
,
1213 const wchar_t *src
, size_t srcLen
) const
1215 if ( srcLen
== wxNO_LEN
)
1216 srcLen
= wxWcslen(src
) + 1;
1219 wxUint16
*outBuff
= wx_reinterpret_cast(wxUint16
*, dst
);
1220 for ( const wchar_t *srcEnd
= src
+ srcLen
; src
< srcEnd
; src
++ )
1223 const size_t numChars
= encode_utf16(*src
, cc
);
1224 if ( numChars
== wxCONV_FAILED
)
1225 return wxCONV_FAILED
;
1227 outLen
+= numChars
* BYTES_PER_CHAR
;
1230 if ( outLen
> dstLen
)
1231 return wxCONV_FAILED
;
1233 *outBuff
++ = wxUINT16_SWAP_ALWAYS(cc
[0]);
1234 if ( numChars
== 2 )
1236 // second character of a surrogate
1237 *outBuff
++ = wxUINT16_SWAP_ALWAYS(cc
[1]);
1245 #endif // WC_UTF16/!WC_UTF16
1248 // ============================================================================
1250 // ============================================================================
1252 #ifdef WORDS_BIGENDIAN
1253 #define wxMBConvUTF32straight wxMBConvUTF32BE
1254 #define wxMBConvUTF32swap wxMBConvUTF32LE
1256 #define wxMBConvUTF32swap wxMBConvUTF32BE
1257 #define wxMBConvUTF32straight wxMBConvUTF32LE
1261 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1262 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
1265 size_t wxMBConvUTF32Base::GetLength(const char *src
, size_t srcLen
)
1267 if ( srcLen
== wxNO_LEN
)
1269 // count the number of bytes in input, including the trailing NULs
1270 const wxUint32
*inBuff
= wx_reinterpret_cast(const wxUint32
*, src
);
1271 for ( srcLen
= 1; *inBuff
++; srcLen
++ )
1274 srcLen
*= BYTES_PER_CHAR
;
1276 else // we already have the length
1278 // we can only convert an entire number of UTF-32 characters
1279 if ( srcLen
% BYTES_PER_CHAR
)
1280 return wxCONV_FAILED
;
1286 // case when in-memory representation is UTF-16
1289 // ----------------------------------------------------------------------------
1290 // conversions without endianness change
1291 // ----------------------------------------------------------------------------
1294 wxMBConvUTF32straight::ToWChar(wchar_t *dst
, size_t dstLen
,
1295 const char *src
, size_t srcLen
) const
1297 srcLen
= GetLength(src
, srcLen
);
1298 if ( srcLen
== wxNO_LEN
)
1299 return wxCONV_FAILED
;
1301 const wxUint32
*inBuff
= wx_reinterpret_cast(const wxUint32
*, src
);
1302 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1304 for ( size_t n
= 0; n
< inLen
; n
++ )
1307 const size_t numChars
= encode_utf16(*inBuff
++, cc
);
1308 if ( numChars
== wxCONV_FAILED
)
1309 return wxCONV_FAILED
;
1314 if ( outLen
> dstLen
)
1315 return wxCONV_FAILED
;
1318 if ( numChars
== 2 )
1320 // second character of a surrogate
1330 wxMBConvUTF32straight::FromWChar(char *dst
, size_t dstLen
,
1331 const wchar_t *src
, size_t srcLen
) const
1333 if ( srcLen
== wxNO_LEN
)
1334 srcLen
= wxWcslen(src
) + 1;
1338 // optimization: return maximal space which could be needed for this
1339 // string instead of the exact amount which could be less if there are
1340 // any surrogates in the input
1342 // we consider that surrogates are rare enough to make it worthwhile to
1343 // avoid running the loop below at the cost of slightly extra memory
1345 return srcLen
* BYTES_PER_CHAR
;
1348 wxUint32
*outBuff
= wx_reinterpret_cast(wxUint32
*, dst
);
1350 for ( const wchar_t * const srcEnd
= src
+ srcLen
; src
< srcEnd
; )
1352 const wxUint32 ch
= wxDecodeSurrogate(&src
);
1354 return wxCONV_FAILED
;
1356 outLen
+= BYTES_PER_CHAR
;
1358 if ( outLen
> dstLen
)
1359 return wxCONV_FAILED
;
1367 // ----------------------------------------------------------------------------
1368 // endian-reversing conversions
1369 // ----------------------------------------------------------------------------
1372 wxMBConvUTF32swap::ToWChar(wchar_t *dst
, size_t dstLen
,
1373 const char *src
, size_t srcLen
) const
1375 srcLen
= GetLength(src
, srcLen
);
1376 if ( srcLen
== wxNO_LEN
)
1377 return wxCONV_FAILED
;
1379 const wxUint32
*inBuff
= wx_reinterpret_cast(const wxUint32
*, src
);
1380 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1382 for ( size_t n
= 0; n
< inLen
; n
++, inBuff
++ )
1385 const size_t numChars
= encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff
), cc
);
1386 if ( numChars
== wxCONV_FAILED
)
1387 return wxCONV_FAILED
;
1392 if ( outLen
> dstLen
)
1393 return wxCONV_FAILED
;
1396 if ( numChars
== 2 )
1398 // second character of a surrogate
1408 wxMBConvUTF32swap::FromWChar(char *dst
, size_t dstLen
,
1409 const wchar_t *src
, size_t srcLen
) const
1411 if ( srcLen
== wxNO_LEN
)
1412 srcLen
= wxWcslen(src
) + 1;
1416 // optimization: return maximal space which could be needed for this
1417 // string instead of the exact amount which could be less if there are
1418 // any surrogates in the input
1420 // we consider that surrogates are rare enough to make it worthwhile to
1421 // avoid running the loop below at the cost of slightly extra memory
1423 return srcLen
*BYTES_PER_CHAR
;
1426 wxUint32
*outBuff
= wx_reinterpret_cast(wxUint32
*, dst
);
1428 for ( const wchar_t * const srcEnd
= src
+ srcLen
; src
< srcEnd
; )
1430 const wxUint32 ch
= wxDecodeSurrogate(&src
);
1432 return wxCONV_FAILED
;
1434 outLen
+= BYTES_PER_CHAR
;
1436 if ( outLen
> dstLen
)
1437 return wxCONV_FAILED
;
1439 *outBuff
++ = wxUINT32_SWAP_ALWAYS(ch
);
1445 #else // !WC_UTF16: wchar_t is UTF-32
1447 // ----------------------------------------------------------------------------
1448 // conversions without endianness change
1449 // ----------------------------------------------------------------------------
1452 wxMBConvUTF32straight::ToWChar(wchar_t *dst
, size_t dstLen
,
1453 const char *src
, size_t srcLen
) const
1455 // use memcpy() as it should be much faster than hand-written loop
1456 srcLen
= GetLength(src
, srcLen
);
1457 if ( srcLen
== wxNO_LEN
)
1458 return wxCONV_FAILED
;
1460 const size_t inLen
= srcLen
/BYTES_PER_CHAR
;
1463 if ( dstLen
< inLen
)
1464 return wxCONV_FAILED
;
1466 memcpy(dst
, src
, srcLen
);
1473 wxMBConvUTF32straight::FromWChar(char *dst
, size_t dstLen
,
1474 const wchar_t *src
, size_t srcLen
) const
1476 if ( srcLen
== wxNO_LEN
)
1477 srcLen
= wxWcslen(src
) + 1;
1479 srcLen
*= BYTES_PER_CHAR
;
1483 if ( dstLen
< srcLen
)
1484 return wxCONV_FAILED
;
1486 memcpy(dst
, src
, srcLen
);
1492 // ----------------------------------------------------------------------------
1493 // endian-reversing conversions
1494 // ----------------------------------------------------------------------------
1497 wxMBConvUTF32swap::ToWChar(wchar_t *dst
, size_t dstLen
,
1498 const char *src
, size_t srcLen
) const
1500 srcLen
= GetLength(src
, srcLen
);
1501 if ( srcLen
== wxNO_LEN
)
1502 return wxCONV_FAILED
;
1504 srcLen
/= BYTES_PER_CHAR
;
1508 if ( dstLen
< srcLen
)
1509 return wxCONV_FAILED
;
1511 const wxUint32
*inBuff
= wx_reinterpret_cast(const wxUint32
*, src
);
1512 for ( size_t n
= 0; n
< srcLen
; n
++, inBuff
++ )
1514 *dst
++ = wxUINT32_SWAP_ALWAYS(*inBuff
);
1522 wxMBConvUTF32swap::FromWChar(char *dst
, size_t dstLen
,
1523 const wchar_t *src
, size_t srcLen
) const
1525 if ( srcLen
== wxNO_LEN
)
1526 srcLen
= wxWcslen(src
) + 1;
1528 srcLen
*= BYTES_PER_CHAR
;
1532 if ( dstLen
< srcLen
)
1533 return wxCONV_FAILED
;
1535 wxUint32
*outBuff
= wx_reinterpret_cast(wxUint32
*, dst
);
1536 for ( size_t n
= 0; n
< srcLen
; n
+= BYTES_PER_CHAR
, src
++ )
1538 *outBuff
++ = wxUINT32_SWAP_ALWAYS(*src
);
1545 #endif // WC_UTF16/!WC_UTF16
1548 // ============================================================================
1549 // The classes doing conversion using the iconv_xxx() functions
1550 // ============================================================================
1554 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1555 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1556 // (unless there's yet another bug in glibc) the only case when iconv()
1557 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1558 // left in the input buffer -- when _real_ error occurs,
1559 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1561 // [This bug does not appear in glibc 2.2.]
1562 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1563 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1564 (errno != E2BIG || bufLeft != 0))
1566 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1569 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1571 #define ICONV_T_INVALID ((iconv_t)-1)
1573 #if SIZEOF_WCHAR_T == 4
1574 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1575 #define WC_ENC wxFONTENCODING_UTF32
1576 #elif SIZEOF_WCHAR_T == 2
1577 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1578 #define WC_ENC wxFONTENCODING_UTF16
1579 #else // sizeof(wchar_t) != 2 nor 4
1580 // does this ever happen?
1581 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1584 // ----------------------------------------------------------------------------
1585 // wxMBConv_iconv: encapsulates an iconv character set
1586 // ----------------------------------------------------------------------------
1588 class wxMBConv_iconv
: public wxMBConv
1591 wxMBConv_iconv(const char *name
);
1592 virtual ~wxMBConv_iconv();
1594 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1595 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1597 // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
1598 virtual size_t GetMBNulLen() const;
1600 #if wxUSE_UNICODE_UTF8
1601 virtual bool IsUTF8() const;
1604 virtual wxMBConv
*Clone() const
1606 wxMBConv_iconv
*p
= new wxMBConv_iconv(m_name
.ToAscii());
1607 p
->m_minMBCharWidth
= m_minMBCharWidth
;
1612 { return (m2w
!= ICONV_T_INVALID
) && (w2m
!= ICONV_T_INVALID
); }
1615 // the iconv handlers used to translate from multibyte
1616 // to wide char and in the other direction
1621 // guards access to m2w and w2m objects
1622 wxMutex m_iconvMutex
;
1626 // the name (for iconv_open()) of a wide char charset -- if none is
1627 // available on this machine, it will remain NULL
1628 static wxString ms_wcCharsetName
;
1630 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1631 // different endian-ness than the native one
1632 static bool ms_wcNeedsSwap
;
1635 // name of the encoding handled by this conversion
1638 // cached result of GetMBNulLen(); set to 0 meaning "unknown"
1640 size_t m_minMBCharWidth
;
1643 // make the constructor available for unit testing
1644 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_iconv( const char* name
)
1646 wxMBConv_iconv
* result
= new wxMBConv_iconv( name
);
1647 if ( !result
->IsOk() )
1656 wxString
wxMBConv_iconv::ms_wcCharsetName
;
1657 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1659 wxMBConv_iconv::wxMBConv_iconv(const char *name
)
1662 m_minMBCharWidth
= 0;
1664 // check for charset that represents wchar_t:
1665 if ( ms_wcCharsetName
.empty() )
1667 wxLogTrace(TRACE_STRCONV
, _T("Looking for wide char codeset:"));
1670 const wxChar
**names
= wxFontMapperBase::GetAllEncodingNames(WC_ENC
);
1671 #else // !wxUSE_FONTMAP
1672 static const wxChar
*names_static
[] =
1674 #if SIZEOF_WCHAR_T == 4
1676 #elif SIZEOF_WCHAR_T = 2
1681 const wxChar
**names
= names_static
;
1682 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1684 for ( ; *names
&& ms_wcCharsetName
.empty(); ++names
)
1686 const wxString
nameCS(*names
);
1688 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1689 wxString
nameXE(nameCS
);
1691 #ifdef WORDS_BIGENDIAN
1693 #else // little endian
1697 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1700 m2w
= iconv_open(nameXE
.ToAscii(), name
);
1701 if ( m2w
== ICONV_T_INVALID
)
1703 // try charset w/o bytesex info (e.g. "UCS4")
1704 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1706 m2w
= iconv_open(nameCS
.ToAscii(), name
);
1708 // and check for bytesex ourselves:
1709 if ( m2w
!= ICONV_T_INVALID
)
1711 char buf
[2], *bufPtr
;
1712 wchar_t wbuf
[2], *wbufPtr
;
1720 outsz
= SIZEOF_WCHAR_T
* 2;
1725 m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1726 (char**)&wbufPtr
, &outsz
);
1728 if (ICONV_FAILED(res
, insz
))
1730 wxLogLastError(wxT("iconv"));
1731 wxLogError(_("Conversion to charset '%s' doesn't work."),
1734 else // ok, can convert to this encoding, remember it
1736 ms_wcCharsetName
= nameCS
;
1737 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1741 else // use charset not requiring byte swapping
1743 ms_wcCharsetName
= nameXE
;
1747 wxLogTrace(TRACE_STRCONV
,
1748 wxT("iconv wchar_t charset is \"%s\"%s"),
1749 ms_wcCharsetName
.empty() ? wxString("<none>")
1751 ms_wcNeedsSwap
? _T(" (needs swap)")
1754 else // we already have ms_wcCharsetName
1756 m2w
= iconv_open(ms_wcCharsetName
.ToAscii(), name
);
1759 if ( ms_wcCharsetName
.empty() )
1761 w2m
= ICONV_T_INVALID
;
1765 w2m
= iconv_open(name
, ms_wcCharsetName
.ToAscii());
1766 if ( w2m
== ICONV_T_INVALID
)
1768 wxLogTrace(TRACE_STRCONV
,
1769 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1770 ms_wcCharsetName
.c_str(), name
);
1775 wxMBConv_iconv::~wxMBConv_iconv()
1777 if ( m2w
!= ICONV_T_INVALID
)
1779 if ( w2m
!= ICONV_T_INVALID
)
1783 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1785 // find the string length: notice that this must be done differently for
1786 // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1788 const size_t nulLen
= GetMBNulLen();
1792 return wxCONV_FAILED
;
1795 inbuf
= strlen(psz
); // arguably more optimized than our version
1800 // for UTF-16/32 not only do we need to have 2/4 consecutive NULs but
1801 // they also have to start at a character boundary and not span two
1802 // adjacent characters
1804 for ( p
= psz
; NotAllNULs(p
, nulLen
); p
+= nulLen
)
1811 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1812 // Unfortunately there are a couple of global wxCSConv objects such as
1813 // wxConvLocal that are used all over wx code, so we have to make sure
1814 // the handle is used by at most one thread at a time. Otherwise
1815 // only a few wx classes would be safe to use from non-main threads
1816 // as MB<->WC conversion would fail "randomly".
1817 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1818 #endif // wxUSE_THREADS
1820 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1822 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1823 wchar_t *bufPtr
= buf
;
1824 const char *pszPtr
= psz
;
1828 // have destination buffer, convert there
1830 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1831 (char**)&bufPtr
, &outbuf
);
1832 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1836 // convert to native endianness
1837 for ( unsigned i
= 0; i
< res
; i
++ )
1838 buf
[n
] = WC_BSWAP(buf
[i
]);
1841 // NUL-terminate the string if there is any space left
1847 // no destination buffer... convert using temp buffer
1848 // to calculate destination buffer requirement
1855 outbuf
= 8 * SIZEOF_WCHAR_T
;
1858 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1859 (char**)&bufPtr
, &outbuf
);
1861 res
+= 8 - (outbuf
/ SIZEOF_WCHAR_T
);
1863 while ((cres
== (size_t)-1) && (errno
== E2BIG
));
1866 if (ICONV_FAILED(cres
, inbuf
))
1868 //VS: it is ok if iconv fails, hence trace only
1869 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1870 return wxCONV_FAILED
;
1876 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1879 // NB: explained in MB2WC
1880 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1883 size_t inlen
= wxWcslen(psz
);
1884 size_t inbuf
= inlen
* SIZEOF_WCHAR_T
;
1888 wchar_t *tmpbuf
= 0;
1892 // need to copy to temp buffer to switch endianness
1893 // (doing WC_BSWAP twice on the original buffer won't help, as it
1894 // could be in read-only memory, or be accessed in some other thread)
1895 tmpbuf
= (wchar_t *)malloc(inbuf
+ SIZEOF_WCHAR_T
);
1896 for ( size_t i
= 0; i
< inlen
; i
++ )
1897 tmpbuf
[n
] = WC_BSWAP(psz
[i
]);
1899 tmpbuf
[inlen
] = L
'\0';
1905 // have destination buffer, convert there
1906 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1910 // NB: iconv was given only wcslen(psz) characters on input, and so
1911 // it couldn't convert the trailing zero. Let's do it ourselves
1912 // if there's some room left for it in the output buffer.
1918 // no destination buffer: convert using temp buffer
1919 // to calculate destination buffer requirement
1927 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1931 while ((cres
== (size_t)-1) && (errno
== E2BIG
));
1939 if (ICONV_FAILED(cres
, inbuf
))
1941 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1942 return wxCONV_FAILED
;
1948 size_t wxMBConv_iconv::GetMBNulLen() const
1950 if ( m_minMBCharWidth
== 0 )
1952 wxMBConv_iconv
* const self
= wxConstCast(this, wxMBConv_iconv
);
1955 // NB: explained in MB2WC
1956 wxMutexLocker
lock(self
->m_iconvMutex
);
1959 const wchar_t *wnul
= L
"";
1960 char buf
[8]; // should be enough for NUL in any encoding
1961 size_t inLen
= sizeof(wchar_t),
1962 outLen
= WXSIZEOF(buf
);
1963 char *inBuff
= (char *)wnul
;
1964 char *outBuff
= buf
;
1965 if ( iconv(w2m
, ICONV_CHAR_CAST(&inBuff
), &inLen
, &outBuff
, &outLen
) == (size_t)-1 )
1967 self
->m_minMBCharWidth
= (size_t)-1;
1971 self
->m_minMBCharWidth
= outBuff
- buf
;
1975 return m_minMBCharWidth
;
1978 #if wxUSE_UNICODE_UTF8
1979 bool wxMBConv_iconv::IsUTF8() const
1981 return wxStricmp(m_name
, "UTF-8") == 0 ||
1982 wxStricmp(m_name
, "UTF8") == 0;
1986 #endif // HAVE_ICONV
1989 // ============================================================================
1990 // Win32 conversion classes
1991 // ============================================================================
1993 #ifdef wxHAVE_WIN32_MB2WC
1997 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const char *charset
);
1998 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
2001 class wxMBConv_win32
: public wxMBConv
2006 m_CodePage
= CP_ACP
;
2007 m_minMBCharWidth
= 0;
2010 wxMBConv_win32(const wxMBConv_win32
& conv
)
2013 m_CodePage
= conv
.m_CodePage
;
2014 m_minMBCharWidth
= conv
.m_minMBCharWidth
;
2018 wxMBConv_win32(const char* name
)
2020 m_CodePage
= wxCharsetToCodepage(name
);
2021 m_minMBCharWidth
= 0;
2024 wxMBConv_win32(wxFontEncoding encoding
)
2026 m_CodePage
= wxEncodingToCodepage(encoding
);
2027 m_minMBCharWidth
= 0;
2029 #endif // wxUSE_FONTMAP
2031 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2033 // note that we have to use MB_ERR_INVALID_CHARS flag as without it
2034 // the behaviour is not compatible with the Unix version (using iconv)
2035 // and breaks the library itself, e.g. wxTextInputStream::NextChar()
2036 // wouldn't work if reading an incomplete MB char didn't result in an
2039 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
2040 // Win XP or newer and it is not supported for UTF-[78] so we always
2041 // use our own conversions in this case. See
2042 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2043 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
2044 if ( m_CodePage
== CP_UTF8
)
2046 return wxMBConvUTF8().MB2WC(buf
, psz
, n
);
2049 if ( m_CodePage
== CP_UTF7
)
2051 return wxMBConvUTF7().MB2WC(buf
, psz
, n
);
2055 if ( (m_CodePage
< 50000 && m_CodePage
!= CP_SYMBOL
) &&
2056 IsAtLeastWin2kSP4() )
2058 flags
= MB_ERR_INVALID_CHARS
;
2061 const size_t len
= ::MultiByteToWideChar
2063 m_CodePage
, // code page
2064 flags
, // flags: fall on error
2065 psz
, // input string
2066 -1, // its length (NUL-terminated)
2067 buf
, // output string
2068 buf
? n
: 0 // size of output buffer
2072 // function totally failed
2073 return wxCONV_FAILED
;
2076 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2077 // check if we succeeded, by doing a double trip:
2078 if ( !flags
&& buf
)
2080 const size_t mbLen
= strlen(psz
);
2081 wxCharBuffer
mbBuf(mbLen
);
2082 if ( ::WideCharToMultiByte
2089 mbLen
+ 1, // size in bytes, not length
2093 strcmp(mbBuf
, psz
) != 0 )
2095 // we didn't obtain the same thing we started from, hence
2096 // the conversion was lossy and we consider that it failed
2097 return wxCONV_FAILED
;
2101 // note that it returns count of written chars for buf != NULL and size
2102 // of the needed buffer for buf == NULL so in either case the length of
2103 // the string (which never includes the terminating NUL) is one less
2107 virtual size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
2110 we have a problem here: by default, WideCharToMultiByte() may
2111 replace characters unrepresentable in the target code page with bad
2112 quality approximations such as turning "1/2" symbol (U+00BD) into
2113 "1" for the code pages which don't have it and we, obviously, want
2114 to avoid this at any price
2116 the trouble is that this function does it _silently_, i.e. it won't
2117 even tell us whether it did or not... Win98/2000 and higher provide
2118 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2119 we have to resort to a round trip, i.e. check that converting back
2120 results in the same string -- this is, of course, expensive but
2121 otherwise we simply can't be sure to not garble the data.
2124 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2125 // it doesn't work with CJK encodings (which we test for rather roughly
2126 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2128 BOOL usedDef
wxDUMMY_INITIALIZE(false);
2131 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
2133 // it's our lucky day
2134 flags
= WC_NO_BEST_FIT_CHARS
;
2135 pUsedDef
= &usedDef
;
2137 else // old system or unsupported encoding
2143 const size_t len
= ::WideCharToMultiByte
2145 m_CodePage
, // code page
2146 flags
, // either none or no best fit
2147 pwz
, // input string
2148 -1, // it is (wide) NUL-terminated
2149 buf
, // output buffer
2150 buf
? n
: 0, // and its size
2151 NULL
, // default "replacement" char
2152 pUsedDef
// [out] was it used?
2157 // function totally failed
2158 return wxCONV_FAILED
;
2161 // if we were really converting, check if we succeeded
2166 // check if the conversion failed, i.e. if any replacements
2169 return wxCONV_FAILED
;
2171 else // we must resort to double tripping...
2173 wxWCharBuffer
wcBuf(n
);
2174 if ( MB2WC(wcBuf
.data(), buf
, n
) == wxCONV_FAILED
||
2175 wcscmp(wcBuf
, pwz
) != 0 )
2177 // we didn't obtain the same thing we started from, hence
2178 // the conversion was lossy and we consider that it failed
2179 return wxCONV_FAILED
;
2184 // see the comment above for the reason of "len - 1"
2188 virtual size_t GetMBNulLen() const
2190 if ( m_minMBCharWidth
== 0 )
2192 int len
= ::WideCharToMultiByte
2194 m_CodePage
, // code page
2196 L
"", // input string
2197 1, // translate just the NUL
2198 NULL
, // output buffer
2200 NULL
, // no replacement char
2201 NULL
// [out] don't care if it was used
2204 wxMBConv_win32
* const self
= wxConstCast(this, wxMBConv_win32
);
2208 wxLogDebug(_T("Unexpected NUL length %d"), len
);
2209 self
->m_minMBCharWidth
= (size_t)-1;
2213 self
->m_minMBCharWidth
= (size_t)-1;
2219 self
->m_minMBCharWidth
= len
;
2224 return m_minMBCharWidth
;
2227 virtual wxMBConv
*Clone() const { return new wxMBConv_win32(*this); }
2229 bool IsOk() const { return m_CodePage
!= -1; }
2232 static bool CanUseNoBestFit()
2234 static int s_isWin98Or2k
= -1;
2236 if ( s_isWin98Or2k
== -1 )
2239 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
2241 case wxOS_WINDOWS_9X
:
2242 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
2245 case wxOS_WINDOWS_NT
:
2246 s_isWin98Or2k
= verMaj
>= 5;
2250 // unknown: be conservative by default
2255 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
2258 return s_isWin98Or2k
== 1;
2261 static bool IsAtLeastWin2kSP4()
2266 static int s_isAtLeastWin2kSP4
= -1;
2268 if ( s_isAtLeastWin2kSP4
== -1 )
2270 OSVERSIONINFOEX ver
;
2272 memset(&ver
, 0, sizeof(ver
));
2273 ver
.dwOSVersionInfoSize
= sizeof(ver
);
2274 GetVersionEx((OSVERSIONINFO
*)&ver
);
2276 s_isAtLeastWin2kSP4
=
2277 ((ver
.dwMajorVersion
> 5) || // Vista+
2278 (ver
.dwMajorVersion
== 5 && ver
.dwMinorVersion
> 0) || // XP/2003
2279 (ver
.dwMajorVersion
== 5 && ver
.dwMinorVersion
== 0 &&
2280 ver
.wServicePackMajor
>= 4)) // 2000 SP4+
2284 return s_isAtLeastWin2kSP4
== 1;
2289 // the code page we're working with
2292 // cached result of GetMBNulLen(), set to 0 initially meaning
2294 size_t m_minMBCharWidth
;
2297 #endif // wxHAVE_WIN32_MB2WC
2299 // ============================================================================
2300 // Cocoa conversion classes
2301 // ============================================================================
2303 #if defined(__WXCOCOA__)
2305 // RN: There is no UTF-32 support in either Core Foundation or Cocoa.
2306 // Strangely enough, Core Foundation uses
2307 // UTF-32 internally quite a bit - it's just not public (yet).
2309 #include <CoreFoundation/CFString.h>
2310 #include <CoreFoundation/CFStringEncodingExt.h>
2312 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
2314 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
2318 case wxFONTENCODING_DEFAULT
:
2319 enc
= CFStringGetSystemEncoding();
2322 case wxFONTENCODING_ISO8859_1
:
2323 enc
= kCFStringEncodingISOLatin1
;
2325 case wxFONTENCODING_ISO8859_2
:
2326 enc
= kCFStringEncodingISOLatin2
;
2328 case wxFONTENCODING_ISO8859_3
:
2329 enc
= kCFStringEncodingISOLatin3
;
2331 case wxFONTENCODING_ISO8859_4
:
2332 enc
= kCFStringEncodingISOLatin4
;
2334 case wxFONTENCODING_ISO8859_5
:
2335 enc
= kCFStringEncodingISOLatinCyrillic
;
2337 case wxFONTENCODING_ISO8859_6
:
2338 enc
= kCFStringEncodingISOLatinArabic
;
2340 case wxFONTENCODING_ISO8859_7
:
2341 enc
= kCFStringEncodingISOLatinGreek
;
2343 case wxFONTENCODING_ISO8859_8
:
2344 enc
= kCFStringEncodingISOLatinHebrew
;
2346 case wxFONTENCODING_ISO8859_9
:
2347 enc
= kCFStringEncodingISOLatin5
;
2349 case wxFONTENCODING_ISO8859_10
:
2350 enc
= kCFStringEncodingISOLatin6
;
2352 case wxFONTENCODING_ISO8859_11
:
2353 enc
= kCFStringEncodingISOLatinThai
;
2355 case wxFONTENCODING_ISO8859_13
:
2356 enc
= kCFStringEncodingISOLatin7
;
2358 case wxFONTENCODING_ISO8859_14
:
2359 enc
= kCFStringEncodingISOLatin8
;
2361 case wxFONTENCODING_ISO8859_15
:
2362 enc
= kCFStringEncodingISOLatin9
;
2365 case wxFONTENCODING_KOI8
:
2366 enc
= kCFStringEncodingKOI8_R
;
2368 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
2369 enc
= kCFStringEncodingDOSRussian
;
2372 // case wxFONTENCODING_BULGARIAN :
2376 case wxFONTENCODING_CP437
:
2377 enc
= kCFStringEncodingDOSLatinUS
;
2379 case wxFONTENCODING_CP850
:
2380 enc
= kCFStringEncodingDOSLatin1
;
2382 case wxFONTENCODING_CP852
:
2383 enc
= kCFStringEncodingDOSLatin2
;
2385 case wxFONTENCODING_CP855
:
2386 enc
= kCFStringEncodingDOSCyrillic
;
2388 case wxFONTENCODING_CP866
:
2389 enc
= kCFStringEncodingDOSRussian
;
2391 case wxFONTENCODING_CP874
:
2392 enc
= kCFStringEncodingDOSThai
;
2394 case wxFONTENCODING_CP932
:
2395 enc
= kCFStringEncodingDOSJapanese
;
2397 case wxFONTENCODING_CP936
:
2398 enc
= kCFStringEncodingDOSChineseSimplif
;
2400 case wxFONTENCODING_CP949
:
2401 enc
= kCFStringEncodingDOSKorean
;
2403 case wxFONTENCODING_CP950
:
2404 enc
= kCFStringEncodingDOSChineseTrad
;
2406 case wxFONTENCODING_CP1250
:
2407 enc
= kCFStringEncodingWindowsLatin2
;
2409 case wxFONTENCODING_CP1251
:
2410 enc
= kCFStringEncodingWindowsCyrillic
;
2412 case wxFONTENCODING_CP1252
:
2413 enc
= kCFStringEncodingWindowsLatin1
;
2415 case wxFONTENCODING_CP1253
:
2416 enc
= kCFStringEncodingWindowsGreek
;
2418 case wxFONTENCODING_CP1254
:
2419 enc
= kCFStringEncodingWindowsLatin5
;
2421 case wxFONTENCODING_CP1255
:
2422 enc
= kCFStringEncodingWindowsHebrew
;
2424 case wxFONTENCODING_CP1256
:
2425 enc
= kCFStringEncodingWindowsArabic
;
2427 case wxFONTENCODING_CP1257
:
2428 enc
= kCFStringEncodingWindowsBalticRim
;
2430 // This only really encodes to UTF7 (if that) evidently
2431 // case wxFONTENCODING_UTF7 :
2432 // enc = kCFStringEncodingNonLossyASCII ;
2434 case wxFONTENCODING_UTF8
:
2435 enc
= kCFStringEncodingUTF8
;
2437 case wxFONTENCODING_EUC_JP
:
2438 enc
= kCFStringEncodingEUC_JP
;
2440 case wxFONTENCODING_UTF16
:
2441 enc
= kCFStringEncodingUnicode
;
2443 case wxFONTENCODING_MACROMAN
:
2444 enc
= kCFStringEncodingMacRoman
;
2446 case wxFONTENCODING_MACJAPANESE
:
2447 enc
= kCFStringEncodingMacJapanese
;
2449 case wxFONTENCODING_MACCHINESETRAD
:
2450 enc
= kCFStringEncodingMacChineseTrad
;
2452 case wxFONTENCODING_MACKOREAN
:
2453 enc
= kCFStringEncodingMacKorean
;
2455 case wxFONTENCODING_MACARABIC
:
2456 enc
= kCFStringEncodingMacArabic
;
2458 case wxFONTENCODING_MACHEBREW
:
2459 enc
= kCFStringEncodingMacHebrew
;
2461 case wxFONTENCODING_MACGREEK
:
2462 enc
= kCFStringEncodingMacGreek
;
2464 case wxFONTENCODING_MACCYRILLIC
:
2465 enc
= kCFStringEncodingMacCyrillic
;
2467 case wxFONTENCODING_MACDEVANAGARI
:
2468 enc
= kCFStringEncodingMacDevanagari
;
2470 case wxFONTENCODING_MACGURMUKHI
:
2471 enc
= kCFStringEncodingMacGurmukhi
;
2473 case wxFONTENCODING_MACGUJARATI
:
2474 enc
= kCFStringEncodingMacGujarati
;
2476 case wxFONTENCODING_MACORIYA
:
2477 enc
= kCFStringEncodingMacOriya
;
2479 case wxFONTENCODING_MACBENGALI
:
2480 enc
= kCFStringEncodingMacBengali
;
2482 case wxFONTENCODING_MACTAMIL
:
2483 enc
= kCFStringEncodingMacTamil
;
2485 case wxFONTENCODING_MACTELUGU
:
2486 enc
= kCFStringEncodingMacTelugu
;
2488 case wxFONTENCODING_MACKANNADA
:
2489 enc
= kCFStringEncodingMacKannada
;
2491 case wxFONTENCODING_MACMALAJALAM
:
2492 enc
= kCFStringEncodingMacMalayalam
;
2494 case wxFONTENCODING_MACSINHALESE
:
2495 enc
= kCFStringEncodingMacSinhalese
;
2497 case wxFONTENCODING_MACBURMESE
:
2498 enc
= kCFStringEncodingMacBurmese
;
2500 case wxFONTENCODING_MACKHMER
:
2501 enc
= kCFStringEncodingMacKhmer
;
2503 case wxFONTENCODING_MACTHAI
:
2504 enc
= kCFStringEncodingMacThai
;
2506 case wxFONTENCODING_MACLAOTIAN
:
2507 enc
= kCFStringEncodingMacLaotian
;
2509 case wxFONTENCODING_MACGEORGIAN
:
2510 enc
= kCFStringEncodingMacGeorgian
;
2512 case wxFONTENCODING_MACARMENIAN
:
2513 enc
= kCFStringEncodingMacArmenian
;
2515 case wxFONTENCODING_MACCHINESESIMP
:
2516 enc
= kCFStringEncodingMacChineseSimp
;
2518 case wxFONTENCODING_MACTIBETAN
:
2519 enc
= kCFStringEncodingMacTibetan
;
2521 case wxFONTENCODING_MACMONGOLIAN
:
2522 enc
= kCFStringEncodingMacMongolian
;
2524 case wxFONTENCODING_MACETHIOPIC
:
2525 enc
= kCFStringEncodingMacEthiopic
;
2527 case wxFONTENCODING_MACCENTRALEUR
:
2528 enc
= kCFStringEncodingMacCentralEurRoman
;
2530 case wxFONTENCODING_MACVIATNAMESE
:
2531 enc
= kCFStringEncodingMacVietnamese
;
2533 case wxFONTENCODING_MACARABICEXT
:
2534 enc
= kCFStringEncodingMacExtArabic
;
2536 case wxFONTENCODING_MACSYMBOL
:
2537 enc
= kCFStringEncodingMacSymbol
;
2539 case wxFONTENCODING_MACDINGBATS
:
2540 enc
= kCFStringEncodingMacDingbats
;
2542 case wxFONTENCODING_MACTURKISH
:
2543 enc
= kCFStringEncodingMacTurkish
;
2545 case wxFONTENCODING_MACCROATIAN
:
2546 enc
= kCFStringEncodingMacCroatian
;
2548 case wxFONTENCODING_MACICELANDIC
:
2549 enc
= kCFStringEncodingMacIcelandic
;
2551 case wxFONTENCODING_MACROMANIAN
:
2552 enc
= kCFStringEncodingMacRomanian
;
2554 case wxFONTENCODING_MACCELTIC
:
2555 enc
= kCFStringEncodingMacCeltic
;
2557 case wxFONTENCODING_MACGAELIC
:
2558 enc
= kCFStringEncodingMacGaelic
;
2560 // case wxFONTENCODING_MACKEYBOARD :
2561 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2565 // because gcc is picky
2572 class wxMBConv_cocoa
: public wxMBConv
2577 Init(CFStringGetSystemEncoding()) ;
2580 wxMBConv_cocoa(const wxMBConv_cocoa
& conv
)
2582 m_encoding
= conv
.m_encoding
;
2586 wxMBConv_cocoa(const wxChar
* name
)
2588 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2592 wxMBConv_cocoa(wxFontEncoding encoding
)
2594 Init( wxCFStringEncFromFontEnc(encoding
) );
2597 virtual ~wxMBConv_cocoa()
2601 void Init( CFStringEncoding encoding
)
2603 m_encoding
= encoding
;
2606 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2610 CFStringRef theString
= CFStringCreateWithBytes (
2611 NULL
, //the allocator
2612 (const UInt8
*)szUnConv
,
2615 false //no BOM/external representation
2618 wxASSERT(theString
);
2620 size_t nOutLength
= CFStringGetLength(theString
);
2624 CFRelease(theString
);
2628 CFRange theRange
= { 0, nOutSize
};
2630 #if SIZEOF_WCHAR_T == 4
2631 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2634 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2636 CFRelease(theString
);
2638 szUniCharBuffer
[nOutLength
] = '\0';
2640 #if SIZEOF_WCHAR_T == 4
2641 wxMBConvUTF16 converter
;
2642 converter
.MB2WC( szOut
, (const char*)szUniCharBuffer
, nOutSize
);
2643 delete [] szUniCharBuffer
;
2649 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2653 size_t nRealOutSize
;
2654 size_t nBufSize
= wxWcslen(szUnConv
);
2655 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
2657 #if SIZEOF_WCHAR_T == 4
2658 wxMBConvUTF16 converter
;
2659 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2660 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1];
2661 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
));
2662 nBufSize
/= sizeof(UniChar
);
2665 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2669 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
2672 wxASSERT(theString
);
2674 //Note that CER puts a BOM when converting to unicode
2675 //so we check and use getchars instead in that case
2676 if (m_encoding
== kCFStringEncodingUnicode
)
2679 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2681 nRealOutSize
= CFStringGetLength(theString
) + 1;
2687 CFRangeMake(0, CFStringGetLength(theString
)),
2689 0, //what to put in characters that can't be converted -
2690 //0 tells CFString to return NULL if it meets such a character
2691 false, //not an external representation
2694 (CFIndex
*) &nRealOutSize
2698 CFRelease(theString
);
2700 #if SIZEOF_WCHAR_T == 4
2701 delete[] szUniBuffer
;
2704 return nRealOutSize
- 1;
2707 virtual wxMBConv
*Clone() const { return new wxMBConv_cocoa(*this); }
2711 return m_encoding
!= kCFStringEncodingInvalidId
&&
2712 CFStringIsEncodingAvailable(m_encoding
);
2716 CFStringEncoding m_encoding
;
2719 #endif // defined(__WXCOCOA__)
2721 // ============================================================================
2722 // Mac conversion classes
2723 // ============================================================================
2725 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2727 class wxMBConv_mac
: public wxMBConv
2732 Init(CFStringGetSystemEncoding()) ;
2735 wxMBConv_mac(const wxMBConv_mac
& conv
)
2737 Init(conv
.m_char_encoding
);
2741 wxMBConv_mac(const char* name
)
2743 Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) );
2747 wxMBConv_mac(wxFontEncoding encoding
)
2749 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
2752 virtual ~wxMBConv_mac()
2754 OSStatus status
= noErr
;
2755 if (m_MB2WC_converter
)
2756 status
= TECDisposeConverter(m_MB2WC_converter
);
2757 if (m_WC2MB_converter
)
2758 status
= TECDisposeConverter(m_WC2MB_converter
);
2761 void Init( TextEncodingBase encoding
,TextEncodingVariant encodingVariant
= kTextEncodingDefaultVariant
,
2762 TextEncodingFormat encodingFormat
= kTextEncodingDefaultFormat
)
2764 m_MB2WC_converter
= NULL
;
2765 m_WC2MB_converter
= NULL
;
2766 m_char_encoding
= CreateTextEncoding(encoding
, encodingVariant
, encodingFormat
) ;
2767 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
, 0, kUnicode16BitFormat
) ;
2770 virtual void CreateIfNeeded() const
2772 if ( m_MB2WC_converter
== NULL
&& m_WC2MB_converter
== NULL
)
2774 OSStatus status
= noErr
;
2775 status
= TECCreateConverter(&m_MB2WC_converter
,
2777 m_unicode_encoding
);
2778 wxASSERT_MSG( status
== noErr
, _("Unable to create TextEncodingConverter")) ;
2779 status
= TECCreateConverter(&m_WC2MB_converter
,
2782 wxASSERT_MSG( status
== noErr
, _("Unable to create TextEncodingConverter")) ;
2786 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2789 OSStatus status
= noErr
;
2790 ByteCount byteOutLen
;
2791 ByteCount byteInLen
= strlen(psz
) + 1;
2792 wchar_t *tbuf
= NULL
;
2793 UniChar
* ubuf
= NULL
;
2798 // Apple specs say at least 32
2799 n
= wxMax( 32, byteInLen
) ;
2800 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
2803 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
2805 #if SIZEOF_WCHAR_T == 4
2806 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2808 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2811 status
= TECConvertText(
2812 m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2813 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2815 #if SIZEOF_WCHAR_T == 4
2816 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2817 // is not properly terminated we get random characters at the end
2818 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2819 wxMBConvUTF16 converter
;
2820 res
= converter
.MB2WC( (buf
? buf
: tbuf
), (const char*)ubuf
, n
) ;
2823 res
= byteOutLen
/ sizeof( UniChar
) ;
2829 if ( buf
&& res
< n
)
2835 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2838 OSStatus status
= noErr
;
2839 ByteCount byteOutLen
;
2840 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2846 // Apple specs say at least 32
2847 n
= wxMax( 32, ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2848 tbuf
= (char*) malloc( n
) ;
2851 ByteCount byteBufferLen
= n
;
2852 UniChar
* ubuf
= NULL
;
2854 #if SIZEOF_WCHAR_T == 4
2855 wxMBConvUTF16 converter
;
2856 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2857 byteInLen
= unicharlen
;
2858 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2859 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2861 ubuf
= (UniChar
*) psz
;
2864 status
= TECConvertText(
2865 m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2866 (TextPtr
) (buf
? buf
: tbuf
), byteBufferLen
, &byteOutLen
);
2868 #if SIZEOF_WCHAR_T == 4
2875 size_t res
= byteOutLen
;
2876 if ( buf
&& res
< n
)
2880 //we need to double-trip to verify it didn't insert any ? in place
2881 //of bogus characters
2882 wxWCharBuffer
wcBuf(n
);
2883 size_t pszlen
= wxWcslen(psz
);
2884 if ( MB2WC(wcBuf
.data(), buf
, n
) == wxCONV_FAILED
||
2885 wxWcslen(wcBuf
) != pszlen
||
2886 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2888 // we didn't obtain the same thing we started from, hence
2889 // the conversion was lossy and we consider that it failed
2890 return wxCONV_FAILED
;
2897 virtual wxMBConv
*Clone() const { return new wxMBConv_mac(*this); }
2902 return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
;
2906 mutable TECObjectRef m_MB2WC_converter
;
2907 mutable TECObjectRef m_WC2MB_converter
;
2909 TextEncodingBase m_char_encoding
;
2910 TextEncodingBase m_unicode_encoding
;
2913 // MB is decomposed (D) normalized UTF8
2915 class wxMBConv_macUTF8D
: public wxMBConv_mac
2920 Init( kTextEncodingUnicodeDefault
, kUnicodeNoSubset
, kUnicodeUTF8Format
) ;
2925 virtual ~wxMBConv_macUTF8D()
2928 DisposeUnicodeToTextInfo(&m_uni
);
2929 if (m_uniBack
!=NULL
)
2930 DisposeUnicodeToTextInfo(&m_uniBack
);
2933 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2936 OSStatus status
= noErr
;
2937 ByteCount byteOutLen
;
2938 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2944 // Apple specs say at least 32
2945 n
= wxMax( 32, ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2946 tbuf
= (char*) malloc( n
) ;
2949 ByteCount byteBufferLen
= n
;
2950 UniChar
* ubuf
= NULL
;
2952 #if SIZEOF_WCHAR_T == 4
2953 wxMBConvUTF16 converter
;
2954 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2955 byteInLen
= unicharlen
;
2956 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2957 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2959 ubuf
= (UniChar
*) psz
;
2962 // ubuf is a non-decomposed UniChar buffer
2964 ByteCount dcubuflen
= byteInLen
* 2 + 2 ;
2965 ByteCount dcubufread
, dcubufwritten
;
2966 UniChar
*dcubuf
= (UniChar
*) malloc( dcubuflen
) ;
2968 ConvertFromUnicodeToText( m_uni
, byteInLen
, ubuf
,
2969 kUnicodeDefaultDirectionMask
, 0, NULL
, NULL
, NULL
, dcubuflen
, &dcubufread
, &dcubufwritten
, dcubuf
) ;
2971 // we now convert that decomposed buffer into UTF8
2973 status
= TECConvertText(
2974 m_WC2MB_converter
, (ConstTextPtr
) dcubuf
, dcubufwritten
, &dcubufread
,
2975 (TextPtr
) (buf
? buf
: tbuf
), byteBufferLen
, &byteOutLen
);
2979 #if SIZEOF_WCHAR_T == 4
2986 size_t res
= byteOutLen
;
2987 if ( buf
&& res
< n
)
2990 // don't test for round-trip fidelity yet, we cannot guarantee it yet
2996 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2999 OSStatus status
= noErr
;
3000 ByteCount byteOutLen
;
3001 ByteCount byteInLen
= strlen(psz
) + 1;
3002 wchar_t *tbuf
= NULL
;
3003 UniChar
* ubuf
= NULL
;
3008 // Apple specs say at least 32
3009 n
= wxMax( 32, byteInLen
) ;
3010 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
3013 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
3015 #if SIZEOF_WCHAR_T == 4
3016 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
3018 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
3021 ByteCount dcubuflen
= byteBufferLen
* 2 + 2 ;
3022 ByteCount dcubufread
, dcubufwritten
;
3023 UniChar
*dcubuf
= (UniChar
*) malloc( dcubuflen
) ;
3025 status
= TECConvertText(
3026 m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
3027 (TextPtr
) dcubuf
, dcubuflen
, &byteOutLen
);
3028 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
3029 // is not properly terminated we get random characters at the end
3030 dcubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
3032 // now from the decomposed UniChar to properly composed uniChar
3033 ConvertFromUnicodeToText( m_uniBack
, byteOutLen
, dcubuf
,
3034 kUnicodeDefaultDirectionMask
, 0, NULL
, NULL
, NULL
, dcubuflen
, &dcubufread
, &dcubufwritten
, ubuf
) ;
3037 byteOutLen
= dcubufwritten
;
3038 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
3041 #if SIZEOF_WCHAR_T == 4
3042 wxMBConvUTF16 converter
;
3043 res
= converter
.MB2WC( (buf
? buf
: tbuf
), (const char*)ubuf
, n
) ;
3046 res
= byteOutLen
/ sizeof( UniChar
) ;
3052 if ( buf
&& res
< n
)
3058 virtual void CreateIfNeeded() const
3060 wxMBConv_mac::CreateIfNeeded() ;
3061 if ( m_uni
== NULL
)
3063 m_map
.unicodeEncoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
3064 kUnicodeNoSubset
, kTextEncodingDefaultFormat
);
3065 m_map
.otherEncoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
3066 kUnicodeCanonicalDecompVariant
, kTextEncodingDefaultFormat
);
3067 m_map
.mappingVersion
= kUnicodeUseLatestMapping
;
3069 OSStatus err
= CreateUnicodeToTextInfo(&m_map
, &m_uni
);
3070 wxASSERT_MSG( err
== noErr
, _(" Couldn't create the UnicodeConverter")) ;
3072 m_map
.unicodeEncoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
3073 kUnicodeNoSubset
, kTextEncodingDefaultFormat
);
3074 m_map
.otherEncoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
3075 kUnicodeCanonicalCompVariant
, kTextEncodingDefaultFormat
);
3076 m_map
.mappingVersion
= kUnicodeUseLatestMapping
;
3077 err
= CreateUnicodeToTextInfo(&m_map
, &m_uniBack
);
3078 wxASSERT_MSG( err
== noErr
, _(" Couldn't create the UnicodeConverter")) ;
3082 mutable UnicodeToTextInfo m_uni
;
3083 mutable UnicodeToTextInfo m_uniBack
;
3084 mutable UnicodeMapping m_map
;
3086 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
3088 // ============================================================================
3089 // wxEncodingConverter based conversion classes
3090 // ============================================================================
3094 class wxMBConv_wxwin
: public wxMBConv
3099 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
3100 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
3104 // temporarily just use wxEncodingConverter stuff,
3105 // so that it works while a better implementation is built
3106 wxMBConv_wxwin(const char* name
)
3109 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
3111 m_enc
= wxFONTENCODING_SYSTEM
;
3116 wxMBConv_wxwin(wxFontEncoding enc
)
3123 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
3125 size_t inbuf
= strlen(psz
);
3128 if (!m2w
.Convert(psz
, buf
))
3129 return wxCONV_FAILED
;
3134 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
3136 const size_t inbuf
= wxWcslen(psz
);
3139 if (!w2m
.Convert(psz
, buf
))
3140 return wxCONV_FAILED
;
3146 virtual size_t GetMBNulLen() const
3150 case wxFONTENCODING_UTF16BE
:
3151 case wxFONTENCODING_UTF16LE
:
3154 case wxFONTENCODING_UTF32BE
:
3155 case wxFONTENCODING_UTF32LE
:
3163 virtual wxMBConv
*Clone() const { return new wxMBConv_wxwin(m_enc
); }
3165 bool IsOk() const { return m_ok
; }
3168 wxFontEncoding m_enc
;
3169 wxEncodingConverter m2w
, w2m
;
3172 // were we initialized successfully?
3175 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
3178 // make the constructors available for unit testing
3179 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_wxwin( const char* name
)
3181 wxMBConv_wxwin
* result
= new wxMBConv_wxwin( name
);
3182 if ( !result
->IsOk() )
3191 #endif // wxUSE_FONTMAP
3193 // ============================================================================
3194 // wxCSConv implementation
3195 // ============================================================================
3197 void wxCSConv::Init()
3204 wxCSConv::wxCSConv(const wxString
& charset
)
3208 if ( !charset
.empty() )
3210 SetName(charset
.ToAscii());
3214 m_encoding
= wxFontMapperBase::GetEncodingFromName(charset
);
3216 m_encoding
= wxFONTENCODING_SYSTEM
;
3220 wxCSConv::wxCSConv(wxFontEncoding encoding
)
3222 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
3224 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
3226 encoding
= wxFONTENCODING_SYSTEM
;
3231 m_encoding
= encoding
;
3234 wxCSConv::~wxCSConv()
3239 wxCSConv::wxCSConv(const wxCSConv
& conv
)
3244 SetName(conv
.m_name
);
3245 m_encoding
= conv
.m_encoding
;
3248 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
3252 SetName(conv
.m_name
);
3253 m_encoding
= conv
.m_encoding
;
3258 void wxCSConv::Clear()
3267 void wxCSConv::SetName(const char *charset
)
3271 m_name
= strdup(charset
);
3278 WX_DECLARE_HASH_MAP( wxFontEncoding
, wxString
, wxIntegerHash
, wxIntegerEqual
,
3279 wxEncodingNameCache
);
3281 static wxEncodingNameCache gs_nameCache
;
3284 wxMBConv
*wxCSConv::DoCreate() const
3287 wxLogTrace(TRACE_STRCONV
,
3288 wxT("creating conversion for %s"),
3290 : (const char*)wxFontMapperBase::GetEncodingName(m_encoding
).mb_str()));
3291 #endif // wxUSE_FONTMAP
3293 // check for the special case of ASCII or ISO8859-1 charset: as we have
3294 // special knowledge of it anyhow, we don't need to create a special
3295 // conversion object
3296 if ( m_encoding
== wxFONTENCODING_ISO8859_1
||
3297 m_encoding
== wxFONTENCODING_DEFAULT
)
3299 // don't convert at all
3303 // we trust OS to do conversion better than we can so try external
3304 // conversion methods first
3306 // the full order is:
3307 // 1. OS conversion (iconv() under Unix or Win32 API)
3308 // 2. hard coded conversions for UTF
3309 // 3. wxEncodingConverter as fall back
3315 #endif // !wxUSE_FONTMAP
3318 wxFontEncoding
encoding(m_encoding
);
3323 wxMBConv_iconv
*conv
= new wxMBConv_iconv(m_name
);
3331 wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
3332 #endif // wxUSE_FONTMAP
3336 const wxEncodingNameCache::iterator it
= gs_nameCache
.find(encoding
);
3337 if ( it
!= gs_nameCache
.end() )
3339 if ( it
->second
.empty() )
3342 wxMBConv_iconv
*conv
= new wxMBConv_iconv(it
->second
.ToAscii());
3349 const wxChar
** names
= wxFontMapperBase::GetAllEncodingNames(encoding
);
3350 // CS : in case this does not return valid names (e.g. for MacRoman)
3351 // encoding got a 'failure' entry in the cache all the same,
3352 // although it just has to be created using a different method, so
3353 // only store failed iconv creation attempts (or perhaps we
3354 // shouldn't do this at all ?)
3355 if ( names
[0] != NULL
)
3357 for ( ; *names
; ++names
)
3359 // FIXME-UTF8: wxFontMapperBase::GetAllEncodingNames()
3360 // will need changes that will obsolete this
3361 wxString
name(*names
);
3362 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
.ToAscii());
3365 gs_nameCache
[encoding
] = *names
;
3372 gs_nameCache
[encoding
] = _T(""); // cache the failure
3375 #endif // wxUSE_FONTMAP
3377 #endif // HAVE_ICONV
3379 #ifdef wxHAVE_WIN32_MB2WC
3382 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
3383 : new wxMBConv_win32(m_encoding
);
3392 #endif // wxHAVE_WIN32_MB2WC
3394 #if defined(__WXMAC__)
3396 // leave UTF16 and UTF32 to the built-ins of wx
3397 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
3398 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
3401 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
3402 : new wxMBConv_mac(m_encoding
);
3404 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
3414 #if defined(__WXCOCOA__)
3416 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
3419 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
3420 : new wxMBConv_cocoa(m_encoding
);
3422 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
3433 wxFontEncoding enc
= m_encoding
;
3435 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
3437 // use "false" to suppress interactive dialogs -- we can be called from
3438 // anywhere and popping up a dialog from here is the last thing we want to
3440 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
3442 #endif // wxUSE_FONTMAP
3446 case wxFONTENCODING_UTF7
:
3447 return new wxMBConvUTF7
;
3449 case wxFONTENCODING_UTF8
:
3450 return new wxMBConvUTF8
;
3452 case wxFONTENCODING_UTF16BE
:
3453 return new wxMBConvUTF16BE
;
3455 case wxFONTENCODING_UTF16LE
:
3456 return new wxMBConvUTF16LE
;
3458 case wxFONTENCODING_UTF32BE
:
3459 return new wxMBConvUTF32BE
;
3461 case wxFONTENCODING_UTF32LE
:
3462 return new wxMBConvUTF32LE
;
3465 // nothing to do but put here to suppress gcc warnings
3472 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
3473 : new wxMBConv_wxwin(m_encoding
);
3479 #endif // wxUSE_FONTMAP
3481 // NB: This is a hack to prevent deadlock. What could otherwise happen
3482 // in Unicode build: wxConvLocal creation ends up being here
3483 // because of some failure and logs the error. But wxLog will try to
3484 // attach a timestamp, for which it will need wxConvLocal (to convert
3485 // time to char* and then wchar_t*), but that fails, tries to log the
3486 // error, but wxLog has an (already locked) critical section that
3487 // guards the static buffer.
3488 static bool alreadyLoggingError
= false;
3489 if (!alreadyLoggingError
)
3491 alreadyLoggingError
= true;
3492 wxLogError(_("Cannot convert from the charset '%s'!"),
3496 (const char*)wxFontMapperBase::GetEncodingDescription(m_encoding
).ToAscii()
3497 #else // !wxUSE_FONTMAP
3498 (const char*)wxString::Format(_("encoding %i"), m_encoding
).ToAscii()
3499 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
3502 alreadyLoggingError
= false;
3508 void wxCSConv::CreateConvIfNeeded() const
3512 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
3514 // if we have neither the name nor the encoding, use the default
3515 // encoding for this system
3516 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
3519 self
->m_encoding
= wxLocale::GetSystemEncoding();
3521 // fallback to some reasonable default:
3522 self
->m_encoding
= wxFONTENCODING_ISO8859_1
;
3523 #endif // wxUSE_INTL
3526 self
->m_convReal
= DoCreate();
3527 self
->m_deferred
= false;
3531 bool wxCSConv::IsOk() const
3533 CreateConvIfNeeded();
3535 // special case: no convReal created for wxFONTENCODING_ISO8859_1
3536 if ( m_encoding
== wxFONTENCODING_ISO8859_1
)
3537 return true; // always ok as we do it ourselves
3539 // m_convReal->IsOk() is called at its own creation, so we know it must
3540 // be ok if m_convReal is non-NULL
3541 return m_convReal
!= NULL
;
3544 size_t wxCSConv::ToWChar(wchar_t *dst
, size_t dstLen
,
3545 const char *src
, size_t srcLen
) const
3547 CreateConvIfNeeded();
3550 return m_convReal
->ToWChar(dst
, dstLen
, src
, srcLen
);
3553 return wxMBConv::ToWChar(dst
, dstLen
, src
, srcLen
);
3556 size_t wxCSConv::FromWChar(char *dst
, size_t dstLen
,
3557 const wchar_t *src
, size_t srcLen
) const
3559 CreateConvIfNeeded();
3562 return m_convReal
->FromWChar(dst
, dstLen
, src
, srcLen
);
3565 return wxMBConv::FromWChar(dst
, dstLen
, src
, srcLen
);
3568 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
3570 CreateConvIfNeeded();
3573 return m_convReal
->MB2WC(buf
, psz
, n
);
3576 size_t len
= strlen(psz
);
3580 for (size_t c
= 0; c
<= len
; c
++)
3581 buf
[c
] = (unsigned char)(psz
[c
]);
3587 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
3589 CreateConvIfNeeded();
3592 return m_convReal
->WC2MB(buf
, psz
, n
);
3595 const size_t len
= wxWcslen(psz
);
3598 for (size_t c
= 0; c
<= len
; c
++)
3601 return wxCONV_FAILED
;
3603 buf
[c
] = (char)psz
[c
];
3608 for (size_t c
= 0; c
<= len
; c
++)
3611 return wxCONV_FAILED
;
3618 size_t wxCSConv::GetMBNulLen() const
3620 CreateConvIfNeeded();
3624 return m_convReal
->GetMBNulLen();
3627 // otherwise, we are ISO-8859-1
3631 #if wxUSE_UNICODE_UTF8
3632 bool wxCSConv::IsUTF8() const
3634 CreateConvIfNeeded();
3638 return m_convReal
->IsUTF8();
3641 // otherwise, we are ISO-8859-1
3649 wxWCharBuffer
wxSafeConvertMB2WX(const char *s
)
3652 return wxWCharBuffer();
3654 wxWCharBuffer
wbuf(wxConvLibc
.cMB2WX(s
));
3656 wbuf
= wxMBConvUTF8().cMB2WX(s
);
3658 wbuf
= wxConvISO8859_1
.cMB2WX(s
);
3663 wxCharBuffer
wxSafeConvertWX2MB(const wchar_t *ws
)
3666 return wxCharBuffer();
3668 wxCharBuffer
buf(wxConvLibc
.cWX2MB(ws
));
3670 buf
= wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
).cWX2MB(ws
);
3675 #endif // wxUSE_UNICODE
3677 // ----------------------------------------------------------------------------
3679 // ----------------------------------------------------------------------------
3681 // NB: The reason why we create converter objects in this convoluted way,
3682 // using a factory function instead of global variable, is that they
3683 // may be used at static initialization time (some of them are used by
3684 // wxString ctors and there may be a global wxString object). In other
3685 // words, possibly _before_ the converter global object would be
3692 #undef wxConvISO8859_1
3694 #define WX_DEFINE_GLOBAL_CONV2(klass, impl_klass, name, ctor_args) \
3695 WXDLLIMPEXP_DATA_BASE(klass*) name##Ptr = NULL; \
3696 WXDLLIMPEXP_BASE klass* wxGet_##name##Ptr() \
3698 static impl_klass name##Obj ctor_args; \
3699 return &name##Obj; \
3701 /* this ensures that all global converter objects are created */ \
3702 /* by the time static initialization is done, i.e. before any */ \
3703 /* thread is launched: */ \
3704 static klass* gs_##name##instance = wxGet_##name##Ptr()
3706 #define WX_DEFINE_GLOBAL_CONV(klass, name, ctor_args) \
3707 WX_DEFINE_GLOBAL_CONV2(klass, klass, name, ctor_args)
3710 WX_DEFINE_GLOBAL_CONV2(wxMBConv
, wxMBConv_win32
, wxConvLibc
, wxEMPTY_PARAMETER_VALUE
);
3711 #elif defined(__WXMAC__) && !defined(__MACH__)
3712 WX_DEFINE_GLOBAL_CONV2(wxMBConv
, wxMBConv_mac
, wxConvLibc
, wxEMPTY_PARAMETER_VALUE
);
3714 WX_DEFINE_GLOBAL_CONV2(wxMBConv
, wxMBConvLibc
, wxConvLibc
, wxEMPTY_PARAMETER_VALUE
);
3717 WX_DEFINE_GLOBAL_CONV(wxMBConvUTF8
, wxConvUTF8
, wxEMPTY_PARAMETER_VALUE
);
3718 WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7
, wxConvUTF7
, wxEMPTY_PARAMETER_VALUE
);
3720 WX_DEFINE_GLOBAL_CONV(wxCSConv
, wxConvLocal
, (wxFONTENCODING_SYSTEM
));
3721 WX_DEFINE_GLOBAL_CONV(wxCSConv
, wxConvISO8859_1
, (wxFONTENCODING_ISO8859_1
));
3723 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= wxGet_wxConvLibcPtr();
3724 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvUI
= wxGet_wxConvLocalPtr();
3726 #if defined(__WXMAC__) && defined(TARGET_CARBON)
3727 static wxMBConv_macUTF8D wxConvMacUTF8DObj
;
3729 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
=
3731 #if defined(__WXMAC__) && defined(TARGET_CARBON)
3734 wxGet_wxConvUTF8Ptr();
3737 wxGet_wxConvLibcPtr();
3738 #endif // __WXOSX__/!__WXOSX__
3740 #else // !wxUSE_WCHAR_T
3742 // FIXME-UTF8: remove this, wxUSE_WCHAR_T is required now
3743 // stand-ins in absence of wchar_t
3744 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
3749 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T