1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // For compilers that support precompilation, includes "wx.h".
16 #include "wx/wxprec.h"
26 #include "wx/hashmap.h"
29 #include "wx/strconv.h"
41 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
42 #include "wx/msw/private.h"
43 #include "wx/msw/missing.h"
44 #define wxHAVE_WIN32_MB2WC
53 #include "wx/thread.h"
56 #include "wx/encconv.h"
57 #include "wx/fontmap.h"
60 #include "wx/mac/corefoundation/private/strconv_cf.h"
61 #endif //def __DARWIN__
64 #define TRACE_STRCONV _T("strconv")
66 // WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
68 #if SIZEOF_WCHAR_T == 2
73 // ============================================================================
75 // ============================================================================
77 // helper function of cMB2WC(): check if n bytes at this location are all NUL
// The visible loop walks forward from p for as long as bytes remain (n != 0)
// and the byte just read is '\0'. Callers in this file treat a true result as
// "these n bytes are NOT all NUL", i.e. no complete terminator is present.
78 static bool NotAllNULs(const char *p
, size_t n
)
80 while ( n
&& *p
++ == '\0' )
86 // ----------------------------------------------------------------------------
87 // UTF-16 en/decoding to/from UCS-4 with surrogates handling
88 // ----------------------------------------------------------------------------
// Encodes the code point `input` as UTF-16 code units stored through `output`.
// Visible outcomes: a single unit holding the value truncated to 16 bits,
// wxCONV_FAILED for values >= 0x110000 (beyond the Unicode range), or a
// surrogate pair.
// NOTE(review): the guards selecting each branch and the success return values
// are not visible in this excerpt; callers in this file compare the result
// with 2 to detect a surrogate pair.
90 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
95 *output
= (wxUint16
) input
;
99 else if (input
>= 0x110000)
101 return wxCONV_FAILED
;
// lead surrogate: 0xd7c0 + (input >> 10) equals 0xd800 + ((input - 0x10000) >> 10)
107 *output
++ = (wxUint16
) ((input
>> 10) + 0xd7c0);
// trail surrogate: 0xdc00 plus the low 10 bits of the code point
108 *output
= (wxUint16
) ((input
& 0x3ff) + 0xdc00);
// Decodes one code point from the UTF-16 code units at `input` into `output`.
// Returns wxCONV_FAILED when the unit following a surrogate is not in the
// 0xdc00..0xdfff range; the success return values are not visible here.
// NOTE(review): the visible checks do not appear to reject a *leading* unit in
// 0xdc00..0xdfff (a trail surrogate in first position) -- confirm.
115 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
// first unit outside the surrogate range 0xd800..0xdfff
117 if ((*input
< 0xd800) || (*input
> 0xdfff))
// first unit is a surrogate: the second unit must lie in 0xdc00..0xdfff
122 else if ((input
[1] < 0xdc00) || (input
[1] > 0xdfff))
125 return wxCONV_FAILED
;
// recombine the pair; this inverts the arithmetic used by encode_utf16()
129 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
// Element type of the buffer accepted by wxDecodeSurrogate(): one of the two
// typedefs below is selected by the WC_UTF16 conditional closed by the #endif.
135 typedef wchar_t wxDecodeSurrogate_t
;
137 typedef wxUint16 wxDecodeSurrogate_t
;
138 #endif // WC_UTF16/!WC_UTF16
140 // returns the next UTF-32 character from the wchar_t buffer and advances the
141 // pointer to the character after this one
143 // if an invalid character is found, *pSrc is set to NULL, the caller must
// (the end of the sentence above is missing from this excerpt)
//
// The decoding itself is delegated to decode_utf16() after reinterpreting the
// buffer as wxUint16 units; a wxCONV_FAILED result marks the invalid case.
145 static wxUint32
wxDecodeSurrogate(const wxDecodeSurrogate_t
**pSrc
)
149 n
= decode_utf16(wx_reinterpret_cast(const wxUint16
*, *pSrc
), out
);
150 if ( n
== wxCONV_FAILED
)
158 // ----------------------------------------------------------------------------
160 // ----------------------------------------------------------------------------
// Default ToWChar() built on top of the legacy MB2WC(): converts the multibyte
// buffer `src` to wide characters, one NUL-terminated chunk at a time.
//   dst, dstLen - output buffer and its size in wchar_t; the size-only calls
//                 in this file pass NULL and 0
//   src, srcLen - input and its length in bytes, or wxNO_LEN if the input is
//                 NUL-terminated
// Returns wxCONV_FAILED on any error; dstWritten accumulates the output size.
163 wxMBConv::ToWChar(wchar_t *dst
, size_t dstLen
,
164 const char *src
, size_t srcLen
) const
166 // although new conversion classes are supposed to implement this function
167 // directly, the existing ones only implement the old MB2WC() and so, to
168 // avoid having to rewrite all conversion classes at once, we provide a
169 // default (but not efficient) implementation of this one in terms of the
170 // old function by copying the input to ensure that it's NUL-terminated and
171 // then using MB2WC() to convert it
173 // the number of chars [which would be] written to dst [if it were not NULL]
174 size_t dstWritten
= 0;
176 // the number of NULs terminating this string
177 size_t nulLen
= 0; // not really needed, but just to avoid warnings
179 // if we were not given the input size we just have to assume that the
180 // string is properly terminated as we have no way of knowing how long it
181 // is anyhow, but if we do have the size check whether there are enough
185 if ( srcLen
!= wxNO_LEN
)
187 // we need to know how to find the end of this string
188 nulLen
= GetMBNulLen();
189 if ( nulLen
== wxCONV_FAILED
)
190 return wxCONV_FAILED
;
192 // if there are enough NULs we can avoid the copy
193 if ( srcLen
< nulLen
|| NotAllNULs(src
+ srcLen
- nulLen
, nulLen
) )
195 // make a copy in order to properly NUL-terminate the string
196 bufTmp
= wxCharBuffer(srcLen
+ nulLen
- 1 /* 1 will be added */);
197 char * const p
= bufTmp
.data();
198 memcpy(p
, src
, srcLen
);
// loop over the nulLen bytes that follow the copied data
199 for ( char *s
= p
+ srcLen
; s
< p
+ srcLen
+ nulLen
; s
++ )
205 srcEnd
= src
+ srcLen
;
207 else // quit after the first loop iteration
214 // try to convert the current chunk
// (a NULL destination makes MB2WC() only compute the length)
215 size_t lenChunk
= MB2WC(NULL
, src
, 0);
216 if ( lenChunk
== wxCONV_FAILED
)
217 return wxCONV_FAILED
;
219 lenChunk
++; // for the L'\0' at the end of this chunk
221 dstWritten
+= lenChunk
;
225 // nothing left in the input string, conversion succeeded
// fail if the chunks converted so far no longer fit in the output buffer
231 if ( dstWritten
> dstLen
)
232 return wxCONV_FAILED
;
234 if ( MB2WC(dst
, src
, lenChunk
) == wxCONV_FAILED
)
235 return wxCONV_FAILED
;
242 // we convert just one chunk in this case as this is the entire
247 // advance the input pointer past the end of this chunk
248 while ( NotAllNULs(src
, nulLen
) )
250 // notice that we must skip over multiple bytes here as we suppose
251 // that if NUL takes 2 or 4 bytes, then all the other characters do
252 // too and so if advanced by a single byte we might erroneously
253 // detect sequences of NUL bytes in the middle of the input
257 src
+= nulLen
; // skipping over its terminator as well
259 // note that ">=" (and not just "==") is needed here as the terminator
260 // we skipped just above could be inside or just after the buffer
261 // delimited by srcEnd
// Default FromWChar() built on top of the legacy WC2MB(): converts the wide
// character buffer `src` to multibyte, one L'\0'-terminated chunk at a time.
//   dst, dstLen - output buffer and its size in bytes; the size-only calls in
//                 this file pass NULL and 0
//   src, srcLen - input and its length in wchar_t, or wxNO_LEN if the input
//                 is NUL-terminated
// Returns wxCONV_FAILED on any error; dstWritten accumulates the output size.
270 wxMBConv::FromWChar(char *dst
, size_t dstLen
,
271 const wchar_t *src
, size_t srcLen
) const
273 // the number of chars [which would be] written to dst [if it were not NULL]
274 size_t dstWritten
= 0;
276 // make a copy of the input string unless it is already properly
279 // if we don't know its length we have no choice but to assume that it is,
280 // indeed, properly terminated
281 wxWCharBuffer bufTmp
;
282 if ( srcLen
== wxNO_LEN
)
// the computed length includes the terminating L'\0'
284 srcLen
= wxWcslen(src
) + 1;
286 else if ( srcLen
!= 0 && src
[srcLen
- 1] != L
'\0' )
288 // make a copy in order to properly NUL-terminate the string
289 bufTmp
= wxWCharBuffer(srcLen
);
290 memcpy(bufTmp
.data(), src
, srcLen
* sizeof(wchar_t));
// size of the multibyte terminator for this conversion
294 const size_t lenNul
= GetMBNulLen();
295 for ( const wchar_t * const srcEnd
= src
+ srcLen
;
297 src
+= wxWcslen(src
) + 1 /* skip L'\0' too */ )
299 // try to convert the current chunk
// (a NULL destination makes WC2MB() only compute the length)
300 size_t lenChunk
= WC2MB(NULL
, src
, 0);
302 if ( lenChunk
== wxCONV_FAILED
)
303 return wxCONV_FAILED
;
306 dstWritten
+= lenChunk
;
// fail if the chunks converted so far no longer fit in the output buffer
310 if ( dstWritten
> dstLen
)
311 return wxCONV_FAILED
;
313 if ( WC2MB(dst
, src
, lenChunk
) == wxCONV_FAILED
)
314 return wxCONV_FAILED
;
// Default MB2WC(): forwards to ToWChar() without an explicit source length
// and, on success, adjusts the result as explained in the comment below.
// NOTE(review): the default ToWChar() in this file is itself written in terms
// of MB2WC(), so presumably derived classes must override at least one of the
// two -- confirm.
323 size_t wxMBConv::MB2WC(wchar_t *outBuff
, const char *inBuff
, size_t outLen
) const
325 size_t rc
= ToWChar(outBuff
, outLen
, inBuff
);
326 if ( rc
!= wxCONV_FAILED
)
328 // ToWChar() returns the buffer length, i.e. including the trailing
329 // NUL, while this method doesn't take it into account
// Default WC2MB(): forwards to FromWChar() without an explicit source length;
// the adjustment applied to a successful result is not visible in this
// excerpt.
// NOTE(review): the default FromWChar() in this file is itself written in
// terms of WC2MB(), so presumably derived classes must override at least one
// of the two -- confirm.
336 size_t wxMBConv::WC2MB(char *outBuff
, const wchar_t *inBuff
, size_t outLen
) const
338 size_t rc
= FromWChar(outBuff
, outLen
, inBuff
);
339 if ( rc
!= wxCONV_FAILED
)
// Out-of-line destructor; it has no work of its own to do.
347 wxMBConv::~wxMBConv()
349 // nothing to do here (necessary for Darwin linking probably)
// Converts the NUL-terminated multibyte string psz into a newly allocated
// wide character buffer using two MB2WC() passes (measure, then convert).
// The empty wxWCharBuffer returned at the end is the failure result.
352 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
356 // calculate the length of the buffer needed first
357 const size_t nLen
= MB2WC(NULL
, psz
, 0);
358 if ( nLen
!= wxCONV_FAILED
)
360 // now do the actual conversion
361 wxWCharBuffer
buf(nLen
/* +1 added implicitly */);
363 // +1 for the trailing NULL
364 if ( MB2WC(buf
.data(), psz
, nLen
+ 1) != wxCONV_FAILED
)
369 return wxWCharBuffer();
// Converts the NUL-terminated wide string pwz into a newly allocated multibyte
// buffer using two WC2MB() passes (measure, then convert).
// The empty wxCharBuffer returned at the end is the failure result.
372 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
// first pass: a NULL destination only computes the required length
376 const size_t nLen
= WC2MB(NULL
, pwz
, 0);
377 if ( nLen
!= wxCONV_FAILED
)
379 // extra space for trailing NUL(s)
// (function-local static: the value is computed only on the first call)
380 static const size_t extraLen
= GetMaxMBNulLen();
382 wxCharBuffer
buf(nLen
+ extraLen
- 1);
383 if ( WC2MB(buf
.data(), pwz
, nLen
+ extraLen
) != wxCONV_FAILED
)
388 return wxCharBuffer();
// Length-aware variant: converts inLen bytes of inBuff to a newly allocated
// wide character buffer using two ToWChar() passes (measure, then convert).
// outLen is an output parameter; the code storing into it is not visible in
// this excerpt. The empty wxWCharBuffer at the end is the failure result.
392 wxMBConv::cMB2WC(const char *inBuff
, size_t inLen
, size_t *outLen
) const
// first pass: NULL destination, only the required length is computed
394 const size_t dstLen
= ToWChar(NULL
, 0, inBuff
, inLen
);
395 if ( dstLen
!= wxCONV_FAILED
)
397 wxWCharBuffer
wbuf(dstLen
- 1);
398 if ( ToWChar(wbuf
.data(), dstLen
, inBuff
, inLen
) != wxCONV_FAILED
)
// check whether the converted data ends with a terminating L'\0'
403 if ( wbuf
[dstLen
- 1] == L
'\0' )
414 return wxWCharBuffer();
// Length-aware variant: converts inLen wide characters of inBuff to a newly
// allocated multibyte buffer using two FromWChar() passes (measure, then
// convert). outLen is an output parameter; the code storing into it is not
// visible in this excerpt. The empty wxCharBuffer at the end is the failure
// result.
418 wxMBConv::cWC2MB(const wchar_t *inBuff
, size_t inLen
, size_t *outLen
) const
// first pass: NULL destination, only the required length is computed
420 size_t dstLen
= FromWChar(NULL
, 0, inBuff
, inLen
);
421 if ( dstLen
!= wxCONV_FAILED
)
423 // special case of empty input: can't allocate 0 size buffer below as
424 // wxCharBuffer insists on NUL-terminating it
425 wxCharBuffer
buf(dstLen
? dstLen
- 1 : 1);
426 if ( FromWChar(buf
.data(), dstLen
, inBuff
, inLen
) != wxCONV_FAILED
)
// look for a complete multibyte terminator at the very end of the output
432 const size_t nulLen
= GetMBNulLen();
433 if ( dstLen
>= nulLen
&&
434 !NotAllNULs(buf
.data() + dstLen
- nulLen
, nulLen
) )
436 // in this case the output is NUL-terminated and we're not
437 // supposed to count NUL
449 return wxCharBuffer();
452 // ----------------------------------------------------------------------------
454 // ----------------------------------------------------------------------------
// Multibyte to wide conversion: a direct forward to wxMB2WC() with the same
// arguments.
456 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
458 return wxMB2WC(buf
, psz
, n
);
// Wide to multibyte conversion: a direct forward to wxWC2MB() with the same
// arguments.
461 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
463 return wxWC2MB(buf
, psz
, n
);
466 // ----------------------------------------------------------------------------
467 // wxConvBrokenFileNames
468 // ----------------------------------------------------------------------------
// Picks the underlying converter stored in m_conv from the charset name:
// "UTF-8" / "UTF8" (compared case-insensitively) selects a wxMBConvUTF8 that
// maps invalid sequences to the private use area, anything else a wxCSConv
// for the named charset. Both objects are allocated with new.
472 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxString
& charset
)
474 if ( wxStricmp(charset
, _T("UTF-8")) == 0 ||
475 wxStricmp(charset
, _T("UTF8")) == 0 )
476 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA
);
478 m_conv
= new wxCSConv(charset
);
483 // ----------------------------------------------------------------------------
485 // ----------------------------------------------------------------------------
487 // Implementation (C) 2004 Fredrik Roubert
490 // BASE64 decoding table
// Indexed by byte value (256 entries). Bytes belonging to the base64 alphabet
// ('A'-'Z', 'a'-'z', '0'-'9', '+', '/') map to their 6-bit value; every other
// byte maps to 0xff, which the UTF-7 decoder uses as its "stop" marker.
492 static const unsigned char utf7unb64
[] =
494 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
495 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
496 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
497 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
498 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
499 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
500 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
501 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
502 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
503 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
504 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
505 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
506 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
507 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
508 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
509 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
510 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
511 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
512 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
513 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
514 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
515 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
516 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
517 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
518 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
519 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
520 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
521 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
522 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
523 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
524 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
525 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
// Decodes the NUL-terminated UTF-7 string psz into wide characters.
// The main loop runs until the terminating NUL of psz and, when an output
// buffer is supplied (buf != NULL), additionally stops once len reaches n.
// Returns wxCONV_FAILED for the malformed input flagged below; the success
// return value is not visible in this excerpt.
528 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
532 while ( *psz
&& (!buf
|| (len
< n
)) )
534 unsigned char cc
= *psz
++;
542 else if (*psz
== '-')
550 else // start of BASE64 encoded string
// consume base64 characters until one that is not in the alphabet
// (table value 0xff)
554 for ( ok
= lsb
= false, d
= 0, l
= 0;
555 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff;
// each base64 character contributes 6 bits; take out a byte whenever at
// least 8 bits are pending, toggling lsb after every byte
560 for (l
+= 6; l
>= 8; lsb
= !lsb
)
562 unsigned char c
= (unsigned char)((d
>> (l
-= 8)) % 256);
// store the byte as the upper half of a 16-bit unit
// (presumably when it is the first byte of the pair -- guard not visible)
572 *buf
= (wchar_t)(c
<< 8);
581 // in valid UTF7 we should have valid characters after '+'
582 return wxCONV_FAILED
;
590 if ( buf
&& (len
< n
) )
597 // BASE64 encoding table
// Maps a 6-bit value (0..63) to its base64 character; the UTF-7 encoder
// indexes it with expressions reduced modulo 64.
599 static const unsigned char utf7enb64
[] =
601 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
602 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
603 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
604 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
605 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
606 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
607 'w', 'x', 'y', 'z', '0', '1', '2', '3',
608 '4', '5', '6', '7', '8', '9', '+', '/'
612 // UTF-7 encoding table
614 // 0 - Set D (directly encoded characters)
615 // 1 - Set O (optional direct characters)
616 // 2 - whitespace characters (optional)
617 // 3 - special characters
//
// Indexed by 7-bit ASCII code, 16 entries per row. The encoder in this file
// emits a character directly only when its class is 0 (utf7encode[cc] < 1).
619 static const unsigned char utf7encode
[128] =
621 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
622 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
623 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
624 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
625 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
626 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
627 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
628 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
// Encodes the NUL-terminated wide string psz as UTF-7.
// The main loop runs until the terminating L'\0' and, when an output buffer
// is supplied (buf != NULL), additionally stops once len reaches n.
// Returns wxCONV_FAILED for characters above 0xffff; the success return value
// is not visible in this excerpt.
631 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
635 while (*psz
&& ((!buf
) || (len
< n
)))
// ASCII character of class 0 in utf7encode[] (directly encoded set)
638 if (cc
< 0x80 && utf7encode
[cc
] < 1)
647 else if (((wxUint32
)cc
) > 0xffff)
649 // no surrogate pair generation (yet?)
650 return wxCONV_FAILED
;
661 // BASE64 encode string
662 unsigned int lsb
, d
, l
;
663 for (d
= 0, l
= 0; /*nothing*/; psz
++)
// feed the character in as two bytes: high byte (lsb == 0), then low byte
665 for (lsb
= 0; lsb
< 2; lsb
++)
668 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
// 8 more bits are pending: emit a base64 character per complete 6-bit group
670 for (l
+= 8; l
>= 6; )
674 *buf
++ = utf7enb64
[(d
>> l
) % 64];
// end of input or a directly encodable character is reached
680 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
// emit the leftover bits, shifted up to fill a 6-bit group
687 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
699 if (buf
&& (len
< n
))
705 // ----------------------------------------------------------------------------
707 // ----------------------------------------------------------------------------
// Upper bounds used by the UTF-8 code in this file: utf8_max[k] is the largest
// value that fits in a UTF-8 sequence with k continuation bytes (0x7f for a
// single byte, 0x7ff for two bytes, ...). The encoder picks the first index
// whose bound is not exceeded by the character; the decoder rejects a value
// that does not exceed the bound of the next shorter sequence. The final
// 0xffffffff entry presumably serves as a catch-all bound.
709 static wxUint32 utf8_max
[]=
710 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
712 // boundaries of the private use area we use to (temporarily) remap
713 // characters invalid in a UTF-8 encoded string
// The range is 256 code points long: the UTF-8 decoder in this file maps an
// invalid input byte b to wxUnicodePUA + b.
714 const wxUint32 wxUnicodePUA
= 0x100000;
715 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
// Decodes the NUL-terminated UTF-8 string psz into wide characters.
// The main loop runs until the terminating NUL and, when an output buffer is
// supplied (buf != NULL), additionally stops once len reaches n.
// Invalid input is handled according to m_options: remapped byte by byte into
// the private use area (MAP_INVALID_UTF8_TO_PUA), written as octal digits
// (MAP_INVALID_UTF8_TO_OCTAL), or rejected with wxCONV_FAILED.
// The success return value is not visible in this excerpt.
717 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
721 while (*psz
&& ((!buf
) || (len
< n
)))
// opsz remembers where this sequence starts so that its raw bytes can be
// re-read by the invalid-input handling below
723 const char *opsz
= psz
;
724 bool invalid
= false;
725 unsigned char cc
= *psz
++, fc
= cc
;
// loop while the top bit of fc is set, counting iterations in cnt
// (how fc is updated per iteration is not visible in this excerpt)
727 for (cnt
= 0; fc
& 0x80; cnt
++)
737 // escape the escape character for octal escapes
738 if ((m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
739 && cc
== '\\' && (!buf
|| len
< n
))
751 // invalid UTF-8 sequence
756 unsigned ocnt
= cnt
- 1;
// keep only the payload bits of the lead byte
757 wxUint32 res
= cc
& (0x3f >> cnt
);
// a continuation byte must have the bit pattern 10xxxxxx
761 if ((cc
& 0xC0) != 0x80)
763 // invalid UTF-8 sequence
// append the 6 payload bits of this continuation byte
769 res
= (res
<< 6) | (cc
& 0x3f);
// a value not exceeding utf8_max[ocnt] would have fit in a shorter sequence
772 if (invalid
|| res
<= utf8_max
[ocnt
])
774 // illegal UTF-8 encoding
777 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
778 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
780 // if one of our PUA characters turns up externally
781 // it must also be treated as an illegal sequence
782 // (a bit like you have to escape an escape character)
788 // cast is ok because wchar_t == wxUint16 if WC_UTF16
789 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
790 if (pa
== wxCONV_FAILED
)
802 *buf
++ = (wchar_t)res
;
804 #endif // WC_UTF16/!WC_UTF16
810 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
// remap every raw byte of the offending sequence into the PUA range
812 while (opsz
< psz
&& (!buf
|| len
< n
))
815 // cast is ok because wchar_t == wxUint16 if WC_UTF16
816 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
817 wxASSERT(pa
!= wxCONV_FAILED
);
824 *buf
++ = (wchar_t)(wxUnicodePUA
+ (unsigned char)*opsz
);
830 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
832 while (opsz
< psz
&& (!buf
|| len
< n
))
834 if ( buf
&& len
+ 3 < n
)
836 unsigned char on
= *opsz
;
// the byte value written as three octal digits
838 *buf
++ = (wchar_t)( L
'0' + on
/ 0100 );
839 *buf
++ = (wchar_t)( L
'0' + (on
% 0100) / 010 );
840 *buf
++ = (wchar_t)( L
'0' + on
% 010 );
847 else // MAP_INVALID_UTF8_NOT
849 return wxCONV_FAILED
;
855 if (buf
&& (len
< n
))
// Returns true if wch is one of the octal digits L'0' .. L'7'.
861 static inline bool isoctal(wchar_t wch
)
863 return L
'0' <= wch
&& wch
<= L
'7';
// Encodes the NUL-terminated wide string psz as UTF-8.
// The main loop runs until the terminating L'\0' and, when an output buffer
// is supplied (buf != NULL), additionally stops once len reaches n.
// Depending on m_options it also reverses the remappings applied by the
// decoder: PUA characters become single raw bytes again and octal/backslash
// escapes are recognised. The return value is not visible in this excerpt.
866 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
870 while (*psz
&& ((!buf
) || (len
< n
)))
875 // cast is ok for WC_UTF16
876 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
// on a decoding failure still advance by one unit, otherwise by the number
// of units consumed
877 psz
+= (pa
== wxCONV_FAILED
) ? 1 : pa
;
879 cc
= (*psz
++) & 0x7fffffff;
882 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
883 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
// a remapped invalid byte: restore its original value
886 *buf
++ = (char)(cc
- wxUnicodePUA
);
// two consecutive backslashes (escaped escape character)
889 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
890 && cc
== L
'\\' && psz
[0] == L
'\\' )
// three octal digits follow
// (the first part of this condition is not visible in this excerpt)
897 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
899 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
// rebuild the byte from its octal digits
903 *buf
++ = (char) ((psz
[0] - L
'0') * 0100 +
904 (psz
[1] - L
'0') * 010 +
// find the first utf8_max[] bound that cc does not exceed
914 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++)
// lead byte: (-128 >> cnt) supplies the length marker bits, the rest is the
// top payload bits of cc
930 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
// continuation byte: 10xxxxxx carrying 6 payload bits
932 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
938 if (buf
&& (len
< n
))
944 // ============================================================================
946 // ============================================================================
// Map the "straight" (no byte swapping) and "swap" implementations below onto
// the concrete BE/LE class names according to the host byte order.
948 #ifdef WORDS_BIGENDIAN
949 #define wxMBConvUTF16straight wxMBConvUTF16BE
950 #define wxMBConvUTF16swap wxMBConvUTF16LE
952 #define wxMBConvUTF16swap wxMBConvUTF16BE
953 #define wxMBConvUTF16straight wxMBConvUTF16LE
// Computes or validates the byte length of the UTF-16 input src.
// With srcLen == wxNO_LEN the 16-bit units are counted up to and including
// the terminating zero unit and the count is scaled by BYTES_PER_CHAR; an
// explicit length must be a multiple of BYTES_PER_CHAR, otherwise
// wxCONV_FAILED is returned.
// NOTE(review): callers in this file compare the result with wxNO_LEN rather
// than wxCONV_FAILED; presumably both are the same sentinel -- confirm.
957 size_t wxMBConvUTF16Base::GetLength(const char *src
, size_t srcLen
)
959 if ( srcLen
== wxNO_LEN
)
961 // count the number of bytes in input, including the trailing NULs
962 const wxUint16
*inBuff
= wx_reinterpret_cast(const wxUint16
*, src
);
// srcLen starts at 1 so that the terminating unit is counted too
963 for ( srcLen
= 1; *inBuff
++; srcLen
++ )
966 srcLen
*= BYTES_PER_CHAR
;
968 else // we already have the length
970 // we can only convert an entire number of UTF-16 characters
971 if ( srcLen
% BYTES_PER_CHAR
)
972 return wxCONV_FAILED
;
978 // case when in-memory representation is UTF-16 too
981 // ----------------------------------------------------------------------------
982 // conversions without endianness change
983 // ----------------------------------------------------------------------------
// UTF-16 in host byte order to wchar_t, for the build where wchar_t is itself
// UTF-16: the input length is validated with GetLength() and the bytes are
// copied verbatim. Fails with wxCONV_FAILED if the length is invalid or the
// destination holds fewer than inLen units.
986 wxMBConvUTF16straight::ToWChar(wchar_t *dst
, size_t dstLen
,
987 const char *src
, size_t srcLen
) const
989 // set up the scene for using memcpy() (which is presumably more efficient
990 // than copying the bytes one by one)
991 srcLen
= GetLength(src
, srcLen
);
992 if ( srcLen
== wxNO_LEN
)
993 return wxCONV_FAILED
;
// srcLen is in bytes, inLen in UTF-16 units
995 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
998 if ( dstLen
< inLen
)
999 return wxCONV_FAILED
;
1001 memcpy(dst
, src
, srcLen
);
// wchar_t to UTF-16 in host byte order, for the build where wchar_t is itself
// UTF-16: the length is converted to bytes and the data copied verbatim.
// Fails with wxCONV_FAILED if the destination is smaller than the byte count.
1008 wxMBConvUTF16straight::FromWChar(char *dst
, size_t dstLen
,
1009 const wchar_t *src
, size_t srcLen
) const
1011 if ( srcLen
== wxNO_LEN
)
// include the terminating L'\0'
1012 srcLen
= wxWcslen(src
) + 1;
// from here on srcLen is a byte count
1014 srcLen
*= BYTES_PER_CHAR
;
1018 if ( dstLen
< srcLen
)
1019 return wxCONV_FAILED
;
1021 memcpy(dst
, src
, srcLen
);
1027 // ----------------------------------------------------------------------------
1028 // endian-reversing conversions
1029 // ----------------------------------------------------------------------------
// UTF-16 in the opposite byte order to wchar_t, for the build where wchar_t
// is itself UTF-16: every 16-bit unit is byte-swapped while being copied.
// Fails with wxCONV_FAILED if the length is invalid or the destination holds
// fewer units than the input.
1032 wxMBConvUTF16swap::ToWChar(wchar_t *dst
, size_t dstLen
,
1033 const char *src
, size_t srcLen
) const
1035 srcLen
= GetLength(src
, srcLen
);
1036 if ( srcLen
== wxNO_LEN
)
1037 return wxCONV_FAILED
;
// from here on srcLen counts UTF-16 units, not bytes
1039 srcLen
/= BYTES_PER_CHAR
;
1043 if ( dstLen
< srcLen
)
1044 return wxCONV_FAILED
;
1046 const wxUint16
*inBuff
= wx_reinterpret_cast(const wxUint16
*, src
);
1047 for ( size_t n
= 0; n
< srcLen
; n
++, inBuff
++ )
1049 *dst
++ = wxUINT16_SWAP_ALWAYS(*inBuff
);
// wchar_t to UTF-16 in the opposite byte order, for the build where wchar_t
// is itself UTF-16: every unit is byte-swapped while being copied.
// Fails with wxCONV_FAILED if the destination is smaller than the byte count.
1057 wxMBConvUTF16swap::FromWChar(char *dst
, size_t dstLen
,
1058 const wchar_t *src
, size_t srcLen
) const
1060 if ( srcLen
== wxNO_LEN
)
// include the terminating L'\0'
1061 srcLen
= wxWcslen(src
) + 1;
// from here on srcLen is a byte count
1063 srcLen
*= BYTES_PER_CHAR
;
1067 if ( dstLen
< srcLen
)
1068 return wxCONV_FAILED
;
1070 wxUint16
*outBuff
= wx_reinterpret_cast(wxUint16
*, dst
);
// n advances in bytes while src advances one unit per iteration
1071 for ( size_t n
= 0; n
< srcLen
; n
+= BYTES_PER_CHAR
, src
++ )
1073 *outBuff
++ = wxUINT16_SWAP_ALWAYS(*src
);
1080 #else // !WC_UTF16: wchar_t is UTF-32
1082 // ----------------------------------------------------------------------------
1083 // conversions without endianness change
1084 // ----------------------------------------------------------------------------
// UTF-16 in host byte order to wchar_t, for the build where wchar_t is UTF-32:
// units are decoded one code point at a time with wxDecodeSurrogate().
// Fails with wxCONV_FAILED on an invalid length, on a decoding failure, or
// when the output count would exceed dstLen.
1087 wxMBConvUTF16straight::ToWChar(wchar_t *dst
, size_t dstLen
,
1088 const char *src
, size_t srcLen
) const
1090 srcLen
= GetLength(src
, srcLen
);
1091 if ( srcLen
== wxNO_LEN
)
1092 return wxCONV_FAILED
;
// srcLen is in bytes, inLen in UTF-16 units
1094 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1097 // optimization: return maximal space which could be needed for this
1098 // string even if the real size could be smaller if the buffer contains
1104 const wxUint16
*inBuff
= wx_reinterpret_cast(const wxUint16
*, src
);
// wxDecodeSurrogate() advances inBuff itself, hence no loop increment
1105 for ( const wxUint16
* const inEnd
= inBuff
+ inLen
; inBuff
< inEnd
; )
1107 const wxUint32 ch
= wxDecodeSurrogate(&inBuff
);
1109 return wxCONV_FAILED
;
1111 if ( ++outLen
> dstLen
)
1112 return wxCONV_FAILED
;
// wchar_t to UTF-16 in host byte order, for the build where wchar_t is UTF-32:
// each character is encoded with encode_utf16(), giving one or two units.
// Fails with wxCONV_FAILED if a character cannot be encoded or the output
// size in bytes would exceed dstLen.
1122 wxMBConvUTF16straight::FromWChar(char *dst
, size_t dstLen
,
1123 const wchar_t *src
, size_t srcLen
) const
1125 if ( srcLen
== wxNO_LEN
)
// include the terminating L'\0'
1126 srcLen
= wxWcslen(src
) + 1;
1129 wxUint16
*outBuff
= wx_reinterpret_cast(wxUint16
*, dst
);
1130 for ( size_t n
= 0; n
< srcLen
; n
++ )
1133 const size_t numChars
= encode_utf16(*src
++, cc
);
1134 if ( numChars
== wxCONV_FAILED
)
1135 return wxCONV_FAILED
;
// outLen is tracked in bytes
1137 outLen
+= numChars
* BYTES_PER_CHAR
;
1140 if ( outLen
> dstLen
)
1141 return wxCONV_FAILED
;
1144 if ( numChars
== 2 )
1146 // second character of a surrogate
1155 // ----------------------------------------------------------------------------
1156 // endian-reversing conversions
1157 // ----------------------------------------------------------------------------
// UTF-16 in the opposite byte order to wchar_t, for the build where wchar_t
// is UTF-32: units are byte-swapped into a temporary pair and then decoded
// with decode_utf16().
// Fails with wxCONV_FAILED on an invalid length, on a decoding failure, or
// when the output count would exceed dstLen.
1160 wxMBConvUTF16swap::ToWChar(wchar_t *dst
, size_t dstLen
,
1161 const char *src
, size_t srcLen
) const
1163 srcLen
= GetLength(src
, srcLen
);
1164 if ( srcLen
== wxNO_LEN
)
1165 return wxCONV_FAILED
;
// srcLen is in bytes, inLen in UTF-16 units
1167 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1170 // optimization: return maximal space which could be needed for this
1171 // string even if the real size could be smaller if the buffer contains
1177 const wxUint16
*inBuff
= wx_reinterpret_cast(const wxUint16
*, src
);
1178 for ( const wxUint16
* const inEnd
= inBuff
+ inLen
; inBuff
< inEnd
; )
// swapped copies of the current and (presumably) the following unit;
// the pointer advance between the two reads is not visible in this excerpt
1183 tmp
[0] = wxUINT16_SWAP_ALWAYS(*inBuff
);
1185 tmp
[1] = wxUINT16_SWAP_ALWAYS(*inBuff
);
1187 const size_t numChars
= decode_utf16(tmp
, ch
);
1188 if ( numChars
== wxCONV_FAILED
)
1189 return wxCONV_FAILED
;
// a surrogate pair consumed both units
1191 if ( numChars
== 2 )
1194 if ( ++outLen
> dstLen
)
1195 return wxCONV_FAILED
;
// wchar_t to UTF-16 in the opposite byte order, for the build where wchar_t
// is UTF-32: each character is encoded with encode_utf16() and the resulting
// unit(s) are byte-swapped on output.
// Fails with wxCONV_FAILED if a character cannot be encoded or the output
// size in bytes would exceed dstLen.
1205 wxMBConvUTF16swap::FromWChar(char *dst
, size_t dstLen
,
1206 const wchar_t *src
, size_t srcLen
) const
1208 if ( srcLen
== wxNO_LEN
)
// include the terminating L'\0'
1209 srcLen
= wxWcslen(src
) + 1;
1212 wxUint16
*outBuff
= wx_reinterpret_cast(wxUint16
*, dst
);
1213 for ( const wchar_t *srcEnd
= src
+ srcLen
; src
< srcEnd
; src
++ )
1216 const size_t numChars
= encode_utf16(*src
, cc
);
1217 if ( numChars
== wxCONV_FAILED
)
1218 return wxCONV_FAILED
;
// outLen is tracked in bytes
1220 outLen
+= numChars
* BYTES_PER_CHAR
;
1223 if ( outLen
> dstLen
)
1224 return wxCONV_FAILED
;
1226 *outBuff
++ = wxUINT16_SWAP_ALWAYS(cc
[0]);
1227 if ( numChars
== 2 )
1229 // second character of a surrogate
1230 *outBuff
++ = wxUINT16_SWAP_ALWAYS(cc
[1]);
1238 #endif // WC_UTF16/!WC_UTF16
1241 // ============================================================================
1243 // ============================================================================
// Map the "straight" (no byte swapping) and "swap" implementations below onto
// the concrete BE/LE class names according to the host byte order.
1245 #ifdef WORDS_BIGENDIAN
1246 #define wxMBConvUTF32straight wxMBConvUTF32BE
1247 #define wxMBConvUTF32swap wxMBConvUTF32LE
1249 #define wxMBConvUTF32swap wxMBConvUTF32BE
1250 #define wxMBConvUTF32straight wxMBConvUTF32LE
// Global instances of the two UTF-32 converter classes.
1254 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1255 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
// Computes or validates the byte length of the UTF-32 input src.
// With srcLen == wxNO_LEN the 32-bit units are counted up to and including
// the terminating zero unit and the count is scaled by BYTES_PER_CHAR; an
// explicit length must be a multiple of BYTES_PER_CHAR, otherwise
// wxCONV_FAILED is returned.
// NOTE(review): callers in this file compare the result with wxNO_LEN rather
// than wxCONV_FAILED; presumably both are the same sentinel -- confirm.
1258 size_t wxMBConvUTF32Base::GetLength(const char *src
, size_t srcLen
)
1260 if ( srcLen
== wxNO_LEN
)
1262 // count the number of bytes in input, including the trailing NULs
1263 const wxUint32
*inBuff
= wx_reinterpret_cast(const wxUint32
*, src
);
// srcLen starts at 1 so that the terminating unit is counted too
1264 for ( srcLen
= 1; *inBuff
++; srcLen
++ )
1267 srcLen
*= BYTES_PER_CHAR
;
1269 else // we already have the length
1271 // we can only convert an entire number of UTF-32 characters
1272 if ( srcLen
% BYTES_PER_CHAR
)
1273 return wxCONV_FAILED
;
1279 // case when in-memory representation is UTF-16
1282 // ----------------------------------------------------------------------------
1283 // conversions without endianness change
1284 // ----------------------------------------------------------------------------
// UTF-32 in host byte order to wchar_t, for the build where wchar_t is UTF-16:
// each 32-bit value is encoded with encode_utf16(), giving one or two units.
// Fails with wxCONV_FAILED on an invalid length, on a value that cannot be
// encoded, or when the output would exceed dstLen.
1287 wxMBConvUTF32straight::ToWChar(wchar_t *dst
, size_t dstLen
,
1288 const char *src
, size_t srcLen
) const
1290 srcLen
= GetLength(src
, srcLen
);
1291 if ( srcLen
== wxNO_LEN
)
1292 return wxCONV_FAILED
;
1294 const wxUint32
*inBuff
= wx_reinterpret_cast(const wxUint32
*, src
);
// srcLen is in bytes, inLen in UTF-32 units
1295 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1297 for ( size_t n
= 0; n
< inLen
; n
++ )
1300 const size_t numChars
= encode_utf16(*inBuff
++, cc
);
1301 if ( numChars
== wxCONV_FAILED
)
1302 return wxCONV_FAILED
;
1307 if ( outLen
> dstLen
)
1308 return wxCONV_FAILED
;
1311 if ( numChars
== 2 )
1313 // second character of a surrogate
// wchar_t to UTF-32 in host byte order, for the build where wchar_t is UTF-16:
// code points are decoded with wxDecodeSurrogate() and each adds
// BYTES_PER_CHAR bytes to the output size.
// Fails with wxCONV_FAILED on a decoding failure or when the output size would
// exceed dstLen.
1323 wxMBConvUTF32straight::FromWChar(char *dst
, size_t dstLen
,
1324 const wchar_t *src
, size_t srcLen
) const
1326 if ( srcLen
== wxNO_LEN
)
// include the terminating L'\0'
1327 srcLen
= wxWcslen(src
) + 1;
1331 // optimization: return maximal space which could be needed for this
1332 // string instead of the exact amount which could be less if there are
1333 // any surrogates in the input
1335 // we consider that surrogates are rare enough to make it worthwhile to
1336 // avoid running the loop below at the cost of slightly extra memory
// (the condition guarding this early return is not visible in this excerpt)
1338 return srcLen
* BYTES_PER_CHAR
;
1341 wxUint32
*outBuff
= wx_reinterpret_cast(wxUint32
*, dst
);
// wxDecodeSurrogate() advances src itself, hence no loop increment
1343 for ( const wchar_t * const srcEnd
= src
+ srcLen
; src
< srcEnd
; )
1345 const wxUint32 ch
= wxDecodeSurrogate(&src
);
1347 return wxCONV_FAILED
;
1349 outLen
+= BYTES_PER_CHAR
;
1351 if ( outLen
> dstLen
)
1352 return wxCONV_FAILED
;
1360 // ----------------------------------------------------------------------------
1361 // endian-reversing conversions
1362 // ----------------------------------------------------------------------------
// UTF-32 in the opposite byte order to wchar_t, for the build where wchar_t
// is UTF-16: each 32-bit value is byte-swapped and then encoded with
// encode_utf16(), giving one or two units.
// Fails with wxCONV_FAILED on an invalid length, on a value that cannot be
// encoded, or when the output would exceed dstLen.
1365 wxMBConvUTF32swap::ToWChar(wchar_t *dst
, size_t dstLen
,
1366 const char *src
, size_t srcLen
) const
1368 srcLen
= GetLength(src
, srcLen
);
1369 if ( srcLen
== wxNO_LEN
)
1370 return wxCONV_FAILED
;
1372 const wxUint32
*inBuff
= wx_reinterpret_cast(const wxUint32
*, src
);
// srcLen is in bytes, inLen in UTF-32 units
1373 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1375 for ( size_t n
= 0; n
< inLen
; n
++, inBuff
++ )
1378 const size_t numChars
= encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff
), cc
);
1379 if ( numChars
== wxCONV_FAILED
)
1380 return wxCONV_FAILED
;
1385 if ( outLen
> dstLen
)
1386 return wxCONV_FAILED
;
1389 if ( numChars
== 2 )
1391 // second character of a surrogate
// wchar_t to UTF-32 in the opposite byte order, for the build where wchar_t
// is UTF-16: code points are decoded with wxDecodeSurrogate() and written
// byte-swapped.
// Fails with wxCONV_FAILED on a decoding failure or when the output size would
// exceed dstLen.
1401 wxMBConvUTF32swap::FromWChar(char *dst
, size_t dstLen
,
1402 const wchar_t *src
, size_t srcLen
) const
1404 if ( srcLen
== wxNO_LEN
)
// include the terminating L'\0'
1405 srcLen
= wxWcslen(src
) + 1;
1409 // optimization: return maximal space which could be needed for this
1410 // string instead of the exact amount which could be less if there are
1411 // any surrogates in the input
1413 // we consider that surrogates are rare enough to make it worthwhile to
1414 // avoid running the loop below at the cost of slightly extra memory
// (the condition guarding this early return is not visible in this excerpt)
1416 return srcLen
*BYTES_PER_CHAR
;
1419 wxUint32
*outBuff
= wx_reinterpret_cast(wxUint32
*, dst
);
// wxDecodeSurrogate() advances src itself, hence no loop increment
1421 for ( const wchar_t * const srcEnd
= src
+ srcLen
; src
< srcEnd
; )
1423 const wxUint32 ch
= wxDecodeSurrogate(&src
);
1425 return wxCONV_FAILED
;
1427 outLen
+= BYTES_PER_CHAR
;
1429 if ( outLen
> dstLen
)
1430 return wxCONV_FAILED
;
1432 *outBuff
++ = wxUINT32_SWAP_ALWAYS(ch
);
1438 #else // !WC_UTF16: wchar_t is UTF-32
1440 // ----------------------------------------------------------------------------
1441 // conversions without endianness change
1442 // ----------------------------------------------------------------------------
// UTF-32 in host byte order to wchar_t, for the build where wchar_t is itself
// UTF-32: the input length is validated with GetLength() and the bytes are
// copied verbatim. Fails with wxCONV_FAILED if the length is invalid or the
// destination holds fewer than inLen units.
1445 wxMBConvUTF32straight::ToWChar(wchar_t *dst
, size_t dstLen
,
1446 const char *src
, size_t srcLen
) const
1448 // use memcpy() as it should be much faster than hand-written loop
1449 srcLen
= GetLength(src
, srcLen
);
1450 if ( srcLen
== wxNO_LEN
)
1451 return wxCONV_FAILED
;
// srcLen is in bytes, inLen in UTF-32 units
1453 const size_t inLen
= srcLen
/BYTES_PER_CHAR
;
1456 if ( dstLen
< inLen
)
1457 return wxCONV_FAILED
;
1459 memcpy(dst
, src
, srcLen
);
// wchar_t to UTF-32 in host byte order, for the build where wchar_t is itself
// UTF-32: the length is converted to bytes and the data copied verbatim.
// Fails with wxCONV_FAILED if the destination is smaller than the byte count.
1466 wxMBConvUTF32straight::FromWChar(char *dst
, size_t dstLen
,
1467 const wchar_t *src
, size_t srcLen
) const
1469 if ( srcLen
== wxNO_LEN
)
// include the terminating L'\0'
1470 srcLen
= wxWcslen(src
) + 1;
// from here on srcLen is a byte count
1472 srcLen
*= BYTES_PER_CHAR
;
1476 if ( dstLen
< srcLen
)
1477 return wxCONV_FAILED
;
1479 memcpy(dst
, src
, srcLen
);
1485 // ----------------------------------------------------------------------------
1486 // endian-reversing conversions
1487 // ----------------------------------------------------------------------------
// UTF-32 in the opposite byte order to wchar_t, for the build where wchar_t
// is itself UTF-32: every 32-bit unit is byte-swapped while being copied.
// Fails with wxCONV_FAILED if the length is invalid or the destination holds
// fewer units than the input.
1490 wxMBConvUTF32swap::ToWChar(wchar_t *dst
, size_t dstLen
,
1491 const char *src
, size_t srcLen
) const
1493 srcLen
= GetLength(src
, srcLen
);
1494 if ( srcLen
== wxNO_LEN
)
1495 return wxCONV_FAILED
;
// from here on srcLen counts UTF-32 units, not bytes
1497 srcLen
/= BYTES_PER_CHAR
;
1501 if ( dstLen
< srcLen
)
1502 return wxCONV_FAILED
;
1504 const wxUint32
*inBuff
= wx_reinterpret_cast(const wxUint32
*, src
);
1505 for ( size_t n
= 0; n
< srcLen
; n
++, inBuff
++ )
1507 *dst
++ = wxUINT32_SWAP_ALWAYS(*inBuff
);
// wchar_t to UTF-32 in the opposite byte order, for the build where wchar_t
// is itself UTF-32: every unit is byte-swapped while being copied.
// Fails with wxCONV_FAILED if the destination is smaller than the byte count.
1515 wxMBConvUTF32swap::FromWChar(char *dst
, size_t dstLen
,
1516 const wchar_t *src
, size_t srcLen
) const
1518 if ( srcLen
== wxNO_LEN
)
// include the terminating L'\0'
1519 srcLen
= wxWcslen(src
) + 1;
// from here on srcLen is a byte count
1521 srcLen
*= BYTES_PER_CHAR
;
1525 if ( dstLen
< srcLen
)
1526 return wxCONV_FAILED
;
1528 wxUint32
*outBuff
= wx_reinterpret_cast(wxUint32
*, dst
);
// n advances in bytes while src advances one unit per iteration
1529 for ( size_t n
= 0; n
< srcLen
; n
+= BYTES_PER_CHAR
, src
++ )
1531 *outBuff
++ = wxUINT32_SWAP_ALWAYS(*src
);
1538 #endif // WC_UTF16/!WC_UTF16
1541 // ============================================================================
1542 // The classes doing conversion using the iconv_xxx() functions
1543 // ============================================================================
1547 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1548 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1549 // (unless there's yet another bug in glibc) the only case when iconv()
1550 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1551 // left in the input buffer -- when _real_ error occurs,
1552 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1554 // [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft): for glibc 2.0/2.1 an E2BIG result with no input
// left is not counted as a failure; otherwise any (size_t)-1 result is.
1555 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1556 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1557 (errno != E2BIG || bufLeft != 0))
1559 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
// cast applied to the buffer pointer arguments passed to iconv()
1562 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
// value marking an invalid / unopened conversion descriptor
1564 #define ICONV_T_INVALID ((iconv_t)-1)
// byte-swapping macro and font encoding matching the size of wchar_t
1566 #if SIZEOF_WCHAR_T == 4
1567 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1568 #define WC_ENC wxFONTENCODING_UTF32
1569 #elif SIZEOF_WCHAR_T == 2
1570 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1571 #define WC_ENC wxFONTENCODING_UTF16
1572 #else // sizeof(wchar_t) != 2 nor 4
1573 // does this ever happen?
1574 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1577 // ----------------------------------------------------------------------------
1578 // wxMBConv_iconv: encapsulates an iconv character set
1579 // ----------------------------------------------------------------------------
1581 class wxMBConv_iconv
: public wxMBConv
1584 wxMBConv_iconv(const char *name
);
1585 virtual ~wxMBConv_iconv();
1587 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1588 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1590 // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
1591 virtual size_t GetMBNulLen() const;
1593 #if wxUSE_UNICODE_UTF8
1594 virtual bool IsUTF8() const;
1597 virtual wxMBConv
*Clone() const
1599 wxMBConv_iconv
*p
= new wxMBConv_iconv(m_name
.ToAscii());
1600 p
->m_minMBCharWidth
= m_minMBCharWidth
;
1605 { return (m2w
!= ICONV_T_INVALID
) && (w2m
!= ICONV_T_INVALID
); }
1608 // the iconv handlers used to translate from multibyte
1609 // to wide char and in the other direction
1614 // guards access to m2w and w2m objects
1615 wxMutex m_iconvMutex
;
1619 // the name (for iconv_open()) of a wide char charset -- if none is
1620 // available on this machine, it will remain NULL
1621 static wxString ms_wcCharsetName
;
1623 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1624 // different endian-ness than the native one
1625 static bool ms_wcNeedsSwap
;
1628 // name of the encoding handled by this conversion
1631 // cached result of GetMBNulLen(); set to 0 meaning "unknown"
1633 size_t m_minMBCharWidth
;
1636 // make the constructor available for unit testing
1637 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_iconv( const char* name
)
1639 wxMBConv_iconv
* result
= new wxMBConv_iconv( name
);
1640 if ( !result
->IsOk() )
1649 wxString
wxMBConv_iconv::ms_wcCharsetName
;
1650 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1652 wxMBConv_iconv::wxMBConv_iconv(const char *name
)
1655 m_minMBCharWidth
= 0;
1657 // check for charset that represents wchar_t:
1658 if ( ms_wcCharsetName
.empty() )
1660 wxLogTrace(TRACE_STRCONV
, _T("Looking for wide char codeset:"));
1663 const wxChar
**names
= wxFontMapperBase::GetAllEncodingNames(WC_ENC
);
1664 #else // !wxUSE_FONTMAP
1665 static const wxChar
*names_static
[] =
1667 #if SIZEOF_WCHAR_T == 4
1669 #elif SIZEOF_WCHAR_T = 2
1674 const wxChar
**names
= names_static
;
1675 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1677 for ( ; *names
&& ms_wcCharsetName
.empty(); ++names
)
1679 const wxString
nameCS(*names
);
1681 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1682 wxString
nameXE(nameCS
);
1684 #ifdef WORDS_BIGENDIAN
1686 #else // little endian
1690 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1693 m2w
= iconv_open(nameXE
.ToAscii(), name
);
1694 if ( m2w
== ICONV_T_INVALID
)
1696 // try charset w/o bytesex info (e.g. "UCS4")
1697 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1699 m2w
= iconv_open(nameCS
.ToAscii(), name
);
1701 // and check for bytesex ourselves:
1702 if ( m2w
!= ICONV_T_INVALID
)
1704 char buf
[2], *bufPtr
;
1705 wchar_t wbuf
[2], *wbufPtr
;
1713 outsz
= SIZEOF_WCHAR_T
* 2;
1718 m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1719 (char**)&wbufPtr
, &outsz
);
1721 if (ICONV_FAILED(res
, insz
))
1723 wxLogLastError(wxT("iconv"));
1724 wxLogError(_("Conversion to charset '%s' doesn't work."),
1727 else // ok, can convert to this encoding, remember it
1729 ms_wcCharsetName
= nameCS
;
1730 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1734 else // use charset not requiring byte swapping
1736 ms_wcCharsetName
= nameXE
;
1740 wxLogTrace(TRACE_STRCONV
,
1741 wxT("iconv wchar_t charset is \"%s\"%s"),
1742 ms_wcCharsetName
.empty() ? wxString("<none>")
1744 ms_wcNeedsSwap
? _T(" (needs swap)")
1747 else // we already have ms_wcCharsetName
1749 m2w
= iconv_open(ms_wcCharsetName
.ToAscii(), name
);
1752 if ( ms_wcCharsetName
.empty() )
1754 w2m
= ICONV_T_INVALID
;
1758 w2m
= iconv_open(name
, ms_wcCharsetName
.ToAscii());
1759 if ( w2m
== ICONV_T_INVALID
)
1761 wxLogTrace(TRACE_STRCONV
,
1762 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1763 ms_wcCharsetName
.c_str(), name
);
1768 wxMBConv_iconv::~wxMBConv_iconv()
1770 if ( m2w
!= ICONV_T_INVALID
)
1772 if ( w2m
!= ICONV_T_INVALID
)
1776 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1778 // find the string length: notice that must be done differently for
1779 // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1781 const size_t nulLen
= GetMBNulLen();
1785 return wxCONV_FAILED
;
1788 inbuf
= strlen(psz
); // arguably more optimized than our version
1793 // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1794 // they also have to start at character boundary and not span two
1795 // adjacent characters
1797 for ( p
= psz
; NotAllNULs(p
, nulLen
); p
+= nulLen
)
1804 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1805 // Unfortunately there are a couple of global wxCSConv objects such as
1806 // wxConvLocal that are used all over wx code, so we have to make sure
1807 // the handle is used by at most one thread at the time. Otherwise
1808 // only a few wx classes would be safe to use from non-main threads
1809 // as MB<->WC conversion would fail "randomly".
1810 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1811 #endif // wxUSE_THREADS
1813 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1815 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1816 wchar_t *bufPtr
= buf
;
1817 const char *pszPtr
= psz
;
1821 // have destination buffer, convert there
1823 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1824 (char**)&bufPtr
, &outbuf
);
1825 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1829 // convert to native endianness
1830 for ( unsigned i
= 0; i
< res
; i
++ )
1831 buf
[n
] = WC_BSWAP(buf
[i
]);
1834 // NUL-terminate the string if there is any space left
1840 // no destination buffer... convert using temp buffer
1841 // to calculate destination buffer requirement
1848 outbuf
= 8 * SIZEOF_WCHAR_T
;
1851 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1852 (char**)&bufPtr
, &outbuf
);
1854 res
+= 8 - (outbuf
/ SIZEOF_WCHAR_T
);
1856 while ((cres
== (size_t)-1) && (errno
== E2BIG
));
1859 if (ICONV_FAILED(cres
, inbuf
))
1861 //VS: it is ok if iconv fails, hence trace only
1862 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1863 return wxCONV_FAILED
;
1869 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1872 // NB: explained in MB2WC
1873 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1876 size_t inlen
= wxWcslen(psz
);
1877 size_t inbuf
= inlen
* SIZEOF_WCHAR_T
;
1881 wchar_t *tmpbuf
= 0;
1885 // need to copy to temp buffer to switch endianness
1886 // (doing WC_BSWAP twice on the original buffer won't help, as it
1887 // could be in read-only memory, or be accessed in some other thread)
1888 tmpbuf
= (wchar_t *)malloc(inbuf
+ SIZEOF_WCHAR_T
);
1889 for ( size_t i
= 0; i
< inlen
; i
++ )
1890 tmpbuf
[n
] = WC_BSWAP(psz
[i
]);
1892 tmpbuf
[inlen
] = L
'\0';
1898 // have destination buffer, convert there
1899 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1903 // NB: iconv was given only wcslen(psz) characters on input, and so
1904 // it couldn't convert the trailing zero. Let's do it ourselves
1905 // if there's some room left for it in the output buffer.
1911 // no destination buffer: convert using temp buffer
1912 // to calculate destination buffer requirement
1920 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1924 while ((cres
== (size_t)-1) && (errno
== E2BIG
));
1932 if (ICONV_FAILED(cres
, inbuf
))
1934 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1935 return wxCONV_FAILED
;
1941 size_t wxMBConv_iconv::GetMBNulLen() const
1943 if ( m_minMBCharWidth
== 0 )
1945 wxMBConv_iconv
* const self
= wxConstCast(this, wxMBConv_iconv
);
1948 // NB: explained in MB2WC
1949 wxMutexLocker
lock(self
->m_iconvMutex
);
1952 const wchar_t *wnul
= L
"";
1953 char buf
[8]; // should be enough for NUL in any encoding
1954 size_t inLen
= sizeof(wchar_t),
1955 outLen
= WXSIZEOF(buf
);
1956 char *inBuff
= (char *)wnul
;
1957 char *outBuff
= buf
;
1958 if ( iconv(w2m
, ICONV_CHAR_CAST(&inBuff
), &inLen
, &outBuff
, &outLen
) == (size_t)-1 )
1960 self
->m_minMBCharWidth
= (size_t)-1;
1964 self
->m_minMBCharWidth
= outBuff
- buf
;
1968 return m_minMBCharWidth
;
1971 #if wxUSE_UNICODE_UTF8
1972 bool wxMBConv_iconv::IsUTF8() const
1974 return wxStricmp(m_name
, "UTF-8") == 0 ||
1975 wxStricmp(m_name
, "UTF8") == 0;
1979 #endif // HAVE_ICONV
1982 // ============================================================================
1983 // Win32 conversion classes
1984 // ============================================================================
1986 #ifdef wxHAVE_WIN32_MB2WC
1990 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const char *charset
);
1991 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1994 class wxMBConv_win32
: public wxMBConv
1999 m_CodePage
= CP_ACP
;
2000 m_minMBCharWidth
= 0;
2003 wxMBConv_win32(const wxMBConv_win32
& conv
)
2006 m_CodePage
= conv
.m_CodePage
;
2007 m_minMBCharWidth
= conv
.m_minMBCharWidth
;
2011 wxMBConv_win32(const char* name
)
2013 m_CodePage
= wxCharsetToCodepage(name
);
2014 m_minMBCharWidth
= 0;
2017 wxMBConv_win32(wxFontEncoding encoding
)
2019 m_CodePage
= wxEncodingToCodepage(encoding
);
2020 m_minMBCharWidth
= 0;
2022 #endif // wxUSE_FONTMAP
2024 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2026 // note that we have to use MB_ERR_INVALID_CHARS flag as without it
2027 // the behaviour is not compatible with the Unix version (using iconv)
2028 // and break the library itself, e.g. wxTextInputStream::NextChar()
2029 // wouldn't work if reading an incomplete MB char didn't result in an error
2032 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
2033 // Win XP or newer and it is not supported for UTF-[78] so we always
2034 // use our own conversions in this case. See
2035 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2036 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
2037 if ( m_CodePage
== CP_UTF8
)
2039 return wxMBConvUTF8().MB2WC(buf
, psz
, n
);
2042 if ( m_CodePage
== CP_UTF7
)
2044 return wxMBConvUTF7().MB2WC(buf
, psz
, n
);
2048 if ( (m_CodePage
< 50000 && m_CodePage
!= CP_SYMBOL
) &&
2049 IsAtLeastWin2kSP4() )
2051 flags
= MB_ERR_INVALID_CHARS
;
2054 const size_t len
= ::MultiByteToWideChar
2056 m_CodePage
, // code page
2057 flags
, // flags: fall on error
2058 psz
, // input string
2059 -1, // its length (NUL-terminated)
2060 buf
, // output string
2061 buf
? n
: 0 // size of output buffer
2065 // function totally failed
2066 return wxCONV_FAILED
;
2069 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2070 // check if we succeeded, by doing a double trip:
2071 if ( !flags
&& buf
)
2073 const size_t mbLen
= strlen(psz
);
2074 wxCharBuffer
mbBuf(mbLen
);
2075 if ( ::WideCharToMultiByte
2082 mbLen
+ 1, // size in bytes, not length
2086 strcmp(mbBuf
, psz
) != 0 )
2088 // we didn't obtain the same thing we started from, hence
2089 // the conversion was lossy and we consider that it failed
2090 return wxCONV_FAILED
;
2094 // note that it returns count of written chars for buf != NULL and size
2095 // of the needed buffer for buf == NULL so in either case the length of
2096 // the string (which never includes the terminating NUL) is one less
2100 virtual size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
2103 we have a problem here: by default, WideCharToMultiByte() may
2104 replace characters unrepresentable in the target code page with bad
2105 quality approximations such as turning "1/2" symbol (U+00BD) into
2106 "1" for the code pages which don't have it and we, obviously, want
2107 to avoid this at any price
2109 the trouble is that this function does it _silently_, i.e. it won't
2110 even tell us whether it did or not... Win98/2000 and higher provide
2111 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2112 we have to resort to a round trip, i.e. check that converting back
2113 results in the same string -- this is, of course, expensive but
2114 otherwise we simply can't be sure to not garble the data.
2117 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2118 // it doesn't work with CJK encodings (which we test for rather roughly
2119 // here...) nor with UTF-7/8 nor, of course, with Windows versions not supporting it
2121 BOOL usedDef
wxDUMMY_INITIALIZE(false);
2124 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
2126 // it's our lucky day
2127 flags
= WC_NO_BEST_FIT_CHARS
;
2128 pUsedDef
= &usedDef
;
2130 else // old system or unsupported encoding
2136 const size_t len
= ::WideCharToMultiByte
2138 m_CodePage
, // code page
2139 flags
, // either none or no best fit
2140 pwz
, // input string
2141 -1, // it is (wide) NUL-terminated
2142 buf
, // output buffer
2143 buf
? n
: 0, // and its size
2144 NULL
, // default "replacement" char
2145 pUsedDef
// [out] was it used?
2150 // function totally failed
2151 return wxCONV_FAILED
;
2154 // if we were really converting, check if we succeeded
2159 // check if the conversion failed, i.e. if any replacements
2162 return wxCONV_FAILED
;
2164 else // we must resort to double tripping...
2166 wxWCharBuffer
wcBuf(n
);
2167 if ( MB2WC(wcBuf
.data(), buf
, n
) == wxCONV_FAILED
||
2168 wcscmp(wcBuf
, pwz
) != 0 )
2170 // we didn't obtain the same thing we started from, hence
2171 // the conversion was lossy and we consider that it failed
2172 return wxCONV_FAILED
;
2177 // see the comment above for the reason of "len - 1"
2181 virtual size_t GetMBNulLen() const
2183 if ( m_minMBCharWidth
== 0 )
2185 int len
= ::WideCharToMultiByte
2187 m_CodePage
, // code page
2189 L
"", // input string
2190 1, // translate just the NUL
2191 NULL
, // output buffer
2193 NULL
, // no replacement char
2194 NULL
// [out] don't care if it was used
2197 wxMBConv_win32
* const self
= wxConstCast(this, wxMBConv_win32
);
2201 wxLogDebug(_T("Unexpected NUL length %d"), len
);
2202 self
->m_minMBCharWidth
= (size_t)-1;
2206 self
->m_minMBCharWidth
= (size_t)-1;
2212 self
->m_minMBCharWidth
= len
;
2217 return m_minMBCharWidth
;
2220 virtual wxMBConv
*Clone() const { return new wxMBConv_win32(*this); }
2222 bool IsOk() const { return m_CodePage
!= -1; }
2225 static bool CanUseNoBestFit()
2227 static int s_isWin98Or2k
= -1;
2229 if ( s_isWin98Or2k
== -1 )
2232 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
2234 case wxOS_WINDOWS_9X
:
2235 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
2238 case wxOS_WINDOWS_NT
:
2239 s_isWin98Or2k
= verMaj
>= 5;
2243 // unknown: be conservative by default
2248 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
2251 return s_isWin98Or2k
== 1;
2254 static bool IsAtLeastWin2kSP4()
2259 static int s_isAtLeastWin2kSP4
= -1;
2261 if ( s_isAtLeastWin2kSP4
== -1 )
2263 OSVERSIONINFOEX ver
;
2265 memset(&ver
, 0, sizeof(ver
));
2266 ver
.dwOSVersionInfoSize
= sizeof(ver
);
2267 GetVersionEx((OSVERSIONINFO
*)&ver
);
2269 s_isAtLeastWin2kSP4
=
2270 ((ver
.dwMajorVersion
> 5) || // Vista+
2271 (ver
.dwMajorVersion
== 5 && ver
.dwMinorVersion
> 0) || // XP/2003
2272 (ver
.dwMajorVersion
== 5 && ver
.dwMinorVersion
== 0 &&
2273 ver
.wServicePackMajor
>= 4)) // 2000 SP4+
2277 return s_isAtLeastWin2kSP4
== 1;
2282 // the code page we're working with
2285 // cached result of GetMBNulLen(), set to 0 initially meaning "unknown"
2287 size_t m_minMBCharWidth
;
2290 #endif // wxHAVE_WIN32_MB2WC
2293 // ============================================================================
2294 // wxEncodingConverter based conversion classes
2295 // ============================================================================
2299 class wxMBConv_wxwin
: public wxMBConv
2304 // Refuse to use broken wxEncodingConverter code for Mac-specific encodings.
2305 // The wxMBConv_cf class does a better job.
2306 m_ok
= (m_enc
< wxFONTENCODING_MACMIN
|| m_enc
> wxFONTENCODING_MACMAX
) &&
2307 m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2308 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2312 // temporarily just use wxEncodingConverter stuff,
2313 // so that it works while a better implementation is built
2314 wxMBConv_wxwin(const char* name
)
2317 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2319 m_enc
= wxFONTENCODING_SYSTEM
;
2324 wxMBConv_wxwin(wxFontEncoding enc
)
2331 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2333 size_t inbuf
= strlen(psz
);
2336 if (!m2w
.Convert(psz
, buf
))
2337 return wxCONV_FAILED
;
2342 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2344 const size_t inbuf
= wxWcslen(psz
);
2347 if (!w2m
.Convert(psz
, buf
))
2348 return wxCONV_FAILED
;
2354 virtual size_t GetMBNulLen() const
2358 case wxFONTENCODING_UTF16BE
:
2359 case wxFONTENCODING_UTF16LE
:
2362 case wxFONTENCODING_UTF32BE
:
2363 case wxFONTENCODING_UTF32LE
:
2371 virtual wxMBConv
*Clone() const { return new wxMBConv_wxwin(m_enc
); }
2373 bool IsOk() const { return m_ok
; }
2376 wxFontEncoding m_enc
;
2377 wxEncodingConverter m2w
, w2m
;
2380 // were we initialized successfully?
2383 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2386 // make the constructors available for unit testing
2387 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_wxwin( const char* name
)
2389 wxMBConv_wxwin
* result
= new wxMBConv_wxwin( name
);
2390 if ( !result
->IsOk() )
2399 #endif // wxUSE_FONTMAP
2401 // ============================================================================
2402 // wxCSConv implementation
2403 // ============================================================================
2405 void wxCSConv::Init()
2412 wxCSConv::wxCSConv(const wxString
& charset
)
2416 if ( !charset
.empty() )
2418 SetName(charset
.ToAscii());
2422 m_encoding
= wxFontMapperBase::GetEncodingFromName(charset
);
2424 m_encoding
= wxFONTENCODING_SYSTEM
;
2428 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2430 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2432 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2434 encoding
= wxFONTENCODING_SYSTEM
;
2439 m_encoding
= encoding
;
2442 wxCSConv::~wxCSConv()
2447 wxCSConv::wxCSConv(const wxCSConv
& conv
)
2452 SetName(conv
.m_name
);
2453 m_encoding
= conv
.m_encoding
;
2456 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2460 SetName(conv
.m_name
);
2461 m_encoding
= conv
.m_encoding
;
2466 void wxCSConv::Clear()
2475 void wxCSConv::SetName(const char *charset
)
2479 m_name
= strdup(charset
);
2486 WX_DECLARE_HASH_MAP( wxFontEncoding
, wxString
, wxIntegerHash
, wxIntegerEqual
,
2487 wxEncodingNameCache
);
2489 static wxEncodingNameCache gs_nameCache
;
2492 wxMBConv
*wxCSConv::DoCreate() const
2495 wxLogTrace(TRACE_STRCONV
,
2496 wxT("creating conversion for %s"),
2498 : (const char*)wxFontMapperBase::GetEncodingName(m_encoding
).mb_str()));
2499 #endif // wxUSE_FONTMAP
2501 // check for the special case of ASCII or ISO8859-1 charset: as we have
2502 // special knowledge of it anyhow, we don't need to create a special
2503 // conversion object
2504 if ( m_encoding
== wxFONTENCODING_ISO8859_1
||
2505 m_encoding
== wxFONTENCODING_DEFAULT
)
2507 // don't convert at all
2511 // we trust OS to do conversion better than we can so try external
2512 // conversion methods first
2514 // the full order is:
2515 // 1. OS conversion (iconv() under Unix or Win32 API)
2516 // 2. hard coded conversions for UTF
2517 // 3. wxEncodingConverter as fall back
2523 #endif // !wxUSE_FONTMAP
2526 wxFontEncoding
encoding(m_encoding
);
2531 wxMBConv_iconv
*conv
= new wxMBConv_iconv(m_name
);
2539 wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
2540 #endif // wxUSE_FONTMAP
2544 const wxEncodingNameCache::iterator it
= gs_nameCache
.find(encoding
);
2545 if ( it
!= gs_nameCache
.end() )
2547 if ( it
->second
.empty() )
2550 wxMBConv_iconv
*conv
= new wxMBConv_iconv(it
->second
.ToAscii());
2557 const wxChar
** names
= wxFontMapperBase::GetAllEncodingNames(encoding
);
2558 // CS : in case this does not return valid names (eg for MacRoman)
2559 // encoding got a 'failure' entry in the cache all the same,
2560 // although it just has to be created using a different method, so
2561 // only store failed iconv creation attempts (or perhaps we
2562 // shouldn't do this at all?)
2563 if ( names
[0] != NULL
)
2565 for ( ; *names
; ++names
)
2567 // FIXME-UTF8: wxFontMapperBase::GetAllEncodingNames()
2568 // will need changes that will obsolete this
2569 wxString
name(*names
);
2570 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
.ToAscii());
2573 gs_nameCache
[encoding
] = *names
;
2580 gs_nameCache
[encoding
] = _T(""); // cache the failure
2583 #endif // wxUSE_FONTMAP
2585 #endif // HAVE_ICONV
2587 #ifdef wxHAVE_WIN32_MB2WC
2590 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2591 : new wxMBConv_win32(m_encoding
);
2600 #endif // wxHAVE_WIN32_MB2WC
2604 // leave UTF16 and UTF32 to the built-ins of wx
2605 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
2606 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
2609 wxMBConv_cf
*conv
= m_name
? new wxMBConv_cf(m_name
)
2610 : new wxMBConv_cf(m_encoding
);
2612 wxMBConv_cf
*conv
= new wxMBConv_cf(m_encoding
);
2621 #endif // __DARWIN__
2624 wxFontEncoding enc
= m_encoding
;
2626 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2628 // use "false" to suppress interactive dialogs -- we can be called from
2629 // anywhere and popping up a dialog from here is the last thing we want to
2631 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
2633 #endif // wxUSE_FONTMAP
2637 case wxFONTENCODING_UTF7
:
2638 return new wxMBConvUTF7
;
2640 case wxFONTENCODING_UTF8
:
2641 return new wxMBConvUTF8
;
2643 case wxFONTENCODING_UTF16BE
:
2644 return new wxMBConvUTF16BE
;
2646 case wxFONTENCODING_UTF16LE
:
2647 return new wxMBConvUTF16LE
;
2649 case wxFONTENCODING_UTF32BE
:
2650 return new wxMBConvUTF32BE
;
2652 case wxFONTENCODING_UTF32LE
:
2653 return new wxMBConvUTF32LE
;
2656 // nothing to do but put here to suppress gcc warnings
2663 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2664 : new wxMBConv_wxwin(m_encoding
);
2670 #endif // wxUSE_FONTMAP
2672 // NB: This is a hack to prevent deadlock. What could otherwise happen
2673 // in Unicode build: wxConvLocal creation ends up being here
2674 // because of some failure and logs the error. But wxLog will try to
2675 // attach a timestamp, for which it will need wxConvLocal (to convert
2676 // time to char* and then wchar_t*), but that fails, tries to log the
2677 // error, but wxLog has an (already locked) critical section that
2678 // guards the static buffer.
2679 static bool alreadyLoggingError
= false;
2680 if (!alreadyLoggingError
)
2682 alreadyLoggingError
= true;
2683 wxLogError(_("Cannot convert from the charset '%s'!"),
2687 (const char*)wxFontMapperBase::GetEncodingDescription(m_encoding
).ToAscii()
2688 #else // !wxUSE_FONTMAP
2689 (const char*)wxString::Format(_("encoding %i"), m_encoding
).ToAscii()
2690 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2693 alreadyLoggingError
= false;
2699 void wxCSConv::CreateConvIfNeeded() const
2703 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2705 // if we have neither the name nor the encoding, use the default
2706 // encoding for this system
2707 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
2710 self
->m_encoding
= wxLocale::GetSystemEncoding();
2712 // fallback to some reasonable default:
2713 self
->m_encoding
= wxFONTENCODING_ISO8859_1
;
2714 #endif // wxUSE_INTL
2717 self
->m_convReal
= DoCreate();
2718 self
->m_deferred
= false;
2722 bool wxCSConv::IsOk() const
2724 CreateConvIfNeeded();
2726 // special case: no convReal created for wxFONTENCODING_ISO8859_1
2727 if ( m_encoding
== wxFONTENCODING_ISO8859_1
)
2728 return true; // always ok as we do it ourselves
2730 // m_convReal->IsOk() is called at its own creation, so we know it must
2731 // be ok if m_convReal is non-NULL
2732 return m_convReal
!= NULL
;
2735 size_t wxCSConv::ToWChar(wchar_t *dst
, size_t dstLen
,
2736 const char *src
, size_t srcLen
) const
2738 CreateConvIfNeeded();
2741 return m_convReal
->ToWChar(dst
, dstLen
, src
, srcLen
);
2744 return wxMBConv::ToWChar(dst
, dstLen
, src
, srcLen
);
2747 size_t wxCSConv::FromWChar(char *dst
, size_t dstLen
,
2748 const wchar_t *src
, size_t srcLen
) const
2750 CreateConvIfNeeded();
2753 return m_convReal
->FromWChar(dst
, dstLen
, src
, srcLen
);
2756 return wxMBConv::FromWChar(dst
, dstLen
, src
, srcLen
);
2759 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2761 CreateConvIfNeeded();
2764 return m_convReal
->MB2WC(buf
, psz
, n
);
2767 size_t len
= strlen(psz
);
2771 for (size_t c
= 0; c
<= len
; c
++)
2772 buf
[c
] = (unsigned char)(psz
[c
]);
2778 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2780 CreateConvIfNeeded();
2783 return m_convReal
->WC2MB(buf
, psz
, n
);
2786 const size_t len
= wxWcslen(psz
);
2789 for (size_t c
= 0; c
<= len
; c
++)
2792 return wxCONV_FAILED
;
2794 buf
[c
] = (char)psz
[c
];
2799 for (size_t c
= 0; c
<= len
; c
++)
2802 return wxCONV_FAILED
;
2809 size_t wxCSConv::GetMBNulLen() const
2811 CreateConvIfNeeded();
2815 return m_convReal
->GetMBNulLen();
2818 // otherwise, we are ISO-8859-1
2822 #if wxUSE_UNICODE_UTF8
2823 bool wxCSConv::IsUTF8() const
2825 CreateConvIfNeeded();
2829 return m_convReal
->IsUTF8();
2832 // otherwise, we are ISO-8859-1
2840 wxWCharBuffer
wxSafeConvertMB2WX(const char *s
)
2843 return wxWCharBuffer();
2845 wxWCharBuffer
wbuf(wxConvLibc
.cMB2WX(s
));
2847 wbuf
= wxMBConvUTF8().cMB2WX(s
);
2849 wbuf
= wxConvISO8859_1
.cMB2WX(s
);
2854 wxCharBuffer
wxSafeConvertWX2MB(const wchar_t *ws
)
2857 return wxCharBuffer();
2859 wxCharBuffer
buf(wxConvLibc
.cWX2MB(ws
));
2861 buf
= wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
).cWX2MB(ws
);
2866 #endif // wxUSE_UNICODE
2868 // ----------------------------------------------------------------------------
2870 // ----------------------------------------------------------------------------
2872 // NB: The reason why we create converter objects in this convoluted way,
2873 // using a factory function instead of global variable, is that they
2874 // may be used at static initialization time (some of them are used by
2875 // wxString ctors and there may be a global wxString object). In other
2876 // words, possibly _before_ the converter global object would be
2883 #undef wxConvISO8859_1
2885 #define WX_DEFINE_GLOBAL_CONV2(klass, impl_klass, name, ctor_args) \
2886 WXDLLIMPEXP_DATA_BASE(klass*) name##Ptr = NULL; \
2887 WXDLLIMPEXP_BASE klass* wxGet_##name##Ptr() \
2889 static impl_klass name##Obj ctor_args; \
2890 return &name##Obj; \
2892 /* this ensures that all global converter objects are created */ \
2893 /* by the time static initialization is done, i.e. before any */ \
2894 /* thread is launched: */ \
2895 static klass* gs_##name##instance = wxGet_##name##Ptr()
2897 #define WX_DEFINE_GLOBAL_CONV(klass, name, ctor_args) \
2898 WX_DEFINE_GLOBAL_CONV2(klass, klass, name, ctor_args)
2901 WX_DEFINE_GLOBAL_CONV2(wxMBConv
, wxMBConv_win32
, wxConvLibc
, wxEMPTY_PARAMETER_VALUE
);
2903 WX_DEFINE_GLOBAL_CONV2(wxMBConv
, wxMBConvLibc
, wxConvLibc
, wxEMPTY_PARAMETER_VALUE
);
2906 WX_DEFINE_GLOBAL_CONV(wxMBConvUTF8
, wxConvUTF8
, wxEMPTY_PARAMETER_VALUE
);
2907 WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7
, wxConvUTF7
, wxEMPTY_PARAMETER_VALUE
);
2909 WX_DEFINE_GLOBAL_CONV(wxCSConv
, wxConvLocal
, (wxFONTENCODING_SYSTEM
));
2910 WX_DEFINE_GLOBAL_CONV(wxCSConv
, wxConvISO8859_1
, (wxFONTENCODING_ISO8859_1
));
2912 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= wxGet_wxConvLibcPtr();
2913 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvUI
= wxGet_wxConvLocalPtr();
2916 // The xnu kernel always communicates file paths in decomposed UTF-8.
2917 // WARNING: Are we sure that CFString's conversion will cause decomposition?
2918 static wxMBConv_cf
wxConvMacUTF8DObj(wxFONTENCODING_UTF8
);
2921 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
=
2924 #else // !__DARWIN__
2925 wxGet_wxConvLibcPtr();
2926 #endif // __DARWIN__/!__DARWIN__
2928 #else // !wxUSE_WCHAR_T
2930 // FIXME-UTF8: remove this, wxUSE_WCHAR_T is required now
2931 // stand-ins in absence of wchar_t
2932 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
2937 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T