1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // For compilers that support precompilation, includes "wx.h".
16 #include "wx/wxprec.h"
22 #include "wx/hashmap.h"
25 #include "wx/strconv.h"
37 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
38 #include "wx/msw/private.h"
39 #include "wx/msw/missing.h"
40 #define wxHAVE_WIN32_MB2WC
49 #include "wx/thread.h"
52 #include "wx/encconv.h"
53 #include "wx/fontmap.h"
57 #include <ATSUnicode.h>
58 #include <TextCommon.h>
59 #include <TextEncodingConverter.h>
62 // includes Mac headers
63 #include "wx/mac/private.h"
67 #define TRACE_STRCONV _T("strconv")
69 // WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
71 #if SIZEOF_WCHAR_T == 2
76 // ============================================================================
78 // ============================================================================
80 // helper function of cMB2WC(): check if n bytes at this location are all NUL
81 static bool NotAllNULs(const char *p
, size_t n
)
83 while ( n
&& *p
++ == '\0' )
89 // ----------------------------------------------------------------------------
90 // UTF-16 en/decoding to/from UCS-4 with surrogates handling
91 // ----------------------------------------------------------------------------
93 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
98 *output
= (wxUint16
) input
;
102 else if (input
>= 0x110000)
104 return wxCONV_FAILED
;
110 *output
++ = (wxUint16
) ((input
>> 10) + 0xd7c0);
111 *output
= (wxUint16
) ((input
& 0x3ff) + 0xdc00);
118 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
120 if ((*input
< 0xd800) || (*input
> 0xdfff))
125 else if ((input
[1] < 0xdc00) || (input
[1] > 0xdfff))
128 return wxCONV_FAILED
;
132 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
138 typedef wchar_t wxDecodeSurrogate_t
;
140 typedef wxUint16 wxDecodeSurrogate_t
;
141 #endif // WC_UTF16/!WC_UTF16
143 // returns the next UTF-32 character from the wchar_t buffer and advances the
144 // pointer to the character after this one
146 // if an invalid character is found, *pSrc is set to NULL, the caller must
148 static wxUint32
wxDecodeSurrogate(const wxDecodeSurrogate_t
**pSrc
)
152 n
= decode_utf16(wx_reinterpret_cast(const wxUint16
*, *pSrc
), out
);
153 if ( n
== wxCONV_FAILED
)
161 // ----------------------------------------------------------------------------
163 // ----------------------------------------------------------------------------
166 wxMBConv::ToWChar(wchar_t *dst
, size_t dstLen
,
167 const char *src
, size_t srcLen
) const
169 // although new conversion classes are supposed to implement this function
170 // directly, the existing ones only implement the old MB2WC() and so, to
171 // avoid having to rewrite all conversion classes at once, we provide a
172 // default (but not efficient) implementation of this one in terms of the
173 // old function by copying the input to ensure that it's NUL-terminated and
174 // then using MB2WC() to convert it
176 // the number of chars [which would be] written to dst [if it were not NULL]
177 size_t dstWritten
= 0;
179 // the number of NULs terminating this string
180 size_t nulLen
= 0; // not really needed, but just to avoid warnings
182 // if we were not given the input size we just have to assume that the
183 // string is properly terminated as we have no way of knowing how long it
184 // is anyhow, but if we do have the size, check whether there are enough
188 if ( srcLen
!= wxNO_LEN
)
190 // we need to know how to find the end of this string
191 nulLen
= GetMBNulLen();
192 if ( nulLen
== wxCONV_FAILED
)
193 return wxCONV_FAILED
;
195 // if there are enough NULs we can avoid the copy
196 if ( srcLen
< nulLen
|| NotAllNULs(src
+ srcLen
- nulLen
, nulLen
) )
198 // make a copy in order to properly NUL-terminate the string
199 bufTmp
= wxCharBuffer(srcLen
+ nulLen
- 1 /* 1 will be added */);
200 char * const p
= bufTmp
.data();
201 memcpy(p
, src
, srcLen
);
202 for ( char *s
= p
+ srcLen
; s
< p
+ srcLen
+ nulLen
; s
++ )
208 srcEnd
= src
+ srcLen
;
210 else // quit after the first loop iteration
217 // try to convert the current chunk
218 size_t lenChunk
= MB2WC(NULL
, src
, 0);
219 if ( lenChunk
== wxCONV_FAILED
)
220 return wxCONV_FAILED
;
222 lenChunk
++; // for the L'\0' at the end of this chunk
224 dstWritten
+= lenChunk
;
228 // nothing left in the input string, conversion succeeded
234 if ( dstWritten
> dstLen
)
235 return wxCONV_FAILED
;
237 if ( MB2WC(dst
, src
, lenChunk
) == wxCONV_FAILED
)
238 return wxCONV_FAILED
;
245 // we convert just one chunk in this case as this is the entire
250 // advance the input pointer past the end of this chunk
251 while ( NotAllNULs(src
, nulLen
) )
253 // notice that we must skip over multiple bytes here as we suppose
254 // that if NUL takes 2 or 4 bytes, then all the other characters do
255 // too and so if we advanced by a single byte we might erroneously
256 // detect sequences of NUL bytes in the middle of the input
260 src
+= nulLen
; // skipping over its terminator as well
262 // note that ">=" (and not just "==") is needed here as the terminator
263 // we skipped just above could be inside or just after the buffer
264 // delimited by srcEnd
273 wxMBConv::FromWChar(char *dst
, size_t dstLen
,
274 const wchar_t *src
, size_t srcLen
) const
276 // the number of chars [which would be] written to dst [if it were not NULL]
277 size_t dstWritten
= 0;
279 // make a copy of the input string unless it is already properly
282 // if we don't know its length we have no choice but to assume that it is,
283 // indeed, properly terminated
284 wxWCharBuffer bufTmp
;
285 if ( srcLen
== wxNO_LEN
)
287 srcLen
= wxWcslen(src
) + 1;
289 else if ( srcLen
!= 0 && src
[srcLen
- 1] != L
'\0' )
291 // make a copy in order to properly NUL-terminate the string
292 bufTmp
= wxWCharBuffer(srcLen
);
293 memcpy(bufTmp
.data(), src
, srcLen
* sizeof(wchar_t));
297 const size_t lenNul
= GetMBNulLen();
298 for ( const wchar_t * const srcEnd
= src
+ srcLen
;
300 src
+= wxWcslen(src
) + 1 /* skip L'\0' too */ )
302 // try to convert the current chunk
303 size_t lenChunk
= WC2MB(NULL
, src
, 0);
305 if ( lenChunk
== wxCONV_FAILED
)
306 return wxCONV_FAILED
;
309 dstWritten
+= lenChunk
;
313 if ( dstWritten
> dstLen
)
314 return wxCONV_FAILED
;
316 if ( WC2MB(dst
, src
, lenChunk
) == wxCONV_FAILED
)
317 return wxCONV_FAILED
;
326 size_t wxMBConv::MB2WC(wchar_t *outBuff
, const char *inBuff
, size_t outLen
) const
328 size_t rc
= ToWChar(outBuff
, outLen
, inBuff
);
329 if ( rc
!= wxCONV_FAILED
)
331 // ToWChar() returns the buffer length, i.e. including the trailing
332 // NUL, while this method doesn't take it into account
339 size_t wxMBConv::WC2MB(char *outBuff
, const wchar_t *inBuff
, size_t outLen
) const
341 size_t rc
= FromWChar(outBuff
, outLen
, inBuff
);
342 if ( rc
!= wxCONV_FAILED
)
350 wxMBConv::~wxMBConv()
352 // nothing to do here (necessary for Darwin linking probably)
355 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
359 // calculate the length of the buffer needed first
360 const size_t nLen
= MB2WC(NULL
, psz
, 0);
361 if ( nLen
!= wxCONV_FAILED
)
363 // now do the actual conversion
364 wxWCharBuffer
buf(nLen
/* +1 added implicitly */);
366 // +1 for the trailing NUL
367 if ( MB2WC(buf
.data(), psz
, nLen
+ 1) != wxCONV_FAILED
)
372 return wxWCharBuffer();
375 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
379 const size_t nLen
= WC2MB(NULL
, pwz
, 0);
380 if ( nLen
!= wxCONV_FAILED
)
382 // extra space for trailing NUL(s)
383 static const size_t extraLen
= GetMaxMBNulLen();
385 wxCharBuffer
buf(nLen
+ extraLen
- 1);
386 if ( WC2MB(buf
.data(), pwz
, nLen
+ extraLen
) != wxCONV_FAILED
)
391 return wxCharBuffer();
395 wxMBConv::cMB2WC(const char *inBuff
, size_t inLen
, size_t *outLen
) const
397 const size_t dstLen
= ToWChar(NULL
, 0, inBuff
, inLen
);
398 if ( dstLen
!= wxCONV_FAILED
)
400 wxWCharBuffer
wbuf(dstLen
- 1);
401 if ( ToWChar(wbuf
.data(), dstLen
, inBuff
, inLen
) != wxCONV_FAILED
)
406 if ( wbuf
[dstLen
- 1] == L
'\0' )
417 return wxWCharBuffer();
421 wxMBConv::cWC2MB(const wchar_t *inBuff
, size_t inLen
, size_t *outLen
) const
423 size_t dstLen
= FromWChar(NULL
, 0, inBuff
, inLen
);
424 if ( dstLen
!= wxCONV_FAILED
)
426 // special case of empty input: can't allocate 0 size buffer below as
427 // wxCharBuffer insists on NUL-terminating it
428 wxCharBuffer
buf(dstLen
? dstLen
- 1 : 1);
429 if ( FromWChar(buf
.data(), dstLen
, inBuff
, inLen
) != wxCONV_FAILED
)
435 const size_t nulLen
= GetMBNulLen();
436 if ( dstLen
>= nulLen
&&
437 !NotAllNULs(buf
.data() + dstLen
- nulLen
, nulLen
) )
439 // in this case the output is NUL-terminated and we're not
440 // supposed to count NUL
452 return wxCharBuffer();
455 // ----------------------------------------------------------------------------
457 // ----------------------------------------------------------------------------
459 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
461 return wxMB2WC(buf
, psz
, n
);
464 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
466 return wxWC2MB(buf
, psz
, n
);
469 // ----------------------------------------------------------------------------
470 // wxConvBrokenFileNames
471 // ----------------------------------------------------------------------------
475 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar
*charset
)
477 if ( !charset
|| wxStricmp(charset
, _T("UTF-8")) == 0
478 || wxStricmp(charset
, _T("UTF8")) == 0 )
479 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
481 m_conv
= new wxCSConv(charset
);
486 // ----------------------------------------------------------------------------
488 // ----------------------------------------------------------------------------
490 // Implementation (C) 2004 Fredrik Roubert
493 // BASE64 decoding table
495 static const unsigned char utf7unb64
[] =
497 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
498 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
499 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
500 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
501 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
502 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
503 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
504 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
505 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
506 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
507 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
508 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
509 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
510 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
511 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
512 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
513 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
514 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
515 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
516 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
517 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
518 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
519 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
520 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
521 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
522 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
523 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
524 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
525 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
526 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
527 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
528 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
531 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
535 while ( *psz
&& (!buf
|| (len
< n
)) )
537 unsigned char cc
= *psz
++;
545 else if (*psz
== '-')
553 else // start of BASE64 encoded string
557 for ( ok
= lsb
= false, d
= 0, l
= 0;
558 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff;
563 for (l
+= 6; l
>= 8; lsb
= !lsb
)
565 unsigned char c
= (unsigned char)((d
>> (l
-= 8)) % 256);
575 *buf
= (wchar_t)(c
<< 8);
584 // in valid UTF7 we should have valid characters after '+'
585 return wxCONV_FAILED
;
593 if ( buf
&& (len
< n
) )
600 // BASE64 encoding table
602 static const unsigned char utf7enb64
[] =
604 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
605 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
606 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
607 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
608 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
609 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
610 'w', 'x', 'y', 'z', '0', '1', '2', '3',
611 '4', '5', '6', '7', '8', '9', '+', '/'
615 // UTF-7 encoding table
617 // 0 - Set D (directly encoded characters)
618 // 1 - Set O (optional direct characters)
619 // 2 - whitespace characters (optional)
620 // 3 - special characters
622 static const unsigned char utf7encode
[128] =
624 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
625 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
626 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
627 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
628 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
630 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
631 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
634 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
638 while (*psz
&& ((!buf
) || (len
< n
)))
641 if (cc
< 0x80 && utf7encode
[cc
] < 1)
650 else if (((wxUint32
)cc
) > 0xffff)
652 // no surrogate pair generation (yet?)
653 return wxCONV_FAILED
;
664 // BASE64 encode string
665 unsigned int lsb
, d
, l
;
666 for (d
= 0, l
= 0; /*nothing*/; psz
++)
668 for (lsb
= 0; lsb
< 2; lsb
++)
671 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
673 for (l
+= 8; l
>= 6; )
677 *buf
++ = utf7enb64
[(d
>> l
) % 64];
683 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
690 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
702 if (buf
&& (len
< n
))
708 // ----------------------------------------------------------------------------
710 // ----------------------------------------------------------------------------
712 static wxUint32 utf8_max
[]=
713 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
715 // boundaries of the private use area we use to (temporarily) remap invalid
716 // characters in a UTF-8 encoded string
717 const wxUint32 wxUnicodePUA
= 0x100000;
718 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
720 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
724 while (*psz
&& ((!buf
) || (len
< n
)))
726 const char *opsz
= psz
;
727 bool invalid
= false;
728 unsigned char cc
= *psz
++, fc
= cc
;
730 for (cnt
= 0; fc
& 0x80; cnt
++)
740 // escape the escape character for octal escapes
741 if ((m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
742 && cc
== '\\' && (!buf
|| len
< n
))
754 // invalid UTF-8 sequence
759 unsigned ocnt
= cnt
- 1;
760 wxUint32 res
= cc
& (0x3f >> cnt
);
764 if ((cc
& 0xC0) != 0x80)
766 // invalid UTF-8 sequence
772 res
= (res
<< 6) | (cc
& 0x3f);
775 if (invalid
|| res
<= utf8_max
[ocnt
])
777 // illegal UTF-8 encoding
780 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
781 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
783 // if one of our PUA characters turns up externally
784 // it must also be treated as an illegal sequence
785 // (a bit like you have to escape an escape character)
791 // cast is ok because wchar_t == wxUint16 if WC_UTF16
792 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
793 if (pa
== wxCONV_FAILED
)
805 *buf
++ = (wchar_t)res
;
807 #endif // WC_UTF16/!WC_UTF16
813 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
815 while (opsz
< psz
&& (!buf
|| len
< n
))
818 // cast is ok because wchar_t == wxUint16 if WC_UTF16
819 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
820 wxASSERT(pa
!= wxCONV_FAILED
);
827 *buf
++ = (wchar_t)(wxUnicodePUA
+ (unsigned char)*opsz
);
833 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
835 while (opsz
< psz
&& (!buf
|| len
< n
))
837 if ( buf
&& len
+ 3 < n
)
839 unsigned char on
= *opsz
;
841 *buf
++ = (wchar_t)( L
'0' + on
/ 0100 );
842 *buf
++ = (wchar_t)( L
'0' + (on
% 0100) / 010 );
843 *buf
++ = (wchar_t)( L
'0' + on
% 010 );
850 else // MAP_INVALID_UTF8_NOT
852 return wxCONV_FAILED
;
858 if (buf
&& (len
< n
))
864 static inline bool isoctal(wchar_t wch
)
866 return L
'0' <= wch
&& wch
<= L
'7';
869 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
873 while (*psz
&& ((!buf
) || (len
< n
)))
878 // cast is ok for WC_UTF16
879 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
880 psz
+= (pa
== wxCONV_FAILED
) ? 1 : pa
;
882 cc
= (*psz
++) & 0x7fffffff;
885 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
886 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
889 *buf
++ = (char)(cc
- wxUnicodePUA
);
892 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
893 && cc
== L
'\\' && psz
[0] == L
'\\' )
900 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
902 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
906 *buf
++ = (char) ((psz
[0] - L
'0') * 0100 +
907 (psz
[1] - L
'0') * 010 +
917 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++)
933 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
935 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
941 if (buf
&& (len
< n
))
947 // ============================================================================
949 // ============================================================================
951 #ifdef WORDS_BIGENDIAN
952 #define wxMBConvUTF16straight wxMBConvUTF16BE
953 #define wxMBConvUTF16swap wxMBConvUTF16LE
955 #define wxMBConvUTF16swap wxMBConvUTF16BE
956 #define wxMBConvUTF16straight wxMBConvUTF16LE
960 size_t wxMBConvUTF16Base::GetLength(const char *src
, size_t srcLen
)
962 if ( srcLen
== wxNO_LEN
)
964 // count the number of bytes in input, including the trailing NULs
965 const wxUint16
*inBuff
= wx_reinterpret_cast(const wxUint16
*, src
);
966 for ( srcLen
= 1; *inBuff
++; srcLen
++ )
969 srcLen
*= BYTES_PER_CHAR
;
971 else // we already have the length
973 // we can only convert an entire number of UTF-16 characters
974 if ( srcLen
% BYTES_PER_CHAR
)
975 return wxCONV_FAILED
;
981 // case when in-memory representation is UTF-16 too
984 // ----------------------------------------------------------------------------
985 // conversions without endianness change
986 // ----------------------------------------------------------------------------
989 wxMBConvUTF16straight::ToWChar(wchar_t *dst
, size_t dstLen
,
990 const char *src
, size_t srcLen
) const
992 // set up the scene for using memcpy() (which is presumably more efficient
993 // than copying the bytes one by one)
994 srcLen
= GetLength(src
, srcLen
);
995 if ( srcLen
== wxNO_LEN
)
996 return wxCONV_FAILED
;
998 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1001 if ( dstLen
< inLen
)
1002 return wxCONV_FAILED
;
1004 memcpy(dst
, src
, srcLen
);
1011 wxMBConvUTF16straight::FromWChar(char *dst
, size_t dstLen
,
1012 const wchar_t *src
, size_t srcLen
) const
1014 if ( srcLen
== wxNO_LEN
)
1015 srcLen
= wxWcslen(src
) + 1;
1017 srcLen
*= BYTES_PER_CHAR
;
1021 if ( dstLen
< srcLen
)
1022 return wxCONV_FAILED
;
1024 memcpy(dst
, src
, srcLen
);
1030 // ----------------------------------------------------------------------------
1031 // endian-reversing conversions
1032 // ----------------------------------------------------------------------------
1035 wxMBConvUTF16swap::ToWChar(wchar_t *dst
, size_t dstLen
,
1036 const char *src
, size_t srcLen
) const
1038 srcLen
= GetLength(src
, srcLen
);
1039 if ( srcLen
== wxNO_LEN
)
1040 return wxCONV_FAILED
;
1042 srcLen
/= BYTES_PER_CHAR
;
1046 if ( dstLen
< srcLen
)
1047 return wxCONV_FAILED
;
1049 const wxUint16
*inBuff
= wx_reinterpret_cast(const wxUint16
*, src
);
1050 for ( size_t n
= 0; n
< srcLen
; n
++, inBuff
++ )
1052 *dst
++ = wxUINT16_SWAP_ALWAYS(*inBuff
);
1060 wxMBConvUTF16swap::FromWChar(char *dst
, size_t dstLen
,
1061 const wchar_t *src
, size_t srcLen
) const
1063 if ( srcLen
== wxNO_LEN
)
1064 srcLen
= wxWcslen(src
) + 1;
1066 srcLen
*= BYTES_PER_CHAR
;
1070 if ( dstLen
< srcLen
)
1071 return wxCONV_FAILED
;
1073 wxUint16
*outBuff
= wx_reinterpret_cast(wxUint16
*, dst
);
1074 for ( size_t n
= 0; n
< srcLen
; n
+= BYTES_PER_CHAR
, src
++ )
1076 *outBuff
++ = wxUINT16_SWAP_ALWAYS(*src
);
1083 #else // !WC_UTF16: wchar_t is UTF-32
1085 // ----------------------------------------------------------------------------
1086 // conversions without endianness change
1087 // ----------------------------------------------------------------------------
1090 wxMBConvUTF16straight::ToWChar(wchar_t *dst
, size_t dstLen
,
1091 const char *src
, size_t srcLen
) const
1093 srcLen
= GetLength(src
, srcLen
);
1094 if ( srcLen
== wxNO_LEN
)
1095 return wxCONV_FAILED
;
1097 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1100 // optimization: return maximal space which could be needed for this
1101 // string even if the real size could be smaller if the buffer contains
1107 const wxUint16
*inBuff
= wx_reinterpret_cast(const wxUint16
*, src
);
1108 for ( const wxUint16
* const inEnd
= inBuff
+ inLen
; inBuff
< inEnd
; )
1110 const wxUint32 ch
= wxDecodeSurrogate(&inBuff
);
1112 return wxCONV_FAILED
;
1114 if ( ++outLen
> dstLen
)
1115 return wxCONV_FAILED
;
1125 wxMBConvUTF16straight::FromWChar(char *dst
, size_t dstLen
,
1126 const wchar_t *src
, size_t srcLen
) const
1128 if ( srcLen
== wxNO_LEN
)
1129 srcLen
= wxWcslen(src
) + 1;
1132 wxUint16
*outBuff
= wx_reinterpret_cast(wxUint16
*, dst
);
1133 for ( size_t n
= 0; n
< srcLen
; n
++ )
1136 const size_t numChars
= encode_utf16(*src
++, cc
);
1137 if ( numChars
== wxCONV_FAILED
)
1138 return wxCONV_FAILED
;
1140 outLen
+= numChars
* BYTES_PER_CHAR
;
1143 if ( outLen
> dstLen
)
1144 return wxCONV_FAILED
;
1147 if ( numChars
== 2 )
1149 // second character of a surrogate
1158 // ----------------------------------------------------------------------------
1159 // endian-reversing conversions
1160 // ----------------------------------------------------------------------------
1163 wxMBConvUTF16swap::ToWChar(wchar_t *dst
, size_t dstLen
,
1164 const char *src
, size_t srcLen
) const
1166 srcLen
= GetLength(src
, srcLen
);
1167 if ( srcLen
== wxNO_LEN
)
1168 return wxCONV_FAILED
;
1170 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1173 // optimization: return maximal space which could be needed for this
1174 // string even if the real size could be smaller if the buffer contains
1180 const wxUint16
*inBuff
= wx_reinterpret_cast(const wxUint16
*, src
);
1181 for ( const wxUint16
* const inEnd
= inBuff
+ inLen
; inBuff
< inEnd
; )
1186 tmp
[0] = wxUINT16_SWAP_ALWAYS(*inBuff
);
1188 tmp
[1] = wxUINT16_SWAP_ALWAYS(*inBuff
);
1190 const size_t numChars
= decode_utf16(tmp
, ch
);
1191 if ( numChars
== wxCONV_FAILED
)
1192 return wxCONV_FAILED
;
1194 if ( numChars
== 2 )
1197 if ( ++outLen
> dstLen
)
1198 return wxCONV_FAILED
;
1208 wxMBConvUTF16swap::FromWChar(char *dst
, size_t dstLen
,
1209 const wchar_t *src
, size_t srcLen
) const
1211 if ( srcLen
== wxNO_LEN
)
1212 srcLen
= wxWcslen(src
) + 1;
1215 wxUint16
*outBuff
= wx_reinterpret_cast(wxUint16
*, dst
);
1216 for ( const wchar_t *srcEnd
= src
+ srcLen
; src
< srcEnd
; src
++ )
1219 const size_t numChars
= encode_utf16(*src
, cc
);
1220 if ( numChars
== wxCONV_FAILED
)
1221 return wxCONV_FAILED
;
1223 outLen
+= numChars
* BYTES_PER_CHAR
;
1226 if ( outLen
> dstLen
)
1227 return wxCONV_FAILED
;
1229 *outBuff
++ = wxUINT16_SWAP_ALWAYS(cc
[0]);
1230 if ( numChars
== 2 )
1232 // second character of a surrogate
1233 *outBuff
++ = wxUINT16_SWAP_ALWAYS(cc
[1]);
1241 #endif // WC_UTF16/!WC_UTF16
1244 // ============================================================================
1246 // ============================================================================
1248 #ifdef WORDS_BIGENDIAN
1249 #define wxMBConvUTF32straight wxMBConvUTF32BE
1250 #define wxMBConvUTF32swap wxMBConvUTF32LE
1252 #define wxMBConvUTF32swap wxMBConvUTF32BE
1253 #define wxMBConvUTF32straight wxMBConvUTF32LE
1257 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1258 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
1261 size_t wxMBConvUTF32Base::GetLength(const char *src
, size_t srcLen
)
1263 if ( srcLen
== wxNO_LEN
)
1265 // count the number of bytes in input, including the trailing NULs
1266 const wxUint32
*inBuff
= wx_reinterpret_cast(const wxUint32
*, src
);
1267 for ( srcLen
= 1; *inBuff
++; srcLen
++ )
1270 srcLen
*= BYTES_PER_CHAR
;
1272 else // we already have the length
1274 // we can only convert an entire number of UTF-32 characters
1275 if ( srcLen
% BYTES_PER_CHAR
)
1276 return wxCONV_FAILED
;
1282 // case when in-memory representation is UTF-16
1285 // ----------------------------------------------------------------------------
1286 // conversions without endianness change
1287 // ----------------------------------------------------------------------------
1290 wxMBConvUTF32straight::ToWChar(wchar_t *dst
, size_t dstLen
,
1291 const char *src
, size_t srcLen
) const
1293 srcLen
= GetLength(src
, srcLen
);
1294 if ( srcLen
== wxNO_LEN
)
1295 return wxCONV_FAILED
;
1297 const wxUint32
*inBuff
= wx_reinterpret_cast(const wxUint32
*, src
);
1298 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1300 for ( size_t n
= 0; n
< inLen
; n
++ )
1303 const size_t numChars
= encode_utf16(*inBuff
++, cc
);
1304 if ( numChars
== wxCONV_FAILED
)
1305 return wxCONV_FAILED
;
1310 if ( outLen
> dstLen
)
1311 return wxCONV_FAILED
;
1314 if ( numChars
== 2 )
1316 // second character of a surrogate
1326 wxMBConvUTF32straight::FromWChar(char *dst
, size_t dstLen
,
1327 const wchar_t *src
, size_t srcLen
) const
1329 if ( srcLen
== wxNO_LEN
)
1330 srcLen
= wxWcslen(src
) + 1;
1334 // optimization: return maximal space which could be needed for this
1335 // string instead of the exact amount which could be less if there are
1336 // any surrogates in the input
1338 // we consider that surrogates are rare enough to make it worthwhile to
1339 // avoid running the loop below at the cost of slightly extra memory
1341 return srcLen
* BYTES_PER_CHAR
;
1344 wxUint32
*outBuff
= wx_reinterpret_cast(wxUint32
*, dst
);
1346 for ( const wchar_t * const srcEnd
= src
+ srcLen
; src
< srcEnd
; )
1348 const wxUint32 ch
= wxDecodeSurrogate(&src
);
1350 return wxCONV_FAILED
;
1352 outLen
+= BYTES_PER_CHAR
;
1354 if ( outLen
> dstLen
)
1355 return wxCONV_FAILED
;
1363 // ----------------------------------------------------------------------------
1364 // endian-reversing conversions
1365 // ----------------------------------------------------------------------------
1368 wxMBConvUTF32swap::ToWChar(wchar_t *dst
, size_t dstLen
,
1369 const char *src
, size_t srcLen
) const
1371 srcLen
= GetLength(src
, srcLen
);
1372 if ( srcLen
== wxNO_LEN
)
1373 return wxCONV_FAILED
;
1375 const wxUint32
*inBuff
= wx_reinterpret_cast(const wxUint32
*, src
);
1376 const size_t inLen
= srcLen
/ BYTES_PER_CHAR
;
1378 for ( size_t n
= 0; n
< inLen
; n
++, inBuff
++ )
1381 const size_t numChars
= encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff
), cc
);
1382 if ( numChars
== wxCONV_FAILED
)
1383 return wxCONV_FAILED
;
1388 if ( outLen
> dstLen
)
1389 return wxCONV_FAILED
;
1392 if ( numChars
== 2 )
1394 // second character of a surrogate
1404 wxMBConvUTF32swap::FromWChar(char *dst
, size_t dstLen
,
1405 const wchar_t *src
, size_t srcLen
) const
1407 if ( srcLen
== wxNO_LEN
)
1408 srcLen
= wxWcslen(src
) + 1;
1412 // optimization: return maximal space which could be needed for this
1413 // string instead of the exact amount which could be less if there are
1414 // any surrogates in the input
1416 // we consider that surrogates are rare enough to make it worthwhile to
1417 // avoid running the loop below at the cost of slightly extra memory
1419 return srcLen
*BYTES_PER_CHAR
;
1422 wxUint32
*outBuff
= wx_reinterpret_cast(wxUint32
*, dst
);
1424 for ( const wchar_t * const srcEnd
= src
+ srcLen
; src
< srcEnd
; )
1426 const wxUint32 ch
= wxDecodeSurrogate(&src
);
1428 return wxCONV_FAILED
;
1430 outLen
+= BYTES_PER_CHAR
;
1432 if ( outLen
> dstLen
)
1433 return wxCONV_FAILED
;
1435 *outBuff
++ = wxUINT32_SWAP_ALWAYS(ch
);
1441 #else // !WC_UTF16: wchar_t is UTF-32
1443 // ----------------------------------------------------------------------------
1444 // conversions without endianness change
1445 // ----------------------------------------------------------------------------
1448 wxMBConvUTF32straight::ToWChar(wchar_t *dst
, size_t dstLen
,
1449 const char *src
, size_t srcLen
) const
1451 // use memcpy() as it should be much faster than hand-written loop
1452 srcLen
= GetLength(src
, srcLen
);
1453 if ( srcLen
== wxNO_LEN
)
1454 return wxCONV_FAILED
;
1456 const size_t inLen
= srcLen
/BYTES_PER_CHAR
;
1459 if ( dstLen
< inLen
)
1460 return wxCONV_FAILED
;
1462 memcpy(dst
, src
, srcLen
);
1469 wxMBConvUTF32straight::FromWChar(char *dst
, size_t dstLen
,
1470 const wchar_t *src
, size_t srcLen
) const
1472 if ( srcLen
== wxNO_LEN
)
1473 srcLen
= wxWcslen(src
) + 1;
1475 srcLen
*= BYTES_PER_CHAR
;
1479 if ( dstLen
< srcLen
)
1480 return wxCONV_FAILED
;
1482 memcpy(dst
, src
, srcLen
);
1488 // ----------------------------------------------------------------------------
1489 // endian-reversing conversions
1490 // ----------------------------------------------------------------------------
1493 wxMBConvUTF32swap::ToWChar(wchar_t *dst
, size_t dstLen
,
1494 const char *src
, size_t srcLen
) const
1496 srcLen
= GetLength(src
, srcLen
);
1497 if ( srcLen
== wxNO_LEN
)
1498 return wxCONV_FAILED
;
1500 srcLen
/= BYTES_PER_CHAR
;
1504 if ( dstLen
< srcLen
)
1505 return wxCONV_FAILED
;
1507 const wxUint32
*inBuff
= wx_reinterpret_cast(const wxUint32
*, src
);
1508 for ( size_t n
= 0; n
< srcLen
; n
++, inBuff
++ )
1510 *dst
++ = wxUINT32_SWAP_ALWAYS(*inBuff
);
1518 wxMBConvUTF32swap::FromWChar(char *dst
, size_t dstLen
,
1519 const wchar_t *src
, size_t srcLen
) const
1521 if ( srcLen
== wxNO_LEN
)
1522 srcLen
= wxWcslen(src
) + 1;
1524 srcLen
*= BYTES_PER_CHAR
;
1528 if ( dstLen
< srcLen
)
1529 return wxCONV_FAILED
;
1531 wxUint32
*outBuff
= wx_reinterpret_cast(wxUint32
*, dst
);
1532 for ( size_t n
= 0; n
< srcLen
; n
+= BYTES_PER_CHAR
, src
++ )
1534 *outBuff
++ = wxUINT32_SWAP_ALWAYS(*src
);
1541 #endif // WC_UTF16/!WC_UTF16
1544 // ============================================================================
1545 // The classes doing conversion using the iconv_xxx() functions
1546 // ============================================================================
1550 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1551 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1552 // (unless there's yet another bug in glibc) the only case when iconv()
1553 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1554 // left in the input buffer -- when _real_ error occurs,
1555 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1557 // [This bug does not appear in glibc 2.2.]
1558 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1559 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1560 (errno != E2BIG || bufLeft != 0))
1562 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1565 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1567 #define ICONV_T_INVALID ((iconv_t)-1)
1569 #if SIZEOF_WCHAR_T == 4
1570 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1571 #define WC_ENC wxFONTENCODING_UTF32
1572 #elif SIZEOF_WCHAR_T == 2
1573 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1574 #define WC_ENC wxFONTENCODING_UTF16
1575 #else // sizeof(wchar_t) != 2 nor 4
1576 // does this ever happen?
1577 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1580 // ----------------------------------------------------------------------------
1581 // wxMBConv_iconv: encapsulates an iconv character set
1582 // ----------------------------------------------------------------------------
1584 class wxMBConv_iconv
: public wxMBConv
1587 wxMBConv_iconv(const wxChar
*name
);
1588 virtual ~wxMBConv_iconv();
1590 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1591 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1593 // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
1594 virtual size_t GetMBNulLen() const;
1596 virtual wxMBConv
*Clone() const
1598 wxMBConv_iconv
*p
= new wxMBConv_iconv(m_name
);
1599 p
->m_minMBCharWidth
= m_minMBCharWidth
;
1604 { return (m2w
!= ICONV_T_INVALID
) && (w2m
!= ICONV_T_INVALID
); }
1607 // the iconv handlers used to translate from multibyte
1608 // to wide char and in the other direction
1613 // guards access to m2w and w2m objects
1614 wxMutex m_iconvMutex
;
1618 // the name (for iconv_open()) of a wide char charset -- if none is
1619 // available on this machine, it will remain NULL
1620 static wxString ms_wcCharsetName
;
1622 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1623 // different endian-ness than the native one
1624 static bool ms_wcNeedsSwap
;
1627 // name of the encoding handled by this conversion
1630 // cached result of GetMBNulLen(); set to 0 meaning "unknown"
1632 size_t m_minMBCharWidth
;
1635 // make the constructor available for unit testing
1636 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_iconv( const wxChar
* name
)
1638 wxMBConv_iconv
* result
= new wxMBConv_iconv( name
);
1639 if ( !result
->IsOk() )
1648 wxString
wxMBConv_iconv::ms_wcCharsetName
;
1649 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1651 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1654 m_minMBCharWidth
= 0;
1656 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1657 // names for the charsets
1658 const wxCharBuffer
cname(wxString(name
).ToAscii());
1660 // check for charset that represents wchar_t:
1661 if ( ms_wcCharsetName
.empty() )
1663 wxLogTrace(TRACE_STRCONV
, _T("Looking for wide char codeset:"));
1666 const wxChar
**names
= wxFontMapperBase::GetAllEncodingNames(WC_ENC
);
1667 #else // !wxUSE_FONTMAP
1668 static const wxChar
*names_static
[] =
1670 #if SIZEOF_WCHAR_T == 4
1672 #elif SIZEOF_WCHAR_T = 2
1677 const wxChar
**names
= names_static
;
1678 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1680 for ( ; *names
&& ms_wcCharsetName
.empty(); ++names
)
1682 const wxString
nameCS(*names
);
1684 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1685 wxString
nameXE(nameCS
);
1687 #ifdef WORDS_BIGENDIAN
1689 #else // little endian
1693 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1696 m2w
= iconv_open(nameXE
.ToAscii(), cname
);
1697 if ( m2w
== ICONV_T_INVALID
)
1699 // try charset w/o bytesex info (e.g. "UCS4")
1700 wxLogTrace(TRACE_STRCONV
, _T(" trying charset \"%s\""),
1702 m2w
= iconv_open(nameCS
.ToAscii(), cname
);
1704 // and check for bytesex ourselves:
1705 if ( m2w
!= ICONV_T_INVALID
)
1707 char buf
[2], *bufPtr
;
1708 wchar_t wbuf
[2], *wbufPtr
;
1716 outsz
= SIZEOF_WCHAR_T
* 2;
1721 m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1722 (char**)&wbufPtr
, &outsz
);
1724 if (ICONV_FAILED(res
, insz
))
1726 wxLogLastError(wxT("iconv"));
1727 wxLogError(_("Conversion to charset '%s' doesn't work."),
1730 else // ok, can convert to this encoding, remember it
1732 ms_wcCharsetName
= nameCS
;
1733 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1737 else // use charset not requiring byte swapping
1739 ms_wcCharsetName
= nameXE
;
1743 wxLogTrace(TRACE_STRCONV
,
1744 wxT("iconv wchar_t charset is \"%s\"%s"),
1745 ms_wcCharsetName
.empty() ? _T("<none>")
1746 : ms_wcCharsetName
.c_str(),
1747 ms_wcNeedsSwap
? _T(" (needs swap)")
1750 else // we already have ms_wcCharsetName
1752 m2w
= iconv_open(ms_wcCharsetName
.ToAscii(), cname
);
1755 if ( ms_wcCharsetName
.empty() )
1757 w2m
= ICONV_T_INVALID
;
1761 w2m
= iconv_open(cname
, ms_wcCharsetName
.ToAscii());
1762 if ( w2m
== ICONV_T_INVALID
)
1764 wxLogTrace(TRACE_STRCONV
,
1765 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1766 ms_wcCharsetName
.c_str(), cname
.data());
1771 wxMBConv_iconv::~wxMBConv_iconv()
1773 if ( m2w
!= ICONV_T_INVALID
)
1775 if ( w2m
!= ICONV_T_INVALID
)
1779 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1781 // find the string length: notice that must be done differently for
1782 // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1784 const size_t nulLen
= GetMBNulLen();
1788 return wxCONV_FAILED
;
1791 inbuf
= strlen(psz
); // arguably more optimized than our version
1796 // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1797 // they also have to start at character boundary and not span two
1798 // adjacent characters
1800 for ( p
= psz
; NotAllNULs(p
, nulLen
); p
+= nulLen
)
1807 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1808 // Unfortunately there are a couple of global wxCSConv objects such as
1809 // wxConvLocal that are used all over wx code, so we have to make sure
1810 // the handle is used by at most one thread at the time. Otherwise
1811 // only a few wx classes would be safe to use from non-main threads
1812 // as MB<->WC conversion would fail "randomly".
1813 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1814 #endif // wxUSE_THREADS
1816 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1818 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1819 wchar_t *bufPtr
= buf
;
1820 const char *pszPtr
= psz
;
1824 // have destination buffer, convert there
1826 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1827 (char**)&bufPtr
, &outbuf
);
1828 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1832 // convert to native endianness
1833 for ( unsigned i
= 0; i
< res
; i
++ )
1834 buf
[n
] = WC_BSWAP(buf
[i
]);
1837 // NUL-terminate the string if there is any space left
1843 // no destination buffer... convert using temp buffer
1844 // to calculate destination buffer requirement
1851 outbuf
= 8 * SIZEOF_WCHAR_T
;
1854 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1855 (char**)&bufPtr
, &outbuf
);
1857 res
+= 8 - (outbuf
/ SIZEOF_WCHAR_T
);
1859 while ((cres
== (size_t)-1) && (errno
== E2BIG
));
1862 if (ICONV_FAILED(cres
, inbuf
))
1864 //VS: it is ok if iconv fails, hence trace only
1865 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1866 return wxCONV_FAILED
;
1872 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1875 // NB: explained in MB2WC
1876 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1879 size_t inlen
= wxWcslen(psz
);
1880 size_t inbuf
= inlen
* SIZEOF_WCHAR_T
;
1884 wchar_t *tmpbuf
= 0;
1888 // need to copy to temp buffer to switch endianness
1889 // (doing WC_BSWAP twice on the original buffer won't help, as it
1890 // could be in read-only memory, or be accessed in some other thread)
1891 tmpbuf
= (wchar_t *)malloc(inbuf
+ SIZEOF_WCHAR_T
);
1892 for ( size_t i
= 0; i
< inlen
; i
++ )
1893 tmpbuf
[n
] = WC_BSWAP(psz
[i
]);
1895 tmpbuf
[inlen
] = L
'\0';
1901 // have destination buffer, convert there
1902 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1906 // NB: iconv was given only wcslen(psz) characters on input, and so
1907 // it couldn't convert the trailing zero. Let's do it ourselves
1908 // if there's some room left for it in the output buffer.
1914 // no destination buffer: convert using temp buffer
1915 // to calculate destination buffer requirement
1923 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1927 while ((cres
== (size_t)-1) && (errno
== E2BIG
));
1935 if (ICONV_FAILED(cres
, inbuf
))
1937 wxLogTrace(TRACE_STRCONV
, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1938 return wxCONV_FAILED
;
1944 size_t wxMBConv_iconv::GetMBNulLen() const
1946 if ( m_minMBCharWidth
== 0 )
1948 wxMBConv_iconv
* const self
= wxConstCast(this, wxMBConv_iconv
);
1951 // NB: explained in MB2WC
1952 wxMutexLocker
lock(self
->m_iconvMutex
);
1955 wchar_t *wnul
= L
"";
1956 char buf
[8]; // should be enough for NUL in any encoding
1957 size_t inLen
= sizeof(wchar_t),
1958 outLen
= WXSIZEOF(buf
);
1959 char *inBuff
= (char *)wnul
;
1960 char *outBuff
= buf
;
1961 if ( iconv(w2m
, ICONV_CHAR_CAST(&inBuff
), &inLen
, &outBuff
, &outLen
) == (size_t)-1 )
1963 self
->m_minMBCharWidth
= (size_t)-1;
1967 self
->m_minMBCharWidth
= outBuff
- buf
;
1971 return m_minMBCharWidth
;
1974 #endif // HAVE_ICONV
1977 // ============================================================================
1978 // Win32 conversion classes
1979 // ============================================================================
1981 #ifdef wxHAVE_WIN32_MB2WC
1985 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1986 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1989 class wxMBConv_win32
: public wxMBConv
1994 m_CodePage
= CP_ACP
;
1995 m_minMBCharWidth
= 0;
1998 wxMBConv_win32(const wxMBConv_win32
& conv
)
2001 m_CodePage
= conv
.m_CodePage
;
2002 m_minMBCharWidth
= conv
.m_minMBCharWidth
;
2006 wxMBConv_win32(const wxChar
* name
)
2008 m_CodePage
= wxCharsetToCodepage(name
);
2009 m_minMBCharWidth
= 0;
2012 wxMBConv_win32(wxFontEncoding encoding
)
2014 m_CodePage
= wxEncodingToCodepage(encoding
);
2015 m_minMBCharWidth
= 0;
2017 #endif // wxUSE_FONTMAP
2019 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2021 // note that we have to use the MB_ERR_INVALID_CHARS flag as without it
2022 // the behaviour is not compatible with the Unix version (using iconv)
2023 // and breaks the library itself, e.g. wxTextInputStream::NextChar()
2024 // wouldn't work if reading an incomplete MB char didn't result in an error.
2027 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
2028 // Win XP or newer and it is not supported for UTF-[78] so we always
2029 // use our own conversions in this case. See
2030 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2031 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
2032 if ( m_CodePage
== CP_UTF8
)
2034 return wxConvUTF8
.MB2WC(buf
, psz
, n
);
2037 if ( m_CodePage
== CP_UTF7
)
2039 return wxConvUTF7
.MB2WC(buf
, psz
, n
);
2043 if ( (m_CodePage
< 50000 && m_CodePage
!= CP_SYMBOL
) &&
2044 IsAtLeastWin2kSP4() )
2046 flags
= MB_ERR_INVALID_CHARS
;
2049 const size_t len
= ::MultiByteToWideChar
2051 m_CodePage
, // code page
2052 flags
, // flags: fall on error
2053 psz
, // input string
2054 -1, // its length (NUL-terminated)
2055 buf
, // output string
2056 buf
? n
: 0 // size of output buffer
2060 // function totally failed
2061 return wxCONV_FAILED
;
2064 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2065 // check if we succeeded, by doing a double trip:
2066 if ( !flags
&& buf
)
2068 const size_t mbLen
= strlen(psz
);
2069 wxCharBuffer
mbBuf(mbLen
);
2070 if ( ::WideCharToMultiByte
2077 mbLen
+ 1, // size in bytes, not length
2081 strcmp(mbBuf
, psz
) != 0 )
2083 // we didn't obtain the same thing we started from, hence
2084 // the conversion was lossy and we consider that it failed
2085 return wxCONV_FAILED
;
2089 // note that it returns count of written chars for buf != NULL and size
2090 // of the needed buffer for buf == NULL so in either case the length of
2091 // the string (which never includes the terminating NUL) is one less
2095 virtual size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
2098 we have a problem here: by default, WideCharToMultiByte() may
2099 replace characters unrepresentable in the target code page with bad
2100 quality approximations such as turning "1/2" symbol (U+00BD) into
2101 "1" for the code pages which don't have it and we, obviously, want
2102 to avoid this at any price
2104 the trouble is that this function does it _silently_, i.e. it won't
2105 even tell us whether it did or not... Win98/2000 and higher provide
2106 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2107 we have to resort to a round trip, i.e. check that converting back
2108 results in the same string -- this is, of course, expensive but
2109 otherwise we simply can't be sure to not garble the data.
2112 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2113 // it doesn't work with CJK encodings (which we test for rather roughly
2114 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2116 BOOL usedDef
wxDUMMY_INITIALIZE(false);
2119 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
2121 // it's our lucky day
2122 flags
= WC_NO_BEST_FIT_CHARS
;
2123 pUsedDef
= &usedDef
;
2125 else // old system or unsupported encoding
2131 const size_t len
= ::WideCharToMultiByte
2133 m_CodePage
, // code page
2134 flags
, // either none or no best fit
2135 pwz
, // input string
2136 -1, // it is (wide) NUL-terminated
2137 buf
, // output buffer
2138 buf
? n
: 0, // and its size
2139 NULL
, // default "replacement" char
2140 pUsedDef
// [out] was it used?
2145 // function totally failed
2146 return wxCONV_FAILED
;
2149 // if we were really converting, check if we succeeded
2154 // check if the conversion failed, i.e. if any replacements
2157 return wxCONV_FAILED
;
2159 else // we must resort to double tripping...
2161 wxWCharBuffer
wcBuf(n
);
2162 if ( MB2WC(wcBuf
.data(), buf
, n
) == wxCONV_FAILED
||
2163 wcscmp(wcBuf
, pwz
) != 0 )
2165 // we didn't obtain the same thing we started from, hence
2166 // the conversion was lossy and we consider that it failed
2167 return wxCONV_FAILED
;
2172 // see the comment above for the reason of "len - 1"
2176 virtual size_t GetMBNulLen() const
2178 if ( m_minMBCharWidth
== 0 )
2180 int len
= ::WideCharToMultiByte
2182 m_CodePage
, // code page
2184 L
"", // input string
2185 1, // translate just the NUL
2186 NULL
, // output buffer
2188 NULL
, // no replacement char
2189 NULL
// [out] don't care if it was used
2192 wxMBConv_win32
* const self
= wxConstCast(this, wxMBConv_win32
);
2196 wxLogDebug(_T("Unexpected NUL length %d"), len
);
2197 self
->m_minMBCharWidth
= (size_t)-1;
2201 self
->m_minMBCharWidth
= (size_t)-1;
2207 self
->m_minMBCharWidth
= len
;
2212 return m_minMBCharWidth
;
2215 virtual wxMBConv
*Clone() const { return new wxMBConv_win32(*this); }
2217 bool IsOk() const { return m_CodePage
!= -1; }
2220 static bool CanUseNoBestFit()
2222 static int s_isWin98Or2k
= -1;
2224 if ( s_isWin98Or2k
== -1 )
2227 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
2229 case wxOS_WINDOWS_9X
:
2230 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
2233 case wxOS_WINDOWS_NT
:
2234 s_isWin98Or2k
= verMaj
>= 5;
2238 // unknown: be conservative by default
2243 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
2246 return s_isWin98Or2k
== 1;
2249 static bool IsAtLeastWin2kSP4()
2254 static int s_isAtLeastWin2kSP4
= -1;
2256 if ( s_isAtLeastWin2kSP4
== -1 )
2258 OSVERSIONINFOEX ver
;
2260 memset(&ver
, 0, sizeof(ver
));
2261 ver
.dwOSVersionInfoSize
= sizeof(ver
);
2262 GetVersionEx((OSVERSIONINFO
*)&ver
);
2264 s_isAtLeastWin2kSP4
=
2265 ((ver
.dwMajorVersion
> 5) || // Vista+
2266 (ver
.dwMajorVersion
== 5 && ver
.dwMinorVersion
> 0) || // XP/2003
2267 (ver
.dwMajorVersion
== 5 && ver
.dwMinorVersion
== 0 &&
2268 ver
.wServicePackMajor
>= 4)) // 2000 SP4+
2272 return s_isAtLeastWin2kSP4
== 1;
2277 // the code page we're working with
2280 // cached result of GetMBNulLen(), set to 0 initially meaning
2282 size_t m_minMBCharWidth
;
2285 #endif // wxHAVE_WIN32_MB2WC
2287 // ============================================================================
2288 // Cocoa conversion classes
2289 // ============================================================================
2291 #if defined(__WXCOCOA__)
2293 // RN: There is no UTF-32 support in either Core Foundation or Cocoa.
2294 // Strangely enough, Core Foundation uses
2295 // UTF-32 internally quite a bit - it's just not public (yet).
2297 #include <CoreFoundation/CFString.h>
2298 #include <CoreFoundation/CFStringEncodingExt.h>
2300 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
2302 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
2306 case wxFONTENCODING_DEFAULT
:
2307 enc
= CFStringGetSystemEncoding();
2310 case wxFONTENCODING_ISO8859_1
:
2311 enc
= kCFStringEncodingISOLatin1
;
2313 case wxFONTENCODING_ISO8859_2
:
2314 enc
= kCFStringEncodingISOLatin2
;
2316 case wxFONTENCODING_ISO8859_3
:
2317 enc
= kCFStringEncodingISOLatin3
;
2319 case wxFONTENCODING_ISO8859_4
:
2320 enc
= kCFStringEncodingISOLatin4
;
2322 case wxFONTENCODING_ISO8859_5
:
2323 enc
= kCFStringEncodingISOLatinCyrillic
;
2325 case wxFONTENCODING_ISO8859_6
:
2326 enc
= kCFStringEncodingISOLatinArabic
;
2328 case wxFONTENCODING_ISO8859_7
:
2329 enc
= kCFStringEncodingISOLatinGreek
;
2331 case wxFONTENCODING_ISO8859_8
:
2332 enc
= kCFStringEncodingISOLatinHebrew
;
2334 case wxFONTENCODING_ISO8859_9
:
2335 enc
= kCFStringEncodingISOLatin5
;
2337 case wxFONTENCODING_ISO8859_10
:
2338 enc
= kCFStringEncodingISOLatin6
;
2340 case wxFONTENCODING_ISO8859_11
:
2341 enc
= kCFStringEncodingISOLatinThai
;
2343 case wxFONTENCODING_ISO8859_13
:
2344 enc
= kCFStringEncodingISOLatin7
;
2346 case wxFONTENCODING_ISO8859_14
:
2347 enc
= kCFStringEncodingISOLatin8
;
2349 case wxFONTENCODING_ISO8859_15
:
2350 enc
= kCFStringEncodingISOLatin9
;
2353 case wxFONTENCODING_KOI8
:
2354 enc
= kCFStringEncodingKOI8_R
;
2356 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
2357 enc
= kCFStringEncodingDOSRussian
;
2360 // case wxFONTENCODING_BULGARIAN :
2364 case wxFONTENCODING_CP437
:
2365 enc
= kCFStringEncodingDOSLatinUS
;
2367 case wxFONTENCODING_CP850
:
2368 enc
= kCFStringEncodingDOSLatin1
;
2370 case wxFONTENCODING_CP852
:
2371 enc
= kCFStringEncodingDOSLatin2
;
2373 case wxFONTENCODING_CP855
:
2374 enc
= kCFStringEncodingDOSCyrillic
;
2376 case wxFONTENCODING_CP866
:
2377 enc
= kCFStringEncodingDOSRussian
;
2379 case wxFONTENCODING_CP874
:
2380 enc
= kCFStringEncodingDOSThai
;
2382 case wxFONTENCODING_CP932
:
2383 enc
= kCFStringEncodingDOSJapanese
;
2385 case wxFONTENCODING_CP936
:
2386 enc
= kCFStringEncodingDOSChineseSimplif
;
2388 case wxFONTENCODING_CP949
:
2389 enc
= kCFStringEncodingDOSKorean
;
2391 case wxFONTENCODING_CP950
:
2392 enc
= kCFStringEncodingDOSChineseTrad
;
2394 case wxFONTENCODING_CP1250
:
2395 enc
= kCFStringEncodingWindowsLatin2
;
2397 case wxFONTENCODING_CP1251
:
2398 enc
= kCFStringEncodingWindowsCyrillic
;
2400 case wxFONTENCODING_CP1252
:
2401 enc
= kCFStringEncodingWindowsLatin1
;
2403 case wxFONTENCODING_CP1253
:
2404 enc
= kCFStringEncodingWindowsGreek
;
2406 case wxFONTENCODING_CP1254
:
2407 enc
= kCFStringEncodingWindowsLatin5
;
2409 case wxFONTENCODING_CP1255
:
2410 enc
= kCFStringEncodingWindowsHebrew
;
2412 case wxFONTENCODING_CP1256
:
2413 enc
= kCFStringEncodingWindowsArabic
;
2415 case wxFONTENCODING_CP1257
:
2416 enc
= kCFStringEncodingWindowsBalticRim
;
2418 // This only really encodes to UTF7 (if that) evidently
2419 // case wxFONTENCODING_UTF7 :
2420 // enc = kCFStringEncodingNonLossyASCII ;
2422 case wxFONTENCODING_UTF8
:
2423 enc
= kCFStringEncodingUTF8
;
2425 case wxFONTENCODING_EUC_JP
:
2426 enc
= kCFStringEncodingEUC_JP
;
2428 case wxFONTENCODING_UTF16
:
2429 enc
= kCFStringEncodingUnicode
;
2431 case wxFONTENCODING_MACROMAN
:
2432 enc
= kCFStringEncodingMacRoman
;
2434 case wxFONTENCODING_MACJAPANESE
:
2435 enc
= kCFStringEncodingMacJapanese
;
2437 case wxFONTENCODING_MACCHINESETRAD
:
2438 enc
= kCFStringEncodingMacChineseTrad
;
2440 case wxFONTENCODING_MACKOREAN
:
2441 enc
= kCFStringEncodingMacKorean
;
2443 case wxFONTENCODING_MACARABIC
:
2444 enc
= kCFStringEncodingMacArabic
;
2446 case wxFONTENCODING_MACHEBREW
:
2447 enc
= kCFStringEncodingMacHebrew
;
2449 case wxFONTENCODING_MACGREEK
:
2450 enc
= kCFStringEncodingMacGreek
;
2452 case wxFONTENCODING_MACCYRILLIC
:
2453 enc
= kCFStringEncodingMacCyrillic
;
2455 case wxFONTENCODING_MACDEVANAGARI
:
2456 enc
= kCFStringEncodingMacDevanagari
;
2458 case wxFONTENCODING_MACGURMUKHI
:
2459 enc
= kCFStringEncodingMacGurmukhi
;
2461 case wxFONTENCODING_MACGUJARATI
:
2462 enc
= kCFStringEncodingMacGujarati
;
2464 case wxFONTENCODING_MACORIYA
:
2465 enc
= kCFStringEncodingMacOriya
;
2467 case wxFONTENCODING_MACBENGALI
:
2468 enc
= kCFStringEncodingMacBengali
;
2470 case wxFONTENCODING_MACTAMIL
:
2471 enc
= kCFStringEncodingMacTamil
;
2473 case wxFONTENCODING_MACTELUGU
:
2474 enc
= kCFStringEncodingMacTelugu
;
2476 case wxFONTENCODING_MACKANNADA
:
2477 enc
= kCFStringEncodingMacKannada
;
2479 case wxFONTENCODING_MACMALAJALAM
:
2480 enc
= kCFStringEncodingMacMalayalam
;
2482 case wxFONTENCODING_MACSINHALESE
:
2483 enc
= kCFStringEncodingMacSinhalese
;
2485 case wxFONTENCODING_MACBURMESE
:
2486 enc
= kCFStringEncodingMacBurmese
;
2488 case wxFONTENCODING_MACKHMER
:
2489 enc
= kCFStringEncodingMacKhmer
;
2491 case wxFONTENCODING_MACTHAI
:
2492 enc
= kCFStringEncodingMacThai
;
2494 case wxFONTENCODING_MACLAOTIAN
:
2495 enc
= kCFStringEncodingMacLaotian
;
2497 case wxFONTENCODING_MACGEORGIAN
:
2498 enc
= kCFStringEncodingMacGeorgian
;
2500 case wxFONTENCODING_MACARMENIAN
:
2501 enc
= kCFStringEncodingMacArmenian
;
2503 case wxFONTENCODING_MACCHINESESIMP
:
2504 enc
= kCFStringEncodingMacChineseSimp
;
2506 case wxFONTENCODING_MACTIBETAN
:
2507 enc
= kCFStringEncodingMacTibetan
;
2509 case wxFONTENCODING_MACMONGOLIAN
:
2510 enc
= kCFStringEncodingMacMongolian
;
2512 case wxFONTENCODING_MACETHIOPIC
:
2513 enc
= kCFStringEncodingMacEthiopic
;
2515 case wxFONTENCODING_MACCENTRALEUR
:
2516 enc
= kCFStringEncodingMacCentralEurRoman
;
2518 case wxFONTENCODING_MACVIATNAMESE
:
2519 enc
= kCFStringEncodingMacVietnamese
;
2521 case wxFONTENCODING_MACARABICEXT
:
2522 enc
= kCFStringEncodingMacExtArabic
;
2524 case wxFONTENCODING_MACSYMBOL
:
2525 enc
= kCFStringEncodingMacSymbol
;
2527 case wxFONTENCODING_MACDINGBATS
:
2528 enc
= kCFStringEncodingMacDingbats
;
2530 case wxFONTENCODING_MACTURKISH
:
2531 enc
= kCFStringEncodingMacTurkish
;
2533 case wxFONTENCODING_MACCROATIAN
:
2534 enc
= kCFStringEncodingMacCroatian
;
2536 case wxFONTENCODING_MACICELANDIC
:
2537 enc
= kCFStringEncodingMacIcelandic
;
2539 case wxFONTENCODING_MACROMANIAN
:
2540 enc
= kCFStringEncodingMacRomanian
;
2542 case wxFONTENCODING_MACCELTIC
:
2543 enc
= kCFStringEncodingMacCeltic
;
2545 case wxFONTENCODING_MACGAELIC
:
2546 enc
= kCFStringEncodingMacGaelic
;
2548 // case wxFONTENCODING_MACKEYBOARD :
2549 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2553 // because gcc is picky
2560 class wxMBConv_cocoa
: public wxMBConv
2565 Init(CFStringGetSystemEncoding()) ;
2568 wxMBConv_cocoa(const wxMBConv_cocoa
& conv
)
2570 m_encoding
= conv
.m_encoding
;
2574 wxMBConv_cocoa(const wxChar
* name
)
2576 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2580 wxMBConv_cocoa(wxFontEncoding encoding
)
2582 Init( wxCFStringEncFromFontEnc(encoding
) );
2585 virtual ~wxMBConv_cocoa()
2589 void Init( CFStringEncoding encoding
)
2591 m_encoding
= encoding
;
2594 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2598 CFStringRef theString
= CFStringCreateWithBytes (
2599 NULL
, //the allocator
2600 (const UInt8
*)szUnConv
,
2603 false //no BOM/external representation
2606 wxASSERT(theString
);
2608 size_t nOutLength
= CFStringGetLength(theString
);
2612 CFRelease(theString
);
2616 CFRange theRange
= { 0, nOutSize
};
2618 #if SIZEOF_WCHAR_T == 4
2619 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2622 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2624 CFRelease(theString
);
2626 szUniCharBuffer
[nOutLength
] = '\0';
2628 #if SIZEOF_WCHAR_T == 4
2629 wxMBConvUTF16 converter
;
2630 converter
.MB2WC( szOut
, (const char*)szUniCharBuffer
, nOutSize
);
2631 delete [] szUniCharBuffer
;
2637 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2641 size_t nRealOutSize
;
2642 size_t nBufSize
= wxWcslen(szUnConv
);
2643 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
2645 #if SIZEOF_WCHAR_T == 4
2646 wxMBConvUTF16 converter
;
2647 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2648 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1];
2649 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
));
2650 nBufSize
/= sizeof(UniChar
);
2653 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2657 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
2660 wxASSERT(theString
);
2662 //Note that CER puts a BOM when converting to unicode
2663 //so we check and use getchars instead in that case
2664 if (m_encoding
== kCFStringEncodingUnicode
)
2667 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2669 nRealOutSize
= CFStringGetLength(theString
) + 1;
2675 CFRangeMake(0, CFStringGetLength(theString
)),
2677 0, //what to put in characters that can't be converted -
2678 //0 tells CFString to return NULL if it meets such a character
2679 false, //not an external representation
2682 (CFIndex
*) &nRealOutSize
2686 CFRelease(theString
);
2688 #if SIZEOF_WCHAR_T == 4
2689 delete[] szUniBuffer
;
2692 return nRealOutSize
- 1;
2695 virtual wxMBConv
*Clone() const { return new wxMBConv_cocoa(*this); }
2699 return m_encoding
!= kCFStringEncodingInvalidId
&&
2700 CFStringIsEncodingAvailable(m_encoding
);
2704 CFStringEncoding m_encoding
;
2707 #endif // defined(__WXCOCOA__)
2709 // ============================================================================
2710 // Mac conversion classes
2711 // ============================================================================
2713 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2715 class wxMBConv_mac
: public wxMBConv
2720 Init(CFStringGetSystemEncoding()) ;
2723 wxMBConv_mac(const wxMBConv_mac
& conv
)
2725 Init(conv
.m_char_encoding
);
2729 wxMBConv_mac(const wxChar
* name
)
2731 Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) );
2735 wxMBConv_mac(wxFontEncoding encoding
)
2737 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
2740 virtual ~wxMBConv_mac()
2742 OSStatus status
= noErr
;
2743 if (m_MB2WC_converter
)
2744 status
= TECDisposeConverter(m_MB2WC_converter
);
2745 if (m_WC2MB_converter
)
2746 status
= TECDisposeConverter(m_WC2MB_converter
);
2749 void Init( TextEncodingBase encoding
,TextEncodingVariant encodingVariant
= kTextEncodingDefaultVariant
,
2750 TextEncodingFormat encodingFormat
= kTextEncodingDefaultFormat
)
2752 m_MB2WC_converter
= NULL
;
2753 m_WC2MB_converter
= NULL
;
2754 m_char_encoding
= CreateTextEncoding(encoding
, encodingVariant
, encodingFormat
) ;
2755 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
, 0, kUnicode16BitFormat
) ;
2758 virtual void CreateIfNeeded() const
2760 if ( m_MB2WC_converter
== NULL
&& m_WC2MB_converter
== NULL
)
2762 OSStatus status
= noErr
;
2763 status
= TECCreateConverter(&m_MB2WC_converter
,
2765 m_unicode_encoding
);
2766 wxASSERT_MSG( status
== noErr
, _("Unable to create TextEncodingConverter")) ;
2767 status
= TECCreateConverter(&m_WC2MB_converter
,
2770 wxASSERT_MSG( status
== noErr
, _("Unable to create TextEncodingConverter")) ;
2774 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2777 OSStatus status
= noErr
;
2778 ByteCount byteOutLen
;
2779 ByteCount byteInLen
= strlen(psz
) + 1;
2780 wchar_t *tbuf
= NULL
;
2781 UniChar
* ubuf
= NULL
;
2786 // Apple specs say at least 32
2787 n
= wxMax( 32, byteInLen
) ;
2788 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
2791 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
2793 #if SIZEOF_WCHAR_T == 4
2794 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2796 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2799 status
= TECConvertText(
2800 m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2801 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2803 #if SIZEOF_WCHAR_T == 4
2804 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2805 // is not properly terminated we get random characters at the end
2806 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2807 wxMBConvUTF16 converter
;
2808 res
= converter
.MB2WC( (buf
? buf
: tbuf
), (const char*)ubuf
, n
) ;
2811 res
= byteOutLen
/ sizeof( UniChar
) ;
2817 if ( buf
&& res
< n
)
2823 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2826 OSStatus status
= noErr
;
2827 ByteCount byteOutLen
;
2828 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2834 // Apple specs say at least 32
2835 n
= wxMax( 32, ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2836 tbuf
= (char*) malloc( n
) ;
2839 ByteCount byteBufferLen
= n
;
2840 UniChar
* ubuf
= NULL
;
2842 #if SIZEOF_WCHAR_T == 4
2843 wxMBConvUTF16 converter
;
2844 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2845 byteInLen
= unicharlen
;
2846 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2847 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2849 ubuf
= (UniChar
*) psz
;
2852 status
= TECConvertText(
2853 m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2854 (TextPtr
) (buf
? buf
: tbuf
), byteBufferLen
, &byteOutLen
);
2856 #if SIZEOF_WCHAR_T == 4
2863 size_t res
= byteOutLen
;
2864 if ( buf
&& res
< n
)
2868 //we need to double-trip to verify it didn't insert any ? in place
2869 //of bogus characters
2870 wxWCharBuffer
wcBuf(n
);
2871 size_t pszlen
= wxWcslen(psz
);
2872 if ( MB2WC(wcBuf
.data(), buf
, n
) == wxCONV_FAILED
||
2873 wxWcslen(wcBuf
) != pszlen
||
2874 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2876 // we didn't obtain the same thing we started from, hence
2877 // the conversion was lossy and we consider that it failed
2878 return wxCONV_FAILED
;
2885 virtual wxMBConv
*Clone() const { return new wxMBConv_mac(*this); }
2890 return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
;
2894 mutable TECObjectRef m_MB2WC_converter
;
2895 mutable TECObjectRef m_WC2MB_converter
;
2897 TextEncodingBase m_char_encoding
;
2898 TextEncodingBase m_unicode_encoding
;
2901 // MB is decomposed (D) normalized UTF8
2903 class wxMBConv_macUTF8D
: public wxMBConv_mac
2908 Init( kTextEncodingUnicodeDefault
, kUnicodeNoSubset
, kUnicodeUTF8Format
) ;
2913 virtual ~wxMBConv_macUTF8D()
2916 DisposeUnicodeToTextInfo(&m_uni
);
2917 if (m_uniBack
!=NULL
)
2918 DisposeUnicodeToTextInfo(&m_uniBack
);
2921 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2924 OSStatus status
= noErr
;
2925 ByteCount byteOutLen
;
2926 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2932 // Apple specs say at least 32
2933 n
= wxMax( 32, ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2934 tbuf
= (char*) malloc( n
) ;
2937 ByteCount byteBufferLen
= n
;
2938 UniChar
* ubuf
= NULL
;
2940 #if SIZEOF_WCHAR_T == 4
2941 wxMBConvUTF16 converter
;
2942 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2943 byteInLen
= unicharlen
;
2944 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2945 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2947 ubuf
= (UniChar
*) psz
;
2950 // ubuf is a non-decomposed UniChar buffer
2952 ByteCount dcubuflen
= byteInLen
* 2 + 2 ;
2953 ByteCount dcubufread
, dcubufwritten
;
2954 UniChar
*dcubuf
= (UniChar
*) malloc( dcubuflen
) ;
2956 ConvertFromUnicodeToText( m_uni
, byteInLen
, ubuf
,
2957 kUnicodeDefaultDirectionMask
, 0, NULL
, NULL
, NULL
, dcubuflen
, &dcubufread
, &dcubufwritten
, dcubuf
) ;
2959 // we now convert that decomposed buffer into UTF8
2961 status
= TECConvertText(
2962 m_WC2MB_converter
, (ConstTextPtr
) dcubuf
, dcubufwritten
, &dcubufread
,
2963 (TextPtr
) (buf
? buf
: tbuf
), byteBufferLen
, &byteOutLen
);
2967 #if SIZEOF_WCHAR_T == 4
2974 size_t res
= byteOutLen
;
2975 if ( buf
&& res
< n
)
2978 // don't test for round-trip fidelity yet, we cannot guarantee it yet
2984 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2987 OSStatus status
= noErr
;
2988 ByteCount byteOutLen
;
2989 ByteCount byteInLen
= strlen(psz
) + 1;
2990 wchar_t *tbuf
= NULL
;
2991 UniChar
* ubuf
= NULL
;
2996 // Apple specs say at least 32
2997 n
= wxMax( 32, byteInLen
) ;
2998 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
3001 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
3003 #if SIZEOF_WCHAR_T == 4
3004 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
3006 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
3009 ByteCount dcubuflen
= byteBufferLen
* 2 + 2 ;
3010 ByteCount dcubufread
, dcubufwritten
;
3011 UniChar
*dcubuf
= (UniChar
*) malloc( dcubuflen
) ;
3013 status
= TECConvertText(
3014 m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
3015 (TextPtr
) dcubuf
, dcubuflen
, &byteOutLen
);
3016 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
3017 // is not properly terminated we get random characters at the end
3018 dcubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
3020 // now from the decomposed UniChar to properly composed uniChar
3021 ConvertFromUnicodeToText( m_uniBack
, byteOutLen
, dcubuf
,
3022 kUnicodeDefaultDirectionMask
, 0, NULL
, NULL
, NULL
, dcubuflen
, &dcubufread
, &dcubufwritten
, ubuf
) ;
3025 byteOutLen
= dcubufwritten
;
3026 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
3029 #if SIZEOF_WCHAR_T == 4
3030 wxMBConvUTF16 converter
;
3031 res
= converter
.MB2WC( (buf
? buf
: tbuf
), (const char*)ubuf
, n
) ;
3034 res
= byteOutLen
/ sizeof( UniChar
) ;
3040 if ( buf
&& res
< n
)
3046 virtual void CreateIfNeeded() const
3048 wxMBConv_mac::CreateIfNeeded() ;
3049 if ( m_uni
== NULL
)
3051 m_map
.unicodeEncoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
3052 kUnicodeNoSubset
, kTextEncodingDefaultFormat
);
3053 m_map
.otherEncoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
3054 kUnicodeCanonicalDecompVariant
, kTextEncodingDefaultFormat
);
3055 m_map
.mappingVersion
= kUnicodeUseLatestMapping
;
3057 OSStatus err
= CreateUnicodeToTextInfo(&m_map
, &m_uni
);
3058 wxASSERT_MSG( err
== noErr
, _(" Couldn't create the UnicodeConverter")) ;
3060 m_map
.unicodeEncoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
3061 kUnicodeNoSubset
, kTextEncodingDefaultFormat
);
3062 m_map
.otherEncoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
3063 kUnicodeCanonicalCompVariant
, kTextEncodingDefaultFormat
);
3064 m_map
.mappingVersion
= kUnicodeUseLatestMapping
;
3065 err
= CreateUnicodeToTextInfo(&m_map
, &m_uniBack
);
3066 wxASSERT_MSG( err
== noErr
, _(" Couldn't create the UnicodeConverter")) ;
3070 mutable UnicodeToTextInfo m_uni
;
3071 mutable UnicodeToTextInfo m_uniBack
;
3072 mutable UnicodeMapping m_map
;
3074 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
3076 // ============================================================================
3077 // wxEncodingConverter based conversion classes
3078 // ============================================================================
3082 class wxMBConv_wxwin
: public wxMBConv
3087 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
3088 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
3092 // temporarily just use wxEncodingConverter stuff,
3093 // so that it works while a better implementation is built
3094 wxMBConv_wxwin(const wxChar
* name
)
3097 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
3099 m_enc
= wxFONTENCODING_SYSTEM
;
3104 wxMBConv_wxwin(wxFontEncoding enc
)
3111 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
3113 size_t inbuf
= strlen(psz
);
3116 if (!m2w
.Convert(psz
, buf
))
3117 return wxCONV_FAILED
;
3122 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
3124 const size_t inbuf
= wxWcslen(psz
);
3127 if (!w2m
.Convert(psz
, buf
))
3128 return wxCONV_FAILED
;
3134 virtual size_t GetMBNulLen() const
3138 case wxFONTENCODING_UTF16BE
:
3139 case wxFONTENCODING_UTF16LE
:
3142 case wxFONTENCODING_UTF32BE
:
3143 case wxFONTENCODING_UTF32LE
:
3151 virtual wxMBConv
*Clone() const { return new wxMBConv_wxwin(m_enc
); }
3153 bool IsOk() const { return m_ok
; }
3156 wxFontEncoding m_enc
;
3157 wxEncodingConverter m2w
, w2m
;
3160 // were we initialized successfully?
3163 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
3166 // make the constructors available for unit testing
3167 WXDLLIMPEXP_BASE wxMBConv
* new_wxMBConv_wxwin( const wxChar
* name
)
3169 wxMBConv_wxwin
* result
= new wxMBConv_wxwin( name
);
3170 if ( !result
->IsOk() )
3179 #endif // wxUSE_FONTMAP
3181 // ============================================================================
3182 // wxCSConv implementation
3183 // ============================================================================
3185 void wxCSConv::Init()
3192 wxCSConv::wxCSConv(const wxChar
*charset
)
3202 m_encoding
= wxFontMapperBase::GetEncodingFromName(charset
);
3204 m_encoding
= wxFONTENCODING_SYSTEM
;
3208 wxCSConv::wxCSConv(wxFontEncoding encoding
)
3210 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
3212 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
3214 encoding
= wxFONTENCODING_SYSTEM
;
3219 m_encoding
= encoding
;
3222 wxCSConv::~wxCSConv()
3227 wxCSConv::wxCSConv(const wxCSConv
& conv
)
3232 SetName(conv
.m_name
);
3233 m_encoding
= conv
.m_encoding
;
3236 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
3240 SetName(conv
.m_name
);
3241 m_encoding
= conv
.m_encoding
;
3246 void wxCSConv::Clear()
3255 void wxCSConv::SetName(const wxChar
*charset
)
3259 m_name
= wxStrdup(charset
);
3266 WX_DECLARE_HASH_MAP( wxFontEncoding
, wxString
, wxIntegerHash
, wxIntegerEqual
,
3267 wxEncodingNameCache
);
3269 static wxEncodingNameCache gs_nameCache
;
3272 wxMBConv
*wxCSConv::DoCreate() const
3275 wxLogTrace(TRACE_STRCONV
,
3276 wxT("creating conversion for %s"),
3278 : wxFontMapperBase::GetEncodingName(m_encoding
).c_str()));
3279 #endif // wxUSE_FONTMAP
3281 // check for the special case of ASCII or ISO8859-1 charset: as we have
3282 // special knowledge of it anyhow, we don't need to create a special
3283 // conversion object
3284 if ( m_encoding
== wxFONTENCODING_ISO8859_1
||
3285 m_encoding
== wxFONTENCODING_DEFAULT
)
3287 // don't convert at all
3291 // we trust OS to do conversion better than we can so try external
3292 // conversion methods first
3294 // the full order is:
3295 // 1. OS conversion (iconv() under Unix or Win32 API)
3296 // 2. hard coded conversions for UTF
3297 // 3. wxEncodingConverter as fall back
3303 #endif // !wxUSE_FONTMAP
3305 wxString
name(m_name
);
3307 wxFontEncoding
encoding(m_encoding
);
3310 if ( !name
.empty() )
3312 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
3320 wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
3321 #endif // wxUSE_FONTMAP
3325 const wxEncodingNameCache::iterator it
= gs_nameCache
.find(encoding
);
3326 if ( it
!= gs_nameCache
.end() )
3328 if ( it
->second
.empty() )
3331 wxMBConv_iconv
*conv
= new wxMBConv_iconv(it
->second
);
3338 const wxChar
** names
= wxFontMapperBase::GetAllEncodingNames(encoding
);
3339 // CS : in case this does not return valid names (eg for MacRoman) encoding
3340 // got a 'failure' entry in the cache all the same, although it just has to
3341 // be created using a different method, so only store failed iconv creation
3342 // attempts (or perhaps we shouldn't do this at all?)
3343 if ( names
[0] != NULL
)
3345 for ( ; *names
; ++names
)
3347 wxMBConv_iconv
*conv
= new wxMBConv_iconv(*names
);
3350 gs_nameCache
[encoding
] = *names
;
3357 gs_nameCache
[encoding
] = _T(""); // cache the failure
3360 #endif // wxUSE_FONTMAP
3362 #endif // HAVE_ICONV
3364 #ifdef wxHAVE_WIN32_MB2WC
3367 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
3368 : new wxMBConv_win32(m_encoding
);
3377 #endif // wxHAVE_WIN32_MB2WC
3379 #if defined(__WXMAC__)
3381 // leave UTF16 and UTF32 to the built-ins of wx
3382 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
3383 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
3386 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
3387 : new wxMBConv_mac(m_encoding
);
3389 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
3399 #if defined(__WXCOCOA__)
3401 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
3404 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
3405 : new wxMBConv_cocoa(m_encoding
);
3407 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
3418 wxFontEncoding enc
= m_encoding
;
3420 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
3422 // use "false" to suppress interactive dialogs -- we can be called from
3423 // anywhere and popping up a dialog from here is the last thing we want to
3425 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
3427 #endif // wxUSE_FONTMAP
3431 case wxFONTENCODING_UTF7
:
3432 return new wxMBConvUTF7
;
3434 case wxFONTENCODING_UTF8
:
3435 return new wxMBConvUTF8
;
3437 case wxFONTENCODING_UTF16BE
:
3438 return new wxMBConvUTF16BE
;
3440 case wxFONTENCODING_UTF16LE
:
3441 return new wxMBConvUTF16LE
;
3443 case wxFONTENCODING_UTF32BE
:
3444 return new wxMBConvUTF32BE
;
3446 case wxFONTENCODING_UTF32LE
:
3447 return new wxMBConvUTF32LE
;
3450 // nothing to do but put here to suppress gcc warnings
3457 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
3458 : new wxMBConv_wxwin(m_encoding
);
3464 #endif // wxUSE_FONTMAP
3466 // NB: This is a hack to prevent deadlock. What could otherwise happen
3467 // in Unicode build: wxConvLocal creation ends up being here
3468 // because of some failure and logs the error. But wxLog will try to
3469 // attach a timestamp, for which it will need wxConvLocal (to convert
3470 // time to char* and then wchar_t*), but that fails, tries to log the
3471 // error, but wxLog has an (already locked) critical section that
3472 // guards the static buffer.
3473 static bool alreadyLoggingError
= false;
3474 if (!alreadyLoggingError
)
3476 alreadyLoggingError
= true;
3477 wxLogError(_("Cannot convert from the charset '%s'!"),
3481 wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str()
3482 #else // !wxUSE_FONTMAP
3483 wxString::Format(_("encoding %i"), m_encoding
).c_str()
3484 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
3487 alreadyLoggingError
= false;
// Completes the set-up of this converter: when neither a charset name nor a
// specific encoding was given, a default is chosen; then the real converter
// is obtained from DoCreate() and the m_deferred flag is cleared.
// NOTE(review): the enclosing guard and the preprocessor branches selecting
// between the two defaults are not visible in this excerpt -- confirm against
// the full file.
3493 void wxCSConv::CreateConvIfNeeded() const
// this method is const, so the members are updated through a non-const alias
3497 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
3499 // if we have neither the name nor the encoding, use the default
3500 // encoding for this system
3501 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
// take the charset name reported by wxLocale; wxStrdup() makes a copy of it
3504 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
3506 // fallback to some reasonable default:
3507 self
->m_encoding
= wxFONTENCODING_ISO8859_1
;
3508 #endif // wxUSE_INTL
// create the converter that does the actual work and remember that the
// deferred initialisation has now been performed
3511 self
->m_convReal
= DoCreate();
3512 self
->m_deferred
= false;
3516 bool wxCSConv::IsOk() const
3518 CreateConvIfNeeded();
3520 // special case: no convReal created for wxFONTENCODING_ISO8859_1
3521 if ( m_encoding
== wxFONTENCODING_ISO8859_1
)
3522 return true; // always ok as we do it ourselves
3524 // m_convReal->IsOk() is called at its own creation, so we know it must
3525 // be ok if m_convReal is non-NULL
3526 return m_convReal
!= NULL
;
3529 size_t wxCSConv::ToWChar(wchar_t *dst
, size_t dstLen
,
3530 const char *src
, size_t srcLen
) const
3532 CreateConvIfNeeded();
3535 return m_convReal
->ToWChar(dst
, dstLen
, src
, srcLen
);
3538 return wxMBConv::ToWChar(dst
, dstLen
, src
, srcLen
);
3541 size_t wxCSConv::FromWChar(char *dst
, size_t dstLen
,
3542 const wchar_t *src
, size_t srcLen
) const
3544 CreateConvIfNeeded();
3547 return m_convReal
->FromWChar(dst
, dstLen
, src
, srcLen
);
3550 return wxMBConv::FromWChar(dst
, dstLen
, src
, srcLen
);
3553 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
3555 CreateConvIfNeeded();
3558 return m_convReal
->MB2WC(buf
, psz
, n
);
3561 size_t len
= strlen(psz
);
3565 for (size_t c
= 0; c
<= len
; c
++)
3566 buf
[c
] = (unsigned char)(psz
[c
]);
// Converts the NUL-terminated wide string psz to multibyte text in buf.
// When a real converter exists the call is forwarded to it unchanged;
// otherwise each wide character is narrowed with a plain (char) cast.
// NOTE(review): the conditions guarding both "return wxCONV_FAILED" lines and
// the test selecting between the two loops are not visible in this excerpt.
// NOTE(review): n is not consulted on the direct path shown here, so the
// caller's buffer must presumably hold len + 1 bytes -- confirm.
3572 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
3574 CreateConvIfNeeded();
3577 return m_convReal
->WC2MB(buf
, psz
, n
);
// direct conversion: len is the number of wide characters before the NUL
3580 const size_t len
= wxWcslen(psz
);
// first loop: runs up to and including index len, i.e. the terminating NUL is
// processed too, and stores the narrowed characters in buf
3583 for (size_t c
= 0; c
<= len
; c
++)
3586 return wxCONV_FAILED
;
3588 buf
[c
] = (char)psz
[c
];
// second loop: same range, but nothing is stored
3593 for (size_t c
= 0; c
<= len
; c
++)
3596 return wxCONV_FAILED
;
// Returns the value reported by the real converter's GetMBNulLen(), creating
// that converter first if necessary.
// NOTE(review): the result used when there is no real converter is not
// visible in this excerpt.
3603 size_t wxCSConv::GetMBNulLen() const
3605 CreateConvIfNeeded();
3609 return m_convReal
->GetMBNulLen();
3615 // ----------------------------------------------------------------------------
3617 // ----------------------------------------------------------------------------
3620 static wxMBConv_win32 wxConvLibcObj
;
3621 #elif defined(__WXMAC__) && !defined(__MACH__)
3622 static wxMBConv_mac wxConvLibcObj
;
3624 static wxMBConvLibc wxConvLibcObj
;
3627 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
3628 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
3629 static wxMBConvUTF7 wxConvUTF7Obj
;
3630 static wxMBConvUTF8 wxConvUTF8Obj
;
3631 #if defined(__WXMAC__) && defined(TARGET_CARBON)
3632 static wxMBConv_macUTF8D wxConvMacUTF8DObj
;
3634 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
3635 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
3636 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
3637 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
3638 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
3639 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
3640 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvUI
= &wxConvLocal
;
3641 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
= &
3643 #if defined(__WXMAC__) && defined(TARGET_CARBON)
3650 #endif // __WXOSX__/!__WXOSX__
// Converts the multibyte string s to a wide character buffer using, in this
// order, wxConvLibc, wxConvUTF8 and wxConvISO8859_1.
// NOTE(review): the conditions under which the empty buffer is returned and
// under which each later converter is tried are not visible in this excerpt;
// presumably each one is only a fallback for the failure of the previous one.
3654 wxWCharBuffer
wxSafeConvertMB2WX(const char *s
)
3657 return wxWCharBuffer();
// first attempt: the libc converter
3659 wxWCharBuffer
wbuf(wxConvLibc
.cMB2WX(s
));
// second attempt: UTF-8
3661 wbuf
= wxConvUTF8
.cMB2WX(s
);
// last attempt: ISO8859-1
3663 wbuf
= wxConvISO8859_1
.cMB2WX(s
);
// Converts the wide string ws to a multibyte buffer using wxConvLibc and then
// a UTF-8 converter constructed with MAP_INVALID_UTF8_TO_OCTAL.
// NOTE(review): the conditions under which the empty buffer is returned and
// under which the UTF-8 converter is tried are not visible in this excerpt;
// presumably it is only a fallback for the failure of the first conversion.
3668 wxCharBuffer
wxSafeConvertWX2MB(const wchar_t *ws
)
3671 return wxCharBuffer();
// first attempt: the libc converter
3673 wxCharBuffer
buf(wxConvLibc
.cWX2MB(ws
));
// second attempt: a temporary UTF-8 converter in MAP_INVALID_UTF8_TO_OCTAL mode
3675 buf
= wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
).cWX2MB(ws
);
3680 #endif // wxUSE_UNICODE
3682 #else // !wxUSE_WCHAR_T
3684 // stand-ins in absence of wchar_t
3685 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
3690 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T