1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // ============================================================================
17 // ============================================================================
19 // ----------------------------------------------------------------------------
21 // ----------------------------------------------------------------------------
23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
24 #pragma implementation "strconv.h"
27 // For compilers that support precompilation, includes "wx.h".
28 #include "wx/wxprec.h"
39 #include "wx/strconv.h"
44 #include "wx/msw/private.h"
48 #include "wx/msw/missing.h"
59 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
60 #define wxHAVE_WIN32_MB2WC
61 #endif // __WIN32__ but !__WXMICROWIN__
63 // ----------------------------------------------------------------------------
65 // ----------------------------------------------------------------------------
73 #include "wx/thread.h"
76 #include "wx/encconv.h"
77 #include "wx/fontmap.h"
81 #include <ATSUnicode.h>
82 #include <TextCommon.h>
83 #include <TextEncodingConverter.h>
85 #include "wx/mac/private.h" // includes mac headers
87 // ----------------------------------------------------------------------------
89 // ----------------------------------------------------------------------------
// In-place byte-swap helpers: each macro walks the first `len` elements of
// `str` and replaces every element with its byte-swapped value
// (wxUINT32_SWAP_ALWAYS for 32-bit units, wxUINT16_SWAP_ALWAYS for 16-bit
// units).
// NOTE: `str` and `len` are expanded unparenthesised and `len` is re-evaluated
// on every iteration, so pass simple expressions without side effects.
91 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
92 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
94 #if SIZEOF_WCHAR_T == 4
95 #define WC_NAME "UCS4"
96 #define WC_BSWAP BSWAP_UCS4
97 #ifdef WORDS_BIGENDIAN
98 #define WC_NAME_BEST "UCS-4BE"
100 #define WC_NAME_BEST "UCS-4LE"
102 #elif SIZEOF_WCHAR_T == 2
103 #define WC_NAME "UTF16"
104 #define WC_BSWAP BSWAP_UTF16
106 #ifdef WORDS_BIGENDIAN
107 #define WC_NAME_BEST "UTF-16BE"
109 #define WC_NAME_BEST "UTF-16LE"
111 #else // sizeof(wchar_t) != 2 nor 4
112 // does this ever happen?
113 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
116 // ============================================================================
118 // ============================================================================
120 // ----------------------------------------------------------------------------
121 // UTF-16 en/decoding to/from UCS-4
122 // ----------------------------------------------------------------------------
// Encodes the single UCS-4 value `input` as UTF-16 into `output`.
// Callers in this file treat a result of (size_t)-1 as "cannot be encoded"
// and otherwise use the result as the number of 16-bit units produced.
// NOTE(review): the return statements and some guarding conditions of this
// function are not visible in this excerpt -- confirm against the full file.
125 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
// single-unit case (its guarding condition is outside this excerpt): the
// value is stored as is, narrowed to 16 bits
130 *output
= (wxUint16
) input
;
// values at or above 0x110000 get a branch of their own (body not visible
// here; presumably they are rejected -- confirm)
133 else if (input
>=0x110000)
// two-unit case, first unit: the bits above bit 9, biased by 0xd7c0
// (0xd7c0 == 0xd800 - (0x10000 >> 10), so input >= 0x10000 lands at 0xd800+)
141 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
// two-unit case, second unit: the low 10 bits, biased by 0xdc00
142 *output
= (wxUint16
) ((input
&0x3ff)+0xdc00);
// Decodes one UCS-4 value from the UTF-16 units starting at `input` and
// stores it in `output`.
// Callers in this file treat a result of (size_t)-1 as an error and otherwise
// advance their input by the returned number of 16-bit units.
// NOTE(review): the branch bodies and return statements are not visible in
// this excerpt -- confirm against the full file.
148 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
)
// first unit lies outside 0xd800..0xdfff (branch body not visible here;
// presumably the unit is taken as the value itself -- confirm)
150 if ((*input
<0xd800) || (*input
>0xdfff))
// second unit lies outside 0xdc00..0xdfff (branch body not visible here;
// presumably reported as an error -- confirm)
155 else if ((input
[1]<0xdc00) || (input
[1]>0xdfff))
// combine the two units; this is the inverse of the biasing done in
// encode_utf16(): ((first - 0xd7c0) << 10) + (second - 0xdc00)
162 output
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00);
168 // ----------------------------------------------------------------------------
170 // ----------------------------------------------------------------------------
172 wxMBConv::~wxMBConv()
174 // nothing to do here (necessary for Darwin linking probably)
// Converts the NUL-terminated multibyte string `psz` to wide characters using
// this converter's MB2WC(). The conversion is done in two passes: a first call
// with a NULL destination obtains the required length, a second call fills a
// buffer of that size. MB2WC() signals failure with (size_t)-1.
// NOTE(review): the return statements are not visible in this excerpt; the
// buffer constructed from a NULL pointer at the end is presumably what is
// returned when either pass fails -- confirm against the full file.
177 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
181 // calculate the length of the buffer needed first
182 size_t nLen
= MB2WC(NULL
, psz
, 0);
183 if ( nLen
!= (size_t)-1 )
185 // now do the actual conversion
186 wxWCharBuffer
buf(nLen
);
187 nLen
= MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NULL
188 if ( nLen
!= (size_t)-1 )
// fallback buffer wrapping a NULL pointer
195 wxWCharBuffer
buf((wchar_t *)NULL
);
// Converts the NUL-terminated wide string `pwz` to multibyte characters using
// this converter's WC2MB(). The conversion is done in two passes: a first call
// with a NULL destination obtains the required length, a second call fills the
// buffer. WC2MB() signals failure with (size_t)-1.
// NOTE(review): the return statements are not visible in this excerpt; the
// buffer constructed from a NULL pointer at the end is presumably what is
// returned when either pass fails -- confirm against the full file.
200 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
// first pass: measure only
204 size_t nLen
= WC2MB(NULL
, pwz
, 0);
205 if ( nLen
!= (size_t)-1 )
// the buffer is requested 3 units larger than the measured length and the
// second pass is allowed nLen + 4 bytes, so that the terminator may be as
// wide as a wxUint32
207 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
208 nLen
= WC2MB(buf
.data(), pwz
, nLen
+ 4);
209 if ( nLen
!= (size_t)-1 )
// fallback buffer wrapping a NULL pointer
216 wxCharBuffer
buf((char *)NULL
);
// Converts a multibyte buffer that may contain embedded NUL characters.
// The input is processed one NUL-terminated substring at a time with MB2WC()
// until the position one past szString[nStringLen] is reached. On the success
// path *pOutSize receives the accumulated length, which counts one extra unit
// per substring for its terminating NUL.
// pOutSize must not be NULL (asserted).
// NOTE(review): the return statements are not visible in this excerpt; on a
// failed conversion the first element of the buffer is zeroed before
// (presumably) returning -- confirm against the full file.
221 const wxWCharBuffer
wxMBConv::cMB2WC(const char *szString
, size_t nStringLen
, size_t* pOutSize
) const
223 wxASSERT(pOutSize
!= NULL
);
// szEnd is one past the terminator expected at szString[nStringLen]
225 const char* szEnd
= szString
+ nStringLen
+ 1;
226 const char* szPos
= szString
;
227 const char* szStart
= szPos
;
229 size_t nActualLength
= 0;
230 size_t nCurrentSize
= nStringLen
; //try normal size first (should never resize?)
232 wxWCharBuffer
theBuffer(nCurrentSize
);
234 //Convert the string until the length() is reached, continuing the
235 //loop every time a null character is reached
236 while(szPos
!= szEnd
)
238 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this assertion fires
240 //Get the length of the current (sub)string
241 size_t nLen
= MB2WC(NULL
, szPos
, 0);
243 //Invalid conversion?
244 if( nLen
== (size_t)-1 )
247 theBuffer
.data()[0u] = wxT('\0');
252 //Increase the actual length (+1 for current null character)
253 nActualLength
+= nLen
+ 1;
255 //if the buffer is too small for what has been converted so far, grow it
//NOTE(review): the new buffer is exactly twice the old size and only one
//doubling is visible; the update of nCurrentSize itself is not visible in
//this excerpt -- confirm
256 if (nActualLength
> (nCurrentSize
+1))
258 wxWCharBuffer
theNewBuffer(nCurrentSize
<< 1);
259 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
* sizeof(wchar_t));
260 theBuffer
= theNewBuffer
;
264 //Convert the current (sub)string
//NOTE(review): the destination index is the input offset (szPos - szStart),
//not the number of wide characters produced so far; the two agree only when
//every preceding input byte produced exactly one wchar_t -- confirm this is
//intended for multibyte encodings
265 if ( MB2WC(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
268 theBuffer
.data()[0u] = wxT('\0');
272 //Increment to next (sub)string
273 //Note that we have to use strlen here instead of nLen
274 //here because XX2XX gives us the size of the output buffer,
275 //not necessarily the length of the string
276 szPos
+= strlen(szPos
) + 1;
279 //success - return actual length and the buffer
280 *pOutSize
= nActualLength
;
// Converts a wide character buffer that may contain embedded NUL characters.
// The input is processed one NUL-terminated substring at a time with WC2MB()
// until the position one past szString[nStringLen] is reached. On the success
// path *pOutSize receives the accumulated length, which counts one extra unit
// per substring for its terminating NUL.
// pOutSize must not be NULL (asserted).
// NOTE(review): the return statements are not visible in this excerpt; on a
// failed conversion the first element of the buffer is zeroed before
// (presumably) returning -- confirm against the full file.
284 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *szString
, size_t nStringLen
, size_t* pOutSize
) const
286 wxASSERT(pOutSize
!= NULL
);
// szEnd is one past the terminator expected at szString[nStringLen]
288 const wchar_t* szEnd
= szString
+ nStringLen
+ 1;
289 const wchar_t* szPos
= szString
;
290 const wchar_t* szStart
= szPos
;
292 size_t nActualLength
= 0;
293 size_t nCurrentSize
= nStringLen
<< 2; //try * 4 first
295 wxCharBuffer
theBuffer(nCurrentSize
);
297 //Convert the string until the length() is reached, continuing the
298 //loop every time a null character is reached
299 while(szPos
!= szEnd
)
301 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this assertion fires
303 //Get the length of the current (sub)string
304 size_t nLen
= WC2MB(NULL
, szPos
, 0);
306 //Invalid conversion?
307 if( nLen
== (size_t)-1 )
310 theBuffer
.data()[0u] = wxT('\0');
314 //Increase the actual length (+1 for current null character)
315 nActualLength
+= nLen
+ 1;
317 //if the buffer is too small for what has been converted so far, grow it
//NOTE(review): the new buffer is exactly twice the old size and only one
//doubling is visible; the update of nCurrentSize itself is not visible in
//this excerpt -- confirm
318 if (nActualLength
> (nCurrentSize
+1))
320 wxCharBuffer
theNewBuffer(nCurrentSize
<< 1);
321 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
);
322 theBuffer
= theNewBuffer
;
326 //Convert the current (sub)string
//NOTE(review): the destination index is the input offset in wchar_t units
//(szPos - szStart), not the number of bytes produced so far; the two agree
//only when every preceding wide character produced exactly one byte --
//confirm this is intended for multibyte encodings
327 if(WC2MB(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
330 theBuffer
.data()[0u] = wxT('\0');
334 //Increment to next (sub)string
335 //Note that we have to use wxWcslen here instead of nLen
336 //here because XX2XX gives us the size of the output buffer,
337 //not necessarily the length of the string
338 szPos
+= wxWcslen(szPos
) + 1;
341 //success - return actual length and the buffer
342 *pOutSize
= nActualLength
;
346 // ----------------------------------------------------------------------------
348 // ----------------------------------------------------------------------------
// Multibyte -> wide conversion: forwards all three arguments unchanged to
// wxMB2WC() and returns its result.
350 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
352 return wxMB2WC(buf
, psz
, n
);
// Wide -> multibyte conversion: forwards all three arguments unchanged to
// wxWC2MB() and returns its result.
355 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
357 return wxWC2MB(buf
, psz
, n
);
362 // ----------------------------------------------------------------------------
363 // wxConvBrokenFileNames
364 // ----------------------------------------------------------------------------
// Chooses the converter stored in m_conv, in this order of preference:
//  (1) G_FILENAME_ENCODING names something other than UTF-8: wxCSConv for it;
//  (2) otherwise the system encoding name, if non-empty and not UTF-8:
//      wxMBConvLibc, after exporting the name through G_FILENAME_ENCODING;
//  (3) otherwise wxMBConvUTF8 with MAP_INVALID_UTF8_TO_OCTAL.
// NOTE(review): the braces/else separating (2) from (3) are not visible in
// this excerpt -- confirm against the full file.
366 wxConvBrokenFileNames::wxConvBrokenFileNames()
368 // decide which conversion to use for the file names
370 // (1) this variable exists for the sole purpose of specifying the encoding
371 // of the filenames for GTK+ programs, so use it if it is set
372 wxString
encName(wxGetenv(_T("G_FILENAME_ENCODING")));
374 if ( !encName
.empty() && encName
!= _T("UTF-8") && encName
!= _T("UTF8") )
376 m_conv
= new wxCSConv(encName
);
378 else // G_FILENAME_ENCODING is unset/empty or names UTF-8
// only an empty name is replaced by the (upper-cased) system encoding name;
// an explicit UTF-8/UTF8 setting is kept and falls through the test below
380 if ( encName
.empty() )
381 encName
= wxLocale::GetSystemEncodingName().Upper();
383 // (2) if a non default locale is set, assume that the user wants his
384 // filenames in this locale too
385 if ( !encName
.empty() && encName
!= _T("UTF-8") && encName
!= _T("UTF8") )
// export the chosen name through the environment variable read in step (1)
387 wxSetEnv(_T("G_FILENAME_ENCODING"), encName
);
388 m_conv
= new wxMBConvLibc
;
392 // (3) finally use UTF-8 by default
393 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
// Multibyte -> wide conversion: delegates to the MB2WC() of the converter
// held in m_conv and returns its result.
// NOTE(review): the return type and the declaration of the `psz` parameter
// are on lines not visible in this excerpt.
399 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf
,
401 size_t outputSize
) const
403 return m_conv
->MB2WC( outputBuf
, psz
, outputSize
);
// Wide -> multibyte conversion: delegates to the WC2MB() of the converter
// held in m_conv and returns its result.
// NOTE(review): the return type and the declaration of the `psz` parameter
// are on lines not visible in this excerpt.
407 wxConvBrokenFileNames::WC2MB(char *outputBuf
,
409 size_t outputSize
) const
411 return m_conv
->WC2MB( outputBuf
, psz
, outputSize
);
416 // ----------------------------------------------------------------------------
418 // ----------------------------------------------------------------------------
420 // Implementation (C) 2004 Fredrik Roubert
423 // BASE64 decoding table
// 256 entries, indexed by the input byte (rows of 8, row k covers bytes
// 8*k .. 8*k+7). 'A'-'Z' map to 0x00-0x19, 'a'-'z' to 0x1a-0x33, '0'-'9' to
// 0x34-0x3d, '+' to 0x3e and '/' to 0x3f; every other byte maps to 0xff,
// which the decoder uses as the "not a BASE64 character" marker.
425 static const unsigned char utf7unb64
[] =
427 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
428 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
429 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
430 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
431 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
432 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
433 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
434 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
435 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
436 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
437 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
438 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
439 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
440 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
441 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
442 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
443 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
444 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
445 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
446 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
447 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
448 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
449 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
450 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
451 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
452 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
453 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
454 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
455 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
456 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
457 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
458 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
461 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
465 while (*psz
&& ((!buf
) || (len
< n
)))
467 unsigned char cc
= *psz
++;
475 else if (*psz
== '-')
485 // BASE64 encoded string
489 for (lsb
= false, d
= 0, l
= 0;
490 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff; psz
++)
494 for (l
+= 6; l
>= 8; lsb
= !lsb
)
496 c
= (unsigned char)((d
>> (l
-= 8)) % 256);
505 *buf
= (wchar_t)(c
<< 8);
512 if (buf
&& (len
< n
))
518 // BASE64 encoding table
// 64 entries, indexed by a 6-bit value: 'A'-'Z', 'a'-'z', '0'-'9', '+', '/'.
// The encoder always reduces its index modulo 64 before the lookup.
520 static const unsigned char utf7enb64
[] =
522 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
523 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
524 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
525 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
526 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
527 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
528 'w', 'x', 'y', 'z', '0', '1', '2', '3',
529 '4', '5', '6', '7', '8', '9', '+', '/'
533 // UTF-7 encoding table
// One class code per 7-bit character value (128 entries, rows of 16):
535 // 0 - Set D (directly encoded characters)
536 // 1 - Set O (optional direct characters)
537 // 2 - whitespace characters (optional)
538 // 3 - special characters
// The encoder in this file tests `utf7encode[cc] < 1`, i.e. only class 0
// characters are treated as directly encodable there.
540 static const unsigned char utf7encode
[128] =
542 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
543 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
544 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
545 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
546 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
547 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
548 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
549 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
552 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
558 while (*psz
&& ((!buf
) || (len
< n
)))
561 if (cc
< 0x80 && utf7encode
[cc
] < 1)
569 else if (((wxUint32
)cc
) > 0xffff)
571 // no surrogate pair generation (yet?)
582 // BASE64 encode string
583 unsigned int lsb
, d
, l
;
584 for (d
= 0, l
= 0;; psz
++)
586 for (lsb
= 0; lsb
< 2; lsb
++)
589 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
591 for (l
+= 8; l
>= 6; )
595 *buf
++ = utf7enb64
[(d
>> l
) % 64];
600 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
606 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
615 if (buf
&& (len
< n
))
620 // ----------------------------------------------------------------------------
622 // ----------------------------------------------------------------------------
// Upper bounds used by the UTF-8 converters: entry i is compared against a
// character value to decide whether i continuation bytes are enough (encoder)
// or whether a decoded sequence was longer than necessary (decoder).
// The final 0xffffffff entry can never be exceeded by a 32-bit value, so the
// encoder's search loop over this table always terminates.
624 static wxUint32 utf8_max
[]=
625 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
627 // boundaries of the private use area we use to (temporarily) remap bytes
628 // that are invalid in a UTF-8 encoded string: 256 values starting at
// wxUnicodePUA, one per possible byte value; wxUnicodePUAEnd is exclusive
629 const wxUint32 wxUnicodePUA
= 0x100000;
630 const wxUint32 wxUnicodePUAEnd
= wxUnicodePUA
+ 256;
632 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
636 while (*psz
&& ((!buf
) || (len
< n
)))
638 const char *opsz
= psz
;
639 bool invalid
= false;
640 unsigned char cc
= *psz
++, fc
= cc
;
642 for (cnt
= 0; fc
& 0x80; cnt
++)
651 // escape the escape character for octal escapes
652 if ((m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
653 && cc
== '\\' && (!buf
|| len
< n
))
665 // invalid UTF-8 sequence
670 unsigned ocnt
= cnt
- 1;
671 wxUint32 res
= cc
& (0x3f >> cnt
);
675 if ((cc
& 0xC0) != 0x80)
677 // invalid UTF-8 sequence
682 res
= (res
<< 6) | (cc
& 0x3f);
684 if (invalid
|| res
<= utf8_max
[ocnt
])
686 // illegal UTF-8 encoding
689 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
690 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
692 // if one of our PUA characters turns up externally
693 // it must also be treated as an illegal sequence
694 // (a bit like you have to escape an escape character)
700 // cast is ok because wchar_t == wxUint16 if WC_UTF16
701 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
702 if (pa
== (size_t)-1)
716 #endif // WC_UTF16/!WC_UTF16
721 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
723 while (opsz
< psz
&& (!buf
|| len
< n
))
726 // cast is ok because wchar_t == wxUint16 if WC_UTF16
727 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
728 wxASSERT(pa
!= (size_t)-1);
735 *buf
++ = wxUnicodePUA
+ (unsigned char)*opsz
;
741 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
743 while (opsz
< psz
&& (!buf
|| len
< n
))
745 if ( buf
&& len
+ 3 < n
)
747 unsigned char n
= *opsz
;
749 *buf
++ = (wchar_t)( L
'0' + n
/ 0100 );
750 *buf
++ = (wchar_t)( L
'0' + (n
% 0100) / 010 );
751 *buf
++ = (wchar_t)( L
'0' + n
% 010 );
757 else // MAP_INVALID_UTF8_NOT
764 if (buf
&& (len
< n
))
// Returns true if `wch` is one of the octal digits L'0' .. L'7'.
769 static inline bool isoctal(wchar_t wch
)
771 return L
'0' <= wch
&& wch
<= L
'7';
774 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
778 while (*psz
&& ((!buf
) || (len
< n
)))
782 // cast is ok for WC_UTF16
783 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
784 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
786 cc
=(*psz
++) & 0x7fffffff;
789 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
790 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
793 *buf
++ = (char)(cc
- wxUnicodePUA
);
796 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
797 && cc
== L
'\\' && psz
[0] == L
'\\' )
804 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
806 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
810 *buf
++ = (char) ((psz
[0] - L
'0')*0100 +
811 (psz
[1] - L
'0')*010 +
821 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
835 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
837 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
849 // ----------------------------------------------------------------------------
851 // ----------------------------------------------------------------------------
853 #ifdef WORDS_BIGENDIAN
854 #define wxMBConvUTF16straight wxMBConvUTF16BE
855 #define wxMBConvUTF16swap wxMBConvUTF16LE
857 #define wxMBConvUTF16swap wxMBConvUTF16BE
858 #define wxMBConvUTF16straight wxMBConvUTF16LE
864 // copy 16bit MB to 16bit String
865 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
869 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
872 *buf
++ = *(wxUint16
*)psz
;
875 psz
+= sizeof(wxUint16
);
877 if (buf
&& len
<n
) *buf
=0;
883 // copy 16bit String to 16bit MB
884 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
888 while (*psz
&& (!buf
|| len
< n
))
892 *(wxUint16
*)buf
= *psz
;
893 buf
+= sizeof(wxUint16
);
895 len
+= sizeof(wxUint16
);
898 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
904 // swap 16bit MB to 16bit String
905 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
909 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
913 ((char *)buf
)[0] = psz
[1];
914 ((char *)buf
)[1] = psz
[0];
918 psz
+= sizeof(wxUint16
);
920 if (buf
&& len
<n
) *buf
=0;
926 // swap 16bit String to 16bit MB
927 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
931 while (*psz
&& (!buf
|| len
< n
))
935 *buf
++ = ((char*)psz
)[1];
936 *buf
++ = ((char*)psz
)[0];
938 len
+= sizeof(wxUint16
);
941 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
950 // copy 16bit MB to 32bit String
951 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
955 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
958 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
959 if (pa
== (size_t)-1)
965 psz
+= pa
* sizeof(wxUint16
);
967 if (buf
&& len
<n
) *buf
=0;
973 // copy 32bit String to 16bit MB
974 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
978 while (*psz
&& (!buf
|| len
< n
))
981 size_t pa
=encode_utf16(*psz
, cc
);
983 if (pa
== (size_t)-1)
988 *(wxUint16
*)buf
= cc
[0];
989 buf
+= sizeof(wxUint16
);
992 *(wxUint16
*)buf
= cc
[1];
993 buf
+= sizeof(wxUint16
);
997 len
+= pa
*sizeof(wxUint16
);
1000 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
1006 // swap 16bit MB to 32bit String
1007 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1011 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
1015 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
1016 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
1018 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
1019 if (pa
== (size_t)-1)
1026 psz
+= pa
* sizeof(wxUint16
);
1028 if (buf
&& len
<n
) *buf
=0;
1034 // swap 32bit String to 16bit MB
1035 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1039 while (*psz
&& (!buf
|| len
< n
))
1042 size_t pa
=encode_utf16(*psz
, cc
);
1044 if (pa
== (size_t)-1)
1049 *buf
++ = ((char*)cc
)[1];
1050 *buf
++ = ((char*)cc
)[0];
1053 *buf
++ = ((char*)cc
)[3];
1054 *buf
++ = ((char*)cc
)[2];
1058 len
+= pa
*sizeof(wxUint16
);
1061 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
1069 // ----------------------------------------------------------------------------
1071 // ----------------------------------------------------------------------------
1073 #ifdef WORDS_BIGENDIAN
1074 #define wxMBConvUTF32straight wxMBConvUTF32BE
1075 #define wxMBConvUTF32swap wxMBConvUTF32LE
1077 #define wxMBConvUTF32swap wxMBConvUTF32BE
1078 #define wxMBConvUTF32straight wxMBConvUTF32LE
1082 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1083 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
1088 // copy 32bit MB to 16bit String
1089 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1093 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1097 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
1098 if (pa
== (size_t)-1)
1108 psz
+= sizeof(wxUint32
);
1110 if (buf
&& len
<n
) *buf
=0;
1116 // copy 16bit String to 32bit MB
1117 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1121 while (*psz
&& (!buf
|| len
< n
))
1125 // cast is ok for WC_UTF16
1126 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
1127 if (pa
== (size_t)-1)
1132 *(wxUint32
*)buf
= cc
;
1133 buf
+= sizeof(wxUint32
);
1135 len
+= sizeof(wxUint32
);
1139 if (buf
&& len
<=n
-sizeof(wxUint32
))
1147 // swap 32bit MB to 16bit String
1148 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1152 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1155 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
1156 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
1161 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
1162 if (pa
== (size_t)-1)
1172 psz
+= sizeof(wxUint32
);
1182 // swap 16bit String to 32bit MB
1183 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1187 while (*psz
&& (!buf
|| len
< n
))
1191 // cast is ok for WC_UTF16
1192 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
1193 if (pa
== (size_t)-1)
1203 len
+= sizeof(wxUint32
);
1207 if (buf
&& len
<=n
-sizeof(wxUint32
))
1216 // copy 32bit MB to 32bit String
1217 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1221 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1224 *buf
++ = *(wxUint32
*)psz
;
1226 psz
+= sizeof(wxUint32
);
1236 // copy 32bit String to 32bit MB
1237 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1241 while (*psz
&& (!buf
|| len
< n
))
1245 *(wxUint32
*)buf
= *psz
;
1246 buf
+= sizeof(wxUint32
);
1249 len
+= sizeof(wxUint32
);
1253 if (buf
&& len
<=n
-sizeof(wxUint32
))
1260 // swap 32bit MB to 32bit String
1261 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1265 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1269 ((char *)buf
)[0] = psz
[3];
1270 ((char *)buf
)[1] = psz
[2];
1271 ((char *)buf
)[2] = psz
[1];
1272 ((char *)buf
)[3] = psz
[0];
1276 psz
+= sizeof(wxUint32
);
1286 // swap 32bit String to 32bit MB
1287 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1291 while (*psz
&& (!buf
|| len
< n
))
1295 *buf
++ = ((char *)psz
)[3];
1296 *buf
++ = ((char *)psz
)[2];
1297 *buf
++ = ((char *)psz
)[1];
1298 *buf
++ = ((char *)psz
)[0];
1300 len
+= sizeof(wxUint32
);
1304 if (buf
&& len
<=n
-sizeof(wxUint32
))
1314 // ============================================================================
1315 // The classes doing conversion using the iconv_xxx() functions
1316 // ============================================================================
1320 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1321 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1322 // (unless there's yet another bug in glibc) the only case when iconv()
1323 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1324 // left in the input buffer -- when _real_ error occurs,
1325 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1327 // [This bug does not appear in glibc 2.2.]
1328 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1329 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1330 (errno != E2BIG || bufLeft != 0))
1332 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1335 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1337 // ----------------------------------------------------------------------------
1338 // wxMBConv_iconv: encapsulates an iconv character set
1339 // ----------------------------------------------------------------------------
1341 class wxMBConv_iconv
: public wxMBConv
1344 wxMBConv_iconv(const wxChar
*name
);
1345 virtual ~wxMBConv_iconv();
1347 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1348 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1351 { return (m2w
!= (iconv_t
)-1) && (w2m
!= (iconv_t
)-1); }
1354 // the iconv handlers used to translate from multibyte to wide char and in
1355 // the other direction
1359 // guards access to m2w and w2m objects
1360 wxMutex m_iconvMutex
;
1364 // the name (for iconv_open()) of a wide char charset -- if none is
1365 // available on this machine, it will remain NULL
1366 static const char *ms_wcCharsetName
;
1368 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1369 // different endian-ness than the native one
1370 static bool ms_wcNeedsSwap
;
1373 const char *wxMBConv_iconv::ms_wcCharsetName
= NULL
;
1374 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1376 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1378 // Do it the hard way
1380 for (size_t i
= 0; i
< wxStrlen(name
)+1; i
++)
1381 cname
[i
] = (char) name
[i
];
1383 // check for charset that represents wchar_t:
1384 if (ms_wcCharsetName
== NULL
)
1386 ms_wcNeedsSwap
= false;
1388 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1389 ms_wcCharsetName
= WC_NAME_BEST
;
1390 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1392 if (m2w
== (iconv_t
)-1)
1394 // try charset w/o bytesex info (e.g. "UCS4")
1395 // and check for bytesex ourselves:
1396 ms_wcCharsetName
= WC_NAME
;
1397 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1399 // last bet, try if it knows WCHAR_T pseudo-charset
1400 if (m2w
== (iconv_t
)-1)
1402 ms_wcCharsetName
= "WCHAR_T";
1403 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1406 if (m2w
!= (iconv_t
)-1)
1408 char buf
[2], *bufPtr
;
1409 wchar_t wbuf
[2], *wbufPtr
;
1417 outsz
= SIZEOF_WCHAR_T
* 2;
1421 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1422 (char**)&wbufPtr
, &outsz
);
1424 if (ICONV_FAILED(res
, insz
))
1426 ms_wcCharsetName
= NULL
;
1427 wxLogLastError(wxT("iconv"));
1428 wxLogError(_("Conversion to charset '%s' doesn't work."), name
);
1432 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1437 ms_wcCharsetName
= NULL
;
1439 // VS: we must not output an error here, since wxWidgets will safely
1440 // fall back to using wxEncodingConverter.
1441 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name
);
1445 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName
, ms_wcNeedsSwap
);
1447 else // we already have ms_wcCharsetName
1449 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1452 // NB: don't ever pass NULL to iconv_open(), it may crash!
1453 if ( ms_wcCharsetName
)
1455 w2m
= iconv_open( cname
, ms_wcCharsetName
);
1463 wxMBConv_iconv::~wxMBConv_iconv()
1465 if ( m2w
!= (iconv_t
)-1 )
1467 if ( w2m
!= (iconv_t
)-1 )
1471 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1474 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1475 // Unfortunately there are a couple of global wxCSConv objects such as
1476 // wxConvLocal that are used all over wx code, so we have to make sure
1477 // the handle is used by at most one thread at the time. Otherwise
1478 // only a few wx classes would be safe to use from non-main threads
1479 // as MB<->WC conversion would fail "randomly".
1480 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1483 size_t inbuf
= strlen(psz
);
1484 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1486 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1487 wchar_t *bufPtr
= buf
;
1488 const char *pszPtr
= psz
;
1492 // have destination buffer, convert there
1494 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1495 (char**)&bufPtr
, &outbuf
);
1496 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1500 // convert to native endianness
1501 WC_BSWAP(buf
/* _not_ bufPtr */, res
)
1504 // NB: iconv was given only strlen(psz) characters on input, and so
1505 // it couldn't convert the trailing zero. Let's do it ourselves
1506 // if there's some room left for it in the output buffer.
1512 // no destination buffer... convert using temp buffer
1513 // to calculate destination buffer requirement
1518 outbuf
= 8*SIZEOF_WCHAR_T
;
1521 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1522 (char**)&bufPtr
, &outbuf
);
1524 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1525 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1528 if (ICONV_FAILED(cres
, inbuf
))
1530 //VS: it is ok if iconv fails, hence trace only
1531 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1538 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1541 // NB: explained in MB2WC
1542 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1545 size_t inbuf
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
1549 wchar_t *tmpbuf
= 0;
1553 // need to copy to temp buffer to switch endianness
1554 // this absolutely doesn't rock!
1555 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1556 // could be in read-only memory, or be accessed in some other thread)
1557 tmpbuf
=(wchar_t*)malloc((inbuf
+1)*SIZEOF_WCHAR_T
);
1558 memcpy(tmpbuf
,psz
,(inbuf
+1)*SIZEOF_WCHAR_T
);
1559 WC_BSWAP(tmpbuf
, inbuf
)
1565 // have destination buffer, convert there
1566 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1570 // NB: iconv was given only wcslen(psz) characters on input, and so
1571 // it couldn't convert the trailing zero. Let's do it ourselves
1572 // if there's some room left for it in the output buffer.
1578 // no destination buffer... convert using temp buffer
1579 // to calculate destination buffer requirement
1583 buf
= tbuf
; outbuf
= 16;
1585 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1588 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1596 if (ICONV_FAILED(cres
, inbuf
))
1598 //VS: it is ok if iconv fails, hence trace only
1599 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1606 #endif // HAVE_ICONV
1609 // ============================================================================
1610 // Win32 conversion classes
1611 // ============================================================================
1613 #ifdef wxHAVE_WIN32_MB2WC
1617 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1618 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1621 class wxMBConv_win32
: public wxMBConv
1626 m_CodePage
= CP_ACP
;
1630 wxMBConv_win32(const wxChar
* name
)
1632 m_CodePage
= wxCharsetToCodepage(name
);
1635 wxMBConv_win32(wxFontEncoding encoding
)
1637 m_CodePage
= wxEncodingToCodepage(encoding
);
1641 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1643 // note that we have to use the MB_ERR_INVALID_CHARS flag as without it
1644 // the behaviour is not compatible with the Unix version (using iconv)
1645 // and breaks the library itself, e.g. wxTextInputStream::NextChar()
1646 // wouldn't work if reading an incomplete MB char didn't result in an
1649 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1650 // an error (tested under Windows Server 2003) and apparently it is
1651 // done on purpose, i.e. the function accepts any input in this case
1652 // and although I'd prefer to return error on ill-formed output, our
1653 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1654 // explicitly ill-formed according to RFC 2152) either, so we don't
1655 // even have any fallback here...
1656 int flags
= m_CodePage
== CP_UTF7
? 0 : MB_ERR_INVALID_CHARS
;
1658 const size_t len
= ::MultiByteToWideChar
1660 m_CodePage
, // code page
1661 flags
, // flags: fall on error
1662 psz
, // input string
1663 -1, // its length (NUL-terminated)
1664 buf
, // output string
1665 buf
? n
: 0 // size of output buffer
1668 // note that it returns count of written chars for buf != NULL and size
1669 // of the needed buffer for buf == NULL so in either case the length of
1670 // the string (which never includes the terminating NUL) is one less
1671 return len
? len
- 1 : (size_t)-1;
1674 size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
1677 we have a problem here: by default, WideCharToMultiByte() may
1678 replace characters unrepresentable in the target code page with bad
1679 quality approximations such as turning "1/2" symbol (U+00BD) into
1680 "1" for the code pages which don't have it and we, obviously, want
1681 to avoid this at any price
1683 the trouble is that this function does it _silently_, i.e. it won't
1684 even tell us whether it did or not... Win98/2000 and higher provide
1685 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1686 we have to resort to a round trip, i.e. check that converting back
1687 results in the same string -- this is, of course, expensive but
1688 otherwise we simply can't be sure to not garble the data.
1691 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1692 // it doesn't work with CJK encodings (which we test for rather roughly
1693 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1695 BOOL usedDef
wxDUMMY_INITIALIZE(false);
1698 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
1700 // it's our lucky day
1701 flags
= WC_NO_BEST_FIT_CHARS
;
1702 pUsedDef
= &usedDef
;
1704 else // old system or unsupported encoding
1710 const size_t len
= ::WideCharToMultiByte
1712 m_CodePage
, // code page
1713 flags
, // either none or no best fit
1714 pwz
, // input string
1715 -1, // it is (wide) NUL-terminated
1716 buf
, // output buffer
1717 buf
? n
: 0, // and its size
1718 NULL
, // default "replacement" char
1719 pUsedDef
// [out] was it used?
1724 // function totally failed
1728 // if we were really converting, check if we succeeded
1733 // check if the conversion failed, i.e. if any replacements
1738 else // we must resort to a round trip (convert back and compare)...
1740 wxWCharBuffer
wcBuf(n
);
1741 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
1742 wcscmp(wcBuf
, pwz
) != 0 )
1744 // we didn't obtain the same thing we started from, hence
1745 // the conversion was lossy and we consider that it failed
1751 // see the comment above for the reason of "len - 1"
1755 bool IsOk() const { return m_CodePage
!= -1; }
1758 static bool CanUseNoBestFit()
1760 static int s_isWin98Or2k
= -1;
1762 if ( s_isWin98Or2k
== -1 )
1765 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
1768 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
1772 s_isWin98Or2k
= verMaj
>= 5;
1776 // unknown, be conservative by default
1780 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
1783 return s_isWin98Or2k
== 1;
1789 #endif // wxHAVE_WIN32_MB2WC
1791 // ============================================================================
1792 // Cocoa conversion classes
1793 // ============================================================================
1795 #if defined(__WXCOCOA__)
1797 // RN: There is no UTF-32 support in either Core Foundation or
1798 // Cocoa. Strangely enough, Core Foundation uses UTF-32
1799 // internally quite a bit - it's just not public (yet).
1801 #include <CoreFoundation/CFString.h>
1802 #include <CoreFoundation/CFStringEncodingExt.h>
1804 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
1806 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
1807 if ( encoding
== wxFONTENCODING_DEFAULT
)
1809 enc
= CFStringGetSystemEncoding();
1811 else switch( encoding
)
1813 case wxFONTENCODING_ISO8859_1
:
1814 enc
= kCFStringEncodingISOLatin1
;
1816 case wxFONTENCODING_ISO8859_2
:
1817 enc
= kCFStringEncodingISOLatin2
;
1819 case wxFONTENCODING_ISO8859_3
:
1820 enc
= kCFStringEncodingISOLatin3
;
1822 case wxFONTENCODING_ISO8859_4
:
1823 enc
= kCFStringEncodingISOLatin4
;
1825 case wxFONTENCODING_ISO8859_5
:
1826 enc
= kCFStringEncodingISOLatinCyrillic
;
1828 case wxFONTENCODING_ISO8859_6
:
1829 enc
= kCFStringEncodingISOLatinArabic
;
1831 case wxFONTENCODING_ISO8859_7
:
1832 enc
= kCFStringEncodingISOLatinGreek
;
1834 case wxFONTENCODING_ISO8859_8
:
1835 enc
= kCFStringEncodingISOLatinHebrew
;
1837 case wxFONTENCODING_ISO8859_9
:
1838 enc
= kCFStringEncodingISOLatin5
;
1840 case wxFONTENCODING_ISO8859_10
:
1841 enc
= kCFStringEncodingISOLatin6
;
1843 case wxFONTENCODING_ISO8859_11
:
1844 enc
= kCFStringEncodingISOLatinThai
;
1846 case wxFONTENCODING_ISO8859_13
:
1847 enc
= kCFStringEncodingISOLatin7
;
1849 case wxFONTENCODING_ISO8859_14
:
1850 enc
= kCFStringEncodingISOLatin8
;
1852 case wxFONTENCODING_ISO8859_15
:
1853 enc
= kCFStringEncodingISOLatin9
;
1856 case wxFONTENCODING_KOI8
:
1857 enc
= kCFStringEncodingKOI8_R
;
1859 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
1860 enc
= kCFStringEncodingDOSRussian
;
1863 // case wxFONTENCODING_BULGARIAN :
1867 case wxFONTENCODING_CP437
:
1868 enc
=kCFStringEncodingDOSLatinUS
;
1870 case wxFONTENCODING_CP850
:
1871 enc
= kCFStringEncodingDOSLatin1
;
1873 case wxFONTENCODING_CP852
:
1874 enc
= kCFStringEncodingDOSLatin2
;
1876 case wxFONTENCODING_CP855
:
1877 enc
= kCFStringEncodingDOSCyrillic
;
1879 case wxFONTENCODING_CP866
:
1880 enc
=kCFStringEncodingDOSRussian
;
1882 case wxFONTENCODING_CP874
:
1883 enc
= kCFStringEncodingDOSThai
;
1885 case wxFONTENCODING_CP932
:
1886 enc
= kCFStringEncodingDOSJapanese
;
1888 case wxFONTENCODING_CP936
:
1889 enc
=kCFStringEncodingDOSChineseSimplif
;
1891 case wxFONTENCODING_CP949
:
1892 enc
= kCFStringEncodingDOSKorean
;
1894 case wxFONTENCODING_CP950
:
1895 enc
= kCFStringEncodingDOSChineseTrad
;
1897 case wxFONTENCODING_CP1250
:
1898 enc
= kCFStringEncodingWindowsLatin2
;
1900 case wxFONTENCODING_CP1251
:
1901 enc
=kCFStringEncodingWindowsCyrillic
;
1903 case wxFONTENCODING_CP1252
:
1904 enc
=kCFStringEncodingWindowsLatin1
;
1906 case wxFONTENCODING_CP1253
:
1907 enc
= kCFStringEncodingWindowsGreek
;
1909 case wxFONTENCODING_CP1254
:
1910 enc
= kCFStringEncodingWindowsLatin5
;
1912 case wxFONTENCODING_CP1255
:
1913 enc
=kCFStringEncodingWindowsHebrew
;
1915 case wxFONTENCODING_CP1256
:
1916 enc
=kCFStringEncodingWindowsArabic
;
1918 case wxFONTENCODING_CP1257
:
1919 enc
= kCFStringEncodingWindowsBalticRim
;
1921 // This only really encodes to UTF7 (if that) evidently
1922 // case wxFONTENCODING_UTF7 :
1923 // enc = kCFStringEncodingNonLossyASCII ;
1925 case wxFONTENCODING_UTF8
:
1926 enc
= kCFStringEncodingUTF8
;
1928 case wxFONTENCODING_EUC_JP
:
1929 enc
= kCFStringEncodingEUC_JP
;
1931 case wxFONTENCODING_UTF16
:
1932 enc
= kCFStringEncodingUnicode
;
1934 case wxFONTENCODING_MACROMAN
:
1935 enc
= kCFStringEncodingMacRoman
;
1937 case wxFONTENCODING_MACJAPANESE
:
1938 enc
= kCFStringEncodingMacJapanese
;
1940 case wxFONTENCODING_MACCHINESETRAD
:
1941 enc
= kCFStringEncodingMacChineseTrad
;
1943 case wxFONTENCODING_MACKOREAN
:
1944 enc
= kCFStringEncodingMacKorean
;
1946 case wxFONTENCODING_MACARABIC
:
1947 enc
= kCFStringEncodingMacArabic
;
1949 case wxFONTENCODING_MACHEBREW
:
1950 enc
= kCFStringEncodingMacHebrew
;
1952 case wxFONTENCODING_MACGREEK
:
1953 enc
= kCFStringEncodingMacGreek
;
1955 case wxFONTENCODING_MACCYRILLIC
:
1956 enc
= kCFStringEncodingMacCyrillic
;
1958 case wxFONTENCODING_MACDEVANAGARI
:
1959 enc
= kCFStringEncodingMacDevanagari
;
1961 case wxFONTENCODING_MACGURMUKHI
:
1962 enc
= kCFStringEncodingMacGurmukhi
;
1964 case wxFONTENCODING_MACGUJARATI
:
1965 enc
= kCFStringEncodingMacGujarati
;
1967 case wxFONTENCODING_MACORIYA
:
1968 enc
= kCFStringEncodingMacOriya
;
1970 case wxFONTENCODING_MACBENGALI
:
1971 enc
= kCFStringEncodingMacBengali
;
1973 case wxFONTENCODING_MACTAMIL
:
1974 enc
= kCFStringEncodingMacTamil
;
1976 case wxFONTENCODING_MACTELUGU
:
1977 enc
= kCFStringEncodingMacTelugu
;
1979 case wxFONTENCODING_MACKANNADA
:
1980 enc
= kCFStringEncodingMacKannada
;
1982 case wxFONTENCODING_MACMALAJALAM
:
1983 enc
= kCFStringEncodingMacMalayalam
;
1985 case wxFONTENCODING_MACSINHALESE
:
1986 enc
= kCFStringEncodingMacSinhalese
;
1988 case wxFONTENCODING_MACBURMESE
:
1989 enc
= kCFStringEncodingMacBurmese
;
1991 case wxFONTENCODING_MACKHMER
:
1992 enc
= kCFStringEncodingMacKhmer
;
1994 case wxFONTENCODING_MACTHAI
:
1995 enc
= kCFStringEncodingMacThai
;
1997 case wxFONTENCODING_MACLAOTIAN
:
1998 enc
= kCFStringEncodingMacLaotian
;
2000 case wxFONTENCODING_MACGEORGIAN
:
2001 enc
= kCFStringEncodingMacGeorgian
;
2003 case wxFONTENCODING_MACARMENIAN
:
2004 enc
= kCFStringEncodingMacArmenian
;
2006 case wxFONTENCODING_MACCHINESESIMP
:
2007 enc
= kCFStringEncodingMacChineseSimp
;
2009 case wxFONTENCODING_MACTIBETAN
:
2010 enc
= kCFStringEncodingMacTibetan
;
2012 case wxFONTENCODING_MACMONGOLIAN
:
2013 enc
= kCFStringEncodingMacMongolian
;
2015 case wxFONTENCODING_MACETHIOPIC
:
2016 enc
= kCFStringEncodingMacEthiopic
;
2018 case wxFONTENCODING_MACCENTRALEUR
:
2019 enc
= kCFStringEncodingMacCentralEurRoman
;
2021 case wxFONTENCODING_MACVIATNAMESE
:
2022 enc
= kCFStringEncodingMacVietnamese
;
2024 case wxFONTENCODING_MACARABICEXT
:
2025 enc
= kCFStringEncodingMacExtArabic
;
2027 case wxFONTENCODING_MACSYMBOL
:
2028 enc
= kCFStringEncodingMacSymbol
;
2030 case wxFONTENCODING_MACDINGBATS
:
2031 enc
= kCFStringEncodingMacDingbats
;
2033 case wxFONTENCODING_MACTURKISH
:
2034 enc
= kCFStringEncodingMacTurkish
;
2036 case wxFONTENCODING_MACCROATIAN
:
2037 enc
= kCFStringEncodingMacCroatian
;
2039 case wxFONTENCODING_MACICELANDIC
:
2040 enc
= kCFStringEncodingMacIcelandic
;
2042 case wxFONTENCODING_MACROMANIAN
:
2043 enc
= kCFStringEncodingMacRomanian
;
2045 case wxFONTENCODING_MACCELTIC
:
2046 enc
= kCFStringEncodingMacCeltic
;
2048 case wxFONTENCODING_MACGAELIC
:
2049 enc
= kCFStringEncodingMacGaelic
;
2051 // case wxFONTENCODING_MACKEYBOARD :
2052 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2055 // because gcc is picky
2061 class wxMBConv_cocoa
: public wxMBConv
2066 Init(CFStringGetSystemEncoding()) ;
2070 wxMBConv_cocoa(const wxChar
* name
)
2072 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2076 wxMBConv_cocoa(wxFontEncoding encoding
)
2078 Init( wxCFStringEncFromFontEnc(encoding
) );
2085 void Init( CFStringEncoding encoding
)
2087 m_encoding
= encoding
;
2090 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2094 CFStringRef theString
= CFStringCreateWithBytes (
2095 NULL
, //the allocator
2096 (const UInt8
*)szUnConv
,
2099 false //no BOM/external representation
2102 wxASSERT(theString
);
2104 size_t nOutLength
= CFStringGetLength(theString
);
2108 CFRelease(theString
);
2112 CFRange theRange
= { 0, nOutSize
};
2114 #if SIZEOF_WCHAR_T == 4
2115 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2118 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2120 CFRelease(theString
);
2122 szUniCharBuffer
[nOutLength
] = '\0' ;
2124 #if SIZEOF_WCHAR_T == 4
2125 wxMBConvUTF16 converter
;
2126 converter
.MB2WC(szOut
, (const char*)szUniCharBuffer
, nOutSize
) ;
2127 delete[] szUniCharBuffer
;
2133 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2137 size_t nRealOutSize
;
2138 size_t nBufSize
= wxWcslen(szUnConv
);
2139 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
2141 #if SIZEOF_WCHAR_T == 4
2142 wxMBConvUTF16BE converter
;
2143 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2144 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1] ;
2145 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
)) ;
2146 nBufSize
/= sizeof(UniChar
);
2149 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2153 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
2156 wxASSERT(theString
);
2158 //Note that CER puts a BOM when converting to unicode
2159 //so we check and use getchars instead in that case
2160 if (m_encoding
== kCFStringEncodingUnicode
)
2163 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2165 nRealOutSize
= CFStringGetLength(theString
) + 1;
2171 CFRangeMake(0, CFStringGetLength(theString
)),
2173 0, //what to put in characters that can't be converted -
2174 //0 tells CFString to return NULL if it meets such a character
2175 false, //not an external representation
2178 (CFIndex
*) &nRealOutSize
2182 CFRelease(theString
);
2184 #if SIZEOF_WCHAR_T == 4
2185 delete[] szUniBuffer
;
2188 return nRealOutSize
- 1;
2193 return m_encoding
!= kCFStringEncodingInvalidId
&&
2194 CFStringIsEncodingAvailable(m_encoding
);
2198 CFStringEncoding m_encoding
;
2201 #endif // defined(__WXCOCOA__)
2203 // ============================================================================
2204 // Mac conversion classes
2205 // ============================================================================
2207 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2209 class wxMBConv_mac
: public wxMBConv
2214 Init(CFStringGetSystemEncoding()) ;
2218 wxMBConv_mac(const wxChar
* name
)
2220 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2224 wxMBConv_mac(wxFontEncoding encoding
)
2226 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
2231 OSStatus status
= noErr
;
2232 status
= TECDisposeConverter(m_MB2WC_converter
);
2233 status
= TECDisposeConverter(m_WC2MB_converter
);
2237 void Init( TextEncodingBase encoding
)
2239 OSStatus status
= noErr
;
2240 m_char_encoding
= encoding
;
2241 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
2243 status
= TECCreateConverter(&m_MB2WC_converter
,
2245 m_unicode_encoding
);
2246 status
= TECCreateConverter(&m_WC2MB_converter
,
2251 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2253 OSStatus status
= noErr
;
2254 ByteCount byteOutLen
;
2255 ByteCount byteInLen
= strlen(psz
) ;
2256 wchar_t *tbuf
= NULL
;
2257 UniChar
* ubuf
= NULL
;
2262 //Apple specs say at least 32
2263 n
= wxMax( 32 , byteInLen
) ;
2264 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
2266 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
2267 #if SIZEOF_WCHAR_T == 4
2268 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2270 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2272 status
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2273 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2274 #if SIZEOF_WCHAR_T == 4
2275 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2276 // is not properly terminated we get random characters at the end
2277 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2278 wxMBConvUTF16BE converter
;
2279 res
= converter
.MB2WC( (buf
? buf
: tbuf
) , (const char*)ubuf
, n
) ;
2282 res
= byteOutLen
/ sizeof( UniChar
) ;
2287 if ( buf
&& res
< n
)
2293 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2295 OSStatus status
= noErr
;
2296 ByteCount byteOutLen
;
2297 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2303 //Apple specs say at least 32
2304 n
= wxMax( 32 , ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2305 tbuf
= (char*) malloc( n
) ;
2308 ByteCount byteBufferLen
= n
;
2309 UniChar
* ubuf
= NULL
;
2310 #if SIZEOF_WCHAR_T == 4
2311 wxMBConvUTF16BE converter
;
2312 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2313 byteInLen
= unicharlen
;
2314 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2315 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2317 ubuf
= (UniChar
*) psz
;
2319 status
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2320 (TextPtr
) (buf
? buf
: tbuf
) , byteBufferLen
, &byteOutLen
);
2321 #if SIZEOF_WCHAR_T == 4
2327 size_t res
= byteOutLen
;
2328 if ( buf
&& res
< n
)
2332 //we need to double-trip to verify it didn't insert any ? in place
2333 //of bogus characters
2334 wxWCharBuffer
wcBuf(n
);
2335 size_t pszlen
= wxWcslen(psz
);
2336 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
2337 wxWcslen(wcBuf
) != pszlen
||
2338 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2340 // we didn't obtain the same thing we started from, hence
2341 // the conversion was lossy and we consider that it failed
2350 { return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
; }
2353 TECObjectRef m_MB2WC_converter
;
2354 TECObjectRef m_WC2MB_converter
;
2356 TextEncodingBase m_char_encoding
;
2357 TextEncodingBase m_unicode_encoding
;
2360 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2362 // ============================================================================
2363 // wxEncodingConverter based conversion classes
2364 // ============================================================================
2368 class wxMBConv_wxwin
: public wxMBConv
2373 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2374 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2378 // temporarily just use wxEncodingConverter stuff,
2379 // so that it works while a better implementation is built
2380 wxMBConv_wxwin(const wxChar
* name
)
2383 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2385 m_enc
= wxFONTENCODING_SYSTEM
;
2390 wxMBConv_wxwin(wxFontEncoding enc
)
2397 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2399 size_t inbuf
= strlen(psz
);
2402 if (!m2w
.Convert(psz
,buf
))
2408 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2410 const size_t inbuf
= wxWcslen(psz
);
2413 if (!w2m
.Convert(psz
,buf
))
2420 bool IsOk() const { return m_ok
; }
2423 wxFontEncoding m_enc
;
2424 wxEncodingConverter m2w
, w2m
;
2426 // were we initialized successfully?
2429 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2432 #endif // wxUSE_FONTMAP
2434 // ============================================================================
2435 // wxCSConv implementation
2436 // ============================================================================
2438 void wxCSConv::Init()
2445 wxCSConv::wxCSConv(const wxChar
*charset
)
2454 m_encoding
= wxFONTENCODING_SYSTEM
;
2457 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2459 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2461 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2463 encoding
= wxFONTENCODING_SYSTEM
;
2468 m_encoding
= encoding
;
2471 wxCSConv::~wxCSConv()
2476 wxCSConv::wxCSConv(const wxCSConv
& conv
)
2481 SetName(conv
.m_name
);
2482 m_encoding
= conv
.m_encoding
;
2485 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2489 SetName(conv
.m_name
);
2490 m_encoding
= conv
.m_encoding
;
2495 void wxCSConv::Clear()
2504 void wxCSConv::SetName(const wxChar
*charset
)
2508 m_name
= wxStrdup(charset
);
2513 wxMBConv
*wxCSConv::DoCreate() const
2515 // check for the special case of ASCII or ISO8859-1 charset: as we have
2516 // special knowledge of it anyhow, we don't need to create a special
2517 // conversion object
2518 if ( m_encoding
== wxFONTENCODING_ISO8859_1
)
2520 // don't convert at all
2524 // we trust OS to do conversion better than we can so try external
2525 // conversion methods first
2527 // the full order is:
2528 // 1. OS conversion (iconv() under Unix or Win32 API)
2529 // 2. hard coded conversions for UTF
2530 // 3. wxEncodingConverter as fall back
2536 #endif // !wxUSE_FONTMAP
2538 wxString
name(m_name
);
2542 name
= wxFontMapperBase::Get()->GetEncodingName(m_encoding
);
2543 #endif // wxUSE_FONTMAP
2545 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
2551 #endif // HAVE_ICONV
2553 #ifdef wxHAVE_WIN32_MB2WC
2556 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2557 : new wxMBConv_win32(m_encoding
);
2566 #endif // wxHAVE_WIN32_MB2WC
2567 #if defined(__WXMAC__)
2569 // leave UTF16 and UTF32 to the built-ins of wx
2570 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
2571 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
2575 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
2576 : new wxMBConv_mac(m_encoding
);
2578 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
2587 #if defined(__WXCOCOA__)
2589 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
2593 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
2594 : new wxMBConv_cocoa(m_encoding
);
2596 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
2606 wxFontEncoding enc
= m_encoding
;
2608 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2610 // use "false" to suppress interactive dialogs -- we can be called from
2611 // anywhere and popping up a dialog from here is the last thing we want to
2613 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
2615 #endif // wxUSE_FONTMAP
2619 case wxFONTENCODING_UTF7
:
2620 return new wxMBConvUTF7
;
2622 case wxFONTENCODING_UTF8
:
2623 return new wxMBConvUTF8
;
2625 case wxFONTENCODING_UTF16BE
:
2626 return new wxMBConvUTF16BE
;
2628 case wxFONTENCODING_UTF16LE
:
2629 return new wxMBConvUTF16LE
;
2631 case wxFONTENCODING_UTF32BE
:
2632 return new wxMBConvUTF32BE
;
2634 case wxFONTENCODING_UTF32LE
:
2635 return new wxMBConvUTF32LE
;
2638 // nothing to do but put here to suppress gcc warnings
2645 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2646 : new wxMBConv_wxwin(m_encoding
);
2652 #endif // wxUSE_FONTMAP
2654 // NB: This is a hack to prevent a deadlock. What could otherwise happen
2655 // in a Unicode build: the creation of wxConvLocal ends up here
2656 // because of some failure and logs the error. But wxLog will try to
2657 // attach a timestamp, for which it will need wxConvLocal (to convert
2658 // the time to char* and then wchar_t*); that fails too and tries to log
2659 // an error, but wxLog has an (already locked) critical section that
2660 // guards a static buffer.
2661 static bool alreadyLoggingError
= false;
2662 if (!alreadyLoggingError
)
2664 alreadyLoggingError
= true;
2665 wxLogError(_("Cannot convert from the charset '%s'!"),
2669 wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str()
2670 #else // !wxUSE_FONTMAP
2671 wxString::Format(_("encoding %s"), m_encoding
).c_str()
2672 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2674 alreadyLoggingError
= false;
2680 void wxCSConv::CreateConvIfNeeded() const
2684 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2687 // if we have neither the name nor the encoding, use the default
2688 // encoding for this system
2689 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
2691 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
2693 #endif // wxUSE_INTL
2695 self
->m_convReal
= DoCreate();
2696 self
->m_deferred
= false;
2700 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2702 CreateConvIfNeeded();
2705 return m_convReal
->MB2WC(buf
, psz
, n
);
2708 size_t len
= strlen(psz
);
2712 for (size_t c
= 0; c
<= len
; c
++)
2713 buf
[c
] = (unsigned char)(psz
[c
]);
2719 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2721 CreateConvIfNeeded();
2724 return m_convReal
->WC2MB(buf
, psz
, n
);
2727 const size_t len
= wxWcslen(psz
);
2730 for (size_t c
= 0; c
<= len
; c
++)
2734 buf
[c
] = (char)psz
[c
];
2739 for (size_t c
= 0; c
<= len
; c
++)
2749 // ----------------------------------------------------------------------------
2751 // ----------------------------------------------------------------------------
2754 static wxMBConv_win32 wxConvLibcObj
;
2755 #elif defined(__WXMAC__) && !defined(__MACH__)
2756 static wxMBConv_mac wxConvLibcObj
;
2758 static wxMBConvLibc wxConvLibcObj
;
2761 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
2762 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
2763 static wxMBConvUTF7 wxConvUTF7Obj
;
2764 static wxMBConvUTF8 wxConvUTF8Obj
;
2766 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
2767 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
2768 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
2769 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
2770 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
2771 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
2772 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
= &
2780 #else // !wxUSE_WCHAR_T
2782 // stand-ins in absence of wchar_t
2783 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
2788 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T