1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // ============================================================================
17 // ============================================================================
19 // ----------------------------------------------------------------------------
21 // ----------------------------------------------------------------------------
23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
24 #pragma implementation "strconv.h"
27 // For compilers that support precompilation, includes "wx.h".
28 #include "wx/wxprec.h"
39 #include "wx/strconv.h"
44 #include "wx/msw/private.h"
48 #include "wx/msw/missing.h"
58 #ifdef HAVE_LANGINFO_H
62 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
63 #define wxHAVE_WIN32_MB2WC
64 #endif // __WIN32__ but !__WXMICROWIN__
66 // ----------------------------------------------------------------------------
68 // ----------------------------------------------------------------------------
76 #include "wx/thread.h"
79 #include "wx/encconv.h"
80 #include "wx/fontmap.h"
84 #include <ATSUnicode.h>
85 #include <TextCommon.h>
86 #include <TextEncodingConverter.h>
88 #include "wx/mac/private.h" // includes mac headers
90 // ----------------------------------------------------------------------------
92 // ----------------------------------------------------------------------------
94 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
95 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
97 #if SIZEOF_WCHAR_T == 4
98 #define WC_NAME "UCS4"
99 #define WC_BSWAP BSWAP_UCS4
100 #ifdef WORDS_BIGENDIAN
101 #define WC_NAME_BEST "UCS-4BE"
103 #define WC_NAME_BEST "UCS-4LE"
105 #elif SIZEOF_WCHAR_T == 2
106 #define WC_NAME "UTF16"
107 #define WC_BSWAP BSWAP_UTF16
109 #ifdef WORDS_BIGENDIAN
110 #define WC_NAME_BEST "UTF-16BE"
112 #define WC_NAME_BEST "UTF-16LE"
114 #else // sizeof(wchar_t) != 2 nor 4
115 // does this ever happen?
116 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
119 // ============================================================================
121 // ============================================================================
123 // ----------------------------------------------------------------------------
124 // UTF-16 en/decoding to/from UCS-4
125 // ----------------------------------------------------------------------------
// Encode a single UCS-4 code point as UTF-16.
//
// input  - the code point to encode
// output - destination for the 16-bit unit(s); may be NULL, in which case
//          nothing is written and only the required length is computed
//
// Returns the number of 16-bit units needed (1 or 2), or (size_t)-1 if input
// is 0x110000 or above and so can't be represented in UTF-16.
static size_t encode_utf16(wxUint32 input, wxUint16 *output)
{
    // anything from 0x110000 upwards has no UTF-16 representation
    if ( input >= 0x110000 )
        return (size_t)-1;

    // values up to 0xffff are stored unchanged in a single unit
    if ( input < 0x10000 )
    {
        if ( output )
            output[0] = (wxUint16)input;

        return 1;
    }

    // everything else needs a surrogate pair: 0xd7c0 is 0xd800 - (0x10000 >> 10)
    // so adding it both removes the 0x10000 offset and adds the surrogate base
    if ( output )
    {
        output[0] = (wxUint16)(0xd7c0 + (input >> 10));
        output[1] = (wxUint16)(0xdc00 + (input & 0x3ff));
    }

    return 2;
}
// Decode the UTF-16 sequence starting at input into one UCS-4 code point.
//
// input  - points to the first 16-bit unit; if that unit is a high surrogate
//          then input[1] is read as well, so the caller must make sure it is
//          readable (a terminating 0 unit is enough)
// output - receives the decoded code point; for an invalid sequence it is set
//          to the offending unit input[0]
//
// Returns the number of 16-bit units consumed (1 or 2), or (size_t)-1 if the
// sequence is not valid UTF-16.
static size_t decode_utf16(const wxUint16* input, wxUint32& output)
{
    const wxUint16 lead = input[0];

    // not a surrogate at all: the unit is the code point
    if ( lead < 0xd800 || lead > 0xdfff )
    {
        output = lead;
        return 1;
    }

    // a low surrogate can't start a pair: reject it instead of combining it
    // with the next unit into a value beyond 0x10ffff
    if ( lead > 0xdbff )
    {
        output = lead;
        return (size_t)-1;
    }

    // the high surrogate must be followed by a low one, i.e. by a unit in the
    // range 0xdc00..0xdfff *inclusive* (0xdfff is needed for U+xFFFF)
    const wxUint16 trail = input[1];
    if ( trail < 0xdc00 || trail > 0xdfff )
    {
        output = lead;
        return (size_t)-1;
    }

    // 0xd7c0 is 0xd800 - (0x10000 >> 10): subtracting it removes the
    // surrogate base and adds back the 0x10000 offset in one step
    output = ((lead - 0xd7c0) << 10) + (trail - 0xdc00);
    return 2;
}
171 // ----------------------------------------------------------------------------
173 // ----------------------------------------------------------------------------
175 wxMBConv::~wxMBConv()
177 // nothing to do here (necessary for Darwin linking probably)
180 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
184 // calculate the length of the buffer needed first
185 size_t nLen
= MB2WC(NULL
, psz
, 0);
186 if ( nLen
!= (size_t)-1 )
188 // now do the actual conversion
189 wxWCharBuffer
buf(nLen
);
190 nLen
= MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NULL
191 if ( nLen
!= (size_t)-1 )
198 wxWCharBuffer
buf((wchar_t *)NULL
);
203 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
207 size_t nLen
= WC2MB(NULL
, pwz
, 0);
208 if ( nLen
!= (size_t)-1 )
210 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
211 nLen
= WC2MB(buf
.data(), pwz
, nLen
+ 4);
212 if ( nLen
!= (size_t)-1 )
219 wxCharBuffer
buf((char *)NULL
);
224 const wxWCharBuffer
wxMBConv::cMB2WC(const char *szString
, size_t nStringLen
, size_t* pOutSize
) const
226 wxASSERT(pOutSize
!= NULL
);
228 const char* szEnd
= szString
+ nStringLen
+ 1;
229 const char* szPos
= szString
;
230 const char* szStart
= szPos
;
232 size_t nActualLength
= 0;
233 size_t nCurrentSize
= nStringLen
; //try normal size first (should never resize?)
235 wxWCharBuffer
theBuffer(nCurrentSize
);
237 //Convert the string until the length() is reached, continuing the
238 //loop every time a null character is reached
239 while(szPos
!= szEnd
)
241 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
243 //Get the length of the current (sub)string
244 size_t nLen
= MB2WC(NULL
, szPos
, 0);
246 //Invalid conversion?
247 if( nLen
== (size_t)-1 )
250 theBuffer
.data()[0u] = wxT('\0');
255 //Increase the actual length (+1 for current null character)
256 nActualLength
+= nLen
+ 1;
258 //if buffer too small, realloc the buffer
259 if (nActualLength
> (nCurrentSize
+1))
261 wxWCharBuffer
theNewBuffer(nCurrentSize
<< 1);
262 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
* sizeof(wchar_t));
263 theBuffer
= theNewBuffer
;
267 //Convert the current (sub)string
268 if ( MB2WC(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
271 theBuffer
.data()[0u] = wxT('\0');
275 //Increment to next (sub)string
276 //Note that we have to use strlen here instead of nLen
277 //because XX2XX gives us the size of the output buffer,
278 //not necessarily the length of the string
279 szPos
+= strlen(szPos
) + 1;
282 //success - return actual length and the buffer
283 *pOutSize
= nActualLength
;
287 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *szString
, size_t nStringLen
, size_t* pOutSize
) const
289 wxASSERT(pOutSize
!= NULL
);
291 const wchar_t* szEnd
= szString
+ nStringLen
+ 1;
292 const wchar_t* szPos
= szString
;
293 const wchar_t* szStart
= szPos
;
295 size_t nActualLength
= 0;
296 size_t nCurrentSize
= nStringLen
<< 2; //try * 4 first
298 wxCharBuffer
theBuffer(nCurrentSize
);
300 //Convert the string until the length() is reached, continuing the
301 //loop every time a null character is reached
302 while(szPos
!= szEnd
)
304 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
306 //Get the length of the current (sub)string
307 size_t nLen
= WC2MB(NULL
, szPos
, 0);
309 //Invalid conversion?
310 if( nLen
== (size_t)-1 )
313 theBuffer
.data()[0u] = wxT('\0');
317 //Increase the actual length (+1 for current null character)
318 nActualLength
+= nLen
+ 1;
320 //if buffer too small, realloc the buffer
321 if (nActualLength
> (nCurrentSize
+1))
323 wxCharBuffer
theNewBuffer(nCurrentSize
<< 1);
324 memcpy(theNewBuffer
.data(), theBuffer
.data(), nCurrentSize
);
325 theBuffer
= theNewBuffer
;
329 //Convert the current (sub)string
330 if(WC2MB(&theBuffer
.data()[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
333 theBuffer
.data()[0u] = wxT('\0');
337 //Increment to next (sub)string
338 //Note that we have to use wxWcslen here instead of nLen
339 //because XX2XX gives us the size of the output buffer,
340 //not necessarily the length of the string
341 szPos
+= wxWcslen(szPos
) + 1;
344 //success - return actual length and the buffer
345 *pOutSize
= nActualLength
;
349 // ----------------------------------------------------------------------------
351 // ----------------------------------------------------------------------------
353 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
355 return wxMB2WC(buf
, psz
, n
);
358 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
360 return wxWC2MB(buf
, psz
, n
);
365 // ----------------------------------------------------------------------------
366 // wxConvBrokenFileNames is made for GTK2 in Unicode mode when
367 // files are accidentally written in an encoding which is not
368 // the system encoding. Typically, the system encoding will be
369 // UTF8 but there might be files stored in ISO8859-1 on disk.
370 // ----------------------------------------------------------------------------
372 class wxConvBrokenFileNames
: public wxMBConv
375 wxConvBrokenFileNames();
376 virtual ~wxConvBrokenFileNames() { delete m_conv
; }
378 virtual size_t MB2WC(wchar_t *outputBuf
, const char *psz
, size_t outputSize
) const;
379 virtual size_t WC2MB(char *outputBuf
, const wchar_t *psz
, size_t outputSize
) const;
382 // the conversion object we forward to
386 wxConvBrokenFileNames::wxConvBrokenFileNames()
388 // decide which conversion to use for the file names
390 // (1) this variable exists for the sole purpose of specifying the encoding
391 // of the filenames for GTK+ programs, so use it if it is set
392 const wxChar
*encName
= wxGetenv(_T("G_FILENAME_ENCODING"));
395 m_conv
= new wxCSConv(encName
);
397 else // no G_FILENAME_ENCODING
399 // (2) if a non default locale is set, assume that the user wants his
400 // filenames in this locale too
401 switch ( wxLocale::GetSystemEncoding() )
404 m_conv
= new wxMBConvLibc
;
407 // (3) finally use UTF-8 by default
408 case wxFONTENCODING_SYSTEM
:
409 case wxFONTENCODING_UTF8
:
410 m_conv
= new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
417 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf
,
419 size_t outputSize
) const
421 return m_conv
->MB2WC( outputBuf
, psz
, outputSize
);
425 wxConvBrokenFileNames::WC2MB(char *outputBuf
,
427 size_t outputSize
) const
429 return m_conv
->WC2MB( outputBuf
, psz
, outputSize
);
432 #endif // __WXGTK20__
434 // ----------------------------------------------------------------------------
436 // ----------------------------------------------------------------------------
438 // Implementation (C) 2004 Fredrik Roubert
441 // BASE64 decoding table
443 static const unsigned char utf7unb64
[] =
445 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
446 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
447 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
448 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
449 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
450 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
451 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
452 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
453 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
454 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
455 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
456 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
457 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
458 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
459 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
460 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
461 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
462 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
463 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
464 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
465 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
466 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
467 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
468 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
469 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
470 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
471 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
472 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
473 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
474 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
475 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
476 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
479 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
483 while (*psz
&& ((!buf
) || (len
< n
)))
485 unsigned char cc
= *psz
++;
493 else if (*psz
== '-')
503 // BASE64 encoded string
507 for (lsb
= false, d
= 0, l
= 0;
508 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff; psz
++)
512 for (l
+= 6; l
>= 8; lsb
= !lsb
)
514 c
= (unsigned char)((d
>> (l
-= 8)) % 256);
523 *buf
= (wchar_t)(c
<< 8);
530 if (buf
&& (len
< n
))
536 // BASE64 encoding table
538 static const unsigned char utf7enb64
[] =
540 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
541 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
542 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
543 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
544 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
545 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
546 'w', 'x', 'y', 'z', '0', '1', '2', '3',
547 '4', '5', '6', '7', '8', '9', '+', '/'
551 // UTF-7 encoding table
553 // 0 - Set D (directly encoded characters)
554 // 1 - Set O (optional direct characters)
555 // 2 - whitespace characters (optional)
556 // 3 - special characters
558 static const unsigned char utf7encode
[128] =
560 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
561 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
562 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
564 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
566 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
570 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
576 while (*psz
&& ((!buf
) || (len
< n
)))
579 if (cc
< 0x80 && utf7encode
[cc
] < 1)
587 else if (((wxUint32
)cc
) > 0xffff)
589 // no surrogate pair generation (yet?)
600 // BASE64 encode string
601 unsigned int lsb
, d
, l
;
602 for (d
= 0, l
= 0;; psz
++)
604 for (lsb
= 0; lsb
< 2; lsb
++)
607 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
609 for (l
+= 8; l
>= 6; )
613 *buf
++ = utf7enb64
[(d
>> l
) % 64];
618 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
624 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
633 if (buf
&& (len
< n
))
638 // ----------------------------------------------------------------------------
640 // ----------------------------------------------------------------------------
// utf8_max[i] is the largest value which fits into a UTF-8 sequence using i
// continuation bytes, i.e. i + 1 bytes in total; the decoder compares against
// it to detect over-long encodings and the encoder scans it to find the
// sequence length needed for a given value
static wxUint32 utf8_max[]=
{
    0x7f,           // 1 byte
    0x7ff,          // 2 bytes
    0xffff,         // 3 bytes
    0x1fffff,       // 4 bytes
    0x3ffffff,      // 5 bytes
    0x7fffffff,     // 6 bytes
    0xffffffff      // everything else
};

// boundaries of the private use area we use to (temporarily) remap the bytes
// which are invalid in a UTF-8 encoded string: byte b is mapped to
// wxUnicodePUA + b, so the range is [wxUnicodePUA, wxUnicodePUAEnd)
const wxUint32 wxUnicodePUA = 0x100000;
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
650 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
654 while (*psz
&& ((!buf
) || (len
< n
)))
656 const char *opsz
= psz
;
657 bool invalid
= false;
658 unsigned char cc
= *psz
++, fc
= cc
;
660 for (cnt
= 0; fc
& 0x80; cnt
++)
674 // invalid UTF-8 sequence
679 unsigned ocnt
= cnt
- 1;
680 wxUint32 res
= cc
& (0x3f >> cnt
);
684 if ((cc
& 0xC0) != 0x80)
686 // invalid UTF-8 sequence
691 res
= (res
<< 6) | (cc
& 0x3f);
693 if (invalid
|| res
<= utf8_max
[ocnt
])
695 // illegal UTF-8 encoding
698 else if ((m_options
& MAP_INVALID_UTF8_TO_PUA
) &&
699 res
>= wxUnicodePUA
&& res
< wxUnicodePUAEnd
)
701 // if one of our PUA characters turns up externally
702 // it must also be treated as an illegal sequence
703 // (a bit like you have to escape an escape character)
709 // cast is ok because wchar_t == wxUint16 if WC_UTF16
710 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
711 if (pa
== (size_t)-1)
725 #endif // WC_UTF16/!WC_UTF16
730 if (m_options
& MAP_INVALID_UTF8_TO_PUA
)
732 while (opsz
< psz
&& (!buf
|| len
< n
))
735 // cast is ok because wchar_t == wxUint16 if WC_UTF16
736 size_t pa
= encode_utf16((unsigned char)*opsz
+ wxUnicodePUA
, (wxUint16
*)buf
);
737 wxASSERT(pa
!= (size_t)-1);
744 *buf
++ = wxUnicodePUA
+ (unsigned char)*opsz
;
750 else if (m_options
& MAP_INVALID_UTF8_TO_OCTAL
)
752 while (opsz
< psz
&& (!buf
|| len
< n
))
754 if ( buf
&& len
+ 3 < n
)
756 unsigned char n
= *opsz
;
758 *buf
++ = (wchar_t)( L
'0' + n
/ 0100 );
759 *buf
++ = (wchar_t)( L
'0' + (n
% 0100) / 010 );
760 *buf
++ = (wchar_t)( L
'0' + n
% 010 );
766 else // MAP_INVALID_UTF8_NOT
773 if (buf
&& (len
< n
))
// Returns true if wch is one of the octal digits L'0'..L'7'.
static inline bool isoctal(wchar_t wch)
{
    return !(wch < L'0' || wch > L'7');
}
783 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
787 while (*psz
&& ((!buf
) || (len
< n
)))
791 // cast is ok for WC_UTF16
792 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
793 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
795 cc
=(*psz
++) & 0x7fffffff;
798 if ( (m_options
& MAP_INVALID_UTF8_TO_PUA
)
799 && cc
>= wxUnicodePUA
&& cc
< wxUnicodePUAEnd
)
802 *buf
++ = (char)(cc
- wxUnicodePUA
);
805 else if ( (m_options
& MAP_INVALID_UTF8_TO_OCTAL
) &&
807 isoctal(psz
[0]) && isoctal(psz
[1]) && isoctal(psz
[2]) )
811 *buf
++ = (char) ((psz
[0] - L
'0')*0100 +
812 (psz
[1] - L
'0')*010 +
822 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
836 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
838 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
850 // ----------------------------------------------------------------------------
852 // ----------------------------------------------------------------------------
854 #ifdef WORDS_BIGENDIAN
855 #define wxMBConvUTF16straight wxMBConvUTF16BE
856 #define wxMBConvUTF16swap wxMBConvUTF16LE
858 #define wxMBConvUTF16swap wxMBConvUTF16BE
859 #define wxMBConvUTF16straight wxMBConvUTF16LE
865 // copy 16bit MB to 16bit String
866 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
870 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
873 *buf
++ = *(wxUint16
*)psz
;
876 psz
+= sizeof(wxUint16
);
878 if (buf
&& len
<n
) *buf
=0;
884 // copy 16bit String to 16bit MB
885 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
889 while (*psz
&& (!buf
|| len
< n
))
893 *(wxUint16
*)buf
= *psz
;
894 buf
+= sizeof(wxUint16
);
896 len
+= sizeof(wxUint16
);
899 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
905 // swap 16bit MB to 16bit String
906 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
910 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
914 ((char *)buf
)[0] = psz
[1];
915 ((char *)buf
)[1] = psz
[0];
919 psz
+= sizeof(wxUint16
);
921 if (buf
&& len
<n
) *buf
=0;
927 // swap 16bit MB to 16bit String
928 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
932 while (*psz
&& (!buf
|| len
< n
))
936 *buf
++ = ((char*)psz
)[1];
937 *buf
++ = ((char*)psz
)[0];
939 len
+= sizeof(wxUint16
);
942 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
951 // copy 16bit MB to 32bit String
952 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
956 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
959 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
960 if (pa
== (size_t)-1)
966 psz
+= pa
* sizeof(wxUint16
);
968 if (buf
&& len
<n
) *buf
=0;
974 // copy 32bit String to 16bit MB
975 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
979 while (*psz
&& (!buf
|| len
< n
))
982 size_t pa
=encode_utf16(*psz
, cc
);
984 if (pa
== (size_t)-1)
989 *(wxUint16
*)buf
= cc
[0];
990 buf
+= sizeof(wxUint16
);
993 *(wxUint16
*)buf
= cc
[1];
994 buf
+= sizeof(wxUint16
);
998 len
+= pa
*sizeof(wxUint16
);
1001 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
1007 // swap 16bit MB to 32bit String
1008 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1012 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
1016 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
1017 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
1019 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
1020 if (pa
== (size_t)-1)
1027 psz
+= pa
* sizeof(wxUint16
);
1029 if (buf
&& len
<n
) *buf
=0;
1035 // swap 32bit String to 16bit MB
1036 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1040 while (*psz
&& (!buf
|| len
< n
))
1043 size_t pa
=encode_utf16(*psz
, cc
);
1045 if (pa
== (size_t)-1)
1050 *buf
++ = ((char*)cc
)[1];
1051 *buf
++ = ((char*)cc
)[0];
1054 *buf
++ = ((char*)cc
)[3];
1055 *buf
++ = ((char*)cc
)[2];
1059 len
+= pa
*sizeof(wxUint16
);
1062 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
1070 // ----------------------------------------------------------------------------
1072 // ----------------------------------------------------------------------------
1074 #ifdef WORDS_BIGENDIAN
1075 #define wxMBConvUTF32straight wxMBConvUTF32BE
1076 #define wxMBConvUTF32swap wxMBConvUTF32LE
1078 #define wxMBConvUTF32swap wxMBConvUTF32BE
1079 #define wxMBConvUTF32straight wxMBConvUTF32LE
1083 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
1084 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
1089 // copy 32bit MB to 16bit String
1090 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1094 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1098 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
1099 if (pa
== (size_t)-1)
1109 psz
+= sizeof(wxUint32
);
1111 if (buf
&& len
<n
) *buf
=0;
1117 // copy 16bit String to 32bit MB
1118 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1122 while (*psz
&& (!buf
|| len
< n
))
1126 // cast is ok for WC_UTF16
1127 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
1128 if (pa
== (size_t)-1)
1133 *(wxUint32
*)buf
= cc
;
1134 buf
+= sizeof(wxUint32
);
1136 len
+= sizeof(wxUint32
);
1140 if (buf
&& len
<=n
-sizeof(wxUint32
))
1148 // swap 32bit MB to 16bit String
1149 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1153 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1156 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
1157 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
1162 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
1163 if (pa
== (size_t)-1)
1173 psz
+= sizeof(wxUint32
);
1183 // swap 16bit String to 32bit MB
1184 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1188 while (*psz
&& (!buf
|| len
< n
))
1192 // cast is ok for WC_UTF16
1193 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
1194 if (pa
== (size_t)-1)
1204 len
+= sizeof(wxUint32
);
1208 if (buf
&& len
<=n
-sizeof(wxUint32
))
1217 // copy 32bit MB to 32bit String
1218 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1222 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1225 *buf
++ = *(wxUint32
*)psz
;
1227 psz
+= sizeof(wxUint32
);
1237 // copy 32bit String to 32bit MB
1238 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1242 while (*psz
&& (!buf
|| len
< n
))
1246 *(wxUint32
*)buf
= *psz
;
1247 buf
+= sizeof(wxUint32
);
1250 len
+= sizeof(wxUint32
);
1254 if (buf
&& len
<=n
-sizeof(wxUint32
))
1261 // swap 32bit MB to 32bit String
1262 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1266 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1270 ((char *)buf
)[0] = psz
[3];
1271 ((char *)buf
)[1] = psz
[2];
1272 ((char *)buf
)[2] = psz
[1];
1273 ((char *)buf
)[3] = psz
[0];
1277 psz
+= sizeof(wxUint32
);
1287 // swap 32bit String to 32bit MB
1288 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1292 while (*psz
&& (!buf
|| len
< n
))
1296 *buf
++ = ((char *)psz
)[3];
1297 *buf
++ = ((char *)psz
)[2];
1298 *buf
++ = ((char *)psz
)[1];
1299 *buf
++ = ((char *)psz
)[0];
1301 len
+= sizeof(wxUint32
);
1305 if (buf
&& len
<=n
-sizeof(wxUint32
))
1315 // ============================================================================
1316 // The classes doing conversion using the iconv_xxx() functions
1317 // ============================================================================
1321 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1322 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1323 // (unless there's yet another bug in glibc) the only case when iconv()
1324 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1325 // left in the input buffer -- when _real_ error occurs,
1326 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1328 // [This bug does not appear in glibc 2.2.]
1329 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1330 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1331 (errno != E2BIG || bufLeft != 0))
1333 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1336 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1338 // ----------------------------------------------------------------------------
1339 // wxMBConv_iconv: encapsulates an iconv character set
1340 // ----------------------------------------------------------------------------
1342 class wxMBConv_iconv
: public wxMBConv
1345 wxMBConv_iconv(const wxChar
*name
);
1346 virtual ~wxMBConv_iconv();
1348 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1349 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1352 { return (m2w
!= (iconv_t
)-1) && (w2m
!= (iconv_t
)-1); }
1355 // the iconv handlers used to translate from multibyte to wide char and in
1356 // the other direction
1360 // guards access to m2w and w2m objects
1361 wxMutex m_iconvMutex
;
1365 // the name (for iconv_open()) of a wide char charset -- if none is
1366 // available on this machine, it will remain NULL
1367 static const char *ms_wcCharsetName
;
1369 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1370 // different endian-ness than the native one
1371 static bool ms_wcNeedsSwap
;
1374 const char *wxMBConv_iconv::ms_wcCharsetName
= NULL
;
1375 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1377 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1379 // Do it the hard way
1381 for (size_t i
= 0; i
< wxStrlen(name
)+1; i
++)
1382 cname
[i
] = (char) name
[i
];
1384 // check for charset that represents wchar_t:
1385 if (ms_wcCharsetName
== NULL
)
1387 ms_wcNeedsSwap
= false;
1389 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1390 ms_wcCharsetName
= WC_NAME_BEST
;
1391 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1393 if (m2w
== (iconv_t
)-1)
1395 // try charset w/o bytesex info (e.g. "UCS4")
1396 // and check for bytesex ourselves:
1397 ms_wcCharsetName
= WC_NAME
;
1398 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1400 // last bet, try if it knows WCHAR_T pseudo-charset
1401 if (m2w
== (iconv_t
)-1)
1403 ms_wcCharsetName
= "WCHAR_T";
1404 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1407 if (m2w
!= (iconv_t
)-1)
1409 char buf
[2], *bufPtr
;
1410 wchar_t wbuf
[2], *wbufPtr
;
1418 outsz
= SIZEOF_WCHAR_T
* 2;
1422 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1423 (char**)&wbufPtr
, &outsz
);
1425 if (ICONV_FAILED(res
, insz
))
1427 ms_wcCharsetName
= NULL
;
1428 wxLogLastError(wxT("iconv"));
1429 wxLogError(_("Conversion to charset '%s' doesn't work."), name
);
1433 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1438 ms_wcCharsetName
= NULL
;
1440 // VS: we must not output an error here, since wxWidgets will safely
1441 // fall back to using wxEncodingConverter.
1442 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name
);
1446 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName
, ms_wcNeedsSwap
);
1448 else // we already have ms_wcCharsetName
1450 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1453 // NB: don't ever pass NULL to iconv_open(), it may crash!
1454 if ( ms_wcCharsetName
)
1456 w2m
= iconv_open( cname
, ms_wcCharsetName
);
1464 wxMBConv_iconv::~wxMBConv_iconv()
1466 if ( m2w
!= (iconv_t
)-1 )
1468 if ( w2m
!= (iconv_t
)-1 )
1472 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1475 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1476 // Unfortunately there are a couple of global wxCSConv objects such as
1477 // wxConvLocal that are used all over wx code, so we have to make sure
1478 // the handle is used by at most one thread at a time. Otherwise
1479 // only a few wx classes would be safe to use from non-main threads
1480 // as MB<->WC conversion would fail "randomly".
1481 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1484 size_t inbuf
= strlen(psz
);
1485 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1487 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1488 wchar_t *bufPtr
= buf
;
1489 const char *pszPtr
= psz
;
1493 // have destination buffer, convert there
1495 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1496 (char**)&bufPtr
, &outbuf
);
1497 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1501 // convert to native endianness
1502 WC_BSWAP(buf
/* _not_ bufPtr */, res
)
1505 // NB: iconv was given only strlen(psz) characters on input, and so
1506 // it couldn't convert the trailing zero. Let's do it ourselves
1507 // if there's some room left for it in the output buffer.
1513 // no destination buffer... convert using temp buffer
1514 // to calculate destination buffer requirement
1519 outbuf
= 8*SIZEOF_WCHAR_T
;
1522 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1523 (char**)&bufPtr
, &outbuf
);
1525 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1526 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1529 if (ICONV_FAILED(cres
, inbuf
))
1531 //VS: it is ok if iconv fails, hence trace only
1532 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1539 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1542 // NB: explained in MB2WC
1543 wxMutexLocker
lock(wxConstCast(this, wxMBConv_iconv
)->m_iconvMutex
);
1546 size_t inbuf
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
1550 wchar_t *tmpbuf
= 0;
1554 // need to copy to temp buffer to switch endianness
1555 // this absolutely doesn't rock!
1556 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1557 // could be in read-only memory, or be accessed in some other thread)
1558 tmpbuf
=(wchar_t*)malloc((inbuf
+1)*SIZEOF_WCHAR_T
);
1559 memcpy(tmpbuf
,psz
,(inbuf
+1)*SIZEOF_WCHAR_T
);
1560 WC_BSWAP(tmpbuf
, inbuf
)
1566 // have destination buffer, convert there
1567 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1571 // NB: iconv was given only wcslen(psz) characters on input, and so
1572 // it couldn't convert the trailing zero. Let's do it ourselves
1573 // if there's some room left for it in the output buffer.
1579 // no destination buffer... convert using temp buffer
1580 // to calculate destination buffer requirement
1584 buf
= tbuf
; outbuf
= 16;
1586 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1589 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1597 if (ICONV_FAILED(cres
, inbuf
))
1599 //VS: it is ok if iconv fails, hence trace only
1600 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1607 #endif // HAVE_ICONV
1610 // ============================================================================
1611 // Win32 conversion classes
1612 // ============================================================================
1614 #ifdef wxHAVE_WIN32_MB2WC
1618 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1619 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1622 class wxMBConv_win32
: public wxMBConv
1627 m_CodePage
= CP_ACP
;
1631 wxMBConv_win32(const wxChar
* name
)
1633 m_CodePage
= wxCharsetToCodepage(name
);
1636 wxMBConv_win32(wxFontEncoding encoding
)
1638 m_CodePage
= wxEncodingToCodepage(encoding
);
1642 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1644 // note that we have to use MB_ERR_INVALID_CHARS flag as without it
1645 // the behaviour is not compatible with the Unix version (using iconv)
1646 // and breaks the library itself, e.g. wxTextInputStream::NextChar()
1647 // wouldn't work if reading an incomplete MB char didn't result in an
1650 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1651 // an error (tested under Windows Server 2003) and apparently it is
1652 // done on purpose, i.e. the function accepts any input in this case
1653 // and although I'd prefer to return error on ill-formed output, our
1654 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1655 // explicitly ill-formed according to RFC 2152) either so we don't
1656 // even have any fallback here...
1657 int flags
= m_CodePage
== CP_UTF7
? 0 : MB_ERR_INVALID_CHARS
;
1659 const size_t len
= ::MultiByteToWideChar
1661 m_CodePage
, // code page
1662 flags
, // flags: fall on error
1663 psz
, // input string
1664 -1, // its length (NUL-terminated)
1665 buf
, // output string
1666 buf
? n
: 0 // size of output buffer
1669 // note that it returns count of written chars for buf != NULL and size
1670 // of the needed buffer for buf == NULL so in either case the length of
1671 // the string (which never includes the terminating NUL) is one less
1672 return len
? len
- 1 : (size_t)-1;
1675 size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
1678 we have a problem here: by default, WideCharToMultiByte() may
1679 replace characters unrepresentable in the target code page with bad
1680 quality approximations such as turning "1/2" symbol (U+00BD) into
1681 "1" for the code pages which don't have it and we, obviously, want
1682 to avoid this at any price
1684 the trouble is that this function does it _silently_, i.e. it won't
1685 even tell us whether it did or not... Win98/2000 and higher provide
1686 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1687 we have to resort to a round trip, i.e. check that converting back
1688 results in the same string -- this is, of course, expensive but
1689 otherwise we simply can't be sure to not garble the data.
1692 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1693 // it doesn't work with CJK encodings (which we test for rather roughly
1694 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1696 BOOL usedDef
wxDUMMY_INITIALIZE(false);
1699 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
1701 // it's our lucky day
1702 flags
= WC_NO_BEST_FIT_CHARS
;
1703 pUsedDef
= &usedDef
;
1705 else // old system or unsupported encoding
1711 const size_t len
= ::WideCharToMultiByte
1713 m_CodePage
, // code page
1714 flags
, // either none or no best fit
1715 pwz
, // input string
1716 -1, // it is (wide) NUL-terminated
1717 buf
, // output buffer
1718 buf
? n
: 0, // and its size
1719 NULL
, // default "replacement" char
1720 pUsedDef
// [out] was it used?
1725 // function totally failed
1729 // if we were really converting, check if we succeeded
1734 // check if the conversion failed, i.e. if any replacements
1739 else // we must resort to double tripping...
1741 wxWCharBuffer
wcBuf(n
);
1742 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
1743 wcscmp(wcBuf
, pwz
) != 0 )
1745 // we didn't obtain the same thing we started from, hence
1746 // the conversion was lossy and we consider that it failed
1752 // see the comment above for the reason of "len - 1"
1756 bool IsOk() const { return m_CodePage
!= -1; }
1759 static bool CanUseNoBestFit()
1761 static int s_isWin98Or2k
= -1;
1763 if ( s_isWin98Or2k
== -1 )
1766 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
1769 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
1773 s_isWin98Or2k
= verMaj
>= 5;
1777 // unknown, be conseravtive by default
1781 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
1784 return s_isWin98Or2k
== 1;
1790 #endif // wxHAVE_WIN32_MB2WC
1792 // ============================================================================
1793 // Cocoa conversion classes
1794 // ============================================================================
1796 #if defined(__WXCOCOA__)
// RN: There is no UTF-32 support in either Core Foundation or
// Cocoa. Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1802 #include <CoreFoundation/CFString.h>
1803 #include <CoreFoundation/CFStringEncodingExt.h>
1805 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
1807 CFStringEncoding enc
= kCFStringEncodingInvalidId
;
1808 if ( encoding
== wxFONTENCODING_DEFAULT
)
1810 enc
= CFStringGetSystemEncoding();
1812 else switch( encoding
)
1814 case wxFONTENCODING_ISO8859_1
:
1815 enc
= kCFStringEncodingISOLatin1
;
1817 case wxFONTENCODING_ISO8859_2
:
1818 enc
= kCFStringEncodingISOLatin2
;
1820 case wxFONTENCODING_ISO8859_3
:
1821 enc
= kCFStringEncodingISOLatin3
;
1823 case wxFONTENCODING_ISO8859_4
:
1824 enc
= kCFStringEncodingISOLatin4
;
1826 case wxFONTENCODING_ISO8859_5
:
1827 enc
= kCFStringEncodingISOLatinCyrillic
;
1829 case wxFONTENCODING_ISO8859_6
:
1830 enc
= kCFStringEncodingISOLatinArabic
;
1832 case wxFONTENCODING_ISO8859_7
:
1833 enc
= kCFStringEncodingISOLatinGreek
;
1835 case wxFONTENCODING_ISO8859_8
:
1836 enc
= kCFStringEncodingISOLatinHebrew
;
1838 case wxFONTENCODING_ISO8859_9
:
1839 enc
= kCFStringEncodingISOLatin5
;
1841 case wxFONTENCODING_ISO8859_10
:
1842 enc
= kCFStringEncodingISOLatin6
;
1844 case wxFONTENCODING_ISO8859_11
:
1845 enc
= kCFStringEncodingISOLatinThai
;
1847 case wxFONTENCODING_ISO8859_13
:
1848 enc
= kCFStringEncodingISOLatin7
;
1850 case wxFONTENCODING_ISO8859_14
:
1851 enc
= kCFStringEncodingISOLatin8
;
1853 case wxFONTENCODING_ISO8859_15
:
1854 enc
= kCFStringEncodingISOLatin9
;
1857 case wxFONTENCODING_KOI8
:
1858 enc
= kCFStringEncodingKOI8_R
;
1860 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
1861 enc
= kCFStringEncodingDOSRussian
;
1864 // case wxFONTENCODING_BULGARIAN :
1868 case wxFONTENCODING_CP437
:
1869 enc
=kCFStringEncodingDOSLatinUS
;
1871 case wxFONTENCODING_CP850
:
1872 enc
= kCFStringEncodingDOSLatin1
;
1874 case wxFONTENCODING_CP852
:
1875 enc
= kCFStringEncodingDOSLatin2
;
1877 case wxFONTENCODING_CP855
:
1878 enc
= kCFStringEncodingDOSCyrillic
;
1880 case wxFONTENCODING_CP866
:
1881 enc
=kCFStringEncodingDOSRussian
;
1883 case wxFONTENCODING_CP874
:
1884 enc
= kCFStringEncodingDOSThai
;
1886 case wxFONTENCODING_CP932
:
1887 enc
= kCFStringEncodingDOSJapanese
;
1889 case wxFONTENCODING_CP936
:
1890 enc
=kCFStringEncodingDOSChineseSimplif
;
1892 case wxFONTENCODING_CP949
:
1893 enc
= kCFStringEncodingDOSKorean
;
1895 case wxFONTENCODING_CP950
:
1896 enc
= kCFStringEncodingDOSChineseTrad
;
1898 case wxFONTENCODING_CP1250
:
1899 enc
= kCFStringEncodingWindowsLatin2
;
1901 case wxFONTENCODING_CP1251
:
1902 enc
=kCFStringEncodingWindowsCyrillic
;
1904 case wxFONTENCODING_CP1252
:
1905 enc
=kCFStringEncodingWindowsLatin1
;
1907 case wxFONTENCODING_CP1253
:
1908 enc
= kCFStringEncodingWindowsGreek
;
1910 case wxFONTENCODING_CP1254
:
1911 enc
= kCFStringEncodingWindowsLatin5
;
1913 case wxFONTENCODING_CP1255
:
1914 enc
=kCFStringEncodingWindowsHebrew
;
1916 case wxFONTENCODING_CP1256
:
1917 enc
=kCFStringEncodingWindowsArabic
;
1919 case wxFONTENCODING_CP1257
:
1920 enc
= kCFStringEncodingWindowsBalticRim
;
1922 // This only really encodes to UTF7 (if that) evidently
1923 // case wxFONTENCODING_UTF7 :
1924 // enc = kCFStringEncodingNonLossyASCII ;
1926 case wxFONTENCODING_UTF8
:
1927 enc
= kCFStringEncodingUTF8
;
1929 case wxFONTENCODING_EUC_JP
:
1930 enc
= kCFStringEncodingEUC_JP
;
1932 case wxFONTENCODING_UTF16
:
1933 enc
= kCFStringEncodingUnicode
;
1935 case wxFONTENCODING_MACROMAN
:
1936 enc
= kCFStringEncodingMacRoman
;
1938 case wxFONTENCODING_MACJAPANESE
:
1939 enc
= kCFStringEncodingMacJapanese
;
1941 case wxFONTENCODING_MACCHINESETRAD
:
1942 enc
= kCFStringEncodingMacChineseTrad
;
1944 case wxFONTENCODING_MACKOREAN
:
1945 enc
= kCFStringEncodingMacKorean
;
1947 case wxFONTENCODING_MACARABIC
:
1948 enc
= kCFStringEncodingMacArabic
;
1950 case wxFONTENCODING_MACHEBREW
:
1951 enc
= kCFStringEncodingMacHebrew
;
1953 case wxFONTENCODING_MACGREEK
:
1954 enc
= kCFStringEncodingMacGreek
;
1956 case wxFONTENCODING_MACCYRILLIC
:
1957 enc
= kCFStringEncodingMacCyrillic
;
1959 case wxFONTENCODING_MACDEVANAGARI
:
1960 enc
= kCFStringEncodingMacDevanagari
;
1962 case wxFONTENCODING_MACGURMUKHI
:
1963 enc
= kCFStringEncodingMacGurmukhi
;
1965 case wxFONTENCODING_MACGUJARATI
:
1966 enc
= kCFStringEncodingMacGujarati
;
1968 case wxFONTENCODING_MACORIYA
:
1969 enc
= kCFStringEncodingMacOriya
;
1971 case wxFONTENCODING_MACBENGALI
:
1972 enc
= kCFStringEncodingMacBengali
;
1974 case wxFONTENCODING_MACTAMIL
:
1975 enc
= kCFStringEncodingMacTamil
;
1977 case wxFONTENCODING_MACTELUGU
:
1978 enc
= kCFStringEncodingMacTelugu
;
1980 case wxFONTENCODING_MACKANNADA
:
1981 enc
= kCFStringEncodingMacKannada
;
1983 case wxFONTENCODING_MACMALAJALAM
:
1984 enc
= kCFStringEncodingMacMalayalam
;
1986 case wxFONTENCODING_MACSINHALESE
:
1987 enc
= kCFStringEncodingMacSinhalese
;
1989 case wxFONTENCODING_MACBURMESE
:
1990 enc
= kCFStringEncodingMacBurmese
;
1992 case wxFONTENCODING_MACKHMER
:
1993 enc
= kCFStringEncodingMacKhmer
;
1995 case wxFONTENCODING_MACTHAI
:
1996 enc
= kCFStringEncodingMacThai
;
1998 case wxFONTENCODING_MACLAOTIAN
:
1999 enc
= kCFStringEncodingMacLaotian
;
2001 case wxFONTENCODING_MACGEORGIAN
:
2002 enc
= kCFStringEncodingMacGeorgian
;
2004 case wxFONTENCODING_MACARMENIAN
:
2005 enc
= kCFStringEncodingMacArmenian
;
2007 case wxFONTENCODING_MACCHINESESIMP
:
2008 enc
= kCFStringEncodingMacChineseSimp
;
2010 case wxFONTENCODING_MACTIBETAN
:
2011 enc
= kCFStringEncodingMacTibetan
;
2013 case wxFONTENCODING_MACMONGOLIAN
:
2014 enc
= kCFStringEncodingMacMongolian
;
2016 case wxFONTENCODING_MACETHIOPIC
:
2017 enc
= kCFStringEncodingMacEthiopic
;
2019 case wxFONTENCODING_MACCENTRALEUR
:
2020 enc
= kCFStringEncodingMacCentralEurRoman
;
2022 case wxFONTENCODING_MACVIATNAMESE
:
2023 enc
= kCFStringEncodingMacVietnamese
;
2025 case wxFONTENCODING_MACARABICEXT
:
2026 enc
= kCFStringEncodingMacExtArabic
;
2028 case wxFONTENCODING_MACSYMBOL
:
2029 enc
= kCFStringEncodingMacSymbol
;
2031 case wxFONTENCODING_MACDINGBATS
:
2032 enc
= kCFStringEncodingMacDingbats
;
2034 case wxFONTENCODING_MACTURKISH
:
2035 enc
= kCFStringEncodingMacTurkish
;
2037 case wxFONTENCODING_MACCROATIAN
:
2038 enc
= kCFStringEncodingMacCroatian
;
2040 case wxFONTENCODING_MACICELANDIC
:
2041 enc
= kCFStringEncodingMacIcelandic
;
2043 case wxFONTENCODING_MACROMANIAN
:
2044 enc
= kCFStringEncodingMacRomanian
;
2046 case wxFONTENCODING_MACCELTIC
:
2047 enc
= kCFStringEncodingMacCeltic
;
2049 case wxFONTENCODING_MACGAELIC
:
2050 enc
= kCFStringEncodingMacGaelic
;
2052 // case wxFONTENCODING_MACKEYBOARD :
2053 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2056 // because gcc is picky
2062 class wxMBConv_cocoa
: public wxMBConv
2067 Init(CFStringGetSystemEncoding()) ;
2071 wxMBConv_cocoa(const wxChar
* name
)
2073 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2077 wxMBConv_cocoa(wxFontEncoding encoding
)
2079 Init( wxCFStringEncFromFontEnc(encoding
) );
2086 void Init( CFStringEncoding encoding
)
2088 m_encoding
= encoding
;
2091 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2095 CFStringRef theString
= CFStringCreateWithBytes (
2096 NULL
, //the allocator
2097 (const UInt8
*)szUnConv
,
2100 false //no BOM/external representation
2103 wxASSERT(theString
);
2105 size_t nOutLength
= CFStringGetLength(theString
);
2109 CFRelease(theString
);
2113 CFRange theRange
= { 0, nOutSize
};
2115 #if SIZEOF_WCHAR_T == 4
2116 UniChar
* szUniCharBuffer
= new UniChar
[nOutSize
];
2119 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2121 CFRelease(theString
);
2123 szUniCharBuffer
[nOutLength
] = '\0' ;
2125 #if SIZEOF_WCHAR_T == 4
2126 wxMBConvUTF16 converter
;
2127 converter
.MB2WC(szOut
, (const char*)szUniCharBuffer
, nOutSize
) ;
2128 delete[] szUniCharBuffer
;
2134 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2138 size_t nRealOutSize
;
2139 size_t nBufSize
= wxWcslen(szUnConv
);
2140 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
2142 #if SIZEOF_WCHAR_T == 4
2143 wxMBConvUTF16BE converter
;
2144 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2145 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1] ;
2146 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
)) ;
2147 nBufSize
/= sizeof(UniChar
);
2150 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2154 kCFAllocatorNull
//deallocator - we want to deallocate it ourselves
2157 wxASSERT(theString
);
2159 //Note that CER puts a BOM when converting to unicode
2160 //so we check and use getchars instead in that case
2161 if (m_encoding
== kCFStringEncodingUnicode
)
2164 CFStringGetCharacters(theString
, CFRangeMake(0, nOutSize
- 1), (UniChar
*) szOut
);
2166 nRealOutSize
= CFStringGetLength(theString
) + 1;
2172 CFRangeMake(0, CFStringGetLength(theString
)),
2174 0, //what to put in characters that can't be converted -
2175 //0 tells CFString to return NULL if it meets such a character
2176 false, //not an external representation
2179 (CFIndex
*) &nRealOutSize
2183 CFRelease(theString
);
2185 #if SIZEOF_WCHAR_T == 4
2186 delete[] szUniBuffer
;
2189 return nRealOutSize
- 1;
2194 return m_encoding
!= kCFStringEncodingInvalidId
&&
2195 CFStringIsEncodingAvailable(m_encoding
);
2199 CFStringEncoding m_encoding
;
2202 #endif // defined(__WXCOCOA__)
2204 // ============================================================================
2205 // Mac conversion classes
2206 // ============================================================================
2208 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2210 class wxMBConv_mac
: public wxMBConv
2215 Init(CFStringGetSystemEncoding()) ;
2219 wxMBConv_mac(const wxChar
* name
)
2221 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name
, false) ) ) ;
2225 wxMBConv_mac(wxFontEncoding encoding
)
2227 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
2232 OSStatus status
= noErr
;
2233 status
= TECDisposeConverter(m_MB2WC_converter
);
2234 status
= TECDisposeConverter(m_WC2MB_converter
);
2238 void Init( TextEncodingBase encoding
)
2240 OSStatus status
= noErr
;
2241 m_char_encoding
= encoding
;
2242 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
2244 status
= TECCreateConverter(&m_MB2WC_converter
,
2246 m_unicode_encoding
);
2247 status
= TECCreateConverter(&m_WC2MB_converter
,
2252 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2254 OSStatus status
= noErr
;
2255 ByteCount byteOutLen
;
2256 ByteCount byteInLen
= strlen(psz
) ;
2257 wchar_t *tbuf
= NULL
;
2258 UniChar
* ubuf
= NULL
;
2263 //apple specs say at least 32
2264 n
= wxMax( 32 , byteInLen
) ;
2265 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
2267 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
2268 #if SIZEOF_WCHAR_T == 4
2269 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2271 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2273 status
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2274 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2275 #if SIZEOF_WCHAR_T == 4
2276 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2277 // is not properly terminated we get random characters at the end
2278 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2279 wxMBConvUTF16BE converter
;
2280 res
= converter
.MB2WC( (buf
? buf
: tbuf
) , (const char*)ubuf
, n
) ;
2283 res
= byteOutLen
/ sizeof( UniChar
) ;
2288 if ( buf
&& res
< n
)
2294 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2296 OSStatus status
= noErr
;
2297 ByteCount byteOutLen
;
2298 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
2304 //apple specs say at least 32
2305 n
= wxMax( 32 , ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
);
2306 tbuf
= (char*) malloc( n
) ;
2309 ByteCount byteBufferLen
= n
;
2310 UniChar
* ubuf
= NULL
;
2311 #if SIZEOF_WCHAR_T == 4
2312 wxMBConvUTF16BE converter
;
2313 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2314 byteInLen
= unicharlen
;
2315 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2316 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2318 ubuf
= (UniChar
*) psz
;
2320 status
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2321 (TextPtr
) (buf
? buf
: tbuf
) , byteBufferLen
, &byteOutLen
);
2322 #if SIZEOF_WCHAR_T == 4
2328 size_t res
= byteOutLen
;
2329 if ( buf
&& res
< n
)
2333 //we need to double-trip to verify it didn't insert any ? in place
2334 //of bogus characters
2335 wxWCharBuffer
wcBuf(n
);
2336 size_t pszlen
= wxWcslen(psz
);
2337 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
2338 wxWcslen(wcBuf
) != pszlen
||
2339 memcmp(wcBuf
, psz
, pszlen
* sizeof(wchar_t)) != 0 )
2341 // we didn't obtain the same thing we started from, hence
2342 // the conversion was lossy and we consider that it failed
2351 { return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
; }
2354 TECObjectRef m_MB2WC_converter
;
2355 TECObjectRef m_WC2MB_converter
;
2357 TextEncodingBase m_char_encoding
;
2358 TextEncodingBase m_unicode_encoding
;
2361 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2363 // ============================================================================
2364 // wxEncodingConverter based conversion classes
2365 // ============================================================================
2369 class wxMBConv_wxwin
: public wxMBConv
2374 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2375 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2379 // temporarily just use wxEncodingConverter stuff,
2380 // so that it works while a better implementation is built
2381 wxMBConv_wxwin(const wxChar
* name
)
2384 m_enc
= wxFontMapperBase::Get()->CharsetToEncoding(name
, false);
2386 m_enc
= wxFONTENCODING_SYSTEM
;
2391 wxMBConv_wxwin(wxFontEncoding enc
)
2398 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2400 size_t inbuf
= strlen(psz
);
2403 if (!m2w
.Convert(psz
,buf
))
2409 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2411 const size_t inbuf
= wxWcslen(psz
);
2414 if (!w2m
.Convert(psz
,buf
))
2421 bool IsOk() const { return m_ok
; }
2424 wxFontEncoding m_enc
;
2425 wxEncodingConverter m2w
, w2m
;
2427 // were we initialized successfully?
2430 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2433 #endif // wxUSE_FONTMAP
2435 // ============================================================================
2436 // wxCSConv implementation
2437 // ============================================================================
// Common initialization helper of wxCSConv.
// NOTE(review): the body of this function is not part of this excerpt,
// confirm which members it resets before relying on it.
2439 void wxCSConv::Init()
2446 wxCSConv::wxCSConv(const wxChar
*charset
)
2455 m_encoding
= wxFONTENCODING_SYSTEM
;
2458 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2460 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2462 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2464 encoding
= wxFONTENCODING_SYSTEM
;
2469 m_encoding
= encoding
;
// Destructor of wxCSConv.
// NOTE(review): the body is not part of this excerpt -- presumably it
// releases the name and the real converter, confirm against the full file.
2472 wxCSConv::~wxCSConv()
2477 wxCSConv::wxCSConv(const wxCSConv
& conv
)
2482 SetName(conv
.m_name
);
2483 m_encoding
= conv
.m_encoding
;
2486 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2490 SetName(conv
.m_name
);
2491 m_encoding
= conv
.m_encoding
;
// Cleanup helper of wxCSConv, used by operator=() before the new name and
// encoding are set.
// NOTE(review): the body is not part of this excerpt -- presumably it frees
// the name and the real converter, confirm against the full file.
2496 void wxCSConv::Clear()
2505 void wxCSConv::SetName(const wxChar
*charset
)
2509 m_name
= wxStrdup(charset
);
2514 wxMBConv
*wxCSConv::DoCreate() const
2516 // check for the special case of ASCII or ISO8859-1 charset: as we have
2517 // special knowledge of it anyhow, we don't need to create a special
2518 // conversion object
2519 if ( m_encoding
== wxFONTENCODING_ISO8859_1
)
2521 // don't convert at all
2525 // we trust OS to do conversion better than we can so try external
2526 // conversion methods first
2528 // the full order is:
2529 // 1. OS conversion (iconv() under Unix or Win32 API)
2530 // 2. hard coded conversions for UTF
2531 // 3. wxEncodingConverter as fall back
2537 #endif // !wxUSE_FONTMAP
2539 wxString
name(m_name
);
2543 name
= wxFontMapperBase::Get()->GetEncodingName(m_encoding
);
2544 #endif // wxUSE_FONTMAP
2546 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
2552 #endif // HAVE_ICONV
2554 #ifdef wxHAVE_WIN32_MB2WC
2557 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2558 : new wxMBConv_win32(m_encoding
);
2567 #endif // wxHAVE_WIN32_MB2WC
2568 #if defined(__WXMAC__)
2570 // leave UTF16 and UTF32 to the built-ins of wx
2571 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
||
2572 ( m_encoding
>= wxFONTENCODING_MACMIN
&& m_encoding
<= wxFONTENCODING_MACMAX
) ) )
2576 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
2577 : new wxMBConv_mac(m_encoding
);
2579 wxMBConv_mac
*conv
= new wxMBConv_mac(m_encoding
);
2588 #if defined(__WXCOCOA__)
2590 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
2594 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
2595 : new wxMBConv_cocoa(m_encoding
);
2597 wxMBConv_cocoa
*conv
= new wxMBConv_cocoa(m_encoding
);
2607 wxFontEncoding enc
= m_encoding
;
2609 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2611 // use "false" to suppress interactive dialogs -- we can be called from
2612 // anywhere and popping up a dialog from here is the last thing we want to
2614 enc
= wxFontMapperBase::Get()->CharsetToEncoding(m_name
, false);
2616 #endif // wxUSE_FONTMAP
2620 case wxFONTENCODING_UTF7
:
2621 return new wxMBConvUTF7
;
2623 case wxFONTENCODING_UTF8
:
2624 return new wxMBConvUTF8
;
2626 case wxFONTENCODING_UTF16BE
:
2627 return new wxMBConvUTF16BE
;
2629 case wxFONTENCODING_UTF16LE
:
2630 return new wxMBConvUTF16LE
;
2632 case wxFONTENCODING_UTF32BE
:
2633 return new wxMBConvUTF32BE
;
2635 case wxFONTENCODING_UTF32LE
:
2636 return new wxMBConvUTF32LE
;
2639 // nothing to do but put here to suppress gcc warnings
2646 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2647 : new wxMBConv_wxwin(m_encoding
);
2653 #endif // wxUSE_FONTMAP
2655 // NB: This is a hack to prevent deadlock. What could otherwise happen
2656 // in Unicode build: wxConvLocal creation ends up being here
2657 // because of some failure and logs the error. But wxLog will try to
2658 // attach timestamp, for which it will need wxConvLocal (to convert
2659 // time to char* and then wchar_t*), but that fails, tries to log
2660 // error, but wxLog has a (already locked) critical section that
2661 // guards static buffer.
2662 static bool alreadyLoggingError
= false;
2663 if (!alreadyLoggingError
)
2665 alreadyLoggingError
= true;
2666 wxLogError(_("Cannot convert from the charset '%s'!"),
2670 wxFontMapperBase::GetEncodingDescription(m_encoding
).c_str()
2671 #else // !wxUSE_FONTMAP
2672 wxString::Format(_("encoding %s"), m_encoding
).c_str()
2673 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2675 alreadyLoggingError
= false;
2681 void wxCSConv::CreateConvIfNeeded() const
2685 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2688 // if we don't have neither the name nor the encoding, use the default
2689 // encoding for this system
2690 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
2692 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
2694 #endif // wxUSE_INTL
2696 self
->m_convReal
= DoCreate();
2697 self
->m_deferred
= false;
2701 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2703 CreateConvIfNeeded();
2706 return m_convReal
->MB2WC(buf
, psz
, n
);
2709 size_t len
= strlen(psz
);
2713 for (size_t c
= 0; c
<= len
; c
++)
2714 buf
[c
] = (unsigned char)(psz
[c
]);
2720 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2722 CreateConvIfNeeded();
2725 return m_convReal
->WC2MB(buf
, psz
, n
);
2728 const size_t len
= wxWcslen(psz
);
2731 for (size_t c
= 0; c
<= len
; c
++)
2735 buf
[c
] = (char)psz
[c
];
2740 for (size_t c
= 0; c
<= len
; c
++)
2750 // ----------------------------------------------------------------------------
2752 // ----------------------------------------------------------------------------
// The static converter objects: wxConvLibcObj is declared with a different
// class in each of the preprocessor branches below.
2755 static wxMBConv_win32 wxConvLibcObj
;
2756 #elif defined(__WXMAC__) && !defined(__MACH__)
2757 static wxMBConv_mac wxConvLibcObj
;
2759 static wxMBConvLibc wxConvLibcObj
;
// converters for the system encoding, ISO8859-1, UTF-7 and UTF-8
2762 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
2763 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
2764 static wxMBConvUTF7 wxConvUTF7Obj
;
2765 static wxMBConvUTF8 wxConvUTF8Obj
;
2768 static wxConvBrokenFileNames wxConvBrokenFileNamesObj
;
// the exported references and pointers are bound to the static objects
// defined above
2771 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
2772 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
2773 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
2774 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
2775 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
// wxConvCurrent initially points to the libc converter
2776 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
2777 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvFileName
= &
2781 wxConvBrokenFileNamesObj
;
2787 #else // !wxUSE_WCHAR_T
2789 // stand-ins in absence of wchar_t
2790 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
2795 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T