1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
15 // ============================================================================
17 // ============================================================================
19 // ----------------------------------------------------------------------------
21 // ----------------------------------------------------------------------------
23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
24 #pragma implementation "strconv.h"
27 // For compilers that support precompilation, includes "wx.h".
28 #include "wx/wxprec.h"
39 #include "wx/strconv.h"
44 #include "wx/msw/private.h"
48 #include "wx/msw/missing.h"
59 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
60 #define wxHAVE_WIN32_MB2WC
61 #endif // __WIN32__ but !__WXMICROWIN__
63 // ----------------------------------------------------------------------------
65 // ----------------------------------------------------------------------------
75 #include "wx/encconv.h"
76 #include "wx/fontmap.h"
80 #include <ATSUnicode.h>
81 #include <TextCommon.h>
82 #include <TextEncodingConverter.h>
84 #include "wx/mac/private.h" // includes mac headers
86 // ----------------------------------------------------------------------------
88 // ----------------------------------------------------------------------------
90 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
91 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
93 #if SIZEOF_WCHAR_T == 4
94 #define WC_NAME "UCS4"
95 #define WC_BSWAP BSWAP_UCS4
96 #ifdef WORDS_BIGENDIAN
97 #define WC_NAME_BEST "UCS-4BE"
99 #define WC_NAME_BEST "UCS-4LE"
101 #elif SIZEOF_WCHAR_T == 2
102 #define WC_NAME "UTF16"
103 #define WC_BSWAP BSWAP_UTF16
105 #ifdef WORDS_BIGENDIAN
106 #define WC_NAME_BEST "UTF-16BE"
108 #define WC_NAME_BEST "UTF-16LE"
110 #else // sizeof(wchar_t) != 2 nor 4
111 // does this ever happen?
112 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
115 // ============================================================================
117 // ============================================================================
119 // ----------------------------------------------------------------------------
120 // UTF-16 en/decoding to/from UCS-4
121 // ----------------------------------------------------------------------------
124 static size_t encode_utf16(wxUint32 input
, wxUint16
*output
)
129 *output
= (wxUint16
) input
;
132 else if (input
>=0x110000)
140 *output
++ = (wxUint16
) ((input
>> 10)+0xd7c0);
141 *output
= (wxUint16
) ((input
&0x3ff)+0xdc00);
// Decode a single code point from the UTF-16 sequence starting at input.
//
// On return, output holds the decoded UCS-4 value. The return value is the
// number of 16-bit units consumed (1 or 2), or (size_t)-1 if input[0] is a
// surrogate that does not start a valid pair; in that case output is set to
// the raw value of input[0] so that the caller may still skip over it.
//
// NB: input[1] is only read when input[0] is a high surrogate, so the
//     sequence must be terminated (e.g. by a NUL unit) after such a unit.
static size_t decode_utf16(const wxUint16* input, wxUint32& output)
{
    if ((*input<0xd800) || (*input>0xdfff))
    {
        // ordinary BMP character, stored as is
        output = *input;
        return 1;
    }
    else if ((*input>=0xdc00) || (input[1]<0xdc00) || (input[1]>0xdfff))
    {
        // either a low surrogate without a preceding high one or a high
        // surrogate which is not followed by a low surrogate: note that the
        // low surrogate range is 0xdc00..0xdfff *inclusive*
        output = *input;
        return (size_t)-1;
    }
    else
    {
        // 0xd7c0 == 0xd800 - (0x10000 >> 10): subtracting it maps the high
        // surrogate directly to the upper bits of the code point
        output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
        return 2;
    }
}
167 // ----------------------------------------------------------------------------
169 // ----------------------------------------------------------------------------
171 wxMBConv::~wxMBConv()
173 // nothing to do here (necessary for Darwin linking probably)
176 const wxWCharBuffer
wxMBConv::cMB2WC(const char *psz
) const
180 // calculate the length of the buffer needed first
181 size_t nLen
= MB2WC(NULL
, psz
, 0);
182 if ( nLen
!= (size_t)-1 )
184 // now do the actual conversion
185 wxWCharBuffer
buf(nLen
);
186 nLen
= MB2WC(buf
.data(), psz
, nLen
+ 1); // with the trailing NULL
187 if ( nLen
!= (size_t)-1 )
194 wxWCharBuffer
buf((wchar_t *)NULL
);
199 const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *pwz
) const
203 size_t nLen
= WC2MB(NULL
, pwz
, 0);
204 if ( nLen
!= (size_t)-1 )
206 wxCharBuffer
buf(nLen
+3); // space for a wxUint32 trailing zero
207 nLen
= WC2MB(buf
.data(), pwz
, nLen
+ 4);
208 if ( nLen
!= (size_t)-1 )
215 wxCharBuffer
buf((char *)NULL
);
220 size_t wxMBConv::MB2WC(wchar_t* szBuffer
, const char* szString
,
221 size_t outsize
, size_t nStringLen
) const
223 const char* szEnd
= szString
+ nStringLen
+ 1;
224 const char* szPos
= szString
;
225 const char* szStart
= szPos
;
227 size_t nActualLength
= 0;
229 //Convert the string until the length() is reached, continuing the
230 //loop every time a null character is reached
231 while(szPos
!= szEnd
)
233 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
235 //Get the length of the current (sub)string
236 size_t nLen
= MB2WC(NULL
, szPos
, 0);
238 //Invalid conversion?
239 if( nLen
== (size_t)-1 )
242 //Increase the actual length (+1 for current null character)
243 nActualLength
+= nLen
+ 1;
245 //Only copy data in if buffer size is big enough
246 if (szBuffer
!= NULL
&&
247 nActualLength
<= outsize
)
249 //Convert the current (sub)string
250 if ( MB2WC(&szBuffer
[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
254 //Increment to next (sub)string
255 //Note that we have to use strlen here instead of nLen
256 //here because XX2XX gives us the size of the output buffer,
257 //not neccessarly the length of the string
258 szPos
+= strlen(szPos
) + 1;
261 return nActualLength
- 1; //success - return actual length
264 size_t wxMBConv::WC2MB(char* szBuffer
, const wchar_t* szString
,
265 size_t outsize
, size_t nStringLen
) const
267 const wchar_t* szEnd
= szString
+ nStringLen
+ 1;
268 const wchar_t* szPos
= szString
;
269 const wchar_t* szStart
= szPos
;
271 size_t nActualLength
= 0;
273 //Convert the string until the length() is reached, continuing the
274 //loop every time a null character is reached
275 while(szPos
!= szEnd
)
277 wxASSERT(szPos
< szEnd
); //something is _really_ screwed up if this rings true
279 //Get the length of the current (sub)string
280 size_t nLen
= WC2MB(NULL
, szPos
, 0);
282 //Invalid conversion?
283 if( nLen
== (size_t)-1 )
286 //Increase the actual length (+1 for current null character)
287 nActualLength
+= nLen
+ 1;
289 //Only copy data in if buffer size is big enough
290 if (szBuffer
!= NULL
&&
291 nActualLength
<= outsize
)
293 //Convert the current (sub)string
294 if(WC2MB(&szBuffer
[szPos
- szStart
], szPos
, nLen
+ 1) == (size_t)-1 )
298 //Increment to next (sub)string
299 //Note that we have to use wxWcslen here instead of nLen
300 //here because XX2XX gives us the size of the output buffer,
301 //not neccessarly the length of the string
302 szPos
+= wxWcslen(szPos
) + 1;
305 return nActualLength
- 1; //success - return actual length
308 // ----------------------------------------------------------------------------
310 // ----------------------------------------------------------------------------
312 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
314 return wxMB2WC(buf
, psz
, n
);
317 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
319 return wxWC2MB(buf
, psz
, n
);
321 // ----------------------------------------------------------------------------
323 // ----------------------------------------------------------------------------
325 // Implementation (C) 2004 Fredrik Roubert
328 // BASE64 decoding table
330 static const unsigned char utf7unb64
[] =
332 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
333 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
334 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
335 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
336 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
337 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
338 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
339 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
340 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
341 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
342 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
343 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
344 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
345 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
346 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
347 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
348 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
349 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
350 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
351 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
352 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
353 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
354 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
355 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
356 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
357 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
358 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
359 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
360 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
361 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
362 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
363 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
366 size_t wxMBConvUTF7::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
371 while (*psz
&& ((!buf
) || (len
< n
)))
373 unsigned char cc
= *psz
++;
381 else if (*psz
== '-')
391 // BASE64 encoded string
395 for (lsb
= false, d
= 0, l
= 0;
396 (cc
= utf7unb64
[(unsigned char)*psz
]) != 0xff; psz
++)
400 for (l
+= 6; l
>= 8; lsb
= !lsb
)
402 c
= (d
>> (l
-= 8)) % 256;
418 if (buf
&& (len
< n
))
424 // BASE64 encoding table
426 static const unsigned char utf7enb64
[] =
428 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
429 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
430 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
431 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
432 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
433 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
434 'w', 'x', 'y', 'z', '0', '1', '2', '3',
435 '4', '5', '6', '7', '8', '9', '+', '/'
439 // UTF-7 encoding table
441 // 0 - Set D (directly encoded characters)
442 // 1 - Set O (optional direct characters)
443 // 2 - whitespace characters (optional)
444 // 3 - special characters
446 static const unsigned char utf7encode
[128] =
448 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
449 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
450 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
452 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
454 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
455 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
458 size_t wxMBConvUTF7::WC2MB(char *buf
, const wchar_t
459 *psz
, size_t n
) const
465 while (*psz
&& ((!buf
) || (len
< n
)))
468 if (cc
< 0x80 && utf7encode
[cc
] < 1)
476 else if (cc
> 0xffff)
478 // no surrogate pair generation (yet?)
489 // BASE64 encode string
490 unsigned int lsb
, d
, l
;
491 for (d
= 0, l
= 0;; psz
++)
493 for (lsb
= 0; lsb
< 2; lsb
++)
496 d
+= lsb
? cc
& 0xff : (cc
& 0xff00) >> 8;
498 for (l
+= 8; l
>= 6; )
502 *buf
++ = utf7enb64
[(d
>> l
) % 64];
507 if (!(cc
) || (cc
< 0x80 && utf7encode
[cc
] < 1))
513 *buf
++ = utf7enb64
[((d
% 16) << (6 - l
)) % 64];
522 if (buf
&& (len
< n
))
527 // ----------------------------------------------------------------------------
529 // ----------------------------------------------------------------------------
531 static wxUint32 utf8_max
[]=
532 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
534 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
538 while (*psz
&& ((!buf
) || (len
< n
)))
540 unsigned char cc
= *psz
++, fc
= cc
;
542 for (cnt
= 0; fc
& 0x80; cnt
++)
556 // invalid UTF-8 sequence
561 unsigned ocnt
= cnt
- 1;
562 wxUint32 res
= cc
& (0x3f >> cnt
);
566 if ((cc
& 0xC0) != 0x80)
568 // invalid UTF-8 sequence
571 res
= (res
<< 6) | (cc
& 0x3f);
573 if (res
<= utf8_max
[ocnt
])
575 // illegal UTF-8 encoding
579 // cast is ok because wchar_t == wxUint16 if WC_UTF16
580 size_t pa
= encode_utf16(res
, (wxUint16
*)buf
);
581 if (pa
== (size_t)-1)
590 #endif // WC_UTF16/!WC_UTF16
594 if (buf
&& (len
< n
))
599 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
603 while (*psz
&& ((!buf
) || (len
< n
)))
607 // cast is ok for WC_UTF16
608 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
609 psz
+= (pa
== (size_t)-1) ? 1 : pa
;
611 cc
=(*psz
++) & 0x7fffffff;
614 for (cnt
= 0; cc
> utf8_max
[cnt
]; cnt
++) {}
628 *buf
++ = (char) ((-128 >> cnt
) | ((cc
>> (cnt
* 6)) & (0x3f >> cnt
)));
630 *buf
++ = (char) (0x80 | ((cc
>> (cnt
* 6)) & 0x3f));
635 if (buf
&& (len
<n
)) *buf
= 0;
643 // ----------------------------------------------------------------------------
645 // ----------------------------------------------------------------------------
647 #ifdef WORDS_BIGENDIAN
648 #define wxMBConvUTF16straight wxMBConvUTF16BE
649 #define wxMBConvUTF16swap wxMBConvUTF16LE
651 #define wxMBConvUTF16swap wxMBConvUTF16BE
652 #define wxMBConvUTF16straight wxMBConvUTF16LE
658 // copy 16bit MB to 16bit String
659 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
663 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
666 *buf
++ = *(wxUint16
*)psz
;
669 psz
+= sizeof(wxUint16
);
671 if (buf
&& len
<n
) *buf
=0;
677 // copy 16bit String to 16bit MB
678 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
682 while (*psz
&& (!buf
|| len
< n
))
686 *(wxUint16
*)buf
= *psz
;
687 buf
+= sizeof(wxUint16
);
689 len
+= sizeof(wxUint16
);
692 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
698 // swap 16bit MB to 16bit String
699 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
703 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
707 ((char *)buf
)[0] = psz
[1];
708 ((char *)buf
)[1] = psz
[0];
712 psz
+= sizeof(wxUint16
);
714 if (buf
&& len
<n
) *buf
=0;
720 // swap 16bit String to 16bit MB
721 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
725 while (*psz
&& (!buf
|| len
< n
))
729 *buf
++ = ((char*)psz
)[1];
730 *buf
++ = ((char*)psz
)[0];
732 len
+= sizeof(wxUint16
);
735 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
744 // copy 16bit MB to 32bit String
745 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
749 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
752 size_t pa
=decode_utf16((wxUint16
*)psz
, cc
);
753 if (pa
== (size_t)-1)
759 psz
+= pa
* sizeof(wxUint16
);
761 if (buf
&& len
<n
) *buf
=0;
767 // copy 32bit String to 16bit MB
768 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
772 while (*psz
&& (!buf
|| len
< n
))
775 size_t pa
=encode_utf16(*psz
, cc
);
777 if (pa
== (size_t)-1)
782 *(wxUint16
*)buf
= cc
[0];
783 buf
+= sizeof(wxUint16
);
786 *(wxUint16
*)buf
= cc
[1];
787 buf
+= sizeof(wxUint16
);
791 len
+= pa
*sizeof(wxUint16
);
794 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
800 // swap 16bit MB to 32bit String
801 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
805 while (*(wxUint16
*)psz
&& (!buf
|| len
< n
))
809 tmp
[0]=psz
[1]; tmp
[1]=psz
[0];
810 tmp
[2]=psz
[3]; tmp
[3]=psz
[2];
812 size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
);
813 if (pa
== (size_t)-1)
820 psz
+= pa
* sizeof(wxUint16
);
822 if (buf
&& len
<n
) *buf
=0;
828 // swap 32bit String to 16bit MB
829 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
833 while (*psz
&& (!buf
|| len
< n
))
836 size_t pa
=encode_utf16(*psz
, cc
);
838 if (pa
== (size_t)-1)
843 *buf
++ = ((char*)cc
)[1];
844 *buf
++ = ((char*)cc
)[0];
847 *buf
++ = ((char*)cc
)[3];
848 *buf
++ = ((char*)cc
)[2];
852 len
+= pa
*sizeof(wxUint16
);
855 if (buf
&& len
<=n
-sizeof(wxUint16
)) *(wxUint16
*)buf
=0;
863 // ----------------------------------------------------------------------------
865 // ----------------------------------------------------------------------------
867 #ifdef WORDS_BIGENDIAN
868 #define wxMBConvUTF32straight wxMBConvUTF32BE
869 #define wxMBConvUTF32swap wxMBConvUTF32LE
871 #define wxMBConvUTF32swap wxMBConvUTF32BE
872 #define wxMBConvUTF32straight wxMBConvUTF32LE
876 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
;
877 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
;
882 // copy 32bit MB to 16bit String
883 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
887 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
891 size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
);
892 if (pa
== (size_t)-1)
902 psz
+= sizeof(wxUint32
);
904 if (buf
&& len
<n
) *buf
=0;
910 // copy 16bit String to 32bit MB
911 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
915 while (*psz
&& (!buf
|| len
< n
))
919 // cast is ok for WC_UTF16
920 size_t pa
= decode_utf16((const wxUint16
*)psz
, cc
);
921 if (pa
== (size_t)-1)
926 *(wxUint32
*)buf
= cc
;
927 buf
+= sizeof(wxUint32
);
929 len
+= sizeof(wxUint32
);
933 if (buf
&& len
<=n
-sizeof(wxUint32
))
941 // swap 32bit MB to 16bit String
942 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
946 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
949 tmp
[0] = psz
[3]; tmp
[1] = psz
[2];
950 tmp
[2] = psz
[1]; tmp
[3] = psz
[0];
955 size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
);
956 if (pa
== (size_t)-1)
966 psz
+= sizeof(wxUint32
);
976 // swap 16bit String to 32bit MB
977 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
981 while (*psz
&& (!buf
|| len
< n
))
985 // cast is ok for WC_UTF16
986 size_t pa
=decode_utf16((const wxUint16
*)psz
, *(wxUint32
*)cc
);
987 if (pa
== (size_t)-1)
997 len
+= sizeof(wxUint32
);
1001 if (buf
&& len
<=n
-sizeof(wxUint32
))
1010 // copy 32bit MB to 32bit String
1011 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1015 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1018 *buf
++ = *(wxUint32
*)psz
;
1020 psz
+= sizeof(wxUint32
);
1030 // copy 32bit String to 32bit MB
1031 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1035 while (*psz
&& (!buf
|| len
< n
))
1039 *(wxUint32
*)buf
= *psz
;
1040 buf
+= sizeof(wxUint32
);
1043 len
+= sizeof(wxUint32
);
1047 if (buf
&& len
<=n
-sizeof(wxUint32
))
1054 // swap 32bit MB to 32bit String
1055 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1059 while (*(wxUint32
*)psz
&& (!buf
|| len
< n
))
1063 ((char *)buf
)[0] = psz
[3];
1064 ((char *)buf
)[1] = psz
[2];
1065 ((char *)buf
)[2] = psz
[1];
1066 ((char *)buf
)[3] = psz
[0];
1070 psz
+= sizeof(wxUint32
);
1080 // swap 32bit String to 32bit MB
1081 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1085 while (*psz
&& (!buf
|| len
< n
))
1089 *buf
++ = ((char *)psz
)[3];
1090 *buf
++ = ((char *)psz
)[2];
1091 *buf
++ = ((char *)psz
)[1];
1092 *buf
++ = ((char *)psz
)[0];
1094 len
+= sizeof(wxUint32
);
1098 if (buf
&& len
<=n
-sizeof(wxUint32
))
1108 // ============================================================================
1109 // The classes doing conversion using the iconv_xxx() functions
1110 // ============================================================================
1114 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
1115 // if output buffer is _exactly_ as big as needed. Such case is (unless there's
1116 // yet another bug in glibc) the only case when iconv() returns with (size_t)-1
1117 // (which means error) and says there are 0 bytes left in the input buffer --
1118 // when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
1119 // this alternative test for iconv() failure.
1120 // [This bug does not appear in glibc 2.2.]
1121 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1122 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1123 (errno != E2BIG || bufLeft != 0))
1125 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1128 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1130 // ----------------------------------------------------------------------------
1131 // wxMBConv_iconv: encapsulates an iconv character set
1132 // ----------------------------------------------------------------------------
1134 class wxMBConv_iconv
: public wxMBConv
1137 wxMBConv_iconv(const wxChar
*name
);
1138 virtual ~wxMBConv_iconv();
1140 virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const;
1141 virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const;
1144 { return (m2w
!= (iconv_t
)-1) && (w2m
!= (iconv_t
)-1); }
1147 // the iconv handlers used to translate from multibyte to wide char and in
1148 // the other direction
1153 // the name (for iconv_open()) of a wide char charset -- if none is
1154 // available on this machine, it will remain NULL
1155 static const char *ms_wcCharsetName
;
1157 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1158 // different endian-ness than the native one
1159 static bool ms_wcNeedsSwap
;
1162 const char *wxMBConv_iconv::ms_wcCharsetName
= NULL
;
1163 bool wxMBConv_iconv::ms_wcNeedsSwap
= false;
1165 wxMBConv_iconv::wxMBConv_iconv(const wxChar
*name
)
1167 // Do it the hard way
1169 for (size_t i
= 0; i
< wxStrlen(name
)+1; i
++)
1170 cname
[i
] = (char) name
[i
];
1172 // check for charset that represents wchar_t:
1173 if (ms_wcCharsetName
== NULL
)
1175 ms_wcNeedsSwap
= false;
1177 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1178 ms_wcCharsetName
= WC_NAME_BEST
;
1179 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1181 if (m2w
== (iconv_t
)-1)
1183 // try charset w/o bytesex info (e.g. "UCS4")
1184 // and check for bytesex ourselves:
1185 ms_wcCharsetName
= WC_NAME
;
1186 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1188 // last bet, try if it knows WCHAR_T pseudo-charset
1189 if (m2w
== (iconv_t
)-1)
1191 ms_wcCharsetName
= "WCHAR_T";
1192 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1195 if (m2w
!= (iconv_t
)-1)
1197 char buf
[2], *bufPtr
;
1198 wchar_t wbuf
[2], *wbufPtr
;
1206 outsz
= SIZEOF_WCHAR_T
* 2;
1210 res
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
,
1211 (char**)&wbufPtr
, &outsz
);
1213 if (ICONV_FAILED(res
, insz
))
1215 ms_wcCharsetName
= NULL
;
1216 wxLogLastError(wxT("iconv"));
1217 wxLogError(_("Conversion to charset '%s' doesn't work."), name
);
1221 ms_wcNeedsSwap
= wbuf
[0] != (wchar_t)buf
[0];
1226 ms_wcCharsetName
= NULL
;
1228 // VS: we must not output an error here, since wxWidgets will safely
1229 // fall back to using wxEncodingConverter.
1230 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name
);
1234 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName
, ms_wcNeedsSwap
);
1236 else // we already have ms_wcCharsetName
1238 m2w
= iconv_open(ms_wcCharsetName
, cname
);
1241 // NB: don't ever pass NULL to iconv_open(), it may crash!
1242 if ( ms_wcCharsetName
)
1244 w2m
= iconv_open( cname
, ms_wcCharsetName
);
1252 wxMBConv_iconv::~wxMBConv_iconv()
1254 if ( m2w
!= (iconv_t
)-1 )
1256 if ( w2m
!= (iconv_t
)-1 )
1260 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1262 size_t inbuf
= strlen(psz
);
1263 size_t outbuf
= n
* SIZEOF_WCHAR_T
;
1265 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1266 wchar_t *bufPtr
= buf
;
1267 const char *pszPtr
= psz
;
1271 // have destination buffer, convert there
1273 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1274 (char**)&bufPtr
, &outbuf
);
1275 res
= n
- (outbuf
/ SIZEOF_WCHAR_T
);
1279 // convert to native endianness
1280 WC_BSWAP(buf
/* _not_ bufPtr */, res
)
1283 // NB: iconv was given only strlen(psz) characters on input, and so
1284 // it couldn't convert the trailing zero. Let's do it ourselves
1285 // if there's some room left for it in the output buffer.
1291 // no destination buffer... convert using temp buffer
1292 // to calculate destination buffer requirement
1297 outbuf
= 8*SIZEOF_WCHAR_T
;
1300 ICONV_CHAR_CAST(&pszPtr
), &inbuf
,
1301 (char**)&bufPtr
, &outbuf
);
1303 res
+= 8-(outbuf
/SIZEOF_WCHAR_T
);
1304 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1307 if (ICONV_FAILED(cres
, inbuf
))
1309 //VS: it is ok if iconv fails, hence trace only
1310 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1317 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
1319 size_t inbuf
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
1323 wchar_t *tmpbuf
= 0;
1327 // need to copy to temp buffer to switch endianness
1328 // this absolutely doesn't rock!
1329 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1330 // could be in read-only memory, or be accessed in some other thread)
1331 tmpbuf
=(wchar_t*)malloc((inbuf
+1)*SIZEOF_WCHAR_T
);
1332 memcpy(tmpbuf
,psz
,(inbuf
+1)*SIZEOF_WCHAR_T
);
1333 WC_BSWAP(tmpbuf
, inbuf
)
1339 // have destination buffer, convert there
1340 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1344 // NB: iconv was given only wcslen(psz) characters on input, and so
1345 // it couldn't convert the trailing zero. Let's do it ourselves
1346 // if there's some room left for it in the output buffer.
1352 // no destination buffer... convert using temp buffer
1353 // to calculate destination buffer requirement
1357 buf
= tbuf
; outbuf
= 16;
1359 cres
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf
);
1362 } while ((cres
==(size_t)-1) && (errno
==E2BIG
));
1370 if (ICONV_FAILED(cres
, inbuf
))
1372 //VS: it is ok if iconv fails, hence trace only
1373 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1380 #endif // HAVE_ICONV
1383 // ============================================================================
1384 // Win32 conversion classes
1385 // ============================================================================
1387 #ifdef wxHAVE_WIN32_MB2WC
1391 extern WXDLLIMPEXP_BASE
long wxCharsetToCodepage(const wxChar
*charset
);
1392 extern WXDLLIMPEXP_BASE
long wxEncodingToCodepage(wxFontEncoding encoding
);
1395 class wxMBConv_win32
: public wxMBConv
1400 m_CodePage
= CP_ACP
;
1404 wxMBConv_win32(const wxChar
* name
)
1406 m_CodePage
= wxCharsetToCodepage(name
);
1409 wxMBConv_win32(wxFontEncoding encoding
)
1411 m_CodePage
= wxEncodingToCodepage(encoding
);
1415 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
1417 // note that we have to use MB_ERR_INVALID_CHARS flag as without it
1418 // the behaviour is not compatible with the Unix version (using iconv)
1419 // and breaks the library itself, e.g. wxTextInputStream::NextChar()
1420 // wouldn't work if reading an incomplete MB char didn't result in an error
1422 const size_t len
= ::MultiByteToWideChar
1424 m_CodePage
, // code page
1425 MB_ERR_INVALID_CHARS
, // flags: fall on error
1426 psz
, // input string
1427 -1, // its length (NUL-terminated)
1428 buf
, // output string
1429 buf
? n
: 0 // size of output buffer
1432 // note that it returns count of written chars for buf != NULL and size
1433 // of the needed buffer for buf == NULL so in either case the length of
1434 // the string (which never includes the terminating NUL) is one less
1435 return len
? len
- 1 : (size_t)-1;
1438 size_t WC2MB(char *buf
, const wchar_t *pwz
, size_t n
) const
1441 we have a problem here: by default, WideCharToMultiByte() may
1442 replace characters unrepresentable in the target code page with bad
1443 quality approximations such as turning "1/2" symbol (U+00BD) into
1444 "1" for the code pages which don't have it and we, obviously, want
1445 to avoid this at any price
1447 the trouble is that this function does it _silently_, i.e. it won't
1448 even tell us whether it did or not... Win98/2000 and higher provide
1449 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1450 we have to resort to a round trip, i.e. check that converting back
1451 results in the same string -- this is, of course, expensive but
1452 otherwise we simply can't be sure to not garble the data.
1455 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1456 // it doesn't work with CJK encodings (which we test for rather roughly
1457 // here...) nor with UTF-7/8 nor, of course, with Windows versions not supporting it
1459 BOOL usedDef
wxDUMMY_INITIALIZE(false);
1462 if ( CanUseNoBestFit() && m_CodePage
< 50000 )
1464 // it's our lucky day
1465 flags
= WC_NO_BEST_FIT_CHARS
;
1466 pUsedDef
= &usedDef
;
1468 else // old system or unsupported encoding
1474 const size_t len
= ::WideCharToMultiByte
1476 m_CodePage
, // code page
1477 flags
, // either none or no best fit
1478 pwz
, // input string
1479 -1, // it is (wide) NUL-terminated
1480 buf
, // output buffer
1481 buf
? n
: 0, // and its size
1482 NULL
, // default "replacement" char
1483 pUsedDef
// [out] was it used?
1488 // function totally failed
1492 // if we were really converting, check if we succeeded
1497 // check if the conversion failed, i.e. if any replacements
1502 else // we must resort to double tripping...
1504 wxWCharBuffer
wcBuf(n
);
1505 if ( MB2WC(wcBuf
.data(), buf
, n
) == (size_t)-1 ||
1506 wcscmp(wcBuf
, pwz
) != 0 )
1508 // we didn't obtain the same thing we started from, hence
1509 // the conversion was lossy and we consider that it failed
1515 // see the comment above for the reason of "len - 1"
1519 bool IsOk() const { return m_CodePage
!= -1; }
// Tell whether WC_NO_BEST_FIT_CHARS may be used on the running system.
//
// The answer is computed once from wxGetOsVersion() and cached in a
// function-local static: -1 means "not determined yet", and the function
// returns true only when the cached value is 1.
1522 static bool CanUseNoBestFit()
1524 static int s_isWin98Or2k
= -1;
// only query the OS version the first time we are called
1526 if ( s_isWin98Or2k
== -1 )
1529 switch ( wxGetOsVersion(&verMaj
, &verMin
) )
// first family: needs version 4.10 or later
// NOTE(review): the case labels are not among the lines present here
1532 s_isWin98Or2k
= verMaj
>= 4 && verMin
>= 10;
// second family: needs major version 5 or later
1536 s_isWin98Or2k
= verMaj
>= 5;
1540 // unknown, be conservative by default
1544 wxASSERT_MSG( s_isWin98Or2k
!= -1, _T("should be set above") );
1547 return s_isWin98Or2k
== 1;
1553 #endif // wxHAVE_WIN32_MB2WC
1555 // ============================================================================
1556 // Cocoa conversion classes
1557 // ============================================================================
1559 #if defined(__WXCOCOA__)
1561 // RN: There is no UTF-32 support in either Core Foundation or
1562 // Cocoa. Strangely enough, Core Foundation itself uses
1563 // UTF-32 internally quite a bit - it's just not public (yet).
1565 #include <CoreFoundation/CFString.h>
1566 #include <CoreFoundation/CFStringEncodingExt.h>
1568 CFStringEncoding
wxCFStringEncFromFontEnc(wxFontEncoding encoding
)
1570 CFStringEncoding enc
= 0 ;
1571 if ( encoding
== wxFONTENCODING_DEFAULT
)
1574 encoding
= wxFont::GetDefaultEncoding() ;
1576 encoding
= wxLocale::GetSystemEncoding() ;
1579 else switch( encoding
)
1581 case wxFONTENCODING_ISO8859_1
:
1582 enc
= kCFStringEncodingISOLatin1
;
1584 case wxFONTENCODING_ISO8859_2
:
1585 enc
= kCFStringEncodingISOLatin2
;
1587 case wxFONTENCODING_ISO8859_3
:
1588 enc
= kCFStringEncodingISOLatin3
;
1590 case wxFONTENCODING_ISO8859_4
:
1591 enc
= kCFStringEncodingISOLatin4
;
1593 case wxFONTENCODING_ISO8859_5
:
1594 enc
= kCFStringEncodingISOLatinCyrillic
;
1596 case wxFONTENCODING_ISO8859_6
:
1597 enc
= kCFStringEncodingISOLatinArabic
;
1599 case wxFONTENCODING_ISO8859_7
:
1600 enc
= kCFStringEncodingISOLatinGreek
;
1602 case wxFONTENCODING_ISO8859_8
:
1603 enc
= kCFStringEncodingISOLatinHebrew
;
1605 case wxFONTENCODING_ISO8859_9
:
1606 enc
= kCFStringEncodingISOLatin5
;
1608 case wxFONTENCODING_ISO8859_10
:
1609 enc
= kCFStringEncodingISOLatin6
;
1611 case wxFONTENCODING_ISO8859_11
:
1612 enc
= kCFStringEncodingISOLatinThai
;
1614 case wxFONTENCODING_ISO8859_13
:
1615 enc
= kCFStringEncodingISOLatin7
;
1617 case wxFONTENCODING_ISO8859_14
:
1618 enc
= kCFStringEncodingISOLatin8
;
1620 case wxFONTENCODING_ISO8859_15
:
1621 enc
= kCFStringEncodingISOLatin9
;
1624 case wxFONTENCODING_KOI8
:
1625 enc
= kCFStringEncodingKOI8_R
;
1627 case wxFONTENCODING_ALTERNATIVE
: // MS-DOS CP866
1628 enc
= kCFStringEncodingDOSRussian
;
1631 // case wxFONTENCODING_BULGARIAN :
1635 case wxFONTENCODING_CP437
:
1636 enc
=kCFStringEncodingDOSLatinUS
;
1638 case wxFONTENCODING_CP850
:
1639 enc
= kCFStringEncodingDOSLatin1
;
1641 case wxFONTENCODING_CP852
:
1642 enc
= kCFStringEncodingDOSLatin2
;
1644 case wxFONTENCODING_CP855
:
1645 enc
= kCFStringEncodingDOSCyrillic
;
1647 case wxFONTENCODING_CP866
:
1648 enc
=kCFStringEncodingDOSRussian
;
1650 case wxFONTENCODING_CP874
:
1651 enc
= kCFStringEncodingDOSThai
;
1653 case wxFONTENCODING_CP932
:
1654 enc
= kCFStringEncodingDOSJapanese
;
1656 case wxFONTENCODING_CP936
:
1657 enc
=kCFStringEncodingDOSChineseSimplif
;
1659 case wxFONTENCODING_CP949
:
1660 enc
= kCFStringEncodingDOSKorean
;
1662 case wxFONTENCODING_CP950
:
1663 enc
= kCFStringEncodingDOSChineseTrad
;
1666 case wxFONTENCODING_CP1250
:
1667 enc
= kCFStringEncodingWindowsLatin2
;
1669 case wxFONTENCODING_CP1251
:
1670 enc
=kCFStringEncodingWindowsCyrillic
;
1672 case wxFONTENCODING_CP1252
:
1673 enc
=kCFStringEncodingWindowsLatin1
;
1675 case wxFONTENCODING_CP1253
:
1676 enc
= kCFStringEncodingWindowsGreek
;
1678 case wxFONTENCODING_CP1254
:
1679 enc
= kCFStringEncodingWindowsLatin5
;
1681 case wxFONTENCODING_CP1255
:
1682 enc
=kCFStringEncodingWindowsHebrew
;
1684 case wxFONTENCODING_CP1256
:
1685 enc
=kCFStringEncodingWindowsArabic
;
1687 case wxFONTENCODING_CP1257
:
1688 enc
= kCFStringEncodingWindowsBalticRim
;
1690 case wxFONTENCODING_UTF7
:
1691 enc
= kCFStringEncodingNonLossyASCII
;
1693 case wxFONTENCODING_UTF8
:
1694 enc
= kCFStringEncodingUTF8
;
1696 case wxFONTENCODING_EUC_JP
:
1697 enc
= kCFStringEncodingEUC_JP
;
1699 case wxFONTENCODING_UTF16
:
1700 enc
= kCFStringEncodingUnicode
;
1702 case wxFONTENCODING_MACROMAN
:
1703 enc
= kCFStringEncodingMacRoman
;
1705 case wxFONTENCODING_MACJAPANESE
:
1706 enc
= kCFStringEncodingMacJapanese
;
1708 case wxFONTENCODING_MACCHINESETRAD
:
1709 enc
= kCFStringEncodingMacChineseTrad
;
1711 case wxFONTENCODING_MACKOREAN
:
1712 enc
= kCFStringEncodingMacKorean
;
1714 case wxFONTENCODING_MACARABIC
:
1715 enc
= kCFStringEncodingMacArabic
;
1717 case wxFONTENCODING_MACHEBREW
:
1718 enc
= kCFStringEncodingMacHebrew
;
1720 case wxFONTENCODING_MACGREEK
:
1721 enc
= kCFStringEncodingMacGreek
;
1723 case wxFONTENCODING_MACCYRILLIC
:
1724 enc
= kCFStringEncodingMacCyrillic
;
1726 case wxFONTENCODING_MACDEVANAGARI
:
1727 enc
= kCFStringEncodingMacDevanagari
;
1729 case wxFONTENCODING_MACGURMUKHI
:
1730 enc
= kCFStringEncodingMacGurmukhi
;
1732 case wxFONTENCODING_MACGUJARATI
:
1733 enc
= kCFStringEncodingMacGujarati
;
1735 case wxFONTENCODING_MACORIYA
:
1736 enc
= kCFStringEncodingMacOriya
;
1738 case wxFONTENCODING_MACBENGALI
:
1739 enc
= kCFStringEncodingMacBengali
;
1741 case wxFONTENCODING_MACTAMIL
:
1742 enc
= kCFStringEncodingMacTamil
;
1744 case wxFONTENCODING_MACTELUGU
:
1745 enc
= kCFStringEncodingMacTelugu
;
1747 case wxFONTENCODING_MACKANNADA
:
1748 enc
= kCFStringEncodingMacKannada
;
1750 case wxFONTENCODING_MACMALAJALAM
:
1751 enc
= kCFStringEncodingMacMalayalam
;
1753 case wxFONTENCODING_MACSINHALESE
:
1754 enc
= kCFStringEncodingMacSinhalese
;
1756 case wxFONTENCODING_MACBURMESE
:
1757 enc
= kCFStringEncodingMacBurmese
;
1759 case wxFONTENCODING_MACKHMER
:
1760 enc
= kCFStringEncodingMacKhmer
;
1762 case wxFONTENCODING_MACTHAI
:
1763 enc
= kCFStringEncodingMacThai
;
1765 case wxFONTENCODING_MACLAOTIAN
:
1766 enc
= kCFStringEncodingMacLaotian
;
1768 case wxFONTENCODING_MACGEORGIAN
:
1769 enc
= kCFStringEncodingMacGeorgian
;
1771 case wxFONTENCODING_MACARMENIAN
:
1772 enc
= kCFStringEncodingMacArmenian
;
1774 case wxFONTENCODING_MACCHINESESIMP
:
1775 enc
= kCFStringEncodingMacChineseSimp
;
1777 case wxFONTENCODING_MACTIBETAN
:
1778 enc
= kCFStringEncodingMacTibetan
;
1780 case wxFONTENCODING_MACMONGOLIAN
:
1781 enc
= kCFStringEncodingMacMongolian
;
1783 case wxFONTENCODING_MACETHIOPIC
:
1784 enc
= kCFStringEncodingMacEthiopic
;
1786 case wxFONTENCODING_MACCENTRALEUR
:
1787 enc
= kCFStringEncodingMacCentralEurRoman
;
1789 case wxFONTENCODING_MACVIATNAMESE
:
1790 enc
= kCFStringEncodingMacVietnamese
;
1792 case wxFONTENCODING_MACARABICEXT
:
1793 enc
= kCFStringEncodingMacExtArabic
;
1795 case wxFONTENCODING_MACSYMBOL
:
1796 enc
= kCFStringEncodingMacSymbol
;
1798 case wxFONTENCODING_MACDINGBATS
:
1799 enc
= kCFStringEncodingMacDingbats
;
1801 case wxFONTENCODING_MACTURKISH
:
1802 enc
= kCFStringEncodingMacTurkish
;
1804 case wxFONTENCODING_MACCROATIAN
:
1805 enc
= kCFStringEncodingMacCroatian
;
1807 case wxFONTENCODING_MACICELANDIC
:
1808 enc
= kCFStringEncodingMacIcelandic
;
1810 case wxFONTENCODING_MACROMANIAN
:
1811 enc
= kCFStringEncodingMacRomanian
;
1813 case wxFONTENCODING_MACCELTIC
:
1814 enc
= kCFStringEncodingMacCeltic
;
1816 case wxFONTENCODING_MACGAELIC
:
1817 enc
= kCFStringEncodingMacGaelic
;
1819 // case wxFONTENCODING_MACKEYBOARD :
1820 // enc = kCFStringEncodingMacKeyboardGlyphs ;
1823 // because gcc is picky
1829 wxFontEncoding
wxFontEncFromCFStringEnc(CFStringEncoding encoding
)
1831 wxFontEncoding enc
= wxFONTENCODING_DEFAULT
;
1835 case kCFStringEncodingISOLatin1
:
1836 enc
= wxFONTENCODING_ISO8859_1
;
1838 case kCFStringEncodingISOLatin2
:
1839 enc
= wxFONTENCODING_ISO8859_2
;
1841 case kCFStringEncodingISOLatin3
:
1842 enc
= wxFONTENCODING_ISO8859_3
;
1844 case kCFStringEncodingISOLatin4
:
1845 enc
= wxFONTENCODING_ISO8859_4
;
1847 case kCFStringEncodingISOLatinCyrillic
:
1848 enc
= wxFONTENCODING_ISO8859_5
;
1850 case kCFStringEncodingISOLatinArabic
:
1851 enc
= wxFONTENCODING_ISO8859_6
;
1853 case kCFStringEncodingISOLatinGreek
:
1854 enc
= wxFONTENCODING_ISO8859_7
;
1856 case kCFStringEncodingISOLatinHebrew
:
1857 enc
= wxFONTENCODING_ISO8859_8
;
1859 case kCFStringEncodingISOLatin5
:
1860 enc
= wxFONTENCODING_ISO8859_9
;
1862 case kCFStringEncodingISOLatin6
:
1863 enc
= wxFONTENCODING_ISO8859_10
;
1865 case kCFStringEncodingISOLatin7
:
1866 enc
= wxFONTENCODING_ISO8859_13
;
1868 case kCFStringEncodingISOLatin8
:
1869 enc
= wxFONTENCODING_ISO8859_14
;
1871 case kCFStringEncodingISOLatin9
:
1872 enc
=wxFONTENCODING_ISO8859_15
;
1875 case kCFStringEncodingKOI8_R
:
1876 enc
= wxFONTENCODING_KOI8
;
1880 // enc = wxFONTENCODING_BULGARIAN;
1883 case kCFStringEncodingDOSLatinUS
:
1884 enc
= wxFONTENCODING_CP437
;
1886 case kCFStringEncodingDOSLatin1
:
1887 enc
= wxFONTENCODING_CP850
;
1889 case kCFStringEncodingDOSLatin2
:
1890 enc
=wxFONTENCODING_CP852
;
1892 case kCFStringEncodingDOSCyrillic
:
1893 enc
= wxFONTENCODING_CP855
;
1895 case kCFStringEncodingDOSRussian
:
1896 enc
= wxFONTENCODING_CP866
;
1898 case kCFStringEncodingDOSThai
:
1899 enc
=wxFONTENCODING_CP874
;
1901 case kCFStringEncodingDOSJapanese
:
1902 enc
= wxFONTENCODING_CP932
;
1904 case kCFStringEncodingDOSChineseSimplif
:
1905 enc
= wxFONTENCODING_CP936
;
1907 case kCFStringEncodingDOSKorean
:
1908 enc
= wxFONTENCODING_CP949
;
1910 case kCFStringEncodingDOSChineseTrad
:
1911 enc
= wxFONTENCODING_CP950
;
1914 case kCFStringEncodingWindowsLatin2
:
1915 enc
= wxFONTENCODING_CP1250
;
1917 case kCFStringEncodingWindowsCyrillic
:
1918 enc
= wxFONTENCODING_CP1251
;
1920 case kCFStringEncodingWindowsLatin1
:
1921 enc
= wxFONTENCODING_CP1252
;
1923 case kCFStringEncodingWindowsGreek
:
1924 enc
= wxFONTENCODING_CP1253
;
1926 case kCFStringEncodingWindowsLatin5
:
1927 enc
= wxFONTENCODING_CP1254
;
1929 case kCFStringEncodingWindowsHebrew
:
1930 enc
= wxFONTENCODING_CP1255
;
1932 case kCFStringEncodingWindowsArabic
:
1933 enc
= wxFONTENCODING_CP1256
;
1935 case kCFStringEncodingWindowsBalticRim
:
1936 enc
=wxFONTENCODING_CP1257
;
1938 case kCFStringEncodingEUC_JP
:
1939 enc
= wxFONTENCODING_EUC_JP
;
1941 case kCFStringEncodingUnicode
:
1942 enc
= wxFONTENCODING_UTF16
;
1944 case kCFStringEncodingMacRoman
:
1945 enc
= wxFONTENCODING_MACROMAN
;
1947 case kCFStringEncodingMacJapanese
:
1948 enc
= wxFONTENCODING_MACJAPANESE
;
1950 case kCFStringEncodingMacChineseTrad
:
1951 enc
= wxFONTENCODING_MACCHINESETRAD
;
1953 case kCFStringEncodingMacKorean
:
1954 enc
= wxFONTENCODING_MACKOREAN
;
1956 case kCFStringEncodingMacArabic
:
1957 enc
=wxFONTENCODING_MACARABIC
;
1959 case kCFStringEncodingMacHebrew
:
1960 enc
= wxFONTENCODING_MACHEBREW
;
1962 case kCFStringEncodingMacGreek
:
1963 enc
= wxFONTENCODING_MACGREEK
;
1965 case kCFStringEncodingMacCyrillic
:
1966 enc
= wxFONTENCODING_MACCYRILLIC
;
1968 case kCFStringEncodingMacDevanagari
:
1969 enc
= wxFONTENCODING_MACDEVANAGARI
;
1971 case kCFStringEncodingMacGurmukhi
:
1972 enc
= wxFONTENCODING_MACGURMUKHI
;
1974 case kCFStringEncodingMacGujarati
:
1975 enc
= wxFONTENCODING_MACGUJARATI
;
1977 case kCFStringEncodingMacOriya
:
1978 enc
=wxFONTENCODING_MACORIYA
;
1980 case kCFStringEncodingMacBengali
:
1981 enc
=wxFONTENCODING_MACBENGALI
;
1983 case kCFStringEncodingMacTamil
:
1984 enc
= wxFONTENCODING_MACTAMIL
;
1986 case kCFStringEncodingMacTelugu
:
1987 enc
= wxFONTENCODING_MACTELUGU
;
1989 case kCFStringEncodingMacKannada
:
1990 enc
= wxFONTENCODING_MACKANNADA
;
1992 case kCFStringEncodingMacMalayalam
:
1993 enc
= wxFONTENCODING_MACMALAJALAM
;
1995 case kCFStringEncodingMacSinhalese
:
1996 enc
= wxFONTENCODING_MACSINHALESE
;
1998 case kCFStringEncodingMacBurmese
:
1999 enc
= wxFONTENCODING_MACBURMESE
;
2001 case kCFStringEncodingMacKhmer
:
2002 enc
= wxFONTENCODING_MACKHMER
;
2004 case kCFStringEncodingMacThai
:
2005 enc
= wxFONTENCODING_MACTHAI
;
2007 case kCFStringEncodingMacLaotian
:
2008 enc
= wxFONTENCODING_MACLAOTIAN
;
2010 case kCFStringEncodingMacGeorgian
:
2011 enc
= wxFONTENCODING_MACGEORGIAN
;
2013 case kCFStringEncodingMacArmenian
:
2014 enc
= wxFONTENCODING_MACARMENIAN
;
2016 case kCFStringEncodingMacChineseSimp
:
2017 enc
= wxFONTENCODING_MACCHINESESIMP
;
2019 case kCFStringEncodingMacTibetan
:
2020 enc
= wxFONTENCODING_MACTIBETAN
;
2022 case kCFStringEncodingMacMongolian
:
2023 enc
= wxFONTENCODING_MACMONGOLIAN
;
2025 case kCFStringEncodingMacEthiopic
:
2026 enc
= wxFONTENCODING_MACETHIOPIC
;
2028 case kCFStringEncodingMacCentralEurRoman
:
2029 enc
= wxFONTENCODING_MACCENTRALEUR
;
2031 case kCFStringEncodingMacVietnamese
:
2032 enc
= wxFONTENCODING_MACVIATNAMESE
;
2034 case kCFStringEncodingMacExtArabic
:
2035 enc
= wxFONTENCODING_MACARABICEXT
;
2037 case kCFStringEncodingMacSymbol
:
2038 enc
= wxFONTENCODING_MACSYMBOL
;
2040 case kCFStringEncodingMacDingbats
:
2041 enc
= wxFONTENCODING_MACDINGBATS
;
2043 case kCFStringEncodingMacTurkish
:
2044 enc
= wxFONTENCODING_MACTURKISH
;
2046 case kCFStringEncodingMacCroatian
:
2047 enc
= wxFONTENCODING_MACCROATIAN
;
2049 case kCFStringEncodingMacIcelandic
:
2050 enc
= wxFONTENCODING_MACICELANDIC
;
2052 case kCFStringEncodingMacRomanian
:
2053 enc
= wxFONTENCODING_MACROMANIAN
;
2055 case kCFStringEncodingMacCeltic
:
2056 enc
= wxFONTENCODING_MACCELTIC
;
2058 case kCFStringEncodingMacGaelic
:
2059 enc
= wxFONTENCODING_MACGAELIC
;
2061 // case kCFStringEncodingMacKeyboardGlyphs :
2062 // enc = wxFONTENCODING_MACKEYBOARD ;
// wxMBConv implementation going through Core Foundation strings: multibyte
// text in m_char_encoding is turned into a CFString and its UniChar contents
// are extracted (MB2WC), or the other way round (WC2MB). When wchar_t is 4
// bytes wide an extra UTF-16 conversion step is performed on a temporary
// UniChar buffer.
2068 class wxMBConv_cocoa
: public wxMBConv
// default constructor body: use the Core Foundation system encoding
2073 Init(CFStringGetSystemEncoding()) ;
// construct from a charset name, mapped non-interactively (false) to a
// wxFontEncoding and from there to a CFStringEncoding
2076 wxMBConv_cocoa(const wxChar
* name
)
2078 Init( wxCFStringEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name
, false) ) ) ;
// construct from a wxFontEncoding
2081 wxMBConv_cocoa(wxFontEncoding encoding
)
2083 Init( wxCFStringEncFromFontEnc(encoding
) );
// remember the multibyte encoding; the wide side is always
// kCFStringEncodingUnicode
2090 void Init( CFStringEncoding encoding
)
2092 m_char_encoding
= encoding
;
2093 m_unicode_encoding
= kCFStringEncodingUnicode
;
// Multibyte to wide conversion of the NUL-terminated string szUnConv.
// A NULL szOut with nOutSize != 0 makes the function work on temporary
// buffers which are deleted again before returning.
// NOTE(review): the early return below yields the length without the
// terminating NUL (nRealOutSize - 1) while the final return yields
// nRealOutSize itself -- confirm which one callers expect.
2096 size_t MB2WC(wchar_t * szOut
, const char * szUnConv
, size_t nOutSize
) const
2100 size_t nBufSize
= strlen(szUnConv
) + 1;
2101 size_t nRealOutSize
;
2103 UniChar
* szUniCharBuffer
= (UniChar
*) szOut
;
2104 wchar_t* szConvBuffer
= szOut
;
2106 if (szConvBuffer
== NULL
&& nOutSize
!= 0)
2108 szConvBuffer
= new wchar_t[nOutSize
] ;
// with 4 byte wchar_t the UniChar data cannot live in the output buffer
2111 #if SIZEOF_WCHAR_T == 4
2112 szUniCharBuffer
= new UniChar
[nOutSize
];
// wrap the input bytes and build a CFString from them
2115 CFDataRef theData
= CFDataCreateWithBytesNoCopy (
2117 (const UInt8
*)szUnConv
,
2124 CFStringRef theString
= CFStringCreateFromExternalRepresentation (
// NOTE(review): creation failure is only caught by this assert
2130 wxASSERT(theString
);
// early exit: only the length is reported
2134 nRealOutSize
= CFStringGetLength(theString
) + 1;
2135 CFRelease(theString
);
2136 return nRealOutSize
- 1;
// copy all characters of the string into the UniChar buffer
2139 CFRange theRange
= { 0, CFStringGetLength(theString
) };
2141 CFStringGetCharacters(theString
, theRange
, szUniCharBuffer
);
2144 nRealOutSize
= (CFStringGetLength(theString
) + 1);
2146 CFRelease(theString
);
// NUL-terminate; NOTE(review): nRealOutSize is not compared with nOutSize
// in the lines present here
2148 szUniCharBuffer
[nRealOutSize
-1] = '\0' ;
// widen UTF-16 to 4 byte wchar_t and drop the temporary UniChar buffer
2150 #if SIZEOF_WCHAR_T == 4
2151 wxMBConvUTF16 converter
;
2152 converter
.MB2WC(szConvBuffer
, (const char*)szUniCharBuffer
, nRealOutSize
) ;
2153 delete[] szUniCharBuffer
;
// the wchar_t buffer was ours if the caller did not supply one
2155 if ( szOut
== NULL
)
2156 delete [] szConvBuffer
;
2158 return nRealOutSize
;
// Wide to multibyte conversion of the NUL-terminated string szUnConv.
// Both return statements present here yield nRealOutSize - 1.
2161 size_t WC2MB(char *szOut
, const wchar_t *szUnConv
, size_t nOutSize
) const
2163 size_t nBufSize
= wxWcslen(szUnConv
) + 1;
2164 size_t nRealOutSize
;
2165 char* szBuffer
= szOut
;
2166 UniChar
* szUniBuffer
= (UniChar
*) szUnConv
;
// temporary output buffer sized at 8 bytes per input character plus one
2171 nRealOutSize
= ((nBufSize
- 1) * 8) +1 ;
2172 szBuffer
= new char[ nRealOutSize
] ;
2175 nRealOutSize
= nOutSize
;
// with 4 byte wchar_t the input is first converted to UTF-16 in a new[]
// allocated buffer; the first call only computes the needed size
// NOTE(review): wxMBConvUTF16BE is used here but wxMBConvUTF16 in MB2WC()
2177 #if SIZEOF_WCHAR_T == 4
2178 wxMBConvUTF16BE converter
;
2179 nBufSize
= converter
.WC2MB( NULL
, szUnConv
, 0 );
2180 szUniBuffer
= new UniChar
[ (nBufSize
/ sizeof(UniChar
)) + 1] ;
2181 converter
.WC2MB( (char*) szUniBuffer
, szUnConv
, nBufSize
+ sizeof(UniChar
)) ;
2182 nBufSize
/= sizeof(UniChar
);
2186 CFStringRef theString
= CFStringCreateWithCharactersNoCopy(
2193 wxASSERT(theString
);
2195 //Note that CER puts a BOM when converting to unicode
2196 //so we may want to check and use getchars instead in that case
2197 CFDataRef theData
= CFStringCreateExternalRepresentation(
2201 0 //what to put in characters that can't be converted -
2202 //0 tells CFString to return NULL if it meets such a character
2208 CFRelease(theString
);
2210 nRealOutSize
= CFDataGetLength(theData
);
2212 if ( szOut
== NULL
)
// NOTE(review): with the delete[] below commented out, no release of
// szUniBuffer is present in either exit path
2217 //TODO: This gets flagged as a non-malloced address by the debugger...
2218 //#if SIZEOF_WCHAR_T == 4
2219 // delete[] szUniBuffer;
2222 return nRealOutSize
- 1;
// copy the converted bytes into the output buffer
2225 CFRange theRange
= {0, CFDataGetLength(theData
) };
2226 CFDataGetBytes(theData
, theRange
, (UInt8
*) szBuffer
);
2230 //TODO: This gets flagged as a non-malloced address by the debugger...
2231 //#if SIZEOF_WCHAR_T == 4
2232 // delete[] szUniBuffer;
2234 return nRealOutSize
- 1;
2239 //TODO: check for invalid en/de/coding
// encoding of the multibyte side, as passed to Init()
2244 CFStringEncoding m_char_encoding
;
// encoding of the wide side, always kCFStringEncodingUnicode
2245 CFStringEncoding m_unicode_encoding
;
2248 #endif // defined(__WXCOCOA__)
2250 // ============================================================================
2251 // Mac conversion classes
2252 // ============================================================================
2254 #if defined(__WXMAC__) && defined(TARGET_CARBON)
// wxMBConv implementation based on the Text Encoding Converter: Init()
// creates one TEC converter object per direction and MB2WC()/WC2MB() feed
// the text through TECConvertText(). When wchar_t is 4 bytes wide the data
// additionally goes through wxMBConvUTF16BE and a malloc()ed UniChar buffer.
2256 class wxMBConv_mac
: public wxMBConv
// default constructor body: use the system encoding
2261 Init(CFStringGetSystemEncoding()) ;
// construct from a charset name, mapped non-interactively (false)
2264 wxMBConv_mac(const wxChar
* name
)
2266 Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name
, false) ) ) ;
// construct from a wxFontEncoding
2269 wxMBConv_mac(wxFontEncoding encoding
)
2271 Init( wxMacGetSystemEncFromFontEnc(encoding
) );
// dispose of both converters; the status results are stored but no check
// of them is present here
2276 OSStatus status
= noErr
;
2277 status
= TECDisposeConverter(m_MB2WC_converter
);
2278 status
= TECDisposeConverter(m_WC2MB_converter
);
// store the encodings and create the converters for both directions
2282 void Init( TextEncodingBase encoding
)
2284 OSStatus status
= noErr
;
2285 m_char_encoding
= encoding
;
2286 m_unicode_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
2288 status
= TECCreateConverter(&m_MB2WC_converter
,
2290 m_unicode_encoding
);
2291 status
= TECCreateConverter(&m_WC2MB_converter
,
// Multibyte to wide conversion of the NUL-terminated string psz; n is the
// output capacity in characters. A temporary buffer (tbuf) stands in for
// buf when the latter is NULL.
2296 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2298 OSStatus status
= noErr
;
2299 ByteCount byteOutLen
;
2300 ByteCount byteInLen
= strlen(psz
) ;
2301 wchar_t *tbuf
= NULL
;
2302 UniChar
* ubuf
= NULL
;
2308 tbuf
= (wchar_t*) malloc( n
* SIZEOF_WCHAR_T
) ;
// output capacity in bytes of UTF-16 data
2310 ByteCount byteBufferLen
= n
* sizeof( UniChar
) ;
// 4 byte wchar_t: separate UniChar buffer with room for a terminator;
// otherwise convert straight into the output buffer
2311 #if SIZEOF_WCHAR_T == 4
2312 ubuf
= (UniChar
*) malloc( byteBufferLen
+ 2 ) ;
2314 ubuf
= (UniChar
*) (buf
? buf
: tbuf
) ;
2316 status
= TECConvertText(m_MB2WC_converter
, (ConstTextPtr
) psz
, byteInLen
, &byteInLen
,
2317 (TextPtr
) ubuf
, byteBufferLen
, &byteOutLen
);
2318 #if SIZEOF_WCHAR_T == 4
2319 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2320 // is not properly terminated we get random characters at the end
2321 ubuf
[byteOutLen
/ sizeof( UniChar
) ] = 0 ;
2322 wxMBConvUTF16BE converter
;
2323 res
= converter
.MB2WC( (buf
? buf
: tbuf
) , (const char*)ubuf
, n
) ;
// 2 byte wchar_t: the result length is the number of UniChars written
2326 res
= byteOutLen
/ sizeof( UniChar
) ;
// there is room left in the caller's buffer
2331 if ( buf
&& res
< n
)
// Wide to multibyte conversion of the NUL-terminated string psz; n is the
// output capacity in bytes.
2337 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2339 OSStatus status
= noErr
;
2340 ByteCount byteOutLen
;
2341 ByteCount byteInLen
= wxWcslen(psz
) * SIZEOF_WCHAR_T
;
// temporary output buffer: 8 bytes per input character plus SIZEOF_WCHAR_T
2348 n
= ((byteInLen
/ SIZEOF_WCHAR_T
) * 8) + SIZEOF_WCHAR_T
;
2349 tbuf
= (char*) malloc( n
) ;
2352 ByteCount byteBufferLen
= n
;
2353 UniChar
* ubuf
= NULL
;
// 4 byte wchar_t: convert to UTF-16 first (the first call only computes the
// size); otherwise the input can be used as UniChar data directly
2354 #if SIZEOF_WCHAR_T == 4
2355 wxMBConvUTF16BE converter
;
2356 size_t unicharlen
= converter
.WC2MB( NULL
, psz
, 0 ) ;
2357 byteInLen
= unicharlen
;
2358 ubuf
= (UniChar
*) malloc( byteInLen
+ 2 ) ;
2359 converter
.WC2MB( (char*) ubuf
, psz
, unicharlen
+ 2 ) ;
2361 ubuf
= (UniChar
*) psz
;
2363 status
= TECConvertText(m_WC2MB_converter
, (ConstTextPtr
) ubuf
, byteInLen
, &byteInLen
,
2364 (TextPtr
) (buf
? buf
: tbuf
) , byteBufferLen
, &byteOutLen
);
2365 #if SIZEOF_WCHAR_T == 4
// result length in bytes
2371 size_t res
= byteOutLen
;
2372 if ( buf
&& res
< n
)
// true if both converters are non-NULL
2379 { return m_MB2WC_converter
!= NULL
&& m_WC2MB_converter
!= NULL
; }
// converter objects for the two directions, created in Init()
2382 TECObjectRef m_MB2WC_converter
;
2383 TECObjectRef m_WC2MB_converter
;
// multibyte and Unicode text encodings, set in Init()
2385 TextEncodingBase m_char_encoding
;
2386 TextEncodingBase m_unicode_encoding
;
2389 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2391 // ============================================================================
2392 // wxEncodingConverter based conversion classes
2393 // ============================================================================
// wxMBConv implementation built on two wxEncodingConverter objects, one per
// direction, between m_enc and wxFONTENCODING_UNICODE.
2397 class wxMBConv_wxwin
: public wxMBConv
// initialise both converters; m_ok is true only if both succeed
2402 m_ok
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) &&
2403 w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
);
2407 // temporarily just use wxEncodingConverter stuff,
2408 // so that it works while a better implementation is built
2409 wxMBConv_wxwin(const wxChar
* name
)
// look the charset name up without user interaction (false)
2412 m_enc
= wxFontMapper::Get()->CharsetToEncoding(name
, false);
2414 m_enc
= wxFONTENCODING_SYSTEM
;
// construct directly from a wxFontEncoding
2419 wxMBConv_wxwin(wxFontEncoding enc
)
// Multibyte to wide conversion through m2w. The size parameter is marked
// WXUNUSED, i.e. the output capacity is not consulted here.
2426 size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const
2428 size_t inbuf
= strlen(psz
);
2430 m2w
.Convert(psz
,buf
);
// Wide to multibyte conversion through w2m; the size parameter is unused
// here as well.
2434 size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const
2436 const size_t inbuf
= wxWcslen(psz
);
2438 w2m
.Convert(psz
,buf
);
// report the result of the converter initialisation
2443 bool IsOk() const { return m_ok
; }
// the non-Unicode encoding handled by this object
2446 wxFontEncoding m_enc
;
// multibyte-to-wide and wide-to-multibyte converters
2447 wxEncodingConverter m2w
, w2m
;
2449 // were we initialized successfully?
2452 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
)
2455 #endif // wxUSE_FONTMAP
2457 // ============================================================================
2458 // wxCSConv implementation
2459 // ============================================================================
2461 void wxCSConv::Init()
// Construct a converter from a charset name; the encoding member is set to
// wxFONTENCODING_SYSTEM in this case.
2468 wxCSConv::wxCSConv(const wxChar
*charset
)
2477 m_encoding
= wxFONTENCODING_SYSTEM
;
// Construct a converter from a font encoding. wxFONTENCODING_MAX and
// wxFONTENCODING_DEFAULT are not accepted: they trigger a debug failure
// message and are replaced by wxFONTENCODING_SYSTEM.
2480 wxCSConv::wxCSConv(wxFontEncoding encoding
)
2482 if ( encoding
== wxFONTENCODING_MAX
|| encoding
== wxFONTENCODING_DEFAULT
)
2484 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
// fall back to the system encoding
2486 encoding
= wxFONTENCODING_SYSTEM
;
2491 m_encoding
= encoding
;
2494 wxCSConv::~wxCSConv()
// Copy constructor: takes over the charset name (through SetName()) and the
// encoding of conv.
2499 wxCSConv::wxCSConv(const wxCSConv
& conv
)
2504 SetName(conv
.m_name
);
2505 m_encoding
= conv
.m_encoding
;
// Assignment: takes over the charset name (through SetName()) and the
// encoding of conv.
// NOTE(review): no self-assignment check is present here -- confirm that
// "x = x" is safe given that SetName() is passed conv.m_name.
2508 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
)
2512 SetName(conv
.m_name
);
2513 m_encoding
= conv
.m_encoding
;
2518 void wxCSConv::Clear()
// Store a private copy (made with wxStrdup()) of the charset name in m_name.
2527 void wxCSConv::SetName(const wxChar
*charset
)
2531 m_name
= wxStrdup(charset
);
// Create the converter object actually doing the work for the charset name
// and/or encoding of this wxCSConv.
//
// The candidates are tried in the order listed in the comment below; which
// of them exist depends on the build (iconv, Win32, Mac, Cocoa, font map).
// If none can be created an error is logged.
2536 wxMBConv
*wxCSConv::DoCreate() const
2538 // check for the special case of ASCII or ISO8859-1 charset: as we have
2539 // special knowledge of it anyhow, we don't need to create a special
2540 // conversion object
2541 if ( m_encoding
== wxFONTENCODING_ISO8859_1
)
2543 // don't convert at all
2547 // we trust OS to do conversion better than we can so try external
2548 // conversion methods first
2550 // the full order is:
2551 // 1. OS conversion (iconv() under Unix or Win32 API)
2552 // 2. hard coded conversions for UTF
2553 // 3. wxEncodingConverter as fall back
2559 #endif // !wxUSE_FONTMAP
// iconv works with names: start from m_name, the font mapper can supply the
// name of m_encoding instead
2561 wxString
name(m_name
);
2565 name
= wxFontMapper::Get()->GetEncodingName(m_encoding
);
2566 #endif // wxUSE_FONTMAP
2568 wxMBConv_iconv
*conv
= new wxMBConv_iconv(name
);
2574 #endif // HAVE_ICONV
// Win32 API based converter, created from the name if we have one and from
// the encoding otherwise
2576 #ifdef wxHAVE_WIN32_MB2WC
2579 wxMBConv_win32
*conv
= m_name
? new wxMBConv_win32(m_name
)
2580 : new wxMBConv_win32(m_encoding
);
2589 #endif // wxHAVE_WIN32_MB2WC
// Mac converter: only for named charsets or encodings preceding
// wxFONTENCODING_UTF16BE in the enum
2590 #if defined(__WXMAC__)
2592 if ( m_name
|| ( m_encoding
< wxFONTENCODING_UTF16BE
) )
2595 wxMBConv_mac
*conv
= m_name
? new wxMBConv_mac(m_name
)
2596 : new wxMBConv_mac(m_encoding
);
// Cocoa converter: only for named charsets or encodings up to and including
// wxFONTENCODING_UTF16
2604 #if defined(__WXCOCOA__)
2606 if ( m_name
|| ( m_encoding
<= wxFONTENCODING_UTF16
) )
2609 wxMBConv_cocoa
*conv
= m_name
? new wxMBConv_cocoa(m_name
)
2610 : new wxMBConv_cocoa(m_encoding
);
// built-in UTF converters are selected by encoding, so a charset name has
// to be translated to an encoding first
2619 wxFontEncoding enc
= m_encoding
;
2621 if ( enc
== wxFONTENCODING_SYSTEM
&& m_name
)
2623 // use "false" to suppress interactive dialogs -- we can be called from
2624 // anywhere and popping up a dialog from here is the last thing we want to
2626 enc
= wxFontMapper::Get()->CharsetToEncoding(m_name
, false);
2628 #endif // wxUSE_FONTMAP
2632 case wxFONTENCODING_UTF7
:
2633 return new wxMBConvUTF7
;
2635 case wxFONTENCODING_UTF8
:
2636 return new wxMBConvUTF8
;
2638 case wxFONTENCODING_UTF16BE
:
2639 return new wxMBConvUTF16BE
;
2641 case wxFONTENCODING_UTF16LE
:
2642 return new wxMBConvUTF16LE
;
2644 case wxFONTENCODING_UTF32BE
:
2645 return new wxMBConvUTF32BE
;
2647 case wxFONTENCODING_UTF32LE
:
2648 return new wxMBConvUTF32LE
;
2651 // nothing to do but put here to suppress gcc warnings
// last resort: wxEncodingConverter based implementation
2658 wxMBConv_wxwin
*conv
= m_name
? new wxMBConv_wxwin(m_name
)
2659 : new wxMBConv_wxwin(m_encoding
);
2665 #endif // wxUSE_FONTMAP
2667 // NB: This is a hack to prevent deadlock. What could otherwise happen
2668 // in Unicode build: wxConvLocal creation ends up being here
2669 // because of some failure and logs the error. But wxLog will try to
2670 // attach timestamp, for which it will need wxConvLocal (to convert
2671 // time to char* and then wchar_t*), but that fails, tries to log
2672 // error, but wxLog has a (already locked) critical section that
2673 // guards static buffer.
2674 static bool alreadyLoggingError
= false;
2675 if (!alreadyLoggingError
)
2677 alreadyLoggingError
= true;
2678 wxLogError(_("Cannot convert from the charset '%s'!"),
2682 wxFontMapper::GetEncodingDescription(m_encoding
).c_str()
2683 #else // !wxUSE_FONTMAP
// NOTE(review): m_encoding is an enum value but is formatted with a string
// conversion below -- this looks like it should be an integer format
2684 wxString::Format(_("encoding %s"), m_encoding
).c_str()
2685 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2687 alreadyLoggingError
= false;
// Lazily create the real converter: stores the result of DoCreate() in
// m_convReal and clears m_deferred. The function is const, so the members
// are modified through a cast-away-const pointer to this object.
2693 void wxCSConv::CreateConvIfNeeded() const
2697 wxCSConv
*self
= (wxCSConv
*)this; // const_cast
2700 // if we have neither the name nor the encoding, use the default
2701 // encoding for this system
2702 if ( !m_name
&& m_encoding
== wxFONTENCODING_SYSTEM
)
2704 self
->m_name
= wxStrdup(wxLocale::GetSystemEncodingName());
2706 #endif // wxUSE_INTL
2708 self
->m_convReal
= DoCreate();
2709 self
->m_deferred
= false;
// Multibyte to wide conversion. The work is forwarded to the real converter
// (created on demand); the remaining lines copy psz to buf one character at
// a time, widening each byte as unsigned char, terminating NUL included.
// NOTE(review): the copy loop present here is bounded by strlen(psz) only,
// n does not appear in it -- confirm that callers size buf accordingly.
2713 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const
2715 CreateConvIfNeeded();
2718 return m_convReal
->MB2WC(buf
, psz
, n
);
2721 size_t len
= strlen(psz
);
// "<=" so that the terminating NUL is copied as well
2725 for (size_t c
= 0; c
<= len
; c
++)
2726 buf
[c
] = (unsigned char)(psz
[c
]);
// Wide to multibyte conversion. The work is forwarded to the real converter
// (created on demand); the remaining lines copy psz to buf one character at
// a time, narrowing each wchar_t to char, terminating NUL included.
// NOTE(review): the loops present here are bounded by wxWcslen(psz) only,
// n does not appear in them -- confirm that callers size buf accordingly.
2732 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const
2734 CreateConvIfNeeded();
2737 return m_convReal
->WC2MB(buf
, psz
, n
);
2740 const size_t len
= wxWcslen(psz
);
// first loop: copies into buf
2743 for (size_t c
= 0; c
<= len
; c
++)
2747 buf
[c
] = (char)psz
[c
];
// second loop over the same range; its body is not among the lines present
2752 for (size_t c
= 0; c
<= len
; c
++)
2762 // ----------------------------------------------------------------------------
2764 // ----------------------------------------------------------------------------
// File-local converter objects and the exported references/pointer bound to
// them. The type of wxConvLibcObj depends on the platform.
2767 static wxMBConv_win32 wxConvLibcObj
;
// NOTE(review): wxMBConv_mac is defined under __WXMAC__ && TARGET_CARBON,
// which is not the condition tested here -- confirm the guards agree
2768 #elif defined(__WXMAC__) && !defined(__MACH__)
2769 static wxMBConv_mac wxConvLibcObj
;
2771 static wxMBConvLibc wxConvLibcObj
;
// converters for the system encoding, ISO8859-1, UTF-7 and UTF-8
2774 static wxCSConv
wxConvLocalObj(wxFONTENCODING_SYSTEM
);
2775 static wxCSConv
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
);
2776 static wxMBConvUTF7 wxConvUTF7Obj
;
2777 static wxMBConvUTF8 wxConvUTF8Obj
;
// public names: references to the objects above
2780 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc
= wxConvLibcObj
;
2781 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal
= wxConvLocalObj
;
2782 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1
= wxConvISO8859_1Obj
;
2783 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7
= wxConvUTF7Obj
;
2784 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8
= wxConvUTF8Obj
;
// the current converter initially points to the libc one
2785 WXDLLIMPEXP_DATA_BASE(wxMBConv
*) wxConvCurrent
= &wxConvLibcObj
;
2787 #else // !wxUSE_WCHAR_T
2789 // stand-ins in absence of wchar_t
2790 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
,
2795 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T