]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
Warning fixes found under hardest mode of OpenWatcom. Seems clean in Borland, MinGW...
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
9 // (c) 2000-2003 Vadim Zeitlin
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ============================================================================
14 // declarations
15 // ============================================================================
16
17 // ----------------------------------------------------------------------------
18 // headers
19 // ----------------------------------------------------------------------------
20
21 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
22 #pragma implementation "strconv.h"
23 #endif
24
25 // For compilers that support precompilation, includes "wx.h".
26 #include "wx/wxprec.h"
27
28 #ifdef __BORLANDC__
29 #pragma hdrstop
30 #endif
31
32 #ifndef WX_PRECOMP
33 #include "wx/intl.h"
34 #include "wx/log.h"
35 #endif // WX_PRECOMP
36
37 #include "wx/strconv.h"
38
39 #if wxUSE_WCHAR_T
40
41 #ifdef __WXMSW__
42 #include "wx/msw/private.h"
43 #endif
44
45 #ifdef __WINDOWS__
46 #include "wx/msw/missing.h"
47 #endif
48
49 #ifndef __WXWINCE__
50 #include <errno.h>
51 #endif
52
53 #include <ctype.h>
54 #include <string.h>
55 #include <stdlib.h>
56
57 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
58 #define wxHAVE_WIN32_MB2WC
59 #endif // __WIN32__ but !__WXMICROWIN__
60
61 // ----------------------------------------------------------------------------
62 // headers
63 // ----------------------------------------------------------------------------
64
65 #ifdef __SALFORDC__
66 #include <clib.h>
67 #endif
68
69 #ifdef HAVE_ICONV
70 #include <iconv.h>
71 #endif
72
73 #include "wx/encconv.h"
74 #include "wx/fontmap.h"
75 #include "wx/utils.h"
76
77 #ifdef __WXMAC__
78 #include <ATSUnicode.h>
79 #include <TextCommon.h>
80 #include <TextEncodingConverter.h>
81
82 #include "wx/mac/private.h" // includes mac headers
83 #endif
84 // ----------------------------------------------------------------------------
85 // macros
86 // ----------------------------------------------------------------------------
87
// Byte-swap in place the first "len" elements of "str", applying
// wxUINT32_SWAP_ALWAYS (BSWAP_UCS4) or wxUINT16_SWAP_ALWAYS (BSWAP_UTF16) to
// each of them.
//
// NB: both macros expand to a braced block and so are used as statements
//     without a trailing semicolon; the arguments are not parenthesized in the
//     expansion so only simple expressions should be passed to them
#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
90
// Select, depending on the size of wchar_t:
//  - WC_NAME:      charset name for wchar_t without byte order information
//  - WC_NAME_BEST: the same but with explicit byte order (from WORDS_BIGENDIAN)
//  - WC_BSWAP:     the macro byte-swapping an array of wchar_t
//  - WC_UTF16:     defined only if wchar_t is 16 bit, i.e. holds UTF-16
#if SIZEOF_WCHAR_T == 4
    #define WC_NAME         "UCS4"
    #define WC_BSWAP         BSWAP_UCS4
    #ifdef WORDS_BIGENDIAN
      #define WC_NAME_BEST  "UCS-4BE"
    #else
      #define WC_NAME_BEST  "UCS-4LE"
    #endif
#elif SIZEOF_WCHAR_T == 2
    #define WC_NAME         "UTF16"
    #define WC_BSWAP         BSWAP_UTF16
    #define WC_UTF16
    #ifdef WORDS_BIGENDIAN
      #define WC_NAME_BEST  "UTF-16BE"
    #else
      #define WC_NAME_BEST  "UTF-16LE"
    #endif
#else // sizeof(wchar_t) != 2 nor 4
    // does this ever happen?
    #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
#endif
112
113 // ============================================================================
114 // implementation
115 // ============================================================================
116
117 // ----------------------------------------------------------------------------
118 // UTF-16 en/decoding to/from UCS-4
119 // ----------------------------------------------------------------------------
120
121
122 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
123 {
124 if (input<=0xffff)
125 {
126 if (output)
127 *output = (wxUint16) input;
128 return 1;
129 }
130 else if (input>=0x110000)
131 {
132 return (size_t)-1;
133 }
134 else
135 {
136 if (output)
137 {
138 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
139 *output = (wxUint16) ((input&0x3ff)+0xdc00);
140 }
141 return 2;
142 }
143 }
144
145 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
146 {
147 if ((*input<0xd800) || (*input>0xdfff))
148 {
149 output = *input;
150 return 1;
151 }
152 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
153 {
154 output = *input;
155 return (size_t)-1;
156 }
157 else
158 {
159 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
160 return 2;
161 }
162 }
163
164
165 // ----------------------------------------------------------------------------
166 // wxMBConv
167 // ----------------------------------------------------------------------------
168
169 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
170 {
171 if ( psz )
172 {
173 // calculate the length of the buffer needed first
174 size_t nLen = MB2WC(NULL, psz, 0);
175 if ( nLen != (size_t)-1 )
176 {
177 // now do the actual conversion
178 wxWCharBuffer buf(nLen);
179 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
180 if ( nLen != (size_t)-1 )
181 {
182 return buf;
183 }
184 }
185 }
186
187 wxWCharBuffer buf((wchar_t *)NULL);
188
189 return buf;
190 }
191
192 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
193 {
194 if ( pwz )
195 {
196 size_t nLen = WC2MB(NULL, pwz, 0);
197 if ( nLen != (size_t)-1 )
198 {
199 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
200 nLen = WC2MB(buf.data(), pwz, nLen + 4);
201 if ( nLen != (size_t)-1 )
202 {
203 return buf;
204 }
205 }
206 }
207
208 wxCharBuffer buf((char *)NULL);
209
210 return buf;
211 }
212
213 // ----------------------------------------------------------------------------
214 // wxMBConvLibc
215 // ----------------------------------------------------------------------------
216
// Multibyte to wide char conversion: simply forwards all of its arguments to
// wxMB2WC() and returns its result.
size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    return wxMB2WC(buf, psz, n);
}
221
// Wide char to multibyte conversion: simply forwards all of its arguments to
// wxWC2MB() and returns its result.
size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    return wxWC2MB(buf, psz, n);
}
226
227 // ----------------------------------------------------------------------------
228 // UTF-7
229 // ----------------------------------------------------------------------------
230
231 #if 0
232 static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
233 "abcdefghijklmnopqrstuvwxyz"
234 "0123456789'(),-./:?";
235 static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
236 static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
237 "abcdefghijklmnopqrstuvwxyz"
238 "0123456789+/";
239 #endif
240
// TODO: write actual implementations of UTF-7 here

// Placeholder: ignores all of its arguments, converts nothing and always
// returns 0.
size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
                           const char * WXUNUSED(psz),
                           size_t WXUNUSED(n)) const
{
    return 0;
}
248
// Placeholder: ignores all of its arguments, converts nothing and always
// returns 0.
size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
                           const wchar_t * WXUNUSED(psz),
                           size_t WXUNUSED(n)) const
{
    return 0;
}
255
256 // ----------------------------------------------------------------------------
257 // UTF-8
258 // ----------------------------------------------------------------------------
259
// utf8_max[i] is the biggest value which can be encoded using i + 1 bytes in
// UTF-8; the last element only serves to stop the loops searching this table
static wxUint32 utf8_max[]=
    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
262
263 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
264 {
265 size_t len = 0;
266
267 while (*psz && ((!buf) || (len < n)))
268 {
269 unsigned char cc = *psz++, fc = cc;
270 unsigned cnt;
271 for (cnt = 0; fc & 0x80; cnt++)
272 fc <<= 1;
273 if (!cnt)
274 {
275 // plain ASCII char
276 if (buf)
277 *buf++ = cc;
278 len++;
279 }
280 else
281 {
282 cnt--;
283 if (!cnt)
284 {
285 // invalid UTF-8 sequence
286 return (size_t)-1;
287 }
288 else
289 {
290 unsigned ocnt = cnt - 1;
291 wxUint32 res = cc & (0x3f >> cnt);
292 while (cnt--)
293 {
294 cc = *psz++;
295 if ((cc & 0xC0) != 0x80)
296 {
297 // invalid UTF-8 sequence
298 return (size_t)-1;
299 }
300 res = (res << 6) | (cc & 0x3f);
301 }
302 if (res <= utf8_max[ocnt])
303 {
304 // illegal UTF-8 encoding
305 return (size_t)-1;
306 }
307 #ifdef WC_UTF16
308 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
309 size_t pa = encode_utf16(res, (wxUint16 *)buf);
310 if (pa == (size_t)-1)
311 return (size_t)-1;
312 if (buf)
313 buf += pa;
314 len += pa;
315 #else // !WC_UTF16
316 if (buf)
317 *buf++ = res;
318 len++;
319 #endif // WC_UTF16/!WC_UTF16
320 }
321 }
322 }
323 if (buf && (len < n))
324 *buf = 0;
325 return len;
326 }
327
328 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
329 {
330 size_t len = 0;
331
332 while (*psz && ((!buf) || (len < n)))
333 {
334 wxUint32 cc;
335 #ifdef WC_UTF16
336 // cast is ok for WC_UTF16
337 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
338 psz += (pa == (size_t)-1) ? 1 : pa;
339 #else
340 cc=(*psz++) & 0x7fffffff;
341 #endif
342 unsigned cnt;
343 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
344 if (!cnt)
345 {
346 // plain ASCII char
347 if (buf)
348 *buf++ = (char) cc;
349 len++;
350 }
351
352 else
353 {
354 len += cnt + 1;
355 if (buf)
356 {
357 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
358 while (cnt--)
359 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
360 }
361 }
362 }
363
364 if (buf && (len<n)) *buf = 0;
365
366 return len;
367 }
368
369
370
371
372 // ----------------------------------------------------------------------------
373 // UTF-16
374 // ----------------------------------------------------------------------------
375
// The functions below are written in terms of a "straight" converter, which
// doesn't exchange the bytes of each 16 bit unit, and a "swap" one, which
// does: map these names to the BE and LE classes depending on whether
// WORDS_BIGENDIAN is defined
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
383
384
385 #ifdef WC_UTF16
386
387 // copy 16bit MB to 16bit String
388 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
389 {
390 size_t len=0;
391
392 while (*(wxUint16*)psz && (!buf || len < n))
393 {
394 if (buf)
395 *buf++ = *(wxUint16*)psz;
396 len++;
397
398 psz += sizeof(wxUint16);
399 }
400 if (buf && len<n) *buf=0;
401
402 return len;
403 }
404
405
406 // copy 16bit String to 16bit MB
407 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
408 {
409 size_t len=0;
410
411 while (*psz && (!buf || len < n))
412 {
413 if (buf)
414 {
415 *(wxUint16*)buf = *psz;
416 buf += sizeof(wxUint16);
417 }
418 len += sizeof(wxUint16);
419 psz++;
420 }
421 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
422
423 return len;
424 }
425
426
427 // swap 16bit MB to 16bit String
428 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
429 {
430 size_t len=0;
431
432 while (*(wxUint16*)psz && (!buf || len < n))
433 {
434 if (buf)
435 {
436 ((char *)buf)[0] = psz[1];
437 ((char *)buf)[1] = psz[0];
438 buf++;
439 }
440 len++;
441 psz += sizeof(wxUint16);
442 }
443 if (buf && len<n) *buf=0;
444
445 return len;
446 }
447
448
449 // swap 16bit MB to 16bit String
450 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
451 {
452 size_t len=0;
453
454 while (*psz && (!buf || len < n))
455 {
456 if (buf)
457 {
458 *buf++ = ((char*)psz)[1];
459 *buf++ = ((char*)psz)[0];
460 }
461 len += sizeof(wxUint16);
462 psz++;
463 }
464 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
465
466 return len;
467 }
468
469
470 #else // WC_UTF16
471
472
473 // copy 16bit MB to 32bit String
474 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
475 {
476 size_t len=0;
477
478 while (*(wxUint16*)psz && (!buf || len < n))
479 {
480 wxUint32 cc;
481 size_t pa=decode_utf16((wxUint16*)psz, cc);
482 if (pa == (size_t)-1)
483 return pa;
484
485 if (buf)
486 *buf++ = cc;
487 len++;
488 psz += pa * sizeof(wxUint16);
489 }
490 if (buf && len<n) *buf=0;
491
492 return len;
493 }
494
495
496 // copy 32bit String to 16bit MB
497 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
498 {
499 size_t len=0;
500
501 while (*psz && (!buf || len < n))
502 {
503 wxUint16 cc[2];
504 size_t pa=encode_utf16(*psz, cc);
505
506 if (pa == (size_t)-1)
507 return pa;
508
509 if (buf)
510 {
511 *(wxUint16*)buf = cc[0];
512 buf += sizeof(wxUint16);
513 if (pa > 1)
514 {
515 *(wxUint16*)buf = cc[1];
516 buf += sizeof(wxUint16);
517 }
518 }
519
520 len += pa*sizeof(wxUint16);
521 psz++;
522 }
523 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
524
525 return len;
526 }
527
528
529 // swap 16bit MB to 32bit String
530 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
531 {
532 size_t len=0;
533
534 while (*(wxUint16*)psz && (!buf || len < n))
535 {
536 wxUint32 cc;
537 char tmp[4];
538 tmp[0]=psz[1]; tmp[1]=psz[0];
539 tmp[2]=psz[3]; tmp[3]=psz[2];
540
541 size_t pa=decode_utf16((wxUint16*)tmp, cc);
542 if (pa == (size_t)-1)
543 return pa;
544
545 if (buf)
546 *buf++ = cc;
547
548 len++;
549 psz += pa * sizeof(wxUint16);
550 }
551 if (buf && len<n) *buf=0;
552
553 return len;
554 }
555
556
557 // swap 32bit String to 16bit MB
558 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
559 {
560 size_t len=0;
561
562 while (*psz && (!buf || len < n))
563 {
564 wxUint16 cc[2];
565 size_t pa=encode_utf16(*psz, cc);
566
567 if (pa == (size_t)-1)
568 return pa;
569
570 if (buf)
571 {
572 *buf++ = ((char*)cc)[1];
573 *buf++ = ((char*)cc)[0];
574 if (pa > 1)
575 {
576 *buf++ = ((char*)cc)[3];
577 *buf++ = ((char*)cc)[2];
578 }
579 }
580
581 len += pa*sizeof(wxUint16);
582 psz++;
583 }
584 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
585
586 return len;
587 }
588
589 #endif // WC_UTF16
590
591
592 // ----------------------------------------------------------------------------
593 // UTF-32
594 // ----------------------------------------------------------------------------
595
// As for UTF-16 above, the "straight" converter doesn't exchange the bytes of
// each 32 bit unit while the "swap" one does: map these names to the BE and
// LE classes depending on whether WORDS_BIGENDIAN is defined
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF32straight wxMBConvUTF32BE
    #define wxMBConvUTF32swap     wxMBConvUTF32LE
#else
    #define wxMBConvUTF32swap     wxMBConvUTF32BE
    #define wxMBConvUTF32straight wxMBConvUTF32LE
#endif


// the global UTF-32 converter objects, one per byte order
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
607
608
609 #ifdef WC_UTF16
610
611 // copy 32bit MB to 16bit String
612 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
613 {
614 size_t len=0;
615
616 while (*(wxUint32*)psz && (!buf || len < n))
617 {
618 wxUint16 cc[2];
619
620 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
621 if (pa == (size_t)-1)
622 return pa;
623
624 if (buf)
625 {
626 *buf++ = cc[0];
627 if (pa > 1)
628 *buf++ = cc[1];
629 }
630 len += pa;
631 psz += sizeof(wxUint32);
632 }
633 if (buf && len<n) *buf=0;
634
635 return len;
636 }
637
638
639 // copy 16bit String to 32bit MB
640 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
641 {
642 size_t len=0;
643
644 while (*psz && (!buf || len < n))
645 {
646 wxUint32 cc;
647
648 // cast is ok for WC_UTF16
649 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
650 if (pa == (size_t)-1)
651 return pa;
652
653 if (buf)
654 {
655 *(wxUint32*)buf = cc;
656 buf += sizeof(wxUint32);
657 }
658 len += sizeof(wxUint32);
659 psz += pa;
660 }
661
662 if (buf && len<=n-sizeof(wxUint32))
663 *(wxUint32*)buf=0;
664
665 return len;
666 }
667
668
669
670 // swap 32bit MB to 16bit String
671 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
672 {
673 size_t len=0;
674
675 while (*(wxUint32*)psz && (!buf || len < n))
676 {
677 char tmp[4];
678 tmp[0] = psz[3]; tmp[1] = psz[2];
679 tmp[2] = psz[1]; tmp[3] = psz[0];
680
681
682 wxUint16 cc[2];
683
684 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
685 if (pa == (size_t)-1)
686 return pa;
687
688 if (buf)
689 {
690 *buf++ = cc[0];
691 if (pa > 1)
692 *buf++ = cc[1];
693 }
694 len += pa;
695 psz += sizeof(wxUint32);
696 }
697
698 if (buf && len<n)
699 *buf=0;
700
701 return len;
702 }
703
704
705 // swap 16bit String to 32bit MB
706 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
707 {
708 size_t len=0;
709
710 while (*psz && (!buf || len < n))
711 {
712 char cc[4];
713
714 // cast is ok for WC_UTF16
715 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
716 if (pa == (size_t)-1)
717 return pa;
718
719 if (buf)
720 {
721 *buf++ = cc[3];
722 *buf++ = cc[2];
723 *buf++ = cc[1];
724 *buf++ = cc[0];
725 }
726 len += sizeof(wxUint32);
727 psz += pa;
728 }
729
730 if (buf && len<=n-sizeof(wxUint32))
731 *(wxUint32*)buf=0;
732
733 return len;
734 }
735
736 #else // WC_UTF16
737
738
739 // copy 32bit MB to 32bit String
740 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
741 {
742 size_t len=0;
743
744 while (*(wxUint32*)psz && (!buf || len < n))
745 {
746 if (buf)
747 *buf++ = *(wxUint32*)psz;
748 len++;
749 psz += sizeof(wxUint32);
750 }
751
752 if (buf && len<n)
753 *buf=0;
754
755 return len;
756 }
757
758
759 // copy 32bit String to 32bit MB
760 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
761 {
762 size_t len=0;
763
764 while (*psz && (!buf || len < n))
765 {
766 if (buf)
767 {
768 *(wxUint32*)buf = *psz;
769 buf += sizeof(wxUint32);
770 }
771
772 len += sizeof(wxUint32);
773 psz++;
774 }
775
776 if (buf && len<=n-sizeof(wxUint32))
777 *(wxUint32*)buf=0;
778
779 return len;
780 }
781
782
783 // swap 32bit MB to 32bit String
784 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
785 {
786 size_t len=0;
787
788 while (*(wxUint32*)psz && (!buf || len < n))
789 {
790 if (buf)
791 {
792 ((char *)buf)[0] = psz[3];
793 ((char *)buf)[1] = psz[2];
794 ((char *)buf)[2] = psz[1];
795 ((char *)buf)[3] = psz[0];
796 buf++;
797 }
798 len++;
799 psz += sizeof(wxUint32);
800 }
801
802 if (buf && len<n)
803 *buf=0;
804
805 return len;
806 }
807
808
809 // swap 32bit String to 32bit MB
810 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
811 {
812 size_t len=0;
813
814 while (*psz && (!buf || len < n))
815 {
816 if (buf)
817 {
818 *buf++ = ((char *)psz)[3];
819 *buf++ = ((char *)psz)[2];
820 *buf++ = ((char *)psz)[1];
821 *buf++ = ((char *)psz)[0];
822 }
823 len += sizeof(wxUint32);
824 psz++;
825 }
826
827 if (buf && len<=n-sizeof(wxUint32))
828 *(wxUint32*)buf=0;
829
830 return len;
831 }
832
833
834 #endif // WC_UTF16
835
836
837 // ============================================================================
838 // The classes doing conversion using the iconv_xxx() functions
839 // ============================================================================
840
841 #ifdef HAVE_ICONV
842
843 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
844 // if output buffer is _exactly_ as big as needed. Such case is (unless there's
845 // yet another bug in glibc) the only case when iconv() returns with (size_t)-1
846 // (which means error) and says there are 0 bytes left in the input buffer --
847 // when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
848 // this alternative test for iconv() failure.
849 // [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft): true if the iconv() call which returned "cres"
// and left "bufLeft" bytes in its input buffer should be considered as failed
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
                                     (errno != E2BIG || bufLeft != 0))
#else
#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
#endif

// cast the address of the input pointer to the type used for the second
// argument of iconv() (ICONV_CONST is not defined in this file, presumably it
// comes from the build configuration)
#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
858
859 // ----------------------------------------------------------------------------
860 // wxMBConv_iconv: encapsulates an iconv character set
861 // ----------------------------------------------------------------------------
862
// wxMBConv implementation using a pair of iconv conversion descriptors, one
// for each direction, to convert between a named charset and wchar_t
class wxMBConv_iconv : public wxMBConv
{
public:
    // tries to open both descriptors for the charset "name", check IsOk() to
    // see whether this succeeded
    wxMBConv_iconv(const wxChar *name);

    // closes the descriptors which were successfully opened
    virtual ~wxMBConv_iconv();

    // both functions return the length of the result or (size_t)-1 on error
    virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
    virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;

    // true only if both descriptors are valid
    bool IsOk() const
        { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }

protected:
    // the iconv handlers used to translate from multibyte to wide char and in
    // the other direction
    iconv_t m2w,
            w2m;

private:
    // the name (for iconv_open()) of a wide char charset -- if none is
    // available on this machine, it will remain NULL
    static const char *ms_wcCharsetName;

    // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
    // different endian-ness than the native one
    static bool ms_wcNeedsSwap;
};
890
// both of these are shared by all wxMBConv_iconv objects and are determined
// by the constructor for as long as ms_wcCharsetName is still NULL
const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
bool wxMBConv_iconv::ms_wcNeedsSwap = false;
893
894 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
895 {
896 // Do it the hard way
897 char cname[100];
898 for (size_t i = 0; i < wxStrlen(name)+1; i++)
899 cname[i] = (char) name[i];
900
901 // check for charset that represents wchar_t:
902 if (ms_wcCharsetName == NULL)
903 {
904 ms_wcNeedsSwap = false;
905
906 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
907 ms_wcCharsetName = WC_NAME_BEST;
908 m2w = iconv_open(ms_wcCharsetName, cname);
909
910 if (m2w == (iconv_t)-1)
911 {
912 // try charset w/o bytesex info (e.g. "UCS4")
913 // and check for bytesex ourselves:
914 ms_wcCharsetName = WC_NAME;
915 m2w = iconv_open(ms_wcCharsetName, cname);
916
917 // last bet, try if it knows WCHAR_T pseudo-charset
918 if (m2w == (iconv_t)-1)
919 {
920 ms_wcCharsetName = "WCHAR_T";
921 m2w = iconv_open(ms_wcCharsetName, cname);
922 }
923
924 if (m2w != (iconv_t)-1)
925 {
926 char buf[2], *bufPtr;
927 wchar_t wbuf[2], *wbufPtr;
928 size_t insz, outsz;
929 size_t res;
930
931 buf[0] = 'A';
932 buf[1] = 0;
933 wbuf[0] = 0;
934 insz = 2;
935 outsz = SIZEOF_WCHAR_T * 2;
936 wbufPtr = wbuf;
937 bufPtr = buf;
938
939 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
940 (char**)&wbufPtr, &outsz);
941
942 if (ICONV_FAILED(res, insz))
943 {
944 ms_wcCharsetName = NULL;
945 wxLogLastError(wxT("iconv"));
946 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
947 }
948 else
949 {
950 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
951 }
952 }
953 else
954 {
955 ms_wcCharsetName = NULL;
956
957 // VS: we must not output an error here, since wxWidgets will safely
958 // fall back to using wxEncodingConverter.
959 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
960 //wxLogError(
961 }
962 }
963 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
964 }
965 else // we already have ms_wcCharsetName
966 {
967 m2w = iconv_open(ms_wcCharsetName, cname);
968 }
969
970 // NB: don't ever pass NULL to iconv_open(), it may crash!
971 if ( ms_wcCharsetName )
972 {
973 w2m = iconv_open( cname, ms_wcCharsetName);
974 }
975 else
976 {
977 w2m = (iconv_t)-1;
978 }
979 }
980
981 wxMBConv_iconv::~wxMBConv_iconv()
982 {
983 if ( m2w != (iconv_t)-1 )
984 iconv_close(m2w);
985 if ( w2m != (iconv_t)-1 )
986 iconv_close(w2m);
987 }
988
// Convert the NUL-terminated multibyte string "psz" to wide characters using
// the m2w descriptor.
//
// If "buf" is not NULL, the result is stored in it: it has room for "n" wide
// characters and is NUL-terminated only if the result is shorter than that.
// If "buf" is NULL, the string is converted piece by piece into a small
// temporary buffer just to compute the length of the result.
//
// Returns the number of wide characters in the result, not counting the
// terminating NUL, or (size_t)-1 if ICONV_FAILED() says that iconv() failed.
size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    // iconv() counts the input and the output in bytes
    size_t inbuf = strlen(psz);
    size_t outbuf = n * SIZEOF_WCHAR_T;
    size_t res, cres;
    // VS: Use these instead of psz, buf because iconv() modifies its arguments:
    wchar_t *bufPtr = buf;
    const char *pszPtr = psz;

    if (buf)
    {
        // have destination buffer, convert there
        cres = iconv(m2w,
                     ICONV_CHAR_CAST(&pszPtr), &inbuf,
                     (char**)&bufPtr, &outbuf);

        // outbuf is now the number of bytes left unused in the output buffer
        res = n - (outbuf / SIZEOF_WCHAR_T);

        if (ms_wcNeedsSwap)
        {
            // convert to native endianness
            WC_BSWAP(buf /* _not_ bufPtr */, res)
        }

        // NB: iconv was given only strlen(psz) characters on input, and so
        //     it couldn't convert the trailing zero. Let's do it ourselves
        //     if there's some room left for it in the output buffer.
        if (res < n)
            buf[res] = 0;
    }
    else
    {
        // no destination buffer... convert using temp buffer
        // to calculate destination buffer requirement
        wchar_t tbuf[8];
        res = 0;
        do {
            // restart from the beginning of the temporary buffer each time
            bufPtr = tbuf;
            outbuf = 8*SIZEOF_WCHAR_T;

            cres = iconv(m2w,
                         ICONV_CHAR_CAST(&pszPtr), &inbuf,
                         (char**)&bufPtr, &outbuf );

            res += 8-(outbuf/SIZEOF_WCHAR_T);

            // E2BIG means that the temporary buffer is full, so go on
        } while ((cres==(size_t)-1) && (errno==E2BIG));
    }

    if (ICONV_FAILED(cres, inbuf))
    {
        //VS: it is ok if iconv fails, hence trace only
        wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
        return (size_t)-1;
    }

    return res;
}
1045
1046 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1047 {
1048 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1049 size_t outbuf = n;
1050 size_t res, cres;
1051
1052 wchar_t *tmpbuf = 0;
1053
1054 if (ms_wcNeedsSwap)
1055 {
1056 // need to copy to temp buffer to switch endianness
1057 // this absolutely doesn't rock!
1058 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1059 // could be in read-only memory, or be accessed in some other thread)
1060 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1061 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1062 WC_BSWAP(tmpbuf, inbuf)
1063 psz=tmpbuf;
1064 }
1065
1066 if (buf)
1067 {
1068 // have destination buffer, convert there
1069 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1070
1071 res = n-outbuf;
1072
1073 // NB: iconv was given only wcslen(psz) characters on input, and so
1074 // it couldn't convert the trailing zero. Let's do it ourselves
1075 // if there's some room left for it in the output buffer.
1076 if (res < n)
1077 buf[0] = 0;
1078 }
1079 else
1080 {
1081 // no destination buffer... convert using temp buffer
1082 // to calculate destination buffer requirement
1083 char tbuf[16];
1084 res = 0;
1085 do {
1086 buf = tbuf; outbuf = 16;
1087
1088 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1089
1090 res += 16 - outbuf;
1091 } while ((cres==(size_t)-1) && (errno==E2BIG));
1092 }
1093
1094 if (ms_wcNeedsSwap)
1095 {
1096 free(tmpbuf);
1097 }
1098
1099 if (ICONV_FAILED(cres, inbuf))
1100 {
1101 //VS: it is ok if iconv fails, hence trace only
1102 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1103 return (size_t)-1;
1104 }
1105
1106 return res;
1107 }
1108
1109 #endif // HAVE_ICONV
1110
1111
1112 // ============================================================================
1113 // Win32 conversion classes
1114 // ============================================================================
1115
1116 #ifdef wxHAVE_WIN32_MB2WC
1117
// from utils.cpp: these functions are used by the wxMBConv_win32 ctors to
// initialize the code page from a charset name or a font encoding
#if wxUSE_FONTMAP
extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
#endif
1123
// wxMBConv implementation using the Win32 MultiByteToWideChar() and
// WideCharToMultiByte() functions with the code page stored in m_CodePage
class wxMBConv_win32 : public wxMBConv
{
public:
    // default ctor uses CP_ACP as code page
    wxMBConv_win32()
    {
        m_CodePage = CP_ACP;
    }

#if wxUSE_FONTMAP
    // ctors using the code page returned by wxCharsetToCodepage() or
    // wxEncodingToCodepage() for the given charset or encoding: IsOk()
    // returns false afterwards if this code page is -1
    wxMBConv_win32(const wxChar* name)
    {
        m_CodePage = wxCharsetToCodepage(name);
    }

    wxMBConv_win32(wxFontEncoding encoding)
    {
        m_CodePage = wxEncodingToCodepage(encoding);
    }
#endif

    // convert the NUL-terminated string psz to wide chars, storing them in
    // buf of size n if it is not NULL: returns the length of the result, not
    // counting the terminating NUL, or (size_t)-1 on error
    size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
    {
        // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
        // the behaviour is not compatible with the Unix version (using iconv)
        // and break the library itself, e.g. wxTextInputStream::NextChar()
        // wouldn't work if reading an incomplete MB char didn't result in an
        // error
        const size_t len = ::MultiByteToWideChar
                             (
                                m_CodePage,     // code page
                                MB_ERR_INVALID_CHARS, // flags: fall on error
                                psz,            // input string
                                -1,             // its length (NUL-terminated)
                                buf,            // output string
                                buf ? n : 0     // size of output buffer
                             );

        // note that it returns count of written chars for buf != NULL and size
        // of the needed buffer for buf == NULL so in either case the length of
        // the string (which never includes the terminating NUL) is one less
        return len ? len - 1 : (size_t)-1;
    }

    // convert the NUL-terminated wide string pwz to multibyte, storing the
    // result in buf of size n if it is not NULL: returns the length of the
    // result, not counting the terminating NUL, or (size_t)-1 if the
    // conversion failed or was found to be lossy
    size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
    {
        /*
            we have a problem here: by default, WideCharToMultiByte() may
            replace characters unrepresentable in the target code page with bad
            quality approximations such as turning "1/2" symbol (U+00BD) into
            "1" for the code pages which don't have it and we, obviously, want
            to avoid this at any price

            the trouble is that this function does it _silently_, i.e. it won't
            even tell us whether it did or not... Win98/2000 and higher provide
            WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
            we have to resort to a round trip, i.e. check that converting back
            results in the same string -- this is, of course, expensive but
            otherwise we simply can't be sure to not garble the data.
         */

        // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
        // it doesn't work with CJK encodings (which we test for rather roughly
        // here...) nor with UTF-7/8 nor, of course, with Windows versions not
        // supporting it
        BOOL usedDef wxDUMMY_INITIALIZE(false);
        BOOL *pUsedDef;
        int flags;
        if ( CanUseNoBestFit() && m_CodePage < 50000 )
        {
            // it's our lucky day
            flags = WC_NO_BEST_FIT_CHARS;
            pUsedDef = &usedDef;
        }
        else // old system or unsupported encoding
        {
            flags = 0;
            pUsedDef = NULL;
        }

        const size_t len = ::WideCharToMultiByte
                             (
                                m_CodePage,     // code page
                                flags,          // either none or no best fit
                                pwz,            // input string
                                -1,             // it is (wide) NUL-terminated
                                buf,            // output buffer
                                buf ? n : 0,    // and its size
                                NULL,           // default "replacement" char
                                pUsedDef        // [out] was it used?
                             );

        if ( !len )
        {
            // function totally failed
            return (size_t)-1;
        }

        // if we were really converting, check if we succeeded
        if ( buf )
        {
            if ( flags )
            {
                // check if the conversion failed, i.e. if any replacements
                // were done
                if ( usedDef )
                    return (size_t)-1;
            }
            else // we must resort to double tripping...
            {
                // convert the result back and compare it with the original
                wxWCharBuffer wcBuf(n);
                if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
                        wcscmp(wcBuf, pwz) != 0 )
                {
                    // we didn't obtain the same thing we started from, hence
                    // the conversion was lossy and we consider that it failed
                    return (size_t)-1;
                }
            }
        }

        // see the comment above for the reason of "len - 1"
        return len - 1;
    }

    // the object can only be used if the code page is not -1
    bool IsOk() const { return m_CodePage != -1; }

private:
    // returns true if the OS version reported by wxGetOsVersion() passes the
    // checks below; the result is computed only once and cached in a static
    static bool CanUseNoBestFit()
    {
        // -1 means "not determined yet", otherwise it is 0 or 1
        static int s_isWin98Or2k = -1;

        if ( s_isWin98Or2k == -1 )
        {
            int verMaj, verMin;
            switch ( wxGetOsVersion(&verMaj, &verMin) )
            {
                case wxWIN95:
                    s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
                    break;

                case wxWINDOWS_NT:
                    s_isWin98Or2k = verMaj >= 5;
                    break;

                default:
                    // unknown, be conservative by default
                    s_isWin98Or2k = 0;
            }

            wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
        }

        return s_isWin98Or2k == 1;
    }

    // the code page used for the conversions, -1 if it is invalid
    long m_CodePage;
};
1281
1282 #endif // wxHAVE_WIN32_MB2WC
1283
1284 // ============================================================================
1285 // Cocoa conversion classes
1286 // ============================================================================
1287
1288 #if defined(__WXCOCOA__)
1289
// RN: There is no UTF-32 support in either Core Foundation or
// Cocoa. Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1293
1294 #include <CoreFoundation/CFString.h>
1295 #include <CoreFoundation/CFStringEncodingExt.h>
1296
1297 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1298 {
1299 CFStringEncoding enc = 0 ;
1300 if ( encoding == wxFONTENCODING_DEFAULT )
1301 {
1302 #if wxUSE_GUI
1303 encoding = wxFont::GetDefaultEncoding() ;
1304 #else
1305 encoding = wxLocale::GetSystemEncoding() ;
1306 #endif
1307 }
1308 else switch( encoding)
1309 {
1310 case wxFONTENCODING_ISO8859_1 :
1311 enc = kCFStringEncodingISOLatin1 ;
1312 break ;
1313 case wxFONTENCODING_ISO8859_2 :
1314 enc = kCFStringEncodingISOLatin2;
1315 break ;
1316 case wxFONTENCODING_ISO8859_3 :
1317 enc = kCFStringEncodingISOLatin3 ;
1318 break ;
1319 case wxFONTENCODING_ISO8859_4 :
1320 enc = kCFStringEncodingISOLatin4;
1321 break ;
1322 case wxFONTENCODING_ISO8859_5 :
1323 enc = kCFStringEncodingISOLatinCyrillic;
1324 break ;
1325 case wxFONTENCODING_ISO8859_6 :
1326 enc = kCFStringEncodingISOLatinArabic;
1327 break ;
1328 case wxFONTENCODING_ISO8859_7 :
1329 enc = kCFStringEncodingISOLatinGreek;
1330 break ;
1331 case wxFONTENCODING_ISO8859_8 :
1332 enc = kCFStringEncodingISOLatinHebrew;
1333 break ;
1334 case wxFONTENCODING_ISO8859_9 :
1335 enc = kCFStringEncodingISOLatin5;
1336 break ;
1337 case wxFONTENCODING_ISO8859_10 :
1338 enc = kCFStringEncodingISOLatin6;
1339 break ;
1340 case wxFONTENCODING_ISO8859_11 :
1341 enc = kCFStringEncodingISOLatinThai;
1342 break ;
1343 case wxFONTENCODING_ISO8859_13 :
1344 enc = kCFStringEncodingISOLatin7;
1345 break ;
1346 case wxFONTENCODING_ISO8859_14 :
1347 enc = kCFStringEncodingISOLatin8;
1348 break ;
1349 case wxFONTENCODING_ISO8859_15 :
1350 enc = kCFStringEncodingISOLatin9;
1351 break ;
1352
1353 case wxFONTENCODING_KOI8 :
1354 enc = kCFStringEncodingKOI8_R;
1355 break ;
1356 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1357 enc = kCFStringEncodingDOSRussian;
1358 break ;
1359
1360 // case wxFONTENCODING_BULGARIAN :
1361 // enc = ;
1362 // break ;
1363
1364 case wxFONTENCODING_CP437 :
1365 enc =kCFStringEncodingDOSLatinUS ;
1366 break ;
1367 case wxFONTENCODING_CP850 :
1368 enc = kCFStringEncodingDOSLatin1;
1369 break ;
1370 case wxFONTENCODING_CP852 :
1371 enc = kCFStringEncodingDOSLatin2;
1372 break ;
1373 case wxFONTENCODING_CP855 :
1374 enc = kCFStringEncodingDOSCyrillic;
1375 break ;
1376 case wxFONTENCODING_CP866 :
1377 enc =kCFStringEncodingDOSRussian ;
1378 break ;
1379 case wxFONTENCODING_CP874 :
1380 enc = kCFStringEncodingDOSThai;
1381 break ;
1382 case wxFONTENCODING_CP932 :
1383 enc = kCFStringEncodingDOSJapanese;
1384 break ;
1385 case wxFONTENCODING_CP936 :
1386 enc =kCFStringEncodingDOSChineseSimplif ;
1387 break ;
1388 case wxFONTENCODING_CP949 :
1389 enc = kCFStringEncodingDOSKorean;
1390 break ;
1391 case wxFONTENCODING_CP950 :
1392 enc = kCFStringEncodingDOSChineseTrad;
1393 break ;
1394
1395 case wxFONTENCODING_CP1250 :
1396 enc = kCFStringEncodingWindowsLatin2;
1397 break ;
1398 case wxFONTENCODING_CP1251 :
1399 enc =kCFStringEncodingWindowsCyrillic ;
1400 break ;
1401 case wxFONTENCODING_CP1252 :
1402 enc =kCFStringEncodingWindowsLatin1 ;
1403 break ;
1404 case wxFONTENCODING_CP1253 :
1405 enc = kCFStringEncodingWindowsGreek;
1406 break ;
1407 case wxFONTENCODING_CP1254 :
1408 enc = kCFStringEncodingWindowsLatin5;
1409 break ;
1410 case wxFONTENCODING_CP1255 :
1411 enc =kCFStringEncodingWindowsHebrew ;
1412 break ;
1413 case wxFONTENCODING_CP1256 :
1414 enc =kCFStringEncodingWindowsArabic ;
1415 break ;
1416 case wxFONTENCODING_CP1257 :
1417 enc = kCFStringEncodingWindowsBalticRim;
1418 break ;
1419 case wxFONTENCODING_UTF7 :
1420 enc = kCFStringEncodingNonLossyASCII ;
1421 break ;
1422 case wxFONTENCODING_UTF8 :
1423 enc = kCFStringEncodingUTF8 ;
1424 break ;
1425 case wxFONTENCODING_EUC_JP :
1426 enc = kCFStringEncodingEUC_JP;
1427 break ;
1428 case wxFONTENCODING_UTF16 :
1429 enc = kCFStringEncodingUnicode ;
1430 break ;
1431 case wxFONTENCODING_MACROMAN :
1432 enc = kCFStringEncodingMacRoman ;
1433 break ;
1434 case wxFONTENCODING_MACJAPANESE :
1435 enc = kCFStringEncodingMacJapanese ;
1436 break ;
1437 case wxFONTENCODING_MACCHINESETRAD :
1438 enc = kCFStringEncodingMacChineseTrad ;
1439 break ;
1440 case wxFONTENCODING_MACKOREAN :
1441 enc = kCFStringEncodingMacKorean ;
1442 break ;
1443 case wxFONTENCODING_MACARABIC :
1444 enc = kCFStringEncodingMacArabic ;
1445 break ;
1446 case wxFONTENCODING_MACHEBREW :
1447 enc = kCFStringEncodingMacHebrew ;
1448 break ;
1449 case wxFONTENCODING_MACGREEK :
1450 enc = kCFStringEncodingMacGreek ;
1451 break ;
1452 case wxFONTENCODING_MACCYRILLIC :
1453 enc = kCFStringEncodingMacCyrillic ;
1454 break ;
1455 case wxFONTENCODING_MACDEVANAGARI :
1456 enc = kCFStringEncodingMacDevanagari ;
1457 break ;
1458 case wxFONTENCODING_MACGURMUKHI :
1459 enc = kCFStringEncodingMacGurmukhi ;
1460 break ;
1461 case wxFONTENCODING_MACGUJARATI :
1462 enc = kCFStringEncodingMacGujarati ;
1463 break ;
1464 case wxFONTENCODING_MACORIYA :
1465 enc = kCFStringEncodingMacOriya ;
1466 break ;
1467 case wxFONTENCODING_MACBENGALI :
1468 enc = kCFStringEncodingMacBengali ;
1469 break ;
1470 case wxFONTENCODING_MACTAMIL :
1471 enc = kCFStringEncodingMacTamil ;
1472 break ;
1473 case wxFONTENCODING_MACTELUGU :
1474 enc = kCFStringEncodingMacTelugu ;
1475 break ;
1476 case wxFONTENCODING_MACKANNADA :
1477 enc = kCFStringEncodingMacKannada ;
1478 break ;
1479 case wxFONTENCODING_MACMALAJALAM :
1480 enc = kCFStringEncodingMacMalayalam ;
1481 break ;
1482 case wxFONTENCODING_MACSINHALESE :
1483 enc = kCFStringEncodingMacSinhalese ;
1484 break ;
1485 case wxFONTENCODING_MACBURMESE :
1486 enc = kCFStringEncodingMacBurmese ;
1487 break ;
1488 case wxFONTENCODING_MACKHMER :
1489 enc = kCFStringEncodingMacKhmer ;
1490 break ;
1491 case wxFONTENCODING_MACTHAI :
1492 enc = kCFStringEncodingMacThai ;
1493 break ;
1494 case wxFONTENCODING_MACLAOTIAN :
1495 enc = kCFStringEncodingMacLaotian ;
1496 break ;
1497 case wxFONTENCODING_MACGEORGIAN :
1498 enc = kCFStringEncodingMacGeorgian ;
1499 break ;
1500 case wxFONTENCODING_MACARMENIAN :
1501 enc = kCFStringEncodingMacArmenian ;
1502 break ;
1503 case wxFONTENCODING_MACCHINESESIMP :
1504 enc = kCFStringEncodingMacChineseSimp ;
1505 break ;
1506 case wxFONTENCODING_MACTIBETAN :
1507 enc = kCFStringEncodingMacTibetan ;
1508 break ;
1509 case wxFONTENCODING_MACMONGOLIAN :
1510 enc = kCFStringEncodingMacMongolian ;
1511 break ;
1512 case wxFONTENCODING_MACETHIOPIC :
1513 enc = kCFStringEncodingMacEthiopic ;
1514 break ;
1515 case wxFONTENCODING_MACCENTRALEUR :
1516 enc = kCFStringEncodingMacCentralEurRoman ;
1517 break ;
1518 case wxFONTENCODING_MACVIATNAMESE :
1519 enc = kCFStringEncodingMacVietnamese ;
1520 break ;
1521 case wxFONTENCODING_MACARABICEXT :
1522 enc = kCFStringEncodingMacExtArabic ;
1523 break ;
1524 case wxFONTENCODING_MACSYMBOL :
1525 enc = kCFStringEncodingMacSymbol ;
1526 break ;
1527 case wxFONTENCODING_MACDINGBATS :
1528 enc = kCFStringEncodingMacDingbats ;
1529 break ;
1530 case wxFONTENCODING_MACTURKISH :
1531 enc = kCFStringEncodingMacTurkish ;
1532 break ;
1533 case wxFONTENCODING_MACCROATIAN :
1534 enc = kCFStringEncodingMacCroatian ;
1535 break ;
1536 case wxFONTENCODING_MACICELANDIC :
1537 enc = kCFStringEncodingMacIcelandic ;
1538 break ;
1539 case wxFONTENCODING_MACROMANIAN :
1540 enc = kCFStringEncodingMacRomanian ;
1541 break ;
1542 case wxFONTENCODING_MACCELTIC :
1543 enc = kCFStringEncodingMacCeltic ;
1544 break ;
1545 case wxFONTENCODING_MACGAELIC :
1546 enc = kCFStringEncodingMacGaelic ;
1547 break ;
1548 // case wxFONTENCODING_MACKEYBOARD :
1549 // enc = kCFStringEncodingMacKeyboardGlyphs ;
1550 // break ;
1551 default :
1552 // because gcc is picky
1553 break ;
1554 } ;
1555 return enc ;
1556 }
1557
1558 wxFontEncoding wxFontEncFromCFStringEnc(CFStringEncoding encoding)
1559 {
1560 wxFontEncoding enc = wxFONTENCODING_DEFAULT ;
1561
1562 switch( encoding)
1563 {
1564 case kCFStringEncodingISOLatin1 :
1565 enc = wxFONTENCODING_ISO8859_1 ;
1566 break ;
1567 case kCFStringEncodingISOLatin2 :
1568 enc = wxFONTENCODING_ISO8859_2;
1569 break ;
1570 case kCFStringEncodingISOLatin3 :
1571 enc = wxFONTENCODING_ISO8859_3 ;
1572 break ;
1573 case kCFStringEncodingISOLatin4 :
1574 enc = wxFONTENCODING_ISO8859_4;
1575 break ;
1576 case kCFStringEncodingISOLatinCyrillic :
1577 enc = wxFONTENCODING_ISO8859_5;
1578 break ;
1579 case kCFStringEncodingISOLatinArabic :
1580 enc = wxFONTENCODING_ISO8859_6;
1581 break ;
1582 case kCFStringEncodingISOLatinGreek :
1583 enc = wxFONTENCODING_ISO8859_7;
1584 break ;
1585 case kCFStringEncodingISOLatinHebrew :
1586 enc = wxFONTENCODING_ISO8859_8;
1587 break ;
1588 case kCFStringEncodingISOLatin5 :
1589 enc = wxFONTENCODING_ISO8859_9;
1590 break ;
1591 case kCFStringEncodingISOLatin6 :
1592 enc = wxFONTENCODING_ISO8859_10;
1593 break ;
1594 case kCFStringEncodingISOLatin7 :
1595 enc = wxFONTENCODING_ISO8859_13;
1596 break ;
1597 case kCFStringEncodingISOLatin8 :
1598 enc = wxFONTENCODING_ISO8859_14;
1599 break ;
1600 case kCFStringEncodingISOLatin9 :
1601 enc =wxFONTENCODING_ISO8859_15 ;
1602 break ;
1603
1604 case kCFStringEncodingKOI8_R :
1605 enc = wxFONTENCODING_KOI8;
1606 break ;
1607
1608 // case :
1609 // enc = wxFONTENCODING_BULGARIAN;
1610 // break ;
1611
1612 case kCFStringEncodingDOSLatinUS :
1613 enc = wxFONTENCODING_CP437;
1614 break ;
1615 case kCFStringEncodingDOSLatin1 :
1616 enc = wxFONTENCODING_CP850;
1617 break ;
1618 case kCFStringEncodingDOSLatin2 :
1619 enc =wxFONTENCODING_CP852 ;
1620 break ;
1621 case kCFStringEncodingDOSCyrillic :
1622 enc = wxFONTENCODING_CP855;
1623 break ;
1624 case kCFStringEncodingDOSRussian :
1625 enc = wxFONTENCODING_CP866;
1626 break ;
1627 case kCFStringEncodingDOSThai :
1628 enc =wxFONTENCODING_CP874 ;
1629 break ;
1630 case kCFStringEncodingDOSJapanese :
1631 enc = wxFONTENCODING_CP932;
1632 break ;
1633 case kCFStringEncodingDOSChineseSimplif :
1634 enc = wxFONTENCODING_CP936;
1635 break ;
1636 case kCFStringEncodingDOSKorean :
1637 enc = wxFONTENCODING_CP949;
1638 break ;
1639 case kCFStringEncodingDOSChineseTrad :
1640 enc = wxFONTENCODING_CP950;
1641 break ;
1642
1643 case kCFStringEncodingWindowsLatin2 :
1644 enc = wxFONTENCODING_CP1250;
1645 break ;
1646 case kCFStringEncodingWindowsCyrillic :
1647 enc = wxFONTENCODING_CP1251;
1648 break ;
1649 case kCFStringEncodingWindowsLatin1 :
1650 enc = wxFONTENCODING_CP1252;
1651 break ;
1652 case kCFStringEncodingWindowsGreek :
1653 enc = wxFONTENCODING_CP1253;
1654 break ;
1655 case kCFStringEncodingWindowsLatin5 :
1656 enc = wxFONTENCODING_CP1254;
1657 break ;
1658 case kCFStringEncodingWindowsHebrew :
1659 enc = wxFONTENCODING_CP1255;
1660 break ;
1661 case kCFStringEncodingWindowsArabic :
1662 enc = wxFONTENCODING_CP1256;
1663 break ;
1664 case kCFStringEncodingWindowsBalticRim :
1665 enc =wxFONTENCODING_CP1257 ;
1666 break ;
1667 case kCFStringEncodingEUC_JP :
1668 enc = wxFONTENCODING_EUC_JP;
1669 break ;
1670 case kCFStringEncodingUnicode :
1671 enc = wxFONTENCODING_UTF16;
1672 break;
1673 case kCFStringEncodingMacRoman :
1674 enc = wxFONTENCODING_MACROMAN ;
1675 break ;
1676 case kCFStringEncodingMacJapanese :
1677 enc = wxFONTENCODING_MACJAPANESE ;
1678 break ;
1679 case kCFStringEncodingMacChineseTrad :
1680 enc = wxFONTENCODING_MACCHINESETRAD ;
1681 break ;
1682 case kCFStringEncodingMacKorean :
1683 enc = wxFONTENCODING_MACKOREAN ;
1684 break ;
1685 case kCFStringEncodingMacArabic :
1686 enc =wxFONTENCODING_MACARABIC ;
1687 break ;
1688 case kCFStringEncodingMacHebrew :
1689 enc = wxFONTENCODING_MACHEBREW ;
1690 break ;
1691 case kCFStringEncodingMacGreek :
1692 enc = wxFONTENCODING_MACGREEK ;
1693 break ;
1694 case kCFStringEncodingMacCyrillic :
1695 enc = wxFONTENCODING_MACCYRILLIC ;
1696 break ;
1697 case kCFStringEncodingMacDevanagari :
1698 enc = wxFONTENCODING_MACDEVANAGARI ;
1699 break ;
1700 case kCFStringEncodingMacGurmukhi :
1701 enc = wxFONTENCODING_MACGURMUKHI ;
1702 break ;
1703 case kCFStringEncodingMacGujarati :
1704 enc = wxFONTENCODING_MACGUJARATI ;
1705 break ;
1706 case kCFStringEncodingMacOriya :
1707 enc =wxFONTENCODING_MACORIYA ;
1708 break ;
1709 case kCFStringEncodingMacBengali :
1710 enc =wxFONTENCODING_MACBENGALI ;
1711 break ;
1712 case kCFStringEncodingMacTamil :
1713 enc = wxFONTENCODING_MACTAMIL ;
1714 break ;
1715 case kCFStringEncodingMacTelugu :
1716 enc = wxFONTENCODING_MACTELUGU ;
1717 break ;
1718 case kCFStringEncodingMacKannada :
1719 enc = wxFONTENCODING_MACKANNADA ;
1720 break ;
1721 case kCFStringEncodingMacMalayalam :
1722 enc = wxFONTENCODING_MACMALAJALAM ;
1723 break ;
1724 case kCFStringEncodingMacSinhalese :
1725 enc = wxFONTENCODING_MACSINHALESE ;
1726 break ;
1727 case kCFStringEncodingMacBurmese :
1728 enc = wxFONTENCODING_MACBURMESE ;
1729 break ;
1730 case kCFStringEncodingMacKhmer :
1731 enc = wxFONTENCODING_MACKHMER ;
1732 break ;
1733 case kCFStringEncodingMacThai :
1734 enc = wxFONTENCODING_MACTHAI ;
1735 break ;
1736 case kCFStringEncodingMacLaotian :
1737 enc = wxFONTENCODING_MACLAOTIAN ;
1738 break ;
1739 case kCFStringEncodingMacGeorgian :
1740 enc = wxFONTENCODING_MACGEORGIAN ;
1741 break ;
1742 case kCFStringEncodingMacArmenian :
1743 enc = wxFONTENCODING_MACARMENIAN ;
1744 break ;
1745 case kCFStringEncodingMacChineseSimp :
1746 enc = wxFONTENCODING_MACCHINESESIMP ;
1747 break ;
1748 case kCFStringEncodingMacTibetan :
1749 enc = wxFONTENCODING_MACTIBETAN ;
1750 break ;
1751 case kCFStringEncodingMacMongolian :
1752 enc = wxFONTENCODING_MACMONGOLIAN ;
1753 break ;
1754 case kCFStringEncodingMacEthiopic :
1755 enc = wxFONTENCODING_MACETHIOPIC ;
1756 break ;
1757 case kCFStringEncodingMacCentralEurRoman:
1758 enc = wxFONTENCODING_MACCENTRALEUR ;
1759 break ;
1760 case kCFStringEncodingMacVietnamese:
1761 enc = wxFONTENCODING_MACVIATNAMESE ;
1762 break ;
1763 case kCFStringEncodingMacExtArabic :
1764 enc = wxFONTENCODING_MACARABICEXT ;
1765 break ;
1766 case kCFStringEncodingMacSymbol :
1767 enc = wxFONTENCODING_MACSYMBOL ;
1768 break ;
1769 case kCFStringEncodingMacDingbats :
1770 enc = wxFONTENCODING_MACDINGBATS ;
1771 break ;
1772 case kCFStringEncodingMacTurkish :
1773 enc = wxFONTENCODING_MACTURKISH ;
1774 break ;
1775 case kCFStringEncodingMacCroatian :
1776 enc = wxFONTENCODING_MACCROATIAN ;
1777 break ;
1778 case kCFStringEncodingMacIcelandic :
1779 enc = wxFONTENCODING_MACICELANDIC ;
1780 break ;
1781 case kCFStringEncodingMacRomanian :
1782 enc = wxFONTENCODING_MACROMANIAN ;
1783 break ;
1784 case kCFStringEncodingMacCeltic :
1785 enc = wxFONTENCODING_MACCELTIC ;
1786 break ;
1787 case kCFStringEncodingMacGaelic :
1788 enc = wxFONTENCODING_MACGAELIC ;
1789 break ;
1790 // case kCFStringEncodingMacKeyboardGlyphs :
1791 // enc = wxFONTENCODING_MACKEYBOARD ;
1792 // break ;
1793 } ;
1794 return enc ;
1795 }
1796
1797 class wxMBConv_cocoa : public wxMBConv
1798 {
1799 public:
1800 wxMBConv_cocoa()
1801 {
1802 Init(CFStringGetSystemEncoding()) ;
1803 }
1804
1805 wxMBConv_cocoa(const wxChar* name)
1806 {
1807 Init( wxCFStringEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
1808 }
1809
1810 wxMBConv_cocoa(wxFontEncoding encoding)
1811 {
1812 Init( wxCFStringEncFromFontEnc(encoding) );
1813 }
1814
1815 ~wxMBConv_cocoa()
1816 {
1817 }
1818
1819 void Init( CFStringEncoding encoding)
1820 {
1821 m_char_encoding = encoding ;
1822 m_unicode_encoding = kCFStringEncodingUnicode;
1823 }
1824
1825 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
1826 {
1827 wxASSERT(szUnConv);
1828
1829 size_t nBufSize = strlen(szUnConv) + 1;
1830 size_t nRealOutSize;
1831
1832 UniChar* szUniCharBuffer = (UniChar*) szOut;
1833 wchar_t* szConvBuffer = szOut;
1834
1835 if (szConvBuffer == NULL && nOutSize != 0)
1836 {
1837 szConvBuffer = new wchar_t[nOutSize] ;
1838 }
1839
1840 #if SIZEOF_WCHAR_T == 4
1841 szUniCharBuffer = new UniChar[nOutSize];
1842 #endif
1843
1844 CFDataRef theData = CFDataCreateWithBytesNoCopy (
1845 NULL, //allocator
1846 (const UInt8*)szUnConv,
1847 nBufSize - 1,
1848 NULL //deallocator
1849 );
1850
1851 wxASSERT(theData);
1852
1853 CFStringRef theString = CFStringCreateFromExternalRepresentation (
1854 NULL,
1855 theData,
1856 m_char_encoding
1857 );
1858
1859 wxASSERT(theString);
1860
1861 if (nOutSize == 0)
1862 {
1863 nRealOutSize = CFStringGetLength(theString) + 1;
1864 CFRelease(theString);
1865 return nRealOutSize - 1;
1866 }
1867
1868 CFRange theRange = { 0, CFStringGetLength(theString) };
1869
1870 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
1871
1872
1873 nRealOutSize = (CFStringGetLength(theString) + 1);
1874
1875 CFRelease(theString);
1876
1877 szUniCharBuffer[nRealOutSize-1] = '\0' ;
1878
1879 #if SIZEOF_WCHAR_T == 4
1880 wxMBConvUTF16 converter ;
1881 converter.MB2WC(szConvBuffer , (const char*)szUniCharBuffer , nRealOutSize ) ;
1882 delete[] szUniCharBuffer;
1883 #endif
1884 if ( szOut == NULL )
1885 delete [] szConvBuffer;
1886
1887 return nRealOutSize ;
1888 }
1889
1890 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
1891 {
1892 size_t nBufSize = wxWcslen(szUnConv) + 1;
1893 size_t nRealOutSize;
1894 char* szBuffer = szOut;
1895 UniChar* szUniBuffer = (UniChar*) szUnConv;
1896
1897 if (szOut == NULL)
1898 {
1899 // worst case
1900 nRealOutSize = ((nBufSize - 1) << 1)+1 ;
1901 szBuffer = new char[ nRealOutSize ] ;
1902 }
1903 else
1904 nRealOutSize = nOutSize;
1905
1906 #if SIZEOF_WCHAR_T == 4
1907 wxMBConvUTF16BE converter ;
1908 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
1909 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
1910 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
1911 nBufSize /= sizeof(UniChar);
1912 ++nBufSize;
1913 #endif
1914
1915 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
1916 NULL, //allocator
1917 szUniBuffer,
1918 nBufSize,
1919 NULL //deallocator
1920 );
1921
1922 wxASSERT(theString);
1923
1924 //Note that CER puts a BOM when converting to unicode
1925 //so we may want to check and use getchars instead in that case
1926 CFDataRef theData = CFStringCreateExternalRepresentation(
1927 NULL, //allocator
1928 theString,
1929 m_char_encoding,
1930 0 //what to put in characters that can't be converted -
1931 //0 tells CFString to return NULL if it meets such a character
1932 );
1933
1934 if(!theData)
1935 return (size_t)-1;
1936
1937 CFRelease(theString);
1938
1939 nRealOutSize = CFDataGetLength(theData);
1940
1941 if ( szOut == NULL )
1942 delete[] szBuffer;
1943
1944 if(nOutSize == 0)
1945 {
1946 //TODO: This gets flagged as a non-malloced address by the debugger...
1947 //#if SIZEOF_WCHAR_T == 4
1948 // delete[] szUniBuffer;
1949 //#endif
1950 CFRelease(theData);
1951 return nRealOutSize - 1;
1952 }
1953
1954 CFRange theRange = {0, CFDataGetLength(theData) };
1955 CFDataGetBytes(theData, theRange, (UInt8*) szBuffer);
1956
1957 CFRelease(theData);
1958
1959 //TODO: This gets flagged as a non-malloced address by the debugger...
1960 //#if SIZEOF_WCHAR_T == 4
1961 // delete[] szUniBuffer;
1962 //#endif
1963 return nRealOutSize - 1;
1964 }
1965
1966 bool IsOk() const
1967 {
1968 //TODO: check for invalid en/de/coding
1969 return true;
1970 }
1971
1972 private:
1973 CFStringEncoding m_char_encoding ;
1974 CFStringEncoding m_unicode_encoding ;
1975 };
1976
1977 #endif // defined(__WXCOCOA__)
1978
1979 // ============================================================================
1980 // Mac conversion classes
1981 // ============================================================================
1982
1983 #if defined(__WXMAC__) && defined(TARGET_CARBON)
1984
1985 class wxMBConv_mac : public wxMBConv
1986 {
1987 public:
1988 wxMBConv_mac()
1989 {
1990 Init(CFStringGetSystemEncoding()) ;
1991 }
1992
1993 wxMBConv_mac(const wxChar* name)
1994 {
1995 Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
1996 }
1997
1998 wxMBConv_mac(wxFontEncoding encoding)
1999 {
2000 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2001 }
2002
2003 ~wxMBConv_mac()
2004 {
2005 OSStatus status = noErr ;
2006 status = TECDisposeConverter(m_MB2WC_converter);
2007 status = TECDisposeConverter(m_WC2MB_converter);
2008 }
2009
2010
2011 void Init( TextEncodingBase encoding)
2012 {
2013 OSStatus status = noErr ;
2014 m_char_encoding = encoding ;
2015 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2016
2017 status = TECCreateConverter(&m_MB2WC_converter,
2018 m_char_encoding,
2019 m_unicode_encoding);
2020 status = TECCreateConverter(&m_WC2MB_converter,
2021 m_unicode_encoding,
2022 m_char_encoding);
2023 }
2024
2025 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2026 {
2027 OSStatus status = noErr ;
2028 ByteCount byteOutLen ;
2029 ByteCount byteInLen = strlen(psz) ;
2030 wchar_t *tbuf = NULL ;
2031 UniChar* ubuf = NULL ;
2032 size_t res = 0 ;
2033
2034 if (buf == NULL)
2035 {
2036 n = byteInLen ;
2037 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2038 }
2039 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2040 #if SIZEOF_WCHAR_T == 4
2041 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2042 #else
2043 ubuf = (UniChar*) (buf ? buf : tbuf) ;
2044 #endif
2045 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2046 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2047 #if SIZEOF_WCHAR_T == 4
2048 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2049 // is not properly terminated we get random characters at the end
2050 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2051 wxMBConvUTF16BE converter ;
2052 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2053 free( ubuf ) ;
2054 #else
2055 res = byteOutLen / sizeof( UniChar ) ;
2056 #endif
2057 if ( buf == NULL )
2058 free(tbuf) ;
2059
2060 if ( buf && res < n)
2061 buf[res] = 0;
2062
2063 return res ;
2064 }
2065
2066 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2067 {
2068 OSStatus status = noErr ;
2069 ByteCount byteOutLen ;
2070 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2071
2072 char *tbuf = NULL ;
2073
2074 if (buf == NULL)
2075 {
2076 // worst case
2077 n = byteInLen * 2 ;
2078 tbuf = (char*) malloc( n ) ;
2079 }
2080
2081 ByteCount byteBufferLen = n ;
2082 UniChar* ubuf = NULL ;
2083 #if SIZEOF_WCHAR_T == 4
2084 wxMBConvUTF16BE converter ;
2085 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2086 byteInLen = unicharlen ;
2087 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2088 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2089 #else
2090 ubuf = (UniChar*) psz ;
2091 #endif
2092 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2093 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2094 #if SIZEOF_WCHAR_T == 4
2095 free( ubuf ) ;
2096 #endif
2097 if ( buf == NULL )
2098 free(tbuf) ;
2099
2100 size_t res = byteOutLen ;
2101 if ( buf && res < n)
2102 buf[res] = 0;
2103
2104 return res ;
2105 }
2106
2107 bool IsOk() const
2108 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2109
2110 private:
2111 TECObjectRef m_MB2WC_converter ;
2112 TECObjectRef m_WC2MB_converter ;
2113
2114 TextEncodingBase m_char_encoding ;
2115 TextEncodingBase m_unicode_encoding ;
2116 };
2117
2118 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2119
2120 // ============================================================================
2121 // wxEncodingConverter based conversion classes
2122 // ============================================================================
2123
2124 #if wxUSE_FONTMAP
2125
2126 class wxMBConv_wxwin : public wxMBConv
2127 {
2128 private:
2129 void Init()
2130 {
2131 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2132 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2133 }
2134
2135 public:
2136 // temporarily just use wxEncodingConverter stuff,
2137 // so that it works while a better implementation is built
2138 wxMBConv_wxwin(const wxChar* name)
2139 {
2140 if (name)
2141 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
2142 else
2143 m_enc = wxFONTENCODING_SYSTEM;
2144
2145 Init();
2146 }
2147
2148 wxMBConv_wxwin(wxFontEncoding enc)
2149 {
2150 m_enc = enc;
2151
2152 Init();
2153 }
2154
2155 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2156 {
2157 size_t inbuf = strlen(psz);
2158 if (buf)
2159 m2w.Convert(psz,buf);
2160 return inbuf;
2161 }
2162
2163 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2164 {
2165 const size_t inbuf = wxWcslen(psz);
2166 if (buf)
2167 w2m.Convert(psz,buf);
2168
2169 return inbuf;
2170 }
2171
2172 bool IsOk() const { return m_ok; }
2173
2174 public:
2175 wxFontEncoding m_enc;
2176 wxEncodingConverter m2w, w2m;
2177
2178 // were we initialized successfully?
2179 bool m_ok;
2180
2181 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2182 };
2183
2184 #endif // wxUSE_FONTMAP
2185
2186 // ============================================================================
2187 // wxCSConv implementation
2188 // ============================================================================
2189
2190 void wxCSConv::Init()
2191 {
2192 m_name = NULL;
2193 m_convReal = NULL;
2194 m_deferred = true;
2195 }
2196
2197 wxCSConv::wxCSConv(const wxChar *charset)
2198 {
2199 Init();
2200
2201 if ( charset )
2202 {
2203 SetName(charset);
2204 }
2205
2206 m_encoding = wxFONTENCODING_SYSTEM;
2207 }
2208
2209 wxCSConv::wxCSConv(wxFontEncoding encoding)
2210 {
2211 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2212 {
2213 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2214
2215 encoding = wxFONTENCODING_SYSTEM;
2216 }
2217
2218 Init();
2219
2220 m_encoding = encoding;
2221 }
2222
2223 wxCSConv::~wxCSConv()
2224 {
2225 Clear();
2226 }
2227
2228 wxCSConv::wxCSConv(const wxCSConv& conv)
2229 : wxMBConv()
2230 {
2231 Init();
2232
2233 SetName(conv.m_name);
2234 m_encoding = conv.m_encoding;
2235 }
2236
2237 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2238 {
2239 Clear();
2240
2241 SetName(conv.m_name);
2242 m_encoding = conv.m_encoding;
2243
2244 return *this;
2245 }
2246
2247 void wxCSConv::Clear()
2248 {
2249 free(m_name);
2250 delete m_convReal;
2251
2252 m_name = NULL;
2253 m_convReal = NULL;
2254 }
2255
2256 void wxCSConv::SetName(const wxChar *charset)
2257 {
2258 if (charset)
2259 {
2260 m_name = wxStrdup(charset);
2261 m_deferred = true;
2262 }
2263 }
2264
2265 wxMBConv *wxCSConv::DoCreate() const
2266 {
2267 // check for the special case of ASCII or ISO8859-1 charset: as we have
2268 // special knowledge of it anyhow, we don't need to create a special
2269 // conversion object
2270 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2271 {
2272 // don't convert at all
2273 return NULL;
2274 }
2275
2276 // we trust OS to do conversion better than we can so try external
2277 // conversion methods first
2278 //
2279 // the full order is:
2280 // 1. OS conversion (iconv() under Unix or Win32 API)
2281 // 2. hard coded conversions for UTF
2282 // 3. wxEncodingConverter as fall back
2283
2284 // step (1)
2285 #ifdef HAVE_ICONV
2286 #if !wxUSE_FONTMAP
2287 if ( m_name )
2288 #endif // !wxUSE_FONTMAP
2289 {
2290 wxString name(m_name);
2291
2292 #if wxUSE_FONTMAP
2293 if ( name.empty() )
2294 name = wxFontMapper::Get()->GetEncodingName(m_encoding);
2295 #endif // wxUSE_FONTMAP
2296
2297 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2298 if ( conv->IsOk() )
2299 return conv;
2300
2301 delete conv;
2302 }
2303 #endif // HAVE_ICONV
2304
2305 #ifdef wxHAVE_WIN32_MB2WC
2306 {
2307 #if wxUSE_FONTMAP
2308 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2309 : new wxMBConv_win32(m_encoding);
2310 if ( conv->IsOk() )
2311 return conv;
2312
2313 delete conv;
2314 #else
2315 return NULL;
2316 #endif
2317 }
2318 #endif // wxHAVE_WIN32_MB2WC
2319 #if defined(__WXMAC__)
2320 {
2321 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
2322 {
2323
2324 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2325 : new wxMBConv_mac(m_encoding);
2326 if ( conv->IsOk() )
2327 return conv;
2328
2329 delete conv;
2330 }
2331 }
2332 #endif
2333 #if defined(__WXCOCOA__)
2334 {
2335 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2336 {
2337
2338 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2339 : new wxMBConv_cocoa(m_encoding);
2340 if ( conv->IsOk() )
2341 return conv;
2342
2343 delete conv;
2344 }
2345 }
2346 #endif
2347 // step (2)
2348 wxFontEncoding enc = m_encoding;
2349 #if wxUSE_FONTMAP
2350 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2351 {
2352 // use "false" to suppress interactive dialogs -- we can be called from
2353 // anywhere and popping up a dialog from here is the last thing we want to
2354 // do
2355 enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
2356 }
2357 #endif // wxUSE_FONTMAP
2358
2359 switch ( enc )
2360 {
2361 case wxFONTENCODING_UTF7:
2362 return new wxMBConvUTF7;
2363
2364 case wxFONTENCODING_UTF8:
2365 return new wxMBConvUTF8;
2366
2367 case wxFONTENCODING_UTF16BE:
2368 return new wxMBConvUTF16BE;
2369
2370 case wxFONTENCODING_UTF16LE:
2371 return new wxMBConvUTF16LE;
2372
2373 case wxFONTENCODING_UTF32BE:
2374 return new wxMBConvUTF32BE;
2375
2376 case wxFONTENCODING_UTF32LE:
2377 return new wxMBConvUTF32LE;
2378
2379 default:
2380 // nothing to do but put here to suppress gcc warnings
2381 ;
2382 }
2383
2384 // step (3)
2385 #if wxUSE_FONTMAP
2386 {
2387 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2388 : new wxMBConv_wxwin(m_encoding);
2389 if ( conv->IsOk() )
2390 return conv;
2391
2392 delete conv;
2393 }
2394 #endif // wxUSE_FONTMAP
2395
2396 // NB: This is a hack to prevent deadlock. What could otherwise happen
2397 // in Unicode build: wxConvLocal creation ends up being here
2398 // because of some failure and logs the error. But wxLog will try to
2399 // attach timestamp, for which it will need wxConvLocal (to convert
2400 // time to char* and then wchar_t*), but that fails, tries to log
2401 // error, but wxLog has a (already locked) critical section that
2402 // guards static buffer.
2403 static bool alreadyLoggingError = false;
2404 if (!alreadyLoggingError)
2405 {
2406 alreadyLoggingError = true;
2407 wxLogError(_("Cannot convert from the charset '%s'!"),
2408 m_name ? m_name
2409 :
2410 #if wxUSE_FONTMAP
2411 wxFontMapper::GetEncodingDescription(m_encoding).c_str()
2412 #else // !wxUSE_FONTMAP
2413 wxString::Format(_("encoding %s"), m_encoding).c_str()
2414 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2415 );
2416 alreadyLoggingError = false;
2417 }
2418
2419 return NULL;
2420 }
2421
2422 void wxCSConv::CreateConvIfNeeded() const
2423 {
2424 if ( m_deferred )
2425 {
2426 wxCSConv *self = (wxCSConv *)this; // const_cast
2427
2428 #if wxUSE_INTL
2429 // if we don't have neither the name nor the encoding, use the default
2430 // encoding for this system
2431 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2432 {
2433 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2434 }
2435 #endif // wxUSE_INTL
2436
2437 self->m_convReal = DoCreate();
2438 self->m_deferred = false;
2439 }
2440 }
2441
2442 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2443 {
2444 CreateConvIfNeeded();
2445
2446 if (m_convReal)
2447 return m_convReal->MB2WC(buf, psz, n);
2448
2449 // latin-1 (direct)
2450 size_t len = strlen(psz);
2451
2452 if (buf)
2453 {
2454 for (size_t c = 0; c <= len; c++)
2455 buf[c] = (unsigned char)(psz[c]);
2456 }
2457
2458 return len;
2459 }
2460
2461 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2462 {
2463 CreateConvIfNeeded();
2464
2465 if (m_convReal)
2466 return m_convReal->WC2MB(buf, psz, n);
2467
2468 // latin-1 (direct)
2469 const size_t len = wxWcslen(psz);
2470 if (buf)
2471 {
2472 for (size_t c = 0; c <= len; c++)
2473 {
2474 if (psz[c] > 0xFF)
2475 return (size_t)-1;
2476 buf[c] = (char)psz[c];
2477 }
2478 }
2479 else
2480 {
2481 for (size_t c = 0; c <= len; c++)
2482 {
2483 if (psz[c] > 0xFF)
2484 return (size_t)-1;
2485 }
2486 }
2487
2488 return len;
2489 }
2490
2491 // ----------------------------------------------------------------------------
2492 // globals
2493 // ----------------------------------------------------------------------------
2494
// the object behind wxConvLibc: its class is selected at compile time
// depending on the platform
#ifdef __WINDOWS__
    static wxMBConv_win32 wxConvLibcObj;
#elif defined(__WXMAC__) && !defined(__MACH__)
    static wxMBConv_mac wxConvLibcObj ;
#else
    static wxMBConvLibc wxConvLibcObj;
#endif

// the objects behind the other predefined conversions
static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
static wxMBConvUTF7 wxConvUTF7Obj;
static wxMBConvUTF8 wxConvUTF8Obj;


// the exported globals are references bound to the file-static objects
// defined above
WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
// wxConvCurrent initially points to the same object as wxConvLibc
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2515
2516 #else // !wxUSE_WCHAR_T
2517
2518 // stand-ins in absence of wchar_t
2519 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2520 wxConvISO8859_1,
2521 wxConvLocal,
2522 wxConvUTF8;
2523
2524 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2525
2526