]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
unicode fix for CW
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
9 // (c) 2000-2003 Vadim Zeitlin
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ============================================================================
14 // declarations
15 // ============================================================================
16
17 // ----------------------------------------------------------------------------
18 // headers
19 // ----------------------------------------------------------------------------
20
21 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
22 #pragma implementation "strconv.h"
23 #endif
24
25 // For compilers that support precompilation, includes "wx.h".
26 #include "wx/wxprec.h"
27
28 #ifdef __BORLANDC__
29 #pragma hdrstop
30 #endif
31
32 #ifndef WX_PRECOMP
33 #include "wx/intl.h"
34 #include "wx/log.h"
35 #endif // WX_PRECOMP
36
37 #include "wx/strconv.h"
38
39 #if wxUSE_WCHAR_T
40
41 #ifdef __WXMSW__
42 #include "wx/msw/private.h"
43 #endif
44
45 #ifdef __WINDOWS__
46 #include "wx/msw/missing.h"
47 #endif
48
49 #ifndef __WXWINCE__
50 #include <errno.h>
51 #endif
52
53 #include <ctype.h>
54 #include <string.h>
55 #include <stdlib.h>
56
57 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
58 #define wxHAVE_WIN32_MB2WC
59 #endif // __WIN32__ but !__WXMICROWIN__
60
61 // ----------------------------------------------------------------------------
62 // headers
63 // ----------------------------------------------------------------------------
64
65 #ifdef __SALFORDC__
66 #include <clib.h>
67 #endif
68
69 #ifdef HAVE_ICONV
70 #include <iconv.h>
71 #endif
72
73 #include "wx/encconv.h"
74 #include "wx/fontmap.h"
75 #include "wx/utils.h"
76
77 #ifdef __WXMAC__
78 #include <ATSUnicode.h>
79 #include <TextCommon.h>
80 #include <TextEncodingConverter.h>
81
82 #include "wx/mac/private.h" // includes mac headers
83 #endif
84 // ----------------------------------------------------------------------------
85 // macros
86 // ----------------------------------------------------------------------------
87
88 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
89 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
90
91 #if SIZEOF_WCHAR_T == 4
92 #define WC_NAME "UCS4"
93 #define WC_BSWAP BSWAP_UCS4
94 #ifdef WORDS_BIGENDIAN
95 #define WC_NAME_BEST "UCS-4BE"
96 #else
97 #define WC_NAME_BEST "UCS-4LE"
98 #endif
99 #elif SIZEOF_WCHAR_T == 2
100 #define WC_NAME "UTF16"
101 #define WC_BSWAP BSWAP_UTF16
102 #define WC_UTF16
103 #ifdef WORDS_BIGENDIAN
104 #define WC_NAME_BEST "UTF-16BE"
105 #else
106 #define WC_NAME_BEST "UTF-16LE"
107 #endif
108 #else // sizeof(wchar_t) != 2 nor 4
109 // does this ever happen?
110 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
111 #endif
112
113 // ============================================================================
114 // implementation
115 // ============================================================================
116
117 // ----------------------------------------------------------------------------
118 // UTF-16 en/decoding to/from UCS-4
119 // ----------------------------------------------------------------------------
120
121
122 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
123 {
124 if (input<=0xffff)
125 {
126 if (output)
127 *output = (wxUint16) input;
128 return 1;
129 }
130 else if (input>=0x110000)
131 {
132 return (size_t)-1;
133 }
134 else
135 {
136 if (output)
137 {
138 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
139 *output = (wxUint16) ((input&0x3ff)+0xdc00);
140 }
141 return 2;
142 }
143 }
144
145 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
146 {
147 if ((*input<0xd800) || (*input>0xdfff))
148 {
149 output = *input;
150 return 1;
151 }
152 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
153 {
154 output = *input;
155 return (size_t)-1;
156 }
157 else
158 {
159 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
160 return 2;
161 }
162 }
163
164
165 // ----------------------------------------------------------------------------
166 // wxMBConv
167 // ----------------------------------------------------------------------------
168
169 wxMBConv::~wxMBConv()
170 {
171 // nothing to do here
172 }
173
174 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
175 {
176 if ( psz )
177 {
178 // calculate the length of the buffer needed first
179 size_t nLen = MB2WC(NULL, psz, 0);
180 if ( nLen != (size_t)-1 )
181 {
182 // now do the actual conversion
183 wxWCharBuffer buf(nLen);
184 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
185 if ( nLen != (size_t)-1 )
186 {
187 return buf;
188 }
189 }
190 }
191
192 wxWCharBuffer buf((wchar_t *)NULL);
193
194 return buf;
195 }
196
197 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
198 {
199 if ( pwz )
200 {
201 size_t nLen = WC2MB(NULL, pwz, 0);
202 if ( nLen != (size_t)-1 )
203 {
204 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
205 nLen = WC2MB(buf.data(), pwz, nLen + 4);
206 if ( nLen != (size_t)-1 )
207 {
208 return buf;
209 }
210 }
211 }
212
213 wxCharBuffer buf((char *)NULL);
214
215 return buf;
216 }
217
218 // ----------------------------------------------------------------------------
219 // wxMBConvLibc
220 // ----------------------------------------------------------------------------
221
222 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
223 {
224 return wxMB2WC(buf, psz, n);
225 }
226
227 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
228 {
229 return wxWC2MB(buf, psz, n);
230 }
231
232 // ----------------------------------------------------------------------------
233 // UTF-7
234 // ----------------------------------------------------------------------------
235
236 #if 0
237 static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
238 "abcdefghijklmnopqrstuvwxyz"
239 "0123456789'(),-./:?";
240 static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
241 static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
242 "abcdefghijklmnopqrstuvwxyz"
243 "0123456789+/";
244 #endif
245
246 // TODO: write actual implementations of UTF-7 here
247 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
248 const char * WXUNUSED(psz),
249 size_t WXUNUSED(n)) const
250 {
251 return 0;
252 }
253
254 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
255 const wchar_t * WXUNUSED(psz),
256 size_t WXUNUSED(n)) const
257 {
258 return 0;
259 }
260
261 // ----------------------------------------------------------------------------
262 // UTF-8
263 // ----------------------------------------------------------------------------
264
265 static wxUint32 utf8_max[]=
266 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
267
268 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
269 {
270 size_t len = 0;
271
272 while (*psz && ((!buf) || (len < n)))
273 {
274 unsigned char cc = *psz++, fc = cc;
275 unsigned cnt;
276 for (cnt = 0; fc & 0x80; cnt++)
277 fc <<= 1;
278 if (!cnt)
279 {
280 // plain ASCII char
281 if (buf)
282 *buf++ = cc;
283 len++;
284 }
285 else
286 {
287 cnt--;
288 if (!cnt)
289 {
290 // invalid UTF-8 sequence
291 return (size_t)-1;
292 }
293 else
294 {
295 unsigned ocnt = cnt - 1;
296 wxUint32 res = cc & (0x3f >> cnt);
297 while (cnt--)
298 {
299 cc = *psz++;
300 if ((cc & 0xC0) != 0x80)
301 {
302 // invalid UTF-8 sequence
303 return (size_t)-1;
304 }
305 res = (res << 6) | (cc & 0x3f);
306 }
307 if (res <= utf8_max[ocnt])
308 {
309 // illegal UTF-8 encoding
310 return (size_t)-1;
311 }
312 #ifdef WC_UTF16
313 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
314 size_t pa = encode_utf16(res, (wxUint16 *)buf);
315 if (pa == (size_t)-1)
316 return (size_t)-1;
317 if (buf)
318 buf += pa;
319 len += pa;
320 #else // !WC_UTF16
321 if (buf)
322 *buf++ = res;
323 len++;
324 #endif // WC_UTF16/!WC_UTF16
325 }
326 }
327 }
328 if (buf && (len < n))
329 *buf = 0;
330 return len;
331 }
332
333 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
334 {
335 size_t len = 0;
336
337 while (*psz && ((!buf) || (len < n)))
338 {
339 wxUint32 cc;
340 #ifdef WC_UTF16
341 // cast is ok for WC_UTF16
342 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
343 psz += (pa == (size_t)-1) ? 1 : pa;
344 #else
345 cc=(*psz++) & 0x7fffffff;
346 #endif
347 unsigned cnt;
348 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
349 if (!cnt)
350 {
351 // plain ASCII char
352 if (buf)
353 *buf++ = (char) cc;
354 len++;
355 }
356
357 else
358 {
359 len += cnt + 1;
360 if (buf)
361 {
362 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
363 while (cnt--)
364 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
365 }
366 }
367 }
368
369 if (buf && (len<n)) *buf = 0;
370
371 return len;
372 }
373
374
375
376
377 // ----------------------------------------------------------------------------
378 // UTF-16
379 // ----------------------------------------------------------------------------
380
381 #ifdef WORDS_BIGENDIAN
382 #define wxMBConvUTF16straight wxMBConvUTF16BE
383 #define wxMBConvUTF16swap wxMBConvUTF16LE
384 #else
385 #define wxMBConvUTF16swap wxMBConvUTF16BE
386 #define wxMBConvUTF16straight wxMBConvUTF16LE
387 #endif
388
389
390 #ifdef WC_UTF16
391
392 // copy 16bit MB to 16bit String
393 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
394 {
395 size_t len=0;
396
397 while (*(wxUint16*)psz && (!buf || len < n))
398 {
399 if (buf)
400 *buf++ = *(wxUint16*)psz;
401 len++;
402
403 psz += sizeof(wxUint16);
404 }
405 if (buf && len<n) *buf=0;
406
407 return len;
408 }
409
410
411 // copy 16bit String to 16bit MB
412 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
413 {
414 size_t len=0;
415
416 while (*psz && (!buf || len < n))
417 {
418 if (buf)
419 {
420 *(wxUint16*)buf = *psz;
421 buf += sizeof(wxUint16);
422 }
423 len += sizeof(wxUint16);
424 psz++;
425 }
426 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
427
428 return len;
429 }
430
431
432 // swap 16bit MB to 16bit String
433 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
434 {
435 size_t len=0;
436
437 while (*(wxUint16*)psz && (!buf || len < n))
438 {
439 if (buf)
440 {
441 ((char *)buf)[0] = psz[1];
442 ((char *)buf)[1] = psz[0];
443 buf++;
444 }
445 len++;
446 psz += sizeof(wxUint16);
447 }
448 if (buf && len<n) *buf=0;
449
450 return len;
451 }
452
453
454 // swap 16bit MB to 16bit String
455 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
456 {
457 size_t len=0;
458
459 while (*psz && (!buf || len < n))
460 {
461 if (buf)
462 {
463 *buf++ = ((char*)psz)[1];
464 *buf++ = ((char*)psz)[0];
465 }
466 len += sizeof(wxUint16);
467 psz++;
468 }
469 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
470
471 return len;
472 }
473
474
475 #else // WC_UTF16
476
477
478 // copy 16bit MB to 32bit String
479 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
480 {
481 size_t len=0;
482
483 while (*(wxUint16*)psz && (!buf || len < n))
484 {
485 wxUint32 cc;
486 size_t pa=decode_utf16((wxUint16*)psz, cc);
487 if (pa == (size_t)-1)
488 return pa;
489
490 if (buf)
491 *buf++ = cc;
492 len++;
493 psz += pa * sizeof(wxUint16);
494 }
495 if (buf && len<n) *buf=0;
496
497 return len;
498 }
499
500
501 // copy 32bit String to 16bit MB
502 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
503 {
504 size_t len=0;
505
506 while (*psz && (!buf || len < n))
507 {
508 wxUint16 cc[2];
509 size_t pa=encode_utf16(*psz, cc);
510
511 if (pa == (size_t)-1)
512 return pa;
513
514 if (buf)
515 {
516 *(wxUint16*)buf = cc[0];
517 buf += sizeof(wxUint16);
518 if (pa > 1)
519 {
520 *(wxUint16*)buf = cc[1];
521 buf += sizeof(wxUint16);
522 }
523 }
524
525 len += pa*sizeof(wxUint16);
526 psz++;
527 }
528 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
529
530 return len;
531 }
532
533
534 // swap 16bit MB to 32bit String
535 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
536 {
537 size_t len=0;
538
539 while (*(wxUint16*)psz && (!buf || len < n))
540 {
541 wxUint32 cc;
542 char tmp[4];
543 tmp[0]=psz[1]; tmp[1]=psz[0];
544 tmp[2]=psz[3]; tmp[3]=psz[2];
545
546 size_t pa=decode_utf16((wxUint16*)tmp, cc);
547 if (pa == (size_t)-1)
548 return pa;
549
550 if (buf)
551 *buf++ = cc;
552
553 len++;
554 psz += pa * sizeof(wxUint16);
555 }
556 if (buf && len<n) *buf=0;
557
558 return len;
559 }
560
561
562 // swap 32bit String to 16bit MB
563 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
564 {
565 size_t len=0;
566
567 while (*psz && (!buf || len < n))
568 {
569 wxUint16 cc[2];
570 size_t pa=encode_utf16(*psz, cc);
571
572 if (pa == (size_t)-1)
573 return pa;
574
575 if (buf)
576 {
577 *buf++ = ((char*)cc)[1];
578 *buf++ = ((char*)cc)[0];
579 if (pa > 1)
580 {
581 *buf++ = ((char*)cc)[3];
582 *buf++ = ((char*)cc)[2];
583 }
584 }
585
586 len += pa*sizeof(wxUint16);
587 psz++;
588 }
589 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
590
591 return len;
592 }
593
594 #endif // WC_UTF16
595
596
597 // ----------------------------------------------------------------------------
598 // UTF-32
599 // ----------------------------------------------------------------------------
600
601 #ifdef WORDS_BIGENDIAN
602 #define wxMBConvUTF32straight wxMBConvUTF32BE
603 #define wxMBConvUTF32swap wxMBConvUTF32LE
604 #else
605 #define wxMBConvUTF32swap wxMBConvUTF32BE
606 #define wxMBConvUTF32straight wxMBConvUTF32LE
607 #endif
608
609
610 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
611 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
612
613
614 #ifdef WC_UTF16
615
616 // copy 32bit MB to 16bit String
617 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
618 {
619 size_t len=0;
620
621 while (*(wxUint32*)psz && (!buf || len < n))
622 {
623 wxUint16 cc[2];
624
625 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
626 if (pa == (size_t)-1)
627 return pa;
628
629 if (buf)
630 {
631 *buf++ = cc[0];
632 if (pa > 1)
633 *buf++ = cc[1];
634 }
635 len += pa;
636 psz += sizeof(wxUint32);
637 }
638 if (buf && len<n) *buf=0;
639
640 return len;
641 }
642
643
644 // copy 16bit String to 32bit MB
645 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
646 {
647 size_t len=0;
648
649 while (*psz && (!buf || len < n))
650 {
651 wxUint32 cc;
652
653 // cast is ok for WC_UTF16
654 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
655 if (pa == (size_t)-1)
656 return pa;
657
658 if (buf)
659 {
660 *(wxUint32*)buf = cc;
661 buf += sizeof(wxUint32);
662 }
663 len += sizeof(wxUint32);
664 psz += pa;
665 }
666
667 if (buf && len<=n-sizeof(wxUint32))
668 *(wxUint32*)buf=0;
669
670 return len;
671 }
672
673
674
675 // swap 32bit MB to 16bit String
676 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
677 {
678 size_t len=0;
679
680 while (*(wxUint32*)psz && (!buf || len < n))
681 {
682 char tmp[4];
683 tmp[0] = psz[3]; tmp[1] = psz[2];
684 tmp[2] = psz[1]; tmp[3] = psz[0];
685
686
687 wxUint16 cc[2];
688
689 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
690 if (pa == (size_t)-1)
691 return pa;
692
693 if (buf)
694 {
695 *buf++ = cc[0];
696 if (pa > 1)
697 *buf++ = cc[1];
698 }
699 len += pa;
700 psz += sizeof(wxUint32);
701 }
702
703 if (buf && len<n)
704 *buf=0;
705
706 return len;
707 }
708
709
710 // swap 16bit String to 32bit MB
711 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
712 {
713 size_t len=0;
714
715 while (*psz && (!buf || len < n))
716 {
717 char cc[4];
718
719 // cast is ok for WC_UTF16
720 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
721 if (pa == (size_t)-1)
722 return pa;
723
724 if (buf)
725 {
726 *buf++ = cc[3];
727 *buf++ = cc[2];
728 *buf++ = cc[1];
729 *buf++ = cc[0];
730 }
731 len += sizeof(wxUint32);
732 psz += pa;
733 }
734
735 if (buf && len<=n-sizeof(wxUint32))
736 *(wxUint32*)buf=0;
737
738 return len;
739 }
740
741 #else // WC_UTF16
742
743
744 // copy 32bit MB to 32bit String
745 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
746 {
747 size_t len=0;
748
749 while (*(wxUint32*)psz && (!buf || len < n))
750 {
751 if (buf)
752 *buf++ = *(wxUint32*)psz;
753 len++;
754 psz += sizeof(wxUint32);
755 }
756
757 if (buf && len<n)
758 *buf=0;
759
760 return len;
761 }
762
763
764 // copy 32bit String to 32bit MB
765 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
766 {
767 size_t len=0;
768
769 while (*psz && (!buf || len < n))
770 {
771 if (buf)
772 {
773 *(wxUint32*)buf = *psz;
774 buf += sizeof(wxUint32);
775 }
776
777 len += sizeof(wxUint32);
778 psz++;
779 }
780
781 if (buf && len<=n-sizeof(wxUint32))
782 *(wxUint32*)buf=0;
783
784 return len;
785 }
786
787
788 // swap 32bit MB to 32bit String
789 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
790 {
791 size_t len=0;
792
793 while (*(wxUint32*)psz && (!buf || len < n))
794 {
795 if (buf)
796 {
797 ((char *)buf)[0] = psz[3];
798 ((char *)buf)[1] = psz[2];
799 ((char *)buf)[2] = psz[1];
800 ((char *)buf)[3] = psz[0];
801 buf++;
802 }
803 len++;
804 psz += sizeof(wxUint32);
805 }
806
807 if (buf && len<n)
808 *buf=0;
809
810 return len;
811 }
812
813
814 // swap 32bit String to 32bit MB
815 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
816 {
817 size_t len=0;
818
819 while (*psz && (!buf || len < n))
820 {
821 if (buf)
822 {
823 *buf++ = ((char *)psz)[3];
824 *buf++ = ((char *)psz)[2];
825 *buf++ = ((char *)psz)[1];
826 *buf++ = ((char *)psz)[0];
827 }
828 len += sizeof(wxUint32);
829 psz++;
830 }
831
832 if (buf && len<=n-sizeof(wxUint32))
833 *(wxUint32*)buf=0;
834
835 return len;
836 }
837
838
839 #endif // WC_UTF16
840
841
842 // ============================================================================
843 // The classes doing conversion using the iconv_xxx() functions
844 // ============================================================================
845
846 #ifdef HAVE_ICONV
847
848 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
849 // if output buffer is _exactly_ as big as needed. Such case is (unless there's
850 // yet another bug in glibc) the only case when iconv() returns with (size_t)-1
851 // (which means error) and says there are 0 bytes left in the input buffer --
852 // when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
853 // this alternative test for iconv() failure.
854 // [This bug does not appear in glibc 2.2.]
855 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
856 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
857 (errno != E2BIG || bufLeft != 0))
858 #else
859 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
860 #endif
861
862 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
863
864 // ----------------------------------------------------------------------------
865 // wxMBConv_iconv: encapsulates an iconv character set
866 // ----------------------------------------------------------------------------
867
868 class wxMBConv_iconv : public wxMBConv
869 {
870 public:
871 wxMBConv_iconv(const wxChar *name);
872 virtual ~wxMBConv_iconv();
873
874 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
875 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
876
877 bool IsOk() const
878 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
879
880 protected:
881 // the iconv handlers used to translate from multibyte to wide char and in
882 // the other direction
883 iconv_t m2w,
884 w2m;
885
886 private:
887 // the name (for iconv_open()) of a wide char charset -- if none is
888 // available on this machine, it will remain NULL
889 static const char *ms_wcCharsetName;
890
891 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
892 // different endian-ness than the native one
893 static bool ms_wcNeedsSwap;
894 };
895
896 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
897 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
898
899 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
900 {
901 // Do it the hard way
902 char cname[100];
903 for (size_t i = 0; i < wxStrlen(name)+1; i++)
904 cname[i] = (char) name[i];
905
906 // check for charset that represents wchar_t:
907 if (ms_wcCharsetName == NULL)
908 {
909 ms_wcNeedsSwap = false;
910
911 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
912 ms_wcCharsetName = WC_NAME_BEST;
913 m2w = iconv_open(ms_wcCharsetName, cname);
914
915 if (m2w == (iconv_t)-1)
916 {
917 // try charset w/o bytesex info (e.g. "UCS4")
918 // and check for bytesex ourselves:
919 ms_wcCharsetName = WC_NAME;
920 m2w = iconv_open(ms_wcCharsetName, cname);
921
922 // last bet, try if it knows WCHAR_T pseudo-charset
923 if (m2w == (iconv_t)-1)
924 {
925 ms_wcCharsetName = "WCHAR_T";
926 m2w = iconv_open(ms_wcCharsetName, cname);
927 }
928
929 if (m2w != (iconv_t)-1)
930 {
931 char buf[2], *bufPtr;
932 wchar_t wbuf[2], *wbufPtr;
933 size_t insz, outsz;
934 size_t res;
935
936 buf[0] = 'A';
937 buf[1] = 0;
938 wbuf[0] = 0;
939 insz = 2;
940 outsz = SIZEOF_WCHAR_T * 2;
941 wbufPtr = wbuf;
942 bufPtr = buf;
943
944 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
945 (char**)&wbufPtr, &outsz);
946
947 if (ICONV_FAILED(res, insz))
948 {
949 ms_wcCharsetName = NULL;
950 wxLogLastError(wxT("iconv"));
951 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
952 }
953 else
954 {
955 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
956 }
957 }
958 else
959 {
960 ms_wcCharsetName = NULL;
961
962 // VS: we must not output an error here, since wxWidgets will safely
963 // fall back to using wxEncodingConverter.
964 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
965 //wxLogError(
966 }
967 }
968 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
969 }
970 else // we already have ms_wcCharsetName
971 {
972 m2w = iconv_open(ms_wcCharsetName, cname);
973 }
974
975 // NB: don't ever pass NULL to iconv_open(), it may crash!
976 if ( ms_wcCharsetName )
977 {
978 w2m = iconv_open( cname, ms_wcCharsetName);
979 }
980 else
981 {
982 w2m = (iconv_t)-1;
983 }
984 }
985
986 wxMBConv_iconv::~wxMBConv_iconv()
987 {
988 if ( m2w != (iconv_t)-1 )
989 iconv_close(m2w);
990 if ( w2m != (iconv_t)-1 )
991 iconv_close(w2m);
992 }
993
994 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
995 {
996 size_t inbuf = strlen(psz);
997 size_t outbuf = n * SIZEOF_WCHAR_T;
998 size_t res, cres;
999 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1000 wchar_t *bufPtr = buf;
1001 const char *pszPtr = psz;
1002
1003 if (buf)
1004 {
1005 // have destination buffer, convert there
1006 cres = iconv(m2w,
1007 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1008 (char**)&bufPtr, &outbuf);
1009 res = n - (outbuf / SIZEOF_WCHAR_T);
1010
1011 if (ms_wcNeedsSwap)
1012 {
1013 // convert to native endianness
1014 WC_BSWAP(buf /* _not_ bufPtr */, res)
1015 }
1016
1017 // NB: iconv was given only strlen(psz) characters on input, and so
1018 // it couldn't convert the trailing zero. Let's do it ourselves
1019 // if there's some room left for it in the output buffer.
1020 if (res < n)
1021 buf[res] = 0;
1022 }
1023 else
1024 {
1025 // no destination buffer... convert using temp buffer
1026 // to calculate destination buffer requirement
1027 wchar_t tbuf[8];
1028 res = 0;
1029 do {
1030 bufPtr = tbuf;
1031 outbuf = 8*SIZEOF_WCHAR_T;
1032
1033 cres = iconv(m2w,
1034 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1035 (char**)&bufPtr, &outbuf );
1036
1037 res += 8-(outbuf/SIZEOF_WCHAR_T);
1038 } while ((cres==(size_t)-1) && (errno==E2BIG));
1039 }
1040
1041 if (ICONV_FAILED(cres, inbuf))
1042 {
1043 //VS: it is ok if iconv fails, hence trace only
1044 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1045 return (size_t)-1;
1046 }
1047
1048 return res;
1049 }
1050
1051 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1052 {
1053 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1054 size_t outbuf = n;
1055 size_t res, cres;
1056
1057 wchar_t *tmpbuf = 0;
1058
1059 if (ms_wcNeedsSwap)
1060 {
1061 // need to copy to temp buffer to switch endianness
1062 // this absolutely doesn't rock!
1063 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1064 // could be in read-only memory, or be accessed in some other thread)
1065 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1066 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1067 WC_BSWAP(tmpbuf, inbuf)
1068 psz=tmpbuf;
1069 }
1070
1071 if (buf)
1072 {
1073 // have destination buffer, convert there
1074 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1075
1076 res = n-outbuf;
1077
1078 // NB: iconv was given only wcslen(psz) characters on input, and so
1079 // it couldn't convert the trailing zero. Let's do it ourselves
1080 // if there's some room left for it in the output buffer.
1081 if (res < n)
1082 buf[0] = 0;
1083 }
1084 else
1085 {
1086 // no destination buffer... convert using temp buffer
1087 // to calculate destination buffer requirement
1088 char tbuf[16];
1089 res = 0;
1090 do {
1091 buf = tbuf; outbuf = 16;
1092
1093 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1094
1095 res += 16 - outbuf;
1096 } while ((cres==(size_t)-1) && (errno==E2BIG));
1097 }
1098
1099 if (ms_wcNeedsSwap)
1100 {
1101 free(tmpbuf);
1102 }
1103
1104 if (ICONV_FAILED(cres, inbuf))
1105 {
1106 //VS: it is ok if iconv fails, hence trace only
1107 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1108 return (size_t)-1;
1109 }
1110
1111 return res;
1112 }
1113
1114 #endif // HAVE_ICONV
1115
1116
1117 // ============================================================================
1118 // Win32 conversion classes
1119 // ============================================================================
1120
1121 #ifdef wxHAVE_WIN32_MB2WC
1122
1123 // from utils.cpp
1124 #if wxUSE_FONTMAP
1125 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1126 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1127 #endif
1128
1129 class wxMBConv_win32 : public wxMBConv
1130 {
1131 public:
1132 wxMBConv_win32()
1133 {
1134 m_CodePage = CP_ACP;
1135 }
1136
1137 #if wxUSE_FONTMAP
1138 wxMBConv_win32(const wxChar* name)
1139 {
1140 m_CodePage = wxCharsetToCodepage(name);
1141 }
1142
1143 wxMBConv_win32(wxFontEncoding encoding)
1144 {
1145 m_CodePage = wxEncodingToCodepage(encoding);
1146 }
1147 #endif
1148
1149 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1150 {
1151 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1152 // the behaviour is not compatible with the Unix version (using iconv)
1153 // and break the library itself, e.g. wxTextInputStream::NextChar()
1154 // wouldn't work if reading an incomplete MB char didn't result in an
1155 // error
1156 const size_t len = ::MultiByteToWideChar
1157 (
1158 m_CodePage, // code page
1159 MB_ERR_INVALID_CHARS, // flags: fall on error
1160 psz, // input string
1161 -1, // its length (NUL-terminated)
1162 buf, // output string
1163 buf ? n : 0 // size of output buffer
1164 );
1165
1166 // note that it returns count of written chars for buf != NULL and size
1167 // of the needed buffer for buf == NULL so in either case the length of
1168 // the string (which never includes the terminating NUL) is one less
1169 return len ? len - 1 : (size_t)-1;
1170 }
1171
1172 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1173 {
1174 /*
1175 we have a problem here: by default, WideCharToMultiByte() may
1176 replace characters unrepresentable in the target code page with bad
1177 quality approximations such as turning "1/2" symbol (U+00BD) into
1178 "1" for the code pages which don't have it and we, obviously, want
1179 to avoid this at any price
1180
1181 the trouble is that this function does it _silently_, i.e. it won't
1182 even tell us whether it did or not... Win98/2000 and higher provide
1183 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1184 we have to resort to a round trip, i.e. check that converting back
1185 results in the same string -- this is, of course, expensive but
1186 otherwise we simply can't be sure to not garble the data.
1187 */
1188
1189 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1190 // it doesn't work with CJK encodings (which we test for rather roughly
1191 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1192 // supporting it
1193 BOOL usedDef wxDUMMY_INITIALIZE(false),
1194 *pUsedDef;
1195 int flags;
1196 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1197 {
1198 // it's our lucky day
1199 flags = WC_NO_BEST_FIT_CHARS;
1200 pUsedDef = &usedDef;
1201 }
1202 else // old system or unsupported encoding
1203 {
1204 flags = 0;
1205 pUsedDef = NULL;
1206 }
1207
1208 const size_t len = ::WideCharToMultiByte
1209 (
1210 m_CodePage, // code page
1211 flags, // either none or no best fit
1212 pwz, // input string
1213 -1, // it is (wide) NUL-terminated
1214 buf, // output buffer
1215 buf ? n : 0, // and its size
1216 NULL, // default "replacement" char
1217 pUsedDef // [out] was it used?
1218 );
1219
1220 if ( !len )
1221 {
1222 // function totally failed
1223 return (size_t)-1;
1224 }
1225
1226 // if we were really converting, check if we succeeded
1227 if ( buf )
1228 {
1229 if ( flags )
1230 {
1231 // check if the conversion failed, i.e. if any replacements
1232 // were done
1233 if ( usedDef )
1234 return (size_t)-1;
1235 }
1236 else // we must resort to double tripping...
1237 {
1238 wxWCharBuffer wcBuf(n);
1239 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1240 wcscmp(wcBuf, pwz) != 0 )
1241 {
1242 // we didn't obtain the same thing we started from, hence
1243 // the conversion was lossy and we consider that it failed
1244 return (size_t)-1;
1245 }
1246 }
1247 }
1248
1249 // see the comment above for the reason of "len - 1"
1250 return len - 1;
1251 }
1252
1253 bool IsOk() const { return m_CodePage != -1; }
1254
1255 private:
1256 static bool CanUseNoBestFit()
1257 {
1258 static int s_isWin98Or2k = -1;
1259
1260 if ( s_isWin98Or2k == -1 )
1261 {
1262 int verMaj, verMin;
1263 switch ( wxGetOsVersion(&verMaj, &verMin) )
1264 {
1265 case wxWIN95:
1266 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1267 break;
1268
1269 case wxWINDOWS_NT:
1270 s_isWin98Or2k = verMaj >= 5;
1271 break;
1272
1273 default:
1274 // unknown, be conseravtive by default
1275 s_isWin98Or2k = 0;
1276 }
1277
1278 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1279 }
1280
1281 return s_isWin98Or2k == 1;
1282 }
1283
1284 long m_CodePage;
1285 };
1286
1287 #endif // wxHAVE_WIN32_MB2WC
1288
1289 // ============================================================================
1290 // Mac conversion classes
1291 // ============================================================================
1292
1293 #if defined(__WXMAC__) && defined(TARGET_CARBON)
1294
1295 class wxMBConv_mac : public wxMBConv
1296 {
1297 public:
1298 wxMBConv_mac()
1299 {
1300 Init(CFStringGetSystemEncoding()) ;
1301 }
1302
1303 wxMBConv_mac(const wxChar* name)
1304 {
1305 Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
1306 }
1307
1308 wxMBConv_mac(wxFontEncoding encoding)
1309 {
1310 Init( wxMacGetSystemEncFromFontEnc(encoding) );
1311 }
1312
1313 ~wxMBConv_mac()
1314 {
1315 OSStatus status = noErr ;
1316 status = TECDisposeConverter(m_MB2WC_converter);
1317 status = TECDisposeConverter(m_WC2MB_converter);
1318 }
1319
1320
1321 void Init( TextEncodingBase encoding)
1322 {
1323 OSStatus status = noErr ;
1324 m_char_encoding = encoding ;
1325 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
1326
1327 status = TECCreateConverter(&m_MB2WC_converter,
1328 m_char_encoding,
1329 m_unicode_encoding);
1330 status = TECCreateConverter(&m_WC2MB_converter,
1331 m_unicode_encoding,
1332 m_char_encoding);
1333 }
1334
1335 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1336 {
1337 OSStatus status = noErr ;
1338 ByteCount byteOutLen ;
1339 ByteCount byteInLen = strlen(psz) ;
1340 wchar_t *tbuf = NULL ;
1341 UniChar* ubuf = NULL ;
1342 size_t res = 0 ;
1343
1344 if (buf == NULL)
1345 {
1346 n = byteInLen ;
1347 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
1348 }
1349 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
1350 #if SIZEOF_WCHAR_T == 4
1351 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
1352 #else
1353 ubuf = (UniChar*) (buf ? buf : tbuf) ;
1354 #endif
1355 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
1356 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
1357 #if SIZEOF_WCHAR_T == 4
1358 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
1359 // is not properly terminated we get random characters at the end
1360 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
1361 wxMBConvUTF16BE converter ;
1362 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
1363 free( ubuf ) ;
1364 #else
1365 res = byteOutLen / sizeof( UniChar ) ;
1366 #endif
1367 if ( buf == NULL )
1368 free(tbuf) ;
1369
1370 if ( buf && res < n)
1371 buf[res] = 0;
1372
1373 return res ;
1374 }
1375
1376 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
1377 {
1378 OSStatus status = noErr ;
1379 ByteCount byteOutLen ;
1380 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
1381
1382 char *tbuf = NULL ;
1383
1384 if (buf == NULL)
1385 {
1386 // worst case
1387 n = byteInLen * 2 ;
1388 tbuf = (char*) malloc( n ) ;
1389 }
1390
1391 ByteCount byteBufferLen = n ;
1392 UniChar* ubuf = NULL ;
1393 #if SIZEOF_WCHAR_T == 4
1394 wxMBConvUTF16BE converter ;
1395 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
1396 byteInLen = unicharlen ;
1397 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
1398 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
1399 #else
1400 ubuf = (UniChar*) psz ;
1401 #endif
1402 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
1403 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
1404 #if SIZEOF_WCHAR_T == 4
1405 free( ubuf ) ;
1406 #endif
1407 if ( buf == NULL )
1408 free(tbuf) ;
1409
1410 size_t res = byteOutLen ;
1411 if ( buf && res < n)
1412 buf[res] = 0;
1413
1414 return res ;
1415 }
1416
1417 bool IsOk() const
1418 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
1419
1420 private:
1421 TECObjectRef m_MB2WC_converter ;
1422 TECObjectRef m_WC2MB_converter ;
1423
1424 TextEncodingBase m_char_encoding ;
1425 TextEncodingBase m_unicode_encoding ;
1426 };
1427
1428 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1429
1430 // ============================================================================
1431 // wxEncodingConverter based conversion classes
1432 // ============================================================================
1433
1434 #if wxUSE_FONTMAP
1435
1436 class wxMBConv_wxwin : public wxMBConv
1437 {
1438 private:
1439 void Init()
1440 {
1441 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
1442 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
1443 }
1444
1445 public:
1446 // temporarily just use wxEncodingConverter stuff,
1447 // so that it works while a better implementation is built
1448 wxMBConv_wxwin(const wxChar* name)
1449 {
1450 if (name)
1451 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
1452 else
1453 m_enc = wxFONTENCODING_SYSTEM;
1454
1455 Init();
1456 }
1457
1458 wxMBConv_wxwin(wxFontEncoding enc)
1459 {
1460 m_enc = enc;
1461
1462 Init();
1463 }
1464
1465 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
1466 {
1467 size_t inbuf = strlen(psz);
1468 if (buf)
1469 m2w.Convert(psz,buf);
1470 return inbuf;
1471 }
1472
1473 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
1474 {
1475 const size_t inbuf = wxWcslen(psz);
1476 if (buf)
1477 w2m.Convert(psz,buf);
1478
1479 return inbuf;
1480 }
1481
1482 bool IsOk() const { return m_ok; }
1483
1484 public:
1485 wxFontEncoding m_enc;
1486 wxEncodingConverter m2w, w2m;
1487
1488 // were we initialized successfully?
1489 bool m_ok;
1490
1491 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
1492 };
1493
1494 #endif // wxUSE_FONTMAP
1495
1496 // ============================================================================
1497 // wxCSConv implementation
1498 // ============================================================================
1499
1500 void wxCSConv::Init()
1501 {
1502 m_name = NULL;
1503 m_convReal = NULL;
1504 m_deferred = true;
1505 }
1506
1507 wxCSConv::wxCSConv(const wxChar *charset)
1508 {
1509 Init();
1510
1511 if ( charset )
1512 {
1513 SetName(charset);
1514 }
1515
1516 m_encoding = wxFONTENCODING_SYSTEM;
1517 }
1518
1519 wxCSConv::wxCSConv(wxFontEncoding encoding)
1520 {
1521 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
1522 {
1523 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
1524
1525 encoding = wxFONTENCODING_SYSTEM;
1526 }
1527
1528 Init();
1529
1530 m_encoding = encoding;
1531 }
1532
1533 wxCSConv::~wxCSConv()
1534 {
1535 Clear();
1536 }
1537
1538 wxCSConv::wxCSConv(const wxCSConv& conv)
1539 : wxMBConv()
1540 {
1541 Init();
1542
1543 SetName(conv.m_name);
1544 m_encoding = conv.m_encoding;
1545 }
1546
1547 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
1548 {
1549 Clear();
1550
1551 SetName(conv.m_name);
1552 m_encoding = conv.m_encoding;
1553
1554 return *this;
1555 }
1556
1557 void wxCSConv::Clear()
1558 {
1559 free(m_name);
1560 delete m_convReal;
1561
1562 m_name = NULL;
1563 m_convReal = NULL;
1564 }
1565
1566 void wxCSConv::SetName(const wxChar *charset)
1567 {
1568 if (charset)
1569 {
1570 m_name = wxStrdup(charset);
1571 m_deferred = true;
1572 }
1573 }
1574
1575 wxMBConv *wxCSConv::DoCreate() const
1576 {
1577 // check for the special case of ASCII or ISO8859-1 charset: as we have
1578 // special knowledge of it anyhow, we don't need to create a special
1579 // conversion object
1580 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
1581 {
1582 // don't convert at all
1583 return NULL;
1584 }
1585
1586 // we trust OS to do conversion better than we can so try external
1587 // conversion methods first
1588 //
1589 // the full order is:
1590 // 1. OS conversion (iconv() under Unix or Win32 API)
1591 // 2. hard coded conversions for UTF
1592 // 3. wxEncodingConverter as fall back
1593
1594 // step (1)
1595 #ifdef HAVE_ICONV
1596 #if !wxUSE_FONTMAP
1597 if ( m_name )
1598 #endif // !wxUSE_FONTMAP
1599 {
1600 wxString name(m_name);
1601
1602 #if wxUSE_FONTMAP
1603 if ( name.empty() )
1604 name = wxFontMapper::Get()->GetEncodingName(m_encoding);
1605 #endif // wxUSE_FONTMAP
1606
1607 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
1608 if ( conv->IsOk() )
1609 return conv;
1610
1611 delete conv;
1612 }
1613 #endif // HAVE_ICONV
1614
1615 #ifdef wxHAVE_WIN32_MB2WC
1616 {
1617 #if wxUSE_FONTMAP
1618 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
1619 : new wxMBConv_win32(m_encoding);
1620 if ( conv->IsOk() )
1621 return conv;
1622
1623 delete conv;
1624 #else
1625 return NULL;
1626 #endif
1627 }
1628 #endif // wxHAVE_WIN32_MB2WC
1629 #if defined(__WXMAC__)
1630 {
1631 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
1632 {
1633
1634 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
1635 : new wxMBConv_mac(m_encoding);
1636 if ( conv->IsOk() )
1637 return conv;
1638
1639 delete conv;
1640 }
1641 }
1642 #endif
1643 // step (2)
1644 wxFontEncoding enc = m_encoding;
1645 #if wxUSE_FONTMAP
1646 if ( enc == wxFONTENCODING_SYSTEM && m_name )
1647 {
1648 // use "false" to suppress interactive dialogs -- we can be called from
1649 // anywhere and popping up a dialog from here is the last thing we want to
1650 // do
1651 enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
1652 }
1653 #endif // wxUSE_FONTMAP
1654
1655 switch ( enc )
1656 {
1657 case wxFONTENCODING_UTF7:
1658 return new wxMBConvUTF7;
1659
1660 case wxFONTENCODING_UTF8:
1661 return new wxMBConvUTF8;
1662
1663 case wxFONTENCODING_UTF16BE:
1664 return new wxMBConvUTF16BE;
1665
1666 case wxFONTENCODING_UTF16LE:
1667 return new wxMBConvUTF16LE;
1668
1669 case wxFONTENCODING_UTF32BE:
1670 return new wxMBConvUTF32BE;
1671
1672 case wxFONTENCODING_UTF32LE:
1673 return new wxMBConvUTF32LE;
1674
1675 default:
1676 // nothing to do but put here to suppress gcc warnings
1677 ;
1678 }
1679
1680 // step (3)
1681 #if wxUSE_FONTMAP
1682 {
1683 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
1684 : new wxMBConv_wxwin(m_encoding);
1685 if ( conv->IsOk() )
1686 return conv;
1687
1688 delete conv;
1689 }
1690 #endif // wxUSE_FONTMAP
1691
1692 // NB: This is a hack to prevent deadlock. What could otherwise happen
1693 // in Unicode build: wxConvLocal creation ends up being here
1694 // because of some failure and logs the error. But wxLog will try to
1695 // attach timestamp, for which it will need wxConvLocal (to convert
1696 // time to char* and then wchar_t*), but that fails, tries to log
1697 // error, but wxLog has a (already locked) critical section that
1698 // guards static buffer.
1699 static bool alreadyLoggingError = false;
1700 if (!alreadyLoggingError)
1701 {
1702 alreadyLoggingError = true;
1703 wxLogError(_("Cannot convert from the charset '%s'!"),
1704 m_name ? m_name
1705 :
1706 #if wxUSE_FONTMAP
1707 wxFontMapper::GetEncodingDescription(m_encoding).c_str()
1708 #else // !wxUSE_FONTMAP
1709 wxString::Format(_("encoding %s"), m_encoding).c_str()
1710 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1711 );
1712 alreadyLoggingError = false;
1713 }
1714
1715 return NULL;
1716 }
1717
1718 void wxCSConv::CreateConvIfNeeded() const
1719 {
1720 if ( m_deferred )
1721 {
1722 wxCSConv *self = (wxCSConv *)this; // const_cast
1723
1724 #if wxUSE_INTL
1725 // if we don't have neither the name nor the encoding, use the default
1726 // encoding for this system
1727 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
1728 {
1729 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
1730 }
1731 #endif // wxUSE_INTL
1732
1733 self->m_convReal = DoCreate();
1734 self->m_deferred = false;
1735 }
1736 }
1737
1738 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1739 {
1740 CreateConvIfNeeded();
1741
1742 if (m_convReal)
1743 return m_convReal->MB2WC(buf, psz, n);
1744
1745 // latin-1 (direct)
1746 size_t len = strlen(psz);
1747
1748 if (buf)
1749 {
1750 for (size_t c = 0; c <= len; c++)
1751 buf[c] = (unsigned char)(psz[c]);
1752 }
1753
1754 return len;
1755 }
1756
1757 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1758 {
1759 CreateConvIfNeeded();
1760
1761 if (m_convReal)
1762 return m_convReal->WC2MB(buf, psz, n);
1763
1764 // latin-1 (direct)
1765 const size_t len = wxWcslen(psz);
1766 if (buf)
1767 {
1768 for (size_t c = 0; c <= len; c++)
1769 {
1770 if (psz[c] > 0xFF)
1771 return (size_t)-1;
1772 buf[c] = psz[c];
1773 }
1774 }
1775 else
1776 {
1777 for (size_t c = 0; c <= len; c++)
1778 {
1779 if (psz[c] > 0xFF)
1780 return (size_t)-1;
1781 }
1782 }
1783
1784 return len;
1785 }
1786
1787 // ----------------------------------------------------------------------------
1788 // globals
1789 // ----------------------------------------------------------------------------
1790
1791 #ifdef __WINDOWS__
1792 static wxMBConv_win32 wxConvLibcObj;
1793 #elif defined(__WXMAC__) && !defined(__MACH__)
1794 static wxMBConv_mac wxConvLibcObj ;
1795 #else
1796 static wxMBConvLibc wxConvLibcObj;
1797 #endif
1798
1799 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
1800 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
1801 static wxMBConvUTF7 wxConvUTF7Obj;
1802 static wxMBConvUTF8 wxConvUTF8Obj;
1803
1804
1805 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
1806 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
1807 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
1808 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
1809 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
1810 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
1811
1812 #else // !wxUSE_WCHAR_T
1813
1814 // stand-ins in absence of wchar_t
1815 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
1816 wxConvISO8859_1,
1817 wxConvLocal,
1818 wxConvUTF8;
1819
1820 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
1821
1822