]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
WinCE project and wxDC corrections
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
9 // (c) 2000-2003 Vadim Zeitlin
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ============================================================================
14 // declarations
15 // ============================================================================
16
17 // ----------------------------------------------------------------------------
18 // headers
19 // ----------------------------------------------------------------------------
20
21 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
22 #pragma implementation "strconv.h"
23 #endif
24
25 // For compilers that support precompilation, includes "wx.h".
26 #include "wx/wxprec.h"
27
28 #ifdef __BORLANDC__
29 #pragma hdrstop
30 #endif
31
32 #ifndef WX_PRECOMP
33 #include "wx/intl.h"
34 #include "wx/log.h"
35 #endif // WX_PRECOMP
36
37 #include "wx/strconv.h"
38
39 #if wxUSE_WCHAR_T
40
41 #ifdef __WXMSW__
42 #include "wx/msw/private.h"
43 #include "wx/msw/missing.h"
44 #endif
45
46 #ifndef __WXWINCE__
47 #include <errno.h>
48 #endif
49
50 #include <ctype.h>
51 #include <string.h>
52 #include <stdlib.h>
53
54 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
55 #define wxHAVE_WIN32_MB2WC
56 #endif // __WIN32__ but !__WXMICROWIN__
57
58 // ----------------------------------------------------------------------------
59 // headers
60 // ----------------------------------------------------------------------------
61
62 #ifdef __SALFORDC__
63 #include <clib.h>
64 #endif
65
66 #ifdef HAVE_ICONV
67 #include <iconv.h>
68 #endif
69
70 #include "wx/encconv.h"
71 #include "wx/fontmap.h"
72
73 #ifdef __WXMAC__
74 #include "ATSUnicode.h"
75 #include "TextCommon.h"
76 #include "TextEncodingConverter.h"
77
78 #include "wx/mac/private.h" // includes mac headers
79 #endif
80 // ----------------------------------------------------------------------------
81 // macros
82 // ----------------------------------------------------------------------------
83
// Byte-swap, in place, the first len elements of the array str.
//
// NB: both macros expand to a complete braced block and are invoked without a
//     trailing semicolon elsewhere in this file, so they deliberately do not
//     use the do { } while (0) idiom.
//
// The arguments are parenthesized so that expressions can be passed safely
// and the counter is a size_t so that it can't wrap before reaching len.
#define BSWAP_UCS4(str, len) \
    { \
        size_t _c; \
        for (_c = 0; _c < (size_t)(len); _c++) \
            (str)[_c] = wxUINT32_SWAP_ALWAYS((str)[_c]); \
    }
#define BSWAP_UTF16(str, len) \
    { \
        size_t _c; \
        for (_c = 0; _c < (size_t)(len); _c++) \
            (str)[_c] = wxUINT16_SWAP_ALWAYS((str)[_c]); \
    }
86
87 #if SIZEOF_WCHAR_T == 4
88 #define WC_NAME "UCS4"
89 #define WC_BSWAP BSWAP_UCS4
90 #ifdef WORDS_BIGENDIAN
91 #define WC_NAME_BEST "UCS-4BE"
92 #else
93 #define WC_NAME_BEST "UCS-4LE"
94 #endif
95 #elif SIZEOF_WCHAR_T == 2
96 #define WC_NAME "UTF16"
97 #define WC_BSWAP BSWAP_UTF16
98 #define WC_UTF16
99 #ifdef WORDS_BIGENDIAN
100 #define WC_NAME_BEST "UTF-16BE"
101 #else
102 #define WC_NAME_BEST "UTF-16LE"
103 #endif
104 #else // sizeof(wchar_t) != 2 nor 4
105 // does this ever happen?
106 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
107 #endif
108
109 // ============================================================================
110 // implementation
111 // ============================================================================
112
113 // ----------------------------------------------------------------------------
114 // UTF-16 en/decoding to/from UCS-4
115 // ----------------------------------------------------------------------------
116
117
118 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
119 {
120 if (input<=0xffff)
121 {
122 if (output)
123 *output = (wxUint16) input;
124 return 1;
125 }
126 else if (input>=0x110000)
127 {
128 return (size_t)-1;
129 }
130 else
131 {
132 if (output)
133 {
134 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
135 *output = (wxUint16) ((input&0x3ff)+0xdc00);
136 }
137 return 2;
138 }
139 }
140
141 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
142 {
143 if ((*input<0xd800) || (*input>0xdfff))
144 {
145 output = *input;
146 return 1;
147 }
148 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
149 {
150 output = *input;
151 return (size_t)-1;
152 }
153 else
154 {
155 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
156 return 2;
157 }
158 }
159
160
161 // ----------------------------------------------------------------------------
162 // wxMBConv
163 // ----------------------------------------------------------------------------
164
165 wxMBConv::~wxMBConv()
166 {
167 // nothing to do here
168 }
169
170 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
171 {
172 if ( psz )
173 {
174 // calculate the length of the buffer needed first
175 size_t nLen = MB2WC(NULL, psz, 0);
176 if ( nLen != (size_t)-1 )
177 {
178 // now do the actual conversion
179 wxWCharBuffer buf(nLen);
180 MB2WC(buf.data(), psz, nLen + 1); // with the trailing NUL
181
182 return buf;
183 }
184 }
185
186 wxWCharBuffer buf((wchar_t *)NULL);
187
188 return buf;
189 }
190
191 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
192 {
193 if ( pwz )
194 {
195 size_t nLen = WC2MB(NULL, pwz, 0);
196 if ( nLen != (size_t)-1 )
197 {
198 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
199 WC2MB(buf.data(), pwz, nLen + 4);
200
201 return buf;
202 }
203 }
204
205 wxCharBuffer buf((char *)NULL);
206
207 return buf;
208 }
209
210 // ----------------------------------------------------------------------------
211 // wxMBConvLibc
212 // ----------------------------------------------------------------------------
213
214 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
215 {
216 return wxMB2WC(buf, psz, n);
217 }
218
219 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
220 {
221 return wxWC2MB(buf, psz, n);
222 }
223
224 // ----------------------------------------------------------------------------
225 // UTF-7
226 // ----------------------------------------------------------------------------
227
228 #if 0
229 static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
230 "abcdefghijklmnopqrstuvwxyz"
231 "0123456789'(),-./:?";
232 static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
233 static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
234 "abcdefghijklmnopqrstuvwxyz"
235 "0123456789+/";
236 #endif
237
238 // TODO: write actual implementations of UTF-7 here
239 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
240 const char * WXUNUSED(psz),
241 size_t WXUNUSED(n)) const
242 {
243 return 0;
244 }
245
246 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
247 const wchar_t * WXUNUSED(psz),
248 size_t WXUNUSED(n)) const
249 {
250 return 0;
251 }
252
253 // ----------------------------------------------------------------------------
254 // UTF-8
255 // ----------------------------------------------------------------------------
256
257 static wxUint32 utf8_max[]=
258 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
259
260 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
261 {
262 size_t len = 0;
263
264 while (*psz && ((!buf) || (len < n)))
265 {
266 unsigned char cc = *psz++, fc = cc;
267 unsigned cnt;
268 for (cnt = 0; fc & 0x80; cnt++)
269 fc <<= 1;
270 if (!cnt)
271 {
272 // plain ASCII char
273 if (buf)
274 *buf++ = cc;
275 len++;
276 }
277 else
278 {
279 cnt--;
280 if (!cnt)
281 {
282 // invalid UTF-8 sequence
283 return (size_t)-1;
284 }
285 else
286 {
287 unsigned ocnt = cnt - 1;
288 wxUint32 res = cc & (0x3f >> cnt);
289 while (cnt--)
290 {
291 cc = *psz++;
292 if ((cc & 0xC0) != 0x80)
293 {
294 // invalid UTF-8 sequence
295 return (size_t)-1;
296 }
297 res = (res << 6) | (cc & 0x3f);
298 }
299 if (res <= utf8_max[ocnt])
300 {
301 // illegal UTF-8 encoding
302 return (size_t)-1;
303 }
304 #ifdef WC_UTF16
305 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
306 size_t pa = encode_utf16(res, (wxUint16 *)buf);
307 if (pa == (size_t)-1)
308 return (size_t)-1;
309 if (buf)
310 buf += pa;
311 len += pa;
312 #else // !WC_UTF16
313 if (buf)
314 *buf++ = res;
315 len++;
316 #endif // WC_UTF16/!WC_UTF16
317 }
318 }
319 }
320 if (buf && (len < n))
321 *buf = 0;
322 return len;
323 }
324
325 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
326 {
327 size_t len = 0;
328
329 while (*psz && ((!buf) || (len < n)))
330 {
331 wxUint32 cc;
332 #ifdef WC_UTF16
333 // cast is ok for WC_UTF16
334 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
335 psz += (pa == (size_t)-1) ? 1 : pa;
336 #else
337 cc=(*psz++) & 0x7fffffff;
338 #endif
339 unsigned cnt;
340 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
341 if (!cnt)
342 {
343 // plain ASCII char
344 if (buf)
345 *buf++ = (char) cc;
346 len++;
347 }
348
349 else
350 {
351 len += cnt + 1;
352 if (buf)
353 {
354 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
355 while (cnt--)
356 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
357 }
358 }
359 }
360
361 if (buf && (len<n)) *buf = 0;
362
363 return len;
364 }
365
366
367
368
369 // ----------------------------------------------------------------------------
370 // UTF-16
371 // ----------------------------------------------------------------------------
372
373 #ifdef WORDS_BIGENDIAN
374 #define wxMBConvUTF16straight wxMBConvUTF16BE
375 #define wxMBConvUTF16swap wxMBConvUTF16LE
376 #else
377 #define wxMBConvUTF16swap wxMBConvUTF16BE
378 #define wxMBConvUTF16straight wxMBConvUTF16LE
379 #endif
380
381
382 #ifdef WC_UTF16
383
384 // copy 16bit MB to 16bit String
385 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
386 {
387 size_t len=0;
388
389 while (*(wxUint16*)psz && (!buf || len < n))
390 {
391 if (buf)
392 *buf++ = *(wxUint16*)psz;
393 len++;
394
395 psz += sizeof(wxUint16);
396 }
397 if (buf && len<n) *buf=0;
398
399 return len;
400 }
401
402
403 // copy 16bit String to 16bit MB
404 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
405 {
406 size_t len=0;
407
408 while (*psz && (!buf || len < n))
409 {
410 if (buf)
411 {
412 *(wxUint16*)buf = *psz;
413 buf += sizeof(wxUint16);
414 }
415 len += sizeof(wxUint16);
416 psz++;
417 }
418 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
419
420 return len;
421 }
422
423
424 // swap 16bit MB to 16bit String
425 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
426 {
427 size_t len=0;
428
429 while (*(wxUint16*)psz && (!buf || len < n))
430 {
431 if (buf)
432 {
433 ((char *)buf)[0] = psz[1];
434 ((char *)buf)[1] = psz[0];
435 buf++;
436 }
437 len++;
438 psz += sizeof(wxUint16);
439 }
440 if (buf && len<n) *buf=0;
441
442 return len;
443 }
444
445
446 // swap 16bit MB to 16bit String
447 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
448 {
449 size_t len=0;
450
451 while (*psz && (!buf || len < n))
452 {
453 if (buf)
454 {
455 *buf++ = ((char*)psz)[1];
456 *buf++ = ((char*)psz)[0];
457 }
458 len += sizeof(wxUint16);
459 psz++;
460 }
461 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
462
463 return len;
464 }
465
466
467 #else // WC_UTF16
468
469
470 // copy 16bit MB to 32bit String
471 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
472 {
473 size_t len=0;
474
475 while (*(wxUint16*)psz && (!buf || len < n))
476 {
477 wxUint32 cc;
478 size_t pa=decode_utf16((wxUint16*)psz, cc);
479 if (pa == (size_t)-1)
480 return pa;
481
482 if (buf)
483 *buf++ = cc;
484 len++;
485 psz += pa * sizeof(wxUint16);
486 }
487 if (buf && len<n) *buf=0;
488
489 return len;
490 }
491
492
493 // copy 32bit String to 16bit MB
494 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
495 {
496 size_t len=0;
497
498 while (*psz && (!buf || len < n))
499 {
500 wxUint16 cc[2];
501 size_t pa=encode_utf16(*psz, cc);
502
503 if (pa == (size_t)-1)
504 return pa;
505
506 if (buf)
507 {
508 *(wxUint16*)buf = cc[0];
509 buf += sizeof(wxUint16);
510 if (pa > 1)
511 {
512 *(wxUint16*)buf = cc[1];
513 buf += sizeof(wxUint16);
514 }
515 }
516
517 len += pa*sizeof(wxUint16);
518 psz++;
519 }
520 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
521
522 return len;
523 }
524
525
526 // swap 16bit MB to 32bit String
527 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
528 {
529 size_t len=0;
530
531 while (*(wxUint16*)psz && (!buf || len < n))
532 {
533 wxUint32 cc;
534 char tmp[4];
535 tmp[0]=psz[1]; tmp[1]=psz[0];
536 tmp[2]=psz[3]; tmp[3]=psz[2];
537
538 size_t pa=decode_utf16((wxUint16*)tmp, cc);
539 if (pa == (size_t)-1)
540 return pa;
541
542 if (buf)
543 *buf++ = cc;
544
545 len++;
546 psz += pa * sizeof(wxUint16);
547 }
548 if (buf && len<n) *buf=0;
549
550 return len;
551 }
552
553
554 // swap 32bit String to 16bit MB
555 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
556 {
557 size_t len=0;
558
559 while (*psz && (!buf || len < n))
560 {
561 wxUint16 cc[2];
562 size_t pa=encode_utf16(*psz, cc);
563
564 if (pa == (size_t)-1)
565 return pa;
566
567 if (buf)
568 {
569 *buf++ = ((char*)cc)[1];
570 *buf++ = ((char*)cc)[0];
571 if (pa > 1)
572 {
573 *buf++ = ((char*)cc)[3];
574 *buf++ = ((char*)cc)[2];
575 }
576 }
577
578 len += pa*sizeof(wxUint16);
579 psz++;
580 }
581 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
582
583 return len;
584 }
585
586 #endif // WC_UTF16
587
588
589 // ----------------------------------------------------------------------------
590 // UTF-32
591 // ----------------------------------------------------------------------------
592
593 #ifdef WORDS_BIGENDIAN
594 #define wxMBConvUTF32straight wxMBConvUTF32BE
595 #define wxMBConvUTF32swap wxMBConvUTF32LE
596 #else
597 #define wxMBConvUTF32swap wxMBConvUTF32BE
598 #define wxMBConvUTF32straight wxMBConvUTF32LE
599 #endif
600
601
602 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
603 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
604
605
606 #ifdef WC_UTF16
607
608 // copy 32bit MB to 16bit String
609 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
610 {
611 size_t len=0;
612
613 while (*(wxUint32*)psz && (!buf || len < n))
614 {
615 wxUint16 cc[2];
616
617 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
618 if (pa == (size_t)-1)
619 return pa;
620
621 if (buf)
622 {
623 *buf++ = cc[0];
624 if (pa > 1)
625 *buf++ = cc[1];
626 }
627 len += pa;
628 psz += sizeof(wxUint32);
629 }
630 if (buf && len<n) *buf=0;
631
632 return len;
633 }
634
635
636 // copy 16bit String to 32bit MB
637 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
638 {
639 size_t len=0;
640
641 while (*psz && (!buf || len < n))
642 {
643 wxUint32 cc;
644
645 // cast is ok for WC_UTF16
646 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
647 if (pa == (size_t)-1)
648 return pa;
649
650 if (buf)
651 {
652 *(wxUint32*)buf = cc;
653 buf += sizeof(wxUint32);
654 }
655 len += sizeof(wxUint32);
656 psz += pa;
657 }
658
659 if (buf && len<=n-sizeof(wxUint32))
660 *(wxUint32*)buf=0;
661
662 return len;
663 }
664
665
666
667 // swap 32bit MB to 16bit String
668 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
669 {
670 size_t len=0;
671
672 while (*(wxUint32*)psz && (!buf || len < n))
673 {
674 char tmp[4];
675 tmp[0] = psz[3]; tmp[1] = psz[2];
676 tmp[2] = psz[1]; tmp[3] = psz[0];
677
678
679 wxUint16 cc[2];
680
681 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
682 if (pa == (size_t)-1)
683 return pa;
684
685 if (buf)
686 {
687 *buf++ = cc[0];
688 if (pa > 1)
689 *buf++ = cc[1];
690 }
691 len += pa;
692 psz += sizeof(wxUint32);
693 }
694
695 if (buf && len<n)
696 *buf=0;
697
698 return len;
699 }
700
701
702 // swap 16bit String to 32bit MB
703 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
704 {
705 size_t len=0;
706
707 while (*psz && (!buf || len < n))
708 {
709 char cc[4];
710
711 // cast is ok for WC_UTF16
712 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
713 if (pa == (size_t)-1)
714 return pa;
715
716 if (buf)
717 {
718 *buf++ = cc[3];
719 *buf++ = cc[2];
720 *buf++ = cc[1];
721 *buf++ = cc[0];
722 }
723 len += sizeof(wxUint32);
724 psz += pa;
725 }
726
727 if (buf && len<=n-sizeof(wxUint32))
728 *(wxUint32*)buf=0;
729
730 return len;
731 }
732
733 #else // WC_UTF16
734
735
736 // copy 32bit MB to 32bit String
737 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
738 {
739 size_t len=0;
740
741 while (*(wxUint32*)psz && (!buf || len < n))
742 {
743 if (buf)
744 *buf++ = *(wxUint32*)psz;
745 len++;
746 psz += sizeof(wxUint32);
747 }
748
749 if (buf && len<n)
750 *buf=0;
751
752 return len;
753 }
754
755
756 // copy 32bit String to 32bit MB
757 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
758 {
759 size_t len=0;
760
761 while (*psz && (!buf || len < n))
762 {
763 if (buf)
764 {
765 *(wxUint32*)buf = *psz;
766 buf += sizeof(wxUint32);
767 }
768
769 len += sizeof(wxUint32);
770 psz++;
771 }
772
773 if (buf && len<=n-sizeof(wxUint32))
774 *(wxUint32*)buf=0;
775
776 return len;
777 }
778
779
780 // swap 32bit MB to 32bit String
781 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
782 {
783 size_t len=0;
784
785 while (*(wxUint32*)psz && (!buf || len < n))
786 {
787 if (buf)
788 {
789 ((char *)buf)[0] = psz[3];
790 ((char *)buf)[1] = psz[2];
791 ((char *)buf)[2] = psz[1];
792 ((char *)buf)[3] = psz[0];
793 buf++;
794 }
795 len++;
796 psz += sizeof(wxUint32);
797 }
798
799 if (buf && len<n)
800 *buf=0;
801
802 return len;
803 }
804
805
806 // swap 32bit String to 32bit MB
807 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
808 {
809 size_t len=0;
810
811 while (*psz && (!buf || len < n))
812 {
813 if (buf)
814 {
815 *buf++ = ((char *)psz)[3];
816 *buf++ = ((char *)psz)[2];
817 *buf++ = ((char *)psz)[1];
818 *buf++ = ((char *)psz)[0];
819 }
820 len += sizeof(wxUint32);
821 psz++;
822 }
823
824 if (buf && len<=n-sizeof(wxUint32))
825 *(wxUint32*)buf=0;
826
827 return len;
828 }
829
830
831 #endif // WC_UTF16
832
833
834 // ============================================================================
835 // The classes doing conversion using the iconv_xxx() functions
836 // ============================================================================
837
838 #ifdef HAVE_ICONV
839
840 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
841 // if output buffer is _exactly_ as big as needed. Such case is (unless there's
842 // yet another bug in glibc) the only case when iconv() returns with (size_t)-1
843 // (which means error) and says there are 0 bytes left in the input buffer --
844 // when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
845 // this alternative test for iconv() failure.
846 // [This bug does not appear in glibc 2.2.]
847 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
848 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
849 (errno != E2BIG || bufLeft != 0))
850 #else
851 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
852 #endif
853
854 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
855
856 // ----------------------------------------------------------------------------
857 // wxMBConv_iconv: encapsulates an iconv character set
858 // ----------------------------------------------------------------------------
859
860 class wxMBConv_iconv : public wxMBConv
861 {
862 public:
863 wxMBConv_iconv(const wxChar *name);
864 virtual ~wxMBConv_iconv();
865
866 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
867 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
868
869 bool IsOk() const
870 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
871
872 protected:
873 // the iconv handlers used to translate from multibyte to wide char and in
874 // the other direction
875 iconv_t m2w,
876 w2m;
877
878 private:
879 // the name (for iconv_open()) of a wide char charset -- if none is
880 // available on this machine, it will remain NULL
881 static const char *ms_wcCharsetName;
882
883 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
884 // different endian-ness than the native one
885 static bool ms_wcNeedsSwap;
886 };
887
888 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
889 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
890
891 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
892 {
893 // Do it the hard way
894 char cname[100];
895 for (size_t i = 0; i < wxStrlen(name)+1; i++)
896 cname[i] = (char) name[i];
897
898 // check for charset that represents wchar_t:
899 if (ms_wcCharsetName == NULL)
900 {
901 ms_wcNeedsSwap = false;
902
903 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
904 ms_wcCharsetName = WC_NAME_BEST;
905 m2w = iconv_open(ms_wcCharsetName, cname);
906
907 if (m2w == (iconv_t)-1)
908 {
909 // try charset w/o bytesex info (e.g. "UCS4")
910 // and check for bytesex ourselves:
911 ms_wcCharsetName = WC_NAME;
912 m2w = iconv_open(ms_wcCharsetName, cname);
913
914 // last bet, try if it knows WCHAR_T pseudo-charset
915 if (m2w == (iconv_t)-1)
916 {
917 ms_wcCharsetName = "WCHAR_T";
918 m2w = iconv_open(ms_wcCharsetName, cname);
919 }
920
921 if (m2w != (iconv_t)-1)
922 {
923 char buf[2], *bufPtr;
924 wchar_t wbuf[2], *wbufPtr;
925 size_t insz, outsz;
926 size_t res;
927
928 buf[0] = 'A';
929 buf[1] = 0;
930 wbuf[0] = 0;
931 insz = 2;
932 outsz = SIZEOF_WCHAR_T * 2;
933 wbufPtr = wbuf;
934 bufPtr = buf;
935
936 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
937 (char**)&wbufPtr, &outsz);
938
939 if (ICONV_FAILED(res, insz))
940 {
941 ms_wcCharsetName = NULL;
942 wxLogLastError(wxT("iconv"));
943 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
944 }
945 else
946 {
947 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
948 }
949 }
950 else
951 {
952 ms_wcCharsetName = NULL;
953
954 // VS: we must not output an error here, since wxWindows will safely
955 // fall back to using wxEncodingConverter.
956 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
957 //wxLogError(
958 }
959 }
960 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
961 }
962 else // we already have ms_wcCharsetName
963 {
964 m2w = iconv_open(ms_wcCharsetName, cname);
965 }
966
967 // NB: don't ever pass NULL to iconv_open(), it may crash!
968 if ( ms_wcCharsetName )
969 {
970 w2m = iconv_open( cname, ms_wcCharsetName);
971 }
972 else
973 {
974 w2m = (iconv_t)-1;
975 }
976 }
977
978 wxMBConv_iconv::~wxMBConv_iconv()
979 {
980 if ( m2w != (iconv_t)-1 )
981 iconv_close(m2w);
982 if ( w2m != (iconv_t)-1 )
983 iconv_close(w2m);
984 }
985
986 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
987 {
988 size_t inbuf = strlen(psz);
989 size_t outbuf = n * SIZEOF_WCHAR_T;
990 size_t res, cres;
991 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
992 wchar_t *bufPtr = buf;
993 const char *pszPtr = psz;
994
995 if (buf)
996 {
997 // have destination buffer, convert there
998 cres = iconv(m2w,
999 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1000 (char**)&bufPtr, &outbuf);
1001 res = n - (outbuf / SIZEOF_WCHAR_T);
1002
1003 if (ms_wcNeedsSwap)
1004 {
1005 // convert to native endianness
1006 WC_BSWAP(buf /* _not_ bufPtr */, res)
1007 }
1008
1009 // NB: iconv was given only strlen(psz) characters on input, and so
1010 // it couldn't convert the trailing zero. Let's do it ourselves
1011 // if there's some room left for it in the output buffer.
1012 if (res < n)
1013 buf[res] = 0;
1014 }
1015 else
1016 {
1017 // no destination buffer... convert using temp buffer
1018 // to calculate destination buffer requirement
1019 wchar_t tbuf[8];
1020 res = 0;
1021 do {
1022 bufPtr = tbuf;
1023 outbuf = 8*SIZEOF_WCHAR_T;
1024
1025 cres = iconv(m2w,
1026 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1027 (char**)&bufPtr, &outbuf );
1028
1029 res += 8-(outbuf/SIZEOF_WCHAR_T);
1030 } while ((cres==(size_t)-1) && (errno==E2BIG));
1031 }
1032
1033 if (ICONV_FAILED(cres, inbuf))
1034 {
1035 //VS: it is ok if iconv fails, hence trace only
1036 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1037 return (size_t)-1;
1038 }
1039
1040 return res;
1041 }
1042
1043 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1044 {
1045 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1046 size_t outbuf = n;
1047 size_t res, cres;
1048
1049 wchar_t *tmpbuf = 0;
1050
1051 if (ms_wcNeedsSwap)
1052 {
1053 // need to copy to temp buffer to switch endianness
1054 // this absolutely doesn't rock!
1055 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1056 // could be in read-only memory, or be accessed in some other thread)
1057 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1058 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1059 WC_BSWAP(tmpbuf, inbuf)
1060 psz=tmpbuf;
1061 }
1062
1063 if (buf)
1064 {
1065 // have destination buffer, convert there
1066 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1067
1068 res = n-outbuf;
1069
1070 // NB: iconv was given only wcslen(psz) characters on input, and so
1071 // it couldn't convert the trailing zero. Let's do it ourselves
1072 // if there's some room left for it in the output buffer.
1073 if (res < n)
1074 buf[0] = 0;
1075 }
1076 else
1077 {
1078 // no destination buffer... convert using temp buffer
1079 // to calculate destination buffer requirement
1080 char tbuf[16];
1081 res = 0;
1082 do {
1083 buf = tbuf; outbuf = 16;
1084
1085 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1086
1087 res += 16 - outbuf;
1088 } while ((cres==(size_t)-1) && (errno==E2BIG));
1089 }
1090
1091 if (ms_wcNeedsSwap)
1092 {
1093 free(tmpbuf);
1094 }
1095
1096 if (ICONV_FAILED(cres, inbuf))
1097 {
1098 //VS: it is ok if iconv fails, hence trace only
1099 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1100 return (size_t)-1;
1101 }
1102
1103 return res;
1104 }
1105
1106 #endif // HAVE_ICONV
1107
1108
1109 // ============================================================================
1110 // Win32 conversion classes
1111 // ============================================================================
1112
1113 #ifdef wxHAVE_WIN32_MB2WC
1114
1115 // from utils.cpp
1116 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1117 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1118
1119 class wxMBConv_win32 : public wxMBConv
1120 {
1121 public:
1122 wxMBConv_win32()
1123 {
1124 m_CodePage = CP_ACP;
1125 }
1126
1127 wxMBConv_win32(const wxChar* name)
1128 {
1129 m_CodePage = wxCharsetToCodepage(name);
1130 }
1131
1132 wxMBConv_win32(wxFontEncoding encoding)
1133 {
1134 m_CodePage = wxEncodingToCodepage(encoding);
1135 }
1136
1137 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1138 {
1139 const size_t len = ::MultiByteToWideChar
1140 (
1141 m_CodePage, // code page
1142 0, // flags (none)
1143 psz, // input string
1144 -1, // its length (NUL-terminated)
1145 buf, // output string
1146 buf ? n : 0 // size of output buffer
1147 );
1148
1149 // note that it returns count of written chars for buf != NULL and size
1150 // of the needed buffer for buf == NULL so in either case the length of
1151 // the string (which never includes the terminating NUL) is one less
1152 return len ? len - 1 : (size_t)-1;
1153 }
1154
1155 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1156 {
1157 /*
1158 we have a problem here: by default, WideCharToMultiByte() may
1159 replace characters unrepresentable in the target code page with bad
1160 quality approximations such as turning "1/2" symbol (U+00BD) into
1161 "1" for the code pages which don't have it and we, obviously, want
1162 to avoid this at any price
1163
1164 the trouble is that this function does it _silently_, i.e. it won't
1165 even tell us whether it did or not... Win98/2000 and higher provide
1166 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1167 we have to resort to a round trip, i.e. check that converting back
1168 results in the same string -- this is, of course, expensive but
1169 otherwise we simply can't be sure to not garble the data.
1170 */
1171
1172 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1173 // it doesn't work with CJK encodings (which we test for rather roughly
1174 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1175 // supporting it
1176 BOOL usedDef wxDUMMY_INITIALIZE(false),
1177 *pUsedDef;
1178 int flags;
1179 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1180 {
1181 // it's our lucky day
1182 flags = WC_NO_BEST_FIT_CHARS;
1183 pUsedDef = &usedDef;
1184 }
1185 else // old system or unsupported encoding
1186 {
1187 flags = 0;
1188 pUsedDef = NULL;
1189 }
1190
1191 const size_t len = ::WideCharToMultiByte
1192 (
1193 m_CodePage, // code page
1194 flags, // either none or no best fit
1195 pwz, // input string
1196 -1, // it is (wide) NUL-terminated
1197 buf, // output buffer
1198 buf ? n : 0, // and its size
1199 NULL, // default "replacement" char
1200 pUsedDef // [out] was it used?
1201 );
1202
1203 if ( !len )
1204 {
1205 // function totally failed
1206 return (size_t)-1;
1207 }
1208
1209 // if we were really converting, check if we succeeded
1210 if ( buf )
1211 {
1212 if ( flags )
1213 {
1214 // check if the conversion failed, i.e. if any replacements
1215 // were done
1216 if ( usedDef )
1217 return (size_t)-1;
1218 }
1219 else // we must resort to double tripping...
1220 {
1221 wxWCharBuffer wcBuf(n);
1222 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1223 wcscmp(wcBuf, pwz) != 0 )
1224 {
1225 // we didn't obtain the same thing we started from, hence
1226 // the conversion was lossy and we consider that it failed
1227 return (size_t)-1;
1228 }
1229 }
1230 }
1231
1232 // see the comment above for the reason of "len - 1"
1233 return len - 1;
1234 }
1235
1236 bool IsOk() const { return m_CodePage != -1; }
1237
1238 private:
1239 static bool CanUseNoBestFit()
1240 {
1241 static int s_isWin98Or2k = -1;
1242
1243 if ( s_isWin98Or2k == -1 )
1244 {
1245 int verMaj, verMin;
1246 switch ( wxGetOsVersion(&verMaj, &verMin) )
1247 {
1248 case wxWIN95:
1249 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1250 break;
1251
1252 case wxWINDOWS_NT:
1253 s_isWin98Or2k = verMaj >= 5;
1254 break;
1255
1256 default:
1257 // unknown, be conseravtive by default
1258 s_isWin98Or2k = 0;
1259 }
1260
1261 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1262 }
1263
1264 return s_isWin98Or2k == 1;
1265 }
1266
1267 long m_CodePage;
1268 };
1269
1270 #endif // wxHAVE_WIN32_MB2WC
1271
1272 // ============================================================================
1273 // Mac conversion classes
1274 // ============================================================================
1275
1276 #if defined(__WXMAC__) && defined(TARGET_CARBON)
1277
1278 class wxMBConv_mac : public wxMBConv
1279 {
1280 public:
1281 wxMBConv_mac()
1282 {
1283 Init(CFStringGetSystemEncoding()) ;
1284 }
1285
1286 wxMBConv_mac(const wxChar* name)
1287 {
1288 Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, FALSE) ) ) ;
1289 }
1290
1291 wxMBConv_mac(wxFontEncoding encoding)
1292 {
1293 Init( wxMacGetSystemEncFromFontEnc(encoding) );
1294 }
1295
1296 ~wxMBConv_mac()
1297 {
1298 OSStatus status = noErr ;
1299 status = TECDisposeConverter(m_MB2WC_converter);
1300 status = TECDisposeConverter(m_WC2MB_converter);
1301 }
1302
1303
1304 void Init( TextEncodingBase encoding)
1305 {
1306 OSStatus status = noErr ;
1307 m_char_encoding = encoding ;
1308 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
1309
1310 status = TECCreateConverter(&m_MB2WC_converter,
1311 m_char_encoding,
1312 m_unicode_encoding);
1313 status = TECCreateConverter(&m_WC2MB_converter,
1314 m_unicode_encoding,
1315 m_char_encoding);
1316 }
1317
1318 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1319 {
1320 OSStatus status = noErr ;
1321 ByteCount byteOutLen ;
1322 ByteCount byteInLen = strlen(psz) ;
1323 wchar_t *tbuf = NULL ;
1324 UniChar* ubuf = NULL ;
1325 size_t res = 0 ;
1326
1327 if (buf == NULL)
1328 {
1329 n = byteInLen ;
1330 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
1331 }
1332 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
1333 #if SIZEOF_WCHAR_T == 4
1334 ubuf = (UniChar*) malloc( byteBufferLen ) ;
1335 #else
1336 ubuf = (UniChar*) (buf ? buf : tbuf) ;
1337 #endif
1338 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
1339 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
1340 #if SIZEOF_WCHAR_T == 4
1341 wxMBConvUTF16BE converter ;
1342 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
1343 free( ubuf ) ;
1344 #else
1345 res = byteOutLen / sizeof( UniChar ) ;
1346 #endif
1347 if ( buf == NULL )
1348 free(tbuf) ;
1349
1350 if ( buf && res < n)
1351 buf[res] = 0;
1352
1353 return res ;
1354 }
1355
1356 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
1357 {
1358 OSStatus status = noErr ;
1359 ByteCount byteOutLen ;
1360 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
1361
1362 char *tbuf = NULL ;
1363
1364 if (buf == NULL)
1365 {
1366 // worst case
1367 n = byteInLen * 2 ;
1368 tbuf = (char*) malloc( n ) ;
1369 }
1370
1371 ByteCount byteBufferLen = n ;
1372 UniChar* ubuf = NULL ;
1373 #if SIZEOF_WCHAR_T == 4
1374 wxMBConvUTF16BE converter ;
1375 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
1376 byteBufferLen = unicharlen ;
1377 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
1378 converter.WC2MB( (char*) ubuf , psz, unicharlen ) ;
1379 #else
1380 ubuf = (UniChar*) psz ;
1381 #endif
1382 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
1383 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
1384 #if SIZEOF_WCHAR_T == 4
1385 free( ubuf ) ;
1386 #endif
1387 if ( buf == NULL )
1388 free(tbuf) ;
1389
1390 size_t res = byteOutLen ;
1391 if ( buf && res < n)
1392 buf[res] = 0;
1393
1394 return res ;
1395 }
1396
1397 bool IsOk() const
1398 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
1399
1400 private:
1401 TECObjectRef m_MB2WC_converter ;
1402 TECObjectRef m_WC2MB_converter ;
1403
1404 TextEncodingBase m_char_encoding ;
1405 TextEncodingBase m_unicode_encoding ;
1406 };
1407
1408 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1409
1410 // ============================================================================
1411 // wxEncodingConverter based conversion classes
1412 // ============================================================================
1413
1414 #if wxUSE_FONTMAP
1415
1416 class wxMBConv_wxwin : public wxMBConv
1417 {
1418 private:
1419 void Init()
1420 {
1421 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
1422 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
1423 }
1424
1425 public:
1426 // temporarily just use wxEncodingConverter stuff,
1427 // so that it works while a better implementation is built
1428 wxMBConv_wxwin(const wxChar* name)
1429 {
1430 if (name)
1431 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
1432 else
1433 m_enc = wxFONTENCODING_SYSTEM;
1434
1435 Init();
1436 }
1437
1438 wxMBConv_wxwin(wxFontEncoding enc)
1439 {
1440 m_enc = enc;
1441
1442 Init();
1443 }
1444
1445 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
1446 {
1447 size_t inbuf = strlen(psz);
1448 if (buf)
1449 m2w.Convert(psz,buf);
1450 return inbuf;
1451 }
1452
1453 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
1454 {
1455 const size_t inbuf = wxWcslen(psz);
1456 if (buf)
1457 w2m.Convert(psz,buf);
1458
1459 return inbuf;
1460 }
1461
1462 bool IsOk() const { return m_ok; }
1463
1464 public:
1465 wxFontEncoding m_enc;
1466 wxEncodingConverter m2w, w2m;
1467
1468 // were we initialized successfully?
1469 bool m_ok;
1470
1471 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
1472 };
1473
1474 #endif // wxUSE_FONTMAP
1475
1476 // ============================================================================
1477 // wxCSConv implementation
1478 // ============================================================================
1479
1480 void wxCSConv::Init()
1481 {
1482 m_name = NULL;
1483 m_convReal = NULL;
1484 m_deferred = true;
1485 }
1486
1487 wxCSConv::wxCSConv(const wxChar *charset)
1488 {
1489 Init();
1490
1491 if ( charset )
1492 {
1493 SetName(charset);
1494 }
1495
1496 m_encoding = wxFONTENCODING_SYSTEM;
1497 }
1498
1499 wxCSConv::wxCSConv(wxFontEncoding encoding)
1500 {
1501 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
1502 {
1503 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
1504
1505 encoding = wxFONTENCODING_SYSTEM;
1506 }
1507
1508 Init();
1509
1510 m_encoding = encoding;
1511 }
1512
1513 wxCSConv::~wxCSConv()
1514 {
1515 Clear();
1516 }
1517
1518 wxCSConv::wxCSConv(const wxCSConv& conv)
1519 : wxMBConv()
1520 {
1521 Init();
1522
1523 SetName(conv.m_name);
1524 m_encoding = conv.m_encoding;
1525 }
1526
1527 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
1528 {
1529 Clear();
1530
1531 SetName(conv.m_name);
1532 m_encoding = conv.m_encoding;
1533
1534 return *this;
1535 }
1536
1537 void wxCSConv::Clear()
1538 {
1539 free(m_name);
1540 delete m_convReal;
1541
1542 m_name = NULL;
1543 m_convReal = NULL;
1544 }
1545
1546 void wxCSConv::SetName(const wxChar *charset)
1547 {
1548 if (charset)
1549 {
1550 m_name = wxStrdup(charset);
1551 m_deferred = true;
1552 }
1553 }
1554
1555 wxMBConv *wxCSConv::DoCreate() const
1556 {
1557 // check for the special case of ASCII or ISO8859-1 charset: as we have
1558 // special knowledge of it anyhow, we don't need to create a special
1559 // conversion object
1560 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
1561 {
1562 // don't convert at all
1563 return NULL;
1564 }
1565
1566 // we trust OS to do conversion better than we can so try external
1567 // conversion methods first
1568 //
1569 // the full order is:
1570 // 1. OS conversion (iconv() under Unix or Win32 API)
1571 // 2. hard coded conversions for UTF
1572 // 3. wxEncodingConverter as fall back
1573
1574 // step (1)
1575 #ifdef HAVE_ICONV
1576 #if !wxUSE_FONTMAP
1577 if ( m_name )
1578 #endif // !wxUSE_FONTMAP
1579 {
1580 wxString name(m_name);
1581
1582 #if wxUSE_FONTMAP
1583 if ( name.empty() )
1584 name = wxFontMapper::Get()->GetEncodingName(m_encoding);
1585 #endif // wxUSE_FONTMAP
1586
1587 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
1588 if ( conv->IsOk() )
1589 return conv;
1590
1591 delete conv;
1592 }
1593 #endif // HAVE_ICONV
1594
1595 #ifdef wxHAVE_WIN32_MB2WC
1596 {
1597 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
1598 : new wxMBConv_win32(m_encoding);
1599 if ( conv->IsOk() )
1600 return conv;
1601
1602 delete conv;
1603 }
1604 #endif // wxHAVE_WIN32_MB2WC
1605 #if defined(__WXMAC__)
1606 {
1607 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
1608 {
1609
1610 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
1611 : new wxMBConv_mac(m_encoding);
1612 if ( conv->IsOk() )
1613 return conv;
1614
1615 delete conv;
1616 }
1617 }
1618 #endif
1619 // step (2)
1620 wxFontEncoding enc = m_encoding;
1621 #if wxUSE_FONTMAP
1622 if ( enc == wxFONTENCODING_SYSTEM && m_name )
1623 {
1624 // use "false" to suppress interactive dialogs -- we can be called from
1625 // anywhere and popping up a dialog from here is the last thing we want to
1626 // do
1627 enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
1628 }
1629 #endif // wxUSE_FONTMAP
1630
1631 switch ( enc )
1632 {
1633 case wxFONTENCODING_UTF7:
1634 return new wxMBConvUTF7;
1635
1636 case wxFONTENCODING_UTF8:
1637 return new wxMBConvUTF8;
1638
1639 case wxFONTENCODING_UTF16BE:
1640 return new wxMBConvUTF16BE;
1641
1642 case wxFONTENCODING_UTF16LE:
1643 return new wxMBConvUTF16LE;
1644
1645 case wxFONTENCODING_UTF32BE:
1646 return new wxMBConvUTF32BE;
1647
1648 case wxFONTENCODING_UTF32LE:
1649 return new wxMBConvUTF32LE;
1650
1651 default:
1652 // nothing to do but put here to suppress gcc warnings
1653 ;
1654 }
1655
1656 // step (3)
1657 #if wxUSE_FONTMAP
1658 {
1659 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
1660 : new wxMBConv_wxwin(m_encoding);
1661 if ( conv->IsOk() )
1662 return conv;
1663
1664 delete conv;
1665 }
1666 #endif // wxUSE_FONTMAP
1667
1668 // NB: This is a hack to prevent deadlock. What could otherwise happen
1669 // in Unicode build: wxConvLocal creation ends up being here
1670 // because of some failure and logs the error. But wxLog will try to
1671 // attach timestamp, for which it will need wxConvLocal (to convert
1672 // time to char* and then wchar_t*), but that fails, tries to log
1673 // error, but wxLog has a (already locked) critical section that
1674 // guards static buffer.
1675 static bool alreadyLoggingError = false;
1676 if (!alreadyLoggingError)
1677 {
1678 alreadyLoggingError = true;
1679 wxLogError(_("Cannot convert from the charset '%s'!"),
1680 m_name ? m_name
1681 :
1682 #if wxUSE_FONTMAP
1683 wxFontMapper::GetEncodingDescription(m_encoding).c_str()
1684 #else // !wxUSE_FONTMAP
1685 wxString::Format(_("encoding %s"), m_encoding).c_str()
1686 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1687 );
1688 alreadyLoggingError = false;
1689 }
1690
1691 return NULL;
1692 }
1693
1694 void wxCSConv::CreateConvIfNeeded() const
1695 {
1696 if ( m_deferred )
1697 {
1698 wxCSConv *self = (wxCSConv *)this; // const_cast
1699
1700 #if wxUSE_INTL
1701 // if we don't have neither the name nor the encoding, use the default
1702 // encoding for this system
1703 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
1704 {
1705 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
1706 }
1707 #endif // wxUSE_INTL
1708
1709 self->m_convReal = DoCreate();
1710 self->m_deferred = false;
1711 }
1712 }
1713
1714 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1715 {
1716 CreateConvIfNeeded();
1717
1718 if (m_convReal)
1719 return m_convReal->MB2WC(buf, psz, n);
1720
1721 // latin-1 (direct)
1722 size_t len = strlen(psz);
1723
1724 if (buf)
1725 {
1726 for (size_t c = 0; c <= len; c++)
1727 buf[c] = (unsigned char)(psz[c]);
1728 }
1729
1730 return len;
1731 }
1732
1733 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1734 {
1735 CreateConvIfNeeded();
1736
1737 if (m_convReal)
1738 return m_convReal->WC2MB(buf, psz, n);
1739
1740 // latin-1 (direct)
1741 const size_t len = wxWcslen(psz);
1742 if (buf)
1743 {
1744 for (size_t c = 0; c <= len; c++)
1745 {
1746 if (psz[c] > 0xFF)
1747 return (size_t)-1;
1748 buf[c] = psz[c];
1749 }
1750 }
1751 else
1752 {
1753 for (size_t c = 0; c <= len; c++)
1754 {
1755 if (psz[c] > 0xFF)
1756 return (size_t)-1;
1757 }
1758 }
1759
1760 return len;
1761 }
1762
1763 // ----------------------------------------------------------------------------
1764 // globals
1765 // ----------------------------------------------------------------------------
1766
1767 #ifdef __WINDOWS__
1768 static wxMBConv_win32 wxConvLibcObj;
1769 #else
1770 static wxMBConvLibc wxConvLibcObj;
1771 #endif
1772
1773 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
1774 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
1775 static wxMBConvUTF7 wxConvUTF7Obj;
1776 static wxMBConvUTF8 wxConvUTF8Obj;
1777
1778
1779 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
1780 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
1781 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
1782 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
1783 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
1784 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
1785
1786 #else // !wxUSE_WCHAR_T
1787
1788 // stand-ins in absence of wchar_t
1789 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
1790 wxConvISO8859_1,
1791 wxConvLocal,
1792 wxConvUTF8;
1793
1794 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
1795
1796