]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
added wxRegEx::GetMatchCount (patch 938995)
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
9 // (c) 2000-2003 Vadim Zeitlin
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ============================================================================
14 // declarations
15 // ============================================================================
16
17 // ----------------------------------------------------------------------------
18 // headers
19 // ----------------------------------------------------------------------------
20
21 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
22 #pragma implementation "strconv.h"
23 #endif
24
25 // For compilers that support precompilation, includes "wx.h".
26 #include "wx/wxprec.h"
27
28 #ifdef __BORLANDC__
29 #pragma hdrstop
30 #endif
31
32 #ifndef WX_PRECOMP
33 #include "wx/intl.h"
34 #include "wx/log.h"
35 #endif // WX_PRECOMP
36
37 #include "wx/strconv.h"
38
39 #if wxUSE_WCHAR_T
40
41 #ifdef __WXMSW__
42 #include "wx/msw/private.h"
43 #include "wx/msw/missing.h"
44 #endif
45
46 #ifndef __WXWINCE__
47 #include <errno.h>
48 #endif
49
50 #include <ctype.h>
51 #include <string.h>
52 #include <stdlib.h>
53
54 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
55 #define wxHAVE_WIN32_MB2WC
56 #endif // __WIN32__ but !__WXMICROWIN__
57
58 // ----------------------------------------------------------------------------
59 // headers
60 // ----------------------------------------------------------------------------
61
62 #ifdef __SALFORDC__
63 #include <clib.h>
64 #endif
65
66 #ifdef HAVE_ICONV
67 #include <iconv.h>
68 #endif
69
70 #include "wx/encconv.h"
71 #include "wx/fontmap.h"
72
73 #ifdef __WXMAC__
74 #include <ATSUnicode.h>
75 #include <TextCommon.h>
76 #include <TextEncodingConverter.h>
77
78 #include "wx/mac/private.h" // includes mac headers
79 #endif
80 // ----------------------------------------------------------------------------
81 // macros
82 // ----------------------------------------------------------------------------
83
84 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
85 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
86
87 #if SIZEOF_WCHAR_T == 4
88 #define WC_NAME "UCS4"
89 #define WC_BSWAP BSWAP_UCS4
90 #ifdef WORDS_BIGENDIAN
91 #define WC_NAME_BEST "UCS-4BE"
92 #else
93 #define WC_NAME_BEST "UCS-4LE"
94 #endif
95 #elif SIZEOF_WCHAR_T == 2
96 #define WC_NAME "UTF16"
97 #define WC_BSWAP BSWAP_UTF16
98 #define WC_UTF16
99 #ifdef WORDS_BIGENDIAN
100 #define WC_NAME_BEST "UTF-16BE"
101 #else
102 #define WC_NAME_BEST "UTF-16LE"
103 #endif
104 #else // sizeof(wchar_t) != 2 nor 4
105 // does this ever happen?
106 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
107 #endif
108
109 // ============================================================================
110 // implementation
111 // ============================================================================
112
113 // ----------------------------------------------------------------------------
114 // UTF-16 en/decoding to/from UCS-4
115 // ----------------------------------------------------------------------------
116
117
118 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
119 {
120 if (input<=0xffff)
121 {
122 if (output)
123 *output = (wxUint16) input;
124 return 1;
125 }
126 else if (input>=0x110000)
127 {
128 return (size_t)-1;
129 }
130 else
131 {
132 if (output)
133 {
134 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
135 *output = (wxUint16) ((input&0x3ff)+0xdc00);
136 }
137 return 2;
138 }
139 }
140
141 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
142 {
143 if ((*input<0xd800) || (*input>0xdfff))
144 {
145 output = *input;
146 return 1;
147 }
148 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
149 {
150 output = *input;
151 return (size_t)-1;
152 }
153 else
154 {
155 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
156 return 2;
157 }
158 }
159
160
161 // ----------------------------------------------------------------------------
162 // wxMBConv
163 // ----------------------------------------------------------------------------
164
165 wxMBConv::~wxMBConv()
166 {
167 // nothing to do here
168 }
169
170 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
171 {
172 if ( psz )
173 {
174 // calculate the length of the buffer needed first
175 size_t nLen = MB2WC(NULL, psz, 0);
176 if ( nLen != (size_t)-1 )
177 {
178 // now do the actual conversion
179 wxWCharBuffer buf(nLen);
180 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
181 if ( nLen != (size_t)-1 )
182 {
183 return buf;
184 }
185 }
186 }
187
188 wxWCharBuffer buf((wchar_t *)NULL);
189
190 return buf;
191 }
192
193 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
194 {
195 if ( pwz )
196 {
197 size_t nLen = WC2MB(NULL, pwz, 0);
198 if ( nLen != (size_t)-1 )
199 {
200 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
201 nLen = WC2MB(buf.data(), pwz, nLen + 4);
202 if ( nLen != (size_t)-1 )
203 {
204 return buf;
205 }
206 }
207 }
208
209 wxCharBuffer buf((char *)NULL);
210
211 return buf;
212 }
213
214 // ----------------------------------------------------------------------------
215 // wxMBConvLibc
216 // ----------------------------------------------------------------------------
217
218 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
219 {
220 return wxMB2WC(buf, psz, n);
221 }
222
223 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
224 {
225 return wxWC2MB(buf, psz, n);
226 }
227
228 // ----------------------------------------------------------------------------
229 // UTF-7
230 // ----------------------------------------------------------------------------
231
232 #if 0
233 static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
234 "abcdefghijklmnopqrstuvwxyz"
235 "0123456789'(),-./:?";
236 static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
237 static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
238 "abcdefghijklmnopqrstuvwxyz"
239 "0123456789+/";
240 #endif
241
242 // TODO: write actual implementations of UTF-7 here
243 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
244 const char * WXUNUSED(psz),
245 size_t WXUNUSED(n)) const
246 {
247 return 0;
248 }
249
250 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
251 const wchar_t * WXUNUSED(psz),
252 size_t WXUNUSED(n)) const
253 {
254 return 0;
255 }
256
257 // ----------------------------------------------------------------------------
258 // UTF-8
259 // ----------------------------------------------------------------------------
260
261 static wxUint32 utf8_max[]=
262 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
263
264 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
265 {
266 size_t len = 0;
267
268 while (*psz && ((!buf) || (len < n)))
269 {
270 unsigned char cc = *psz++, fc = cc;
271 unsigned cnt;
272 for (cnt = 0; fc & 0x80; cnt++)
273 fc <<= 1;
274 if (!cnt)
275 {
276 // plain ASCII char
277 if (buf)
278 *buf++ = cc;
279 len++;
280 }
281 else
282 {
283 cnt--;
284 if (!cnt)
285 {
286 // invalid UTF-8 sequence
287 return (size_t)-1;
288 }
289 else
290 {
291 unsigned ocnt = cnt - 1;
292 wxUint32 res = cc & (0x3f >> cnt);
293 while (cnt--)
294 {
295 cc = *psz++;
296 if ((cc & 0xC0) != 0x80)
297 {
298 // invalid UTF-8 sequence
299 return (size_t)-1;
300 }
301 res = (res << 6) | (cc & 0x3f);
302 }
303 if (res <= utf8_max[ocnt])
304 {
305 // illegal UTF-8 encoding
306 return (size_t)-1;
307 }
308 #ifdef WC_UTF16
309 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
310 size_t pa = encode_utf16(res, (wxUint16 *)buf);
311 if (pa == (size_t)-1)
312 return (size_t)-1;
313 if (buf)
314 buf += pa;
315 len += pa;
316 #else // !WC_UTF16
317 if (buf)
318 *buf++ = res;
319 len++;
320 #endif // WC_UTF16/!WC_UTF16
321 }
322 }
323 }
324 if (buf && (len < n))
325 *buf = 0;
326 return len;
327 }
328
329 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
330 {
331 size_t len = 0;
332
333 while (*psz && ((!buf) || (len < n)))
334 {
335 wxUint32 cc;
336 #ifdef WC_UTF16
337 // cast is ok for WC_UTF16
338 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
339 psz += (pa == (size_t)-1) ? 1 : pa;
340 #else
341 cc=(*psz++) & 0x7fffffff;
342 #endif
343 unsigned cnt;
344 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
345 if (!cnt)
346 {
347 // plain ASCII char
348 if (buf)
349 *buf++ = (char) cc;
350 len++;
351 }
352
353 else
354 {
355 len += cnt + 1;
356 if (buf)
357 {
358 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
359 while (cnt--)
360 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
361 }
362 }
363 }
364
365 if (buf && (len<n)) *buf = 0;
366
367 return len;
368 }
369
370
371
372
373 // ----------------------------------------------------------------------------
374 // UTF-16
375 // ----------------------------------------------------------------------------
376
377 #ifdef WORDS_BIGENDIAN
378 #define wxMBConvUTF16straight wxMBConvUTF16BE
379 #define wxMBConvUTF16swap wxMBConvUTF16LE
380 #else
381 #define wxMBConvUTF16swap wxMBConvUTF16BE
382 #define wxMBConvUTF16straight wxMBConvUTF16LE
383 #endif
384
385
386 #ifdef WC_UTF16
387
388 // copy 16bit MB to 16bit String
389 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
390 {
391 size_t len=0;
392
393 while (*(wxUint16*)psz && (!buf || len < n))
394 {
395 if (buf)
396 *buf++ = *(wxUint16*)psz;
397 len++;
398
399 psz += sizeof(wxUint16);
400 }
401 if (buf && len<n) *buf=0;
402
403 return len;
404 }
405
406
407 // copy 16bit String to 16bit MB
408 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
409 {
410 size_t len=0;
411
412 while (*psz && (!buf || len < n))
413 {
414 if (buf)
415 {
416 *(wxUint16*)buf = *psz;
417 buf += sizeof(wxUint16);
418 }
419 len += sizeof(wxUint16);
420 psz++;
421 }
422 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
423
424 return len;
425 }
426
427
428 // swap 16bit MB to 16bit String
429 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
430 {
431 size_t len=0;
432
433 while (*(wxUint16*)psz && (!buf || len < n))
434 {
435 if (buf)
436 {
437 ((char *)buf)[0] = psz[1];
438 ((char *)buf)[1] = psz[0];
439 buf++;
440 }
441 len++;
442 psz += sizeof(wxUint16);
443 }
444 if (buf && len<n) *buf=0;
445
446 return len;
447 }
448
449
450 // swap 16bit MB to 16bit String
451 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
452 {
453 size_t len=0;
454
455 while (*psz && (!buf || len < n))
456 {
457 if (buf)
458 {
459 *buf++ = ((char*)psz)[1];
460 *buf++ = ((char*)psz)[0];
461 }
462 len += sizeof(wxUint16);
463 psz++;
464 }
465 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
466
467 return len;
468 }
469
470
471 #else // WC_UTF16
472
473
474 // copy 16bit MB to 32bit String
475 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
476 {
477 size_t len=0;
478
479 while (*(wxUint16*)psz && (!buf || len < n))
480 {
481 wxUint32 cc;
482 size_t pa=decode_utf16((wxUint16*)psz, cc);
483 if (pa == (size_t)-1)
484 return pa;
485
486 if (buf)
487 *buf++ = cc;
488 len++;
489 psz += pa * sizeof(wxUint16);
490 }
491 if (buf && len<n) *buf=0;
492
493 return len;
494 }
495
496
497 // copy 32bit String to 16bit MB
498 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
499 {
500 size_t len=0;
501
502 while (*psz && (!buf || len < n))
503 {
504 wxUint16 cc[2];
505 size_t pa=encode_utf16(*psz, cc);
506
507 if (pa == (size_t)-1)
508 return pa;
509
510 if (buf)
511 {
512 *(wxUint16*)buf = cc[0];
513 buf += sizeof(wxUint16);
514 if (pa > 1)
515 {
516 *(wxUint16*)buf = cc[1];
517 buf += sizeof(wxUint16);
518 }
519 }
520
521 len += pa*sizeof(wxUint16);
522 psz++;
523 }
524 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
525
526 return len;
527 }
528
529
530 // swap 16bit MB to 32bit String
531 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
532 {
533 size_t len=0;
534
535 while (*(wxUint16*)psz && (!buf || len < n))
536 {
537 wxUint32 cc;
538 char tmp[4];
539 tmp[0]=psz[1]; tmp[1]=psz[0];
540 tmp[2]=psz[3]; tmp[3]=psz[2];
541
542 size_t pa=decode_utf16((wxUint16*)tmp, cc);
543 if (pa == (size_t)-1)
544 return pa;
545
546 if (buf)
547 *buf++ = cc;
548
549 len++;
550 psz += pa * sizeof(wxUint16);
551 }
552 if (buf && len<n) *buf=0;
553
554 return len;
555 }
556
557
558 // swap 32bit String to 16bit MB
559 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
560 {
561 size_t len=0;
562
563 while (*psz && (!buf || len < n))
564 {
565 wxUint16 cc[2];
566 size_t pa=encode_utf16(*psz, cc);
567
568 if (pa == (size_t)-1)
569 return pa;
570
571 if (buf)
572 {
573 *buf++ = ((char*)cc)[1];
574 *buf++ = ((char*)cc)[0];
575 if (pa > 1)
576 {
577 *buf++ = ((char*)cc)[3];
578 *buf++ = ((char*)cc)[2];
579 }
580 }
581
582 len += pa*sizeof(wxUint16);
583 psz++;
584 }
585 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
586
587 return len;
588 }
589
590 #endif // WC_UTF16
591
592
593 // ----------------------------------------------------------------------------
594 // UTF-32
595 // ----------------------------------------------------------------------------
596
597 #ifdef WORDS_BIGENDIAN
598 #define wxMBConvUTF32straight wxMBConvUTF32BE
599 #define wxMBConvUTF32swap wxMBConvUTF32LE
600 #else
601 #define wxMBConvUTF32swap wxMBConvUTF32BE
602 #define wxMBConvUTF32straight wxMBConvUTF32LE
603 #endif
604
605
606 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
607 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
608
609
610 #ifdef WC_UTF16
611
612 // copy 32bit MB to 16bit String
613 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
614 {
615 size_t len=0;
616
617 while (*(wxUint32*)psz && (!buf || len < n))
618 {
619 wxUint16 cc[2];
620
621 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
622 if (pa == (size_t)-1)
623 return pa;
624
625 if (buf)
626 {
627 *buf++ = cc[0];
628 if (pa > 1)
629 *buf++ = cc[1];
630 }
631 len += pa;
632 psz += sizeof(wxUint32);
633 }
634 if (buf && len<n) *buf=0;
635
636 return len;
637 }
638
639
640 // copy 16bit String to 32bit MB
641 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
642 {
643 size_t len=0;
644
645 while (*psz && (!buf || len < n))
646 {
647 wxUint32 cc;
648
649 // cast is ok for WC_UTF16
650 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
651 if (pa == (size_t)-1)
652 return pa;
653
654 if (buf)
655 {
656 *(wxUint32*)buf = cc;
657 buf += sizeof(wxUint32);
658 }
659 len += sizeof(wxUint32);
660 psz += pa;
661 }
662
663 if (buf && len<=n-sizeof(wxUint32))
664 *(wxUint32*)buf=0;
665
666 return len;
667 }
668
669
670
671 // swap 32bit MB to 16bit String
672 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
673 {
674 size_t len=0;
675
676 while (*(wxUint32*)psz && (!buf || len < n))
677 {
678 char tmp[4];
679 tmp[0] = psz[3]; tmp[1] = psz[2];
680 tmp[2] = psz[1]; tmp[3] = psz[0];
681
682
683 wxUint16 cc[2];
684
685 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
686 if (pa == (size_t)-1)
687 return pa;
688
689 if (buf)
690 {
691 *buf++ = cc[0];
692 if (pa > 1)
693 *buf++ = cc[1];
694 }
695 len += pa;
696 psz += sizeof(wxUint32);
697 }
698
699 if (buf && len<n)
700 *buf=0;
701
702 return len;
703 }
704
705
706 // swap 16bit String to 32bit MB
707 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
708 {
709 size_t len=0;
710
711 while (*psz && (!buf || len < n))
712 {
713 char cc[4];
714
715 // cast is ok for WC_UTF16
716 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
717 if (pa == (size_t)-1)
718 return pa;
719
720 if (buf)
721 {
722 *buf++ = cc[3];
723 *buf++ = cc[2];
724 *buf++ = cc[1];
725 *buf++ = cc[0];
726 }
727 len += sizeof(wxUint32);
728 psz += pa;
729 }
730
731 if (buf && len<=n-sizeof(wxUint32))
732 *(wxUint32*)buf=0;
733
734 return len;
735 }
736
737 #else // WC_UTF16
738
739
740 // copy 32bit MB to 32bit String
741 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
742 {
743 size_t len=0;
744
745 while (*(wxUint32*)psz && (!buf || len < n))
746 {
747 if (buf)
748 *buf++ = *(wxUint32*)psz;
749 len++;
750 psz += sizeof(wxUint32);
751 }
752
753 if (buf && len<n)
754 *buf=0;
755
756 return len;
757 }
758
759
760 // copy 32bit String to 32bit MB
761 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
762 {
763 size_t len=0;
764
765 while (*psz && (!buf || len < n))
766 {
767 if (buf)
768 {
769 *(wxUint32*)buf = *psz;
770 buf += sizeof(wxUint32);
771 }
772
773 len += sizeof(wxUint32);
774 psz++;
775 }
776
777 if (buf && len<=n-sizeof(wxUint32))
778 *(wxUint32*)buf=0;
779
780 return len;
781 }
782
783
784 // swap 32bit MB to 32bit String
785 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
786 {
787 size_t len=0;
788
789 while (*(wxUint32*)psz && (!buf || len < n))
790 {
791 if (buf)
792 {
793 ((char *)buf)[0] = psz[3];
794 ((char *)buf)[1] = psz[2];
795 ((char *)buf)[2] = psz[1];
796 ((char *)buf)[3] = psz[0];
797 buf++;
798 }
799 len++;
800 psz += sizeof(wxUint32);
801 }
802
803 if (buf && len<n)
804 *buf=0;
805
806 return len;
807 }
808
809
810 // swap 32bit String to 32bit MB
811 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
812 {
813 size_t len=0;
814
815 while (*psz && (!buf || len < n))
816 {
817 if (buf)
818 {
819 *buf++ = ((char *)psz)[3];
820 *buf++ = ((char *)psz)[2];
821 *buf++ = ((char *)psz)[1];
822 *buf++ = ((char *)psz)[0];
823 }
824 len += sizeof(wxUint32);
825 psz++;
826 }
827
828 if (buf && len<=n-sizeof(wxUint32))
829 *(wxUint32*)buf=0;
830
831 return len;
832 }
833
834
835 #endif // WC_UTF16
836
837
838 // ============================================================================
839 // The classes doing conversion using the iconv_xxx() functions
840 // ============================================================================
841
842 #ifdef HAVE_ICONV
843
844 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
845 // if output buffer is _exactly_ as big as needed. Such case is (unless there's
846 // yet another bug in glibc) the only case when iconv() returns with (size_t)-1
847 // (which means error) and says there are 0 bytes left in the input buffer --
848 // when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
849 // this alternative test for iconv() failure.
850 // [This bug does not appear in glibc 2.2.]
851 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
852 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
853 (errno != E2BIG || bufLeft != 0))
854 #else
855 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
856 #endif
857
858 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
859
860 // ----------------------------------------------------------------------------
861 // wxMBConv_iconv: encapsulates an iconv character set
862 // ----------------------------------------------------------------------------
863
864 class wxMBConv_iconv : public wxMBConv
865 {
866 public:
867 wxMBConv_iconv(const wxChar *name);
868 virtual ~wxMBConv_iconv();
869
870 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
871 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
872
873 bool IsOk() const
874 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
875
876 protected:
877 // the iconv handlers used to translate from multibyte to wide char and in
878 // the other direction
879 iconv_t m2w,
880 w2m;
881
882 private:
883 // the name (for iconv_open()) of a wide char charset -- if none is
884 // available on this machine, it will remain NULL
885 static const char *ms_wcCharsetName;
886
887 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
888 // different endian-ness than the native one
889 static bool ms_wcNeedsSwap;
890 };
891
892 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
893 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
894
895 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
896 {
897 // Do it the hard way
898 char cname[100];
899 for (size_t i = 0; i < wxStrlen(name)+1; i++)
900 cname[i] = (char) name[i];
901
902 // check for charset that represents wchar_t:
903 if (ms_wcCharsetName == NULL)
904 {
905 ms_wcNeedsSwap = false;
906
907 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
908 ms_wcCharsetName = WC_NAME_BEST;
909 m2w = iconv_open(ms_wcCharsetName, cname);
910
911 if (m2w == (iconv_t)-1)
912 {
913 // try charset w/o bytesex info (e.g. "UCS4")
914 // and check for bytesex ourselves:
915 ms_wcCharsetName = WC_NAME;
916 m2w = iconv_open(ms_wcCharsetName, cname);
917
918 // last bet, try if it knows WCHAR_T pseudo-charset
919 if (m2w == (iconv_t)-1)
920 {
921 ms_wcCharsetName = "WCHAR_T";
922 m2w = iconv_open(ms_wcCharsetName, cname);
923 }
924
925 if (m2w != (iconv_t)-1)
926 {
927 char buf[2], *bufPtr;
928 wchar_t wbuf[2], *wbufPtr;
929 size_t insz, outsz;
930 size_t res;
931
932 buf[0] = 'A';
933 buf[1] = 0;
934 wbuf[0] = 0;
935 insz = 2;
936 outsz = SIZEOF_WCHAR_T * 2;
937 wbufPtr = wbuf;
938 bufPtr = buf;
939
940 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
941 (char**)&wbufPtr, &outsz);
942
943 if (ICONV_FAILED(res, insz))
944 {
945 ms_wcCharsetName = NULL;
946 wxLogLastError(wxT("iconv"));
947 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
948 }
949 else
950 {
951 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
952 }
953 }
954 else
955 {
956 ms_wcCharsetName = NULL;
957
958 // VS: we must not output an error here, since wxWindows will safely
959 // fall back to using wxEncodingConverter.
960 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
961 //wxLogError(
962 }
963 }
964 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
965 }
966 else // we already have ms_wcCharsetName
967 {
968 m2w = iconv_open(ms_wcCharsetName, cname);
969 }
970
971 // NB: don't ever pass NULL to iconv_open(), it may crash!
972 if ( ms_wcCharsetName )
973 {
974 w2m = iconv_open( cname, ms_wcCharsetName);
975 }
976 else
977 {
978 w2m = (iconv_t)-1;
979 }
980 }
981
982 wxMBConv_iconv::~wxMBConv_iconv()
983 {
984 if ( m2w != (iconv_t)-1 )
985 iconv_close(m2w);
986 if ( w2m != (iconv_t)-1 )
987 iconv_close(w2m);
988 }
989
990 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
991 {
992 size_t inbuf = strlen(psz);
993 size_t outbuf = n * SIZEOF_WCHAR_T;
994 size_t res, cres;
995 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
996 wchar_t *bufPtr = buf;
997 const char *pszPtr = psz;
998
999 if (buf)
1000 {
1001 // have destination buffer, convert there
1002 cres = iconv(m2w,
1003 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1004 (char**)&bufPtr, &outbuf);
1005 res = n - (outbuf / SIZEOF_WCHAR_T);
1006
1007 if (ms_wcNeedsSwap)
1008 {
1009 // convert to native endianness
1010 WC_BSWAP(buf /* _not_ bufPtr */, res)
1011 }
1012
1013 // NB: iconv was given only strlen(psz) characters on input, and so
1014 // it couldn't convert the trailing zero. Let's do it ourselves
1015 // if there's some room left for it in the output buffer.
1016 if (res < n)
1017 buf[res] = 0;
1018 }
1019 else
1020 {
1021 // no destination buffer... convert using temp buffer
1022 // to calculate destination buffer requirement
1023 wchar_t tbuf[8];
1024 res = 0;
1025 do {
1026 bufPtr = tbuf;
1027 outbuf = 8*SIZEOF_WCHAR_T;
1028
1029 cres = iconv(m2w,
1030 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1031 (char**)&bufPtr, &outbuf );
1032
1033 res += 8-(outbuf/SIZEOF_WCHAR_T);
1034 } while ((cres==(size_t)-1) && (errno==E2BIG));
1035 }
1036
1037 if (ICONV_FAILED(cres, inbuf))
1038 {
1039 //VS: it is ok if iconv fails, hence trace only
1040 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1041 return (size_t)-1;
1042 }
1043
1044 return res;
1045 }
1046
1047 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1048 {
1049 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1050 size_t outbuf = n;
1051 size_t res, cres;
1052
1053 wchar_t *tmpbuf = 0;
1054
1055 if (ms_wcNeedsSwap)
1056 {
1057 // need to copy to temp buffer to switch endianness
1058 // this absolutely doesn't rock!
1059 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1060 // could be in read-only memory, or be accessed in some other thread)
1061 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1062 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1063 WC_BSWAP(tmpbuf, inbuf)
1064 psz=tmpbuf;
1065 }
1066
1067 if (buf)
1068 {
1069 // have destination buffer, convert there
1070 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1071
1072 res = n-outbuf;
1073
1074 // NB: iconv was given only wcslen(psz) characters on input, and so
1075 // it couldn't convert the trailing zero. Let's do it ourselves
1076 // if there's some room left for it in the output buffer.
1077 if (res < n)
1078 buf[0] = 0;
1079 }
1080 else
1081 {
1082 // no destination buffer... convert using temp buffer
1083 // to calculate destination buffer requirement
1084 char tbuf[16];
1085 res = 0;
1086 do {
1087 buf = tbuf; outbuf = 16;
1088
1089 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1090
1091 res += 16 - outbuf;
1092 } while ((cres==(size_t)-1) && (errno==E2BIG));
1093 }
1094
1095 if (ms_wcNeedsSwap)
1096 {
1097 free(tmpbuf);
1098 }
1099
1100 if (ICONV_FAILED(cres, inbuf))
1101 {
1102 //VS: it is ok if iconv fails, hence trace only
1103 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1104 return (size_t)-1;
1105 }
1106
1107 return res;
1108 }
1109
1110 #endif // HAVE_ICONV
1111
1112
1113 // ============================================================================
1114 // Win32 conversion classes
1115 // ============================================================================
1116
1117 #ifdef wxHAVE_WIN32_MB2WC
1118
1119 // from utils.cpp
1120 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1121 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1122
1123 class wxMBConv_win32 : public wxMBConv
1124 {
1125 public:
1126 wxMBConv_win32()
1127 {
1128 m_CodePage = CP_ACP;
1129 }
1130
1131 wxMBConv_win32(const wxChar* name)
1132 {
1133 m_CodePage = wxCharsetToCodepage(name);
1134 }
1135
1136 wxMBConv_win32(wxFontEncoding encoding)
1137 {
1138 m_CodePage = wxEncodingToCodepage(encoding);
1139 }
1140
1141 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1142 {
1143 const size_t len = ::MultiByteToWideChar
1144 (
1145 m_CodePage, // code page
1146 0, // flags (none)
1147 psz, // input string
1148 -1, // its length (NUL-terminated)
1149 buf, // output string
1150 buf ? n : 0 // size of output buffer
1151 );
1152
1153 // note that it returns count of written chars for buf != NULL and size
1154 // of the needed buffer for buf == NULL so in either case the length of
1155 // the string (which never includes the terminating NUL) is one less
1156 return len ? len - 1 : (size_t)-1;
1157 }
1158
1159 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1160 {
1161 /*
1162 we have a problem here: by default, WideCharToMultiByte() may
1163 replace characters unrepresentable in the target code page with bad
1164 quality approximations such as turning "1/2" symbol (U+00BD) into
1165 "1" for the code pages which don't have it and we, obviously, want
1166 to avoid this at any price
1167
1168 the trouble is that this function does it _silently_, i.e. it won't
1169 even tell us whether it did or not... Win98/2000 and higher provide
1170 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1171 we have to resort to a round trip, i.e. check that converting back
1172 results in the same string -- this is, of course, expensive but
1173 otherwise we simply can't be sure to not garble the data.
1174 */
1175
1176 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1177 // it doesn't work with CJK encodings (which we test for rather roughly
1178 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1179 // supporting it
1180 BOOL usedDef wxDUMMY_INITIALIZE(false),
1181 *pUsedDef;
1182 int flags;
1183 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1184 {
1185 // it's our lucky day
1186 flags = WC_NO_BEST_FIT_CHARS;
1187 pUsedDef = &usedDef;
1188 }
1189 else // old system or unsupported encoding
1190 {
1191 flags = 0;
1192 pUsedDef = NULL;
1193 }
1194
1195 const size_t len = ::WideCharToMultiByte
1196 (
1197 m_CodePage, // code page
1198 flags, // either none or no best fit
1199 pwz, // input string
1200 -1, // it is (wide) NUL-terminated
1201 buf, // output buffer
1202 buf ? n : 0, // and its size
1203 NULL, // default "replacement" char
1204 pUsedDef // [out] was it used?
1205 );
1206
1207 if ( !len )
1208 {
1209 // function totally failed
1210 return (size_t)-1;
1211 }
1212
1213 // if we were really converting, check if we succeeded
1214 if ( buf )
1215 {
1216 if ( flags )
1217 {
1218 // check if the conversion failed, i.e. if any replacements
1219 // were done
1220 if ( usedDef )
1221 return (size_t)-1;
1222 }
1223 else // we must resort to double tripping...
1224 {
1225 wxWCharBuffer wcBuf(n);
1226 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1227 wcscmp(wcBuf, pwz) != 0 )
1228 {
1229 // we didn't obtain the same thing we started from, hence
1230 // the conversion was lossy and we consider that it failed
1231 return (size_t)-1;
1232 }
1233 }
1234 }
1235
1236 // see the comment above for the reason of "len - 1"
1237 return len - 1;
1238 }
1239
1240 bool IsOk() const { return m_CodePage != -1; }
1241
1242 private:
1243 static bool CanUseNoBestFit()
1244 {
1245 static int s_isWin98Or2k = -1;
1246
1247 if ( s_isWin98Or2k == -1 )
1248 {
1249 int verMaj, verMin;
1250 switch ( wxGetOsVersion(&verMaj, &verMin) )
1251 {
1252 case wxWIN95:
1253 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1254 break;
1255
1256 case wxWINDOWS_NT:
1257 s_isWin98Or2k = verMaj >= 5;
1258 break;
1259
1260 default:
1261 // unknown, be conseravtive by default
1262 s_isWin98Or2k = 0;
1263 }
1264
1265 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1266 }
1267
1268 return s_isWin98Or2k == 1;
1269 }
1270
1271 long m_CodePage;
1272 };
1273
1274 #endif // wxHAVE_WIN32_MB2WC
1275
1276 // ============================================================================
1277 // Mac conversion classes
1278 // ============================================================================
1279
1280 #if defined(__WXMAC__) && defined(TARGET_CARBON)
1281
1282 class wxMBConv_mac : public wxMBConv
1283 {
1284 public:
1285 wxMBConv_mac()
1286 {
1287 Init(CFStringGetSystemEncoding()) ;
1288 }
1289
1290 wxMBConv_mac(const wxChar* name)
1291 {
1292 Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, FALSE) ) ) ;
1293 }
1294
1295 wxMBConv_mac(wxFontEncoding encoding)
1296 {
1297 Init( wxMacGetSystemEncFromFontEnc(encoding) );
1298 }
1299
1300 ~wxMBConv_mac()
1301 {
1302 OSStatus status = noErr ;
1303 status = TECDisposeConverter(m_MB2WC_converter);
1304 status = TECDisposeConverter(m_WC2MB_converter);
1305 }
1306
1307
1308 void Init( TextEncodingBase encoding)
1309 {
1310 OSStatus status = noErr ;
1311 m_char_encoding = encoding ;
1312 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
1313
1314 status = TECCreateConverter(&m_MB2WC_converter,
1315 m_char_encoding,
1316 m_unicode_encoding);
1317 status = TECCreateConverter(&m_WC2MB_converter,
1318 m_unicode_encoding,
1319 m_char_encoding);
1320 }
1321
1322 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1323 {
1324 OSStatus status = noErr ;
1325 ByteCount byteOutLen ;
1326 ByteCount byteInLen = strlen(psz) ;
1327 wchar_t *tbuf = NULL ;
1328 UniChar* ubuf = NULL ;
1329 size_t res = 0 ;
1330
1331 if (buf == NULL)
1332 {
1333 n = byteInLen ;
1334 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
1335 }
1336 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
1337 #if SIZEOF_WCHAR_T == 4
1338 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
1339 #else
1340 ubuf = (UniChar*) (buf ? buf : tbuf) ;
1341 #endif
1342 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
1343 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
1344 #if SIZEOF_WCHAR_T == 4
1345 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
1346 // is not properly terminated we get random characters at the end
1347 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
1348 wxMBConvUTF16BE converter ;
1349 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
1350 free( ubuf ) ;
1351 #else
1352 res = byteOutLen / sizeof( UniChar ) ;
1353 #endif
1354 if ( buf == NULL )
1355 free(tbuf) ;
1356
1357 if ( buf && res < n)
1358 buf[res] = 0;
1359
1360 return res ;
1361 }
1362
1363 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
1364 {
1365 OSStatus status = noErr ;
1366 ByteCount byteOutLen ;
1367 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
1368
1369 char *tbuf = NULL ;
1370
1371 if (buf == NULL)
1372 {
1373 // worst case
1374 n = byteInLen * 2 ;
1375 tbuf = (char*) malloc( n ) ;
1376 }
1377
1378 ByteCount byteBufferLen = n ;
1379 UniChar* ubuf = NULL ;
1380 #if SIZEOF_WCHAR_T == 4
1381 wxMBConvUTF16BE converter ;
1382 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
1383 byteInLen = unicharlen ;
1384 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
1385 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
1386 #else
1387 ubuf = (UniChar*) psz ;
1388 #endif
1389 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
1390 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
1391 #if SIZEOF_WCHAR_T == 4
1392 free( ubuf ) ;
1393 #endif
1394 if ( buf == NULL )
1395 free(tbuf) ;
1396
1397 size_t res = byteOutLen ;
1398 if ( buf && res < n)
1399 buf[res] = 0;
1400
1401 return res ;
1402 }
1403
1404 bool IsOk() const
1405 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
1406
1407 private:
1408 TECObjectRef m_MB2WC_converter ;
1409 TECObjectRef m_WC2MB_converter ;
1410
1411 TextEncodingBase m_char_encoding ;
1412 TextEncodingBase m_unicode_encoding ;
1413 };
1414
1415 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1416
1417 // ============================================================================
1418 // wxEncodingConverter based conversion classes
1419 // ============================================================================
1420
1421 #if wxUSE_FONTMAP
1422
1423 class wxMBConv_wxwin : public wxMBConv
1424 {
1425 private:
1426 void Init()
1427 {
1428 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
1429 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
1430 }
1431
1432 public:
1433 // temporarily just use wxEncodingConverter stuff,
1434 // so that it works while a better implementation is built
1435 wxMBConv_wxwin(const wxChar* name)
1436 {
1437 if (name)
1438 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
1439 else
1440 m_enc = wxFONTENCODING_SYSTEM;
1441
1442 Init();
1443 }
1444
1445 wxMBConv_wxwin(wxFontEncoding enc)
1446 {
1447 m_enc = enc;
1448
1449 Init();
1450 }
1451
1452 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
1453 {
1454 size_t inbuf = strlen(psz);
1455 if (buf)
1456 m2w.Convert(psz,buf);
1457 return inbuf;
1458 }
1459
1460 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
1461 {
1462 const size_t inbuf = wxWcslen(psz);
1463 if (buf)
1464 w2m.Convert(psz,buf);
1465
1466 return inbuf;
1467 }
1468
1469 bool IsOk() const { return m_ok; }
1470
1471 public:
1472 wxFontEncoding m_enc;
1473 wxEncodingConverter m2w, w2m;
1474
1475 // were we initialized successfully?
1476 bool m_ok;
1477
1478 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
1479 };
1480
1481 #endif // wxUSE_FONTMAP
1482
1483 // ============================================================================
1484 // wxCSConv implementation
1485 // ============================================================================
1486
1487 void wxCSConv::Init()
1488 {
1489 m_name = NULL;
1490 m_convReal = NULL;
1491 m_deferred = true;
1492 }
1493
1494 wxCSConv::wxCSConv(const wxChar *charset)
1495 {
1496 Init();
1497
1498 if ( charset )
1499 {
1500 SetName(charset);
1501 }
1502
1503 m_encoding = wxFONTENCODING_SYSTEM;
1504 }
1505
1506 wxCSConv::wxCSConv(wxFontEncoding encoding)
1507 {
1508 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
1509 {
1510 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
1511
1512 encoding = wxFONTENCODING_SYSTEM;
1513 }
1514
1515 Init();
1516
1517 m_encoding = encoding;
1518 }
1519
1520 wxCSConv::~wxCSConv()
1521 {
1522 Clear();
1523 }
1524
1525 wxCSConv::wxCSConv(const wxCSConv& conv)
1526 : wxMBConv()
1527 {
1528 Init();
1529
1530 SetName(conv.m_name);
1531 m_encoding = conv.m_encoding;
1532 }
1533
1534 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
1535 {
1536 Clear();
1537
1538 SetName(conv.m_name);
1539 m_encoding = conv.m_encoding;
1540
1541 return *this;
1542 }
1543
1544 void wxCSConv::Clear()
1545 {
1546 free(m_name);
1547 delete m_convReal;
1548
1549 m_name = NULL;
1550 m_convReal = NULL;
1551 }
1552
1553 void wxCSConv::SetName(const wxChar *charset)
1554 {
1555 if (charset)
1556 {
1557 m_name = wxStrdup(charset);
1558 m_deferred = true;
1559 }
1560 }
1561
1562 wxMBConv *wxCSConv::DoCreate() const
1563 {
1564 // check for the special case of ASCII or ISO8859-1 charset: as we have
1565 // special knowledge of it anyhow, we don't need to create a special
1566 // conversion object
1567 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
1568 {
1569 // don't convert at all
1570 return NULL;
1571 }
1572
1573 // we trust OS to do conversion better than we can so try external
1574 // conversion methods first
1575 //
1576 // the full order is:
1577 // 1. OS conversion (iconv() under Unix or Win32 API)
1578 // 2. hard coded conversions for UTF
1579 // 3. wxEncodingConverter as fall back
1580
1581 // step (1)
1582 #ifdef HAVE_ICONV
1583 #if !wxUSE_FONTMAP
1584 if ( m_name )
1585 #endif // !wxUSE_FONTMAP
1586 {
1587 wxString name(m_name);
1588
1589 #if wxUSE_FONTMAP
1590 if ( name.empty() )
1591 name = wxFontMapper::Get()->GetEncodingName(m_encoding);
1592 #endif // wxUSE_FONTMAP
1593
1594 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
1595 if ( conv->IsOk() )
1596 return conv;
1597
1598 delete conv;
1599 }
1600 #endif // HAVE_ICONV
1601
1602 #ifdef wxHAVE_WIN32_MB2WC
1603 {
1604 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
1605 : new wxMBConv_win32(m_encoding);
1606 if ( conv->IsOk() )
1607 return conv;
1608
1609 delete conv;
1610 }
1611 #endif // wxHAVE_WIN32_MB2WC
1612 #if defined(__WXMAC__)
1613 {
1614 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
1615 {
1616
1617 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
1618 : new wxMBConv_mac(m_encoding);
1619 if ( conv->IsOk() )
1620 return conv;
1621
1622 delete conv;
1623 }
1624 }
1625 #endif
1626 // step (2)
1627 wxFontEncoding enc = m_encoding;
1628 #if wxUSE_FONTMAP
1629 if ( enc == wxFONTENCODING_SYSTEM && m_name )
1630 {
1631 // use "false" to suppress interactive dialogs -- we can be called from
1632 // anywhere and popping up a dialog from here is the last thing we want to
1633 // do
1634 enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
1635 }
1636 #endif // wxUSE_FONTMAP
1637
1638 switch ( enc )
1639 {
1640 case wxFONTENCODING_UTF7:
1641 return new wxMBConvUTF7;
1642
1643 case wxFONTENCODING_UTF8:
1644 return new wxMBConvUTF8;
1645
1646 case wxFONTENCODING_UTF16BE:
1647 return new wxMBConvUTF16BE;
1648
1649 case wxFONTENCODING_UTF16LE:
1650 return new wxMBConvUTF16LE;
1651
1652 case wxFONTENCODING_UTF32BE:
1653 return new wxMBConvUTF32BE;
1654
1655 case wxFONTENCODING_UTF32LE:
1656 return new wxMBConvUTF32LE;
1657
1658 default:
1659 // nothing to do but put here to suppress gcc warnings
1660 ;
1661 }
1662
1663 // step (3)
1664 #if wxUSE_FONTMAP
1665 {
1666 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
1667 : new wxMBConv_wxwin(m_encoding);
1668 if ( conv->IsOk() )
1669 return conv;
1670
1671 delete conv;
1672 }
1673 #endif // wxUSE_FONTMAP
1674
1675 // NB: This is a hack to prevent deadlock. What could otherwise happen
1676 // in Unicode build: wxConvLocal creation ends up being here
1677 // because of some failure and logs the error. But wxLog will try to
1678 // attach timestamp, for which it will need wxConvLocal (to convert
1679 // time to char* and then wchar_t*), but that fails, tries to log
1680 // error, but wxLog has a (already locked) critical section that
1681 // guards static buffer.
1682 static bool alreadyLoggingError = false;
1683 if (!alreadyLoggingError)
1684 {
1685 alreadyLoggingError = true;
1686 wxLogError(_("Cannot convert from the charset '%s'!"),
1687 m_name ? m_name
1688 :
1689 #if wxUSE_FONTMAP
1690 wxFontMapper::GetEncodingDescription(m_encoding).c_str()
1691 #else // !wxUSE_FONTMAP
1692 wxString::Format(_("encoding %s"), m_encoding).c_str()
1693 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1694 );
1695 alreadyLoggingError = false;
1696 }
1697
1698 return NULL;
1699 }
1700
1701 void wxCSConv::CreateConvIfNeeded() const
1702 {
1703 if ( m_deferred )
1704 {
1705 wxCSConv *self = (wxCSConv *)this; // const_cast
1706
1707 #if wxUSE_INTL
1708 // if we don't have neither the name nor the encoding, use the default
1709 // encoding for this system
1710 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
1711 {
1712 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
1713 }
1714 #endif // wxUSE_INTL
1715
1716 self->m_convReal = DoCreate();
1717 self->m_deferred = false;
1718 }
1719 }
1720
1721 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1722 {
1723 CreateConvIfNeeded();
1724
1725 if (m_convReal)
1726 return m_convReal->MB2WC(buf, psz, n);
1727
1728 // latin-1 (direct)
1729 size_t len = strlen(psz);
1730
1731 if (buf)
1732 {
1733 for (size_t c = 0; c <= len; c++)
1734 buf[c] = (unsigned char)(psz[c]);
1735 }
1736
1737 return len;
1738 }
1739
1740 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1741 {
1742 CreateConvIfNeeded();
1743
1744 if (m_convReal)
1745 return m_convReal->WC2MB(buf, psz, n);
1746
1747 // latin-1 (direct)
1748 const size_t len = wxWcslen(psz);
1749 if (buf)
1750 {
1751 for (size_t c = 0; c <= len; c++)
1752 {
1753 if (psz[c] > 0xFF)
1754 return (size_t)-1;
1755 buf[c] = psz[c];
1756 }
1757 }
1758 else
1759 {
1760 for (size_t c = 0; c <= len; c++)
1761 {
1762 if (psz[c] > 0xFF)
1763 return (size_t)-1;
1764 }
1765 }
1766
1767 return len;
1768 }
1769
1770 // ----------------------------------------------------------------------------
1771 // globals
1772 // ----------------------------------------------------------------------------
1773
1774 #ifdef __WINDOWS__
1775 static wxMBConv_win32 wxConvLibcObj;
1776 #else
1777 static wxMBConvLibc wxConvLibcObj;
1778 #endif
1779
1780 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
1781 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
1782 static wxMBConvUTF7 wxConvUTF7Obj;
1783 static wxMBConvUTF8 wxConvUTF8Obj;
1784
1785
1786 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
1787 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
1788 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
1789 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
1790 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
1791 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
1792
1793 #else // !wxUSE_WCHAR_T
1794
1795 // stand-ins in absence of wchar_t
1796 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
1797 wxConvISO8859_1,
1798 wxConvLocal,
1799 wxConvUTF8;
1800
1801 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
1802
1803