]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
turn off unknown pragma for CW 9
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
3a0d76bc 4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
6001e347
RR
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
e95354ec
VZ
8// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
9// (c) 2000-2003 Vadim Zeitlin
55d99c7a 10// Licence: wxWindows licence
6001e347
RR
11/////////////////////////////////////////////////////////////////////////////
12
f6bcfd97
BP
13// ============================================================================
14// declarations
15// ============================================================================
16
17// ----------------------------------------------------------------------------
18// headers
19// ----------------------------------------------------------------------------
20
14f355c2 21#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
22 #pragma implementation "strconv.h"
23#endif
24
25// For compilers that support precompilation, includes "wx.h".
26#include "wx/wxprec.h"
27
28#ifdef __BORLANDC__
29 #pragma hdrstop
30#endif
31
373658eb
VZ
32#ifndef WX_PRECOMP
33 #include "wx/intl.h"
34 #include "wx/log.h"
35#endif // WX_PRECOMP
36
bde4baac
VZ
37#include "wx/strconv.h"
38
39#if wxUSE_WCHAR_T
40
0a1c1e62 41#ifdef __WXMSW__
373658eb 42 #include "wx/msw/private.h"
0a1c1e62
GRG
43#endif
44
1c193821 45#ifndef __WXWINCE__
1cd52418 46#include <errno.h>
1c193821
JS
47#endif
48
6001e347
RR
49#include <ctype.h>
50#include <string.h>
51#include <stdlib.h>
52
e95354ec
VZ
53#if defined(__WIN32__) && !defined(__WXMICROWIN__)
54 #define wxHAVE_WIN32_MB2WC
55#endif // __WIN32__ but !__WXMICROWIN__
56
373658eb
VZ
57// ----------------------------------------------------------------------------
58// headers
59// ----------------------------------------------------------------------------
7af284fd 60
6001e347 61#ifdef __SALFORDC__
373658eb 62 #include <clib.h>
6001e347
RR
63#endif
64
b040e242 65#ifdef HAVE_ICONV
373658eb 66 #include <iconv.h>
1cd52418 67#endif
1cd52418 68
373658eb
VZ
69#include "wx/encconv.h"
70#include "wx/fontmap.h"
71
72// ----------------------------------------------------------------------------
73// macros
74// ----------------------------------------------------------------------------
3e61dfb0 75
1cd52418 76#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
3a0d76bc 77#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
1cd52418
OK
78
79#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
80 #define WC_NAME "UCS4"
81 #define WC_BSWAP BSWAP_UCS4
82 #ifdef WORDS_BIGENDIAN
83 #define WC_NAME_BEST "UCS-4BE"
84 #else
85 #define WC_NAME_BEST "UCS-4LE"
86 #endif
1cd52418 87#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
88 #define WC_NAME "UTF16"
89 #define WC_BSWAP BSWAP_UTF16
a3f2769e 90 #define WC_UTF16
3a0d76bc
VS
91 #ifdef WORDS_BIGENDIAN
92 #define WC_NAME_BEST "UTF-16BE"
93 #else
94 #define WC_NAME_BEST "UTF-16LE"
95 #endif
bab1e722 96#else // sizeof(wchar_t) != 2 nor 4
bde4baac
VZ
97 // does this ever happen?
98 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1cd52418
OK
99#endif
100
373658eb
VZ
101// ============================================================================
102// implementation
103// ============================================================================
104
105// ----------------------------------------------------------------------------
c91830cb 106// UTF-16 en/decoding to/from UCS-4
373658eb 107// ----------------------------------------------------------------------------
6001e347 108
b0a6bb75 109
c91830cb 110static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 111{
dccce9ea 112 if (input<=0xffff)
4def3b35 113 {
999836aa
VZ
114 if (output)
115 *output = (wxUint16) input;
4def3b35 116 return 1;
dccce9ea
VZ
117 }
118 else if (input>=0x110000)
4def3b35
VS
119 {
120 return (size_t)-1;
dccce9ea
VZ
121 }
122 else
4def3b35 123 {
dccce9ea 124 if (output)
4def3b35 125 {
c91830cb 126 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
999836aa 127 *output = (wxUint16) ((input&0x3ff)+0xdc00);
4def3b35
VS
128 }
129 return 2;
1cd52418 130 }
1cd52418
OK
131}
132
c91830cb 133static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 134{
dccce9ea 135 if ((*input<0xd800) || (*input>0xdfff))
4def3b35
VS
136 {
137 output = *input;
138 return 1;
dccce9ea
VZ
139 }
140 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
4def3b35
VS
141 {
142 output = *input;
143 return (size_t)-1;
dccce9ea
VZ
144 }
145 else
4def3b35
VS
146 {
147 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
148 return 2;
149 }
1cd52418
OK
150}
151
b0a6bb75 152
f6bcfd97 153// ----------------------------------------------------------------------------
6001e347 154// wxMBConv
f6bcfd97 155// ----------------------------------------------------------------------------
6001e347 156
2b5f62a0
VZ
157wxMBConv::~wxMBConv()
158{
159 // nothing to do here
160}
161
6001e347
RR
162const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
163{
2b5f62a0 164 if ( psz )
6001e347 165 {
2b5f62a0
VZ
166 // calculate the length of the buffer needed first
167 size_t nLen = MB2WC(NULL, psz, 0);
168 if ( nLen != (size_t)-1 )
169 {
170 // now do the actual conversion
171 wxWCharBuffer buf(nLen);
172 MB2WC(buf.data(), psz, nLen + 1); // with the trailing NUL
173
174 return buf;
175 }
f6bcfd97 176 }
2b5f62a0
VZ
177
178 wxWCharBuffer buf((wchar_t *)NULL);
179
180 return buf;
6001e347
RR
181}
182
e5cceba0 183const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 184{
2b5f62a0
VZ
185 if ( pwz )
186 {
187 size_t nLen = WC2MB(NULL, pwz, 0);
188 if ( nLen != (size_t)-1 )
189 {
c91830cb
VZ
190 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
191 WC2MB(buf.data(), pwz, nLen + 4);
2b5f62a0
VZ
192
193 return buf;
194 }
195 }
196
197 wxCharBuffer buf((char *)NULL);
e5cceba0 198
e5cceba0 199 return buf;
6001e347
RR
200}
201
6001e347 202// ----------------------------------------------------------------------------
bde4baac 203// wxMBConvLibc
6001e347
RR
204// ----------------------------------------------------------------------------
205
bde4baac
VZ
206size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
207{
208 return wxMB2WC(buf, psz, n);
209}
210
211size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
212{
213 return wxWC2MB(buf, psz, n);
214}
215
216// ----------------------------------------------------------------------------
217// UTF-7
218// ----------------------------------------------------------------------------
6001e347
RR
219
220#if 0
221static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
222 "abcdefghijklmnopqrstuvwxyz"
223 "0123456789'(),-./:?";
224static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
225static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
226 "abcdefghijklmnopqrstuvwxyz"
227 "0123456789+/";
228#endif
229
230// TODO: write actual implementations of UTF-7 here
231size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
232 const char * WXUNUSED(psz),
233 size_t WXUNUSED(n)) const
234{
235 return 0;
236}
237
238size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
239 const wchar_t * WXUNUSED(psz),
240 size_t WXUNUSED(n)) const
241{
242 return 0;
243}
244
f6bcfd97 245// ----------------------------------------------------------------------------
6001e347 246// UTF-8
f6bcfd97 247// ----------------------------------------------------------------------------
6001e347 248
dccce9ea 249static wxUint32 utf8_max[]=
4def3b35 250 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347
RR
251
252size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
253{
4def3b35
VS
254 size_t len = 0;
255
dccce9ea 256 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
257 {
258 unsigned char cc = *psz++, fc = cc;
259 unsigned cnt;
dccce9ea 260 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 261 fc <<= 1;
dccce9ea 262 if (!cnt)
4def3b35
VS
263 {
264 // plain ASCII char
dccce9ea 265 if (buf)
4def3b35
VS
266 *buf++ = cc;
267 len++;
dccce9ea
VZ
268 }
269 else
4def3b35
VS
270 {
271 cnt--;
dccce9ea 272 if (!cnt)
4def3b35
VS
273 {
274 // invalid UTF-8 sequence
275 return (size_t)-1;
dccce9ea
VZ
276 }
277 else
4def3b35
VS
278 {
279 unsigned ocnt = cnt - 1;
280 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 281 while (cnt--)
4def3b35
VS
282 {
283 cc = *psz++;
dccce9ea 284 if ((cc & 0xC0) != 0x80)
4def3b35
VS
285 {
286 // invalid UTF-8 sequence
287 return (size_t)-1;
288 }
289 res = (res << 6) | (cc & 0x3f);
290 }
dccce9ea 291 if (res <= utf8_max[ocnt])
4def3b35
VS
292 {
293 // illegal UTF-8 encoding
294 return (size_t)-1;
295 }
1cd52418 296#ifdef WC_UTF16
b5153fd8
VZ
297 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
298 size_t pa = encode_utf16(res, (wxUint16 *)buf);
4def3b35
VS
299 if (pa == (size_t)-1)
300 return (size_t)-1;
dccce9ea 301 if (buf)
4def3b35
VS
302 buf += pa;
303 len += pa;
373658eb 304#else // !WC_UTF16
dccce9ea 305 if (buf)
4def3b35
VS
306 *buf++ = res;
307 len++;
373658eb 308#endif // WC_UTF16/!WC_UTF16
4def3b35
VS
309 }
310 }
6001e347 311 }
dccce9ea 312 if (buf && (len < n))
4def3b35
VS
313 *buf = 0;
314 return len;
6001e347
RR
315}
316
317size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
318{
4def3b35 319 size_t len = 0;
6001e347 320
dccce9ea 321 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
322 {
323 wxUint32 cc;
1cd52418 324#ifdef WC_UTF16
b5153fd8
VZ
325 // cast is ok for WC_UTF16
326 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 327 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 328#else
4def3b35
VS
329 cc=(*psz++) & 0x7fffffff;
330#endif
331 unsigned cnt;
332 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
dccce9ea 333 if (!cnt)
4def3b35
VS
334 {
335 // plain ASCII char
dccce9ea 336 if (buf)
574c939e 337 *buf++ = (char) cc;
4def3b35 338 len++;
dccce9ea
VZ
339 }
340
341 else
4def3b35
VS
342 {
343 len += cnt + 1;
dccce9ea 344 if (buf)
4def3b35 345 {
574c939e 346 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
4def3b35 347 while (cnt--)
574c939e 348 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
4def3b35
VS
349 }
350 }
6001e347 351 }
4def3b35
VS
352
353 if (buf && (len<n)) *buf = 0;
adb45366 354
4def3b35 355 return len;
6001e347
RR
356}
357
c91830cb
VZ
358
359
360
361// ----------------------------------------------------------------------------
362// UTF-16
363// ----------------------------------------------------------------------------
364
365#ifdef WORDS_BIGENDIAN
bde4baac
VZ
366 #define wxMBConvUTF16straight wxMBConvUTF16BE
367 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 368#else
bde4baac
VZ
369 #define wxMBConvUTF16swap wxMBConvUTF16BE
370 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
371#endif
372
373
c91830cb
VZ
374#ifdef WC_UTF16
375
c91830cb
VZ
376// copy 16bit MB to 16bit String
377size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
378{
379 size_t len=0;
380
381 while (*(wxUint16*)psz && (!buf || len < n))
382 {
383 if (buf)
384 *buf++ = *(wxUint16*)psz;
385 len++;
386
387 psz += sizeof(wxUint16);
388 }
389 if (buf && len<n) *buf=0;
390
391 return len;
392}
393
394
395// copy 16bit String to 16bit MB
396size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
397{
398 size_t len=0;
399
400 while (*psz && (!buf || len < n))
401 {
402 if (buf)
403 {
404 *(wxUint16*)buf = *psz;
405 buf += sizeof(wxUint16);
406 }
407 len += sizeof(wxUint16);
408 psz++;
409 }
410 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
411
412 return len;
413}
414
415
416// swap 16bit MB to 16bit String
417size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
418{
419 size_t len=0;
420
421 while (*(wxUint16*)psz && (!buf || len < n))
422 {
423 if (buf)
424 {
425 ((char *)buf)[0] = psz[1];
426 ((char *)buf)[1] = psz[0];
427 buf++;
428 }
429 len++;
430 psz += sizeof(wxUint16);
431 }
432 if (buf && len<n) *buf=0;
433
434 return len;
435}
436
437
// swap 16bit String to 16bit MB
439size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
440{
441 size_t len=0;
442
443 while (*psz && (!buf || len < n))
444 {
445 if (buf)
446 {
447 *buf++ = ((char*)psz)[1];
448 *buf++ = ((char*)psz)[0];
449 }
450 len += sizeof(wxUint16);
451 psz++;
452 }
453 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
454
455 return len;
456}
457
458
459#else // WC_UTF16
460
461
462// copy 16bit MB to 32bit String
463size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
464{
465 size_t len=0;
466
467 while (*(wxUint16*)psz && (!buf || len < n))
468 {
469 wxUint32 cc;
470 size_t pa=decode_utf16((wxUint16*)psz, cc);
471 if (pa == (size_t)-1)
472 return pa;
473
474 if (buf)
475 *buf++ = cc;
476 len++;
477 psz += pa * sizeof(wxUint16);
478 }
479 if (buf && len<n) *buf=0;
480
481 return len;
482}
483
484
485// copy 32bit String to 16bit MB
486size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
487{
488 size_t len=0;
489
490 while (*psz && (!buf || len < n))
491 {
492 wxUint16 cc[2];
493 size_t pa=encode_utf16(*psz, cc);
494
495 if (pa == (size_t)-1)
496 return pa;
497
498 if (buf)
499 {
69b80d28 500 *(wxUint16*)buf = cc[0];
b5153fd8 501 buf += sizeof(wxUint16);
c91830cb 502 if (pa > 1)
69b80d28
VZ
503 {
504 *(wxUint16*)buf = cc[1];
505 buf += sizeof(wxUint16);
506 }
c91830cb
VZ
507 }
508
509 len += pa*sizeof(wxUint16);
510 psz++;
511 }
512 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
513
514 return len;
515}
516
517
518// swap 16bit MB to 32bit String
519size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
520{
521 size_t len=0;
522
523 while (*(wxUint16*)psz && (!buf || len < n))
524 {
525 wxUint32 cc;
526 char tmp[4];
527 tmp[0]=psz[1]; tmp[1]=psz[0];
528 tmp[2]=psz[3]; tmp[3]=psz[2];
529
530 size_t pa=decode_utf16((wxUint16*)tmp, cc);
531 if (pa == (size_t)-1)
532 return pa;
533
534 if (buf)
535 *buf++ = cc;
536
537 len++;
538 psz += pa * sizeof(wxUint16);
539 }
540 if (buf && len<n) *buf=0;
541
542 return len;
543}
544
545
546// swap 32bit String to 16bit MB
547size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
548{
549 size_t len=0;
550
551 while (*psz && (!buf || len < n))
552 {
553 wxUint16 cc[2];
554 size_t pa=encode_utf16(*psz, cc);
555
556 if (pa == (size_t)-1)
557 return pa;
558
559 if (buf)
560 {
561 *buf++ = ((char*)cc)[1];
562 *buf++ = ((char*)cc)[0];
563 if (pa > 1)
564 {
565 *buf++ = ((char*)cc)[3];
566 *buf++ = ((char*)cc)[2];
567 }
568 }
569
570 len += pa*sizeof(wxUint16);
571 psz++;
572 }
573 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
574
575 return len;
576}
577
578#endif // WC_UTF16
579
580
581// ----------------------------------------------------------------------------
582// UTF-32
583// ----------------------------------------------------------------------------
584
585#ifdef WORDS_BIGENDIAN
586#define wxMBConvUTF32straight wxMBConvUTF32BE
587#define wxMBConvUTF32swap wxMBConvUTF32LE
588#else
589#define wxMBConvUTF32swap wxMBConvUTF32BE
590#define wxMBConvUTF32straight wxMBConvUTF32LE
591#endif
592
593
594WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
595WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
596
597
598#ifdef WC_UTF16
599
600// copy 32bit MB to 16bit String
601size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
602{
603 size_t len=0;
604
605 while (*(wxUint32*)psz && (!buf || len < n))
606 {
607 wxUint16 cc[2];
608
609 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
610 if (pa == (size_t)-1)
611 return pa;
612
613 if (buf)
614 {
615 *buf++ = cc[0];
616 if (pa > 1)
617 *buf++ = cc[1];
618 }
619 len += pa;
620 psz += sizeof(wxUint32);
621 }
622 if (buf && len<n) *buf=0;
623
624 return len;
625}
626
627
628// copy 16bit String to 32bit MB
629size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
630{
631 size_t len=0;
632
633 while (*psz && (!buf || len < n))
634 {
635 wxUint32 cc;
636
b5153fd8
VZ
637 // cast is ok for WC_UTF16
638 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
639 if (pa == (size_t)-1)
640 return pa;
641
642 if (buf)
643 {
644 *(wxUint32*)buf = cc;
645 buf += sizeof(wxUint32);
646 }
647 len += sizeof(wxUint32);
648 psz += pa;
649 }
b5153fd8
VZ
650
651 if (buf && len<=n-sizeof(wxUint32))
652 *(wxUint32*)buf=0;
c91830cb
VZ
653
654 return len;
655}
656
657
658
659// swap 32bit MB to 16bit String
660size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
661{
662 size_t len=0;
663
664 while (*(wxUint32*)psz && (!buf || len < n))
665 {
666 char tmp[4];
667 tmp[0] = psz[3]; tmp[1] = psz[2];
668 tmp[2] = psz[1]; tmp[3] = psz[0];
669
670
671 wxUint16 cc[2];
672
673 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
674 if (pa == (size_t)-1)
675 return pa;
676
677 if (buf)
678 {
679 *buf++ = cc[0];
680 if (pa > 1)
681 *buf++ = cc[1];
682 }
683 len += pa;
684 psz += sizeof(wxUint32);
685 }
b5153fd8
VZ
686
687 if (buf && len<n)
688 *buf=0;
c91830cb
VZ
689
690 return len;
691}
692
693
694// swap 16bit String to 32bit MB
695size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
696{
697 size_t len=0;
698
699 while (*psz && (!buf || len < n))
700 {
701 char cc[4];
702
b5153fd8
VZ
703 // cast is ok for WC_UTF16
704 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
705 if (pa == (size_t)-1)
706 return pa;
707
708 if (buf)
709 {
710 *buf++ = cc[3];
711 *buf++ = cc[2];
712 *buf++ = cc[1];
713 *buf++ = cc[0];
714 }
715 len += sizeof(wxUint32);
716 psz += pa;
717 }
b5153fd8
VZ
718
719 if (buf && len<=n-sizeof(wxUint32))
720 *(wxUint32*)buf=0;
c91830cb
VZ
721
722 return len;
723}
724
725#else // WC_UTF16
726
727
728// copy 32bit MB to 32bit String
729size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
730{
731 size_t len=0;
732
733 while (*(wxUint32*)psz && (!buf || len < n))
734 {
735 if (buf)
736 *buf++ = *(wxUint32*)psz;
737 len++;
738 psz += sizeof(wxUint32);
739 }
b5153fd8
VZ
740
741 if (buf && len<n)
742 *buf=0;
c91830cb
VZ
743
744 return len;
745}
746
747
748// copy 32bit String to 32bit MB
749size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
750{
751 size_t len=0;
752
753 while (*psz && (!buf || len < n))
754 {
755 if (buf)
756 {
757 *(wxUint32*)buf = *psz;
758 buf += sizeof(wxUint32);
759 }
760
761 len += sizeof(wxUint32);
762 psz++;
763 }
764
b5153fd8
VZ
765 if (buf && len<=n-sizeof(wxUint32))
766 *(wxUint32*)buf=0;
c91830cb
VZ
767
768 return len;
769}
770
771
772// swap 32bit MB to 32bit String
773size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
774{
775 size_t len=0;
776
777 while (*(wxUint32*)psz && (!buf || len < n))
778 {
779 if (buf)
780 {
781 ((char *)buf)[0] = psz[3];
782 ((char *)buf)[1] = psz[2];
783 ((char *)buf)[2] = psz[1];
784 ((char *)buf)[3] = psz[0];
785 buf++;
786 }
787 len++;
788 psz += sizeof(wxUint32);
789 }
b5153fd8
VZ
790
791 if (buf && len<n)
792 *buf=0;
c91830cb
VZ
793
794 return len;
795}
796
797
798// swap 32bit String to 32bit MB
799size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
800{
801 size_t len=0;
802
803 while (*psz && (!buf || len < n))
804 {
805 if (buf)
806 {
807 *buf++ = ((char *)psz)[3];
808 *buf++ = ((char *)psz)[2];
809 *buf++ = ((char *)psz)[1];
810 *buf++ = ((char *)psz)[0];
811 }
812 len += sizeof(wxUint32);
813 psz++;
814 }
b5153fd8
VZ
815
816 if (buf && len<=n-sizeof(wxUint32))
817 *(wxUint32*)buf=0;
c91830cb
VZ
818
819 return len;
820}
821
822
823#endif // WC_UTF16
824
825
36acb880
VZ
826// ============================================================================
827// The classes doing conversion using the iconv_xxx() functions
828// ============================================================================
3caec1bb 829
b040e242 830#ifdef HAVE_ICONV
3a0d76bc 831
3caec1bb
VS
832// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
833// if output buffer is _exactly_ as big as needed. Such case is (unless there's
834// yet another bug in glibc) the only case when iconv() returns with (size_t)-1
835// (which means error) and says there are 0 bytes left in the input buffer --
836// when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
837// this alternative test for iconv() failure.
838// [This bug does not appear in glibc 2.2.]
839#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
840#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
841 (errno != E2BIG || bufLeft != 0))
842#else
843#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
844#endif
845
ab217dba 846#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880
VZ
847
848// ----------------------------------------------------------------------------
e95354ec 849// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
850// ----------------------------------------------------------------------------
851
e95354ec 852class wxMBConv_iconv : public wxMBConv
1cd52418
OK
853{
854public:
e95354ec
VZ
855 wxMBConv_iconv(const wxChar *name);
856 virtual ~wxMBConv_iconv();
36acb880 857
bde4baac
VZ
858 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
859 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 860
e95354ec 861 bool IsOk() const
36acb880
VZ
862 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
863
864protected:
865 // the iconv handlers used to translate from multibyte to wide char and in
866 // the other direction
867 iconv_t m2w,
868 w2m;
869
870private:
e95354ec 871 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
872 // available on this machine, it will remain NULL
873 static const char *ms_wcCharsetName;
874
875 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
876 // different endian-ness than the native one
405d8f46 877 static bool ms_wcNeedsSwap;
36acb880
VZ
878};
879
e95354ec
VZ
880const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
881bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 882
e95354ec 883wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 884{
04c79127
RR
885 // Do it the hard way
886 char cname[100];
887 for (size_t i = 0; i < wxStrlen(name)+1; i++)
888 cname[i] = (char) name[i];
889
36acb880
VZ
890 // check for charset that represents wchar_t:
891 if (ms_wcCharsetName == NULL)
f1339c56 892 {
e95354ec 893 ms_wcNeedsSwap = false;
dccce9ea 894
36acb880
VZ
895 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
896 ms_wcCharsetName = WC_NAME_BEST;
04c79127 897 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 898
36acb880
VZ
899 if (m2w == (iconv_t)-1)
900 {
901 // try charset w/o bytesex info (e.g. "UCS4")
902 // and check for bytesex ourselves:
903 ms_wcCharsetName = WC_NAME;
04c79127 904 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
905
906 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
907 if (m2w == (iconv_t)-1)
908 {
36acb880 909 ms_wcCharsetName = "WCHAR_T";
04c79127 910 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 911 }
3a0d76bc 912
36acb880
VZ
913 if (m2w != (iconv_t)-1)
914 {
915 char buf[2], *bufPtr;
916 wchar_t wbuf[2], *wbufPtr;
917 size_t insz, outsz;
918 size_t res;
919
920 buf[0] = 'A';
921 buf[1] = 0;
922 wbuf[0] = 0;
923 insz = 2;
924 outsz = SIZEOF_WCHAR_T * 2;
925 wbufPtr = wbuf;
926 bufPtr = buf;
927
928 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
929 (char**)&wbufPtr, &outsz);
930
931 if (ICONV_FAILED(res, insz))
3a0d76bc 932 {
36acb880
VZ
933 ms_wcCharsetName = NULL;
934 wxLogLastError(wxT("iconv"));
2b5f62a0 935 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
936 }
937 else
938 {
36acb880 939 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
940 }
941 }
36acb880
VZ
942 else
943 {
944 ms_wcCharsetName = NULL;
373658eb 945
957686c8
VS
946 // VS: we must not output an error here, since wxWindows will safely
947 // fall back to using wxEncodingConverter.
948 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
949 //wxLogError(
36acb880 950 }
3a0d76bc 951 }
36acb880 952 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 953 }
36acb880 954 else // we already have ms_wcCharsetName
3caec1bb 955 {
04c79127 956 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 957 }
dccce9ea 958
36acb880
VZ
959 // NB: don't ever pass NULL to iconv_open(), it may crash!
960 if ( ms_wcCharsetName )
f1339c56 961 {
04c79127 962 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 963 }
405d8f46
VZ
964 else
965 {
966 w2m = (iconv_t)-1;
967 }
36acb880 968}
3caec1bb 969
e95354ec 970wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
971{
972 if ( m2w != (iconv_t)-1 )
973 iconv_close(m2w);
974 if ( w2m != (iconv_t)-1 )
975 iconv_close(w2m);
976}
3a0d76bc 977
bde4baac 978size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880
VZ
979{
980 size_t inbuf = strlen(psz);
981 size_t outbuf = n * SIZEOF_WCHAR_T;
982 size_t res, cres;
983 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
984 wchar_t *bufPtr = buf;
985 const char *pszPtr = psz;
986
987 if (buf)
988 {
989 // have destination buffer, convert there
990 cres = iconv(m2w,
991 ICONV_CHAR_CAST(&pszPtr), &inbuf,
992 (char**)&bufPtr, &outbuf);
993 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 994
36acb880 995 if (ms_wcNeedsSwap)
3a0d76bc 996 {
36acb880
VZ
997 // convert to native endianness
998 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 999 }
adb45366 1000
49dd9820
VS
1001 // NB: iconv was given only strlen(psz) characters on input, and so
1002 // it couldn't convert the trailing zero. Let's do it ourselves
1003 // if there's some room left for it in the output buffer.
1004 if (res < n)
1005 buf[res] = 0;
36acb880
VZ
1006 }
1007 else
1008 {
1009 // no destination buffer... convert using temp buffer
1010 // to calculate destination buffer requirement
1011 wchar_t tbuf[8];
1012 res = 0;
1013 do {
1014 bufPtr = tbuf;
1015 outbuf = 8*SIZEOF_WCHAR_T;
1016
1017 cres = iconv(m2w,
1018 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1019 (char**)&bufPtr, &outbuf );
1020
1021 res += 8-(outbuf/SIZEOF_WCHAR_T);
1022 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1023 }
dccce9ea 1024
36acb880 1025 if (ICONV_FAILED(cres, inbuf))
f1339c56 1026 {
36acb880
VZ
1027 //VS: it is ok if iconv fails, hence trace only
1028 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1029 return (size_t)-1;
1030 }
1031
1032 return res;
1033}
1034
bde4baac 1035size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1036{
f8d791e0 1037 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1038 size_t outbuf = n;
1039 size_t res, cres;
3a0d76bc 1040
36acb880 1041 wchar_t *tmpbuf = 0;
3caec1bb 1042
36acb880
VZ
1043 if (ms_wcNeedsSwap)
1044 {
1045 // need to copy to temp buffer to switch endianness
1046 // this absolutely doesn't rock!
1047 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1048 // could be in read-only memory, or be accessed in some other thread)
1049 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1050 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1051 WC_BSWAP(tmpbuf, inbuf)
1052 psz=tmpbuf;
1053 }
3a0d76bc 1054
36acb880
VZ
1055 if (buf)
1056 {
1057 // have destination buffer, convert there
1058 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1059
36acb880 1060 res = n-outbuf;
adb45366 1061
49dd9820
VS
1062 // NB: iconv was given only wcslen(psz) characters on input, and so
1063 // it couldn't convert the trailing zero. Let's do it ourselves
1064 // if there's some room left for it in the output buffer.
1065 if (res < n)
1066 buf[0] = 0;
36acb880
VZ
1067 }
1068 else
1069 {
1070 // no destination buffer... convert using temp buffer
1071 // to calculate destination buffer requirement
1072 char tbuf[16];
1073 res = 0;
1074 do {
1075 buf = tbuf; outbuf = 16;
1076
1077 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1078
36acb880
VZ
1079 res += 16 - outbuf;
1080 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1081 }
dccce9ea 1082
36acb880
VZ
1083 if (ms_wcNeedsSwap)
1084 {
1085 free(tmpbuf);
1086 }
dccce9ea 1087
36acb880
VZ
1088 if (ICONV_FAILED(cres, inbuf))
1089 {
1090 //VS: it is ok if iconv fails, hence trace only
1091 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1092 return (size_t)-1;
1093 }
1094
1095 return res;
1096}
1097
b040e242 1098#endif // HAVE_ICONV
36acb880 1099
e95354ec 1100
36acb880
VZ
1101// ============================================================================
1102// Win32 conversion classes
1103// ============================================================================
1cd52418 1104
e95354ec 1105#ifdef wxHAVE_WIN32_MB2WC
373658eb 1106
8b04d4c4
VZ
1107// from utils.cpp
1108extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1109extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
373658eb 1110
e95354ec 1111class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1112{
1113public:
bde4baac
VZ
1114 wxMBConv_win32()
1115 {
1116 m_CodePage = CP_ACP;
1117 }
1118
e95354ec 1119 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1120 {
1121 m_CodePage = wxCharsetToCodepage(name);
1122 }
dccce9ea 1123
e95354ec 1124 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1125 {
1126 m_CodePage = wxEncodingToCodepage(encoding);
1127 }
8b04d4c4 1128
bde4baac 1129 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1130 {
2b5f62a0
VZ
1131 const size_t len = ::MultiByteToWideChar
1132 (
1133 m_CodePage, // code page
1134 0, // flags (none)
1135 psz, // input string
1136 -1, // its length (NUL-terminated)
b4da152e 1137 buf, // output string
2b5f62a0
VZ
1138 buf ? n : 0 // size of output buffer
1139 );
1140
03a991bc
VZ
1141 // note that it returns count of written chars for buf != NULL and size
1142 // of the needed buffer for buf == NULL so in either case the length of
1143 // the string (which never includes the terminating NUL) is one less
1144 return len ? len - 1 : (size_t)-1;
f1339c56 1145 }
dccce9ea 1146
bde4baac 1147 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
f1339c56 1148 {
2b5f62a0
VZ
1149 const size_t len = ::WideCharToMultiByte
1150 (
1151 m_CodePage, // code page
1152 0, // flags (none)
b4da152e 1153 psz, // input string
2b5f62a0
VZ
1154 -1, // it is (wide) NUL-terminated
1155 buf, // output buffer
1156 buf ? n : 0, // and its size
1157 NULL, // default "replacement" char
1158 NULL // [out] was it used?
1159 );
1160
03a991bc
VZ
1161 // see the comment above for the reason of "len - 1"
1162 return len ? len - 1 : (size_t)-1;
f1339c56 1163 }
dccce9ea 1164
e95354ec 1165 bool IsOk() const
b1d66b54 1166 { return m_CodePage != -1; }
f1339c56
RR
1167
1168public:
b1d66b54 1169 long m_CodePage;
1cd52418 1170};
e95354ec
VZ
1171
1172#endif // wxHAVE_WIN32_MB2WC
1173
1e6feb95 1174
36acb880
VZ
1175// ============================================================================
1176// wxEncodingConverter based conversion classes
1177// ============================================================================
1178
1e6feb95 1179#if wxUSE_FONTMAP
1cd52418 1180
e95354ec 1181class wxMBConv_wxwin : public wxMBConv
1cd52418 1182{
8b04d4c4
VZ
1183private:
1184 void Init()
1185 {
1186 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
1187 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
1188 }
1189
6001e347 1190public:
f1339c56
RR
1191 // temporarily just use wxEncodingConverter stuff,
1192 // so that it works while a better implementation is built
e95354ec 1193 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
1194 {
1195 if (name)
e95354ec 1196 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
1197 else
1198 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 1199
8b04d4c4
VZ
1200 Init();
1201 }
1202
e95354ec 1203 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
1204 {
1205 m_enc = enc;
1206
1207 Init();
f1339c56 1208 }
dccce9ea 1209
bde4baac 1210 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
1211 {
1212 size_t inbuf = strlen(psz);
dccce9ea 1213 if (buf)
4def3b35 1214 m2w.Convert(psz,buf);
f1339c56
RR
1215 return inbuf;
1216 }
dccce9ea 1217
bde4baac 1218 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 1219 {
f8d791e0 1220 const size_t inbuf = wxWcslen(psz);
f1339c56
RR
1221 if (buf)
1222 w2m.Convert(psz,buf);
dccce9ea 1223
f1339c56
RR
1224 return inbuf;
1225 }
dccce9ea 1226
e95354ec 1227 bool IsOk() const { return m_ok; }
f1339c56
RR
1228
1229public:
8b04d4c4 1230 wxFontEncoding m_enc;
f1339c56 1231 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
1232
1233 // were we initialized successfully?
1234 bool m_ok;
fc7a2a60 1235
e95354ec 1236 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 1237};
6001e347 1238
1e6feb95
VZ
1239#endif // wxUSE_FONTMAP
1240
36acb880
VZ
1241// ============================================================================
1242// wxCSConv implementation
1243// ============================================================================
1244
8b04d4c4 1245void wxCSConv::Init()
6001e347 1246{
e95354ec
VZ
1247 m_name = NULL;
1248 m_convReal = NULL;
1249 m_deferred = true;
1250}
1251
8b04d4c4
VZ
1252wxCSConv::wxCSConv(const wxChar *charset)
1253{
1254 Init();
82713003 1255
e95354ec
VZ
1256 if ( charset )
1257 {
e95354ec
VZ
1258 SetName(charset);
1259 }
bda3d86a
VZ
1260
1261 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
1262}
1263
8b04d4c4
VZ
1264wxCSConv::wxCSConv(wxFontEncoding encoding)
1265{
bda3d86a 1266 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
1267 {
1268 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
1269
1270 encoding = wxFONTENCODING_SYSTEM;
1271 }
1272
8b04d4c4
VZ
1273 Init();
1274
bda3d86a 1275 m_encoding = encoding;
8b04d4c4
VZ
1276}
1277
6001e347
RR
1278wxCSConv::~wxCSConv()
1279{
65e50848
JS
1280 Clear();
1281}
1282
54380f29 1283wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 1284 : wxMBConv()
54380f29 1285{
8b04d4c4
VZ
1286 Init();
1287
54380f29 1288 SetName(conv.m_name);
8b04d4c4 1289 m_encoding = conv.m_encoding;
54380f29
GD
1290}
1291
1292wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
1293{
1294 Clear();
8b04d4c4 1295
54380f29 1296 SetName(conv.m_name);
8b04d4c4
VZ
1297 m_encoding = conv.m_encoding;
1298
54380f29
GD
1299 return *this;
1300}
1301
65e50848
JS
1302void wxCSConv::Clear()
1303{
8b04d4c4 1304 free(m_name);
e95354ec 1305 delete m_convReal;
8b04d4c4 1306
65e50848 1307 m_name = NULL;
e95354ec 1308 m_convReal = NULL;
6001e347
RR
1309}
1310
1311void wxCSConv::SetName(const wxChar *charset)
1312{
f1339c56
RR
1313 if (charset)
1314 {
1315 m_name = wxStrdup(charset);
e95354ec 1316 m_deferred = true;
f1339c56 1317 }
6001e347
RR
1318}
1319
e95354ec
VZ
1320wxMBConv *wxCSConv::DoCreate() const
1321{
c547282d
VZ
1322 // check for the special case of ASCII or ISO8859-1 charset: as we have
1323 // special knowledge of it anyhow, we don't need to create a special
1324 // conversion object
1325 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 1326 {
e95354ec
VZ
1327 // don't convert at all
1328 return NULL;
1329 }
dccce9ea 1330
e95354ec
VZ
1331 // we trust OS to do conversion better than we can so try external
1332 // conversion methods first
1333 //
1334 // the full order is:
1335 // 1. OS conversion (iconv() under Unix or Win32 API)
1336 // 2. hard coded conversions for UTF
1337 // 3. wxEncodingConverter as fall back
1338
1339 // step (1)
1340#ifdef HAVE_ICONV
c547282d 1341#if !wxUSE_FONTMAP
e95354ec 1342 if ( m_name )
c547282d 1343#endif // !wxUSE_FONTMAP
e95354ec 1344 {
c547282d
VZ
1345 wxString name(m_name);
1346
1347#if wxUSE_FONTMAP
1348 if ( name.empty() )
1349 name = wxFontMapper::Get()->GetEncodingName(m_encoding);
1350#endif // wxUSE_FONTMAP
1351
1352 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
e95354ec
VZ
1353 if ( conv->IsOk() )
1354 return conv;
1355
1356 delete conv;
1357 }
1358#endif // HAVE_ICONV
1359
1360#ifdef wxHAVE_WIN32_MB2WC
1361 {
1362 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
1363 : new wxMBConv_win32(m_encoding);
1364 if ( conv->IsOk() )
1365 return conv;
1366
1367 delete conv;
1368 }
1369#endif // wxHAVE_WIN32_MB2WC
1370
1371 // step (2)
1372 wxFontEncoding enc = m_encoding;
1373#if wxUSE_FONTMAP
c547282d
VZ
1374 if ( enc == wxFONTENCODING_SYSTEM && m_name )
1375 {
1376 // use "false" to suppress interactive dialogs -- we can be called from
1377 // anywhere and popping up a dialog from here is the last thing we want to
1378 // do
1379 enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
1380 }
e95354ec
VZ
1381#endif // wxUSE_FONTMAP
1382
1383 switch ( enc )
1384 {
1385 case wxFONTENCODING_UTF7:
1386 return new wxMBConvUTF7;
1387
1388 case wxFONTENCODING_UTF8:
1389 return new wxMBConvUTF8;
1390
e95354ec
VZ
1391 case wxFONTENCODING_UTF16BE:
1392 return new wxMBConvUTF16BE;
1393
1394 case wxFONTENCODING_UTF16LE:
1395 return new wxMBConvUTF16LE;
1396
e95354ec
VZ
1397 case wxFONTENCODING_UTF32BE:
1398 return new wxMBConvUTF32BE;
1399
1400 case wxFONTENCODING_UTF32LE:
1401 return new wxMBConvUTF32LE;
1402
1403 default:
1404 // nothing to do but put here to suppress gcc warnings
1405 ;
1406 }
1407
1408 // step (3)
1409#if wxUSE_FONTMAP
1410 {
1411 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
1412 : new wxMBConv_wxwin(m_encoding);
1413 if ( conv->IsOk() )
1414 return conv;
1415
1416 delete conv;
1417 }
1418#endif // wxUSE_FONTMAP
1419
a58d4f4d
VS
1420 // NB: This is a hack to prevent deadlock. What could otherwise happen
1421 // in Unicode build: wxConvLocal creation ends up being here
1422 // because of some failure and logs the error. But wxLog will try to
1423 // attach timestamp, for which it will need wxConvLocal (to convert
1424 // time to char* and then wchar_t*), but that fails, tries to log
1425 // error, but wxLog has a (already locked) critical section that
1426 // guards static buffer.
1427 static bool alreadyLoggingError = false;
1428 if (!alreadyLoggingError)
1429 {
1430 alreadyLoggingError = true;
1431 wxLogError(_("Cannot convert from the charset '%s'!"),
1432 m_name ? m_name
e95354ec
VZ
1433 :
1434#if wxUSE_FONTMAP
1435 wxFontMapper::GetEncodingDescription(m_encoding).c_str()
1436#else // !wxUSE_FONTMAP
1437 wxString::Format(_("encoding %s"), m_encoding).c_str()
1438#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1439 );
a58d4f4d
VS
1440 alreadyLoggingError = false;
1441 }
e95354ec
VZ
1442
1443 return NULL;
1444}
1445
1446void wxCSConv::CreateConvIfNeeded() const
1447{
1448 if ( m_deferred )
1449 {
1450 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
1451
1452#if wxUSE_INTL
1453 // if we don't have neither the name nor the encoding, use the default
1454 // encoding for this system
1455 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
1456 {
4d312c22 1457 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
1458 }
1459#endif // wxUSE_INTL
1460
e95354ec
VZ
1461 self->m_convReal = DoCreate();
1462 self->m_deferred = false;
6001e347 1463 }
6001e347
RR
1464}
1465
1466size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1467{
e95354ec 1468 CreateConvIfNeeded();
dccce9ea 1469
e95354ec
VZ
1470 if (m_convReal)
1471 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
1472
1473 // latin-1 (direct)
4def3b35 1474 size_t len = strlen(psz);
dccce9ea 1475
f1339c56
RR
1476 if (buf)
1477 {
4def3b35 1478 for (size_t c = 0; c <= len; c++)
f1339c56
RR
1479 buf[c] = (unsigned char)(psz[c]);
1480 }
dccce9ea 1481
f1339c56 1482 return len;
6001e347
RR
1483}
1484
1485size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1486{
e95354ec 1487 CreateConvIfNeeded();
dccce9ea 1488
e95354ec
VZ
1489 if (m_convReal)
1490 return m_convReal->WC2MB(buf, psz, n);
1cd52418 1491
f1339c56 1492 // latin-1 (direct)
f8d791e0 1493 const size_t len = wxWcslen(psz);
f1339c56
RR
1494 if (buf)
1495 {
4def3b35 1496 for (size_t c = 0; c <= len; c++)
24642831
VS
1497 {
1498 if (psz[c] > 0xFF)
1499 return (size_t)-1;
1500 buf[c] = psz[c];
1501 }
1502 }
1503 else
1504 {
1505 for (size_t c = 0; c <= len; c++)
1506 {
1507 if (psz[c] > 0xFF)
1508 return (size_t)-1;
1509 }
f1339c56 1510 }
dccce9ea 1511
f1339c56 1512 return len;
6001e347
RR
1513}
1514
bde4baac
VZ
1515// ----------------------------------------------------------------------------
1516// globals
1517// ----------------------------------------------------------------------------
1518
1519#ifdef __WINDOWS__
1520 static wxMBConv_win32 wxConvLibcObj;
1521#else
dcc8fac0 1522 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
1523#endif
1524
1525static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
1526static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
1527static wxMBConvUTF7 wxConvUTF7Obj;
1528static wxMBConvUTF8 wxConvUTF8Obj;
1529
1530
1531WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
1532WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
1533WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
1534WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
1535WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
1536WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
1537
1538#else // !wxUSE_WCHAR_T
1539
1540// stand-ins in absence of wchar_t
1541WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
1542 wxConvISO8859_1,
1543 wxConvLocal,
1544 wxConvUTF8;
1545
1546#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
1547
1548