]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
Some wxPerl notes.
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
3a0d76bc 4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
6001e347
RR
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
e95354ec
VZ
8// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
9// (c) 2000-2003 Vadim Zeitlin
55d99c7a 10// Licence: wxWindows licence
6001e347
RR
11/////////////////////////////////////////////////////////////////////////////
12
f6bcfd97
BP
13// ============================================================================
14// declarations
15// ============================================================================
16
17// ----------------------------------------------------------------------------
18// headers
19// ----------------------------------------------------------------------------
20
14f355c2 21#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
22 #pragma implementation "strconv.h"
23#endif
24
25// For compilers that support precompilation, includes "wx.h".
26#include "wx/wxprec.h"
27
28#ifdef __BORLANDC__
29 #pragma hdrstop
30#endif
31
373658eb
VZ
32#ifndef WX_PRECOMP
33 #include "wx/intl.h"
34 #include "wx/log.h"
35#endif // WX_PRECOMP
36
bde4baac
VZ
37#include "wx/strconv.h"
38
39#if wxUSE_WCHAR_T
40
0a1c1e62 41#ifdef __WXMSW__
373658eb 42 #include "wx/msw/private.h"
13dd924a 43 #include "wx/msw/missing.h"
0a1c1e62
GRG
44#endif
45
1c193821 46#ifndef __WXWINCE__
1cd52418 47#include <errno.h>
1c193821
JS
48#endif
49
6001e347
RR
50#include <ctype.h>
51#include <string.h>
52#include <stdlib.h>
53
e95354ec
VZ
54#if defined(__WIN32__) && !defined(__WXMICROWIN__)
55 #define wxHAVE_WIN32_MB2WC
56#endif // __WIN32__ but !__WXMICROWIN__
57
373658eb
VZ
58// ----------------------------------------------------------------------------
59// headers
60// ----------------------------------------------------------------------------
7af284fd 61
6001e347 62#ifdef __SALFORDC__
373658eb 63 #include <clib.h>
6001e347
RR
64#endif
65
b040e242 66#ifdef HAVE_ICONV
373658eb 67 #include <iconv.h>
1cd52418 68#endif
1cd52418 69
373658eb
VZ
70#include "wx/encconv.h"
71#include "wx/fontmap.h"
72
335d31e0 73#ifdef __WXMAC__
4227afa4
SC
74#include <ATSUnicode.h>
75#include <TextCommon.h>
76#include <TextEncodingConverter.h>
335d31e0
SC
77
78#include "wx/mac/private.h" // includes mac headers
79#endif
373658eb
VZ
80// ----------------------------------------------------------------------------
81// macros
82// ----------------------------------------------------------------------------
3e61dfb0 83
1cd52418 84#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
3a0d76bc 85#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
1cd52418
OK
86
87#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
88 #define WC_NAME "UCS4"
89 #define WC_BSWAP BSWAP_UCS4
90 #ifdef WORDS_BIGENDIAN
91 #define WC_NAME_BEST "UCS-4BE"
92 #else
93 #define WC_NAME_BEST "UCS-4LE"
94 #endif
1cd52418 95#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
96 #define WC_NAME "UTF16"
97 #define WC_BSWAP BSWAP_UTF16
a3f2769e 98 #define WC_UTF16
3a0d76bc
VS
99 #ifdef WORDS_BIGENDIAN
100 #define WC_NAME_BEST "UTF-16BE"
101 #else
102 #define WC_NAME_BEST "UTF-16LE"
103 #endif
bab1e722 104#else // sizeof(wchar_t) != 2 nor 4
bde4baac
VZ
105 // does this ever happen?
106 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1cd52418
OK
107#endif
108
373658eb
VZ
109// ============================================================================
110// implementation
111// ============================================================================
112
113// ----------------------------------------------------------------------------
c91830cb 114// UTF-16 en/decoding to/from UCS-4
373658eb 115// ----------------------------------------------------------------------------
6001e347 116
b0a6bb75 117
c91830cb 118static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 119{
dccce9ea 120 if (input<=0xffff)
4def3b35 121 {
999836aa
VZ
122 if (output)
123 *output = (wxUint16) input;
4def3b35 124 return 1;
dccce9ea
VZ
125 }
126 else if (input>=0x110000)
4def3b35
VS
127 {
128 return (size_t)-1;
dccce9ea
VZ
129 }
130 else
4def3b35 131 {
dccce9ea 132 if (output)
4def3b35 133 {
c91830cb 134 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
999836aa 135 *output = (wxUint16) ((input&0x3ff)+0xdc00);
4def3b35
VS
136 }
137 return 2;
1cd52418 138 }
1cd52418
OK
139}
140
c91830cb 141static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 142{
dccce9ea 143 if ((*input<0xd800) || (*input>0xdfff))
4def3b35
VS
144 {
145 output = *input;
146 return 1;
dccce9ea
VZ
147 }
148 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
4def3b35
VS
149 {
150 output = *input;
151 return (size_t)-1;
dccce9ea
VZ
152 }
153 else
4def3b35
VS
154 {
155 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
156 return 2;
157 }
1cd52418
OK
158}
159
b0a6bb75 160
f6bcfd97 161// ----------------------------------------------------------------------------
6001e347 162// wxMBConv
f6bcfd97 163// ----------------------------------------------------------------------------
6001e347 164
2b5f62a0
VZ
165wxMBConv::~wxMBConv()
166{
167 // nothing to do here
168}
169
6001e347
RR
170const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
171{
2b5f62a0 172 if ( psz )
6001e347 173 {
2b5f62a0
VZ
174 // calculate the length of the buffer needed first
175 size_t nLen = MB2WC(NULL, psz, 0);
176 if ( nLen != (size_t)-1 )
177 {
178 // now do the actual conversion
179 wxWCharBuffer buf(nLen);
635f33ce
VS
180 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
181 if ( nLen != (size_t)-1 )
182 {
183 return buf;
184 }
2b5f62a0 185 }
f6bcfd97 186 }
2b5f62a0
VZ
187
188 wxWCharBuffer buf((wchar_t *)NULL);
189
190 return buf;
6001e347
RR
191}
192
e5cceba0 193const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 194{
2b5f62a0
VZ
195 if ( pwz )
196 {
197 size_t nLen = WC2MB(NULL, pwz, 0);
198 if ( nLen != (size_t)-1 )
199 {
c91830cb 200 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
635f33ce
VS
201 nLen = WC2MB(buf.data(), pwz, nLen + 4);
202 if ( nLen != (size_t)-1 )
203 {
204 return buf;
205 }
2b5f62a0
VZ
206 }
207 }
208
209 wxCharBuffer buf((char *)NULL);
e5cceba0 210
e5cceba0 211 return buf;
6001e347
RR
212}
213
6001e347 214// ----------------------------------------------------------------------------
bde4baac 215// wxMBConvLibc
6001e347
RR
216// ----------------------------------------------------------------------------
217
bde4baac
VZ
218size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
219{
220 return wxMB2WC(buf, psz, n);
221}
222
223size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
224{
225 return wxWC2MB(buf, psz, n);
226}
227
228// ----------------------------------------------------------------------------
229// UTF-7
230// ----------------------------------------------------------------------------
6001e347
RR
231
232#if 0
233static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
234 "abcdefghijklmnopqrstuvwxyz"
235 "0123456789'(),-./:?";
236static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
237static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
238 "abcdefghijklmnopqrstuvwxyz"
239 "0123456789+/";
240#endif
241
242// TODO: write actual implementations of UTF-7 here
243size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
244 const char * WXUNUSED(psz),
245 size_t WXUNUSED(n)) const
246{
247 return 0;
248}
249
250size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
251 const wchar_t * WXUNUSED(psz),
252 size_t WXUNUSED(n)) const
253{
254 return 0;
255}
256
f6bcfd97 257// ----------------------------------------------------------------------------
6001e347 258// UTF-8
f6bcfd97 259// ----------------------------------------------------------------------------
6001e347 260
dccce9ea 261static wxUint32 utf8_max[]=
4def3b35 262 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347
RR
263
264size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
265{
4def3b35
VS
266 size_t len = 0;
267
dccce9ea 268 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
269 {
270 unsigned char cc = *psz++, fc = cc;
271 unsigned cnt;
dccce9ea 272 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 273 fc <<= 1;
dccce9ea 274 if (!cnt)
4def3b35
VS
275 {
276 // plain ASCII char
dccce9ea 277 if (buf)
4def3b35
VS
278 *buf++ = cc;
279 len++;
dccce9ea
VZ
280 }
281 else
4def3b35
VS
282 {
283 cnt--;
dccce9ea 284 if (!cnt)
4def3b35
VS
285 {
286 // invalid UTF-8 sequence
287 return (size_t)-1;
dccce9ea
VZ
288 }
289 else
4def3b35
VS
290 {
291 unsigned ocnt = cnt - 1;
292 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 293 while (cnt--)
4def3b35
VS
294 {
295 cc = *psz++;
dccce9ea 296 if ((cc & 0xC0) != 0x80)
4def3b35
VS
297 {
298 // invalid UTF-8 sequence
299 return (size_t)-1;
300 }
301 res = (res << 6) | (cc & 0x3f);
302 }
dccce9ea 303 if (res <= utf8_max[ocnt])
4def3b35
VS
304 {
305 // illegal UTF-8 encoding
306 return (size_t)-1;
307 }
1cd52418 308#ifdef WC_UTF16
b5153fd8
VZ
309 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
310 size_t pa = encode_utf16(res, (wxUint16 *)buf);
4def3b35
VS
311 if (pa == (size_t)-1)
312 return (size_t)-1;
dccce9ea 313 if (buf)
4def3b35
VS
314 buf += pa;
315 len += pa;
373658eb 316#else // !WC_UTF16
dccce9ea 317 if (buf)
4def3b35
VS
318 *buf++ = res;
319 len++;
373658eb 320#endif // WC_UTF16/!WC_UTF16
4def3b35
VS
321 }
322 }
6001e347 323 }
dccce9ea 324 if (buf && (len < n))
4def3b35
VS
325 *buf = 0;
326 return len;
6001e347
RR
327}
328
329size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
330{
4def3b35 331 size_t len = 0;
6001e347 332
dccce9ea 333 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
334 {
335 wxUint32 cc;
1cd52418 336#ifdef WC_UTF16
b5153fd8
VZ
337 // cast is ok for WC_UTF16
338 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 339 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 340#else
4def3b35
VS
341 cc=(*psz++) & 0x7fffffff;
342#endif
343 unsigned cnt;
344 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
dccce9ea 345 if (!cnt)
4def3b35
VS
346 {
347 // plain ASCII char
dccce9ea 348 if (buf)
574c939e 349 *buf++ = (char) cc;
4def3b35 350 len++;
dccce9ea
VZ
351 }
352
353 else
4def3b35
VS
354 {
355 len += cnt + 1;
dccce9ea 356 if (buf)
4def3b35 357 {
574c939e 358 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
4def3b35 359 while (cnt--)
574c939e 360 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
4def3b35
VS
361 }
362 }
6001e347 363 }
4def3b35
VS
364
365 if (buf && (len<n)) *buf = 0;
adb45366 366
4def3b35 367 return len;
6001e347
RR
368}
369
c91830cb
VZ
370
371
372
373// ----------------------------------------------------------------------------
374// UTF-16
375// ----------------------------------------------------------------------------
376
377#ifdef WORDS_BIGENDIAN
bde4baac
VZ
378 #define wxMBConvUTF16straight wxMBConvUTF16BE
379 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 380#else
bde4baac
VZ
381 #define wxMBConvUTF16swap wxMBConvUTF16BE
382 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
383#endif
384
385
c91830cb
VZ
386#ifdef WC_UTF16
387
c91830cb
VZ
388// copy 16bit MB to 16bit String
389size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
390{
391 size_t len=0;
392
393 while (*(wxUint16*)psz && (!buf || len < n))
394 {
395 if (buf)
396 *buf++ = *(wxUint16*)psz;
397 len++;
398
399 psz += sizeof(wxUint16);
400 }
401 if (buf && len<n) *buf=0;
402
403 return len;
404}
405
406
407// copy 16bit String to 16bit MB
408size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
409{
410 size_t len=0;
411
412 while (*psz && (!buf || len < n))
413 {
414 if (buf)
415 {
416 *(wxUint16*)buf = *psz;
417 buf += sizeof(wxUint16);
418 }
419 len += sizeof(wxUint16);
420 psz++;
421 }
422 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
423
424 return len;
425}
426
427
428// swap 16bit MB to 16bit String
429size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
430{
431 size_t len=0;
432
433 while (*(wxUint16*)psz && (!buf || len < n))
434 {
435 if (buf)
436 {
437 ((char *)buf)[0] = psz[1];
438 ((char *)buf)[1] = psz[0];
439 buf++;
440 }
441 len++;
442 psz += sizeof(wxUint16);
443 }
444 if (buf && len<n) *buf=0;
445
446 return len;
447}
448
449
450// swap 16bit MB to 16bit String
451size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
452{
453 size_t len=0;
454
455 while (*psz && (!buf || len < n))
456 {
457 if (buf)
458 {
459 *buf++ = ((char*)psz)[1];
460 *buf++ = ((char*)psz)[0];
461 }
462 len += sizeof(wxUint16);
463 psz++;
464 }
465 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
466
467 return len;
468}
469
470
471#else // WC_UTF16
472
473
474// copy 16bit MB to 32bit String
475size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
476{
477 size_t len=0;
478
479 while (*(wxUint16*)psz && (!buf || len < n))
480 {
481 wxUint32 cc;
482 size_t pa=decode_utf16((wxUint16*)psz, cc);
483 if (pa == (size_t)-1)
484 return pa;
485
486 if (buf)
487 *buf++ = cc;
488 len++;
489 psz += pa * sizeof(wxUint16);
490 }
491 if (buf && len<n) *buf=0;
492
493 return len;
494}
495
496
497// copy 32bit String to 16bit MB
498size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
499{
500 size_t len=0;
501
502 while (*psz && (!buf || len < n))
503 {
504 wxUint16 cc[2];
505 size_t pa=encode_utf16(*psz, cc);
506
507 if (pa == (size_t)-1)
508 return pa;
509
510 if (buf)
511 {
69b80d28 512 *(wxUint16*)buf = cc[0];
b5153fd8 513 buf += sizeof(wxUint16);
c91830cb 514 if (pa > 1)
69b80d28
VZ
515 {
516 *(wxUint16*)buf = cc[1];
517 buf += sizeof(wxUint16);
518 }
c91830cb
VZ
519 }
520
521 len += pa*sizeof(wxUint16);
522 psz++;
523 }
524 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
525
526 return len;
527}
528
529
530// swap 16bit MB to 32bit String
531size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
532{
533 size_t len=0;
534
535 while (*(wxUint16*)psz && (!buf || len < n))
536 {
537 wxUint32 cc;
538 char tmp[4];
539 tmp[0]=psz[1]; tmp[1]=psz[0];
540 tmp[2]=psz[3]; tmp[3]=psz[2];
541
542 size_t pa=decode_utf16((wxUint16*)tmp, cc);
543 if (pa == (size_t)-1)
544 return pa;
545
546 if (buf)
547 *buf++ = cc;
548
549 len++;
550 psz += pa * sizeof(wxUint16);
551 }
552 if (buf && len<n) *buf=0;
553
554 return len;
555}
556
557
558// swap 32bit String to 16bit MB
559size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
560{
561 size_t len=0;
562
563 while (*psz && (!buf || len < n))
564 {
565 wxUint16 cc[2];
566 size_t pa=encode_utf16(*psz, cc);
567
568 if (pa == (size_t)-1)
569 return pa;
570
571 if (buf)
572 {
573 *buf++ = ((char*)cc)[1];
574 *buf++ = ((char*)cc)[0];
575 if (pa > 1)
576 {
577 *buf++ = ((char*)cc)[3];
578 *buf++ = ((char*)cc)[2];
579 }
580 }
581
582 len += pa*sizeof(wxUint16);
583 psz++;
584 }
585 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
586
587 return len;
588}
589
590#endif // WC_UTF16
591
592
593// ----------------------------------------------------------------------------
594// UTF-32
595// ----------------------------------------------------------------------------
596
597#ifdef WORDS_BIGENDIAN
598#define wxMBConvUTF32straight wxMBConvUTF32BE
599#define wxMBConvUTF32swap wxMBConvUTF32LE
600#else
601#define wxMBConvUTF32swap wxMBConvUTF32BE
602#define wxMBConvUTF32straight wxMBConvUTF32LE
603#endif
604
605
606WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
607WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
608
609
610#ifdef WC_UTF16
611
612// copy 32bit MB to 16bit String
613size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
614{
615 size_t len=0;
616
617 while (*(wxUint32*)psz && (!buf || len < n))
618 {
619 wxUint16 cc[2];
620
621 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
622 if (pa == (size_t)-1)
623 return pa;
624
625 if (buf)
626 {
627 *buf++ = cc[0];
628 if (pa > 1)
629 *buf++ = cc[1];
630 }
631 len += pa;
632 psz += sizeof(wxUint32);
633 }
634 if (buf && len<n) *buf=0;
635
636 return len;
637}
638
639
640// copy 16bit String to 32bit MB
641size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
642{
643 size_t len=0;
644
645 while (*psz && (!buf || len < n))
646 {
647 wxUint32 cc;
648
b5153fd8
VZ
649 // cast is ok for WC_UTF16
650 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
651 if (pa == (size_t)-1)
652 return pa;
653
654 if (buf)
655 {
656 *(wxUint32*)buf = cc;
657 buf += sizeof(wxUint32);
658 }
659 len += sizeof(wxUint32);
660 psz += pa;
661 }
b5153fd8
VZ
662
663 if (buf && len<=n-sizeof(wxUint32))
664 *(wxUint32*)buf=0;
c91830cb
VZ
665
666 return len;
667}
668
669
670
671// swap 32bit MB to 16bit String
672size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
673{
674 size_t len=0;
675
676 while (*(wxUint32*)psz && (!buf || len < n))
677 {
678 char tmp[4];
679 tmp[0] = psz[3]; tmp[1] = psz[2];
680 tmp[2] = psz[1]; tmp[3] = psz[0];
681
682
683 wxUint16 cc[2];
684
685 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
686 if (pa == (size_t)-1)
687 return pa;
688
689 if (buf)
690 {
691 *buf++ = cc[0];
692 if (pa > 1)
693 *buf++ = cc[1];
694 }
695 len += pa;
696 psz += sizeof(wxUint32);
697 }
b5153fd8
VZ
698
699 if (buf && len<n)
700 *buf=0;
c91830cb
VZ
701
702 return len;
703}
704
705
706// swap 16bit String to 32bit MB
707size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
708{
709 size_t len=0;
710
711 while (*psz && (!buf || len < n))
712 {
713 char cc[4];
714
b5153fd8
VZ
715 // cast is ok for WC_UTF16
716 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
717 if (pa == (size_t)-1)
718 return pa;
719
720 if (buf)
721 {
722 *buf++ = cc[3];
723 *buf++ = cc[2];
724 *buf++ = cc[1];
725 *buf++ = cc[0];
726 }
727 len += sizeof(wxUint32);
728 psz += pa;
729 }
b5153fd8
VZ
730
731 if (buf && len<=n-sizeof(wxUint32))
732 *(wxUint32*)buf=0;
c91830cb
VZ
733
734 return len;
735}
736
737#else // WC_UTF16
738
739
740// copy 32bit MB to 32bit String
741size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
742{
743 size_t len=0;
744
745 while (*(wxUint32*)psz && (!buf || len < n))
746 {
747 if (buf)
748 *buf++ = *(wxUint32*)psz;
749 len++;
750 psz += sizeof(wxUint32);
751 }
b5153fd8
VZ
752
753 if (buf && len<n)
754 *buf=0;
c91830cb
VZ
755
756 return len;
757}
758
759
760// copy 32bit String to 32bit MB
761size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
762{
763 size_t len=0;
764
765 while (*psz && (!buf || len < n))
766 {
767 if (buf)
768 {
769 *(wxUint32*)buf = *psz;
770 buf += sizeof(wxUint32);
771 }
772
773 len += sizeof(wxUint32);
774 psz++;
775 }
776
b5153fd8
VZ
777 if (buf && len<=n-sizeof(wxUint32))
778 *(wxUint32*)buf=0;
c91830cb
VZ
779
780 return len;
781}
782
783
784// swap 32bit MB to 32bit String
785size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
786{
787 size_t len=0;
788
789 while (*(wxUint32*)psz && (!buf || len < n))
790 {
791 if (buf)
792 {
793 ((char *)buf)[0] = psz[3];
794 ((char *)buf)[1] = psz[2];
795 ((char *)buf)[2] = psz[1];
796 ((char *)buf)[3] = psz[0];
797 buf++;
798 }
799 len++;
800 psz += sizeof(wxUint32);
801 }
b5153fd8
VZ
802
803 if (buf && len<n)
804 *buf=0;
c91830cb
VZ
805
806 return len;
807}
808
809
810// swap 32bit String to 32bit MB
811size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
812{
813 size_t len=0;
814
815 while (*psz && (!buf || len < n))
816 {
817 if (buf)
818 {
819 *buf++ = ((char *)psz)[3];
820 *buf++ = ((char *)psz)[2];
821 *buf++ = ((char *)psz)[1];
822 *buf++ = ((char *)psz)[0];
823 }
824 len += sizeof(wxUint32);
825 psz++;
826 }
b5153fd8
VZ
827
828 if (buf && len<=n-sizeof(wxUint32))
829 *(wxUint32*)buf=0;
c91830cb
VZ
830
831 return len;
832}
833
834
835#endif // WC_UTF16
836
837
36acb880
VZ
838// ============================================================================
839// The classes doing conversion using the iconv_xxx() functions
840// ============================================================================
3caec1bb 841
b040e242 842#ifdef HAVE_ICONV
3a0d76bc 843
3caec1bb
VS
844// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
845// if output buffer is _exactly_ as big as needed. Such case is (unless there's
846// yet another bug in glibc) the only case when iconv() returns with (size_t)-1
847// (which means error) and says there are 0 bytes left in the input buffer --
848// when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
849// this alternative test for iconv() failure.
850// [This bug does not appear in glibc 2.2.]
851#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
852#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
853 (errno != E2BIG || bufLeft != 0))
854#else
855#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
856#endif
857
ab217dba 858#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880
VZ
859
860// ----------------------------------------------------------------------------
e95354ec 861// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
862// ----------------------------------------------------------------------------
863
e95354ec 864class wxMBConv_iconv : public wxMBConv
1cd52418
OK
865{
866public:
e95354ec
VZ
867 wxMBConv_iconv(const wxChar *name);
868 virtual ~wxMBConv_iconv();
36acb880 869
bde4baac
VZ
870 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
871 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 872
e95354ec 873 bool IsOk() const
36acb880
VZ
874 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
875
876protected:
877 // the iconv handlers used to translate from multibyte to wide char and in
878 // the other direction
879 iconv_t m2w,
880 w2m;
881
882private:
e95354ec 883 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
884 // available on this machine, it will remain NULL
885 static const char *ms_wcCharsetName;
886
887 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
888 // different endian-ness than the native one
405d8f46 889 static bool ms_wcNeedsSwap;
36acb880
VZ
890};
891
e95354ec
VZ
892const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
893bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 894
e95354ec 895wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 896{
04c79127
RR
897 // Do it the hard way
898 char cname[100];
899 for (size_t i = 0; i < wxStrlen(name)+1; i++)
900 cname[i] = (char) name[i];
901
36acb880
VZ
902 // check for charset that represents wchar_t:
903 if (ms_wcCharsetName == NULL)
f1339c56 904 {
e95354ec 905 ms_wcNeedsSwap = false;
dccce9ea 906
36acb880
VZ
907 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
908 ms_wcCharsetName = WC_NAME_BEST;
04c79127 909 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 910
36acb880
VZ
911 if (m2w == (iconv_t)-1)
912 {
913 // try charset w/o bytesex info (e.g. "UCS4")
914 // and check for bytesex ourselves:
915 ms_wcCharsetName = WC_NAME;
04c79127 916 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
917
918 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
919 if (m2w == (iconv_t)-1)
920 {
36acb880 921 ms_wcCharsetName = "WCHAR_T";
04c79127 922 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 923 }
3a0d76bc 924
36acb880
VZ
925 if (m2w != (iconv_t)-1)
926 {
927 char buf[2], *bufPtr;
928 wchar_t wbuf[2], *wbufPtr;
929 size_t insz, outsz;
930 size_t res;
931
932 buf[0] = 'A';
933 buf[1] = 0;
934 wbuf[0] = 0;
935 insz = 2;
936 outsz = SIZEOF_WCHAR_T * 2;
937 wbufPtr = wbuf;
938 bufPtr = buf;
939
940 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
941 (char**)&wbufPtr, &outsz);
942
943 if (ICONV_FAILED(res, insz))
3a0d76bc 944 {
36acb880
VZ
945 ms_wcCharsetName = NULL;
946 wxLogLastError(wxT("iconv"));
2b5f62a0 947 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
948 }
949 else
950 {
36acb880 951 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
952 }
953 }
36acb880
VZ
954 else
955 {
956 ms_wcCharsetName = NULL;
373658eb 957
957686c8
VS
958 // VS: we must not output an error here, since wxWindows will safely
959 // fall back to using wxEncodingConverter.
960 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
961 //wxLogError(
36acb880 962 }
3a0d76bc 963 }
36acb880 964 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 965 }
36acb880 966 else // we already have ms_wcCharsetName
3caec1bb 967 {
04c79127 968 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 969 }
dccce9ea 970
36acb880
VZ
971 // NB: don't ever pass NULL to iconv_open(), it may crash!
972 if ( ms_wcCharsetName )
f1339c56 973 {
04c79127 974 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 975 }
405d8f46
VZ
976 else
977 {
978 w2m = (iconv_t)-1;
979 }
36acb880 980}
3caec1bb 981
e95354ec 982wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
983{
984 if ( m2w != (iconv_t)-1 )
985 iconv_close(m2w);
986 if ( w2m != (iconv_t)-1 )
987 iconv_close(w2m);
988}
3a0d76bc 989
bde4baac 990size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880
VZ
991{
992 size_t inbuf = strlen(psz);
993 size_t outbuf = n * SIZEOF_WCHAR_T;
994 size_t res, cres;
995 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
996 wchar_t *bufPtr = buf;
997 const char *pszPtr = psz;
998
999 if (buf)
1000 {
1001 // have destination buffer, convert there
1002 cres = iconv(m2w,
1003 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1004 (char**)&bufPtr, &outbuf);
1005 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1006
36acb880 1007 if (ms_wcNeedsSwap)
3a0d76bc 1008 {
36acb880
VZ
1009 // convert to native endianness
1010 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 1011 }
adb45366 1012
49dd9820
VS
1013 // NB: iconv was given only strlen(psz) characters on input, and so
1014 // it couldn't convert the trailing zero. Let's do it ourselves
1015 // if there's some room left for it in the output buffer.
1016 if (res < n)
1017 buf[res] = 0;
36acb880
VZ
1018 }
1019 else
1020 {
1021 // no destination buffer... convert using temp buffer
1022 // to calculate destination buffer requirement
1023 wchar_t tbuf[8];
1024 res = 0;
1025 do {
1026 bufPtr = tbuf;
1027 outbuf = 8*SIZEOF_WCHAR_T;
1028
1029 cres = iconv(m2w,
1030 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1031 (char**)&bufPtr, &outbuf );
1032
1033 res += 8-(outbuf/SIZEOF_WCHAR_T);
1034 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1035 }
dccce9ea 1036
36acb880 1037 if (ICONV_FAILED(cres, inbuf))
f1339c56 1038 {
36acb880
VZ
1039 //VS: it is ok if iconv fails, hence trace only
1040 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1041 return (size_t)-1;
1042 }
1043
1044 return res;
1045}
1046
bde4baac 1047size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1048{
f8d791e0 1049 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1050 size_t outbuf = n;
1051 size_t res, cres;
3a0d76bc 1052
36acb880 1053 wchar_t *tmpbuf = 0;
3caec1bb 1054
36acb880
VZ
1055 if (ms_wcNeedsSwap)
1056 {
1057 // need to copy to temp buffer to switch endianness
1058 // this absolutely doesn't rock!
1059 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1060 // could be in read-only memory, or be accessed in some other thread)
1061 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1062 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1063 WC_BSWAP(tmpbuf, inbuf)
1064 psz=tmpbuf;
1065 }
3a0d76bc 1066
36acb880
VZ
1067 if (buf)
1068 {
1069 // have destination buffer, convert there
1070 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1071
36acb880 1072 res = n-outbuf;
adb45366 1073
49dd9820
VS
1074 // NB: iconv was given only wcslen(psz) characters on input, and so
1075 // it couldn't convert the trailing zero. Let's do it ourselves
1076 // if there's some room left for it in the output buffer.
1077 if (res < n)
1078 buf[0] = 0;
36acb880
VZ
1079 }
1080 else
1081 {
1082 // no destination buffer... convert using temp buffer
1083 // to calculate destination buffer requirement
1084 char tbuf[16];
1085 res = 0;
1086 do {
1087 buf = tbuf; outbuf = 16;
1088
1089 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1090
36acb880
VZ
1091 res += 16 - outbuf;
1092 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1093 }
dccce9ea 1094
36acb880
VZ
1095 if (ms_wcNeedsSwap)
1096 {
1097 free(tmpbuf);
1098 }
dccce9ea 1099
36acb880
VZ
1100 if (ICONV_FAILED(cres, inbuf))
1101 {
1102 //VS: it is ok if iconv fails, hence trace only
1103 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1104 return (size_t)-1;
1105 }
1106
1107 return res;
1108}
1109
b040e242 1110#endif // HAVE_ICONV
36acb880 1111
e95354ec 1112
36acb880
VZ
1113// ============================================================================
1114// Win32 conversion classes
1115// ============================================================================
1cd52418 1116
e95354ec 1117#ifdef wxHAVE_WIN32_MB2WC
373658eb 1118
8b04d4c4
VZ
1119// from utils.cpp
1120extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1121extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
373658eb 1122
e95354ec 1123class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1124{
1125public:
bde4baac
VZ
1126 wxMBConv_win32()
1127 {
1128 m_CodePage = CP_ACP;
1129 }
1130
e95354ec 1131 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1132 {
1133 m_CodePage = wxCharsetToCodepage(name);
1134 }
dccce9ea 1135
e95354ec 1136 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1137 {
1138 m_CodePage = wxEncodingToCodepage(encoding);
1139 }
8b04d4c4 1140
bde4baac 1141 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1142 {
2b5f62a0
VZ
1143 const size_t len = ::MultiByteToWideChar
1144 (
1145 m_CodePage, // code page
1146 0, // flags (none)
1147 psz, // input string
1148 -1, // its length (NUL-terminated)
b4da152e 1149 buf, // output string
2b5f62a0
VZ
1150 buf ? n : 0 // size of output buffer
1151 );
1152
03a991bc
VZ
1153 // note that it returns count of written chars for buf != NULL and size
1154 // of the needed buffer for buf == NULL so in either case the length of
1155 // the string (which never includes the terminating NUL) is one less
1156 return len ? len - 1 : (size_t)-1;
f1339c56 1157 }
dccce9ea 1158
13dd924a 1159 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1160 {
13dd924a
VZ
1161 /*
1162 we have a problem here: by default, WideCharToMultiByte() may
1163 replace characters unrepresentable in the target code page with bad
1164 quality approximations such as turning "1/2" symbol (U+00BD) into
1165 "1" for the code pages which don't have it and we, obviously, want
1166 to avoid this at any price
1167
1168 the trouble is that this function does it _silently_, i.e. it won't
1169 even tell us whether it did or not... Win98/2000 and higher provide
1170 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1171 we have to resort to a round trip, i.e. check that converting back
1172 results in the same string -- this is, of course, expensive but
1173 otherwise we simply can't be sure to not garble the data.
1174 */
1175
1176 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1177 // it doesn't work with CJK encodings (which we test for rather roughly
1178 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1179 // supporting it
1180 BOOL usedDef wxDUMMY_INITIALIZE(false),
1181 *pUsedDef;
1182 int flags;
1183 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1184 {
1185 // it's our lucky day
1186 flags = WC_NO_BEST_FIT_CHARS;
1187 pUsedDef = &usedDef;
1188 }
1189 else // old system or unsupported encoding
1190 {
1191 flags = 0;
1192 pUsedDef = NULL;
1193 }
1194
2b5f62a0
VZ
1195 const size_t len = ::WideCharToMultiByte
1196 (
1197 m_CodePage, // code page
13dd924a
VZ
1198 flags, // either none or no best fit
1199 pwz, // input string
2b5f62a0
VZ
1200 -1, // it is (wide) NUL-terminated
1201 buf, // output buffer
1202 buf ? n : 0, // and its size
1203 NULL, // default "replacement" char
13dd924a 1204 pUsedDef // [out] was it used?
2b5f62a0
VZ
1205 );
1206
13dd924a
VZ
1207 if ( !len )
1208 {
1209 // function totally failed
1210 return (size_t)-1;
1211 }
1212
1213 // if we were really converting, check if we succeeded
1214 if ( buf )
1215 {
1216 if ( flags )
1217 {
1218 // check if the conversion failed, i.e. if any replacements
1219 // were done
1220 if ( usedDef )
1221 return (size_t)-1;
1222 }
1223 else // we must resort to double tripping...
1224 {
1225 wxWCharBuffer wcBuf(n);
1226 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1227 wcscmp(wcBuf, pwz) != 0 )
1228 {
1229 // we didn't obtain the same thing we started from, hence
1230 // the conversion was lossy and we consider that it failed
1231 return (size_t)-1;
1232 }
1233 }
1234 }
1235
03a991bc 1236 // see the comment above for the reason of "len - 1"
13dd924a 1237 return len - 1;
f1339c56 1238 }
dccce9ea 1239
13dd924a
VZ
1240 bool IsOk() const { return m_CodePage != -1; }
1241
1242private:
1243 static bool CanUseNoBestFit()
1244 {
1245 static int s_isWin98Or2k = -1;
1246
1247 if ( s_isWin98Or2k == -1 )
1248 {
1249 int verMaj, verMin;
1250 switch ( wxGetOsVersion(&verMaj, &verMin) )
1251 {
1252 case wxWIN95:
1253 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1254 break;
1255
1256 case wxWINDOWS_NT:
1257 s_isWin98Or2k = verMaj >= 5;
1258 break;
1259
1260 default:
1261 // unknown, be conseravtive by default
1262 s_isWin98Or2k = 0;
1263 }
1264
1265 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1266 }
1267
1268 return s_isWin98Or2k == 1;
1269 }
f1339c56 1270
b1d66b54 1271 long m_CodePage;
1cd52418 1272};
e95354ec
VZ
1273
1274#endif // wxHAVE_WIN32_MB2WC
1275
335d31e0
SC
1276// ============================================================================
1277// Mac conversion classes
1278// ============================================================================
1279
1280#if defined(__WXMAC__) && defined(TARGET_CARBON)
1281
1282class wxMBConv_mac : public wxMBConv
1283{
1284public:
1285 wxMBConv_mac()
1286 {
1287 Init(CFStringGetSystemEncoding()) ;
1288 }
1289
1290 wxMBConv_mac(const wxChar* name)
1291 {
8057c6d6 1292 Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, FALSE) ) ) ;
335d31e0
SC
1293 }
1294
1295 wxMBConv_mac(wxFontEncoding encoding)
1296 {
8057c6d6 1297 Init( wxMacGetSystemEncFromFontEnc(encoding) );
335d31e0
SC
1298 }
1299
1300 ~wxMBConv_mac()
1301 {
1302 OSStatus status = noErr ;
1303 status = TECDisposeConverter(m_MB2WC_converter);
1304 status = TECDisposeConverter(m_WC2MB_converter);
1305 }
1306
335d31e0
SC
1307
1308 void Init( TextEncodingBase encoding)
1309 {
1310 OSStatus status = noErr ;
1311 m_char_encoding = encoding ;
335d31e0 1312 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
f3a355ce 1313
335d31e0
SC
1314 status = TECCreateConverter(&m_MB2WC_converter,
1315 m_char_encoding,
1316 m_unicode_encoding);
1317 status = TECCreateConverter(&m_WC2MB_converter,
1318 m_unicode_encoding,
1319 m_char_encoding);
1320 }
1321
1322 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1323 {
1324 OSStatus status = noErr ;
1325 ByteCount byteOutLen ;
1326 ByteCount byteInLen = strlen(psz) ;
335d31e0 1327 wchar_t *tbuf = NULL ;
f3a355ce
SC
1328 UniChar* ubuf = NULL ;
1329 size_t res = 0 ;
335d31e0
SC
1330
1331 if (buf == NULL)
1332 {
5c250a10
SC
1333 n = byteInLen ;
1334 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
335d31e0 1335 }
f3a355ce
SC
1336 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
1337#if SIZEOF_WCHAR_T == 4
8471ea90 1338 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce
SC
1339#else
1340 ubuf = (UniChar*) (buf ? buf : tbuf) ;
1341#endif
335d31e0 1342 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
f3a355ce
SC
1343 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
1344#if SIZEOF_WCHAR_T == 4
8471ea90
SC
1345 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
1346 // is not properly terminated we get random characters at the end
1347 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
f3a355ce
SC
1348 wxMBConvUTF16BE converter ;
1349 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
1350 free( ubuf ) ;
1351#else
1352 res = byteOutLen / sizeof( UniChar ) ;
1353#endif
335d31e0
SC
1354 if ( buf == NULL )
1355 free(tbuf) ;
1356
335d31e0
SC
1357 if ( buf && res < n)
1358 buf[res] = 0;
1359
1360 return res ;
1361 }
1362
1363 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
1364 {
1365 OSStatus status = noErr ;
1366 ByteCount byteOutLen ;
1367 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
335d31e0
SC
1368
1369 char *tbuf = NULL ;
1370
1371 if (buf == NULL)
1372 {
5c250a10
SC
1373 // worst case
1374 n = byteInLen * 2 ;
335d31e0
SC
1375 tbuf = (char*) malloc( n ) ;
1376 }
1377
5c250a10 1378 ByteCount byteBufferLen = n ;
f3a355ce
SC
1379 UniChar* ubuf = NULL ;
1380#if SIZEOF_WCHAR_T == 4
1381 wxMBConvUTF16BE converter ;
1382 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
4227afa4
SC
1383 byteInLen = unicharlen ;
1384 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
1385 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce
SC
1386#else
1387 ubuf = (UniChar*) psz ;
1388#endif
1389 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
1390 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
1391#if SIZEOF_WCHAR_T == 4
1392 free( ubuf ) ;
1393#endif
335d31e0
SC
1394 if ( buf == NULL )
1395 free(tbuf) ;
1396
1397 size_t res = byteOutLen ;
1398 if ( buf && res < n)
1399 buf[res] = 0;
1400
1401 return res ;
1402 }
1403
1404 bool IsOk() const
1405 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
1406
1407private:
1408 TECObjectRef m_MB2WC_converter ;
1409 TECObjectRef m_WC2MB_converter ;
1410
1411 TextEncodingBase m_char_encoding ;
1412 TextEncodingBase m_unicode_encoding ;
1413};
1414
1415#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 1416
36acb880
VZ
1417// ============================================================================
1418// wxEncodingConverter based conversion classes
1419// ============================================================================
1420
1e6feb95 1421#if wxUSE_FONTMAP
1cd52418 1422
e95354ec 1423class wxMBConv_wxwin : public wxMBConv
1cd52418 1424{
8b04d4c4
VZ
1425private:
1426 void Init()
1427 {
1428 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
1429 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
1430 }
1431
6001e347 1432public:
f1339c56
RR
1433 // temporarily just use wxEncodingConverter stuff,
1434 // so that it works while a better implementation is built
e95354ec 1435 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
1436 {
1437 if (name)
e95354ec 1438 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
1439 else
1440 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 1441
8b04d4c4
VZ
1442 Init();
1443 }
1444
e95354ec 1445 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
1446 {
1447 m_enc = enc;
1448
1449 Init();
f1339c56 1450 }
dccce9ea 1451
bde4baac 1452 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
1453 {
1454 size_t inbuf = strlen(psz);
dccce9ea 1455 if (buf)
4def3b35 1456 m2w.Convert(psz,buf);
f1339c56
RR
1457 return inbuf;
1458 }
dccce9ea 1459
bde4baac 1460 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 1461 {
f8d791e0 1462 const size_t inbuf = wxWcslen(psz);
f1339c56
RR
1463 if (buf)
1464 w2m.Convert(psz,buf);
dccce9ea 1465
f1339c56
RR
1466 return inbuf;
1467 }
dccce9ea 1468
e95354ec 1469 bool IsOk() const { return m_ok; }
f1339c56
RR
1470
1471public:
8b04d4c4 1472 wxFontEncoding m_enc;
f1339c56 1473 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
1474
1475 // were we initialized successfully?
1476 bool m_ok;
fc7a2a60 1477
e95354ec 1478 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 1479};
6001e347 1480
1e6feb95
VZ
1481#endif // wxUSE_FONTMAP
1482
36acb880
VZ
1483// ============================================================================
1484// wxCSConv implementation
1485// ============================================================================
1486
8b04d4c4 1487void wxCSConv::Init()
6001e347 1488{
e95354ec
VZ
1489 m_name = NULL;
1490 m_convReal = NULL;
1491 m_deferred = true;
1492}
1493
8b04d4c4
VZ
1494wxCSConv::wxCSConv(const wxChar *charset)
1495{
1496 Init();
82713003 1497
e95354ec
VZ
1498 if ( charset )
1499 {
e95354ec
VZ
1500 SetName(charset);
1501 }
bda3d86a
VZ
1502
1503 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
1504}
1505
8b04d4c4
VZ
1506wxCSConv::wxCSConv(wxFontEncoding encoding)
1507{
bda3d86a 1508 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
1509 {
1510 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
1511
1512 encoding = wxFONTENCODING_SYSTEM;
1513 }
1514
8b04d4c4
VZ
1515 Init();
1516
bda3d86a 1517 m_encoding = encoding;
8b04d4c4
VZ
1518}
1519
6001e347
RR
1520wxCSConv::~wxCSConv()
1521{
65e50848
JS
1522 Clear();
1523}
1524
54380f29 1525wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 1526 : wxMBConv()
54380f29 1527{
8b04d4c4
VZ
1528 Init();
1529
54380f29 1530 SetName(conv.m_name);
8b04d4c4 1531 m_encoding = conv.m_encoding;
54380f29
GD
1532}
1533
1534wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
1535{
1536 Clear();
8b04d4c4 1537
54380f29 1538 SetName(conv.m_name);
8b04d4c4
VZ
1539 m_encoding = conv.m_encoding;
1540
54380f29
GD
1541 return *this;
1542}
1543
65e50848
JS
1544void wxCSConv::Clear()
1545{
8b04d4c4 1546 free(m_name);
e95354ec 1547 delete m_convReal;
8b04d4c4 1548
65e50848 1549 m_name = NULL;
e95354ec 1550 m_convReal = NULL;
6001e347
RR
1551}
1552
1553void wxCSConv::SetName(const wxChar *charset)
1554{
f1339c56
RR
1555 if (charset)
1556 {
1557 m_name = wxStrdup(charset);
e95354ec 1558 m_deferred = true;
f1339c56 1559 }
6001e347
RR
1560}
1561
e95354ec
VZ
1562wxMBConv *wxCSConv::DoCreate() const
1563{
c547282d
VZ
1564 // check for the special case of ASCII or ISO8859-1 charset: as we have
1565 // special knowledge of it anyhow, we don't need to create a special
1566 // conversion object
1567 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 1568 {
e95354ec
VZ
1569 // don't convert at all
1570 return NULL;
1571 }
dccce9ea 1572
e95354ec
VZ
1573 // we trust OS to do conversion better than we can so try external
1574 // conversion methods first
1575 //
1576 // the full order is:
1577 // 1. OS conversion (iconv() under Unix or Win32 API)
1578 // 2. hard coded conversions for UTF
1579 // 3. wxEncodingConverter as fall back
1580
1581 // step (1)
1582#ifdef HAVE_ICONV
c547282d 1583#if !wxUSE_FONTMAP
e95354ec 1584 if ( m_name )
c547282d 1585#endif // !wxUSE_FONTMAP
e95354ec 1586 {
c547282d
VZ
1587 wxString name(m_name);
1588
1589#if wxUSE_FONTMAP
1590 if ( name.empty() )
1591 name = wxFontMapper::Get()->GetEncodingName(m_encoding);
1592#endif // wxUSE_FONTMAP
1593
1594 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
e95354ec
VZ
1595 if ( conv->IsOk() )
1596 return conv;
1597
1598 delete conv;
1599 }
1600#endif // HAVE_ICONV
1601
1602#ifdef wxHAVE_WIN32_MB2WC
1603 {
1604 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
1605 : new wxMBConv_win32(m_encoding);
1606 if ( conv->IsOk() )
1607 return conv;
1608
1609 delete conv;
1610 }
1611#endif // wxHAVE_WIN32_MB2WC
335d31e0
SC
1612#if defined(__WXMAC__)
1613 {
1614 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
1615 {
1616
1617 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
1618 : new wxMBConv_mac(m_encoding);
1619 if ( conv->IsOk() )
1620 return conv;
1621
1622 delete conv;
1623 }
1624 }
1625#endif
e95354ec
VZ
1626 // step (2)
1627 wxFontEncoding enc = m_encoding;
1628#if wxUSE_FONTMAP
c547282d
VZ
1629 if ( enc == wxFONTENCODING_SYSTEM && m_name )
1630 {
1631 // use "false" to suppress interactive dialogs -- we can be called from
1632 // anywhere and popping up a dialog from here is the last thing we want to
1633 // do
1634 enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
1635 }
e95354ec
VZ
1636#endif // wxUSE_FONTMAP
1637
1638 switch ( enc )
1639 {
1640 case wxFONTENCODING_UTF7:
1641 return new wxMBConvUTF7;
1642
1643 case wxFONTENCODING_UTF8:
1644 return new wxMBConvUTF8;
1645
e95354ec
VZ
1646 case wxFONTENCODING_UTF16BE:
1647 return new wxMBConvUTF16BE;
1648
1649 case wxFONTENCODING_UTF16LE:
1650 return new wxMBConvUTF16LE;
1651
e95354ec
VZ
1652 case wxFONTENCODING_UTF32BE:
1653 return new wxMBConvUTF32BE;
1654
1655 case wxFONTENCODING_UTF32LE:
1656 return new wxMBConvUTF32LE;
1657
1658 default:
1659 // nothing to do but put here to suppress gcc warnings
1660 ;
1661 }
1662
1663 // step (3)
1664#if wxUSE_FONTMAP
1665 {
1666 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
1667 : new wxMBConv_wxwin(m_encoding);
1668 if ( conv->IsOk() )
1669 return conv;
1670
1671 delete conv;
1672 }
1673#endif // wxUSE_FONTMAP
1674
a58d4f4d
VS
1675 // NB: This is a hack to prevent deadlock. What could otherwise happen
1676 // in Unicode build: wxConvLocal creation ends up being here
1677 // because of some failure and logs the error. But wxLog will try to
1678 // attach timestamp, for which it will need wxConvLocal (to convert
1679 // time to char* and then wchar_t*), but that fails, tries to log
1680 // error, but wxLog has a (already locked) critical section that
1681 // guards static buffer.
1682 static bool alreadyLoggingError = false;
1683 if (!alreadyLoggingError)
1684 {
1685 alreadyLoggingError = true;
1686 wxLogError(_("Cannot convert from the charset '%s'!"),
1687 m_name ? m_name
e95354ec
VZ
1688 :
1689#if wxUSE_FONTMAP
1690 wxFontMapper::GetEncodingDescription(m_encoding).c_str()
1691#else // !wxUSE_FONTMAP
1692 wxString::Format(_("encoding %s"), m_encoding).c_str()
1693#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1694 );
a58d4f4d
VS
1695 alreadyLoggingError = false;
1696 }
e95354ec
VZ
1697
1698 return NULL;
1699}
1700
1701void wxCSConv::CreateConvIfNeeded() const
1702{
1703 if ( m_deferred )
1704 {
1705 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
1706
1707#if wxUSE_INTL
1708 // if we don't have neither the name nor the encoding, use the default
1709 // encoding for this system
1710 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
1711 {
4d312c22 1712 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
1713 }
1714#endif // wxUSE_INTL
1715
e95354ec
VZ
1716 self->m_convReal = DoCreate();
1717 self->m_deferred = false;
6001e347 1718 }
6001e347
RR
1719}
1720
1721size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1722{
e95354ec 1723 CreateConvIfNeeded();
dccce9ea 1724
e95354ec
VZ
1725 if (m_convReal)
1726 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
1727
1728 // latin-1 (direct)
4def3b35 1729 size_t len = strlen(psz);
dccce9ea 1730
f1339c56
RR
1731 if (buf)
1732 {
4def3b35 1733 for (size_t c = 0; c <= len; c++)
f1339c56
RR
1734 buf[c] = (unsigned char)(psz[c]);
1735 }
dccce9ea 1736
f1339c56 1737 return len;
6001e347
RR
1738}
1739
1740size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1741{
e95354ec 1742 CreateConvIfNeeded();
dccce9ea 1743
e95354ec
VZ
1744 if (m_convReal)
1745 return m_convReal->WC2MB(buf, psz, n);
1cd52418 1746
f1339c56 1747 // latin-1 (direct)
f8d791e0 1748 const size_t len = wxWcslen(psz);
f1339c56
RR
1749 if (buf)
1750 {
4def3b35 1751 for (size_t c = 0; c <= len; c++)
24642831
VS
1752 {
1753 if (psz[c] > 0xFF)
1754 return (size_t)-1;
1755 buf[c] = psz[c];
1756 }
1757 }
1758 else
1759 {
1760 for (size_t c = 0; c <= len; c++)
1761 {
1762 if (psz[c] > 0xFF)
1763 return (size_t)-1;
1764 }
f1339c56 1765 }
dccce9ea 1766
f1339c56 1767 return len;
6001e347
RR
1768}
1769
bde4baac
VZ
1770// ----------------------------------------------------------------------------
1771// globals
1772// ----------------------------------------------------------------------------
1773
1774#ifdef __WINDOWS__
1775 static wxMBConv_win32 wxConvLibcObj;
1776#else
dcc8fac0 1777 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
1778#endif
1779
1780static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
1781static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
1782static wxMBConvUTF7 wxConvUTF7Obj;
1783static wxMBConvUTF8 wxConvUTF8Obj;
1784
1785
1786WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
1787WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
1788WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
1789WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
1790WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
1791WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
1792
1793#else // !wxUSE_WCHAR_T
1794
1795// stand-ins in absence of wchar_t
1796WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
1797 wxConvISO8859_1,
1798 wxConvLocal,
1799 wxConvUTF8;
1800
1801#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
1802
1803