]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
Source cleaning: TRUE/true, FALSE/false, whitespaces, tabs.
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
3a0d76bc 4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
6001e347
RR
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
e95354ec
VZ
8// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
9// (c) 2000-2003 Vadim Zeitlin
65571936 10// Licence: wxWindows licence
6001e347
RR
11/////////////////////////////////////////////////////////////////////////////
12
f6bcfd97
BP
13// ============================================================================
14// declarations
15// ============================================================================
16
17// ----------------------------------------------------------------------------
18// headers
19// ----------------------------------------------------------------------------
20
14f355c2 21#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
22 #pragma implementation "strconv.h"
23#endif
24
25// For compilers that support precompilation, includes "wx.h".
26#include "wx/wxprec.h"
27
28#ifdef __BORLANDC__
29 #pragma hdrstop
30#endif
31
373658eb
VZ
32#ifndef WX_PRECOMP
33 #include "wx/intl.h"
34 #include "wx/log.h"
35#endif // WX_PRECOMP
36
bde4baac
VZ
37#include "wx/strconv.h"
38
39#if wxUSE_WCHAR_T
40
0a1c1e62 41#ifdef __WXMSW__
373658eb 42 #include "wx/msw/private.h"
7608a683
WS
43#endif
44
45#ifdef __WINDOWS__
13dd924a 46 #include "wx/msw/missing.h"
0a1c1e62
GRG
47#endif
48
1c193821 49#ifndef __WXWINCE__
1cd52418 50#include <errno.h>
1c193821
JS
51#endif
52
6001e347
RR
53#include <ctype.h>
54#include <string.h>
55#include <stdlib.h>
56
e95354ec
VZ
57#if defined(__WIN32__) && !defined(__WXMICROWIN__)
58 #define wxHAVE_WIN32_MB2WC
59#endif // __WIN32__ but !__WXMICROWIN__
60
373658eb
VZ
61// ----------------------------------------------------------------------------
62// headers
63// ----------------------------------------------------------------------------
7af284fd 64
6001e347 65#ifdef __SALFORDC__
373658eb 66 #include <clib.h>
6001e347
RR
67#endif
68
b040e242 69#ifdef HAVE_ICONV
373658eb 70 #include <iconv.h>
1cd52418 71#endif
1cd52418 72
373658eb
VZ
73#include "wx/encconv.h"
74#include "wx/fontmap.h"
7608a683 75#include "wx/utils.h"
373658eb 76
335d31e0 77#ifdef __WXMAC__
4227afa4
SC
78#include <ATSUnicode.h>
79#include <TextCommon.h>
80#include <TextEncodingConverter.h>
335d31e0
SC
81
82#include "wx/mac/private.h" // includes mac headers
83#endif
373658eb
VZ
84// ----------------------------------------------------------------------------
85// macros
86// ----------------------------------------------------------------------------
3e61dfb0 87
1cd52418 88#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
3a0d76bc 89#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
1cd52418
OK
90
91#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
92 #define WC_NAME "UCS4"
93 #define WC_BSWAP BSWAP_UCS4
94 #ifdef WORDS_BIGENDIAN
95 #define WC_NAME_BEST "UCS-4BE"
96 #else
97 #define WC_NAME_BEST "UCS-4LE"
98 #endif
1cd52418 99#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
100 #define WC_NAME "UTF16"
101 #define WC_BSWAP BSWAP_UTF16
a3f2769e 102 #define WC_UTF16
3a0d76bc
VS
103 #ifdef WORDS_BIGENDIAN
104 #define WC_NAME_BEST "UTF-16BE"
105 #else
106 #define WC_NAME_BEST "UTF-16LE"
107 #endif
bab1e722 108#else // sizeof(wchar_t) != 2 nor 4
bde4baac
VZ
109 // does this ever happen?
110 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1cd52418
OK
111#endif
112
373658eb
VZ
113// ============================================================================
114// implementation
115// ============================================================================
116
117// ----------------------------------------------------------------------------
c91830cb 118// UTF-16 en/decoding to/from UCS-4
373658eb 119// ----------------------------------------------------------------------------
6001e347 120
b0a6bb75 121
c91830cb 122static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 123{
dccce9ea 124 if (input<=0xffff)
4def3b35 125 {
999836aa
VZ
126 if (output)
127 *output = (wxUint16) input;
4def3b35 128 return 1;
dccce9ea
VZ
129 }
130 else if (input>=0x110000)
4def3b35
VS
131 {
132 return (size_t)-1;
dccce9ea
VZ
133 }
134 else
4def3b35 135 {
dccce9ea 136 if (output)
4def3b35 137 {
c91830cb 138 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
999836aa 139 *output = (wxUint16) ((input&0x3ff)+0xdc00);
4def3b35
VS
140 }
141 return 2;
1cd52418 142 }
1cd52418
OK
143}
144
c91830cb 145static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 146{
dccce9ea 147 if ((*input<0xd800) || (*input>0xdfff))
4def3b35
VS
148 {
149 output = *input;
150 return 1;
dccce9ea
VZ
151 }
152 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
4def3b35
VS
153 {
154 output = *input;
155 return (size_t)-1;
dccce9ea
VZ
156 }
157 else
4def3b35
VS
158 {
159 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
160 return 2;
161 }
1cd52418
OK
162}
163
b0a6bb75 164
f6bcfd97 165// ----------------------------------------------------------------------------
6001e347 166// wxMBConv
f6bcfd97 167// ----------------------------------------------------------------------------
6001e347 168
2b5f62a0
VZ
169wxMBConv::~wxMBConv()
170{
171 // nothing to do here
172}
173
6001e347
RR
174const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
175{
2b5f62a0 176 if ( psz )
6001e347 177 {
2b5f62a0
VZ
178 // calculate the length of the buffer needed first
179 size_t nLen = MB2WC(NULL, psz, 0);
180 if ( nLen != (size_t)-1 )
181 {
182 // now do the actual conversion
183 wxWCharBuffer buf(nLen);
635f33ce
VS
184 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
185 if ( nLen != (size_t)-1 )
186 {
187 return buf;
188 }
2b5f62a0 189 }
f6bcfd97 190 }
2b5f62a0
VZ
191
192 wxWCharBuffer buf((wchar_t *)NULL);
193
194 return buf;
6001e347
RR
195}
196
e5cceba0 197const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 198{
2b5f62a0
VZ
199 if ( pwz )
200 {
201 size_t nLen = WC2MB(NULL, pwz, 0);
202 if ( nLen != (size_t)-1 )
203 {
c91830cb 204 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
635f33ce
VS
205 nLen = WC2MB(buf.data(), pwz, nLen + 4);
206 if ( nLen != (size_t)-1 )
207 {
208 return buf;
209 }
2b5f62a0
VZ
210 }
211 }
212
213 wxCharBuffer buf((char *)NULL);
e5cceba0 214
e5cceba0 215 return buf;
6001e347
RR
216}
217
6001e347 218// ----------------------------------------------------------------------------
bde4baac 219// wxMBConvLibc
6001e347
RR
220// ----------------------------------------------------------------------------
221
bde4baac
VZ
222size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
223{
224 return wxMB2WC(buf, psz, n);
225}
226
227size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
228{
229 return wxWC2MB(buf, psz, n);
230}
231
232// ----------------------------------------------------------------------------
233// UTF-7
234// ----------------------------------------------------------------------------
6001e347
RR
235
236#if 0
237static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
238 "abcdefghijklmnopqrstuvwxyz"
239 "0123456789'(),-./:?";
240static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
241static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
242 "abcdefghijklmnopqrstuvwxyz"
243 "0123456789+/";
244#endif
245
246// TODO: write actual implementations of UTF-7 here
247size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
248 const char * WXUNUSED(psz),
249 size_t WXUNUSED(n)) const
250{
251 return 0;
252}
253
254size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
255 const wchar_t * WXUNUSED(psz),
256 size_t WXUNUSED(n)) const
257{
258 return 0;
259}
260
f6bcfd97 261// ----------------------------------------------------------------------------
6001e347 262// UTF-8
f6bcfd97 263// ----------------------------------------------------------------------------
6001e347 264
dccce9ea 265static wxUint32 utf8_max[]=
4def3b35 266 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347
RR
267
268size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
269{
4def3b35
VS
270 size_t len = 0;
271
dccce9ea 272 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
273 {
274 unsigned char cc = *psz++, fc = cc;
275 unsigned cnt;
dccce9ea 276 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 277 fc <<= 1;
dccce9ea 278 if (!cnt)
4def3b35
VS
279 {
280 // plain ASCII char
dccce9ea 281 if (buf)
4def3b35
VS
282 *buf++ = cc;
283 len++;
dccce9ea
VZ
284 }
285 else
4def3b35
VS
286 {
287 cnt--;
dccce9ea 288 if (!cnt)
4def3b35
VS
289 {
290 // invalid UTF-8 sequence
291 return (size_t)-1;
dccce9ea
VZ
292 }
293 else
4def3b35
VS
294 {
295 unsigned ocnt = cnt - 1;
296 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 297 while (cnt--)
4def3b35
VS
298 {
299 cc = *psz++;
dccce9ea 300 if ((cc & 0xC0) != 0x80)
4def3b35
VS
301 {
302 // invalid UTF-8 sequence
303 return (size_t)-1;
304 }
305 res = (res << 6) | (cc & 0x3f);
306 }
dccce9ea 307 if (res <= utf8_max[ocnt])
4def3b35
VS
308 {
309 // illegal UTF-8 encoding
310 return (size_t)-1;
311 }
1cd52418 312#ifdef WC_UTF16
b5153fd8
VZ
313 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
314 size_t pa = encode_utf16(res, (wxUint16 *)buf);
4def3b35
VS
315 if (pa == (size_t)-1)
316 return (size_t)-1;
dccce9ea 317 if (buf)
4def3b35
VS
318 buf += pa;
319 len += pa;
373658eb 320#else // !WC_UTF16
dccce9ea 321 if (buf)
4def3b35
VS
322 *buf++ = res;
323 len++;
373658eb 324#endif // WC_UTF16/!WC_UTF16
4def3b35
VS
325 }
326 }
6001e347 327 }
dccce9ea 328 if (buf && (len < n))
4def3b35
VS
329 *buf = 0;
330 return len;
6001e347
RR
331}
332
333size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
334{
4def3b35 335 size_t len = 0;
6001e347 336
dccce9ea 337 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
338 {
339 wxUint32 cc;
1cd52418 340#ifdef WC_UTF16
b5153fd8
VZ
341 // cast is ok for WC_UTF16
342 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 343 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 344#else
4def3b35
VS
345 cc=(*psz++) & 0x7fffffff;
346#endif
347 unsigned cnt;
348 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
dccce9ea 349 if (!cnt)
4def3b35
VS
350 {
351 // plain ASCII char
dccce9ea 352 if (buf)
574c939e 353 *buf++ = (char) cc;
4def3b35 354 len++;
dccce9ea
VZ
355 }
356
357 else
4def3b35
VS
358 {
359 len += cnt + 1;
dccce9ea 360 if (buf)
4def3b35 361 {
574c939e 362 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
4def3b35 363 while (cnt--)
574c939e 364 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
4def3b35
VS
365 }
366 }
6001e347 367 }
4def3b35
VS
368
369 if (buf && (len<n)) *buf = 0;
adb45366 370
4def3b35 371 return len;
6001e347
RR
372}
373
c91830cb
VZ
374
375
376
377// ----------------------------------------------------------------------------
378// UTF-16
379// ----------------------------------------------------------------------------
380
381#ifdef WORDS_BIGENDIAN
bde4baac
VZ
382 #define wxMBConvUTF16straight wxMBConvUTF16BE
383 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 384#else
bde4baac
VZ
385 #define wxMBConvUTF16swap wxMBConvUTF16BE
386 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
387#endif
388
389
c91830cb
VZ
390#ifdef WC_UTF16
391
c91830cb
VZ
392// copy 16bit MB to 16bit String
393size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
394{
395 size_t len=0;
396
397 while (*(wxUint16*)psz && (!buf || len < n))
398 {
399 if (buf)
400 *buf++ = *(wxUint16*)psz;
401 len++;
402
403 psz += sizeof(wxUint16);
404 }
405 if (buf && len<n) *buf=0;
406
407 return len;
408}
409
410
411// copy 16bit String to 16bit MB
412size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
413{
414 size_t len=0;
415
416 while (*psz && (!buf || len < n))
417 {
418 if (buf)
419 {
420 *(wxUint16*)buf = *psz;
421 buf += sizeof(wxUint16);
422 }
423 len += sizeof(wxUint16);
424 psz++;
425 }
426 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
427
428 return len;
429}
430
431
432// swap 16bit MB to 16bit String
433size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
434{
435 size_t len=0;
436
437 while (*(wxUint16*)psz && (!buf || len < n))
438 {
439 if (buf)
440 {
441 ((char *)buf)[0] = psz[1];
442 ((char *)buf)[1] = psz[0];
443 buf++;
444 }
445 len++;
446 psz += sizeof(wxUint16);
447 }
448 if (buf && len<n) *buf=0;
449
450 return len;
451}
452
453
454// swap 16bit String to 16bit MB
455size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
456{
457 size_t len=0;
458
459 while (*psz && (!buf || len < n))
460 {
461 if (buf)
462 {
463 *buf++ = ((char*)psz)[1];
464 *buf++ = ((char*)psz)[0];
465 }
466 len += sizeof(wxUint16);
467 psz++;
468 }
469 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
470
471 return len;
472}
473
474
475#else // WC_UTF16
476
477
478// copy 16bit MB to 32bit String
479size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
480{
481 size_t len=0;
482
483 while (*(wxUint16*)psz && (!buf || len < n))
484 {
485 wxUint32 cc;
486 size_t pa=decode_utf16((wxUint16*)psz, cc);
487 if (pa == (size_t)-1)
488 return pa;
489
490 if (buf)
491 *buf++ = cc;
492 len++;
493 psz += pa * sizeof(wxUint16);
494 }
495 if (buf && len<n) *buf=0;
496
497 return len;
498}
499
500
501// copy 32bit String to 16bit MB
502size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
503{
504 size_t len=0;
505
506 while (*psz && (!buf || len < n))
507 {
508 wxUint16 cc[2];
509 size_t pa=encode_utf16(*psz, cc);
510
511 if (pa == (size_t)-1)
512 return pa;
513
514 if (buf)
515 {
69b80d28 516 *(wxUint16*)buf = cc[0];
b5153fd8 517 buf += sizeof(wxUint16);
c91830cb 518 if (pa > 1)
69b80d28
VZ
519 {
520 *(wxUint16*)buf = cc[1];
521 buf += sizeof(wxUint16);
522 }
c91830cb
VZ
523 }
524
525 len += pa*sizeof(wxUint16);
526 psz++;
527 }
528 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
529
530 return len;
531}
532
533
534// swap 16bit MB to 32bit String
535size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
536{
537 size_t len=0;
538
539 while (*(wxUint16*)psz && (!buf || len < n))
540 {
541 wxUint32 cc;
542 char tmp[4];
543 tmp[0]=psz[1]; tmp[1]=psz[0];
544 tmp[2]=psz[3]; tmp[3]=psz[2];
545
546 size_t pa=decode_utf16((wxUint16*)tmp, cc);
547 if (pa == (size_t)-1)
548 return pa;
549
550 if (buf)
551 *buf++ = cc;
552
553 len++;
554 psz += pa * sizeof(wxUint16);
555 }
556 if (buf && len<n) *buf=0;
557
558 return len;
559}
560
561
562// swap 32bit String to 16bit MB
563size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
564{
565 size_t len=0;
566
567 while (*psz && (!buf || len < n))
568 {
569 wxUint16 cc[2];
570 size_t pa=encode_utf16(*psz, cc);
571
572 if (pa == (size_t)-1)
573 return pa;
574
575 if (buf)
576 {
577 *buf++ = ((char*)cc)[1];
578 *buf++ = ((char*)cc)[0];
579 if (pa > 1)
580 {
581 *buf++ = ((char*)cc)[3];
582 *buf++ = ((char*)cc)[2];
583 }
584 }
585
586 len += pa*sizeof(wxUint16);
587 psz++;
588 }
589 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
590
591 return len;
592}
593
594#endif // WC_UTF16
595
596
597// ----------------------------------------------------------------------------
598// UTF-32
599// ----------------------------------------------------------------------------
600
601#ifdef WORDS_BIGENDIAN
602#define wxMBConvUTF32straight wxMBConvUTF32BE
603#define wxMBConvUTF32swap wxMBConvUTF32LE
604#else
605#define wxMBConvUTF32swap wxMBConvUTF32BE
606#define wxMBConvUTF32straight wxMBConvUTF32LE
607#endif
608
609
610WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
611WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
612
613
614#ifdef WC_UTF16
615
616// copy 32bit MB to 16bit String
617size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
618{
619 size_t len=0;
620
621 while (*(wxUint32*)psz && (!buf || len < n))
622 {
623 wxUint16 cc[2];
624
625 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
626 if (pa == (size_t)-1)
627 return pa;
628
629 if (buf)
630 {
631 *buf++ = cc[0];
632 if (pa > 1)
633 *buf++ = cc[1];
634 }
635 len += pa;
636 psz += sizeof(wxUint32);
637 }
638 if (buf && len<n) *buf=0;
639
640 return len;
641}
642
643
644// copy 16bit String to 32bit MB
645size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
646{
647 size_t len=0;
648
649 while (*psz && (!buf || len < n))
650 {
651 wxUint32 cc;
652
b5153fd8
VZ
653 // cast is ok for WC_UTF16
654 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
655 if (pa == (size_t)-1)
656 return pa;
657
658 if (buf)
659 {
660 *(wxUint32*)buf = cc;
661 buf += sizeof(wxUint32);
662 }
663 len += sizeof(wxUint32);
664 psz += pa;
665 }
b5153fd8
VZ
666
667 if (buf && len<=n-sizeof(wxUint32))
668 *(wxUint32*)buf=0;
c91830cb
VZ
669
670 return len;
671}
672
673
674
675// swap 32bit MB to 16bit String
676size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
677{
678 size_t len=0;
679
680 while (*(wxUint32*)psz && (!buf || len < n))
681 {
682 char tmp[4];
683 tmp[0] = psz[3]; tmp[1] = psz[2];
684 tmp[2] = psz[1]; tmp[3] = psz[0];
685
686
687 wxUint16 cc[2];
688
689 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
690 if (pa == (size_t)-1)
691 return pa;
692
693 if (buf)
694 {
695 *buf++ = cc[0];
696 if (pa > 1)
697 *buf++ = cc[1];
698 }
699 len += pa;
700 psz += sizeof(wxUint32);
701 }
b5153fd8
VZ
702
703 if (buf && len<n)
704 *buf=0;
c91830cb
VZ
705
706 return len;
707}
708
709
710// swap 16bit String to 32bit MB
711size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
712{
713 size_t len=0;
714
715 while (*psz && (!buf || len < n))
716 {
717 char cc[4];
718
b5153fd8
VZ
719 // cast is ok for WC_UTF16
720 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
721 if (pa == (size_t)-1)
722 return pa;
723
724 if (buf)
725 {
726 *buf++ = cc[3];
727 *buf++ = cc[2];
728 *buf++ = cc[1];
729 *buf++ = cc[0];
730 }
731 len += sizeof(wxUint32);
732 psz += pa;
733 }
b5153fd8
VZ
734
735 if (buf && len<=n-sizeof(wxUint32))
736 *(wxUint32*)buf=0;
c91830cb
VZ
737
738 return len;
739}
740
741#else // WC_UTF16
742
743
744// copy 32bit MB to 32bit String
745size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
746{
747 size_t len=0;
748
749 while (*(wxUint32*)psz && (!buf || len < n))
750 {
751 if (buf)
752 *buf++ = *(wxUint32*)psz;
753 len++;
754 psz += sizeof(wxUint32);
755 }
b5153fd8
VZ
756
757 if (buf && len<n)
758 *buf=0;
c91830cb
VZ
759
760 return len;
761}
762
763
764// copy 32bit String to 32bit MB
765size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
766{
767 size_t len=0;
768
769 while (*psz && (!buf || len < n))
770 {
771 if (buf)
772 {
773 *(wxUint32*)buf = *psz;
774 buf += sizeof(wxUint32);
775 }
776
777 len += sizeof(wxUint32);
778 psz++;
779 }
780
b5153fd8
VZ
781 if (buf && len<=n-sizeof(wxUint32))
782 *(wxUint32*)buf=0;
c91830cb
VZ
783
784 return len;
785}
786
787
788// swap 32bit MB to 32bit String
789size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
790{
791 size_t len=0;
792
793 while (*(wxUint32*)psz && (!buf || len < n))
794 {
795 if (buf)
796 {
797 ((char *)buf)[0] = psz[3];
798 ((char *)buf)[1] = psz[2];
799 ((char *)buf)[2] = psz[1];
800 ((char *)buf)[3] = psz[0];
801 buf++;
802 }
803 len++;
804 psz += sizeof(wxUint32);
805 }
b5153fd8
VZ
806
807 if (buf && len<n)
808 *buf=0;
c91830cb
VZ
809
810 return len;
811}
812
813
814// swap 32bit String to 32bit MB
815size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
816{
817 size_t len=0;
818
819 while (*psz && (!buf || len < n))
820 {
821 if (buf)
822 {
823 *buf++ = ((char *)psz)[3];
824 *buf++ = ((char *)psz)[2];
825 *buf++ = ((char *)psz)[1];
826 *buf++ = ((char *)psz)[0];
827 }
828 len += sizeof(wxUint32);
829 psz++;
830 }
b5153fd8
VZ
831
832 if (buf && len<=n-sizeof(wxUint32))
833 *(wxUint32*)buf=0;
c91830cb
VZ
834
835 return len;
836}
837
838
839#endif // WC_UTF16
840
841
36acb880
VZ
842// ============================================================================
843// The classes doing conversion using the iconv_xxx() functions
844// ============================================================================
3caec1bb 845
b040e242 846#ifdef HAVE_ICONV
3a0d76bc 847
3caec1bb
VS
848// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
849// if output buffer is _exactly_ as big as needed. Such case is (unless there's
850// yet another bug in glibc) the only case when iconv() returns with (size_t)-1
851// (which means error) and says there are 0 bytes left in the input buffer --
852// when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
853// this alternative test for iconv() failure.
854// [This bug does not appear in glibc 2.2.]
855#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
856#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
857 (errno != E2BIG || bufLeft != 0))
858#else
859#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
860#endif
861
ab217dba 862#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880
VZ
863
864// ----------------------------------------------------------------------------
e95354ec 865// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
866// ----------------------------------------------------------------------------
867
e95354ec 868class wxMBConv_iconv : public wxMBConv
1cd52418
OK
869{
870public:
e95354ec
VZ
871 wxMBConv_iconv(const wxChar *name);
872 virtual ~wxMBConv_iconv();
36acb880 873
bde4baac
VZ
874 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
875 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 876
e95354ec 877 bool IsOk() const
36acb880
VZ
878 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
879
880protected:
881 // the iconv handlers used to translate from multibyte to wide char and in
882 // the other direction
883 iconv_t m2w,
884 w2m;
885
886private:
e95354ec 887 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
888 // available on this machine, it will remain NULL
889 static const char *ms_wcCharsetName;
890
891 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
892 // different endian-ness than the native one
405d8f46 893 static bool ms_wcNeedsSwap;
36acb880
VZ
894};
895
e95354ec
VZ
896const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
897bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 898
e95354ec 899wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 900{
04c79127
RR
901 // Do it the hard way
902 char cname[100];
903 for (size_t i = 0; i < wxStrlen(name)+1; i++)
904 cname[i] = (char) name[i];
905
36acb880
VZ
906 // check for charset that represents wchar_t:
907 if (ms_wcCharsetName == NULL)
f1339c56 908 {
e95354ec 909 ms_wcNeedsSwap = false;
dccce9ea 910
36acb880
VZ
911 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
912 ms_wcCharsetName = WC_NAME_BEST;
04c79127 913 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 914
36acb880
VZ
915 if (m2w == (iconv_t)-1)
916 {
917 // try charset w/o bytesex info (e.g. "UCS4")
918 // and check for bytesex ourselves:
919 ms_wcCharsetName = WC_NAME;
04c79127 920 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
921
922 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
923 if (m2w == (iconv_t)-1)
924 {
36acb880 925 ms_wcCharsetName = "WCHAR_T";
04c79127 926 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 927 }
3a0d76bc 928
36acb880
VZ
929 if (m2w != (iconv_t)-1)
930 {
931 char buf[2], *bufPtr;
932 wchar_t wbuf[2], *wbufPtr;
933 size_t insz, outsz;
934 size_t res;
935
936 buf[0] = 'A';
937 buf[1] = 0;
938 wbuf[0] = 0;
939 insz = 2;
940 outsz = SIZEOF_WCHAR_T * 2;
941 wbufPtr = wbuf;
942 bufPtr = buf;
943
944 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
945 (char**)&wbufPtr, &outsz);
946
947 if (ICONV_FAILED(res, insz))
3a0d76bc 948 {
36acb880
VZ
949 ms_wcCharsetName = NULL;
950 wxLogLastError(wxT("iconv"));
2b5f62a0 951 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
952 }
953 else
954 {
36acb880 955 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
956 }
957 }
36acb880
VZ
958 else
959 {
960 ms_wcCharsetName = NULL;
373658eb 961
77ffb593 962 // VS: we must not output an error here, since wxWidgets will safely
957686c8
VS
963 // fall back to using wxEncodingConverter.
964 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
965 //wxLogError(
36acb880 966 }
3a0d76bc 967 }
36acb880 968 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 969 }
36acb880 970 else // we already have ms_wcCharsetName
3caec1bb 971 {
04c79127 972 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 973 }
dccce9ea 974
36acb880
VZ
975 // NB: don't ever pass NULL to iconv_open(), it may crash!
976 if ( ms_wcCharsetName )
f1339c56 977 {
04c79127 978 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 979 }
405d8f46
VZ
980 else
981 {
982 w2m = (iconv_t)-1;
983 }
36acb880 984}
3caec1bb 985
e95354ec 986wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
987{
988 if ( m2w != (iconv_t)-1 )
989 iconv_close(m2w);
990 if ( w2m != (iconv_t)-1 )
991 iconv_close(w2m);
992}
3a0d76bc 993
bde4baac 994size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880
VZ
995{
996 size_t inbuf = strlen(psz);
997 size_t outbuf = n * SIZEOF_WCHAR_T;
998 size_t res, cres;
999 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1000 wchar_t *bufPtr = buf;
1001 const char *pszPtr = psz;
1002
1003 if (buf)
1004 {
1005 // have destination buffer, convert there
1006 cres = iconv(m2w,
1007 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1008 (char**)&bufPtr, &outbuf);
1009 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1010
36acb880 1011 if (ms_wcNeedsSwap)
3a0d76bc 1012 {
36acb880
VZ
1013 // convert to native endianness
1014 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 1015 }
adb45366 1016
49dd9820
VS
1017 // NB: iconv was given only strlen(psz) characters on input, and so
1018 // it couldn't convert the trailing zero. Let's do it ourselves
1019 // if there's some room left for it in the output buffer.
1020 if (res < n)
1021 buf[res] = 0;
36acb880
VZ
1022 }
1023 else
1024 {
1025 // no destination buffer... convert using temp buffer
1026 // to calculate destination buffer requirement
1027 wchar_t tbuf[8];
1028 res = 0;
1029 do {
1030 bufPtr = tbuf;
1031 outbuf = 8*SIZEOF_WCHAR_T;
1032
1033 cres = iconv(m2w,
1034 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1035 (char**)&bufPtr, &outbuf );
1036
1037 res += 8-(outbuf/SIZEOF_WCHAR_T);
1038 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1039 }
dccce9ea 1040
36acb880 1041 if (ICONV_FAILED(cres, inbuf))
f1339c56 1042 {
36acb880
VZ
1043 //VS: it is ok if iconv fails, hence trace only
1044 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1045 return (size_t)-1;
1046 }
1047
1048 return res;
1049}
1050
bde4baac 1051size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1052{
f8d791e0 1053 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1054 size_t outbuf = n;
1055 size_t res, cres;
3a0d76bc 1056
36acb880 1057 wchar_t *tmpbuf = 0;
3caec1bb 1058
36acb880
VZ
1059 if (ms_wcNeedsSwap)
1060 {
1061 // need to copy to temp buffer to switch endianness
1062 // this absolutely doesn't rock!
1063 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1064 // could be in read-only memory, or be accessed in some other thread)
1065 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1066 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1067 WC_BSWAP(tmpbuf, inbuf)
1068 psz=tmpbuf;
1069 }
3a0d76bc 1070
36acb880
VZ
1071 if (buf)
1072 {
1073 // have destination buffer, convert there
1074 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1075
36acb880 1076 res = n-outbuf;
adb45366 1077
49dd9820
VS
1078 // NB: iconv was given only wcslen(psz) characters on input, and so
1079 // it couldn't convert the trailing zero. Let's do it ourselves
1080 // if there's some room left for it in the output buffer.
1081 if (res < n)
1082 buf[0] = 0;
36acb880
VZ
1083 }
1084 else
1085 {
1086 // no destination buffer... convert using temp buffer
1087 // to calculate destination buffer requirement
1088 char tbuf[16];
1089 res = 0;
1090 do {
1091 buf = tbuf; outbuf = 16;
1092
1093 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1094
36acb880
VZ
1095 res += 16 - outbuf;
1096 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1097 }
dccce9ea 1098
36acb880
VZ
1099 if (ms_wcNeedsSwap)
1100 {
1101 free(tmpbuf);
1102 }
dccce9ea 1103
36acb880
VZ
1104 if (ICONV_FAILED(cres, inbuf))
1105 {
1106 //VS: it is ok if iconv fails, hence trace only
1107 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1108 return (size_t)-1;
1109 }
1110
1111 return res;
1112}
1113
b040e242 1114#endif // HAVE_ICONV
36acb880 1115
e95354ec 1116
36acb880
VZ
1117// ============================================================================
1118// Win32 conversion classes
1119// ============================================================================
1cd52418 1120
e95354ec 1121#ifdef wxHAVE_WIN32_MB2WC
373658eb 1122
8b04d4c4 1123// from utils.cpp
7608a683 1124#if wxUSE_FONTMAP
8b04d4c4
VZ
1125extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1126extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1127#endif
373658eb 1128
e95354ec 1129class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1130{
1131public:
bde4baac
VZ
1132 wxMBConv_win32()
1133 {
1134 m_CodePage = CP_ACP;
1135 }
1136
7608a683 1137#if wxUSE_FONTMAP
e95354ec 1138 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1139 {
1140 m_CodePage = wxCharsetToCodepage(name);
1141 }
dccce9ea 1142
e95354ec 1143 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1144 {
1145 m_CodePage = wxEncodingToCodepage(encoding);
1146 }
7608a683 1147#endif
8b04d4c4 1148
bde4baac 1149 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1150 {
2b5f62a0
VZ
1151 const size_t len = ::MultiByteToWideChar
1152 (
1153 m_CodePage, // code page
1154 0, // flags (none)
1155 psz, // input string
1156 -1, // its length (NUL-terminated)
b4da152e 1157 buf, // output string
2b5f62a0
VZ
1158 buf ? n : 0 // size of output buffer
1159 );
1160
03a991bc
VZ
1161 // note that it returns count of written chars for buf != NULL and size
1162 // of the needed buffer for buf == NULL so in either case the length of
1163 // the string (which never includes the terminating NUL) is one less
1164 return len ? len - 1 : (size_t)-1;
f1339c56 1165 }
dccce9ea 1166
13dd924a 1167 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1168 {
13dd924a
VZ
1169 /*
1170 we have a problem here: by default, WideCharToMultiByte() may
1171 replace characters unrepresentable in the target code page with bad
1172 quality approximations such as turning "1/2" symbol (U+00BD) into
1173 "1" for the code pages which don't have it and we, obviously, want
1174 to avoid this at any price
1175
1176 the trouble is that this function does it _silently_, i.e. it won't
1177 even tell us whether it did or not... Win98/2000 and higher provide
1178 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1179 we have to resort to a round trip, i.e. check that converting back
1180 results in the same string -- this is, of course, expensive but
1181 otherwise we simply can't be sure to not garble the data.
1182 */
1183
1184 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1185 // it doesn't work with CJK encodings (which we test for rather roughly
1186 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1187 // supporting it
1188 BOOL usedDef wxDUMMY_INITIALIZE(false),
1189 *pUsedDef;
1190 int flags;
1191 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1192 {
1193 // it's our lucky day
1194 flags = WC_NO_BEST_FIT_CHARS;
1195 pUsedDef = &usedDef;
1196 }
1197 else // old system or unsupported encoding
1198 {
1199 flags = 0;
1200 pUsedDef = NULL;
1201 }
1202
2b5f62a0
VZ
1203 const size_t len = ::WideCharToMultiByte
1204 (
1205 m_CodePage, // code page
13dd924a
VZ
1206 flags, // either none or no best fit
1207 pwz, // input string
2b5f62a0
VZ
1208 -1, // it is (wide) NUL-terminated
1209 buf, // output buffer
1210 buf ? n : 0, // and its size
1211 NULL, // default "replacement" char
13dd924a 1212 pUsedDef // [out] was it used?
2b5f62a0
VZ
1213 );
1214
13dd924a
VZ
1215 if ( !len )
1216 {
1217 // function totally failed
1218 return (size_t)-1;
1219 }
1220
1221 // if we were really converting, check if we succeeded
1222 if ( buf )
1223 {
1224 if ( flags )
1225 {
1226 // check if the conversion failed, i.e. if any replacements
1227 // were done
1228 if ( usedDef )
1229 return (size_t)-1;
1230 }
1231 else // we must resort to double tripping...
1232 {
1233 wxWCharBuffer wcBuf(n);
1234 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1235 wcscmp(wcBuf, pwz) != 0 )
1236 {
1237 // we didn't obtain the same thing we started from, hence
1238 // the conversion was lossy and we consider that it failed
1239 return (size_t)-1;
1240 }
1241 }
1242 }
1243
03a991bc 1244 // see the comment above for the reason of "len - 1"
13dd924a 1245 return len - 1;
f1339c56 1246 }
dccce9ea 1247
13dd924a
VZ
1248 bool IsOk() const { return m_CodePage != -1; }
1249
1250private:
1251 static bool CanUseNoBestFit()
1252 {
1253 static int s_isWin98Or2k = -1;
1254
1255 if ( s_isWin98Or2k == -1 )
1256 {
1257 int verMaj, verMin;
1258 switch ( wxGetOsVersion(&verMaj, &verMin) )
1259 {
1260 case wxWIN95:
1261 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1262 break;
1263
1264 case wxWINDOWS_NT:
1265 s_isWin98Or2k = verMaj >= 5;
1266 break;
1267
1268 default:
1269 // unknown, be conseravtive by default
1270 s_isWin98Or2k = 0;
1271 }
1272
1273 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1274 }
1275
1276 return s_isWin98Or2k == 1;
1277 }
f1339c56 1278
b1d66b54 1279 long m_CodePage;
1cd52418 1280};
e95354ec
VZ
1281
1282#endif // wxHAVE_WIN32_MB2WC
1283
335d31e0
SC
1284// ============================================================================
1285// Mac conversion classes
1286// ============================================================================
1287
1288#if defined(__WXMAC__) && defined(TARGET_CARBON)
1289
1290class wxMBConv_mac : public wxMBConv
1291{
1292public:
1293 wxMBConv_mac()
1294 {
1295 Init(CFStringGetSystemEncoding()) ;
1296 }
1297
1298 wxMBConv_mac(const wxChar* name)
1299 {
8057c6d6 1300 Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, FALSE) ) ) ;
335d31e0
SC
1301 }
1302
1303 wxMBConv_mac(wxFontEncoding encoding)
1304 {
8057c6d6 1305 Init( wxMacGetSystemEncFromFontEnc(encoding) );
335d31e0
SC
1306 }
1307
1308 ~wxMBConv_mac()
1309 {
1310 OSStatus status = noErr ;
1311 status = TECDisposeConverter(m_MB2WC_converter);
1312 status = TECDisposeConverter(m_WC2MB_converter);
1313 }
1314
335d31e0
SC
1315
1316 void Init( TextEncodingBase encoding)
1317 {
1318 OSStatus status = noErr ;
1319 m_char_encoding = encoding ;
335d31e0 1320 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
f3a355ce 1321
335d31e0
SC
1322 status = TECCreateConverter(&m_MB2WC_converter,
1323 m_char_encoding,
1324 m_unicode_encoding);
1325 status = TECCreateConverter(&m_WC2MB_converter,
1326 m_unicode_encoding,
1327 m_char_encoding);
1328 }
1329
1330 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1331 {
1332 OSStatus status = noErr ;
1333 ByteCount byteOutLen ;
1334 ByteCount byteInLen = strlen(psz) ;
335d31e0 1335 wchar_t *tbuf = NULL ;
f3a355ce
SC
1336 UniChar* ubuf = NULL ;
1337 size_t res = 0 ;
335d31e0
SC
1338
1339 if (buf == NULL)
1340 {
5c250a10
SC
1341 n = byteInLen ;
1342 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
335d31e0 1343 }
f3a355ce
SC
1344 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
1345#if SIZEOF_WCHAR_T == 4
8471ea90 1346 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce
SC
1347#else
1348 ubuf = (UniChar*) (buf ? buf : tbuf) ;
1349#endif
335d31e0 1350 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
f3a355ce
SC
1351 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
1352#if SIZEOF_WCHAR_T == 4
8471ea90
SC
1353 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
1354 // is not properly terminated we get random characters at the end
1355 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
f3a355ce
SC
1356 wxMBConvUTF16BE converter ;
1357 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
1358 free( ubuf ) ;
1359#else
1360 res = byteOutLen / sizeof( UniChar ) ;
1361#endif
335d31e0
SC
1362 if ( buf == NULL )
1363 free(tbuf) ;
1364
335d31e0
SC
1365 if ( buf && res < n)
1366 buf[res] = 0;
1367
1368 return res ;
1369 }
1370
1371 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
1372 {
1373 OSStatus status = noErr ;
1374 ByteCount byteOutLen ;
1375 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
335d31e0
SC
1376
1377 char *tbuf = NULL ;
1378
1379 if (buf == NULL)
1380 {
5c250a10
SC
1381 // worst case
1382 n = byteInLen * 2 ;
335d31e0
SC
1383 tbuf = (char*) malloc( n ) ;
1384 }
1385
5c250a10 1386 ByteCount byteBufferLen = n ;
f3a355ce
SC
1387 UniChar* ubuf = NULL ;
1388#if SIZEOF_WCHAR_T == 4
1389 wxMBConvUTF16BE converter ;
1390 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
4227afa4
SC
1391 byteInLen = unicharlen ;
1392 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
1393 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce
SC
1394#else
1395 ubuf = (UniChar*) psz ;
1396#endif
1397 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
1398 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
1399#if SIZEOF_WCHAR_T == 4
1400 free( ubuf ) ;
1401#endif
335d31e0
SC
1402 if ( buf == NULL )
1403 free(tbuf) ;
1404
1405 size_t res = byteOutLen ;
1406 if ( buf && res < n)
1407 buf[res] = 0;
1408
1409 return res ;
1410 }
1411
1412 bool IsOk() const
1413 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
1414
1415private:
1416 TECObjectRef m_MB2WC_converter ;
1417 TECObjectRef m_WC2MB_converter ;
1418
1419 TextEncodingBase m_char_encoding ;
1420 TextEncodingBase m_unicode_encoding ;
1421};
1422
1423#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 1424
36acb880
VZ
1425// ============================================================================
1426// wxEncodingConverter based conversion classes
1427// ============================================================================
1428
1e6feb95 1429#if wxUSE_FONTMAP
1cd52418 1430
e95354ec 1431class wxMBConv_wxwin : public wxMBConv
1cd52418 1432{
8b04d4c4
VZ
1433private:
1434 void Init()
1435 {
1436 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
1437 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
1438 }
1439
6001e347 1440public:
f1339c56
RR
1441 // temporarily just use wxEncodingConverter stuff,
1442 // so that it works while a better implementation is built
e95354ec 1443 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
1444 {
1445 if (name)
e95354ec 1446 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
1447 else
1448 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 1449
8b04d4c4
VZ
1450 Init();
1451 }
1452
e95354ec 1453 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
1454 {
1455 m_enc = enc;
1456
1457 Init();
f1339c56 1458 }
dccce9ea 1459
bde4baac 1460 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
1461 {
1462 size_t inbuf = strlen(psz);
dccce9ea 1463 if (buf)
4def3b35 1464 m2w.Convert(psz,buf);
f1339c56
RR
1465 return inbuf;
1466 }
dccce9ea 1467
bde4baac 1468 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 1469 {
f8d791e0 1470 const size_t inbuf = wxWcslen(psz);
f1339c56
RR
1471 if (buf)
1472 w2m.Convert(psz,buf);
dccce9ea 1473
f1339c56
RR
1474 return inbuf;
1475 }
dccce9ea 1476
e95354ec 1477 bool IsOk() const { return m_ok; }
f1339c56
RR
1478
1479public:
8b04d4c4 1480 wxFontEncoding m_enc;
f1339c56 1481 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
1482
1483 // were we initialized successfully?
1484 bool m_ok;
fc7a2a60 1485
e95354ec 1486 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 1487};
6001e347 1488
1e6feb95
VZ
1489#endif // wxUSE_FONTMAP
1490
36acb880
VZ
1491// ============================================================================
1492// wxCSConv implementation
1493// ============================================================================
1494
8b04d4c4 1495void wxCSConv::Init()
6001e347 1496{
e95354ec
VZ
1497 m_name = NULL;
1498 m_convReal = NULL;
1499 m_deferred = true;
1500}
1501
8b04d4c4
VZ
1502wxCSConv::wxCSConv(const wxChar *charset)
1503{
1504 Init();
82713003 1505
e95354ec
VZ
1506 if ( charset )
1507 {
e95354ec
VZ
1508 SetName(charset);
1509 }
bda3d86a
VZ
1510
1511 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
1512}
1513
8b04d4c4
VZ
1514wxCSConv::wxCSConv(wxFontEncoding encoding)
1515{
bda3d86a 1516 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
1517 {
1518 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
1519
1520 encoding = wxFONTENCODING_SYSTEM;
1521 }
1522
8b04d4c4
VZ
1523 Init();
1524
bda3d86a 1525 m_encoding = encoding;
8b04d4c4
VZ
1526}
1527
6001e347
RR
1528wxCSConv::~wxCSConv()
1529{
65e50848
JS
1530 Clear();
1531}
1532
54380f29 1533wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 1534 : wxMBConv()
54380f29 1535{
8b04d4c4
VZ
1536 Init();
1537
54380f29 1538 SetName(conv.m_name);
8b04d4c4 1539 m_encoding = conv.m_encoding;
54380f29
GD
1540}
1541
1542wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
1543{
1544 Clear();
8b04d4c4 1545
54380f29 1546 SetName(conv.m_name);
8b04d4c4
VZ
1547 m_encoding = conv.m_encoding;
1548
54380f29
GD
1549 return *this;
1550}
1551
65e50848
JS
1552void wxCSConv::Clear()
1553{
8b04d4c4 1554 free(m_name);
e95354ec 1555 delete m_convReal;
8b04d4c4 1556
65e50848 1557 m_name = NULL;
e95354ec 1558 m_convReal = NULL;
6001e347
RR
1559}
1560
1561void wxCSConv::SetName(const wxChar *charset)
1562{
f1339c56
RR
1563 if (charset)
1564 {
1565 m_name = wxStrdup(charset);
e95354ec 1566 m_deferred = true;
f1339c56 1567 }
6001e347
RR
1568}
1569
e95354ec
VZ
1570wxMBConv *wxCSConv::DoCreate() const
1571{
c547282d
VZ
1572 // check for the special case of ASCII or ISO8859-1 charset: as we have
1573 // special knowledge of it anyhow, we don't need to create a special
1574 // conversion object
1575 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 1576 {
e95354ec
VZ
1577 // don't convert at all
1578 return NULL;
1579 }
dccce9ea 1580
e95354ec
VZ
1581 // we trust OS to do conversion better than we can so try external
1582 // conversion methods first
1583 //
1584 // the full order is:
1585 // 1. OS conversion (iconv() under Unix or Win32 API)
1586 // 2. hard coded conversions for UTF
1587 // 3. wxEncodingConverter as fall back
1588
1589 // step (1)
1590#ifdef HAVE_ICONV
c547282d 1591#if !wxUSE_FONTMAP
e95354ec 1592 if ( m_name )
c547282d 1593#endif // !wxUSE_FONTMAP
e95354ec 1594 {
c547282d
VZ
1595 wxString name(m_name);
1596
1597#if wxUSE_FONTMAP
1598 if ( name.empty() )
1599 name = wxFontMapper::Get()->GetEncodingName(m_encoding);
1600#endif // wxUSE_FONTMAP
1601
1602 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
e95354ec
VZ
1603 if ( conv->IsOk() )
1604 return conv;
1605
1606 delete conv;
1607 }
1608#endif // HAVE_ICONV
1609
1610#ifdef wxHAVE_WIN32_MB2WC
1611 {
7608a683 1612#if wxUSE_FONTMAP
e95354ec
VZ
1613 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
1614 : new wxMBConv_win32(m_encoding);
1615 if ( conv->IsOk() )
1616 return conv;
1617
1618 delete conv;
7608a683
WS
1619#else
1620 return NULL;
1621#endif
e95354ec
VZ
1622 }
1623#endif // wxHAVE_WIN32_MB2WC
335d31e0
SC
1624#if defined(__WXMAC__)
1625 {
1626 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
1627 {
1628
1629 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
1630 : new wxMBConv_mac(m_encoding);
1631 if ( conv->IsOk() )
1632 return conv;
1633
1634 delete conv;
1635 }
1636 }
1637#endif
e95354ec
VZ
1638 // step (2)
1639 wxFontEncoding enc = m_encoding;
1640#if wxUSE_FONTMAP
c547282d
VZ
1641 if ( enc == wxFONTENCODING_SYSTEM && m_name )
1642 {
1643 // use "false" to suppress interactive dialogs -- we can be called from
1644 // anywhere and popping up a dialog from here is the last thing we want to
1645 // do
1646 enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
1647 }
e95354ec
VZ
1648#endif // wxUSE_FONTMAP
1649
1650 switch ( enc )
1651 {
1652 case wxFONTENCODING_UTF7:
1653 return new wxMBConvUTF7;
1654
1655 case wxFONTENCODING_UTF8:
1656 return new wxMBConvUTF8;
1657
e95354ec
VZ
1658 case wxFONTENCODING_UTF16BE:
1659 return new wxMBConvUTF16BE;
1660
1661 case wxFONTENCODING_UTF16LE:
1662 return new wxMBConvUTF16LE;
1663
e95354ec
VZ
1664 case wxFONTENCODING_UTF32BE:
1665 return new wxMBConvUTF32BE;
1666
1667 case wxFONTENCODING_UTF32LE:
1668 return new wxMBConvUTF32LE;
1669
1670 default:
1671 // nothing to do but put here to suppress gcc warnings
1672 ;
1673 }
1674
1675 // step (3)
1676#if wxUSE_FONTMAP
1677 {
1678 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
1679 : new wxMBConv_wxwin(m_encoding);
1680 if ( conv->IsOk() )
1681 return conv;
1682
1683 delete conv;
1684 }
1685#endif // wxUSE_FONTMAP
1686
a58d4f4d
VS
1687 // NB: This is a hack to prevent deadlock. What could otherwise happen
1688 // in Unicode build: wxConvLocal creation ends up being here
1689 // because of some failure and logs the error. But wxLog will try to
1690 // attach timestamp, for which it will need wxConvLocal (to convert
1691 // time to char* and then wchar_t*), but that fails, tries to log
1692 // error, but wxLog has a (already locked) critical section that
1693 // guards static buffer.
1694 static bool alreadyLoggingError = false;
1695 if (!alreadyLoggingError)
1696 {
1697 alreadyLoggingError = true;
1698 wxLogError(_("Cannot convert from the charset '%s'!"),
1699 m_name ? m_name
e95354ec
VZ
1700 :
1701#if wxUSE_FONTMAP
1702 wxFontMapper::GetEncodingDescription(m_encoding).c_str()
1703#else // !wxUSE_FONTMAP
1704 wxString::Format(_("encoding %s"), m_encoding).c_str()
1705#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1706 );
a58d4f4d
VS
1707 alreadyLoggingError = false;
1708 }
e95354ec
VZ
1709
1710 return NULL;
1711}
1712
1713void wxCSConv::CreateConvIfNeeded() const
1714{
1715 if ( m_deferred )
1716 {
1717 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
1718
1719#if wxUSE_INTL
1720 // if we don't have neither the name nor the encoding, use the default
1721 // encoding for this system
1722 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
1723 {
4d312c22 1724 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
1725 }
1726#endif // wxUSE_INTL
1727
e95354ec
VZ
1728 self->m_convReal = DoCreate();
1729 self->m_deferred = false;
6001e347 1730 }
6001e347
RR
1731}
1732
1733size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1734{
e95354ec 1735 CreateConvIfNeeded();
dccce9ea 1736
e95354ec
VZ
1737 if (m_convReal)
1738 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
1739
1740 // latin-1 (direct)
4def3b35 1741 size_t len = strlen(psz);
dccce9ea 1742
f1339c56
RR
1743 if (buf)
1744 {
4def3b35 1745 for (size_t c = 0; c <= len; c++)
f1339c56
RR
1746 buf[c] = (unsigned char)(psz[c]);
1747 }
dccce9ea 1748
f1339c56 1749 return len;
6001e347
RR
1750}
1751
1752size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1753{
e95354ec 1754 CreateConvIfNeeded();
dccce9ea 1755
e95354ec
VZ
1756 if (m_convReal)
1757 return m_convReal->WC2MB(buf, psz, n);
1cd52418 1758
f1339c56 1759 // latin-1 (direct)
f8d791e0 1760 const size_t len = wxWcslen(psz);
f1339c56
RR
1761 if (buf)
1762 {
4def3b35 1763 for (size_t c = 0; c <= len; c++)
24642831
VS
1764 {
1765 if (psz[c] > 0xFF)
1766 return (size_t)-1;
1767 buf[c] = psz[c];
1768 }
1769 }
1770 else
1771 {
1772 for (size_t c = 0; c <= len; c++)
1773 {
1774 if (psz[c] > 0xFF)
1775 return (size_t)-1;
1776 }
f1339c56 1777 }
dccce9ea 1778
f1339c56 1779 return len;
6001e347
RR
1780}
1781
bde4baac
VZ
1782// ----------------------------------------------------------------------------
1783// globals
1784// ----------------------------------------------------------------------------
1785
1786#ifdef __WINDOWS__
1787 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
1788#elif defined(__WXMAC__) && !defined(__MACH__)
1789 static wxMBConv_mac wxConvLibcObj ;
bde4baac 1790#else
dcc8fac0 1791 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
1792#endif
1793
1794static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
1795static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
1796static wxMBConvUTF7 wxConvUTF7Obj;
1797static wxMBConvUTF8 wxConvUTF8Obj;
1798
1799
1800WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
1801WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
1802WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
1803WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
1804WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
1805WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
1806
1807#else // !wxUSE_WCHAR_T
1808
1809// stand-ins in absence of wchar_t
1810WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
1811 wxConvISO8859_1,
1812 wxConvLocal,
1813 wxConvUTF8;
1814
1815#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
1816
1817