]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
removed more unneeded files, see patch 890642
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
3a0d76bc 4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
6001e347
RR
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
e95354ec
VZ
8// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
9// (c) 2000-2003 Vadim Zeitlin
55d99c7a 10// Licence: wxWindows licence
6001e347
RR
11/////////////////////////////////////////////////////////////////////////////
12
f6bcfd97
BP
13// ============================================================================
14// declarations
15// ============================================================================
16
17// ----------------------------------------------------------------------------
18// headers
19// ----------------------------------------------------------------------------
20
14f355c2 21#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
22 #pragma implementation "strconv.h"
23#endif
24
25// For compilers that support precompilation, includes "wx.h".
26#include "wx/wxprec.h"
27
28#ifdef __BORLANDC__
29 #pragma hdrstop
30#endif
31
373658eb
VZ
32#ifndef WX_PRECOMP
33 #include "wx/intl.h"
34 #include "wx/log.h"
35#endif // WX_PRECOMP
36
bde4baac
VZ
37#include "wx/strconv.h"
38
39#if wxUSE_WCHAR_T
40
0a1c1e62 41#ifdef __WXMSW__
373658eb 42 #include "wx/msw/private.h"
13dd924a 43 #include "wx/msw/missing.h"
0a1c1e62
GRG
44#endif
45
1c193821 46#ifndef __WXWINCE__
1cd52418 47#include <errno.h>
1c193821
JS
48#endif
49
6001e347
RR
50#include <ctype.h>
51#include <string.h>
52#include <stdlib.h>
53
e95354ec
VZ
54#if defined(__WIN32__) && !defined(__WXMICROWIN__)
55 #define wxHAVE_WIN32_MB2WC
56#endif // __WIN32__ but !__WXMICROWIN__
57
373658eb
VZ
58// ----------------------------------------------------------------------------
59// headers
60// ----------------------------------------------------------------------------
7af284fd 61
6001e347 62#ifdef __SALFORDC__
373658eb 63 #include <clib.h>
6001e347
RR
64#endif
65
b040e242 66#ifdef HAVE_ICONV
373658eb 67 #include <iconv.h>
1cd52418 68#endif
1cd52418 69
373658eb
VZ
70#include "wx/encconv.h"
71#include "wx/fontmap.h"
72
335d31e0 73#ifdef __WXMAC__
4227afa4
SC
74#include <ATSUnicode.h>
75#include <TextCommon.h>
76#include <TextEncodingConverter.h>
335d31e0
SC
77
78#include "wx/mac/private.h" // includes mac headers
79#endif
373658eb
VZ
80// ----------------------------------------------------------------------------
81// macros
82// ----------------------------------------------------------------------------
3e61dfb0 83
1cd52418 84#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
3a0d76bc 85#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
1cd52418
OK
86
87#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
88 #define WC_NAME "UCS4"
89 #define WC_BSWAP BSWAP_UCS4
90 #ifdef WORDS_BIGENDIAN
91 #define WC_NAME_BEST "UCS-4BE"
92 #else
93 #define WC_NAME_BEST "UCS-4LE"
94 #endif
1cd52418 95#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
96 #define WC_NAME "UTF16"
97 #define WC_BSWAP BSWAP_UTF16
a3f2769e 98 #define WC_UTF16
3a0d76bc
VS
99 #ifdef WORDS_BIGENDIAN
100 #define WC_NAME_BEST "UTF-16BE"
101 #else
102 #define WC_NAME_BEST "UTF-16LE"
103 #endif
bab1e722 104#else // sizeof(wchar_t) != 2 nor 4
bde4baac
VZ
105 // does this ever happen?
106 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1cd52418
OK
107#endif
108
373658eb
VZ
109// ============================================================================
110// implementation
111// ============================================================================
112
113// ----------------------------------------------------------------------------
c91830cb 114// UTF-16 en/decoding to/from UCS-4
373658eb 115// ----------------------------------------------------------------------------
6001e347 116
b0a6bb75 117
c91830cb 118static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 119{
dccce9ea 120 if (input<=0xffff)
4def3b35 121 {
999836aa
VZ
122 if (output)
123 *output = (wxUint16) input;
4def3b35 124 return 1;
dccce9ea
VZ
125 }
126 else if (input>=0x110000)
4def3b35
VS
127 {
128 return (size_t)-1;
dccce9ea
VZ
129 }
130 else
4def3b35 131 {
dccce9ea 132 if (output)
4def3b35 133 {
c91830cb 134 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
999836aa 135 *output = (wxUint16) ((input&0x3ff)+0xdc00);
4def3b35
VS
136 }
137 return 2;
1cd52418 138 }
1cd52418
OK
139}
140
c91830cb 141static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 142{
dccce9ea 143 if ((*input<0xd800) || (*input>0xdfff))
4def3b35
VS
144 {
145 output = *input;
146 return 1;
dccce9ea
VZ
147 }
148 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
4def3b35
VS
149 {
150 output = *input;
151 return (size_t)-1;
dccce9ea
VZ
152 }
153 else
4def3b35
VS
154 {
155 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
156 return 2;
157 }
1cd52418
OK
158}
159
b0a6bb75 160
f6bcfd97 161// ----------------------------------------------------------------------------
6001e347 162// wxMBConv
f6bcfd97 163// ----------------------------------------------------------------------------
6001e347 164
2b5f62a0
VZ
165wxMBConv::~wxMBConv()
166{
167 // nothing to do here
168}
169
6001e347
RR
170const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
171{
2b5f62a0 172 if ( psz )
6001e347 173 {
2b5f62a0
VZ
174 // calculate the length of the buffer needed first
175 size_t nLen = MB2WC(NULL, psz, 0);
176 if ( nLen != (size_t)-1 )
177 {
178 // now do the actual conversion
179 wxWCharBuffer buf(nLen);
180 MB2WC(buf.data(), psz, nLen + 1); // with the trailing NUL
181
182 return buf;
183 }
f6bcfd97 184 }
2b5f62a0
VZ
185
186 wxWCharBuffer buf((wchar_t *)NULL);
187
188 return buf;
6001e347
RR
189}
190
e5cceba0 191const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 192{
2b5f62a0
VZ
193 if ( pwz )
194 {
195 size_t nLen = WC2MB(NULL, pwz, 0);
196 if ( nLen != (size_t)-1 )
197 {
c91830cb
VZ
198 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
199 WC2MB(buf.data(), pwz, nLen + 4);
2b5f62a0
VZ
200
201 return buf;
202 }
203 }
204
205 wxCharBuffer buf((char *)NULL);
e5cceba0 206
e5cceba0 207 return buf;
6001e347
RR
208}
209
6001e347 210// ----------------------------------------------------------------------------
bde4baac 211// wxMBConvLibc
6001e347
RR
212// ----------------------------------------------------------------------------
213
bde4baac
VZ
214size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
215{
216 return wxMB2WC(buf, psz, n);
217}
218
219size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
220{
221 return wxWC2MB(buf, psz, n);
222}
223
224// ----------------------------------------------------------------------------
225// UTF-7
226// ----------------------------------------------------------------------------
6001e347
RR
227
228#if 0
229static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
230 "abcdefghijklmnopqrstuvwxyz"
231 "0123456789'(),-./:?";
232static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
233static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
234 "abcdefghijklmnopqrstuvwxyz"
235 "0123456789+/";
236#endif
237
238// TODO: write actual implementations of UTF-7 here
239size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
240 const char * WXUNUSED(psz),
241 size_t WXUNUSED(n)) const
242{
243 return 0;
244}
245
246size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
247 const wchar_t * WXUNUSED(psz),
248 size_t WXUNUSED(n)) const
249{
250 return 0;
251}
252
f6bcfd97 253// ----------------------------------------------------------------------------
6001e347 254// UTF-8
f6bcfd97 255// ----------------------------------------------------------------------------
6001e347 256
dccce9ea 257static wxUint32 utf8_max[]=
4def3b35 258 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347
RR
259
260size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
261{
4def3b35
VS
262 size_t len = 0;
263
dccce9ea 264 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
265 {
266 unsigned char cc = *psz++, fc = cc;
267 unsigned cnt;
dccce9ea 268 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 269 fc <<= 1;
dccce9ea 270 if (!cnt)
4def3b35
VS
271 {
272 // plain ASCII char
dccce9ea 273 if (buf)
4def3b35
VS
274 *buf++ = cc;
275 len++;
dccce9ea
VZ
276 }
277 else
4def3b35
VS
278 {
279 cnt--;
dccce9ea 280 if (!cnt)
4def3b35
VS
281 {
282 // invalid UTF-8 sequence
283 return (size_t)-1;
dccce9ea
VZ
284 }
285 else
4def3b35
VS
286 {
287 unsigned ocnt = cnt - 1;
288 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 289 while (cnt--)
4def3b35
VS
290 {
291 cc = *psz++;
dccce9ea 292 if ((cc & 0xC0) != 0x80)
4def3b35
VS
293 {
294 // invalid UTF-8 sequence
295 return (size_t)-1;
296 }
297 res = (res << 6) | (cc & 0x3f);
298 }
dccce9ea 299 if (res <= utf8_max[ocnt])
4def3b35
VS
300 {
301 // illegal UTF-8 encoding
302 return (size_t)-1;
303 }
1cd52418 304#ifdef WC_UTF16
b5153fd8
VZ
305 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
306 size_t pa = encode_utf16(res, (wxUint16 *)buf);
4def3b35
VS
307 if (pa == (size_t)-1)
308 return (size_t)-1;
dccce9ea 309 if (buf)
4def3b35
VS
310 buf += pa;
311 len += pa;
373658eb 312#else // !WC_UTF16
dccce9ea 313 if (buf)
4def3b35
VS
314 *buf++ = res;
315 len++;
373658eb 316#endif // WC_UTF16/!WC_UTF16
4def3b35
VS
317 }
318 }
6001e347 319 }
dccce9ea 320 if (buf && (len < n))
4def3b35
VS
321 *buf = 0;
322 return len;
6001e347
RR
323}
324
325size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
326{
4def3b35 327 size_t len = 0;
6001e347 328
dccce9ea 329 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
330 {
331 wxUint32 cc;
1cd52418 332#ifdef WC_UTF16
b5153fd8
VZ
333 // cast is ok for WC_UTF16
334 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 335 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 336#else
4def3b35
VS
337 cc=(*psz++) & 0x7fffffff;
338#endif
339 unsigned cnt;
340 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
dccce9ea 341 if (!cnt)
4def3b35
VS
342 {
343 // plain ASCII char
dccce9ea 344 if (buf)
574c939e 345 *buf++ = (char) cc;
4def3b35 346 len++;
dccce9ea
VZ
347 }
348
349 else
4def3b35
VS
350 {
351 len += cnt + 1;
dccce9ea 352 if (buf)
4def3b35 353 {
574c939e 354 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
4def3b35 355 while (cnt--)
574c939e 356 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
4def3b35
VS
357 }
358 }
6001e347 359 }
4def3b35
VS
360
361 if (buf && (len<n)) *buf = 0;
adb45366 362
4def3b35 363 return len;
6001e347
RR
364}
365
c91830cb
VZ
366
367
368
369// ----------------------------------------------------------------------------
370// UTF-16
371// ----------------------------------------------------------------------------
372
373#ifdef WORDS_BIGENDIAN
bde4baac
VZ
374 #define wxMBConvUTF16straight wxMBConvUTF16BE
375 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 376#else
bde4baac
VZ
377 #define wxMBConvUTF16swap wxMBConvUTF16BE
378 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
379#endif
380
381
c91830cb
VZ
382#ifdef WC_UTF16
383
c91830cb
VZ
384// copy 16bit MB to 16bit String
385size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
386{
387 size_t len=0;
388
389 while (*(wxUint16*)psz && (!buf || len < n))
390 {
391 if (buf)
392 *buf++ = *(wxUint16*)psz;
393 len++;
394
395 psz += sizeof(wxUint16);
396 }
397 if (buf && len<n) *buf=0;
398
399 return len;
400}
401
402
403// copy 16bit String to 16bit MB
404size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
405{
406 size_t len=0;
407
408 while (*psz && (!buf || len < n))
409 {
410 if (buf)
411 {
412 *(wxUint16*)buf = *psz;
413 buf += sizeof(wxUint16);
414 }
415 len += sizeof(wxUint16);
416 psz++;
417 }
418 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
419
420 return len;
421}
422
423
424// swap 16bit MB to 16bit String
425size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
426{
427 size_t len=0;
428
429 while (*(wxUint16*)psz && (!buf || len < n))
430 {
431 if (buf)
432 {
433 ((char *)buf)[0] = psz[1];
434 ((char *)buf)[1] = psz[0];
435 buf++;
436 }
437 len++;
438 psz += sizeof(wxUint16);
439 }
440 if (buf && len<n) *buf=0;
441
442 return len;
443}
444
445
446// swap 16bit MB to 16bit String
447size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
448{
449 size_t len=0;
450
451 while (*psz && (!buf || len < n))
452 {
453 if (buf)
454 {
455 *buf++ = ((char*)psz)[1];
456 *buf++ = ((char*)psz)[0];
457 }
458 len += sizeof(wxUint16);
459 psz++;
460 }
461 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
462
463 return len;
464}
465
466
467#else // WC_UTF16
468
469
470// copy 16bit MB to 32bit String
471size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
472{
473 size_t len=0;
474
475 while (*(wxUint16*)psz && (!buf || len < n))
476 {
477 wxUint32 cc;
478 size_t pa=decode_utf16((wxUint16*)psz, cc);
479 if (pa == (size_t)-1)
480 return pa;
481
482 if (buf)
483 *buf++ = cc;
484 len++;
485 psz += pa * sizeof(wxUint16);
486 }
487 if (buf && len<n) *buf=0;
488
489 return len;
490}
491
492
493// copy 32bit String to 16bit MB
494size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
495{
496 size_t len=0;
497
498 while (*psz && (!buf || len < n))
499 {
500 wxUint16 cc[2];
501 size_t pa=encode_utf16(*psz, cc);
502
503 if (pa == (size_t)-1)
504 return pa;
505
506 if (buf)
507 {
69b80d28 508 *(wxUint16*)buf = cc[0];
b5153fd8 509 buf += sizeof(wxUint16);
c91830cb 510 if (pa > 1)
69b80d28
VZ
511 {
512 *(wxUint16*)buf = cc[1];
513 buf += sizeof(wxUint16);
514 }
c91830cb
VZ
515 }
516
517 len += pa*sizeof(wxUint16);
518 psz++;
519 }
520 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
521
522 return len;
523}
524
525
526// swap 16bit MB to 32bit String
527size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
528{
529 size_t len=0;
530
531 while (*(wxUint16*)psz && (!buf || len < n))
532 {
533 wxUint32 cc;
534 char tmp[4];
535 tmp[0]=psz[1]; tmp[1]=psz[0];
536 tmp[2]=psz[3]; tmp[3]=psz[2];
537
538 size_t pa=decode_utf16((wxUint16*)tmp, cc);
539 if (pa == (size_t)-1)
540 return pa;
541
542 if (buf)
543 *buf++ = cc;
544
545 len++;
546 psz += pa * sizeof(wxUint16);
547 }
548 if (buf && len<n) *buf=0;
549
550 return len;
551}
552
553
554// swap 32bit String to 16bit MB
555size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
556{
557 size_t len=0;
558
559 while (*psz && (!buf || len < n))
560 {
561 wxUint16 cc[2];
562 size_t pa=encode_utf16(*psz, cc);
563
564 if (pa == (size_t)-1)
565 return pa;
566
567 if (buf)
568 {
569 *buf++ = ((char*)cc)[1];
570 *buf++ = ((char*)cc)[0];
571 if (pa > 1)
572 {
573 *buf++ = ((char*)cc)[3];
574 *buf++ = ((char*)cc)[2];
575 }
576 }
577
578 len += pa*sizeof(wxUint16);
579 psz++;
580 }
581 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
582
583 return len;
584}
585
586#endif // WC_UTF16
587
588
589// ----------------------------------------------------------------------------
590// UTF-32
591// ----------------------------------------------------------------------------
592
593#ifdef WORDS_BIGENDIAN
594#define wxMBConvUTF32straight wxMBConvUTF32BE
595#define wxMBConvUTF32swap wxMBConvUTF32LE
596#else
597#define wxMBConvUTF32swap wxMBConvUTF32BE
598#define wxMBConvUTF32straight wxMBConvUTF32LE
599#endif
600
601
602WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
603WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
604
605
606#ifdef WC_UTF16
607
608// copy 32bit MB to 16bit String
609size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
610{
611 size_t len=0;
612
613 while (*(wxUint32*)psz && (!buf || len < n))
614 {
615 wxUint16 cc[2];
616
617 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
618 if (pa == (size_t)-1)
619 return pa;
620
621 if (buf)
622 {
623 *buf++ = cc[0];
624 if (pa > 1)
625 *buf++ = cc[1];
626 }
627 len += pa;
628 psz += sizeof(wxUint32);
629 }
630 if (buf && len<n) *buf=0;
631
632 return len;
633}
634
635
636// copy 16bit String to 32bit MB
637size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
638{
639 size_t len=0;
640
641 while (*psz && (!buf || len < n))
642 {
643 wxUint32 cc;
644
b5153fd8
VZ
645 // cast is ok for WC_UTF16
646 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
647 if (pa == (size_t)-1)
648 return pa;
649
650 if (buf)
651 {
652 *(wxUint32*)buf = cc;
653 buf += sizeof(wxUint32);
654 }
655 len += sizeof(wxUint32);
656 psz += pa;
657 }
b5153fd8
VZ
658
659 if (buf && len<=n-sizeof(wxUint32))
660 *(wxUint32*)buf=0;
c91830cb
VZ
661
662 return len;
663}
664
665
666
667// swap 32bit MB to 16bit String
668size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
669{
670 size_t len=0;
671
672 while (*(wxUint32*)psz && (!buf || len < n))
673 {
674 char tmp[4];
675 tmp[0] = psz[3]; tmp[1] = psz[2];
676 tmp[2] = psz[1]; tmp[3] = psz[0];
677
678
679 wxUint16 cc[2];
680
681 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
682 if (pa == (size_t)-1)
683 return pa;
684
685 if (buf)
686 {
687 *buf++ = cc[0];
688 if (pa > 1)
689 *buf++ = cc[1];
690 }
691 len += pa;
692 psz += sizeof(wxUint32);
693 }
b5153fd8
VZ
694
695 if (buf && len<n)
696 *buf=0;
c91830cb
VZ
697
698 return len;
699}
700
701
702// swap 16bit String to 32bit MB
703size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
704{
705 size_t len=0;
706
707 while (*psz && (!buf || len < n))
708 {
709 char cc[4];
710
b5153fd8
VZ
711 // cast is ok for WC_UTF16
712 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
713 if (pa == (size_t)-1)
714 return pa;
715
716 if (buf)
717 {
718 *buf++ = cc[3];
719 *buf++ = cc[2];
720 *buf++ = cc[1];
721 *buf++ = cc[0];
722 }
723 len += sizeof(wxUint32);
724 psz += pa;
725 }
b5153fd8
VZ
726
727 if (buf && len<=n-sizeof(wxUint32))
728 *(wxUint32*)buf=0;
c91830cb
VZ
729
730 return len;
731}
732
733#else // WC_UTF16
734
735
736// copy 32bit MB to 32bit String
737size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
738{
739 size_t len=0;
740
741 while (*(wxUint32*)psz && (!buf || len < n))
742 {
743 if (buf)
744 *buf++ = *(wxUint32*)psz;
745 len++;
746 psz += sizeof(wxUint32);
747 }
b5153fd8
VZ
748
749 if (buf && len<n)
750 *buf=0;
c91830cb
VZ
751
752 return len;
753}
754
755
756// copy 32bit String to 32bit MB
757size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
758{
759 size_t len=0;
760
761 while (*psz && (!buf || len < n))
762 {
763 if (buf)
764 {
765 *(wxUint32*)buf = *psz;
766 buf += sizeof(wxUint32);
767 }
768
769 len += sizeof(wxUint32);
770 psz++;
771 }
772
b5153fd8
VZ
773 if (buf && len<=n-sizeof(wxUint32))
774 *(wxUint32*)buf=0;
c91830cb
VZ
775
776 return len;
777}
778
779
780// swap 32bit MB to 32bit String
781size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
782{
783 size_t len=0;
784
785 while (*(wxUint32*)psz && (!buf || len < n))
786 {
787 if (buf)
788 {
789 ((char *)buf)[0] = psz[3];
790 ((char *)buf)[1] = psz[2];
791 ((char *)buf)[2] = psz[1];
792 ((char *)buf)[3] = psz[0];
793 buf++;
794 }
795 len++;
796 psz += sizeof(wxUint32);
797 }
b5153fd8
VZ
798
799 if (buf && len<n)
800 *buf=0;
c91830cb
VZ
801
802 return len;
803}
804
805
806// swap 32bit String to 32bit MB
807size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
808{
809 size_t len=0;
810
811 while (*psz && (!buf || len < n))
812 {
813 if (buf)
814 {
815 *buf++ = ((char *)psz)[3];
816 *buf++ = ((char *)psz)[2];
817 *buf++ = ((char *)psz)[1];
818 *buf++ = ((char *)psz)[0];
819 }
820 len += sizeof(wxUint32);
821 psz++;
822 }
b5153fd8
VZ
823
824 if (buf && len<=n-sizeof(wxUint32))
825 *(wxUint32*)buf=0;
c91830cb
VZ
826
827 return len;
828}
829
830
831#endif // WC_UTF16
832
833
36acb880
VZ
834// ============================================================================
835// The classes doing conversion using the iconv_xxx() functions
836// ============================================================================
3caec1bb 837
b040e242 838#ifdef HAVE_ICONV
3a0d76bc 839
3caec1bb
VS
840// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
841// if output buffer is _exactly_ as big as needed. Such case is (unless there's
842// yet another bug in glibc) the only case when iconv() returns with (size_t)-1
843// (which means error) and says there are 0 bytes left in the input buffer --
844// when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
845// this alternative test for iconv() failure.
846// [This bug does not appear in glibc 2.2.]
847#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
848#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
849 (errno != E2BIG || bufLeft != 0))
850#else
851#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
852#endif
853
ab217dba 854#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880
VZ
855
856// ----------------------------------------------------------------------------
e95354ec 857// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
858// ----------------------------------------------------------------------------
859
e95354ec 860class wxMBConv_iconv : public wxMBConv
1cd52418
OK
861{
862public:
e95354ec
VZ
863 wxMBConv_iconv(const wxChar *name);
864 virtual ~wxMBConv_iconv();
36acb880 865
bde4baac
VZ
866 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
867 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 868
e95354ec 869 bool IsOk() const
36acb880
VZ
870 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
871
872protected:
873 // the iconv handlers used to translate from multibyte to wide char and in
874 // the other direction
875 iconv_t m2w,
876 w2m;
877
878private:
e95354ec 879 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
880 // available on this machine, it will remain NULL
881 static const char *ms_wcCharsetName;
882
883 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
884 // different endian-ness than the native one
405d8f46 885 static bool ms_wcNeedsSwap;
36acb880
VZ
886};
887
e95354ec
VZ
888const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
889bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 890
e95354ec 891wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 892{
04c79127
RR
893 // Do it the hard way
894 char cname[100];
895 for (size_t i = 0; i < wxStrlen(name)+1; i++)
896 cname[i] = (char) name[i];
897
36acb880
VZ
898 // check for charset that represents wchar_t:
899 if (ms_wcCharsetName == NULL)
f1339c56 900 {
e95354ec 901 ms_wcNeedsSwap = false;
dccce9ea 902
36acb880
VZ
903 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
904 ms_wcCharsetName = WC_NAME_BEST;
04c79127 905 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 906
36acb880
VZ
907 if (m2w == (iconv_t)-1)
908 {
909 // try charset w/o bytesex info (e.g. "UCS4")
910 // and check for bytesex ourselves:
911 ms_wcCharsetName = WC_NAME;
04c79127 912 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
913
914 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
915 if (m2w == (iconv_t)-1)
916 {
36acb880 917 ms_wcCharsetName = "WCHAR_T";
04c79127 918 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 919 }
3a0d76bc 920
36acb880
VZ
921 if (m2w != (iconv_t)-1)
922 {
923 char buf[2], *bufPtr;
924 wchar_t wbuf[2], *wbufPtr;
925 size_t insz, outsz;
926 size_t res;
927
928 buf[0] = 'A';
929 buf[1] = 0;
930 wbuf[0] = 0;
931 insz = 2;
932 outsz = SIZEOF_WCHAR_T * 2;
933 wbufPtr = wbuf;
934 bufPtr = buf;
935
936 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
937 (char**)&wbufPtr, &outsz);
938
939 if (ICONV_FAILED(res, insz))
3a0d76bc 940 {
36acb880
VZ
941 ms_wcCharsetName = NULL;
942 wxLogLastError(wxT("iconv"));
2b5f62a0 943 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
944 }
945 else
946 {
36acb880 947 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
948 }
949 }
36acb880
VZ
950 else
951 {
952 ms_wcCharsetName = NULL;
373658eb 953
957686c8
VS
954 // VS: we must not output an error here, since wxWindows will safely
955 // fall back to using wxEncodingConverter.
956 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
957 //wxLogError(
36acb880 958 }
3a0d76bc 959 }
36acb880 960 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 961 }
36acb880 962 else // we already have ms_wcCharsetName
3caec1bb 963 {
04c79127 964 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 965 }
dccce9ea 966
36acb880
VZ
967 // NB: don't ever pass NULL to iconv_open(), it may crash!
968 if ( ms_wcCharsetName )
f1339c56 969 {
04c79127 970 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 971 }
405d8f46
VZ
972 else
973 {
974 w2m = (iconv_t)-1;
975 }
36acb880 976}
3caec1bb 977
e95354ec 978wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
979{
980 if ( m2w != (iconv_t)-1 )
981 iconv_close(m2w);
982 if ( w2m != (iconv_t)-1 )
983 iconv_close(w2m);
984}
3a0d76bc 985
bde4baac 986size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880
VZ
987{
988 size_t inbuf = strlen(psz);
989 size_t outbuf = n * SIZEOF_WCHAR_T;
990 size_t res, cres;
991 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
992 wchar_t *bufPtr = buf;
993 const char *pszPtr = psz;
994
995 if (buf)
996 {
997 // have destination buffer, convert there
998 cres = iconv(m2w,
999 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1000 (char**)&bufPtr, &outbuf);
1001 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1002
36acb880 1003 if (ms_wcNeedsSwap)
3a0d76bc 1004 {
36acb880
VZ
1005 // convert to native endianness
1006 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 1007 }
adb45366 1008
49dd9820
VS
1009 // NB: iconv was given only strlen(psz) characters on input, and so
1010 // it couldn't convert the trailing zero. Let's do it ourselves
1011 // if there's some room left for it in the output buffer.
1012 if (res < n)
1013 buf[res] = 0;
36acb880
VZ
1014 }
1015 else
1016 {
1017 // no destination buffer... convert using temp buffer
1018 // to calculate destination buffer requirement
1019 wchar_t tbuf[8];
1020 res = 0;
1021 do {
1022 bufPtr = tbuf;
1023 outbuf = 8*SIZEOF_WCHAR_T;
1024
1025 cres = iconv(m2w,
1026 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1027 (char**)&bufPtr, &outbuf );
1028
1029 res += 8-(outbuf/SIZEOF_WCHAR_T);
1030 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1031 }
dccce9ea 1032
36acb880 1033 if (ICONV_FAILED(cres, inbuf))
f1339c56 1034 {
36acb880
VZ
1035 //VS: it is ok if iconv fails, hence trace only
1036 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1037 return (size_t)-1;
1038 }
1039
1040 return res;
1041}
1042
bde4baac 1043size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1044{
f8d791e0 1045 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1046 size_t outbuf = n;
1047 size_t res, cres;
3a0d76bc 1048
36acb880 1049 wchar_t *tmpbuf = 0;
3caec1bb 1050
36acb880
VZ
1051 if (ms_wcNeedsSwap)
1052 {
1053 // need to copy to temp buffer to switch endianness
1054 // this absolutely doesn't rock!
1055 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1056 // could be in read-only memory, or be accessed in some other thread)
1057 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1058 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1059 WC_BSWAP(tmpbuf, inbuf)
1060 psz=tmpbuf;
1061 }
3a0d76bc 1062
36acb880
VZ
1063 if (buf)
1064 {
1065 // have destination buffer, convert there
1066 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1067
36acb880 1068 res = n-outbuf;
adb45366 1069
49dd9820
VS
1070 // NB: iconv was given only wcslen(psz) characters on input, and so
1071 // it couldn't convert the trailing zero. Let's do it ourselves
1072 // if there's some room left for it in the output buffer.
1073 if (res < n)
1074 buf[0] = 0;
36acb880
VZ
1075 }
1076 else
1077 {
1078 // no destination buffer... convert using temp buffer
1079 // to calculate destination buffer requirement
1080 char tbuf[16];
1081 res = 0;
1082 do {
1083 buf = tbuf; outbuf = 16;
1084
1085 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1086
36acb880
VZ
1087 res += 16 - outbuf;
1088 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1089 }
dccce9ea 1090
36acb880
VZ
1091 if (ms_wcNeedsSwap)
1092 {
1093 free(tmpbuf);
1094 }
dccce9ea 1095
36acb880
VZ
1096 if (ICONV_FAILED(cres, inbuf))
1097 {
1098 //VS: it is ok if iconv fails, hence trace only
1099 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1100 return (size_t)-1;
1101 }
1102
1103 return res;
1104}
1105
b040e242 1106#endif // HAVE_ICONV
36acb880 1107
e95354ec 1108
36acb880
VZ
1109// ============================================================================
1110// Win32 conversion classes
1111// ============================================================================
1cd52418 1112
e95354ec 1113#ifdef wxHAVE_WIN32_MB2WC
373658eb 1114
8b04d4c4
VZ
1115// from utils.cpp
1116extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1117extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
373658eb 1118
e95354ec 1119class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1120{
1121public:
bde4baac
VZ
1122 wxMBConv_win32()
1123 {
1124 m_CodePage = CP_ACP;
1125 }
1126
e95354ec 1127 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1128 {
1129 m_CodePage = wxCharsetToCodepage(name);
1130 }
dccce9ea 1131
e95354ec 1132 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1133 {
1134 m_CodePage = wxEncodingToCodepage(encoding);
1135 }
8b04d4c4 1136
bde4baac 1137 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1138 {
2b5f62a0
VZ
1139 const size_t len = ::MultiByteToWideChar
1140 (
1141 m_CodePage, // code page
1142 0, // flags (none)
1143 psz, // input string
1144 -1, // its length (NUL-terminated)
b4da152e 1145 buf, // output string
2b5f62a0
VZ
1146 buf ? n : 0 // size of output buffer
1147 );
1148
03a991bc
VZ
1149 // note that it returns count of written chars for buf != NULL and size
1150 // of the needed buffer for buf == NULL so in either case the length of
1151 // the string (which never includes the terminating NUL) is one less
1152 return len ? len - 1 : (size_t)-1;
f1339c56 1153 }
dccce9ea 1154
13dd924a 1155 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1156 {
13dd924a
VZ
1157 /*
1158 we have a problem here: by default, WideCharToMultiByte() may
1159 replace characters unrepresentable in the target code page with bad
1160 quality approximations such as turning "1/2" symbol (U+00BD) into
1161 "1" for the code pages which don't have it and we, obviously, want
1162 to avoid this at any price
1163
1164 the trouble is that this function does it _silently_, i.e. it won't
1165 even tell us whether it did or not... Win98/2000 and higher provide
1166 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1167 we have to resort to a round trip, i.e. check that converting back
1168 results in the same string -- this is, of course, expensive but
1169 otherwise we simply can't be sure to not garble the data.
1170 */
1171
1172 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1173 // it doesn't work with CJK encodings (which we test for rather roughly
1174 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1175 // supporting it
1176 BOOL usedDef wxDUMMY_INITIALIZE(false),
1177 *pUsedDef;
1178 int flags;
1179 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1180 {
1181 // it's our lucky day
1182 flags = WC_NO_BEST_FIT_CHARS;
1183 pUsedDef = &usedDef;
1184 }
1185 else // old system or unsupported encoding
1186 {
1187 flags = 0;
1188 pUsedDef = NULL;
1189 }
1190
2b5f62a0
VZ
1191 const size_t len = ::WideCharToMultiByte
1192 (
1193 m_CodePage, // code page
13dd924a
VZ
1194 flags, // either none or no best fit
1195 pwz, // input string
2b5f62a0
VZ
1196 -1, // it is (wide) NUL-terminated
1197 buf, // output buffer
1198 buf ? n : 0, // and its size
1199 NULL, // default "replacement" char
13dd924a 1200 pUsedDef // [out] was it used?
2b5f62a0
VZ
1201 );
1202
13dd924a
VZ
1203 if ( !len )
1204 {
1205 // function totally failed
1206 return (size_t)-1;
1207 }
1208
1209 // if we were really converting, check if we succeeded
1210 if ( buf )
1211 {
1212 if ( flags )
1213 {
1214 // check if the conversion failed, i.e. if any replacements
1215 // were done
1216 if ( usedDef )
1217 return (size_t)-1;
1218 }
1219 else // we must resort to double tripping...
1220 {
1221 wxWCharBuffer wcBuf(n);
1222 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1223 wcscmp(wcBuf, pwz) != 0 )
1224 {
1225 // we didn't obtain the same thing we started from, hence
1226 // the conversion was lossy and we consider that it failed
1227 return (size_t)-1;
1228 }
1229 }
1230 }
1231
03a991bc 1232 // see the comment above for the reason of "len - 1"
13dd924a 1233 return len - 1;
f1339c56 1234 }
dccce9ea 1235
13dd924a
VZ
1236 bool IsOk() const { return m_CodePage != -1; }
1237
1238private:
1239 static bool CanUseNoBestFit()
1240 {
1241 static int s_isWin98Or2k = -1;
1242
1243 if ( s_isWin98Or2k == -1 )
1244 {
1245 int verMaj, verMin;
1246 switch ( wxGetOsVersion(&verMaj, &verMin) )
1247 {
1248 case wxWIN95:
1249 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1250 break;
1251
1252 case wxWINDOWS_NT:
1253 s_isWin98Or2k = verMaj >= 5;
1254 break;
1255
1256 default:
1257 // unknown, be conseravtive by default
1258 s_isWin98Or2k = 0;
1259 }
1260
1261 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1262 }
1263
1264 return s_isWin98Or2k == 1;
1265 }
f1339c56 1266
b1d66b54 1267 long m_CodePage;
1cd52418 1268};
e95354ec
VZ
1269
1270#endif // wxHAVE_WIN32_MB2WC
1271
335d31e0
SC
1272// ============================================================================
1273// Mac conversion classes
1274// ============================================================================
1275
1276#if defined(__WXMAC__) && defined(TARGET_CARBON)
1277
1278class wxMBConv_mac : public wxMBConv
1279{
1280public:
1281 wxMBConv_mac()
1282 {
1283 Init(CFStringGetSystemEncoding()) ;
1284 }
1285
1286 wxMBConv_mac(const wxChar* name)
1287 {
8057c6d6 1288 Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, FALSE) ) ) ;
335d31e0
SC
1289 }
1290
1291 wxMBConv_mac(wxFontEncoding encoding)
1292 {
8057c6d6 1293 Init( wxMacGetSystemEncFromFontEnc(encoding) );
335d31e0
SC
1294 }
1295
1296 ~wxMBConv_mac()
1297 {
1298 OSStatus status = noErr ;
1299 status = TECDisposeConverter(m_MB2WC_converter);
1300 status = TECDisposeConverter(m_WC2MB_converter);
1301 }
1302
335d31e0
SC
1303
1304 void Init( TextEncodingBase encoding)
1305 {
1306 OSStatus status = noErr ;
1307 m_char_encoding = encoding ;
335d31e0 1308 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
f3a355ce 1309
335d31e0
SC
1310 status = TECCreateConverter(&m_MB2WC_converter,
1311 m_char_encoding,
1312 m_unicode_encoding);
1313 status = TECCreateConverter(&m_WC2MB_converter,
1314 m_unicode_encoding,
1315 m_char_encoding);
1316 }
1317
1318 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1319 {
1320 OSStatus status = noErr ;
1321 ByteCount byteOutLen ;
1322 ByteCount byteInLen = strlen(psz) ;
335d31e0 1323 wchar_t *tbuf = NULL ;
f3a355ce
SC
1324 UniChar* ubuf = NULL ;
1325 size_t res = 0 ;
335d31e0
SC
1326
1327 if (buf == NULL)
1328 {
5c250a10
SC
1329 n = byteInLen ;
1330 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
335d31e0 1331 }
f3a355ce
SC
1332 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
1333#if SIZEOF_WCHAR_T == 4
8471ea90 1334 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce
SC
1335#else
1336 ubuf = (UniChar*) (buf ? buf : tbuf) ;
1337#endif
335d31e0 1338 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
f3a355ce
SC
1339 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
1340#if SIZEOF_WCHAR_T == 4
8471ea90
SC
1341 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
1342 // is not properly terminated we get random characters at the end
1343 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
f3a355ce
SC
1344 wxMBConvUTF16BE converter ;
1345 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
1346 free( ubuf ) ;
1347#else
1348 res = byteOutLen / sizeof( UniChar ) ;
1349#endif
335d31e0
SC
1350 if ( buf == NULL )
1351 free(tbuf) ;
1352
335d31e0
SC
1353 if ( buf && res < n)
1354 buf[res] = 0;
1355
1356 return res ;
1357 }
1358
1359 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
1360 {
1361 OSStatus status = noErr ;
1362 ByteCount byteOutLen ;
1363 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
335d31e0
SC
1364
1365 char *tbuf = NULL ;
1366
1367 if (buf == NULL)
1368 {
5c250a10
SC
1369 // worst case
1370 n = byteInLen * 2 ;
335d31e0
SC
1371 tbuf = (char*) malloc( n ) ;
1372 }
1373
5c250a10 1374 ByteCount byteBufferLen = n ;
f3a355ce
SC
1375 UniChar* ubuf = NULL ;
1376#if SIZEOF_WCHAR_T == 4
1377 wxMBConvUTF16BE converter ;
1378 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
4227afa4
SC
1379 byteInLen = unicharlen ;
1380 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
1381 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce
SC
1382#else
1383 ubuf = (UniChar*) psz ;
1384#endif
1385 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
1386 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
1387#if SIZEOF_WCHAR_T == 4
1388 free( ubuf ) ;
1389#endif
335d31e0
SC
1390 if ( buf == NULL )
1391 free(tbuf) ;
1392
1393 size_t res = byteOutLen ;
1394 if ( buf && res < n)
1395 buf[res] = 0;
1396
1397 return res ;
1398 }
1399
1400 bool IsOk() const
1401 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
1402
1403private:
1404 TECObjectRef m_MB2WC_converter ;
1405 TECObjectRef m_WC2MB_converter ;
1406
1407 TextEncodingBase m_char_encoding ;
1408 TextEncodingBase m_unicode_encoding ;
1409};
1410
1411#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 1412
36acb880
VZ
1413// ============================================================================
1414// wxEncodingConverter based conversion classes
1415// ============================================================================
1416
1e6feb95 1417#if wxUSE_FONTMAP
1cd52418 1418
e95354ec 1419class wxMBConv_wxwin : public wxMBConv
1cd52418 1420{
8b04d4c4
VZ
1421private:
1422 void Init()
1423 {
1424 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
1425 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
1426 }
1427
6001e347 1428public:
f1339c56
RR
1429 // temporarily just use wxEncodingConverter stuff,
1430 // so that it works while a better implementation is built
e95354ec 1431 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
1432 {
1433 if (name)
e95354ec 1434 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
1435 else
1436 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 1437
8b04d4c4
VZ
1438 Init();
1439 }
1440
e95354ec 1441 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
1442 {
1443 m_enc = enc;
1444
1445 Init();
f1339c56 1446 }
dccce9ea 1447
bde4baac 1448 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
1449 {
1450 size_t inbuf = strlen(psz);
dccce9ea 1451 if (buf)
4def3b35 1452 m2w.Convert(psz,buf);
f1339c56
RR
1453 return inbuf;
1454 }
dccce9ea 1455
bde4baac 1456 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 1457 {
f8d791e0 1458 const size_t inbuf = wxWcslen(psz);
f1339c56
RR
1459 if (buf)
1460 w2m.Convert(psz,buf);
dccce9ea 1461
f1339c56
RR
1462 return inbuf;
1463 }
dccce9ea 1464
e95354ec 1465 bool IsOk() const { return m_ok; }
f1339c56
RR
1466
1467public:
8b04d4c4 1468 wxFontEncoding m_enc;
f1339c56 1469 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
1470
1471 // were we initialized successfully?
1472 bool m_ok;
fc7a2a60 1473
e95354ec 1474 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 1475};
6001e347 1476
1e6feb95
VZ
1477#endif // wxUSE_FONTMAP
1478
36acb880
VZ
1479// ============================================================================
1480// wxCSConv implementation
1481// ============================================================================
1482
8b04d4c4 1483void wxCSConv::Init()
6001e347 1484{
e95354ec
VZ
1485 m_name = NULL;
1486 m_convReal = NULL;
1487 m_deferred = true;
1488}
1489
8b04d4c4
VZ
1490wxCSConv::wxCSConv(const wxChar *charset)
1491{
1492 Init();
82713003 1493
e95354ec
VZ
1494 if ( charset )
1495 {
e95354ec
VZ
1496 SetName(charset);
1497 }
bda3d86a
VZ
1498
1499 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
1500}
1501
8b04d4c4
VZ
1502wxCSConv::wxCSConv(wxFontEncoding encoding)
1503{
bda3d86a 1504 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
1505 {
1506 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
1507
1508 encoding = wxFONTENCODING_SYSTEM;
1509 }
1510
8b04d4c4
VZ
1511 Init();
1512
bda3d86a 1513 m_encoding = encoding;
8b04d4c4
VZ
1514}
1515
6001e347
RR
1516wxCSConv::~wxCSConv()
1517{
65e50848
JS
1518 Clear();
1519}
1520
54380f29 1521wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 1522 : wxMBConv()
54380f29 1523{
8b04d4c4
VZ
1524 Init();
1525
54380f29 1526 SetName(conv.m_name);
8b04d4c4 1527 m_encoding = conv.m_encoding;
54380f29
GD
1528}
1529
1530wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
1531{
1532 Clear();
8b04d4c4 1533
54380f29 1534 SetName(conv.m_name);
8b04d4c4
VZ
1535 m_encoding = conv.m_encoding;
1536
54380f29
GD
1537 return *this;
1538}
1539
65e50848
JS
1540void wxCSConv::Clear()
1541{
8b04d4c4 1542 free(m_name);
e95354ec 1543 delete m_convReal;
8b04d4c4 1544
65e50848 1545 m_name = NULL;
e95354ec 1546 m_convReal = NULL;
6001e347
RR
1547}
1548
1549void wxCSConv::SetName(const wxChar *charset)
1550{
f1339c56
RR
1551 if (charset)
1552 {
1553 m_name = wxStrdup(charset);
e95354ec 1554 m_deferred = true;
f1339c56 1555 }
6001e347
RR
1556}
1557
e95354ec
VZ
1558wxMBConv *wxCSConv::DoCreate() const
1559{
c547282d
VZ
1560 // check for the special case of ASCII or ISO8859-1 charset: as we have
1561 // special knowledge of it anyhow, we don't need to create a special
1562 // conversion object
1563 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 1564 {
e95354ec
VZ
1565 // don't convert at all
1566 return NULL;
1567 }
dccce9ea 1568
e95354ec
VZ
1569 // we trust OS to do conversion better than we can so try external
1570 // conversion methods first
1571 //
1572 // the full order is:
1573 // 1. OS conversion (iconv() under Unix or Win32 API)
1574 // 2. hard coded conversions for UTF
1575 // 3. wxEncodingConverter as fall back
1576
1577 // step (1)
1578#ifdef HAVE_ICONV
c547282d 1579#if !wxUSE_FONTMAP
e95354ec 1580 if ( m_name )
c547282d 1581#endif // !wxUSE_FONTMAP
e95354ec 1582 {
c547282d
VZ
1583 wxString name(m_name);
1584
1585#if wxUSE_FONTMAP
1586 if ( name.empty() )
1587 name = wxFontMapper::Get()->GetEncodingName(m_encoding);
1588#endif // wxUSE_FONTMAP
1589
1590 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
e95354ec
VZ
1591 if ( conv->IsOk() )
1592 return conv;
1593
1594 delete conv;
1595 }
1596#endif // HAVE_ICONV
1597
1598#ifdef wxHAVE_WIN32_MB2WC
1599 {
1600 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
1601 : new wxMBConv_win32(m_encoding);
1602 if ( conv->IsOk() )
1603 return conv;
1604
1605 delete conv;
1606 }
1607#endif // wxHAVE_WIN32_MB2WC
335d31e0
SC
1608#if defined(__WXMAC__)
1609 {
1610 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
1611 {
1612
1613 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
1614 : new wxMBConv_mac(m_encoding);
1615 if ( conv->IsOk() )
1616 return conv;
1617
1618 delete conv;
1619 }
1620 }
1621#endif
e95354ec
VZ
1622 // step (2)
1623 wxFontEncoding enc = m_encoding;
1624#if wxUSE_FONTMAP
c547282d
VZ
1625 if ( enc == wxFONTENCODING_SYSTEM && m_name )
1626 {
1627 // use "false" to suppress interactive dialogs -- we can be called from
1628 // anywhere and popping up a dialog from here is the last thing we want to
1629 // do
1630 enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
1631 }
e95354ec
VZ
1632#endif // wxUSE_FONTMAP
1633
1634 switch ( enc )
1635 {
1636 case wxFONTENCODING_UTF7:
1637 return new wxMBConvUTF7;
1638
1639 case wxFONTENCODING_UTF8:
1640 return new wxMBConvUTF8;
1641
e95354ec
VZ
1642 case wxFONTENCODING_UTF16BE:
1643 return new wxMBConvUTF16BE;
1644
1645 case wxFONTENCODING_UTF16LE:
1646 return new wxMBConvUTF16LE;
1647
e95354ec
VZ
1648 case wxFONTENCODING_UTF32BE:
1649 return new wxMBConvUTF32BE;
1650
1651 case wxFONTENCODING_UTF32LE:
1652 return new wxMBConvUTF32LE;
1653
1654 default:
1655 // nothing to do but put here to suppress gcc warnings
1656 ;
1657 }
1658
1659 // step (3)
1660#if wxUSE_FONTMAP
1661 {
1662 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
1663 : new wxMBConv_wxwin(m_encoding);
1664 if ( conv->IsOk() )
1665 return conv;
1666
1667 delete conv;
1668 }
1669#endif // wxUSE_FONTMAP
1670
a58d4f4d
VS
1671 // NB: This is a hack to prevent deadlock. What could otherwise happen
1672 // in Unicode build: wxConvLocal creation ends up being here
1673 // because of some failure and logs the error. But wxLog will try to
1674 // attach timestamp, for which it will need wxConvLocal (to convert
1675 // time to char* and then wchar_t*), but that fails, tries to log
1676 // error, but wxLog has a (already locked) critical section that
1677 // guards static buffer.
1678 static bool alreadyLoggingError = false;
1679 if (!alreadyLoggingError)
1680 {
1681 alreadyLoggingError = true;
1682 wxLogError(_("Cannot convert from the charset '%s'!"),
1683 m_name ? m_name
e95354ec
VZ
1684 :
1685#if wxUSE_FONTMAP
1686 wxFontMapper::GetEncodingDescription(m_encoding).c_str()
1687#else // !wxUSE_FONTMAP
1688 wxString::Format(_("encoding %s"), m_encoding).c_str()
1689#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1690 );
a58d4f4d
VS
1691 alreadyLoggingError = false;
1692 }
e95354ec
VZ
1693
1694 return NULL;
1695}
1696
1697void wxCSConv::CreateConvIfNeeded() const
1698{
1699 if ( m_deferred )
1700 {
1701 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
1702
1703#if wxUSE_INTL
1704 // if we don't have neither the name nor the encoding, use the default
1705 // encoding for this system
1706 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
1707 {
4d312c22 1708 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
1709 }
1710#endif // wxUSE_INTL
1711
e95354ec
VZ
1712 self->m_convReal = DoCreate();
1713 self->m_deferred = false;
6001e347 1714 }
6001e347
RR
1715}
1716
1717size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1718{
e95354ec 1719 CreateConvIfNeeded();
dccce9ea 1720
e95354ec
VZ
1721 if (m_convReal)
1722 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
1723
1724 // latin-1 (direct)
4def3b35 1725 size_t len = strlen(psz);
dccce9ea 1726
f1339c56
RR
1727 if (buf)
1728 {
4def3b35 1729 for (size_t c = 0; c <= len; c++)
f1339c56
RR
1730 buf[c] = (unsigned char)(psz[c]);
1731 }
dccce9ea 1732
f1339c56 1733 return len;
6001e347
RR
1734}
1735
1736size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1737{
e95354ec 1738 CreateConvIfNeeded();
dccce9ea 1739
e95354ec
VZ
1740 if (m_convReal)
1741 return m_convReal->WC2MB(buf, psz, n);
1cd52418 1742
f1339c56 1743 // latin-1 (direct)
f8d791e0 1744 const size_t len = wxWcslen(psz);
f1339c56
RR
1745 if (buf)
1746 {
4def3b35 1747 for (size_t c = 0; c <= len; c++)
24642831
VS
1748 {
1749 if (psz[c] > 0xFF)
1750 return (size_t)-1;
1751 buf[c] = psz[c];
1752 }
1753 }
1754 else
1755 {
1756 for (size_t c = 0; c <= len; c++)
1757 {
1758 if (psz[c] > 0xFF)
1759 return (size_t)-1;
1760 }
f1339c56 1761 }
dccce9ea 1762
f1339c56 1763 return len;
6001e347
RR
1764}
1765
bde4baac
VZ
1766// ----------------------------------------------------------------------------
1767// globals
1768// ----------------------------------------------------------------------------
1769
1770#ifdef __WINDOWS__
1771 static wxMBConv_win32 wxConvLibcObj;
1772#else
dcc8fac0 1773 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
1774#endif
1775
1776static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
1777static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
1778static wxMBConvUTF7 wxConvUTF7Obj;
1779static wxMBConvUTF8 wxConvUTF8Obj;
1780
1781
1782WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
1783WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
1784WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
1785WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
1786WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
1787WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
1788
1789#else // !wxUSE_WCHAR_T
1790
1791// stand-ins in absence of wchar_t
1792WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
1793 wxConvISO8859_1,
1794 wxConvLocal,
1795 wxConvUTF8;
1796
1797#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
1798
1799