]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
wchar_t (4 bytes) / unichar (2 bytes) problems first attempt to fix
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
3a0d76bc 4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
6001e347
RR
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
e95354ec
VZ
8// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
9// (c) 2000-2003 Vadim Zeitlin
55d99c7a 10// Licence: wxWindows licence
6001e347
RR
11/////////////////////////////////////////////////////////////////////////////
12
f6bcfd97
BP
13// ============================================================================
14// declarations
15// ============================================================================
16
17// ----------------------------------------------------------------------------
18// headers
19// ----------------------------------------------------------------------------
20
14f355c2 21#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
22 #pragma implementation "strconv.h"
23#endif
24
25// For compilers that support precompilation, includes "wx.h".
26#include "wx/wxprec.h"
27
28#ifdef __BORLANDC__
29 #pragma hdrstop
30#endif
31
373658eb
VZ
32#ifndef WX_PRECOMP
33 #include "wx/intl.h"
34 #include "wx/log.h"
35#endif // WX_PRECOMP
36
bde4baac
VZ
37#include "wx/strconv.h"
38
39#if wxUSE_WCHAR_T
40
0a1c1e62 41#ifdef __WXMSW__
373658eb 42 #include "wx/msw/private.h"
0a1c1e62
GRG
43#endif
44
1c193821 45#ifndef __WXWINCE__
1cd52418 46#include <errno.h>
1c193821
JS
47#endif
48
6001e347
RR
49#include <ctype.h>
50#include <string.h>
51#include <stdlib.h>
52
e95354ec
VZ
53#if defined(__WIN32__) && !defined(__WXMICROWIN__)
54 #define wxHAVE_WIN32_MB2WC
55#endif // __WIN32__ but !__WXMICROWIN__
56
373658eb
VZ
57// ----------------------------------------------------------------------------
58// headers
59// ----------------------------------------------------------------------------
7af284fd 60
6001e347 61#ifdef __SALFORDC__
373658eb 62 #include <clib.h>
6001e347
RR
63#endif
64
b040e242 65#ifdef HAVE_ICONV
373658eb 66 #include <iconv.h>
1cd52418 67#endif
1cd52418 68
373658eb
VZ
69#include "wx/encconv.h"
70#include "wx/fontmap.h"
71
335d31e0
SC
72#ifdef __WXMAC__
73#include "ATSUnicode.h"
74#include "TextCommon.h"
75#include "TextEncodingConverter.h"
76
77#include "wx/mac/private.h" // includes mac headers
78#endif
373658eb
VZ
79// ----------------------------------------------------------------------------
80// macros
81// ----------------------------------------------------------------------------
3e61dfb0 82
1cd52418 83#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
3a0d76bc 84#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
1cd52418
OK
85
86#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
87 #define WC_NAME "UCS4"
88 #define WC_BSWAP BSWAP_UCS4
89 #ifdef WORDS_BIGENDIAN
90 #define WC_NAME_BEST "UCS-4BE"
91 #else
92 #define WC_NAME_BEST "UCS-4LE"
93 #endif
1cd52418 94#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
95 #define WC_NAME "UTF16"
96 #define WC_BSWAP BSWAP_UTF16
a3f2769e 97 #define WC_UTF16
3a0d76bc
VS
98 #ifdef WORDS_BIGENDIAN
99 #define WC_NAME_BEST "UTF-16BE"
100 #else
101 #define WC_NAME_BEST "UTF-16LE"
102 #endif
bab1e722 103#else // sizeof(wchar_t) != 2 nor 4
bde4baac
VZ
104 // does this ever happen?
105 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1cd52418
OK
106#endif
107
373658eb
VZ
108// ============================================================================
109// implementation
110// ============================================================================
111
112// ----------------------------------------------------------------------------
c91830cb 113// UTF-16 en/decoding to/from UCS-4
373658eb 114// ----------------------------------------------------------------------------
6001e347 115
b0a6bb75 116
c91830cb 117static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 118{
dccce9ea 119 if (input<=0xffff)
4def3b35 120 {
999836aa
VZ
121 if (output)
122 *output = (wxUint16) input;
4def3b35 123 return 1;
dccce9ea
VZ
124 }
125 else if (input>=0x110000)
4def3b35
VS
126 {
127 return (size_t)-1;
dccce9ea
VZ
128 }
129 else
4def3b35 130 {
dccce9ea 131 if (output)
4def3b35 132 {
c91830cb 133 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
999836aa 134 *output = (wxUint16) ((input&0x3ff)+0xdc00);
4def3b35
VS
135 }
136 return 2;
1cd52418 137 }
1cd52418
OK
138}
139
c91830cb 140static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 141{
dccce9ea 142 if ((*input<0xd800) || (*input>0xdfff))
4def3b35
VS
143 {
144 output = *input;
145 return 1;
dccce9ea
VZ
146 }
147 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
4def3b35
VS
148 {
149 output = *input;
150 return (size_t)-1;
dccce9ea
VZ
151 }
152 else
4def3b35
VS
153 {
154 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
155 return 2;
156 }
1cd52418
OK
157}
158
b0a6bb75 159
f6bcfd97 160// ----------------------------------------------------------------------------
6001e347 161// wxMBConv
f6bcfd97 162// ----------------------------------------------------------------------------
6001e347 163
2b5f62a0
VZ
164wxMBConv::~wxMBConv()
165{
166 // nothing to do here
167}
168
6001e347
RR
169const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
170{
2b5f62a0 171 if ( psz )
6001e347 172 {
2b5f62a0
VZ
173 // calculate the length of the buffer needed first
174 size_t nLen = MB2WC(NULL, psz, 0);
175 if ( nLen != (size_t)-1 )
176 {
177 // now do the actual conversion
178 wxWCharBuffer buf(nLen);
179 MB2WC(buf.data(), psz, nLen + 1); // with the trailing NUL
180
181 return buf;
182 }
f6bcfd97 183 }
2b5f62a0
VZ
184
185 wxWCharBuffer buf((wchar_t *)NULL);
186
187 return buf;
6001e347
RR
188}
189
e5cceba0 190const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 191{
2b5f62a0
VZ
192 if ( pwz )
193 {
194 size_t nLen = WC2MB(NULL, pwz, 0);
195 if ( nLen != (size_t)-1 )
196 {
c91830cb
VZ
197 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
198 WC2MB(buf.data(), pwz, nLen + 4);
2b5f62a0
VZ
199
200 return buf;
201 }
202 }
203
204 wxCharBuffer buf((char *)NULL);
e5cceba0 205
e5cceba0 206 return buf;
6001e347
RR
207}
208
6001e347 209// ----------------------------------------------------------------------------
bde4baac 210// wxMBConvLibc
6001e347
RR
211// ----------------------------------------------------------------------------
212
bde4baac
VZ
213size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
214{
215 return wxMB2WC(buf, psz, n);
216}
217
218size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
219{
220 return wxWC2MB(buf, psz, n);
221}
222
223// ----------------------------------------------------------------------------
224// UTF-7
225// ----------------------------------------------------------------------------
6001e347
RR
226
227#if 0
228static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
229 "abcdefghijklmnopqrstuvwxyz"
230 "0123456789'(),-./:?";
231static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
232static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
233 "abcdefghijklmnopqrstuvwxyz"
234 "0123456789+/";
235#endif
236
237// TODO: write actual implementations of UTF-7 here
238size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
239 const char * WXUNUSED(psz),
240 size_t WXUNUSED(n)) const
241{
242 return 0;
243}
244
245size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
246 const wchar_t * WXUNUSED(psz),
247 size_t WXUNUSED(n)) const
248{
249 return 0;
250}
251
f6bcfd97 252// ----------------------------------------------------------------------------
6001e347 253// UTF-8
f6bcfd97 254// ----------------------------------------------------------------------------
6001e347 255
dccce9ea 256static wxUint32 utf8_max[]=
4def3b35 257 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347
RR
258
259size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
260{
4def3b35
VS
261 size_t len = 0;
262
dccce9ea 263 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
264 {
265 unsigned char cc = *psz++, fc = cc;
266 unsigned cnt;
dccce9ea 267 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 268 fc <<= 1;
dccce9ea 269 if (!cnt)
4def3b35
VS
270 {
271 // plain ASCII char
dccce9ea 272 if (buf)
4def3b35
VS
273 *buf++ = cc;
274 len++;
dccce9ea
VZ
275 }
276 else
4def3b35
VS
277 {
278 cnt--;
dccce9ea 279 if (!cnt)
4def3b35
VS
280 {
281 // invalid UTF-8 sequence
282 return (size_t)-1;
dccce9ea
VZ
283 }
284 else
4def3b35
VS
285 {
286 unsigned ocnt = cnt - 1;
287 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 288 while (cnt--)
4def3b35
VS
289 {
290 cc = *psz++;
dccce9ea 291 if ((cc & 0xC0) != 0x80)
4def3b35
VS
292 {
293 // invalid UTF-8 sequence
294 return (size_t)-1;
295 }
296 res = (res << 6) | (cc & 0x3f);
297 }
dccce9ea 298 if (res <= utf8_max[ocnt])
4def3b35
VS
299 {
300 // illegal UTF-8 encoding
301 return (size_t)-1;
302 }
1cd52418 303#ifdef WC_UTF16
b5153fd8
VZ
304 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
305 size_t pa = encode_utf16(res, (wxUint16 *)buf);
4def3b35
VS
306 if (pa == (size_t)-1)
307 return (size_t)-1;
dccce9ea 308 if (buf)
4def3b35
VS
309 buf += pa;
310 len += pa;
373658eb 311#else // !WC_UTF16
dccce9ea 312 if (buf)
4def3b35
VS
313 *buf++ = res;
314 len++;
373658eb 315#endif // WC_UTF16/!WC_UTF16
4def3b35
VS
316 }
317 }
6001e347 318 }
dccce9ea 319 if (buf && (len < n))
4def3b35
VS
320 *buf = 0;
321 return len;
6001e347
RR
322}
323
324size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
325{
4def3b35 326 size_t len = 0;
6001e347 327
dccce9ea 328 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
329 {
330 wxUint32 cc;
1cd52418 331#ifdef WC_UTF16
b5153fd8
VZ
332 // cast is ok for WC_UTF16
333 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 334 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 335#else
4def3b35
VS
336 cc=(*psz++) & 0x7fffffff;
337#endif
338 unsigned cnt;
339 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
dccce9ea 340 if (!cnt)
4def3b35
VS
341 {
342 // plain ASCII char
dccce9ea 343 if (buf)
574c939e 344 *buf++ = (char) cc;
4def3b35 345 len++;
dccce9ea
VZ
346 }
347
348 else
4def3b35
VS
349 {
350 len += cnt + 1;
dccce9ea 351 if (buf)
4def3b35 352 {
574c939e 353 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
4def3b35 354 while (cnt--)
574c939e 355 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
4def3b35
VS
356 }
357 }
6001e347 358 }
4def3b35
VS
359
360 if (buf && (len<n)) *buf = 0;
adb45366 361
4def3b35 362 return len;
6001e347
RR
363}
364
c91830cb
VZ
365
366
367
368// ----------------------------------------------------------------------------
369// UTF-16
370// ----------------------------------------------------------------------------
371
372#ifdef WORDS_BIGENDIAN
bde4baac
VZ
373 #define wxMBConvUTF16straight wxMBConvUTF16BE
374 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 375#else
bde4baac
VZ
376 #define wxMBConvUTF16swap wxMBConvUTF16BE
377 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
378#endif
379
380
c91830cb
VZ
381#ifdef WC_UTF16
382
c91830cb
VZ
383// copy 16bit MB to 16bit String
384size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
385{
386 size_t len=0;
387
388 while (*(wxUint16*)psz && (!buf || len < n))
389 {
390 if (buf)
391 *buf++ = *(wxUint16*)psz;
392 len++;
393
394 psz += sizeof(wxUint16);
395 }
396 if (buf && len<n) *buf=0;
397
398 return len;
399}
400
401
402// copy 16bit String to 16bit MB
403size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
404{
405 size_t len=0;
406
407 while (*psz && (!buf || len < n))
408 {
409 if (buf)
410 {
411 *(wxUint16*)buf = *psz;
412 buf += sizeof(wxUint16);
413 }
414 len += sizeof(wxUint16);
415 psz++;
416 }
417 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
418
419 return len;
420}
421
422
423// swap 16bit MB to 16bit String
424size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
425{
426 size_t len=0;
427
428 while (*(wxUint16*)psz && (!buf || len < n))
429 {
430 if (buf)
431 {
432 ((char *)buf)[0] = psz[1];
433 ((char *)buf)[1] = psz[0];
434 buf++;
435 }
436 len++;
437 psz += sizeof(wxUint16);
438 }
439 if (buf && len<n) *buf=0;
440
441 return len;
442}
443
444
445// swap 16bit MB to 16bit String
446size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
447{
448 size_t len=0;
449
450 while (*psz && (!buf || len < n))
451 {
452 if (buf)
453 {
454 *buf++ = ((char*)psz)[1];
455 *buf++ = ((char*)psz)[0];
456 }
457 len += sizeof(wxUint16);
458 psz++;
459 }
460 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
461
462 return len;
463}
464
465
466#else // WC_UTF16
467
468
469// copy 16bit MB to 32bit String
470size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
471{
472 size_t len=0;
473
474 while (*(wxUint16*)psz && (!buf || len < n))
475 {
476 wxUint32 cc;
477 size_t pa=decode_utf16((wxUint16*)psz, cc);
478 if (pa == (size_t)-1)
479 return pa;
480
481 if (buf)
482 *buf++ = cc;
483 len++;
484 psz += pa * sizeof(wxUint16);
485 }
486 if (buf && len<n) *buf=0;
487
488 return len;
489}
490
491
492// copy 32bit String to 16bit MB
493size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
494{
495 size_t len=0;
496
497 while (*psz && (!buf || len < n))
498 {
499 wxUint16 cc[2];
500 size_t pa=encode_utf16(*psz, cc);
501
502 if (pa == (size_t)-1)
503 return pa;
504
505 if (buf)
506 {
69b80d28 507 *(wxUint16*)buf = cc[0];
b5153fd8 508 buf += sizeof(wxUint16);
c91830cb 509 if (pa > 1)
69b80d28
VZ
510 {
511 *(wxUint16*)buf = cc[1];
512 buf += sizeof(wxUint16);
513 }
c91830cb
VZ
514 }
515
516 len += pa*sizeof(wxUint16);
517 psz++;
518 }
519 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
520
521 return len;
522}
523
524
525// swap 16bit MB to 32bit String
526size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
527{
528 size_t len=0;
529
530 while (*(wxUint16*)psz && (!buf || len < n))
531 {
532 wxUint32 cc;
533 char tmp[4];
534 tmp[0]=psz[1]; tmp[1]=psz[0];
535 tmp[2]=psz[3]; tmp[3]=psz[2];
536
537 size_t pa=decode_utf16((wxUint16*)tmp, cc);
538 if (pa == (size_t)-1)
539 return pa;
540
541 if (buf)
542 *buf++ = cc;
543
544 len++;
545 psz += pa * sizeof(wxUint16);
546 }
547 if (buf && len<n) *buf=0;
548
549 return len;
550}
551
552
553// swap 32bit String to 16bit MB
554size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
555{
556 size_t len=0;
557
558 while (*psz && (!buf || len < n))
559 {
560 wxUint16 cc[2];
561 size_t pa=encode_utf16(*psz, cc);
562
563 if (pa == (size_t)-1)
564 return pa;
565
566 if (buf)
567 {
568 *buf++ = ((char*)cc)[1];
569 *buf++ = ((char*)cc)[0];
570 if (pa > 1)
571 {
572 *buf++ = ((char*)cc)[3];
573 *buf++ = ((char*)cc)[2];
574 }
575 }
576
577 len += pa*sizeof(wxUint16);
578 psz++;
579 }
580 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
581
582 return len;
583}
584
585#endif // WC_UTF16
586
587
588// ----------------------------------------------------------------------------
589// UTF-32
590// ----------------------------------------------------------------------------
591
592#ifdef WORDS_BIGENDIAN
593#define wxMBConvUTF32straight wxMBConvUTF32BE
594#define wxMBConvUTF32swap wxMBConvUTF32LE
595#else
596#define wxMBConvUTF32swap wxMBConvUTF32BE
597#define wxMBConvUTF32straight wxMBConvUTF32LE
598#endif
599
600
601WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
602WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
603
604
605#ifdef WC_UTF16
606
607// copy 32bit MB to 16bit String
608size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
609{
610 size_t len=0;
611
612 while (*(wxUint32*)psz && (!buf || len < n))
613 {
614 wxUint16 cc[2];
615
616 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
617 if (pa == (size_t)-1)
618 return pa;
619
620 if (buf)
621 {
622 *buf++ = cc[0];
623 if (pa > 1)
624 *buf++ = cc[1];
625 }
626 len += pa;
627 psz += sizeof(wxUint32);
628 }
629 if (buf && len<n) *buf=0;
630
631 return len;
632}
633
634
635// copy 16bit String to 32bit MB
636size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
637{
638 size_t len=0;
639
640 while (*psz && (!buf || len < n))
641 {
642 wxUint32 cc;
643
b5153fd8
VZ
644 // cast is ok for WC_UTF16
645 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
646 if (pa == (size_t)-1)
647 return pa;
648
649 if (buf)
650 {
651 *(wxUint32*)buf = cc;
652 buf += sizeof(wxUint32);
653 }
654 len += sizeof(wxUint32);
655 psz += pa;
656 }
b5153fd8
VZ
657
658 if (buf && len<=n-sizeof(wxUint32))
659 *(wxUint32*)buf=0;
c91830cb
VZ
660
661 return len;
662}
663
664
665
666// swap 32bit MB to 16bit String
667size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
668{
669 size_t len=0;
670
671 while (*(wxUint32*)psz && (!buf || len < n))
672 {
673 char tmp[4];
674 tmp[0] = psz[3]; tmp[1] = psz[2];
675 tmp[2] = psz[1]; tmp[3] = psz[0];
676
677
678 wxUint16 cc[2];
679
680 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
681 if (pa == (size_t)-1)
682 return pa;
683
684 if (buf)
685 {
686 *buf++ = cc[0];
687 if (pa > 1)
688 *buf++ = cc[1];
689 }
690 len += pa;
691 psz += sizeof(wxUint32);
692 }
b5153fd8
VZ
693
694 if (buf && len<n)
695 *buf=0;
c91830cb
VZ
696
697 return len;
698}
699
700
701// swap 16bit String to 32bit MB
702size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
703{
704 size_t len=0;
705
706 while (*psz && (!buf || len < n))
707 {
708 char cc[4];
709
b5153fd8
VZ
710 // cast is ok for WC_UTF16
711 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
712 if (pa == (size_t)-1)
713 return pa;
714
715 if (buf)
716 {
717 *buf++ = cc[3];
718 *buf++ = cc[2];
719 *buf++ = cc[1];
720 *buf++ = cc[0];
721 }
722 len += sizeof(wxUint32);
723 psz += pa;
724 }
b5153fd8
VZ
725
726 if (buf && len<=n-sizeof(wxUint32))
727 *(wxUint32*)buf=0;
c91830cb
VZ
728
729 return len;
730}
731
732#else // WC_UTF16
733
734
735// copy 32bit MB to 32bit String
736size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
737{
738 size_t len=0;
739
740 while (*(wxUint32*)psz && (!buf || len < n))
741 {
742 if (buf)
743 *buf++ = *(wxUint32*)psz;
744 len++;
745 psz += sizeof(wxUint32);
746 }
b5153fd8
VZ
747
748 if (buf && len<n)
749 *buf=0;
c91830cb
VZ
750
751 return len;
752}
753
754
755// copy 32bit String to 32bit MB
756size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
757{
758 size_t len=0;
759
760 while (*psz && (!buf || len < n))
761 {
762 if (buf)
763 {
764 *(wxUint32*)buf = *psz;
765 buf += sizeof(wxUint32);
766 }
767
768 len += sizeof(wxUint32);
769 psz++;
770 }
771
b5153fd8
VZ
772 if (buf && len<=n-sizeof(wxUint32))
773 *(wxUint32*)buf=0;
c91830cb
VZ
774
775 return len;
776}
777
778
779// swap 32bit MB to 32bit String
780size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
781{
782 size_t len=0;
783
784 while (*(wxUint32*)psz && (!buf || len < n))
785 {
786 if (buf)
787 {
788 ((char *)buf)[0] = psz[3];
789 ((char *)buf)[1] = psz[2];
790 ((char *)buf)[2] = psz[1];
791 ((char *)buf)[3] = psz[0];
792 buf++;
793 }
794 len++;
795 psz += sizeof(wxUint32);
796 }
b5153fd8
VZ
797
798 if (buf && len<n)
799 *buf=0;
c91830cb
VZ
800
801 return len;
802}
803
804
805// swap 32bit String to 32bit MB
806size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
807{
808 size_t len=0;
809
810 while (*psz && (!buf || len < n))
811 {
812 if (buf)
813 {
814 *buf++ = ((char *)psz)[3];
815 *buf++ = ((char *)psz)[2];
816 *buf++ = ((char *)psz)[1];
817 *buf++ = ((char *)psz)[0];
818 }
819 len += sizeof(wxUint32);
820 psz++;
821 }
b5153fd8
VZ
822
823 if (buf && len<=n-sizeof(wxUint32))
824 *(wxUint32*)buf=0;
c91830cb
VZ
825
826 return len;
827}
828
829
830#endif // WC_UTF16
831
832
36acb880
VZ
833// ============================================================================
834// The classes doing conversion using the iconv_xxx() functions
835// ============================================================================
3caec1bb 836
b040e242 837#ifdef HAVE_ICONV
3a0d76bc 838
3caec1bb
VS
839// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
840// if output buffer is _exactly_ as big as needed. Such case is (unless there's
841// yet another bug in glibc) the only case when iconv() returns with (size_t)-1
842// (which means error) and says there are 0 bytes left in the input buffer --
843// when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
844// this alternative test for iconv() failure.
845// [This bug does not appear in glibc 2.2.]
846#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
847#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
848 (errno != E2BIG || bufLeft != 0))
849#else
850#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
851#endif
852
ab217dba 853#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880
VZ
854
855// ----------------------------------------------------------------------------
e95354ec 856// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
857// ----------------------------------------------------------------------------
858
e95354ec 859class wxMBConv_iconv : public wxMBConv
1cd52418
OK
860{
861public:
e95354ec
VZ
862 wxMBConv_iconv(const wxChar *name);
863 virtual ~wxMBConv_iconv();
36acb880 864
bde4baac
VZ
865 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
866 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 867
e95354ec 868 bool IsOk() const
36acb880
VZ
869 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
870
871protected:
872 // the iconv handlers used to translate from multibyte to wide char and in
873 // the other direction
874 iconv_t m2w,
875 w2m;
876
877private:
e95354ec 878 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
879 // available on this machine, it will remain NULL
880 static const char *ms_wcCharsetName;
881
882 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
883 // different endian-ness than the native one
405d8f46 884 static bool ms_wcNeedsSwap;
36acb880
VZ
885};
886
e95354ec
VZ
887const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
888bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 889
e95354ec 890wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 891{
04c79127
RR
892 // Do it the hard way
893 char cname[100];
894 for (size_t i = 0; i < wxStrlen(name)+1; i++)
895 cname[i] = (char) name[i];
896
36acb880
VZ
897 // check for charset that represents wchar_t:
898 if (ms_wcCharsetName == NULL)
f1339c56 899 {
e95354ec 900 ms_wcNeedsSwap = false;
dccce9ea 901
36acb880
VZ
902 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
903 ms_wcCharsetName = WC_NAME_BEST;
04c79127 904 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 905
36acb880
VZ
906 if (m2w == (iconv_t)-1)
907 {
908 // try charset w/o bytesex info (e.g. "UCS4")
909 // and check for bytesex ourselves:
910 ms_wcCharsetName = WC_NAME;
04c79127 911 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
912
913 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
914 if (m2w == (iconv_t)-1)
915 {
36acb880 916 ms_wcCharsetName = "WCHAR_T";
04c79127 917 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 918 }
3a0d76bc 919
36acb880
VZ
920 if (m2w != (iconv_t)-1)
921 {
922 char buf[2], *bufPtr;
923 wchar_t wbuf[2], *wbufPtr;
924 size_t insz, outsz;
925 size_t res;
926
927 buf[0] = 'A';
928 buf[1] = 0;
929 wbuf[0] = 0;
930 insz = 2;
931 outsz = SIZEOF_WCHAR_T * 2;
932 wbufPtr = wbuf;
933 bufPtr = buf;
934
935 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
936 (char**)&wbufPtr, &outsz);
937
938 if (ICONV_FAILED(res, insz))
3a0d76bc 939 {
36acb880
VZ
940 ms_wcCharsetName = NULL;
941 wxLogLastError(wxT("iconv"));
2b5f62a0 942 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
943 }
944 else
945 {
36acb880 946 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
947 }
948 }
36acb880
VZ
949 else
950 {
951 ms_wcCharsetName = NULL;
373658eb 952
957686c8
VS
953 // VS: we must not output an error here, since wxWindows will safely
954 // fall back to using wxEncodingConverter.
955 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
956 //wxLogError(
36acb880 957 }
3a0d76bc 958 }
36acb880 959 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 960 }
36acb880 961 else // we already have ms_wcCharsetName
3caec1bb 962 {
04c79127 963 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 964 }
dccce9ea 965
36acb880
VZ
966 // NB: don't ever pass NULL to iconv_open(), it may crash!
967 if ( ms_wcCharsetName )
f1339c56 968 {
04c79127 969 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 970 }
405d8f46
VZ
971 else
972 {
973 w2m = (iconv_t)-1;
974 }
36acb880 975}
3caec1bb 976
e95354ec 977wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
978{
979 if ( m2w != (iconv_t)-1 )
980 iconv_close(m2w);
981 if ( w2m != (iconv_t)-1 )
982 iconv_close(w2m);
983}
3a0d76bc 984
bde4baac 985size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880
VZ
986{
987 size_t inbuf = strlen(psz);
988 size_t outbuf = n * SIZEOF_WCHAR_T;
989 size_t res, cres;
990 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
991 wchar_t *bufPtr = buf;
992 const char *pszPtr = psz;
993
994 if (buf)
995 {
996 // have destination buffer, convert there
997 cres = iconv(m2w,
998 ICONV_CHAR_CAST(&pszPtr), &inbuf,
999 (char**)&bufPtr, &outbuf);
1000 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1001
36acb880 1002 if (ms_wcNeedsSwap)
3a0d76bc 1003 {
36acb880
VZ
1004 // convert to native endianness
1005 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 1006 }
adb45366 1007
49dd9820
VS
1008 // NB: iconv was given only strlen(psz) characters on input, and so
1009 // it couldn't convert the trailing zero. Let's do it ourselves
1010 // if there's some room left for it in the output buffer.
1011 if (res < n)
1012 buf[res] = 0;
36acb880
VZ
1013 }
1014 else
1015 {
1016 // no destination buffer... convert using temp buffer
1017 // to calculate destination buffer requirement
1018 wchar_t tbuf[8];
1019 res = 0;
1020 do {
1021 bufPtr = tbuf;
1022 outbuf = 8*SIZEOF_WCHAR_T;
1023
1024 cres = iconv(m2w,
1025 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1026 (char**)&bufPtr, &outbuf );
1027
1028 res += 8-(outbuf/SIZEOF_WCHAR_T);
1029 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1030 }
dccce9ea 1031
36acb880 1032 if (ICONV_FAILED(cres, inbuf))
f1339c56 1033 {
36acb880
VZ
1034 //VS: it is ok if iconv fails, hence trace only
1035 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1036 return (size_t)-1;
1037 }
1038
1039 return res;
1040}
1041
bde4baac 1042size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1043{
f8d791e0 1044 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1045 size_t outbuf = n;
1046 size_t res, cres;
3a0d76bc 1047
36acb880 1048 wchar_t *tmpbuf = 0;
3caec1bb 1049
36acb880
VZ
1050 if (ms_wcNeedsSwap)
1051 {
1052 // need to copy to temp buffer to switch endianness
1053 // this absolutely doesn't rock!
1054 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1055 // could be in read-only memory, or be accessed in some other thread)
1056 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1057 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1058 WC_BSWAP(tmpbuf, inbuf)
1059 psz=tmpbuf;
1060 }
3a0d76bc 1061
36acb880
VZ
1062 if (buf)
1063 {
1064 // have destination buffer, convert there
1065 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1066
36acb880 1067 res = n-outbuf;
adb45366 1068
49dd9820
VS
1069 // NB: iconv was given only wcslen(psz) characters on input, and so
1070 // it couldn't convert the trailing zero. Let's do it ourselves
1071 // if there's some room left for it in the output buffer.
1072 if (res < n)
1073 buf[0] = 0;
36acb880
VZ
1074 }
1075 else
1076 {
1077 // no destination buffer... convert using temp buffer
1078 // to calculate destination buffer requirement
1079 char tbuf[16];
1080 res = 0;
1081 do {
1082 buf = tbuf; outbuf = 16;
1083
1084 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1085
36acb880
VZ
1086 res += 16 - outbuf;
1087 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1088 }
dccce9ea 1089
36acb880
VZ
1090 if (ms_wcNeedsSwap)
1091 {
1092 free(tmpbuf);
1093 }
dccce9ea 1094
36acb880
VZ
1095 if (ICONV_FAILED(cres, inbuf))
1096 {
1097 //VS: it is ok if iconv fails, hence trace only
1098 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1099 return (size_t)-1;
1100 }
1101
1102 return res;
1103}
1104
b040e242 1105#endif // HAVE_ICONV
36acb880 1106
e95354ec 1107
36acb880
VZ
1108// ============================================================================
1109// Win32 conversion classes
1110// ============================================================================
1cd52418 1111
e95354ec 1112#ifdef wxHAVE_WIN32_MB2WC
373658eb 1113
8b04d4c4
VZ
1114// from utils.cpp
1115extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1116extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
373658eb 1117
e95354ec 1118class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1119{
1120public:
bde4baac
VZ
1121 wxMBConv_win32()
1122 {
1123 m_CodePage = CP_ACP;
1124 }
1125
e95354ec 1126 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1127 {
1128 m_CodePage = wxCharsetToCodepage(name);
1129 }
dccce9ea 1130
e95354ec 1131 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1132 {
1133 m_CodePage = wxEncodingToCodepage(encoding);
1134 }
8b04d4c4 1135
bde4baac 1136 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1137 {
2b5f62a0
VZ
1138 const size_t len = ::MultiByteToWideChar
1139 (
1140 m_CodePage, // code page
1141 0, // flags (none)
1142 psz, // input string
1143 -1, // its length (NUL-terminated)
b4da152e 1144 buf, // output string
2b5f62a0
VZ
1145 buf ? n : 0 // size of output buffer
1146 );
1147
03a991bc
VZ
1148 // note that it returns count of written chars for buf != NULL and size
1149 // of the needed buffer for buf == NULL so in either case the length of
1150 // the string (which never includes the terminating NUL) is one less
1151 return len ? len - 1 : (size_t)-1;
f1339c56 1152 }
dccce9ea 1153
bde4baac 1154 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
f1339c56 1155 {
2b5f62a0
VZ
1156 const size_t len = ::WideCharToMultiByte
1157 (
1158 m_CodePage, // code page
1159 0, // flags (none)
b4da152e 1160 psz, // input string
2b5f62a0
VZ
1161 -1, // it is (wide) NUL-terminated
1162 buf, // output buffer
1163 buf ? n : 0, // and its size
1164 NULL, // default "replacement" char
1165 NULL // [out] was it used?
1166 );
1167
03a991bc
VZ
1168 // see the comment above for the reason of "len - 1"
1169 return len ? len - 1 : (size_t)-1;
f1339c56 1170 }
dccce9ea 1171
e95354ec 1172 bool IsOk() const
b1d66b54 1173 { return m_CodePage != -1; }
f1339c56
RR
1174
1175public:
b1d66b54 1176 long m_CodePage;
1cd52418 1177};
e95354ec
VZ
1178
1179#endif // wxHAVE_WIN32_MB2WC
1180
335d31e0
SC
1181// ============================================================================
1182// Mac conversion classes
1183// ============================================================================
1184
1185#if defined(__WXMAC__) && defined(TARGET_CARBON)
1186
1187class wxMBConv_mac : public wxMBConv
1188{
1189public:
1190 wxMBConv_mac()
1191 {
1192 Init(CFStringGetSystemEncoding()) ;
1193 }
1194
1195 wxMBConv_mac(const wxChar* name)
1196 {
1197 Init( EncodingToSystem(wxFontMapper::Get()->CharsetToEncoding(name, FALSE) ) ) ;
1198 }
1199
1200 wxMBConv_mac(wxFontEncoding encoding)
1201 {
1202 Init( EncodingToSystem(encoding) );
1203 }
1204
1205 ~wxMBConv_mac()
1206 {
1207 OSStatus status = noErr ;
1208 status = TECDisposeConverter(m_MB2WC_converter);
1209 status = TECDisposeConverter(m_WC2MB_converter);
1210 }
1211
1212 static TextEncodingBase EncodingToSystem(wxFontEncoding encoding)
1213 {
1214 TextEncodingBase enc = CFStringGetSystemEncoding() ;
1215
1216 switch( encoding)
1217 {
1218 case wxFONTENCODING_ISO8859_1 :
1219 enc = kTextEncodingISOLatin1 ;
1220 break ;
1221 case wxFONTENCODING_ISO8859_2 :
1222 enc = kTextEncodingISOLatin2;
1223 break ;
1224 case wxFONTENCODING_ISO8859_3 :
1225 enc = kTextEncodingISOLatin3 ;
1226 break ;
1227 case wxFONTENCODING_ISO8859_4 :
1228 enc = kTextEncodingISOLatin4;
1229 break ;
1230 case wxFONTENCODING_ISO8859_5 :
1231 enc = kTextEncodingISOLatinCyrillic;
1232 break ;
1233 case wxFONTENCODING_ISO8859_6 :
1234 enc = kTextEncodingISOLatinArabic;
1235 break ;
1236 case wxFONTENCODING_ISO8859_7 :
1237 enc = kTextEncodingISOLatinGreek;
1238 break ;
1239 case wxFONTENCODING_ISO8859_8 :
1240 enc = kTextEncodingISOLatinHebrew;
1241 break ;
1242 case wxFONTENCODING_ISO8859_9 :
1243 enc = kTextEncodingISOLatin5;
1244 break ;
1245 case wxFONTENCODING_ISO8859_10 :
1246 enc = kTextEncodingISOLatin6;
1247 break ;
1248 case wxFONTENCODING_ISO8859_13 :
1249 enc = kTextEncodingISOLatin7;
1250 break ;
1251 case wxFONTENCODING_ISO8859_14 :
1252 enc = kTextEncodingISOLatin8;
1253 break ;
1254 case wxFONTENCODING_ISO8859_15 :
1255 enc = kTextEncodingISOLatin9;
1256 break ;
1257
1258 case wxFONTENCODING_KOI8 :
1259 enc = kTextEncodingKOI8_R;
1260 break ;
1261 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1262 enc = kTextEncodingDOSRussian;
1263 break ;
1264/*
1265 case wxFONTENCODING_BULGARIAN :
1266 enc = ;
1267 break ;
1268*/
1269 case wxFONTENCODING_CP437 :
1270 enc =kTextEncodingDOSLatinUS ;
1271 break ;
1272 case wxFONTENCODING_CP850 :
1273 enc = kTextEncodingDOSLatin1;
1274 break ;
1275 case wxFONTENCODING_CP852 :
1276 enc = kTextEncodingDOSLatin2;
1277 break ;
1278 case wxFONTENCODING_CP855 :
1279 enc = kTextEncodingDOSCyrillic;
1280 break ;
1281 case wxFONTENCODING_CP866 :
1282 enc =kTextEncodingDOSRussian ;
1283 break ;
1284 case wxFONTENCODING_CP874 :
1285 enc = kTextEncodingDOSThai;
1286 break ;
1287 case wxFONTENCODING_CP932 :
1288 enc = kTextEncodingDOSJapanese;
1289 break ;
1290 case wxFONTENCODING_CP936 :
1291 enc =kTextEncodingDOSChineseSimplif ;
1292 break ;
1293 case wxFONTENCODING_CP949 :
1294 enc = kTextEncodingDOSKorean;
1295 break ;
1296 case wxFONTENCODING_CP950 :
1297 enc = kTextEncodingDOSChineseTrad;
1298 break ;
1299
1300 case wxFONTENCODING_CP1250 :
1301 enc = kTextEncodingWindowsLatin2;
1302 break ;
1303 case wxFONTENCODING_CP1251 :
1304 enc =kTextEncodingWindowsCyrillic ;
1305 break ;
1306 case wxFONTENCODING_CP1252 :
1307 enc =kTextEncodingWindowsLatin1 ;
1308 break ;
1309 case wxFONTENCODING_CP1253 :
1310 enc = kTextEncodingWindowsGreek;
1311 break ;
1312 case wxFONTENCODING_CP1254 :
1313 enc = kTextEncodingWindowsLatin5;
1314 break ;
1315 case wxFONTENCODING_CP1255 :
1316 enc =kTextEncodingWindowsHebrew ;
1317 break ;
1318 case wxFONTENCODING_CP1256 :
1319 enc =kTextEncodingWindowsArabic ;
1320 break ;
1321 case wxFONTENCODING_CP1257 :
1322 enc = kTextEncodingWindowsBalticRim;
1323 break ;
1324
1325 case wxFONTENCODING_UTF7 :
1326 enc = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicodeUTF7Format) ;
1327 break ;
1328 case wxFONTENCODING_UTF8 :
1329 enc = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicodeUTF8Format) ;
1330 break ;
1331 case wxFONTENCODING_EUC_JP :
1332 enc = kTextEncodingEUC_JP;
1333 break ;
1334 case wxFONTENCODING_UTF16BE :
1335 enc = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
1336 break ;
1337 case wxFONTENCODING_UTF16LE :
1338 enc = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
1339 break ;
1340 case wxFONTENCODING_UTF32BE :
1341 enc = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode32BitFormat) ;
1342 break ;
1343 case wxFONTENCODING_UTF32LE :
1344 enc = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode32BitFormat) ;
1345 break ;
1346 } ;
1347 return enc ;
1348 }
1349
1350 void Init( TextEncodingBase encoding)
1351 {
1352 OSStatus status = noErr ;
1353 m_char_encoding = encoding ;
1354#if SIZEOF_WCHAR_T == 4
1355 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode32BitFormat) ;
1356#else
1357 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
1358#endif
1359 status = TECCreateConverter(&m_MB2WC_converter,
1360 m_char_encoding,
1361 m_unicode_encoding);
1362 status = TECCreateConverter(&m_WC2MB_converter,
1363 m_unicode_encoding,
1364 m_char_encoding);
1365 }
1366
1367 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1368 {
1369 OSStatus status = noErr ;
1370 ByteCount byteOutLen ;
1371 ByteCount byteInLen = strlen(psz) ;
1372 ByteCount byteBufferLen = n ;
1373 wchar_t *tbuf = NULL ;
1374
1375 if (buf == NULL)
1376 {
1377 n = byteInLen * SIZEOF_WCHAR_T ;
1378 tbuf = (wchar_t*) malloc( n ) ;
1379 }
1380
1381 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
1382 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
1383
1384 if ( buf == NULL )
1385 free(tbuf) ;
1386
1387 size_t res = byteOutLen / SIZEOF_WCHAR_T ;
1388 if ( buf && res < n)
1389 buf[res] = 0;
1390
1391 return res ;
1392 }
1393
1394 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
1395 {
1396 OSStatus status = noErr ;
1397 ByteCount byteOutLen ;
1398 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
1399 ByteCount byteBufferLen = n ;
1400
1401 char *tbuf = NULL ;
1402
1403 if (buf == NULL)
1404 {
1405 n = byteInLen ;
1406 tbuf = (char*) malloc( n ) ;
1407 }
1408
1409 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
1410 (TextPtr) ( buf ? buf : tbuf ) , byteBufferLen, &byteOutLen);
1411
1412 if ( buf == NULL )
1413 free(tbuf) ;
1414
1415 size_t res = byteOutLen ;
1416 if ( buf && res < n)
1417 buf[res] = 0;
1418
1419 return res ;
1420 }
1421
1422 bool IsOk() const
1423 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
1424
1425private:
1426 TECObjectRef m_MB2WC_converter ;
1427 TECObjectRef m_WC2MB_converter ;
1428
1429 TextEncodingBase m_char_encoding ;
1430 TextEncodingBase m_unicode_encoding ;
1431};
1432
1433#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 1434
36acb880
VZ
1435// ============================================================================
1436// wxEncodingConverter based conversion classes
1437// ============================================================================
1438
1e6feb95 1439#if wxUSE_FONTMAP
1cd52418 1440
e95354ec 1441class wxMBConv_wxwin : public wxMBConv
1cd52418 1442{
8b04d4c4
VZ
1443private:
1444 void Init()
1445 {
1446 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
1447 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
1448 }
1449
6001e347 1450public:
f1339c56
RR
1451 // temporarily just use wxEncodingConverter stuff,
1452 // so that it works while a better implementation is built
e95354ec 1453 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
1454 {
1455 if (name)
e95354ec 1456 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
1457 else
1458 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 1459
8b04d4c4
VZ
1460 Init();
1461 }
1462
e95354ec 1463 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
1464 {
1465 m_enc = enc;
1466
1467 Init();
f1339c56 1468 }
dccce9ea 1469
bde4baac 1470 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
1471 {
1472 size_t inbuf = strlen(psz);
dccce9ea 1473 if (buf)
4def3b35 1474 m2w.Convert(psz,buf);
f1339c56
RR
1475 return inbuf;
1476 }
dccce9ea 1477
bde4baac 1478 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 1479 {
f8d791e0 1480 const size_t inbuf = wxWcslen(psz);
f1339c56
RR
1481 if (buf)
1482 w2m.Convert(psz,buf);
dccce9ea 1483
f1339c56
RR
1484 return inbuf;
1485 }
dccce9ea 1486
e95354ec 1487 bool IsOk() const { return m_ok; }
f1339c56
RR
1488
1489public:
8b04d4c4 1490 wxFontEncoding m_enc;
f1339c56 1491 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
1492
1493 // were we initialized successfully?
1494 bool m_ok;
fc7a2a60 1495
e95354ec 1496 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 1497};
6001e347 1498
1e6feb95
VZ
1499#endif // wxUSE_FONTMAP
1500
36acb880
VZ
1501// ============================================================================
1502// wxCSConv implementation
1503// ============================================================================
1504
8b04d4c4 1505void wxCSConv::Init()
6001e347 1506{
e95354ec
VZ
1507 m_name = NULL;
1508 m_convReal = NULL;
1509 m_deferred = true;
1510}
1511
8b04d4c4
VZ
1512wxCSConv::wxCSConv(const wxChar *charset)
1513{
1514 Init();
82713003 1515
e95354ec
VZ
1516 if ( charset )
1517 {
e95354ec
VZ
1518 SetName(charset);
1519 }
bda3d86a
VZ
1520
1521 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
1522}
1523
8b04d4c4
VZ
1524wxCSConv::wxCSConv(wxFontEncoding encoding)
1525{
bda3d86a 1526 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
1527 {
1528 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
1529
1530 encoding = wxFONTENCODING_SYSTEM;
1531 }
1532
8b04d4c4
VZ
1533 Init();
1534
bda3d86a 1535 m_encoding = encoding;
8b04d4c4
VZ
1536}
1537
6001e347
RR
1538wxCSConv::~wxCSConv()
1539{
65e50848
JS
1540 Clear();
1541}
1542
54380f29 1543wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 1544 : wxMBConv()
54380f29 1545{
8b04d4c4
VZ
1546 Init();
1547
54380f29 1548 SetName(conv.m_name);
8b04d4c4 1549 m_encoding = conv.m_encoding;
54380f29
GD
1550}
1551
1552wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
1553{
1554 Clear();
8b04d4c4 1555
54380f29 1556 SetName(conv.m_name);
8b04d4c4
VZ
1557 m_encoding = conv.m_encoding;
1558
54380f29
GD
1559 return *this;
1560}
1561
65e50848
JS
1562void wxCSConv::Clear()
1563{
8b04d4c4 1564 free(m_name);
e95354ec 1565 delete m_convReal;
8b04d4c4 1566
65e50848 1567 m_name = NULL;
e95354ec 1568 m_convReal = NULL;
6001e347
RR
1569}
1570
1571void wxCSConv::SetName(const wxChar *charset)
1572{
f1339c56
RR
1573 if (charset)
1574 {
1575 m_name = wxStrdup(charset);
e95354ec 1576 m_deferred = true;
f1339c56 1577 }
6001e347
RR
1578}
1579
e95354ec
VZ
1580wxMBConv *wxCSConv::DoCreate() const
1581{
c547282d
VZ
1582 // check for the special case of ASCII or ISO8859-1 charset: as we have
1583 // special knowledge of it anyhow, we don't need to create a special
1584 // conversion object
1585 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 1586 {
e95354ec
VZ
1587 // don't convert at all
1588 return NULL;
1589 }
dccce9ea 1590
e95354ec
VZ
1591 // we trust OS to do conversion better than we can so try external
1592 // conversion methods first
1593 //
1594 // the full order is:
1595 // 1. OS conversion (iconv() under Unix or Win32 API)
1596 // 2. hard coded conversions for UTF
1597 // 3. wxEncodingConverter as fall back
1598
1599 // step (1)
1600#ifdef HAVE_ICONV
c547282d 1601#if !wxUSE_FONTMAP
e95354ec 1602 if ( m_name )
c547282d 1603#endif // !wxUSE_FONTMAP
e95354ec 1604 {
c547282d
VZ
1605 wxString name(m_name);
1606
1607#if wxUSE_FONTMAP
1608 if ( name.empty() )
1609 name = wxFontMapper::Get()->GetEncodingName(m_encoding);
1610#endif // wxUSE_FONTMAP
1611
1612 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
e95354ec
VZ
1613 if ( conv->IsOk() )
1614 return conv;
1615
1616 delete conv;
1617 }
1618#endif // HAVE_ICONV
1619
1620#ifdef wxHAVE_WIN32_MB2WC
1621 {
1622 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
1623 : new wxMBConv_win32(m_encoding);
1624 if ( conv->IsOk() )
1625 return conv;
1626
1627 delete conv;
1628 }
1629#endif // wxHAVE_WIN32_MB2WC
335d31e0
SC
1630#if defined(__WXMAC__)
1631 {
1632 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
1633 {
1634
1635 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
1636 : new wxMBConv_mac(m_encoding);
1637 if ( conv->IsOk() )
1638 return conv;
1639
1640 delete conv;
1641 }
1642 }
1643#endif
e95354ec
VZ
1644 // step (2)
1645 wxFontEncoding enc = m_encoding;
1646#if wxUSE_FONTMAP
c547282d
VZ
1647 if ( enc == wxFONTENCODING_SYSTEM && m_name )
1648 {
1649 // use "false" to suppress interactive dialogs -- we can be called from
1650 // anywhere and popping up a dialog from here is the last thing we want to
1651 // do
1652 enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
1653 }
e95354ec
VZ
1654#endif // wxUSE_FONTMAP
1655
1656 switch ( enc )
1657 {
1658 case wxFONTENCODING_UTF7:
1659 return new wxMBConvUTF7;
1660
1661 case wxFONTENCODING_UTF8:
1662 return new wxMBConvUTF8;
1663
e95354ec
VZ
1664 case wxFONTENCODING_UTF16BE:
1665 return new wxMBConvUTF16BE;
1666
1667 case wxFONTENCODING_UTF16LE:
1668 return new wxMBConvUTF16LE;
1669
e95354ec
VZ
1670 case wxFONTENCODING_UTF32BE:
1671 return new wxMBConvUTF32BE;
1672
1673 case wxFONTENCODING_UTF32LE:
1674 return new wxMBConvUTF32LE;
1675
1676 default:
1677 // nothing to do but put here to suppress gcc warnings
1678 ;
1679 }
1680
1681 // step (3)
1682#if wxUSE_FONTMAP
1683 {
1684 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
1685 : new wxMBConv_wxwin(m_encoding);
1686 if ( conv->IsOk() )
1687 return conv;
1688
1689 delete conv;
1690 }
1691#endif // wxUSE_FONTMAP
1692
a58d4f4d
VS
1693 // NB: This is a hack to prevent deadlock. What could otherwise happen
1694 // in Unicode build: wxConvLocal creation ends up being here
1695 // because of some failure and logs the error. But wxLog will try to
1696 // attach timestamp, for which it will need wxConvLocal (to convert
1697 // time to char* and then wchar_t*), but that fails, tries to log
1698 // error, but wxLog has a (already locked) critical section that
1699 // guards static buffer.
1700 static bool alreadyLoggingError = false;
1701 if (!alreadyLoggingError)
1702 {
1703 alreadyLoggingError = true;
1704 wxLogError(_("Cannot convert from the charset '%s'!"),
1705 m_name ? m_name
e95354ec
VZ
1706 :
1707#if wxUSE_FONTMAP
1708 wxFontMapper::GetEncodingDescription(m_encoding).c_str()
1709#else // !wxUSE_FONTMAP
1710 wxString::Format(_("encoding %s"), m_encoding).c_str()
1711#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1712 );
a58d4f4d
VS
1713 alreadyLoggingError = false;
1714 }
e95354ec
VZ
1715
1716 return NULL;
1717}
1718
1719void wxCSConv::CreateConvIfNeeded() const
1720{
1721 if ( m_deferred )
1722 {
1723 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
1724
1725#if wxUSE_INTL
1726 // if we don't have neither the name nor the encoding, use the default
1727 // encoding for this system
1728 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
1729 {
4d312c22 1730 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
1731 }
1732#endif // wxUSE_INTL
1733
e95354ec
VZ
1734 self->m_convReal = DoCreate();
1735 self->m_deferred = false;
6001e347 1736 }
6001e347
RR
1737}
1738
1739size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1740{
e95354ec 1741 CreateConvIfNeeded();
dccce9ea 1742
e95354ec
VZ
1743 if (m_convReal)
1744 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
1745
1746 // latin-1 (direct)
4def3b35 1747 size_t len = strlen(psz);
dccce9ea 1748
f1339c56
RR
1749 if (buf)
1750 {
4def3b35 1751 for (size_t c = 0; c <= len; c++)
f1339c56
RR
1752 buf[c] = (unsigned char)(psz[c]);
1753 }
dccce9ea 1754
f1339c56 1755 return len;
6001e347
RR
1756}
1757
1758size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1759{
e95354ec 1760 CreateConvIfNeeded();
dccce9ea 1761
e95354ec
VZ
1762 if (m_convReal)
1763 return m_convReal->WC2MB(buf, psz, n);
1cd52418 1764
f1339c56 1765 // latin-1 (direct)
f8d791e0 1766 const size_t len = wxWcslen(psz);
f1339c56
RR
1767 if (buf)
1768 {
4def3b35 1769 for (size_t c = 0; c <= len; c++)
24642831
VS
1770 {
1771 if (psz[c] > 0xFF)
1772 return (size_t)-1;
1773 buf[c] = psz[c];
1774 }
1775 }
1776 else
1777 {
1778 for (size_t c = 0; c <= len; c++)
1779 {
1780 if (psz[c] > 0xFF)
1781 return (size_t)-1;
1782 }
f1339c56 1783 }
dccce9ea 1784
f1339c56 1785 return len;
6001e347
RR
1786}
1787
bde4baac
VZ
1788// ----------------------------------------------------------------------------
1789// globals
1790// ----------------------------------------------------------------------------
1791
1792#ifdef __WINDOWS__
1793 static wxMBConv_win32 wxConvLibcObj;
1794#else
dcc8fac0 1795 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
1796#endif
1797
1798static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
1799static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
1800static wxMBConvUTF7 wxConvUTF7Obj;
1801static wxMBConvUTF8 wxConvUTF8Obj;
1802
1803
1804WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
1805WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
1806WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
1807WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
1808WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
1809WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
1810
1811#else // !wxUSE_WCHAR_T
1812
1813// stand-ins in absence of wchar_t
1814WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
1815 wxConvISO8859_1,
1816 wxConvLocal,
1817 wxConvUTF8;
1818
1819#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
1820
1821