]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
check if conversion to UTF7 works
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
3a0d76bc 4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
6001e347
RR
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
e95354ec
VZ
8// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
9// (c) 2000-2003 Vadim Zeitlin
65571936 10// Licence: wxWindows licence
6001e347
RR
11/////////////////////////////////////////////////////////////////////////////
12
f6bcfd97
BP
13// ============================================================================
14// declarations
15// ============================================================================
16
17// ----------------------------------------------------------------------------
18// headers
19// ----------------------------------------------------------------------------
20
14f355c2 21#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
22 #pragma implementation "strconv.h"
23#endif
24
25// For compilers that support precompilation, includes "wx.h".
26#include "wx/wxprec.h"
27
28#ifdef __BORLANDC__
29 #pragma hdrstop
30#endif
31
373658eb
VZ
32#ifndef WX_PRECOMP
33 #include "wx/intl.h"
34 #include "wx/log.h"
35#endif // WX_PRECOMP
36
bde4baac
VZ
37#include "wx/strconv.h"
38
39#if wxUSE_WCHAR_T
40
0a1c1e62 41#ifdef __WXMSW__
373658eb 42 #include "wx/msw/private.h"
7608a683
WS
43#endif
44
45#ifdef __WINDOWS__
13dd924a 46 #include "wx/msw/missing.h"
0a1c1e62
GRG
47#endif
48
1c193821 49#ifndef __WXWINCE__
1cd52418 50#include <errno.h>
1c193821
JS
51#endif
52
6001e347
RR
53#include <ctype.h>
54#include <string.h>
55#include <stdlib.h>
56
e95354ec
VZ
57#if defined(__WIN32__) && !defined(__WXMICROWIN__)
58 #define wxHAVE_WIN32_MB2WC
59#endif // __WIN32__ but !__WXMICROWIN__
60
373658eb
VZ
61// ----------------------------------------------------------------------------
62// headers
63// ----------------------------------------------------------------------------
7af284fd 64
6001e347 65#ifdef __SALFORDC__
373658eb 66 #include <clib.h>
6001e347
RR
67#endif
68
b040e242 69#ifdef HAVE_ICONV
373658eb 70 #include <iconv.h>
1cd52418 71#endif
1cd52418 72
373658eb
VZ
73#include "wx/encconv.h"
74#include "wx/fontmap.h"
7608a683 75#include "wx/utils.h"
373658eb 76
335d31e0 77#ifdef __WXMAC__
4227afa4
SC
78#include <ATSUnicode.h>
79#include <TextCommon.h>
80#include <TextEncodingConverter.h>
335d31e0
SC
81
82#include "wx/mac/private.h" // includes mac headers
83#endif
373658eb
VZ
84// ----------------------------------------------------------------------------
85// macros
86// ----------------------------------------------------------------------------
3e61dfb0 87
1cd52418 88#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
3a0d76bc 89#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
1cd52418
OK
90
91#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
92 #define WC_NAME "UCS4"
93 #define WC_BSWAP BSWAP_UCS4
94 #ifdef WORDS_BIGENDIAN
95 #define WC_NAME_BEST "UCS-4BE"
96 #else
97 #define WC_NAME_BEST "UCS-4LE"
98 #endif
1cd52418 99#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
100 #define WC_NAME "UTF16"
101 #define WC_BSWAP BSWAP_UTF16
a3f2769e 102 #define WC_UTF16
3a0d76bc
VS
103 #ifdef WORDS_BIGENDIAN
104 #define WC_NAME_BEST "UTF-16BE"
105 #else
106 #define WC_NAME_BEST "UTF-16LE"
107 #endif
bab1e722 108#else // sizeof(wchar_t) != 2 nor 4
bde4baac
VZ
109 // does this ever happen?
110 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1cd52418
OK
111#endif
112
373658eb
VZ
113// ============================================================================
114// implementation
115// ============================================================================
116
117// ----------------------------------------------------------------------------
c91830cb 118// UTF-16 en/decoding to/from UCS-4
373658eb 119// ----------------------------------------------------------------------------
6001e347 120
b0a6bb75 121
// Encode a single 32-bit value as UTF-16.
//
// input:  the value to encode
// output: receives one or two 16-bit units; may be NULL, in which case
//         nothing is stored and only the unit count is computed
//
// Returns the number of 16-bit units required (1 or 2), or (size_t)-1 if
// input is 0x110000 or above. Values up to 0xffff -- including those inside
// the surrogate range -- are stored unchanged as one unit.
static size_t encode_utf16(wxUint32 input, wxUint16 *output)
{
    // too large to be expressed as a surrogate pair
    if (input >= 0x110000)
        return (size_t)-1;

    // fits into a single unit
    if (input <= 0xffff)
    {
        if (output)
            *output = (wxUint16) input;
        return 1;
    }

    // 0x10000..0x10ffff: lead unit first, trail unit second
    if (output)
    {
        output[0] = (wxUint16) ((input >> 10) + 0xd7c0);
        output[1] = (wxUint16) ((input & 0x3ff) + 0xdc00);
    }
    return 2;
}
144
// Decode one code point from a UTF-16 sequence.
//
// input:  points at the first 16-bit unit; input[1] is read only when
//         input[0] is a lead surrogate (0xd800..0xdbff)
// output: receives the decoded value; on failure it receives input[0]
//
// Returns the number of units consumed (1 or 2), or (size_t)-1 when input[0]
// is a surrogate that does not start a well-formed lead/trail pair.
static size_t decode_utf16(const wxUint16* input, wxUint32& output)
{
    const wxUint16 first = input[0];

    // anything outside the surrogate range stands for itself
    if ((first < 0xd800) || (first > 0xdfff))
    {
        output = first;
        return 1;
    }

    // A pair is a lead surrogate (0xd800..0xdbff) followed by a trail
    // surrogate (0xdc00..0xdfff, both ends included). A trail surrogate in
    // first position is rejected without looking at the following unit.
    if ((first > 0xdbff) || (input[1] < 0xdc00) || (input[1] > 0xdfff))
    {
        output = first;
        return (size_t)-1;
    }

    output = ((wxUint32)(first - 0xd7c0) << 10) + (input[1] - 0xdc00);
    return 2;
}
163
b0a6bb75 164
f6bcfd97 165// ----------------------------------------------------------------------------
6001e347 166// wxMBConv
f6bcfd97 167// ----------------------------------------------------------------------------
2c53a80a
WS
168
169wxMBConv::~wxMBConv()
170{
171 // nothing to do here (necessary for Darwin linking probably)
172}
6001e347 173
6001e347
RR
174const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
175{
2b5f62a0 176 if ( psz )
6001e347 177 {
2b5f62a0
VZ
178 // calculate the length of the buffer needed first
179 size_t nLen = MB2WC(NULL, psz, 0);
180 if ( nLen != (size_t)-1 )
181 {
182 // now do the actual conversion
183 wxWCharBuffer buf(nLen);
635f33ce
VS
184 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
185 if ( nLen != (size_t)-1 )
186 {
187 return buf;
188 }
2b5f62a0 189 }
f6bcfd97 190 }
2b5f62a0
VZ
191
192 wxWCharBuffer buf((wchar_t *)NULL);
193
194 return buf;
6001e347
RR
195}
196
e5cceba0 197const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 198{
2b5f62a0
VZ
199 if ( pwz )
200 {
201 size_t nLen = WC2MB(NULL, pwz, 0);
202 if ( nLen != (size_t)-1 )
203 {
c91830cb 204 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
635f33ce
VS
205 nLen = WC2MB(buf.data(), pwz, nLen + 4);
206 if ( nLen != (size_t)-1 )
207 {
208 return buf;
209 }
2b5f62a0
VZ
210 }
211 }
212
213 wxCharBuffer buf((char *)NULL);
e5cceba0 214
e5cceba0 215 return buf;
6001e347
RR
216}
217
6001e347 218// ----------------------------------------------------------------------------
bde4baac 219// wxMBConvLibc
6001e347
RR
220// ----------------------------------------------------------------------------
221
bde4baac
VZ
222size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
223{
224 return wxMB2WC(buf, psz, n);
225}
226
227size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
228{
229 return wxWC2MB(buf, psz, n);
230}
231
232// ----------------------------------------------------------------------------
233// UTF-7
234// ----------------------------------------------------------------------------
6001e347
RR
235
236#if 0
237static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
238 "abcdefghijklmnopqrstuvwxyz"
239 "0123456789'(),-./:?";
240static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
241static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
242 "abcdefghijklmnopqrstuvwxyz"
243 "0123456789+/";
244#endif
245
246// TODO: write actual implementations of UTF-7 here
247size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
248 const char * WXUNUSED(psz),
249 size_t WXUNUSED(n)) const
250{
251 return 0;
252}
253
254size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
255 const wchar_t * WXUNUSED(psz),
256 size_t WXUNUSED(n)) const
257{
258 return 0;
259}
260
f6bcfd97 261// ----------------------------------------------------------------------------
6001e347 262// UTF-8
f6bcfd97 263// ----------------------------------------------------------------------------
6001e347 264
// utf8_max[i] is the largest value that fits into a UTF-8 sequence of i + 1
// bytes (1 to 6 bytes); the final 0xffffffff entry is a catch-all so that
// scans of the table always terminate. The table is only ever read, hence
// const.
static const wxUint32 utf8_max[]=
    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347
RR
267
268size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
269{
4def3b35
VS
270 size_t len = 0;
271
dccce9ea 272 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
273 {
274 unsigned char cc = *psz++, fc = cc;
275 unsigned cnt;
dccce9ea 276 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 277 fc <<= 1;
dccce9ea 278 if (!cnt)
4def3b35
VS
279 {
280 // plain ASCII char
dccce9ea 281 if (buf)
4def3b35
VS
282 *buf++ = cc;
283 len++;
dccce9ea
VZ
284 }
285 else
4def3b35
VS
286 {
287 cnt--;
dccce9ea 288 if (!cnt)
4def3b35
VS
289 {
290 // invalid UTF-8 sequence
291 return (size_t)-1;
dccce9ea
VZ
292 }
293 else
4def3b35
VS
294 {
295 unsigned ocnt = cnt - 1;
296 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 297 while (cnt--)
4def3b35
VS
298 {
299 cc = *psz++;
dccce9ea 300 if ((cc & 0xC0) != 0x80)
4def3b35
VS
301 {
302 // invalid UTF-8 sequence
303 return (size_t)-1;
304 }
305 res = (res << 6) | (cc & 0x3f);
306 }
dccce9ea 307 if (res <= utf8_max[ocnt])
4def3b35
VS
308 {
309 // illegal UTF-8 encoding
310 return (size_t)-1;
311 }
1cd52418 312#ifdef WC_UTF16
b5153fd8
VZ
313 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
314 size_t pa = encode_utf16(res, (wxUint16 *)buf);
4def3b35
VS
315 if (pa == (size_t)-1)
316 return (size_t)-1;
dccce9ea 317 if (buf)
4def3b35
VS
318 buf += pa;
319 len += pa;
373658eb 320#else // !WC_UTF16
dccce9ea 321 if (buf)
4def3b35
VS
322 *buf++ = res;
323 len++;
373658eb 324#endif // WC_UTF16/!WC_UTF16
4def3b35
VS
325 }
326 }
6001e347 327 }
dccce9ea 328 if (buf && (len < n))
4def3b35
VS
329 *buf = 0;
330 return len;
6001e347
RR
331}
332
333size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
334{
4def3b35 335 size_t len = 0;
6001e347 336
dccce9ea 337 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
338 {
339 wxUint32 cc;
1cd52418 340#ifdef WC_UTF16
b5153fd8
VZ
341 // cast is ok for WC_UTF16
342 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 343 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 344#else
4def3b35
VS
345 cc=(*psz++) & 0x7fffffff;
346#endif
347 unsigned cnt;
348 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
dccce9ea 349 if (!cnt)
4def3b35
VS
350 {
351 // plain ASCII char
dccce9ea 352 if (buf)
574c939e 353 *buf++ = (char) cc;
4def3b35 354 len++;
dccce9ea
VZ
355 }
356
357 else
4def3b35
VS
358 {
359 len += cnt + 1;
dccce9ea 360 if (buf)
4def3b35 361 {
574c939e 362 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
4def3b35 363 while (cnt--)
574c939e 364 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
4def3b35
VS
365 }
366 }
6001e347 367 }
4def3b35
VS
368
369 if (buf && (len<n)) *buf = 0;
adb45366 370
4def3b35 371 return len;
6001e347
RR
372}
373
c91830cb
VZ
374
375
376
377// ----------------------------------------------------------------------------
378// UTF-16
379// ----------------------------------------------------------------------------
380
381#ifdef WORDS_BIGENDIAN
bde4baac
VZ
382 #define wxMBConvUTF16straight wxMBConvUTF16BE
383 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 384#else
bde4baac
VZ
385 #define wxMBConvUTF16swap wxMBConvUTF16BE
386 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
387#endif
388
389
c91830cb
VZ
390#ifdef WC_UTF16
391
c91830cb
VZ
392// copy 16bit MB to 16bit String
393size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
394{
395 size_t len=0;
396
397 while (*(wxUint16*)psz && (!buf || len < n))
398 {
399 if (buf)
400 *buf++ = *(wxUint16*)psz;
401 len++;
402
403 psz += sizeof(wxUint16);
404 }
405 if (buf && len<n) *buf=0;
406
407 return len;
408}
409
410
411// copy 16bit String to 16bit MB
412size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
413{
414 size_t len=0;
415
416 while (*psz && (!buf || len < n))
417 {
418 if (buf)
419 {
420 *(wxUint16*)buf = *psz;
421 buf += sizeof(wxUint16);
422 }
423 len += sizeof(wxUint16);
424 psz++;
425 }
426 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
427
428 return len;
429}
430
431
432// swap 16bit MB to 16bit String
433size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
434{
435 size_t len=0;
436
437 while (*(wxUint16*)psz && (!buf || len < n))
438 {
439 if (buf)
440 {
441 ((char *)buf)[0] = psz[1];
442 ((char *)buf)[1] = psz[0];
443 buf++;
444 }
445 len++;
446 psz += sizeof(wxUint16);
447 }
448 if (buf && len<n) *buf=0;
449
450 return len;
451}
452
453
454// swap 16bit MB to 16bit String
455size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
456{
457 size_t len=0;
458
459 while (*psz && (!buf || len < n))
460 {
461 if (buf)
462 {
463 *buf++ = ((char*)psz)[1];
464 *buf++ = ((char*)psz)[0];
465 }
466 len += sizeof(wxUint16);
467 psz++;
468 }
469 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
470
471 return len;
472}
473
474
475#else // WC_UTF16
476
477
478// copy 16bit MB to 32bit String
479size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
480{
481 size_t len=0;
482
483 while (*(wxUint16*)psz && (!buf || len < n))
484 {
485 wxUint32 cc;
486 size_t pa=decode_utf16((wxUint16*)psz, cc);
487 if (pa == (size_t)-1)
488 return pa;
489
490 if (buf)
491 *buf++ = cc;
492 len++;
493 psz += pa * sizeof(wxUint16);
494 }
495 if (buf && len<n) *buf=0;
496
497 return len;
498}
499
500
501// copy 32bit String to 16bit MB
502size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
503{
504 size_t len=0;
505
506 while (*psz && (!buf || len < n))
507 {
508 wxUint16 cc[2];
509 size_t pa=encode_utf16(*psz, cc);
510
511 if (pa == (size_t)-1)
512 return pa;
513
514 if (buf)
515 {
69b80d28 516 *(wxUint16*)buf = cc[0];
b5153fd8 517 buf += sizeof(wxUint16);
c91830cb 518 if (pa > 1)
69b80d28
VZ
519 {
520 *(wxUint16*)buf = cc[1];
521 buf += sizeof(wxUint16);
522 }
c91830cb
VZ
523 }
524
525 len += pa*sizeof(wxUint16);
526 psz++;
527 }
528 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
529
530 return len;
531}
532
533
534// swap 16bit MB to 32bit String
535size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
536{
537 size_t len=0;
538
539 while (*(wxUint16*)psz && (!buf || len < n))
540 {
541 wxUint32 cc;
542 char tmp[4];
543 tmp[0]=psz[1]; tmp[1]=psz[0];
544 tmp[2]=psz[3]; tmp[3]=psz[2];
545
546 size_t pa=decode_utf16((wxUint16*)tmp, cc);
547 if (pa == (size_t)-1)
548 return pa;
549
550 if (buf)
551 *buf++ = cc;
552
553 len++;
554 psz += pa * sizeof(wxUint16);
555 }
556 if (buf && len<n) *buf=0;
557
558 return len;
559}
560
561
562// swap 32bit String to 16bit MB
563size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
564{
565 size_t len=0;
566
567 while (*psz && (!buf || len < n))
568 {
569 wxUint16 cc[2];
570 size_t pa=encode_utf16(*psz, cc);
571
572 if (pa == (size_t)-1)
573 return pa;
574
575 if (buf)
576 {
577 *buf++ = ((char*)cc)[1];
578 *buf++ = ((char*)cc)[0];
579 if (pa > 1)
580 {
581 *buf++ = ((char*)cc)[3];
582 *buf++ = ((char*)cc)[2];
583 }
584 }
585
586 len += pa*sizeof(wxUint16);
587 psz++;
588 }
589 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
590
591 return len;
592}
593
594#endif // WC_UTF16
595
596
597// ----------------------------------------------------------------------------
598// UTF-32
599// ----------------------------------------------------------------------------
600
601#ifdef WORDS_BIGENDIAN
602#define wxMBConvUTF32straight wxMBConvUTF32BE
603#define wxMBConvUTF32swap wxMBConvUTF32LE
604#else
605#define wxMBConvUTF32swap wxMBConvUTF32BE
606#define wxMBConvUTF32straight wxMBConvUTF32LE
607#endif
608
609
610WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
611WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
612
613
614#ifdef WC_UTF16
615
616// copy 32bit MB to 16bit String
617size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
618{
619 size_t len=0;
620
621 while (*(wxUint32*)psz && (!buf || len < n))
622 {
623 wxUint16 cc[2];
624
625 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
626 if (pa == (size_t)-1)
627 return pa;
628
629 if (buf)
630 {
631 *buf++ = cc[0];
632 if (pa > 1)
633 *buf++ = cc[1];
634 }
635 len += pa;
636 psz += sizeof(wxUint32);
637 }
638 if (buf && len<n) *buf=0;
639
640 return len;
641}
642
643
644// copy 16bit String to 32bit MB
645size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
646{
647 size_t len=0;
648
649 while (*psz && (!buf || len < n))
650 {
651 wxUint32 cc;
652
b5153fd8
VZ
653 // cast is ok for WC_UTF16
654 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
655 if (pa == (size_t)-1)
656 return pa;
657
658 if (buf)
659 {
660 *(wxUint32*)buf = cc;
661 buf += sizeof(wxUint32);
662 }
663 len += sizeof(wxUint32);
664 psz += pa;
665 }
b5153fd8
VZ
666
667 if (buf && len<=n-sizeof(wxUint32))
668 *(wxUint32*)buf=0;
c91830cb
VZ
669
670 return len;
671}
672
673
674
675// swap 32bit MB to 16bit String
676size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
677{
678 size_t len=0;
679
680 while (*(wxUint32*)psz && (!buf || len < n))
681 {
682 char tmp[4];
683 tmp[0] = psz[3]; tmp[1] = psz[2];
684 tmp[2] = psz[1]; tmp[3] = psz[0];
685
686
687 wxUint16 cc[2];
688
689 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
690 if (pa == (size_t)-1)
691 return pa;
692
693 if (buf)
694 {
695 *buf++ = cc[0];
696 if (pa > 1)
697 *buf++ = cc[1];
698 }
699 len += pa;
700 psz += sizeof(wxUint32);
701 }
b5153fd8
VZ
702
703 if (buf && len<n)
704 *buf=0;
c91830cb
VZ
705
706 return len;
707}
708
709
710// swap 16bit String to 32bit MB
711size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
712{
713 size_t len=0;
714
715 while (*psz && (!buf || len < n))
716 {
717 char cc[4];
718
b5153fd8
VZ
719 // cast is ok for WC_UTF16
720 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
721 if (pa == (size_t)-1)
722 return pa;
723
724 if (buf)
725 {
726 *buf++ = cc[3];
727 *buf++ = cc[2];
728 *buf++ = cc[1];
729 *buf++ = cc[0];
730 }
731 len += sizeof(wxUint32);
732 psz += pa;
733 }
b5153fd8
VZ
734
735 if (buf && len<=n-sizeof(wxUint32))
736 *(wxUint32*)buf=0;
c91830cb
VZ
737
738 return len;
739}
740
741#else // WC_UTF16
742
743
744// copy 32bit MB to 32bit String
745size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
746{
747 size_t len=0;
748
749 while (*(wxUint32*)psz && (!buf || len < n))
750 {
751 if (buf)
752 *buf++ = *(wxUint32*)psz;
753 len++;
754 psz += sizeof(wxUint32);
755 }
b5153fd8
VZ
756
757 if (buf && len<n)
758 *buf=0;
c91830cb
VZ
759
760 return len;
761}
762
763
764// copy 32bit String to 32bit MB
765size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
766{
767 size_t len=0;
768
769 while (*psz && (!buf || len < n))
770 {
771 if (buf)
772 {
773 *(wxUint32*)buf = *psz;
774 buf += sizeof(wxUint32);
775 }
776
777 len += sizeof(wxUint32);
778 psz++;
779 }
780
b5153fd8
VZ
781 if (buf && len<=n-sizeof(wxUint32))
782 *(wxUint32*)buf=0;
c91830cb
VZ
783
784 return len;
785}
786
787
788// swap 32bit MB to 32bit String
789size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
790{
791 size_t len=0;
792
793 while (*(wxUint32*)psz && (!buf || len < n))
794 {
795 if (buf)
796 {
797 ((char *)buf)[0] = psz[3];
798 ((char *)buf)[1] = psz[2];
799 ((char *)buf)[2] = psz[1];
800 ((char *)buf)[3] = psz[0];
801 buf++;
802 }
803 len++;
804 psz += sizeof(wxUint32);
805 }
b5153fd8
VZ
806
807 if (buf && len<n)
808 *buf=0;
c91830cb
VZ
809
810 return len;
811}
812
813
814// swap 32bit String to 32bit MB
815size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
816{
817 size_t len=0;
818
819 while (*psz && (!buf || len < n))
820 {
821 if (buf)
822 {
823 *buf++ = ((char *)psz)[3];
824 *buf++ = ((char *)psz)[2];
825 *buf++ = ((char *)psz)[1];
826 *buf++ = ((char *)psz)[0];
827 }
828 len += sizeof(wxUint32);
829 psz++;
830 }
b5153fd8
VZ
831
832 if (buf && len<=n-sizeof(wxUint32))
833 *(wxUint32*)buf=0;
c91830cb
VZ
834
835 return len;
836}
837
838
839#endif // WC_UTF16
840
841
36acb880
VZ
842// ============================================================================
843// The classes doing conversion using the iconv_xxx() functions
844// ============================================================================
3caec1bb 845
b040e242 846#ifdef HAVE_ICONV
3a0d76bc 847
3caec1bb
VS
848// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
849// if output buffer is _exactly_ as big as needed. Such case is (unless there's
850// yet another bug in glibc) the only case when iconv() returns with (size_t)-1
851// (which means error) and says there are 0 bytes left in the input buffer --
852// when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
853// this alternative test for iconv() failure.
854// [This bug does not appear in glibc 2.2.]
855#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
856#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
857 (errno != E2BIG || bufLeft != 0))
858#else
859#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
860#endif
861
ab217dba 862#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880
VZ
863
864// ----------------------------------------------------------------------------
e95354ec 865// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
866// ----------------------------------------------------------------------------
867
e95354ec 868class wxMBConv_iconv : public wxMBConv
1cd52418
OK
869{
870public:
e95354ec
VZ
871 wxMBConv_iconv(const wxChar *name);
872 virtual ~wxMBConv_iconv();
36acb880 873
bde4baac
VZ
874 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
875 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 876
e95354ec 877 bool IsOk() const
36acb880
VZ
878 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
879
880protected:
881 // the iconv handlers used to translate from multibyte to wide char and in
882 // the other direction
883 iconv_t m2w,
884 w2m;
885
886private:
e95354ec 887 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
888 // available on this machine, it will remain NULL
889 static const char *ms_wcCharsetName;
890
891 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
892 // different endian-ness than the native one
405d8f46 893 static bool ms_wcNeedsSwap;
36acb880
VZ
894};
895
e95354ec
VZ
896const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
897bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 898
e95354ec 899wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 900{
04c79127
RR
901 // Do it the hard way
902 char cname[100];
903 for (size_t i = 0; i < wxStrlen(name)+1; i++)
904 cname[i] = (char) name[i];
905
36acb880
VZ
906 // check for charset that represents wchar_t:
907 if (ms_wcCharsetName == NULL)
f1339c56 908 {
e95354ec 909 ms_wcNeedsSwap = false;
dccce9ea 910
36acb880
VZ
911 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
912 ms_wcCharsetName = WC_NAME_BEST;
04c79127 913 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 914
36acb880
VZ
915 if (m2w == (iconv_t)-1)
916 {
917 // try charset w/o bytesex info (e.g. "UCS4")
918 // and check for bytesex ourselves:
919 ms_wcCharsetName = WC_NAME;
04c79127 920 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
921
922 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
923 if (m2w == (iconv_t)-1)
924 {
36acb880 925 ms_wcCharsetName = "WCHAR_T";
04c79127 926 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 927 }
3a0d76bc 928
36acb880
VZ
929 if (m2w != (iconv_t)-1)
930 {
931 char buf[2], *bufPtr;
932 wchar_t wbuf[2], *wbufPtr;
933 size_t insz, outsz;
934 size_t res;
935
936 buf[0] = 'A';
937 buf[1] = 0;
938 wbuf[0] = 0;
939 insz = 2;
940 outsz = SIZEOF_WCHAR_T * 2;
941 wbufPtr = wbuf;
942 bufPtr = buf;
943
944 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
945 (char**)&wbufPtr, &outsz);
946
947 if (ICONV_FAILED(res, insz))
3a0d76bc 948 {
36acb880
VZ
949 ms_wcCharsetName = NULL;
950 wxLogLastError(wxT("iconv"));
2b5f62a0 951 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
952 }
953 else
954 {
36acb880 955 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
956 }
957 }
36acb880
VZ
958 else
959 {
960 ms_wcCharsetName = NULL;
373658eb 961
77ffb593 962 // VS: we must not output an error here, since wxWidgets will safely
957686c8
VS
963 // fall back to using wxEncodingConverter.
964 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
965 //wxLogError(
36acb880 966 }
3a0d76bc 967 }
36acb880 968 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 969 }
36acb880 970 else // we already have ms_wcCharsetName
3caec1bb 971 {
04c79127 972 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 973 }
dccce9ea 974
36acb880
VZ
975 // NB: don't ever pass NULL to iconv_open(), it may crash!
976 if ( ms_wcCharsetName )
f1339c56 977 {
04c79127 978 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 979 }
405d8f46
VZ
980 else
981 {
982 w2m = (iconv_t)-1;
983 }
36acb880 984}
3caec1bb 985
e95354ec 986wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
987{
988 if ( m2w != (iconv_t)-1 )
989 iconv_close(m2w);
990 if ( w2m != (iconv_t)-1 )
991 iconv_close(w2m);
992}
3a0d76bc 993
bde4baac 994size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880
VZ
995{
996 size_t inbuf = strlen(psz);
997 size_t outbuf = n * SIZEOF_WCHAR_T;
998 size_t res, cres;
999 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1000 wchar_t *bufPtr = buf;
1001 const char *pszPtr = psz;
1002
1003 if (buf)
1004 {
1005 // have destination buffer, convert there
1006 cres = iconv(m2w,
1007 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1008 (char**)&bufPtr, &outbuf);
1009 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1010
36acb880 1011 if (ms_wcNeedsSwap)
3a0d76bc 1012 {
36acb880
VZ
1013 // convert to native endianness
1014 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 1015 }
adb45366 1016
49dd9820
VS
1017 // NB: iconv was given only strlen(psz) characters on input, and so
1018 // it couldn't convert the trailing zero. Let's do it ourselves
1019 // if there's some room left for it in the output buffer.
1020 if (res < n)
1021 buf[res] = 0;
36acb880
VZ
1022 }
1023 else
1024 {
1025 // no destination buffer... convert using temp buffer
1026 // to calculate destination buffer requirement
1027 wchar_t tbuf[8];
1028 res = 0;
1029 do {
1030 bufPtr = tbuf;
1031 outbuf = 8*SIZEOF_WCHAR_T;
1032
1033 cres = iconv(m2w,
1034 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1035 (char**)&bufPtr, &outbuf );
1036
1037 res += 8-(outbuf/SIZEOF_WCHAR_T);
1038 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1039 }
dccce9ea 1040
36acb880 1041 if (ICONV_FAILED(cres, inbuf))
f1339c56 1042 {
36acb880
VZ
1043 //VS: it is ok if iconv fails, hence trace only
1044 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1045 return (size_t)-1;
1046 }
1047
1048 return res;
1049}
1050
bde4baac 1051size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1052{
f8d791e0 1053 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1054 size_t outbuf = n;
1055 size_t res, cres;
3a0d76bc 1056
36acb880 1057 wchar_t *tmpbuf = 0;
3caec1bb 1058
36acb880
VZ
1059 if (ms_wcNeedsSwap)
1060 {
1061 // need to copy to temp buffer to switch endianness
1062 // this absolutely doesn't rock!
1063 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1064 // could be in read-only memory, or be accessed in some other thread)
1065 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1066 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1067 WC_BSWAP(tmpbuf, inbuf)
1068 psz=tmpbuf;
1069 }
3a0d76bc 1070
36acb880
VZ
1071 if (buf)
1072 {
1073 // have destination buffer, convert there
1074 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1075
36acb880 1076 res = n-outbuf;
adb45366 1077
49dd9820
VS
1078 // NB: iconv was given only wcslen(psz) characters on input, and so
1079 // it couldn't convert the trailing zero. Let's do it ourselves
1080 // if there's some room left for it in the output buffer.
1081 if (res < n)
1082 buf[0] = 0;
36acb880
VZ
1083 }
1084 else
1085 {
1086 // no destination buffer... convert using temp buffer
1087 // to calculate destination buffer requirement
1088 char tbuf[16];
1089 res = 0;
1090 do {
1091 buf = tbuf; outbuf = 16;
1092
1093 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1094
36acb880
VZ
1095 res += 16 - outbuf;
1096 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1097 }
dccce9ea 1098
36acb880
VZ
1099 if (ms_wcNeedsSwap)
1100 {
1101 free(tmpbuf);
1102 }
dccce9ea 1103
36acb880
VZ
1104 if (ICONV_FAILED(cres, inbuf))
1105 {
1106 //VS: it is ok if iconv fails, hence trace only
1107 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1108 return (size_t)-1;
1109 }
1110
1111 return res;
1112}
1113
b040e242 1114#endif // HAVE_ICONV
36acb880 1115
e95354ec 1116
36acb880
VZ
1117// ============================================================================
1118// Win32 conversion classes
1119// ============================================================================
1cd52418 1120
e95354ec 1121#ifdef wxHAVE_WIN32_MB2WC
373658eb 1122
8b04d4c4 1123// from utils.cpp
d775fa82 1124#if wxUSE_FONTMAP
8b04d4c4
VZ
1125extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1126extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1127#endif
373658eb 1128
e95354ec 1129class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1130{
1131public:
bde4baac
VZ
1132 wxMBConv_win32()
1133 {
1134 m_CodePage = CP_ACP;
1135 }
1136
7608a683 1137#if wxUSE_FONTMAP
e95354ec 1138 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1139 {
1140 m_CodePage = wxCharsetToCodepage(name);
1141 }
dccce9ea 1142
e95354ec 1143 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1144 {
1145 m_CodePage = wxEncodingToCodepage(encoding);
1146 }
7608a683 1147#endif
8b04d4c4 1148
bde4baac 1149 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1150 {
02272c9c
VZ
1151 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1152 // the behaviour is not compatible with the Unix version (using iconv)
1153 // and break the library itself, e.g. wxTextInputStream::NextChar()
1154 // wouldn't work if reading an incomplete MB char didn't result in an
1155 // error
2b5f62a0
VZ
1156 const size_t len = ::MultiByteToWideChar
1157 (
1158 m_CodePage, // code page
02272c9c 1159 MB_ERR_INVALID_CHARS, // flags: fall on error
2b5f62a0
VZ
1160 psz, // input string
1161 -1, // its length (NUL-terminated)
b4da152e 1162 buf, // output string
2b5f62a0
VZ
1163 buf ? n : 0 // size of output buffer
1164 );
1165
03a991bc
VZ
1166 // note that it returns count of written chars for buf != NULL and size
1167 // of the needed buffer for buf == NULL so in either case the length of
1168 // the string (which never includes the terminating NUL) is one less
1169 return len ? len - 1 : (size_t)-1;
f1339c56 1170 }
dccce9ea 1171
13dd924a 1172 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1173 {
13dd924a
VZ
1174 /*
1175 we have a problem here: by default, WideCharToMultiByte() may
1176 replace characters unrepresentable in the target code page with bad
1177 quality approximations such as turning "1/2" symbol (U+00BD) into
1178 "1" for the code pages which don't have it and we, obviously, want
1179 to avoid this at any price
d775fa82 1180
13dd924a
VZ
1181 the trouble is that this function does it _silently_, i.e. it won't
1182 even tell us whether it did or not... Win98/2000 and higher provide
1183 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1184 we have to resort to a round trip, i.e. check that converting back
1185 results in the same string -- this is, of course, expensive but
1186 otherwise we simply can't be sure to not garble the data.
1187 */
1188
1189 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1190 // it doesn't work with CJK encodings (which we test for rather roughly
1191 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1192 // supporting it
907173e5
WS
1193 BOOL usedDef wxDUMMY_INITIALIZE(false);
1194 BOOL *pUsedDef;
13dd924a
VZ
1195 int flags;
1196 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1197 {
1198 // it's our lucky day
1199 flags = WC_NO_BEST_FIT_CHARS;
1200 pUsedDef = &usedDef;
1201 }
1202 else // old system or unsupported encoding
1203 {
1204 flags = 0;
1205 pUsedDef = NULL;
1206 }
1207
2b5f62a0
VZ
1208 const size_t len = ::WideCharToMultiByte
1209 (
1210 m_CodePage, // code page
13dd924a
VZ
1211 flags, // either none or no best fit
1212 pwz, // input string
2b5f62a0
VZ
1213 -1, // it is (wide) NUL-terminated
1214 buf, // output buffer
1215 buf ? n : 0, // and its size
1216 NULL, // default "replacement" char
13dd924a 1217 pUsedDef // [out] was it used?
2b5f62a0
VZ
1218 );
1219
13dd924a
VZ
1220 if ( !len )
1221 {
1222 // function totally failed
1223 return (size_t)-1;
1224 }
1225
1226 // if we were really converting, check if we succeeded
1227 if ( buf )
1228 {
1229 if ( flags )
1230 {
1231 // check if the conversion failed, i.e. if any replacements
1232 // were done
1233 if ( usedDef )
1234 return (size_t)-1;
1235 }
1236 else // we must resort to double tripping...
1237 {
1238 wxWCharBuffer wcBuf(n);
1239 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1240 wcscmp(wcBuf, pwz) != 0 )
1241 {
1242 // we didn't obtain the same thing we started from, hence
1243 // the conversion was lossy and we consider that it failed
1244 return (size_t)-1;
1245 }
1246 }
1247 }
1248
03a991bc 1249 // see the comment above for the reason of "len - 1"
13dd924a 1250 return len - 1;
f1339c56 1251 }
dccce9ea 1252
13dd924a
VZ
1253 bool IsOk() const { return m_CodePage != -1; }
1254
1255private:
1256 static bool CanUseNoBestFit()
1257 {
1258 static int s_isWin98Or2k = -1;
1259
1260 if ( s_isWin98Or2k == -1 )
1261 {
1262 int verMaj, verMin;
1263 switch ( wxGetOsVersion(&verMaj, &verMin) )
1264 {
1265 case wxWIN95:
1266 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1267 break;
1268
1269 case wxWINDOWS_NT:
1270 s_isWin98Or2k = verMaj >= 5;
1271 break;
1272
1273 default:
1274 // unknown, be conseravtive by default
1275 s_isWin98Or2k = 0;
1276 }
1277
1278 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1279 }
1280
1281 return s_isWin98Or2k == 1;
1282 }
f1339c56 1283
b1d66b54 1284 long m_CodePage;
1cd52418 1285};
e95354ec
VZ
1286
1287#endif // wxHAVE_WIN32_MB2WC
1288
f7e98dee
RN
1289// ============================================================================
1290// Cocoa conversion classes
1291// ============================================================================
1292
1293#if defined(__WXCOCOA__)
1294
ecd9653b 1295// RN: There is no UTF-32 support in either Core Foundation or
f7e98dee
RN
1296// Cocoa. Strangely enough, internally Core Foundation uses
1297// UTF 32 internally quite a bit - its just not public (yet).
1298
1299#include <CoreFoundation/CFString.h>
1300#include <CoreFoundation/CFStringEncodingExt.h>
1301
1302CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b
WS
1303{
1304 CFStringEncoding enc = 0 ;
1305 if ( encoding == wxFONTENCODING_DEFAULT )
1306 {
f7e98dee 1307#if wxUSE_GUI
ecd9653b 1308 encoding = wxFont::GetDefaultEncoding() ;
f7e98dee 1309#else
ecd9653b 1310 encoding = wxLocale::GetSystemEncoding() ;
f7e98dee 1311#endif
ecd9653b
WS
1312 }
1313 else switch( encoding)
1314 {
1315 case wxFONTENCODING_ISO8859_1 :
1316 enc = kCFStringEncodingISOLatin1 ;
1317 break ;
1318 case wxFONTENCODING_ISO8859_2 :
1319 enc = kCFStringEncodingISOLatin2;
1320 break ;
1321 case wxFONTENCODING_ISO8859_3 :
1322 enc = kCFStringEncodingISOLatin3 ;
1323 break ;
1324 case wxFONTENCODING_ISO8859_4 :
1325 enc = kCFStringEncodingISOLatin4;
1326 break ;
1327 case wxFONTENCODING_ISO8859_5 :
1328 enc = kCFStringEncodingISOLatinCyrillic;
1329 break ;
1330 case wxFONTENCODING_ISO8859_6 :
1331 enc = kCFStringEncodingISOLatinArabic;
1332 break ;
1333 case wxFONTENCODING_ISO8859_7 :
1334 enc = kCFStringEncodingISOLatinGreek;
1335 break ;
1336 case wxFONTENCODING_ISO8859_8 :
1337 enc = kCFStringEncodingISOLatinHebrew;
1338 break ;
1339 case wxFONTENCODING_ISO8859_9 :
1340 enc = kCFStringEncodingISOLatin5;
1341 break ;
1342 case wxFONTENCODING_ISO8859_10 :
1343 enc = kCFStringEncodingISOLatin6;
1344 break ;
1345 case wxFONTENCODING_ISO8859_11 :
1346 enc = kCFStringEncodingISOLatinThai;
1347 break ;
1348 case wxFONTENCODING_ISO8859_13 :
1349 enc = kCFStringEncodingISOLatin7;
1350 break ;
1351 case wxFONTENCODING_ISO8859_14 :
1352 enc = kCFStringEncodingISOLatin8;
1353 break ;
1354 case wxFONTENCODING_ISO8859_15 :
1355 enc = kCFStringEncodingISOLatin9;
1356 break ;
1357
1358 case wxFONTENCODING_KOI8 :
1359 enc = kCFStringEncodingKOI8_R;
1360 break ;
1361 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1362 enc = kCFStringEncodingDOSRussian;
1363 break ;
1364
1365// case wxFONTENCODING_BULGARIAN :
1366// enc = ;
1367// break ;
1368
1369 case wxFONTENCODING_CP437 :
1370 enc =kCFStringEncodingDOSLatinUS ;
1371 break ;
1372 case wxFONTENCODING_CP850 :
1373 enc = kCFStringEncodingDOSLatin1;
1374 break ;
1375 case wxFONTENCODING_CP852 :
1376 enc = kCFStringEncodingDOSLatin2;
1377 break ;
1378 case wxFONTENCODING_CP855 :
1379 enc = kCFStringEncodingDOSCyrillic;
1380 break ;
1381 case wxFONTENCODING_CP866 :
1382 enc =kCFStringEncodingDOSRussian ;
1383 break ;
1384 case wxFONTENCODING_CP874 :
1385 enc = kCFStringEncodingDOSThai;
1386 break ;
1387 case wxFONTENCODING_CP932 :
1388 enc = kCFStringEncodingDOSJapanese;
1389 break ;
1390 case wxFONTENCODING_CP936 :
1391 enc =kCFStringEncodingDOSChineseSimplif ;
1392 break ;
1393 case wxFONTENCODING_CP949 :
1394 enc = kCFStringEncodingDOSKorean;
1395 break ;
1396 case wxFONTENCODING_CP950 :
1397 enc = kCFStringEncodingDOSChineseTrad;
1398 break ;
1399
1400 case wxFONTENCODING_CP1250 :
1401 enc = kCFStringEncodingWindowsLatin2;
1402 break ;
1403 case wxFONTENCODING_CP1251 :
1404 enc =kCFStringEncodingWindowsCyrillic ;
1405 break ;
1406 case wxFONTENCODING_CP1252 :
1407 enc =kCFStringEncodingWindowsLatin1 ;
1408 break ;
1409 case wxFONTENCODING_CP1253 :
1410 enc = kCFStringEncodingWindowsGreek;
1411 break ;
1412 case wxFONTENCODING_CP1254 :
1413 enc = kCFStringEncodingWindowsLatin5;
1414 break ;
1415 case wxFONTENCODING_CP1255 :
1416 enc =kCFStringEncodingWindowsHebrew ;
1417 break ;
1418 case wxFONTENCODING_CP1256 :
1419 enc =kCFStringEncodingWindowsArabic ;
1420 break ;
1421 case wxFONTENCODING_CP1257 :
1422 enc = kCFStringEncodingWindowsBalticRim;
1423 break ;
1424 case wxFONTENCODING_UTF7 :
1425 enc = kCFStringEncodingNonLossyASCII ;
1426 break ;
1427 case wxFONTENCODING_UTF8 :
1428 enc = kCFStringEncodingUTF8 ;
1429 break ;
1430 case wxFONTENCODING_EUC_JP :
1431 enc = kCFStringEncodingEUC_JP;
1432 break ;
1433 case wxFONTENCODING_UTF16 :
f7e98dee 1434 enc = kCFStringEncodingUnicode ;
ecd9653b 1435 break ;
f7e98dee
RN
1436 case wxFONTENCODING_MACROMAN :
1437 enc = kCFStringEncodingMacRoman ;
1438 break ;
1439 case wxFONTENCODING_MACJAPANESE :
1440 enc = kCFStringEncodingMacJapanese ;
1441 break ;
1442 case wxFONTENCODING_MACCHINESETRAD :
1443 enc = kCFStringEncodingMacChineseTrad ;
1444 break ;
1445 case wxFONTENCODING_MACKOREAN :
1446 enc = kCFStringEncodingMacKorean ;
1447 break ;
1448 case wxFONTENCODING_MACARABIC :
1449 enc = kCFStringEncodingMacArabic ;
1450 break ;
1451 case wxFONTENCODING_MACHEBREW :
1452 enc = kCFStringEncodingMacHebrew ;
1453 break ;
1454 case wxFONTENCODING_MACGREEK :
1455 enc = kCFStringEncodingMacGreek ;
1456 break ;
1457 case wxFONTENCODING_MACCYRILLIC :
1458 enc = kCFStringEncodingMacCyrillic ;
1459 break ;
1460 case wxFONTENCODING_MACDEVANAGARI :
1461 enc = kCFStringEncodingMacDevanagari ;
1462 break ;
1463 case wxFONTENCODING_MACGURMUKHI :
1464 enc = kCFStringEncodingMacGurmukhi ;
1465 break ;
1466 case wxFONTENCODING_MACGUJARATI :
1467 enc = kCFStringEncodingMacGujarati ;
1468 break ;
1469 case wxFONTENCODING_MACORIYA :
1470 enc = kCFStringEncodingMacOriya ;
1471 break ;
1472 case wxFONTENCODING_MACBENGALI :
1473 enc = kCFStringEncodingMacBengali ;
1474 break ;
1475 case wxFONTENCODING_MACTAMIL :
1476 enc = kCFStringEncodingMacTamil ;
1477 break ;
1478 case wxFONTENCODING_MACTELUGU :
1479 enc = kCFStringEncodingMacTelugu ;
1480 break ;
1481 case wxFONTENCODING_MACKANNADA :
1482 enc = kCFStringEncodingMacKannada ;
1483 break ;
1484 case wxFONTENCODING_MACMALAJALAM :
1485 enc = kCFStringEncodingMacMalayalam ;
1486 break ;
1487 case wxFONTENCODING_MACSINHALESE :
1488 enc = kCFStringEncodingMacSinhalese ;
1489 break ;
1490 case wxFONTENCODING_MACBURMESE :
1491 enc = kCFStringEncodingMacBurmese ;
1492 break ;
1493 case wxFONTENCODING_MACKHMER :
1494 enc = kCFStringEncodingMacKhmer ;
1495 break ;
1496 case wxFONTENCODING_MACTHAI :
1497 enc = kCFStringEncodingMacThai ;
1498 break ;
1499 case wxFONTENCODING_MACLAOTIAN :
1500 enc = kCFStringEncodingMacLaotian ;
1501 break ;
1502 case wxFONTENCODING_MACGEORGIAN :
1503 enc = kCFStringEncodingMacGeorgian ;
1504 break ;
1505 case wxFONTENCODING_MACARMENIAN :
1506 enc = kCFStringEncodingMacArmenian ;
1507 break ;
1508 case wxFONTENCODING_MACCHINESESIMP :
1509 enc = kCFStringEncodingMacChineseSimp ;
1510 break ;
1511 case wxFONTENCODING_MACTIBETAN :
1512 enc = kCFStringEncodingMacTibetan ;
1513 break ;
1514 case wxFONTENCODING_MACMONGOLIAN :
1515 enc = kCFStringEncodingMacMongolian ;
1516 break ;
1517 case wxFONTENCODING_MACETHIOPIC :
1518 enc = kCFStringEncodingMacEthiopic ;
1519 break ;
1520 case wxFONTENCODING_MACCENTRALEUR :
1521 enc = kCFStringEncodingMacCentralEurRoman ;
1522 break ;
1523 case wxFONTENCODING_MACVIATNAMESE :
1524 enc = kCFStringEncodingMacVietnamese ;
1525 break ;
1526 case wxFONTENCODING_MACARABICEXT :
1527 enc = kCFStringEncodingMacExtArabic ;
1528 break ;
1529 case wxFONTENCODING_MACSYMBOL :
1530 enc = kCFStringEncodingMacSymbol ;
1531 break ;
1532 case wxFONTENCODING_MACDINGBATS :
1533 enc = kCFStringEncodingMacDingbats ;
1534 break ;
1535 case wxFONTENCODING_MACTURKISH :
1536 enc = kCFStringEncodingMacTurkish ;
1537 break ;
1538 case wxFONTENCODING_MACCROATIAN :
1539 enc = kCFStringEncodingMacCroatian ;
1540 break ;
1541 case wxFONTENCODING_MACICELANDIC :
1542 enc = kCFStringEncodingMacIcelandic ;
1543 break ;
1544 case wxFONTENCODING_MACROMANIAN :
1545 enc = kCFStringEncodingMacRomanian ;
1546 break ;
1547 case wxFONTENCODING_MACCELTIC :
1548 enc = kCFStringEncodingMacCeltic ;
1549 break ;
1550 case wxFONTENCODING_MACGAELIC :
1551 enc = kCFStringEncodingMacGaelic ;
1552 break ;
ecd9653b
WS
1553// case wxFONTENCODING_MACKEYBOARD :
1554// enc = kCFStringEncodingMacKeyboardGlyphs ;
1555// break ;
1556 default :
1557 // because gcc is picky
1558 break ;
1559 } ;
1560 return enc ;
f7e98dee
RN
1561}
1562
1563wxFontEncoding wxFontEncFromCFStringEnc(CFStringEncoding encoding)
ecd9653b
WS
1564{
1565 wxFontEncoding enc = wxFONTENCODING_DEFAULT ;
1566
1567 switch( encoding)
1568 {
1569 case kCFStringEncodingISOLatin1 :
1570 enc = wxFONTENCODING_ISO8859_1 ;
1571 break ;
1572 case kCFStringEncodingISOLatin2 :
1573 enc = wxFONTENCODING_ISO8859_2;
1574 break ;
1575 case kCFStringEncodingISOLatin3 :
1576 enc = wxFONTENCODING_ISO8859_3 ;
1577 break ;
1578 case kCFStringEncodingISOLatin4 :
1579 enc = wxFONTENCODING_ISO8859_4;
1580 break ;
1581 case kCFStringEncodingISOLatinCyrillic :
1582 enc = wxFONTENCODING_ISO8859_5;
1583 break ;
1584 case kCFStringEncodingISOLatinArabic :
1585 enc = wxFONTENCODING_ISO8859_6;
1586 break ;
1587 case kCFStringEncodingISOLatinGreek :
1588 enc = wxFONTENCODING_ISO8859_7;
1589 break ;
1590 case kCFStringEncodingISOLatinHebrew :
1591 enc = wxFONTENCODING_ISO8859_8;
1592 break ;
1593 case kCFStringEncodingISOLatin5 :
1594 enc = wxFONTENCODING_ISO8859_9;
1595 break ;
1596 case kCFStringEncodingISOLatin6 :
1597 enc = wxFONTENCODING_ISO8859_10;
1598 break ;
1599 case kCFStringEncodingISOLatin7 :
1600 enc = wxFONTENCODING_ISO8859_13;
1601 break ;
1602 case kCFStringEncodingISOLatin8 :
1603 enc = wxFONTENCODING_ISO8859_14;
1604 break ;
1605 case kCFStringEncodingISOLatin9 :
1606 enc =wxFONTENCODING_ISO8859_15 ;
1607 break ;
1608
1609 case kCFStringEncodingKOI8_R :
1610 enc = wxFONTENCODING_KOI8;
1611 break ;
1612
1613// case :
1614// enc = wxFONTENCODING_BULGARIAN;
1615// break ;
1616
1617 case kCFStringEncodingDOSLatinUS :
1618 enc = wxFONTENCODING_CP437;
1619 break ;
1620 case kCFStringEncodingDOSLatin1 :
1621 enc = wxFONTENCODING_CP850;
1622 break ;
1623 case kCFStringEncodingDOSLatin2 :
1624 enc =wxFONTENCODING_CP852 ;
1625 break ;
1626 case kCFStringEncodingDOSCyrillic :
1627 enc = wxFONTENCODING_CP855;
1628 break ;
1629 case kCFStringEncodingDOSRussian :
1630 enc = wxFONTENCODING_CP866;
1631 break ;
1632 case kCFStringEncodingDOSThai :
1633 enc =wxFONTENCODING_CP874 ;
1634 break ;
1635 case kCFStringEncodingDOSJapanese :
1636 enc = wxFONTENCODING_CP932;
1637 break ;
1638 case kCFStringEncodingDOSChineseSimplif :
1639 enc = wxFONTENCODING_CP936;
1640 break ;
1641 case kCFStringEncodingDOSKorean :
1642 enc = wxFONTENCODING_CP949;
1643 break ;
1644 case kCFStringEncodingDOSChineseTrad :
1645 enc = wxFONTENCODING_CP950;
1646 break ;
1647
1648 case kCFStringEncodingWindowsLatin2 :
1649 enc = wxFONTENCODING_CP1250;
1650 break ;
1651 case kCFStringEncodingWindowsCyrillic :
1652 enc = wxFONTENCODING_CP1251;
1653 break ;
1654 case kCFStringEncodingWindowsLatin1 :
1655 enc = wxFONTENCODING_CP1252;
1656 break ;
1657 case kCFStringEncodingWindowsGreek :
1658 enc = wxFONTENCODING_CP1253;
1659 break ;
1660 case kCFStringEncodingWindowsLatin5 :
1661 enc = wxFONTENCODING_CP1254;
1662 break ;
1663 case kCFStringEncodingWindowsHebrew :
1664 enc = wxFONTENCODING_CP1255;
1665 break ;
1666 case kCFStringEncodingWindowsArabic :
1667 enc = wxFONTENCODING_CP1256;
1668 break ;
1669 case kCFStringEncodingWindowsBalticRim :
1670 enc =wxFONTENCODING_CP1257 ;
1671 break ;
1672 case kCFStringEncodingEUC_JP :
1673 enc = wxFONTENCODING_EUC_JP;
1674 break ;
f7e98dee
RN
1675 case kCFStringEncodingUnicode :
1676 enc = wxFONTENCODING_UTF16;
1677 break;
1678 case kCFStringEncodingMacRoman :
1679 enc = wxFONTENCODING_MACROMAN ;
1680 break ;
1681 case kCFStringEncodingMacJapanese :
1682 enc = wxFONTENCODING_MACJAPANESE ;
1683 break ;
1684 case kCFStringEncodingMacChineseTrad :
1685 enc = wxFONTENCODING_MACCHINESETRAD ;
1686 break ;
1687 case kCFStringEncodingMacKorean :
1688 enc = wxFONTENCODING_MACKOREAN ;
1689 break ;
1690 case kCFStringEncodingMacArabic :
1691 enc =wxFONTENCODING_MACARABIC ;
1692 break ;
1693 case kCFStringEncodingMacHebrew :
1694 enc = wxFONTENCODING_MACHEBREW ;
1695 break ;
1696 case kCFStringEncodingMacGreek :
1697 enc = wxFONTENCODING_MACGREEK ;
1698 break ;
1699 case kCFStringEncodingMacCyrillic :
1700 enc = wxFONTENCODING_MACCYRILLIC ;
1701 break ;
1702 case kCFStringEncodingMacDevanagari :
1703 enc = wxFONTENCODING_MACDEVANAGARI ;
1704 break ;
1705 case kCFStringEncodingMacGurmukhi :
1706 enc = wxFONTENCODING_MACGURMUKHI ;
1707 break ;
1708 case kCFStringEncodingMacGujarati :
1709 enc = wxFONTENCODING_MACGUJARATI ;
1710 break ;
1711 case kCFStringEncodingMacOriya :
1712 enc =wxFONTENCODING_MACORIYA ;
1713 break ;
1714 case kCFStringEncodingMacBengali :
1715 enc =wxFONTENCODING_MACBENGALI ;
1716 break ;
1717 case kCFStringEncodingMacTamil :
1718 enc = wxFONTENCODING_MACTAMIL ;
1719 break ;
1720 case kCFStringEncodingMacTelugu :
1721 enc = wxFONTENCODING_MACTELUGU ;
1722 break ;
1723 case kCFStringEncodingMacKannada :
1724 enc = wxFONTENCODING_MACKANNADA ;
1725 break ;
1726 case kCFStringEncodingMacMalayalam :
1727 enc = wxFONTENCODING_MACMALAJALAM ;
1728 break ;
1729 case kCFStringEncodingMacSinhalese :
1730 enc = wxFONTENCODING_MACSINHALESE ;
1731 break ;
1732 case kCFStringEncodingMacBurmese :
1733 enc = wxFONTENCODING_MACBURMESE ;
1734 break ;
1735 case kCFStringEncodingMacKhmer :
1736 enc = wxFONTENCODING_MACKHMER ;
1737 break ;
1738 case kCFStringEncodingMacThai :
1739 enc = wxFONTENCODING_MACTHAI ;
1740 break ;
1741 case kCFStringEncodingMacLaotian :
1742 enc = wxFONTENCODING_MACLAOTIAN ;
1743 break ;
1744 case kCFStringEncodingMacGeorgian :
1745 enc = wxFONTENCODING_MACGEORGIAN ;
1746 break ;
1747 case kCFStringEncodingMacArmenian :
1748 enc = wxFONTENCODING_MACARMENIAN ;
1749 break ;
1750 case kCFStringEncodingMacChineseSimp :
1751 enc = wxFONTENCODING_MACCHINESESIMP ;
1752 break ;
1753 case kCFStringEncodingMacTibetan :
1754 enc = wxFONTENCODING_MACTIBETAN ;
1755 break ;
1756 case kCFStringEncodingMacMongolian :
1757 enc = wxFONTENCODING_MACMONGOLIAN ;
1758 break ;
1759 case kCFStringEncodingMacEthiopic :
1760 enc = wxFONTENCODING_MACETHIOPIC ;
1761 break ;
1762 case kCFStringEncodingMacCentralEurRoman:
1763 enc = wxFONTENCODING_MACCENTRALEUR ;
1764 break ;
1765 case kCFStringEncodingMacVietnamese:
1766 enc = wxFONTENCODING_MACVIATNAMESE ;
1767 break ;
1768 case kCFStringEncodingMacExtArabic :
1769 enc = wxFONTENCODING_MACARABICEXT ;
1770 break ;
1771 case kCFStringEncodingMacSymbol :
1772 enc = wxFONTENCODING_MACSYMBOL ;
1773 break ;
1774 case kCFStringEncodingMacDingbats :
1775 enc = wxFONTENCODING_MACDINGBATS ;
1776 break ;
1777 case kCFStringEncodingMacTurkish :
1778 enc = wxFONTENCODING_MACTURKISH ;
1779 break ;
1780 case kCFStringEncodingMacCroatian :
1781 enc = wxFONTENCODING_MACCROATIAN ;
1782 break ;
1783 case kCFStringEncodingMacIcelandic :
1784 enc = wxFONTENCODING_MACICELANDIC ;
1785 break ;
1786 case kCFStringEncodingMacRomanian :
1787 enc = wxFONTENCODING_MACROMANIAN ;
1788 break ;
1789 case kCFStringEncodingMacCeltic :
1790 enc = wxFONTENCODING_MACCELTIC ;
1791 break ;
1792 case kCFStringEncodingMacGaelic :
1793 enc = wxFONTENCODING_MACGAELIC ;
1794 break ;
1795// case kCFStringEncodingMacKeyboardGlyphs :
1796// enc = wxFONTENCODING_MACKEYBOARD ;
ecd9653b
WS
1797// break ;
1798 } ;
1799 return enc ;
f7e98dee
RN
1800}
1801
1802class wxMBConv_cocoa : public wxMBConv
1803{
1804public:
1805 wxMBConv_cocoa()
1806 {
1807 Init(CFStringGetSystemEncoding()) ;
1808 }
1809
1810 wxMBConv_cocoa(const wxChar* name)
1811 {
1812 Init( wxCFStringEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
1813 }
1814
1815 wxMBConv_cocoa(wxFontEncoding encoding)
1816 {
1817 Init( wxCFStringEncFromFontEnc(encoding) );
1818 }
1819
1820 ~wxMBConv_cocoa()
1821 {
1822 }
1823
1824 void Init( CFStringEncoding encoding)
1825 {
1826 m_char_encoding = encoding ;
1827 m_unicode_encoding = kCFStringEncodingUnicode;
1828 }
1829
1830 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
1831 {
1832 wxASSERT(szUnConv);
ecd9653b 1833
f7e98dee
RN
1834 size_t nBufSize = strlen(szUnConv) + 1;
1835 size_t nRealOutSize;
1836
ecd9653b
WS
1837 UniChar* szUniCharBuffer = (UniChar*) szOut;
1838 wchar_t* szConvBuffer = szOut;
1839
f7e98dee
RN
1840 if (szConvBuffer == NULL && nOutSize != 0)
1841 {
1842 szConvBuffer = new wchar_t[nOutSize] ;
1843 }
1844
1845#if SIZEOF_WCHAR_T == 4
1846 szUniCharBuffer = new UniChar[nOutSize];
1847#endif
1848
1849 CFDataRef theData = CFDataCreateWithBytesNoCopy (
ecd9653b
WS
1850 NULL, //allocator
1851 (const UInt8*)szUnConv,
f7e98dee 1852 nBufSize - 1,
ecd9653b
WS
1853 NULL //deallocator
1854 );
f7e98dee
RN
1855
1856 wxASSERT(theData);
1857
1858 CFStringRef theString = CFStringCreateFromExternalRepresentation (
1859 NULL,
1860 theData,
ecd9653b 1861 m_char_encoding
f7e98dee
RN
1862 );
1863
1864 wxASSERT(theString);
1865
1866 if (nOutSize == 0)
1867 {
1868 nRealOutSize = CFStringGetLength(theString) + 1;
1869 CFRelease(theString);
1870 return nRealOutSize - 1;
1871 }
ecd9653b 1872
f7e98dee 1873 CFRange theRange = { 0, CFStringGetLength(theString) };
ecd9653b 1874
f7e98dee 1875 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
ecd9653b
WS
1876
1877
f7e98dee 1878 nRealOutSize = (CFStringGetLength(theString) + 1);
ecd9653b 1879
f7e98dee 1880 CFRelease(theString);
ecd9653b 1881
f7e98dee
RN
1882 szUniCharBuffer[nRealOutSize-1] = '\0' ;
1883
1884#if SIZEOF_WCHAR_T == 4
1885 wxMBConvUTF16 converter ;
1886 converter.MB2WC(szConvBuffer , (const char*)szUniCharBuffer , nRealOutSize ) ;
1887 delete[] szUniCharBuffer;
1888#endif
1889 if ( szOut == NULL )
1890 delete [] szConvBuffer;
1891
1892 return nRealOutSize ;
1893 }
1894
1895 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
1896 {
1897 size_t nBufSize = wxWcslen(szUnConv) + 1;
1898 size_t nRealOutSize;
1899 char* szBuffer = szOut;
1900 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 1901
f7e98dee
RN
1902 if (szOut == NULL)
1903 {
1904 // worst case
72606b43 1905 nRealOutSize = ((nBufSize - 1) << 2)+1 ;
f7e98dee
RN
1906 szBuffer = new char[ nRealOutSize ] ;
1907 }
1908 else
1909 nRealOutSize = nOutSize;
1910
1911#if SIZEOF_WCHAR_T == 4
1912 wxMBConvUTF16BE converter ;
1913 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
1914 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
1915 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
1916 nBufSize /= sizeof(UniChar);
1917 ++nBufSize;
1918#endif
1919
1920 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
1921 NULL, //allocator
1922 szUniBuffer,
1923 nBufSize,
1924 NULL //deallocator
1925 );
ecd9653b 1926
f7e98dee 1927 wxASSERT(theString);
ecd9653b 1928
f7e98dee
RN
1929 //Note that CER puts a BOM when converting to unicode
1930 //so we may want to check and use getchars instead in that case
1931 CFDataRef theData = CFStringCreateExternalRepresentation(
1932 NULL, //allocator
1933 theString,
1934 m_char_encoding,
1935 0 //what to put in characters that can't be converted -
1936 //0 tells CFString to return NULL if it meets such a character
1937 );
1938
1939 if(!theData)
1940 return (size_t)-1;
ecd9653b 1941
f7e98dee 1942 CFRelease(theString);
ecd9653b 1943
f7e98dee
RN
1944 nRealOutSize = CFDataGetLength(theData);
1945
1946 if ( szOut == NULL )
1947 delete[] szBuffer;
1948
1949 if(nOutSize == 0)
1950 {
1951//TODO: This gets flagged as a non-malloced address by the debugger...
1952//#if SIZEOF_WCHAR_T == 4
1953// delete[] szUniBuffer;
1954//#endif
1955 CFRelease(theData);
1956 return nRealOutSize - 1;
1957 }
ecd9653b 1958
f7e98dee
RN
1959 CFRange theRange = {0, CFDataGetLength(theData) };
1960 CFDataGetBytes(theData, theRange, (UInt8*) szBuffer);
ecd9653b
WS
1961
1962 CFRelease(theData);
1963
f7e98dee
RN
1964//TODO: This gets flagged as a non-malloced address by the debugger...
1965//#if SIZEOF_WCHAR_T == 4
1966// delete[] szUniBuffer;
1967//#endif
1968 return nRealOutSize - 1;
1969 }
1970
1971 bool IsOk() const
ecd9653b 1972 {
f7e98dee 1973 //TODO: check for invalid en/de/coding
ecd9653b 1974 return true;
f7e98dee
RN
1975 }
1976
1977private:
1978 CFStringEncoding m_char_encoding ;
1979 CFStringEncoding m_unicode_encoding ;
1980};
1981
1982#endif // defined(__WXCOCOA__)
1983
335d31e0
SC
1984// ============================================================================
1985// Mac conversion classes
1986// ============================================================================
1987
1988#if defined(__WXMAC__) && defined(TARGET_CARBON)
1989
1990class wxMBConv_mac : public wxMBConv
1991{
1992public:
1993 wxMBConv_mac()
1994 {
1995 Init(CFStringGetSystemEncoding()) ;
1996 }
1997
1998 wxMBConv_mac(const wxChar* name)
1999 {
d775fa82 2000 Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
335d31e0
SC
2001 }
2002
2003 wxMBConv_mac(wxFontEncoding encoding)
2004 {
d775fa82
WS
2005 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2006 }
2007
2008 ~wxMBConv_mac()
2009 {
2010 OSStatus status = noErr ;
2011 status = TECDisposeConverter(m_MB2WC_converter);
2012 status = TECDisposeConverter(m_WC2MB_converter);
2013 }
2014
2015
2016 void Init( TextEncodingBase encoding)
2017 {
2018 OSStatus status = noErr ;
2019 m_char_encoding = encoding ;
2020 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2021
2022 status = TECCreateConverter(&m_MB2WC_converter,
2023 m_char_encoding,
2024 m_unicode_encoding);
2025 status = TECCreateConverter(&m_WC2MB_converter,
2026 m_unicode_encoding,
2027 m_char_encoding);
2028 }
2029
335d31e0
SC
2030 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2031 {
d775fa82
WS
2032 OSStatus status = noErr ;
2033 ByteCount byteOutLen ;
2034 ByteCount byteInLen = strlen(psz) ;
2035 wchar_t *tbuf = NULL ;
2036 UniChar* ubuf = NULL ;
2037 size_t res = 0 ;
2038
2039 if (buf == NULL)
2040 {
2041 n = byteInLen ;
2042 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2043 }
2044 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
f3a355ce 2045#if SIZEOF_WCHAR_T == 4
d775fa82 2046 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2047#else
d775fa82 2048 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2049#endif
d775fa82
WS
2050 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2051 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
f3a355ce 2052#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2053 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2054 // is not properly terminated we get random characters at the end
2055 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d775fa82
WS
2056 wxMBConvUTF16BE converter ;
2057 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2058 free( ubuf ) ;
f3a355ce 2059#else
d775fa82 2060 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2061#endif
d775fa82
WS
2062 if ( buf == NULL )
2063 free(tbuf) ;
335d31e0 2064
335d31e0
SC
2065 if ( buf && res < n)
2066 buf[res] = 0;
2067
d775fa82 2068 return res ;
335d31e0
SC
2069 }
2070
2071 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82
WS
2072 {
2073 OSStatus status = noErr ;
2074 ByteCount byteOutLen ;
2075 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2076
2077 char *tbuf = NULL ;
2078
2079 if (buf == NULL)
2080 {
2081 // worst case
72606b43 2082 n = byteInLen << 1 ;
d775fa82
WS
2083 tbuf = (char*) malloc( n ) ;
2084 }
2085
2086 ByteCount byteBufferLen = n ;
2087 UniChar* ubuf = NULL ;
f3a355ce 2088#if SIZEOF_WCHAR_T == 4
d775fa82
WS
2089 wxMBConvUTF16BE converter ;
2090 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2091 byteInLen = unicharlen ;
2092 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2093 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce 2094#else
d775fa82 2095 ubuf = (UniChar*) psz ;
f3a355ce 2096#endif
d775fa82
WS
2097 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2098 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
f3a355ce 2099#if SIZEOF_WCHAR_T == 4
d775fa82 2100 free( ubuf ) ;
f3a355ce 2101#endif
d775fa82
WS
2102 if ( buf == NULL )
2103 free(tbuf) ;
335d31e0 2104
d775fa82 2105 size_t res = byteOutLen ;
335d31e0
SC
2106 if ( buf && res < n)
2107 buf[res] = 0;
2108
d775fa82 2109 return res ;
335d31e0
SC
2110 }
2111
2112 bool IsOk() const
2113 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2114
2115private:
d775fa82
WS
2116 TECObjectRef m_MB2WC_converter ;
2117 TECObjectRef m_WC2MB_converter ;
2118
2119 TextEncodingBase m_char_encoding ;
2120 TextEncodingBase m_unicode_encoding ;
335d31e0
SC
2121};
2122
2123#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 2124
36acb880
VZ
2125// ============================================================================
2126// wxEncodingConverter based conversion classes
2127// ============================================================================
2128
1e6feb95 2129#if wxUSE_FONTMAP
1cd52418 2130
e95354ec 2131class wxMBConv_wxwin : public wxMBConv
1cd52418 2132{
8b04d4c4
VZ
2133private:
2134 void Init()
2135 {
2136 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2137 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2138 }
2139
6001e347 2140public:
f1339c56
RR
2141 // temporarily just use wxEncodingConverter stuff,
2142 // so that it works while a better implementation is built
e95354ec 2143 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
2144 {
2145 if (name)
e95354ec 2146 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
2147 else
2148 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 2149
8b04d4c4
VZ
2150 Init();
2151 }
2152
e95354ec 2153 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
2154 {
2155 m_enc = enc;
2156
2157 Init();
f1339c56 2158 }
dccce9ea 2159
bde4baac 2160 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
2161 {
2162 size_t inbuf = strlen(psz);
dccce9ea 2163 if (buf)
4def3b35 2164 m2w.Convert(psz,buf);
f1339c56
RR
2165 return inbuf;
2166 }
dccce9ea 2167
bde4baac 2168 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 2169 {
f8d791e0 2170 const size_t inbuf = wxWcslen(psz);
f1339c56
RR
2171 if (buf)
2172 w2m.Convert(psz,buf);
dccce9ea 2173
f1339c56
RR
2174 return inbuf;
2175 }
dccce9ea 2176
e95354ec 2177 bool IsOk() const { return m_ok; }
f1339c56
RR
2178
2179public:
8b04d4c4 2180 wxFontEncoding m_enc;
f1339c56 2181 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
2182
2183 // were we initialized successfully?
2184 bool m_ok;
fc7a2a60 2185
e95354ec 2186 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 2187};
6001e347 2188
1e6feb95
VZ
2189#endif // wxUSE_FONTMAP
2190
36acb880
VZ
2191// ============================================================================
2192// wxCSConv implementation
2193// ============================================================================
2194
8b04d4c4 2195void wxCSConv::Init()
6001e347 2196{
e95354ec
VZ
2197 m_name = NULL;
2198 m_convReal = NULL;
2199 m_deferred = true;
2200}
2201
8b04d4c4
VZ
2202wxCSConv::wxCSConv(const wxChar *charset)
2203{
2204 Init();
82713003 2205
e95354ec
VZ
2206 if ( charset )
2207 {
e95354ec
VZ
2208 SetName(charset);
2209 }
bda3d86a
VZ
2210
2211 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
2212}
2213
8b04d4c4
VZ
2214wxCSConv::wxCSConv(wxFontEncoding encoding)
2215{
bda3d86a 2216 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
2217 {
2218 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2219
2220 encoding = wxFONTENCODING_SYSTEM;
2221 }
2222
8b04d4c4
VZ
2223 Init();
2224
bda3d86a 2225 m_encoding = encoding;
8b04d4c4
VZ
2226}
2227
6001e347
RR
2228wxCSConv::~wxCSConv()
2229{
65e50848
JS
2230 Clear();
2231}
2232
54380f29 2233wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 2234 : wxMBConv()
54380f29 2235{
8b04d4c4
VZ
2236 Init();
2237
54380f29 2238 SetName(conv.m_name);
8b04d4c4 2239 m_encoding = conv.m_encoding;
54380f29
GD
2240}
2241
2242wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2243{
2244 Clear();
8b04d4c4 2245
54380f29 2246 SetName(conv.m_name);
8b04d4c4
VZ
2247 m_encoding = conv.m_encoding;
2248
54380f29
GD
2249 return *this;
2250}
2251
65e50848
JS
2252void wxCSConv::Clear()
2253{
8b04d4c4 2254 free(m_name);
e95354ec 2255 delete m_convReal;
8b04d4c4 2256
65e50848 2257 m_name = NULL;
e95354ec 2258 m_convReal = NULL;
6001e347
RR
2259}
2260
2261void wxCSConv::SetName(const wxChar *charset)
2262{
f1339c56
RR
2263 if (charset)
2264 {
2265 m_name = wxStrdup(charset);
e95354ec 2266 m_deferred = true;
f1339c56 2267 }
6001e347
RR
2268}
2269
e95354ec
VZ
2270wxMBConv *wxCSConv::DoCreate() const
2271{
c547282d
VZ
2272 // check for the special case of ASCII or ISO8859-1 charset: as we have
2273 // special knowledge of it anyhow, we don't need to create a special
2274 // conversion object
2275 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 2276 {
e95354ec
VZ
2277 // don't convert at all
2278 return NULL;
2279 }
dccce9ea 2280
e95354ec
VZ
2281 // we trust OS to do conversion better than we can so try external
2282 // conversion methods first
2283 //
2284 // the full order is:
2285 // 1. OS conversion (iconv() under Unix or Win32 API)
2286 // 2. hard coded conversions for UTF
2287 // 3. wxEncodingConverter as fall back
2288
2289 // step (1)
2290#ifdef HAVE_ICONV
c547282d 2291#if !wxUSE_FONTMAP
e95354ec 2292 if ( m_name )
c547282d 2293#endif // !wxUSE_FONTMAP
e95354ec 2294 {
c547282d
VZ
2295 wxString name(m_name);
2296
2297#if wxUSE_FONTMAP
2298 if ( name.empty() )
2299 name = wxFontMapper::Get()->GetEncodingName(m_encoding);
2300#endif // wxUSE_FONTMAP
2301
2302 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
e95354ec
VZ
2303 if ( conv->IsOk() )
2304 return conv;
2305
2306 delete conv;
2307 }
2308#endif // HAVE_ICONV
2309
2310#ifdef wxHAVE_WIN32_MB2WC
2311 {
7608a683 2312#if wxUSE_FONTMAP
e95354ec
VZ
2313 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2314 : new wxMBConv_win32(m_encoding);
2315 if ( conv->IsOk() )
2316 return conv;
2317
2318 delete conv;
7608a683
WS
2319#else
2320 return NULL;
2321#endif
e95354ec
VZ
2322 }
2323#endif // wxHAVE_WIN32_MB2WC
d775fa82
WS
2324#if defined(__WXMAC__)
2325 {
2326 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
2327 {
2328
2329 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2330 : new wxMBConv_mac(m_encoding);
2331 if ( conv->IsOk() )
f7e98dee
RN
2332 return conv;
2333
2334 delete conv;
2335 }
2336 }
2337#endif
2338#if defined(__WXCOCOA__)
2339 {
2340 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2341 {
2342
2343 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2344 : new wxMBConv_cocoa(m_encoding);
2345 if ( conv->IsOk() )
d775fa82
WS
2346 return conv;
2347
2348 delete conv;
2349 }
335d31e0
SC
2350 }
2351#endif
e95354ec
VZ
2352 // step (2)
2353 wxFontEncoding enc = m_encoding;
2354#if wxUSE_FONTMAP
c547282d
VZ
2355 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2356 {
2357 // use "false" to suppress interactive dialogs -- we can be called from
2358 // anywhere and popping up a dialog from here is the last thing we want to
2359 // do
2360 enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
2361 }
e95354ec
VZ
2362#endif // wxUSE_FONTMAP
2363
2364 switch ( enc )
2365 {
2366 case wxFONTENCODING_UTF7:
2367 return new wxMBConvUTF7;
2368
2369 case wxFONTENCODING_UTF8:
2370 return new wxMBConvUTF8;
2371
e95354ec
VZ
2372 case wxFONTENCODING_UTF16BE:
2373 return new wxMBConvUTF16BE;
2374
2375 case wxFONTENCODING_UTF16LE:
2376 return new wxMBConvUTF16LE;
2377
e95354ec
VZ
2378 case wxFONTENCODING_UTF32BE:
2379 return new wxMBConvUTF32BE;
2380
2381 case wxFONTENCODING_UTF32LE:
2382 return new wxMBConvUTF32LE;
2383
2384 default:
2385 // nothing to do but put here to suppress gcc warnings
2386 ;
2387 }
2388
2389 // step (3)
2390#if wxUSE_FONTMAP
2391 {
2392 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2393 : new wxMBConv_wxwin(m_encoding);
2394 if ( conv->IsOk() )
2395 return conv;
2396
2397 delete conv;
2398 }
2399#endif // wxUSE_FONTMAP
2400
a58d4f4d
VS
2401 // NB: This is a hack to prevent deadlock. What could otherwise happen
2402 // in Unicode build: wxConvLocal creation ends up being here
2403 // because of some failure and logs the error. But wxLog will try to
2404 // attach timestamp, for which it will need wxConvLocal (to convert
2405 // time to char* and then wchar_t*), but that fails, tries to log
2406 // error, but wxLog has a (already locked) critical section that
2407 // guards static buffer.
2408 static bool alreadyLoggingError = false;
2409 if (!alreadyLoggingError)
2410 {
2411 alreadyLoggingError = true;
2412 wxLogError(_("Cannot convert from the charset '%s'!"),
2413 m_name ? m_name
e95354ec
VZ
2414 :
2415#if wxUSE_FONTMAP
2416 wxFontMapper::GetEncodingDescription(m_encoding).c_str()
2417#else // !wxUSE_FONTMAP
2418 wxString::Format(_("encoding %s"), m_encoding).c_str()
2419#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2420 );
a58d4f4d
VS
2421 alreadyLoggingError = false;
2422 }
e95354ec
VZ
2423
2424 return NULL;
2425}
2426
2427void wxCSConv::CreateConvIfNeeded() const
2428{
2429 if ( m_deferred )
2430 {
2431 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
2432
2433#if wxUSE_INTL
2434 // if we don't have neither the name nor the encoding, use the default
2435 // encoding for this system
2436 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2437 {
4d312c22 2438 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
2439 }
2440#endif // wxUSE_INTL
2441
e95354ec
VZ
2442 self->m_convReal = DoCreate();
2443 self->m_deferred = false;
6001e347 2444 }
6001e347
RR
2445}
2446
2447size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2448{
e95354ec 2449 CreateConvIfNeeded();
dccce9ea 2450
e95354ec
VZ
2451 if (m_convReal)
2452 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
2453
2454 // latin-1 (direct)
4def3b35 2455 size_t len = strlen(psz);
dccce9ea 2456
f1339c56
RR
2457 if (buf)
2458 {
4def3b35 2459 for (size_t c = 0; c <= len; c++)
f1339c56
RR
2460 buf[c] = (unsigned char)(psz[c]);
2461 }
dccce9ea 2462
f1339c56 2463 return len;
6001e347
RR
2464}
2465
2466size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2467{
e95354ec 2468 CreateConvIfNeeded();
dccce9ea 2469
e95354ec
VZ
2470 if (m_convReal)
2471 return m_convReal->WC2MB(buf, psz, n);
1cd52418 2472
f1339c56 2473 // latin-1 (direct)
f8d791e0 2474 const size_t len = wxWcslen(psz);
f1339c56
RR
2475 if (buf)
2476 {
4def3b35 2477 for (size_t c = 0; c <= len; c++)
24642831
VS
2478 {
2479 if (psz[c] > 0xFF)
2480 return (size_t)-1;
907173e5 2481 buf[c] = (char)psz[c];
24642831
VS
2482 }
2483 }
2484 else
2485 {
2486 for (size_t c = 0; c <= len; c++)
2487 {
2488 if (psz[c] > 0xFF)
2489 return (size_t)-1;
2490 }
f1339c56 2491 }
dccce9ea 2492
f1339c56 2493 return len;
6001e347
RR
2494}
2495
bde4baac
VZ
2496// ----------------------------------------------------------------------------
2497// globals
2498// ----------------------------------------------------------------------------
2499
2500#ifdef __WINDOWS__
2501 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
2502#elif defined(__WXMAC__) && !defined(__MACH__)
2503 static wxMBConv_mac wxConvLibcObj ;
bde4baac 2504#else
dcc8fac0 2505 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
2506#endif
2507
2508static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2509static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2510static wxMBConvUTF7 wxConvUTF7Obj;
2511static wxMBConvUTF8 wxConvUTF8Obj;
2512
2513
2514WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2515WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2516WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2517WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2518WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2519WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2520
2521#else // !wxUSE_WCHAR_T
2522
2523// stand-ins in absence of wchar_t
2524WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2525 wxConvISO8859_1,
2526 wxConvLocal,
2527 wxConvUTF8;
2528
2529#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
2530
2531