]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
made Convert() methods const
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
3a0d76bc 4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
6001e347
RR
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
e95354ec
VZ
8// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
9// (c) 2000-2003 Vadim Zeitlin
55d99c7a 10// Licence: wxWindows licence
6001e347
RR
11/////////////////////////////////////////////////////////////////////////////
12
f6bcfd97
BP
13// ============================================================================
14// declarations
15// ============================================================================
16
17// ----------------------------------------------------------------------------
18// headers
19// ----------------------------------------------------------------------------
20
14f355c2 21#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
22 #pragma implementation "strconv.h"
23#endif
24
25// For compilers that support precompilation, includes "wx.h".
26#include "wx/wxprec.h"
27
28#ifdef __BORLANDC__
29 #pragma hdrstop
30#endif
31
373658eb
VZ
32#ifndef WX_PRECOMP
33 #include "wx/intl.h"
34 #include "wx/log.h"
35#endif // WX_PRECOMP
36
0a1c1e62 37#ifdef __WXMSW__
373658eb 38 #include "wx/msw/private.h"
0a1c1e62
GRG
39#endif
40
1c193821 41#ifndef __WXWINCE__
1cd52418 42#include <errno.h>
1c193821
JS
43#endif
44
6001e347
RR
45#include <ctype.h>
46#include <string.h>
47#include <stdlib.h>
48
65e50848 49#include "wx/module.h"
7af284fd 50#include "wx/strconv.h"
7af284fd 51
e95354ec
VZ
52#if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54#endif // __WIN32__ but !__WXMICROWIN__
55
7af284fd
VS
56// ----------------------------------------------------------------------------
57// globals
58// ----------------------------------------------------------------------------
59
373658eb 60#if wxUSE_WCHAR_T
fd242375
VS
61 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc;
62 WXDLLIMPEXP_DATA_BASE(wxCSConv) wxConvLocal((const wxChar *)NULL);
63 WXDLLIMPEXP_DATA_BASE(wxCSConv) wxConvISO8859_1(_T("iso-8859-1"));
373658eb
VZ
64#else
65 // stand-ins in absence of wchar_t
fd242375
VS
66 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
67 wxConvFile,
68 wxConvISO8859_1,
69 wxConvLocal,
70 wxConvUTF8;
373658eb 71#endif // wxUSE_WCHAR_T
7af284fd 72
fd242375 73WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibc;
7af284fd 74
65e50848
JS
75class wxStrConvModule: public wxModule
76{
77public:
78 wxStrConvModule() : wxModule() { }
e95354ec 79 virtual bool OnInit() { return true; }
65e50848
JS
80 virtual void OnExit()
81 {
7f1698c3 82#if wxUSE_WCHAR_T
ca11abde 83 wxConvLocal.Clear();
2b5f62a0 84 wxConvISO8859_1.Clear();
7f1698c3 85#endif
65e50848
JS
86 }
87
88 DECLARE_DYNAMIC_CLASS(wxStrConvModule)
89};
90
91IMPLEMENT_DYNAMIC_CLASS(wxStrConvModule, wxModule)
92
93
373658eb
VZ
94// ----------------------------------------------------------------------------
95// headers
96// ----------------------------------------------------------------------------
7af284fd
VS
97
98#if wxUSE_WCHAR_T
99
6001e347 100#ifdef __SALFORDC__
373658eb 101 #include <clib.h>
6001e347
RR
102#endif
103
b040e242 104#ifdef HAVE_ICONV
373658eb 105 #include <iconv.h>
1cd52418 106#endif
1cd52418 107
373658eb
VZ
108#include "wx/encconv.h"
109#include "wx/fontmap.h"
110
111// ----------------------------------------------------------------------------
112// macros
113// ----------------------------------------------------------------------------
3e61dfb0 114
// Byte-swap, in place, the first len elements of the array str.
//
// The macro arguments are parenthesized so that expressions can be passed
// safely and the counter is a size_t because the lengths passed in are.
//
// NB: the body is a plain brace block (and not do/while) because the macros
//     are invoked without a trailing semicolon in this file.
#define BSWAP_UCS4(str, len) { size_t _c; for (_c=0; _c<(len); _c++) (str)[_c]=wxUINT32_SWAP_ALWAYS((str)[_c]); }
#define BSWAP_UTF16(str, len) { size_t _c; for (_c=0; _c<(len); _c++) (str)[_c]=wxUINT16_SWAP_ALWAYS((str)[_c]); }
1cd52418 117
a3f2769e
VZ
// configure defines SIZEOF_WCHAR_T under Unix but other platforms may not
// define it at all: fall back to the most common value in this case
#if !defined(SIZEOF_WCHAR_T)
    #define SIZEOF_WCHAR_T 2
#endif // !defined(SIZEOF_WCHAR_T)

// Depending on the size of wchar_t define:
//  - WC_NAME:      name of the wchar_t charset without byte order information
//  - WC_NAME_BEST: name of the wchar_t charset with explicit byte order
//  - WC_BSWAP:     macro swapping the bytes of an array of wchar_t
//  - WC_UTF16:     defined only if wchar_t is 16 bits wide
#if SIZEOF_WCHAR_T == 2
    #define WC_NAME         "UTF16"
    #define WC_BSWAP         BSWAP_UTF16
    #define WC_UTF16
    #ifdef WORDS_BIGENDIAN
      #define WC_NAME_BEST  "UTF-16BE"
    #else
      #define WC_NAME_BEST  "UTF-16LE"
    #endif
#elif SIZEOF_WCHAR_T == 4
    #define WC_NAME         "UCS4"
    #define WC_BSWAP         BSWAP_UCS4
    #ifdef WORDS_BIGENDIAN
      #define WC_NAME_BEST  "UCS-4BE"
    #else
      #define WC_NAME_BEST  "UCS-4LE"
    #endif
#else // sizeof(wchar_t) is neither 2 nor 4
    // no idea how to deal with such wchar_t
    #error "Weird sizeof(wchar_t): please report your platform details to wx-users mailing list"
#endif
145
373658eb
VZ
146// ============================================================================
147// implementation
148// ============================================================================
149
150// ----------------------------------------------------------------------------
c91830cb 151// UTF-16 en/decoding to/from UCS-4
373658eb 152// ----------------------------------------------------------------------------
6001e347 153
b0a6bb75 154
// Encode the UCS-4 value input as UTF-16.
//
// If output is not NULL, the 1 or 2 resulting 16 bit units are stored in it.
// Returns the number of units needed (1 or 2) or (size_t)-1 if input is
// 0x110000 or greater and so can't be encoded at all.
static size_t encode_utf16(wxUint32 input, wxUint16 *output)
{
    if ( input >= 0x110000 )
        return (size_t)-1;

    // everything above 0xffff needs a pair of units
    const bool needsPair = input > 0xffff;

    if ( output )
    {
        if ( needsPair )
        {
            output[0] = (wxUint16) ((input >> 10) + 0xd7c0);
            output[1] = (wxUint16) ((input & 0x3ff) + 0xdc00);
        }
        else
        {
            output[0] = (wxUint16) input;
        }
    }

    return needsPair ? 2 : 1;
}
176
// Decode one character from the UTF-16 sequence starting at input.
//
// Returns the number of 16 bit units consumed (1 or 2) and stores the decoded
// UCS-4 value in output. If the sequence is invalid, i.e. it starts with a
// low surrogate or with a high surrogate which is not followed by a low one,
// returns (size_t)-1 and stores the first unit unchanged in output.
//
// NB: input[1] is only read if input[0] is a high surrogate.
static size_t decode_utf16(const wxUint16* input, wxUint32& output)
{
    const wxUint16 lead = input[0];

    if ( lead < 0xd800 || lead > 0xdfff )
    {
        // not a surrogate: the unit is the character itself
        output = lead;
        return 1;
    }

    // the lead unit must be a high surrogate (0xd800..0xdbff) and must be
    // followed by a low one (0xdc00..0xdfff, both ends included: 0xdfff is
    // needed to decode the characters whose code ends in 0xffff)
    if ( lead > 0xdbff || input[1] < 0xdc00 || input[1] > 0xdfff )
    {
        output = lead;
        return (size_t)-1;
    }

    output = ((wxUint32)(lead - 0xd7c0) << 10) + (input[1] - 0xdc00);
    return 2;
}
195
b0a6bb75 196
f6bcfd97 197// ----------------------------------------------------------------------------
6001e347 198// wxMBConv
f6bcfd97 199// ----------------------------------------------------------------------------
6001e347 200
b1ac3b56
RR
201#define IGNORE_LIBC 0
202
2b5f62a0
VZ
203wxMBConv::~wxMBConv()
204{
205 // nothing to do here
206}
207
6001e347
RR
208size_t wxMBConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
209{
b1ac3b56
RR
210#if IGNORE_LIBC
211 if (buf)
212 {
213 for (size_t i = 0; i < strlen( psz )+1; i++)
214 buf[i] = (wchar_t) psz[i];
b1ac3b56
RR
215 return strlen( psz );
216 }
217 else
218 {
219 return strlen( psz );
220 }
221#else
24f588af 222 return wxMB2WC(buf, psz, n);
b1ac3b56 223#endif
6001e347
RR
224}
225
226size_t wxMBConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
227{
b1ac3b56
RR
228#if IGNORE_LIBC
229 if (buf)
230 {
231 for (size_t i = 0; i < wxStrlen( psz )+1; i++)
232 buf[i] = (char) psz[i];
b1ac3b56
RR
233 return wxStrlen( psz );
234 }
235 else
236 {
237 return wxStrlen( psz );
238 }
239#else
24f588af 240 return wxWC2MB(buf, psz, n);
b1ac3b56 241#endif
6001e347
RR
242}
243
244const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
245{
2b5f62a0 246 if ( psz )
6001e347 247 {
2b5f62a0
VZ
248 // calculate the length of the buffer needed first
249 size_t nLen = MB2WC(NULL, psz, 0);
250 if ( nLen != (size_t)-1 )
251 {
252 // now do the actual conversion
253 wxWCharBuffer buf(nLen);
254 MB2WC(buf.data(), psz, nLen + 1); // with the trailing NUL
255
256 return buf;
257 }
f6bcfd97 258 }
2b5f62a0
VZ
259
260 wxWCharBuffer buf((wchar_t *)NULL);
261
262 return buf;
6001e347
RR
263}
264
e5cceba0 265const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 266{
2b5f62a0
VZ
267 if ( pwz )
268 {
269 size_t nLen = WC2MB(NULL, pwz, 0);
270 if ( nLen != (size_t)-1 )
271 {
c91830cb
VZ
272 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
273 WC2MB(buf.data(), pwz, nLen + 4);
2b5f62a0
VZ
274
275 return buf;
276 }
277 }
278
279 wxCharBuffer buf((char *)NULL);
e5cceba0 280
e5cceba0 281 return buf;
6001e347
RR
282}
283
6001e347
RR
284// ----------------------------------------------------------------------------
285// UTF-7
286// ----------------------------------------------------------------------------
287
fd242375 288WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7) wxConvUTF7;
6001e347
RR
289
290#if 0
291static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
292 "abcdefghijklmnopqrstuvwxyz"
293 "0123456789'(),-./:?";
294static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
295static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
296 "abcdefghijklmnopqrstuvwxyz"
297 "0123456789+/";
298#endif
299
300// TODO: write actual implementations of UTF-7 here
301size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
302 const char * WXUNUSED(psz),
303 size_t WXUNUSED(n)) const
304{
305 return 0;
306}
307
308size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
309 const wchar_t * WXUNUSED(psz),
310 size_t WXUNUSED(n)) const
311{
312 return 0;
313}
314
f6bcfd97 315// ----------------------------------------------------------------------------
6001e347 316// UTF-8
f6bcfd97 317// ----------------------------------------------------------------------------
6001e347 318
fd242375 319WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8) wxConvUTF8;
6001e347 320
// utf8_max[k] is the biggest value which can be encoded using a UTF-8
// sequence of k + 1 bytes; the last element stops the searches in this table
// for any 32 bit value. The table is only ever read, hence const.
static const wxUint32 utf8_max[]=
    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347
RR
323
324size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
325{
4def3b35
VS
326 size_t len = 0;
327
dccce9ea 328 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
329 {
330 unsigned char cc = *psz++, fc = cc;
331 unsigned cnt;
dccce9ea 332 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 333 fc <<= 1;
dccce9ea 334 if (!cnt)
4def3b35
VS
335 {
336 // plain ASCII char
dccce9ea 337 if (buf)
4def3b35
VS
338 *buf++ = cc;
339 len++;
dccce9ea
VZ
340 }
341 else
4def3b35
VS
342 {
343 cnt--;
dccce9ea 344 if (!cnt)
4def3b35
VS
345 {
346 // invalid UTF-8 sequence
347 return (size_t)-1;
dccce9ea
VZ
348 }
349 else
4def3b35
VS
350 {
351 unsigned ocnt = cnt - 1;
352 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 353 while (cnt--)
4def3b35
VS
354 {
355 cc = *psz++;
dccce9ea 356 if ((cc & 0xC0) != 0x80)
4def3b35
VS
357 {
358 // invalid UTF-8 sequence
359 return (size_t)-1;
360 }
361 res = (res << 6) | (cc & 0x3f);
362 }
dccce9ea 363 if (res <= utf8_max[ocnt])
4def3b35
VS
364 {
365 // illegal UTF-8 encoding
366 return (size_t)-1;
367 }
1cd52418 368#ifdef WC_UTF16
4def3b35
VS
369 size_t pa = encode_utf16(res, buf);
370 if (pa == (size_t)-1)
371 return (size_t)-1;
dccce9ea 372 if (buf)
4def3b35
VS
373 buf += pa;
374 len += pa;
373658eb 375#else // !WC_UTF16
dccce9ea 376 if (buf)
4def3b35
VS
377 *buf++ = res;
378 len++;
373658eb 379#endif // WC_UTF16/!WC_UTF16
4def3b35
VS
380 }
381 }
6001e347 382 }
dccce9ea 383 if (buf && (len < n))
4def3b35
VS
384 *buf = 0;
385 return len;
6001e347
RR
386}
387
388size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
389{
4def3b35 390 size_t len = 0;
6001e347 391
dccce9ea 392 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
393 {
394 wxUint32 cc;
1cd52418 395#ifdef WC_UTF16
eccf1b2c 396 size_t pa = decode_utf16(psz, cc);
4def3b35 397 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 398#else
4def3b35
VS
399 cc=(*psz++) & 0x7fffffff;
400#endif
401 unsigned cnt;
402 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
dccce9ea 403 if (!cnt)
4def3b35
VS
404 {
405 // plain ASCII char
dccce9ea 406 if (buf)
574c939e 407 *buf++ = (char) cc;
4def3b35 408 len++;
dccce9ea
VZ
409 }
410
411 else
4def3b35
VS
412 {
413 len += cnt + 1;
dccce9ea 414 if (buf)
4def3b35 415 {
574c939e 416 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
4def3b35 417 while (cnt--)
574c939e 418 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
4def3b35
VS
419 }
420 }
6001e347 421 }
4def3b35
VS
422
423 if (buf && (len<n)) *buf = 0;
adb45366 424
4def3b35 425 return len;
6001e347
RR
426}
427
c91830cb
VZ
428
429
430
431// ----------------------------------------------------------------------------
432// UTF-16
433// ----------------------------------------------------------------------------
434
435#ifdef WORDS_BIGENDIAN
436#define wxMBConvUTF16straight wxMBConvUTF16BE
437#define wxMBConvUTF16swap wxMBConvUTF16LE
438#else
439#define wxMBConvUTF16swap wxMBConvUTF16BE
440#define wxMBConvUTF16straight wxMBConvUTF16LE
441#endif
442
443
444WXDLLIMPEXP_DATA_BASE(wxMBConvUTF16LE) wxConvUTF16LE;
445WXDLLIMPEXP_DATA_BASE(wxMBConvUTF16BE) wxConvUTF16BE;
446
447
448
449
450
451#ifdef WC_UTF16
452
453
454// copy 16bit MB to 16bit String
455size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
456{
457 size_t len=0;
458
459 while (*(wxUint16*)psz && (!buf || len < n))
460 {
461 if (buf)
462 *buf++ = *(wxUint16*)psz;
463 len++;
464
465 psz += sizeof(wxUint16);
466 }
467 if (buf && len<n) *buf=0;
468
469 return len;
470}
471
472
473// copy 16bit String to 16bit MB
474size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
475{
476 size_t len=0;
477
478 while (*psz && (!buf || len < n))
479 {
480 if (buf)
481 {
482 *(wxUint16*)buf = *psz;
483 buf += sizeof(wxUint16);
484 }
485 len += sizeof(wxUint16);
486 psz++;
487 }
488 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
489
490 return len;
491}
492
493
494// swap 16bit MB to 16bit String
495size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
496{
497 size_t len=0;
498
499 while (*(wxUint16*)psz && (!buf || len < n))
500 {
501 if (buf)
502 {
503 ((char *)buf)[0] = psz[1];
504 ((char *)buf)[1] = psz[0];
505 buf++;
506 }
507 len++;
508 psz += sizeof(wxUint16);
509 }
510 if (buf && len<n) *buf=0;
511
512 return len;
513}
514
515
516// swap 16bit MB to 16bit String
517size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
518{
519 size_t len=0;
520
521 while (*psz && (!buf || len < n))
522 {
523 if (buf)
524 {
525 *buf++ = ((char*)psz)[1];
526 *buf++ = ((char*)psz)[0];
527 }
528 len += sizeof(wxUint16);
529 psz++;
530 }
531 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
532
533 return len;
534}
535
536
537#else // WC_UTF16
538
539
540// copy 16bit MB to 32bit String
541size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
542{
543 size_t len=0;
544
545 while (*(wxUint16*)psz && (!buf || len < n))
546 {
547 wxUint32 cc;
548 size_t pa=decode_utf16((wxUint16*)psz, cc);
549 if (pa == (size_t)-1)
550 return pa;
551
552 if (buf)
553 *buf++ = cc;
554 len++;
555 psz += pa * sizeof(wxUint16);
556 }
557 if (buf && len<n) *buf=0;
558
559 return len;
560}
561
562
563// copy 32bit String to 16bit MB
564size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
565{
566 size_t len=0;
567
568 while (*psz && (!buf || len < n))
569 {
570 wxUint16 cc[2];
571 size_t pa=encode_utf16(*psz, cc);
572
573 if (pa == (size_t)-1)
574 return pa;
575
576 if (buf)
577 {
69b80d28
VZ
578 *(wxUint16*)buf = cc[0];
579 buf += sizeof(wxUint16);
c91830cb 580 if (pa > 1)
69b80d28
VZ
581 {
582 *(wxUint16*)buf = cc[1];
583 buf += sizeof(wxUint16);
584 }
c91830cb
VZ
585 }
586
587 len += pa*sizeof(wxUint16);
588 psz++;
589 }
590 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
591
592 return len;
593}
594
595
596// swap 16bit MB to 32bit String
597size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
598{
599 size_t len=0;
600
601 while (*(wxUint16*)psz && (!buf || len < n))
602 {
603 wxUint32 cc;
604 char tmp[4];
605 tmp[0]=psz[1]; tmp[1]=psz[0];
606 tmp[2]=psz[3]; tmp[3]=psz[2];
607
608 size_t pa=decode_utf16((wxUint16*)tmp, cc);
609 if (pa == (size_t)-1)
610 return pa;
611
612 if (buf)
613 *buf++ = cc;
614
615 len++;
616 psz += pa * sizeof(wxUint16);
617 }
618 if (buf && len<n) *buf=0;
619
620 return len;
621}
622
623
624// swap 32bit String to 16bit MB
625size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
626{
627 size_t len=0;
628
629 while (*psz && (!buf || len < n))
630 {
631 wxUint16 cc[2];
632 size_t pa=encode_utf16(*psz, cc);
633
634 if (pa == (size_t)-1)
635 return pa;
636
637 if (buf)
638 {
639 *buf++ = ((char*)cc)[1];
640 *buf++ = ((char*)cc)[0];
641 if (pa > 1)
642 {
643 *buf++ = ((char*)cc)[3];
644 *buf++ = ((char*)cc)[2];
645 }
646 }
647
648 len += pa*sizeof(wxUint16);
649 psz++;
650 }
651 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
652
653 return len;
654}
655
656#endif // WC_UTF16
657
658
659// ----------------------------------------------------------------------------
660// UTF-32
661// ----------------------------------------------------------------------------
662
663#ifdef WORDS_BIGENDIAN
664#define wxMBConvUTF32straight wxMBConvUTF32BE
665#define wxMBConvUTF32swap wxMBConvUTF32LE
666#else
667#define wxMBConvUTF32swap wxMBConvUTF32BE
668#define wxMBConvUTF32straight wxMBConvUTF32LE
669#endif
670
671
672WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
673WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
674
675
676#ifdef WC_UTF16
677
678// copy 32bit MB to 16bit String
679size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
680{
681 size_t len=0;
682
683 while (*(wxUint32*)psz && (!buf || len < n))
684 {
685 wxUint16 cc[2];
686
687 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
688 if (pa == (size_t)-1)
689 return pa;
690
691 if (buf)
692 {
693 *buf++ = cc[0];
694 if (pa > 1)
695 *buf++ = cc[1];
696 }
697 len += pa;
698 psz += sizeof(wxUint32);
699 }
700 if (buf && len<n) *buf=0;
701
702 return len;
703}
704
705
706// copy 16bit String to 32bit MB
707size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
708{
709 size_t len=0;
710
711 while (*psz && (!buf || len < n))
712 {
713 wxUint32 cc;
714
715 size_t pa=decode_utf16(psz, cc);
716 if (pa == (size_t)-1)
717 return pa;
718
719 if (buf)
720 {
721 *(wxUint32*)buf = cc;
722 buf += sizeof(wxUint32);
723 }
724 len += sizeof(wxUint32);
725 psz += pa;
726 }
727 if (buf && len<=n-sizeof(wxUint32)) *(wxUint32*)buf=0;
728
729 return len;
730}
731
732
733
734// swap 32bit MB to 16bit String
735size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
736{
737 size_t len=0;
738
739 while (*(wxUint32*)psz && (!buf || len < n))
740 {
741 char tmp[4];
742 tmp[0] = psz[3]; tmp[1] = psz[2];
743 tmp[2] = psz[1]; tmp[3] = psz[0];
744
745
746 wxUint16 cc[2];
747
748 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
749 if (pa == (size_t)-1)
750 return pa;
751
752 if (buf)
753 {
754 *buf++ = cc[0];
755 if (pa > 1)
756 *buf++ = cc[1];
757 }
758 len += pa;
759 psz += sizeof(wxUint32);
760 }
761 if (buf && len<n) *buf=0;
762
763 return len;
764}
765
766
767// swap 16bit String to 32bit MB
768size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
769{
770 size_t len=0;
771
772 while (*psz && (!buf || len < n))
773 {
774 char cc[4];
775
776 size_t pa=decode_utf16(psz, *(wxUint32*)cc);
777 if (pa == (size_t)-1)
778 return pa;
779
780 if (buf)
781 {
782 *buf++ = cc[3];
783 *buf++ = cc[2];
784 *buf++ = cc[1];
785 *buf++ = cc[0];
786 }
787 len += sizeof(wxUint32);
788 psz += pa;
789 }
790 if (buf && len<=n-sizeof(wxUint32)) *(wxUint32*)buf=0;
791
792 return len;
793}
794
795#else // WC_UTF16
796
797
798// copy 32bit MB to 32bit String
799size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
800{
801 size_t len=0;
802
803 while (*(wxUint32*)psz && (!buf || len < n))
804 {
805 if (buf)
806 *buf++ = *(wxUint32*)psz;
807 len++;
808 psz += sizeof(wxUint32);
809 }
810 if (buf && len<n) *buf=0;
811
812 return len;
813}
814
815
816// copy 32bit String to 32bit MB
817size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
818{
819 size_t len=0;
820
821 while (*psz && (!buf || len < n))
822 {
823 if (buf)
824 {
825 *(wxUint32*)buf = *psz;
826 buf += sizeof(wxUint32);
827 }
828
829 len += sizeof(wxUint32);
830 psz++;
831 }
832
833 if (buf && len<=n-sizeof(wxUint32)) *(wxUint32*)buf=0;
834
835 return len;
836}
837
838
839// swap 32bit MB to 32bit String
840size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
841{
842 size_t len=0;
843
844 while (*(wxUint32*)psz && (!buf || len < n))
845 {
846 if (buf)
847 {
848 ((char *)buf)[0] = psz[3];
849 ((char *)buf)[1] = psz[2];
850 ((char *)buf)[2] = psz[1];
851 ((char *)buf)[3] = psz[0];
852 buf++;
853 }
854 len++;
855 psz += sizeof(wxUint32);
856 }
857 if (buf && len<n) *buf=0;
858
859 return len;
860}
861
862
863// swap 32bit String to 32bit MB
864size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
865{
866 size_t len=0;
867
868 while (*psz && (!buf || len < n))
869 {
870 if (buf)
871 {
872 *buf++ = ((char *)psz)[3];
873 *buf++ = ((char *)psz)[2];
874 *buf++ = ((char *)psz)[1];
875 *buf++ = ((char *)psz)[0];
876 }
877 len += sizeof(wxUint32);
878 psz++;
879 }
880 if (buf && len<=n-sizeof(wxUint32)) *(wxUint32*)buf=0;
881
882 return len;
883}
884
885
886#endif // WC_UTF16
887
888
36acb880
VZ
889// ============================================================================
890// The classes doing conversion using the iconv_xxx() functions
891// ============================================================================
3caec1bb 892
b040e242 893#ifdef HAVE_ICONV
3a0d76bc 894
3caec1bb
VS
895// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
896// if output buffer is _exactly_ as big as needed. Such case is (unless there's
897// yet another bug in glibc) the only case when iconv() returns with (size_t)-1
898// (which means error) and says there are 0 bytes left in the input buffer --
899// when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
900// this alternative test for iconv() failure.
901// [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft) is true if the iconv() call which returned
// cres and left bufLeft bytes in its input buffer really failed. The macro
// arguments are parenthesized so that arbitrary expressions can be passed.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft) ((cres) == (size_t)-1)
#endif

// cast the address of an input pointer to the type used for the second
// argument of iconv() in this file
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
36acb880
VZ
910
911// ----------------------------------------------------------------------------
e95354ec 912// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
913// ----------------------------------------------------------------------------
914
e95354ec 915class wxMBConv_iconv : public wxMBConv
1cd52418
OK
916{
917public:
e95354ec
VZ
918 wxMBConv_iconv(const wxChar *name);
919 virtual ~wxMBConv_iconv();
36acb880
VZ
920
921 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n);
922 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n);
923
e95354ec 924 bool IsOk() const
36acb880
VZ
925 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
926
927protected:
928 // the iconv handlers used to translate from multibyte to wide char and in
929 // the other direction
930 iconv_t m2w,
931 w2m;
932
933private:
e95354ec 934 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
935 // available on this machine, it will remain NULL
936 static const char *ms_wcCharsetName;
937
938 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
939 // different endian-ness than the native one
405d8f46 940 static bool ms_wcNeedsSwap;
36acb880
VZ
941};
942
e95354ec
VZ
943const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
944bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 945
e95354ec 946wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 947{
04c79127
RR
948 // Do it the hard way
949 char cname[100];
950 for (size_t i = 0; i < wxStrlen(name)+1; i++)
951 cname[i] = (char) name[i];
952
36acb880
VZ
953 // check for charset that represents wchar_t:
954 if (ms_wcCharsetName == NULL)
f1339c56 955 {
e95354ec 956 ms_wcNeedsSwap = false;
dccce9ea 957
36acb880
VZ
958 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
959 ms_wcCharsetName = WC_NAME_BEST;
04c79127 960 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 961
36acb880
VZ
962 if (m2w == (iconv_t)-1)
963 {
964 // try charset w/o bytesex info (e.g. "UCS4")
965 // and check for bytesex ourselves:
966 ms_wcCharsetName = WC_NAME;
04c79127 967 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
968
969 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
970 if (m2w == (iconv_t)-1)
971 {
36acb880 972 ms_wcCharsetName = "WCHAR_T";
04c79127 973 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 974 }
3a0d76bc 975
36acb880
VZ
976 if (m2w != (iconv_t)-1)
977 {
978 char buf[2], *bufPtr;
979 wchar_t wbuf[2], *wbufPtr;
980 size_t insz, outsz;
981 size_t res;
982
983 buf[0] = 'A';
984 buf[1] = 0;
985 wbuf[0] = 0;
986 insz = 2;
987 outsz = SIZEOF_WCHAR_T * 2;
988 wbufPtr = wbuf;
989 bufPtr = buf;
990
991 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
992 (char**)&wbufPtr, &outsz);
993
994 if (ICONV_FAILED(res, insz))
3a0d76bc 995 {
36acb880
VZ
996 ms_wcCharsetName = NULL;
997 wxLogLastError(wxT("iconv"));
2b5f62a0 998 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
999 }
1000 else
1001 {
36acb880 1002 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
1003 }
1004 }
36acb880
VZ
1005 else
1006 {
1007 ms_wcCharsetName = NULL;
373658eb 1008
957686c8
VS
1009 // VS: we must not output an error here, since wxWindows will safely
1010 // fall back to using wxEncodingConverter.
1011 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1012 //wxLogError(
36acb880 1013 }
3a0d76bc 1014 }
36acb880 1015 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 1016 }
36acb880 1017 else // we already have ms_wcCharsetName
3caec1bb 1018 {
04c79127 1019 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 1020 }
dccce9ea 1021
36acb880
VZ
1022 // NB: don't ever pass NULL to iconv_open(), it may crash!
1023 if ( ms_wcCharsetName )
f1339c56 1024 {
04c79127 1025 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 1026 }
405d8f46
VZ
1027 else
1028 {
1029 w2m = (iconv_t)-1;
1030 }
36acb880 1031}
3caec1bb 1032
e95354ec 1033wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
1034{
1035 if ( m2w != (iconv_t)-1 )
1036 iconv_close(m2w);
1037 if ( w2m != (iconv_t)-1 )
1038 iconv_close(w2m);
1039}
3a0d76bc 1040
e95354ec 1041size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n)
36acb880
VZ
1042{
1043 size_t inbuf = strlen(psz);
1044 size_t outbuf = n * SIZEOF_WCHAR_T;
1045 size_t res, cres;
1046 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1047 wchar_t *bufPtr = buf;
1048 const char *pszPtr = psz;
1049
1050 if (buf)
1051 {
1052 // have destination buffer, convert there
1053 cres = iconv(m2w,
1054 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1055 (char**)&bufPtr, &outbuf);
1056 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1057
36acb880 1058 if (ms_wcNeedsSwap)
3a0d76bc 1059 {
36acb880
VZ
1060 // convert to native endianness
1061 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 1062 }
adb45366 1063
49dd9820
VS
1064 // NB: iconv was given only strlen(psz) characters on input, and so
1065 // it couldn't convert the trailing zero. Let's do it ourselves
1066 // if there's some room left for it in the output buffer.
1067 if (res < n)
1068 buf[res] = 0;
36acb880
VZ
1069 }
1070 else
1071 {
1072 // no destination buffer... convert using temp buffer
1073 // to calculate destination buffer requirement
1074 wchar_t tbuf[8];
1075 res = 0;
1076 do {
1077 bufPtr = tbuf;
1078 outbuf = 8*SIZEOF_WCHAR_T;
1079
1080 cres = iconv(m2w,
1081 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1082 (char**)&bufPtr, &outbuf );
1083
1084 res += 8-(outbuf/SIZEOF_WCHAR_T);
1085 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1086 }
dccce9ea 1087
36acb880 1088 if (ICONV_FAILED(cres, inbuf))
f1339c56 1089 {
36acb880
VZ
1090 //VS: it is ok if iconv fails, hence trace only
1091 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1092 return (size_t)-1;
1093 }
1094
1095 return res;
1096}
1097
e95354ec 1098size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n)
36acb880 1099{
f8d791e0 1100 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1101 size_t outbuf = n;
1102 size_t res, cres;
3a0d76bc 1103
36acb880 1104 wchar_t *tmpbuf = 0;
3caec1bb 1105
36acb880
VZ
1106 if (ms_wcNeedsSwap)
1107 {
1108 // need to copy to temp buffer to switch endianness
1109 // this absolutely doesn't rock!
1110 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1111 // could be in read-only memory, or be accessed in some other thread)
1112 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1113 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1114 WC_BSWAP(tmpbuf, inbuf)
1115 psz=tmpbuf;
1116 }
3a0d76bc 1117
36acb880
VZ
1118 if (buf)
1119 {
1120 // have destination buffer, convert there
1121 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1122
36acb880 1123 res = n-outbuf;
adb45366 1124
49dd9820
VS
1125 // NB: iconv was given only wcslen(psz) characters on input, and so
1126 // it couldn't convert the trailing zero. Let's do it ourselves
1127 // if there's some room left for it in the output buffer.
1128 if (res < n)
1129 buf[0] = 0;
36acb880
VZ
1130 }
1131 else
1132 {
1133 // no destination buffer... convert using temp buffer
1134 // to calculate destination buffer requirement
1135 char tbuf[16];
1136 res = 0;
1137 do {
1138 buf = tbuf; outbuf = 16;
1139
1140 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1141
36acb880
VZ
1142 res += 16 - outbuf;
1143 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1144 }
dccce9ea 1145
36acb880
VZ
1146 if (ms_wcNeedsSwap)
1147 {
1148 free(tmpbuf);
1149 }
dccce9ea 1150
36acb880
VZ
1151 if (ICONV_FAILED(cres, inbuf))
1152 {
1153 //VS: it is ok if iconv fails, hence trace only
1154 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1155 return (size_t)-1;
1156 }
1157
1158 return res;
1159}
1160
b040e242 1161#endif // HAVE_ICONV
36acb880 1162
e95354ec 1163
36acb880
VZ
1164// ============================================================================
1165// Win32 conversion classes
1166// ============================================================================
1cd52418 1167
e95354ec 1168#ifdef wxHAVE_WIN32_MB2WC
373658eb 1169
8b04d4c4
VZ
1170// from utils.cpp
1171extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1172extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
373658eb 1173
e95354ec 1174class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1175{
1176public:
e95354ec 1177 wxMBConv_win32(const wxChar* name)
b1d66b54
VZ
1178 {
1179 m_CodePage = wxCharsetToCodepage(name);
1180 }
dccce9ea 1181
e95354ec 1182 wxMBConv_win32(wxFontEncoding encoding)
8b04d4c4
VZ
1183 {
1184 m_CodePage = wxEncodingToCodepage(encoding);
1185 }
1186
4def3b35 1187 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
f1339c56 1188 {
2b5f62a0
VZ
1189 const size_t len = ::MultiByteToWideChar
1190 (
1191 m_CodePage, // code page
1192 0, // flags (none)
1193 psz, // input string
1194 -1, // its length (NUL-terminated)
b4da152e 1195 buf, // output string
2b5f62a0
VZ
1196 buf ? n : 0 // size of output buffer
1197 );
1198
1199 // note that it returns # of written chars for buf != NULL and *size*
1200 // of the needed buffer for buf == NULL
1201 return len ? (buf ? len : len - 1) : (size_t)-1;
f1339c56 1202 }
dccce9ea 1203
4def3b35 1204 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
f1339c56 1205 {
2b5f62a0
VZ
1206 const size_t len = ::WideCharToMultiByte
1207 (
1208 m_CodePage, // code page
1209 0, // flags (none)
b4da152e 1210 psz, // input string
2b5f62a0
VZ
1211 -1, // it is (wide) NUL-terminated
1212 buf, // output buffer
1213 buf ? n : 0, // and its size
1214 NULL, // default "replacement" char
1215 NULL // [out] was it used?
1216 );
1217
1218 // see the comment above!
1219 return len ? (buf ? len : len - 1) : (size_t)-1;
f1339c56 1220 }
dccce9ea 1221
e95354ec 1222 bool IsOk() const
b1d66b54 1223 { return m_CodePage != -1; }
f1339c56
RR
1224
1225public:
b1d66b54 1226 long m_CodePage;
1cd52418 1227};
e95354ec
VZ
1228
1229#endif // wxHAVE_WIN32_MB2WC
1230
1e6feb95 1231
36acb880
VZ
1232// ============================================================================
1233// wxEncodingConverter based conversion classes
1234// ============================================================================
1235
1e6feb95 1236#if wxUSE_FONTMAP
1cd52418 1237
e95354ec 1238class wxMBConv_wxwin : public wxMBConv
1cd52418 1239{
8b04d4c4
VZ
1240private:
1241 void Init()
1242 {
1243 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
1244 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
1245 }
1246
6001e347 1247public:
f1339c56
RR
1248 // temporarily just use wxEncodingConverter stuff,
1249 // so that it works while a better implementation is built
e95354ec 1250 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
1251 {
1252 if (name)
e95354ec 1253 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
1254 else
1255 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 1256
8b04d4c4
VZ
1257 Init();
1258 }
1259
e95354ec 1260 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
1261 {
1262 m_enc = enc;
1263
1264 Init();
f1339c56 1265 }
dccce9ea 1266
574c939e 1267 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n))
f1339c56
RR
1268 {
1269 size_t inbuf = strlen(psz);
dccce9ea 1270 if (buf)
4def3b35 1271 m2w.Convert(psz,buf);
f1339c56
RR
1272 return inbuf;
1273 }
dccce9ea 1274
574c939e 1275 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n))
f1339c56 1276 {
f8d791e0 1277 const size_t inbuf = wxWcslen(psz);
f1339c56
RR
1278 if (buf)
1279 w2m.Convert(psz,buf);
dccce9ea 1280
f1339c56
RR
1281 return inbuf;
1282 }
dccce9ea 1283
e95354ec 1284 bool IsOk() const { return m_ok; }
f1339c56
RR
1285
1286public:
8b04d4c4 1287 wxFontEncoding m_enc;
f1339c56 1288 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
1289
1290 // were we initialized successfully?
1291 bool m_ok;
fc7a2a60 1292
e95354ec 1293 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 1294};
6001e347 1295
1e6feb95
VZ
1296#endif // wxUSE_FONTMAP
1297
36acb880
VZ
1298// ============================================================================
1299// wxCSConv implementation
1300// ============================================================================
1301
8b04d4c4 1302void wxCSConv::Init()
6001e347 1303{
e95354ec
VZ
1304 m_name = NULL;
1305 m_convReal = NULL;
1306 m_deferred = true;
1307}
1308
1309// find a valid value for the encoding
1310void wxCSConv::SetEncoding()
1311{
1312#if wxUSE_INTL
1313 m_encoding = wxLocale::GetSystemEncoding();
1314#else
1315 m_encoding = wxFONTENCODING_SYSTEM;
1316#endif
8b04d4c4
VZ
1317}
1318
1319wxCSConv::wxCSConv(const wxChar *charset)
1320{
1321 Init();
82713003 1322
e95354ec
VZ
1323 if ( charset )
1324 {
1325 // not used
1326 m_encoding = wxFONTENCODING_SYSTEM;
1327
1328 SetName(charset);
1329 }
1330 else // no charset specified
1331 {
1332 SetEncoding();
1333 }
6001e347
RR
1334}
1335
8b04d4c4
VZ
1336wxCSConv::wxCSConv(wxFontEncoding encoding)
1337{
e95354ec
VZ
1338 if ( encoding == wxFONTENCODING_MAX ||
1339 encoding == wxFONTENCODING_DEFAULT )
1340 {
1341 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
1342
1343 encoding = wxFONTENCODING_SYSTEM;
1344 }
1345
8b04d4c4
VZ
1346 Init();
1347
e95354ec
VZ
1348 if ( encoding == wxFONTENCODING_SYSTEM )
1349 {
1350 SetEncoding();
1351 }
1352 else // have valid encoding, use it
1353 {
1354 m_encoding = encoding;
1355 }
8b04d4c4
VZ
1356}
1357
6001e347
RR
1358wxCSConv::~wxCSConv()
1359{
65e50848
JS
1360 Clear();
1361}
1362
54380f29 1363wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 1364 : wxMBConv()
54380f29 1365{
8b04d4c4
VZ
1366 Init();
1367
54380f29 1368 SetName(conv.m_name);
8b04d4c4 1369 m_encoding = conv.m_encoding;
54380f29
GD
1370}
1371
1372wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
1373{
1374 Clear();
8b04d4c4 1375
54380f29 1376 SetName(conv.m_name);
8b04d4c4
VZ
1377 m_encoding = conv.m_encoding;
1378
54380f29
GD
1379 return *this;
1380}
1381
65e50848
JS
1382void wxCSConv::Clear()
1383{
8b04d4c4 1384 free(m_name);
e95354ec 1385 delete m_convReal;
8b04d4c4 1386
65e50848 1387 m_name = NULL;
e95354ec 1388 m_convReal = NULL;
6001e347
RR
1389}
1390
1391void wxCSConv::SetName(const wxChar *charset)
1392{
f1339c56
RR
1393 if (charset)
1394 {
1395 m_name = wxStrdup(charset);
e95354ec 1396 m_deferred = true;
f1339c56 1397 }
6001e347
RR
1398}
1399
e95354ec 1400static inline bool DoesntNeedConv(wxFontEncoding enc)
6001e347 1401{
e95354ec
VZ
1402 return enc == wxFONTENCODING_DEFAULT ||
1403 enc == wxFONTENCODING_SYSTEM ||
1404 enc == wxFONTENCODING_ISO8859_1;
1405}
1406
1407wxMBConv *wxCSConv::DoCreate() const
1408{
1409#if wxUSE_FONTMAP
1410 wxFontMapper * const fontMapper = wxFontMapper::Get();
1411
1412 wxFontEncoding encFromName = m_name ? fontMapper->CharsetToEncoding(m_name)
1413 : wxFONTENCODING_SYSTEM;
1414#endif // wxUSE_FONTMAP
1415
1416 // check for the special case of ASCII charset
1417 if ( (!m_name && DoesntNeedConv(m_encoding))
1418#if wxUSE_FONTMAP
1419 || (m_name && DoesntNeedConv(encFromName))
1420#endif // wxUSE_FONTMAP
1421 )
f1339c56 1422 {
e95354ec
VZ
1423 // don't convert at all
1424 return NULL;
1425 }
dccce9ea 1426
e95354ec
VZ
1427 // we trust OS to do conversion better than we can so try external
1428 // conversion methods first
1429 //
1430 // the full order is:
1431 // 1. OS conversion (iconv() under Unix or Win32 API)
1432 // 2. hard coded conversions for UTF
1433 // 3. wxEncodingConverter as fall back
1434
1435 // step (1)
1436#ifdef HAVE_ICONV
1437 if ( m_name )
1438 {
1439 wxMBConv_iconv *conv = new wxMBConv_iconv(m_name);
1440 if ( conv->IsOk() )
1441 return conv;
1442
1443 delete conv;
1444 }
1445#endif // HAVE_ICONV
1446
1447#ifdef wxHAVE_WIN32_MB2WC
1448 {
1449 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
1450 : new wxMBConv_win32(m_encoding);
1451 if ( conv->IsOk() )
1452 return conv;
1453
1454 delete conv;
1455 }
1456#endif // wxHAVE_WIN32_MB2WC
1457
1458 // step (2)
1459 wxFontEncoding enc = m_encoding;
1460#if wxUSE_FONTMAP
1461 if ( enc == wxFONTENCODING_SYSTEM )
1462 enc = encFromName;
1463#endif // wxUSE_FONTMAP
1464
1465 switch ( enc )
1466 {
1467 case wxFONTENCODING_UTF7:
1468 return new wxMBConvUTF7;
1469
1470 case wxFONTENCODING_UTF8:
1471 return new wxMBConvUTF8;
1472
1473 case wxFONTENCODING_UTF16:
1474 return new wxMBConvUTF16;
1475
1476 case wxFONTENCODING_UTF16BE:
1477 return new wxMBConvUTF16BE;
1478
1479 case wxFONTENCODING_UTF16LE:
1480 return new wxMBConvUTF16LE;
1481
1482 case wxFONTENCODING_UTF32:
1483 return new wxMBConvUTF32;
1484
1485 case wxFONTENCODING_UTF32BE:
1486 return new wxMBConvUTF32BE;
1487
1488 case wxFONTENCODING_UTF32LE:
1489 return new wxMBConvUTF32LE;
1490
1491 default:
1492 // nothing to do but put here to suppress gcc warnings
1493 ;
1494 }
1495
1496 // step (3)
1497#if wxUSE_FONTMAP
1498 {
1499 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
1500 : new wxMBConv_wxwin(m_encoding);
1501 if ( conv->IsOk() )
1502 return conv;
1503
1504 delete conv;
1505 }
1506#endif // wxUSE_FONTMAP
1507
1508 wxLogError(_("Cannot convert from the charset '%s'!"),
1509 m_name ? m_name
1510 :
1511#if wxUSE_FONTMAP
1512 wxFontMapper::GetEncodingDescription(m_encoding).c_str()
1513#else // !wxUSE_FONTMAP
1514 wxString::Format(_("encoding %s"), m_encoding).c_str()
1515#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1516 );
1517
1518 return NULL;
1519}
1520
1521void wxCSConv::CreateConvIfNeeded() const
1522{
1523 if ( m_deferred )
1524 {
1525 wxCSConv *self = (wxCSConv *)this; // const_cast
1526 self->m_convReal = DoCreate();
1527 self->m_deferred = false;
6001e347 1528 }
6001e347
RR
1529}
1530
1531size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1532{
e95354ec 1533 CreateConvIfNeeded();
dccce9ea 1534
e95354ec
VZ
1535 if (m_convReal)
1536 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
1537
1538 // latin-1 (direct)
4def3b35 1539 size_t len = strlen(psz);
dccce9ea 1540
f1339c56
RR
1541 if (buf)
1542 {
4def3b35 1543 for (size_t c = 0; c <= len; c++)
f1339c56
RR
1544 buf[c] = (unsigned char)(psz[c]);
1545 }
dccce9ea 1546
f1339c56 1547 return len;
6001e347
RR
1548}
1549
1550size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1551{
e95354ec 1552 CreateConvIfNeeded();
dccce9ea 1553
e95354ec
VZ
1554 if (m_convReal)
1555 return m_convReal->WC2MB(buf, psz, n);
1cd52418 1556
f1339c56 1557 // latin-1 (direct)
f8d791e0 1558 const size_t len = wxWcslen(psz);
f1339c56
RR
1559 if (buf)
1560 {
4def3b35
VS
1561 for (size_t c = 0; c <= len; c++)
1562 buf[c] = (psz[c] > 0xff) ? '?' : psz[c];
f1339c56 1563 }
dccce9ea 1564
f1339c56 1565 return len;
6001e347
RR
1566}
1567
f6bcfd97 1568#endif // wxUSE_WCHAR_T
6001e347
RR
1569
1570