]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
wxMac uses wxStandardPathsCF to mean wxStandardPaths in its wxBase
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
f6bcfd97
BP
15// ============================================================================
16// declarations
17// ============================================================================
18
19// ----------------------------------------------------------------------------
20// headers
21// ----------------------------------------------------------------------------
22
14f355c2 23#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
24 #pragma implementation "strconv.h"
25#endif
26
27// For compilers that support precompilation, includes "wx.h".
28#include "wx/wxprec.h"
29
30#ifdef __BORLANDC__
31 #pragma hdrstop
32#endif
33
373658eb
VZ
34#ifndef WX_PRECOMP
35 #include "wx/intl.h"
36 #include "wx/log.h"
37#endif // WX_PRECOMP
38
bde4baac
VZ
39#include "wx/strconv.h"
40
41#if wxUSE_WCHAR_T
42
0a1c1e62 43#ifdef __WXMSW__
373658eb 44 #include "wx/msw/private.h"
7608a683
WS
45#endif
46
47#ifdef __WINDOWS__
13dd924a 48 #include "wx/msw/missing.h"
0a1c1e62
GRG
49#endif
50
1c193821 51#ifndef __WXWINCE__
1cd52418 52#include <errno.h>
1c193821
JS
53#endif
54
6001e347
RR
55#include <ctype.h>
56#include <string.h>
57#include <stdlib.h>
58
e95354ec
VZ
59#if defined(__WIN32__) && !defined(__WXMICROWIN__)
60 #define wxHAVE_WIN32_MB2WC
61#endif // __WIN32__ but !__WXMICROWIN__
62
373658eb
VZ
63// ----------------------------------------------------------------------------
64// headers
65// ----------------------------------------------------------------------------
7af284fd 66
6001e347 67#ifdef __SALFORDC__
373658eb 68 #include <clib.h>
6001e347
RR
69#endif
70
b040e242 71#ifdef HAVE_ICONV
373658eb 72 #include <iconv.h>
1cd52418 73#endif
1cd52418 74
373658eb
VZ
75#include "wx/encconv.h"
76#include "wx/fontmap.h"
7608a683 77#include "wx/utils.h"
373658eb 78
335d31e0 79#ifdef __WXMAC__
4227afa4
SC
80#include <ATSUnicode.h>
81#include <TextCommon.h>
82#include <TextEncodingConverter.h>
335d31e0
SC
83
84#include "wx/mac/private.h" // includes mac headers
85#endif
373658eb
VZ
86// ----------------------------------------------------------------------------
87// macros
88// ----------------------------------------------------------------------------
3e61dfb0 89
// Byte-swap, in place, the first `len` elements of the 32-bit (BSWAP_UCS4) or
// 16-bit (BSWAP_UTF16) array `str`.
//
// NB: these intentionally expand to a bare brace block rather than the usual
//     do { } while (0): existing call sites invoke WC_BSWAP(...) without a
//     trailing semicolon. Arguments are parenthesized so that expressions can
//     be passed safely.
#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<(len); _c++) (str)[_c]=wxUINT32_SWAP_ALWAYS((str)[_c]); }
#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<(len); _c++) (str)[_c]=wxUINT16_SWAP_ALWAYS((str)[_c]); }
92
93#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
94 #define WC_NAME "UCS4"
95 #define WC_BSWAP BSWAP_UCS4
96 #ifdef WORDS_BIGENDIAN
97 #define WC_NAME_BEST "UCS-4BE"
98 #else
99 #define WC_NAME_BEST "UCS-4LE"
100 #endif
1cd52418 101#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
102 #define WC_NAME "UTF16"
103 #define WC_BSWAP BSWAP_UTF16
a3f2769e 104 #define WC_UTF16
3a0d76bc
VS
105 #ifdef WORDS_BIGENDIAN
106 #define WC_NAME_BEST "UTF-16BE"
107 #else
108 #define WC_NAME_BEST "UTF-16LE"
109 #endif
bab1e722 110#else // sizeof(wchar_t) != 2 nor 4
bde4baac
VZ
111 // does this ever happen?
112 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1cd52418
OK
113#endif
114
373658eb
VZ
115// ============================================================================
116// implementation
117// ============================================================================
118
119// ----------------------------------------------------------------------------
c91830cb 120// UTF-16 en/decoding to/from UCS-4
373658eb 121// ----------------------------------------------------------------------------
6001e347 122
b0a6bb75 123
// Encode the UCS-4 value `input` as UTF-16.
//
// Returns the number of 16-bit units the encoding takes (1 or 2), or
// (size_t)-1 if `input` is 0x110000 or above. If `output` is not NULL the
// units are stored there (up to 2 of them, the caller must provide the room);
// if it is NULL only the length is computed.
static size_t encode_utf16(wxUint32 input, wxUint16 *output)
{
    if (input >= 0x110000)
    {
        // outside of the range representable in UTF-16
        return (size_t)-1;
    }

    if (input <= 0xffff)
    {
        // fits in a single unit
        if (output)
            *output = (wxUint16) input;
        return 1;
    }

    // surrogate pair needed
    if (output)
    {
        // 0xd7c0 == 0xd800 - (0x10000 >> 10): subtracts the 0x10000 offset
        // and adds the high surrogate base in one step
        output[0] = (wxUint16) ((input >> 10) + 0xd7c0);
        output[1] = (wxUint16) ((input & 0x3ff) + 0xdc00);
    }
    return 2;
}
146
// Decode one UCS-4 value from the UTF-16 units starting at `input`.
//
// Returns the number of units consumed (1 or 2) and stores the decoded value
// in `output`. Returns (size_t)-1 for a malformed sequence, i.e. a high
// surrogate not followed by a low surrogate, or a low surrogate in leading
// position; in that case `output` is set to the offending leading unit so
// that callers may choose to pass it through.
//
// input[1] is only read when input[0] is a high surrogate.
static size_t decode_utf16(const wxUint16* input, wxUint32& output)
{
    const wxUint16 lead = input[0];

    if ((lead < 0xd800) || (lead > 0xdfff))
    {
        // not a surrogate at all: BMP character
        output = lead;
        return 1;
    }

    if (lead > 0xdbff)
    {
        // low surrogate without a preceding high one
        output = lead;
        return (size_t)-1;
    }

    const wxUint16 trail = input[1];
    if ((trail < 0xdc00) || (trail > 0xdfff))
    {
        // high surrogate not followed by a low one (0xdfff itself is valid)
        output = lead;
        return (size_t)-1;
    }

    // 0xd7c0 == 0xd800 - (0x10000 >> 10), see encode_utf16()
    output = ((wxUint32)(lead - 0xd7c0) << 10) + (trail - 0xdc00);
    return 2;
}
165
b0a6bb75 166
f6bcfd97 167// ----------------------------------------------------------------------------
6001e347 168// wxMBConv
f6bcfd97 169// ----------------------------------------------------------------------------
2c53a80a
WS
170
171wxMBConv::~wxMBConv()
172{
173 // nothing to do here (necessary for Darwin linking probably)
174}
6001e347 175
6001e347
RR
176const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
177{
2b5f62a0 178 if ( psz )
6001e347 179 {
2b5f62a0
VZ
180 // calculate the length of the buffer needed first
181 size_t nLen = MB2WC(NULL, psz, 0);
182 if ( nLen != (size_t)-1 )
183 {
184 // now do the actual conversion
185 wxWCharBuffer buf(nLen);
635f33ce
VS
186 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
187 if ( nLen != (size_t)-1 )
188 {
189 return buf;
190 }
2b5f62a0 191 }
f6bcfd97 192 }
2b5f62a0
VZ
193
194 wxWCharBuffer buf((wchar_t *)NULL);
195
196 return buf;
6001e347
RR
197}
198
e5cceba0 199const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 200{
2b5f62a0
VZ
201 if ( pwz )
202 {
203 size_t nLen = WC2MB(NULL, pwz, 0);
204 if ( nLen != (size_t)-1 )
205 {
c91830cb 206 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
635f33ce
VS
207 nLen = WC2MB(buf.data(), pwz, nLen + 4);
208 if ( nLen != (size_t)-1 )
209 {
210 return buf;
211 }
2b5f62a0
VZ
212 }
213 }
214
215 wxCharBuffer buf((char *)NULL);
e5cceba0 216
e5cceba0 217 return buf;
6001e347
RR
218}
219
6001e347 220// ----------------------------------------------------------------------------
bde4baac 221// wxMBConvLibc
6001e347
RR
222// ----------------------------------------------------------------------------
223
bde4baac
VZ
224size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
225{
226 return wxMB2WC(buf, psz, n);
227}
228
229size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
230{
231 return wxWC2MB(buf, psz, n);
232}
bde4baac 233// ----------------------------------------------------------------------------
15f2ee32 234// UTF-7
bde4baac 235// ----------------------------------------------------------------------------
6001e347 236
15f2ee32 237// Implementation (C) 2004 Fredrik Roubert
6001e347 238
15f2ee32
RN
239//
240// BASE64 decoding table
241//
// Indexed by byte value: gives the 6-bit value of a BASE64 digit
// ('A'-'Z', 'a'-'z', '0'-'9', '+', '/') or 0xff for any other byte.
static const unsigned char utf7unb64[] =
{
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
277
278size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
279{
280
281 size_t len = 0;
282
283 while (*psz && ((!buf) || (len < n)))
284 {
285 unsigned char cc = *psz++;
286 if (cc != '+')
287 {
288 // plain ASCII char
289 if (buf)
290 *buf++ = cc;
291 len++;
292 }
293 else if (*psz == '-')
294 {
295 // encoded plus sign
296 if (buf)
297 *buf++ = cc;
298 len++;
299 psz++;
300 }
301 else
302 {
303 // BASE64 encoded string
304 bool lsb;
305 unsigned char c;
306 unsigned int d, l;
307 for (lsb = false, d = 0, l = 0;
308 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
309 {
310 d <<= 6;
311 d += cc;
312 for (l += 6; l >= 8; lsb = !lsb)
313 {
314 c = (d >> (l -= 8)) % 256;
315 if (lsb)
316 {
317 if (buf)
318 *buf++ |= c;
319 len ++;
320 }
321 else
322 if (buf)
323 *buf = c << 8;
324 }
325 }
326 if (*psz == '-')
327 psz++;
328 }
329 }
330 if (buf && (len < n))
331 *buf = 0;
332 return len;
6001e347
RR
333}
334
15f2ee32
RN
335//
336// BASE64 encoding table
337//
// Maps a 6-bit value (0..63) to its BASE64 digit.
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3',
    '4', '5', '6', '7', '8', '9', '+', '/'
};
349
350//
351// UTF-7 encoding table
352//
353// 0 - Set D (directly encoded characters)
354// 1 - Set O (optional direct characters)
355// 2 - whitespace characters (optional)
356// 3 - special characters
357//
// Class of each 7-bit character for the UTF-7 encoder, indexed by the
// character code:
//   0 - Set D (directly encoded characters)
//   1 - Set O (optional direct characters)
//   2 - whitespace characters (optional)
//   3 - special characters
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
369
370size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t
371*psz, size_t n) const
372{
373
374
375 size_t len = 0;
376
377 while (*psz && ((!buf) || (len < n)))
378 {
379 wchar_t cc = *psz++;
380 if (cc < 0x80 && utf7encode[cc] < 1)
381 {
382 // plain ASCII char
383 if (buf)
384 *buf++ = (char)cc;
385 len++;
386 }
387#ifndef WC_UTF16
388 else if (cc > 0xffff)
389 {
390 // no surrogate pair generation (yet?)
391 return (size_t)-1;
392 }
393#endif
394 else
395 {
396 if (buf)
397 *buf++ = '+';
398 len++;
399 if (cc != '+')
400 {
401 // BASE64 encode string
402 unsigned int lsb, d, l;
403 for (d = 0, l = 0;; psz++)
404 {
405 for (lsb = 0; lsb < 2; lsb ++)
406 {
407 d <<= 8;
408 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
409
410 for (l += 8; l >= 6; )
411 {
412 l -= 6;
413 if (buf)
414 *buf++ = utf7enb64[(d >> l) % 64];
415 len++;
416 }
417 }
418 cc = *psz;
419 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
420 break;
421 }
422 if (l != 0)
423 {
424 if (buf)
425 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
426 len++;
427 }
428 }
429 if (buf)
430 *buf++ = '-';
431 len++;
432 }
433 }
434 if (buf && (len < n))
435 *buf = 0;
436 return len;
6001e347
RR
437}
438
f6bcfd97 439// ----------------------------------------------------------------------------
6001e347 440// UTF-8
f6bcfd97 441// ----------------------------------------------------------------------------
6001e347 442
// utf8_max[k] is the largest value encodable with k+1 bytes in UTF-8 (using
// the original up-to-6-byte scheme); the last entry acts as a sentinel.
// The table is only ever read, hence const.
static const wxUint32 utf8_max[]=
    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347
RR
445
446size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
447{
4def3b35
VS
448 size_t len = 0;
449
dccce9ea 450 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
451 {
452 unsigned char cc = *psz++, fc = cc;
453 unsigned cnt;
dccce9ea 454 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 455 fc <<= 1;
dccce9ea 456 if (!cnt)
4def3b35
VS
457 {
458 // plain ASCII char
dccce9ea 459 if (buf)
4def3b35
VS
460 *buf++ = cc;
461 len++;
dccce9ea
VZ
462 }
463 else
4def3b35
VS
464 {
465 cnt--;
dccce9ea 466 if (!cnt)
4def3b35
VS
467 {
468 // invalid UTF-8 sequence
469 return (size_t)-1;
dccce9ea
VZ
470 }
471 else
4def3b35
VS
472 {
473 unsigned ocnt = cnt - 1;
474 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 475 while (cnt--)
4def3b35
VS
476 {
477 cc = *psz++;
dccce9ea 478 if ((cc & 0xC0) != 0x80)
4def3b35
VS
479 {
480 // invalid UTF-8 sequence
481 return (size_t)-1;
482 }
483 res = (res << 6) | (cc & 0x3f);
484 }
dccce9ea 485 if (res <= utf8_max[ocnt])
4def3b35
VS
486 {
487 // illegal UTF-8 encoding
488 return (size_t)-1;
489 }
1cd52418 490#ifdef WC_UTF16
b5153fd8
VZ
491 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
492 size_t pa = encode_utf16(res, (wxUint16 *)buf);
4def3b35
VS
493 if (pa == (size_t)-1)
494 return (size_t)-1;
dccce9ea 495 if (buf)
4def3b35
VS
496 buf += pa;
497 len += pa;
373658eb 498#else // !WC_UTF16
dccce9ea 499 if (buf)
4def3b35
VS
500 *buf++ = res;
501 len++;
373658eb 502#endif // WC_UTF16/!WC_UTF16
4def3b35
VS
503 }
504 }
6001e347 505 }
dccce9ea 506 if (buf && (len < n))
4def3b35
VS
507 *buf = 0;
508 return len;
6001e347
RR
509}
510
511size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
512{
4def3b35 513 size_t len = 0;
6001e347 514
dccce9ea 515 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
516 {
517 wxUint32 cc;
1cd52418 518#ifdef WC_UTF16
b5153fd8
VZ
519 // cast is ok for WC_UTF16
520 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 521 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 522#else
4def3b35
VS
523 cc=(*psz++) & 0x7fffffff;
524#endif
525 unsigned cnt;
526 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
dccce9ea 527 if (!cnt)
4def3b35
VS
528 {
529 // plain ASCII char
dccce9ea 530 if (buf)
574c939e 531 *buf++ = (char) cc;
4def3b35 532 len++;
dccce9ea
VZ
533 }
534
535 else
4def3b35
VS
536 {
537 len += cnt + 1;
dccce9ea 538 if (buf)
4def3b35 539 {
574c939e 540 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
4def3b35 541 while (cnt--)
574c939e 542 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
4def3b35
VS
543 }
544 }
6001e347 545 }
4def3b35
VS
546
547 if (buf && (len<n)) *buf = 0;
adb45366 548
4def3b35 549 return len;
6001e347
RR
550}
551
c91830cb
VZ
552
553
554
555// ----------------------------------------------------------------------------
556// UTF-16
557// ----------------------------------------------------------------------------
558
559#ifdef WORDS_BIGENDIAN
bde4baac
VZ
560 #define wxMBConvUTF16straight wxMBConvUTF16BE
561 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 562#else
bde4baac
VZ
563 #define wxMBConvUTF16swap wxMBConvUTF16BE
564 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
565#endif
566
567
c91830cb
VZ
568#ifdef WC_UTF16
569
c91830cb
VZ
570// copy 16bit MB to 16bit String
571size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
572{
573 size_t len=0;
574
575 while (*(wxUint16*)psz && (!buf || len < n))
576 {
577 if (buf)
578 *buf++ = *(wxUint16*)psz;
579 len++;
580
581 psz += sizeof(wxUint16);
582 }
583 if (buf && len<n) *buf=0;
584
585 return len;
586}
587
588
589// copy 16bit String to 16bit MB
590size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
591{
592 size_t len=0;
593
594 while (*psz && (!buf || len < n))
595 {
596 if (buf)
597 {
598 *(wxUint16*)buf = *psz;
599 buf += sizeof(wxUint16);
600 }
601 len += sizeof(wxUint16);
602 psz++;
603 }
604 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
605
606 return len;
607}
608
609
610// swap 16bit MB to 16bit String
611size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
612{
613 size_t len=0;
614
615 while (*(wxUint16*)psz && (!buf || len < n))
616 {
617 if (buf)
618 {
619 ((char *)buf)[0] = psz[1];
620 ((char *)buf)[1] = psz[0];
621 buf++;
622 }
623 len++;
624 psz += sizeof(wxUint16);
625 }
626 if (buf && len<n) *buf=0;
627
628 return len;
629}
630
631
632// swap 16bit MB to 16bit String
633size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
634{
635 size_t len=0;
636
637 while (*psz && (!buf || len < n))
638 {
639 if (buf)
640 {
641 *buf++ = ((char*)psz)[1];
642 *buf++ = ((char*)psz)[0];
643 }
644 len += sizeof(wxUint16);
645 psz++;
646 }
647 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
648
649 return len;
650}
651
652
653#else // WC_UTF16
654
655
656// copy 16bit MB to 32bit String
657size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
658{
659 size_t len=0;
660
661 while (*(wxUint16*)psz && (!buf || len < n))
662 {
663 wxUint32 cc;
664 size_t pa=decode_utf16((wxUint16*)psz, cc);
665 if (pa == (size_t)-1)
666 return pa;
667
668 if (buf)
669 *buf++ = cc;
670 len++;
671 psz += pa * sizeof(wxUint16);
672 }
673 if (buf && len<n) *buf=0;
674
675 return len;
676}
677
678
679// copy 32bit String to 16bit MB
680size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
681{
682 size_t len=0;
683
684 while (*psz && (!buf || len < n))
685 {
686 wxUint16 cc[2];
687 size_t pa=encode_utf16(*psz, cc);
688
689 if (pa == (size_t)-1)
690 return pa;
691
692 if (buf)
693 {
69b80d28 694 *(wxUint16*)buf = cc[0];
b5153fd8 695 buf += sizeof(wxUint16);
c91830cb 696 if (pa > 1)
69b80d28
VZ
697 {
698 *(wxUint16*)buf = cc[1];
699 buf += sizeof(wxUint16);
700 }
c91830cb
VZ
701 }
702
703 len += pa*sizeof(wxUint16);
704 psz++;
705 }
706 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
707
708 return len;
709}
710
711
712// swap 16bit MB to 32bit String
713size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
714{
715 size_t len=0;
716
717 while (*(wxUint16*)psz && (!buf || len < n))
718 {
719 wxUint32 cc;
720 char tmp[4];
721 tmp[0]=psz[1]; tmp[1]=psz[0];
722 tmp[2]=psz[3]; tmp[3]=psz[2];
723
724 size_t pa=decode_utf16((wxUint16*)tmp, cc);
725 if (pa == (size_t)-1)
726 return pa;
727
728 if (buf)
729 *buf++ = cc;
730
731 len++;
732 psz += pa * sizeof(wxUint16);
733 }
734 if (buf && len<n) *buf=0;
735
736 return len;
737}
738
739
740// swap 32bit String to 16bit MB
741size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
742{
743 size_t len=0;
744
745 while (*psz && (!buf || len < n))
746 {
747 wxUint16 cc[2];
748 size_t pa=encode_utf16(*psz, cc);
749
750 if (pa == (size_t)-1)
751 return pa;
752
753 if (buf)
754 {
755 *buf++ = ((char*)cc)[1];
756 *buf++ = ((char*)cc)[0];
757 if (pa > 1)
758 {
759 *buf++ = ((char*)cc)[3];
760 *buf++ = ((char*)cc)[2];
761 }
762 }
763
764 len += pa*sizeof(wxUint16);
765 psz++;
766 }
767 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
768
769 return len;
770}
771
772#endif // WC_UTF16
773
774
775// ----------------------------------------------------------------------------
776// UTF-32
777// ----------------------------------------------------------------------------
778
779#ifdef WORDS_BIGENDIAN
780#define wxMBConvUTF32straight wxMBConvUTF32BE
781#define wxMBConvUTF32swap wxMBConvUTF32LE
782#else
783#define wxMBConvUTF32swap wxMBConvUTF32BE
784#define wxMBConvUTF32straight wxMBConvUTF32LE
785#endif
786
787
788WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
789WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
790
791
792#ifdef WC_UTF16
793
794// copy 32bit MB to 16bit String
795size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
796{
797 size_t len=0;
798
799 while (*(wxUint32*)psz && (!buf || len < n))
800 {
801 wxUint16 cc[2];
802
803 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
804 if (pa == (size_t)-1)
805 return pa;
806
807 if (buf)
808 {
809 *buf++ = cc[0];
810 if (pa > 1)
811 *buf++ = cc[1];
812 }
813 len += pa;
814 psz += sizeof(wxUint32);
815 }
816 if (buf && len<n) *buf=0;
817
818 return len;
819}
820
821
822// copy 16bit String to 32bit MB
823size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
824{
825 size_t len=0;
826
827 while (*psz && (!buf || len < n))
828 {
829 wxUint32 cc;
830
b5153fd8
VZ
831 // cast is ok for WC_UTF16
832 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
833 if (pa == (size_t)-1)
834 return pa;
835
836 if (buf)
837 {
838 *(wxUint32*)buf = cc;
839 buf += sizeof(wxUint32);
840 }
841 len += sizeof(wxUint32);
842 psz += pa;
843 }
b5153fd8
VZ
844
845 if (buf && len<=n-sizeof(wxUint32))
846 *(wxUint32*)buf=0;
c91830cb
VZ
847
848 return len;
849}
850
851
852
853// swap 32bit MB to 16bit String
854size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
855{
856 size_t len=0;
857
858 while (*(wxUint32*)psz && (!buf || len < n))
859 {
860 char tmp[4];
861 tmp[0] = psz[3]; tmp[1] = psz[2];
862 tmp[2] = psz[1]; tmp[3] = psz[0];
863
864
865 wxUint16 cc[2];
866
867 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
868 if (pa == (size_t)-1)
869 return pa;
870
871 if (buf)
872 {
873 *buf++ = cc[0];
874 if (pa > 1)
875 *buf++ = cc[1];
876 }
877 len += pa;
878 psz += sizeof(wxUint32);
879 }
b5153fd8
VZ
880
881 if (buf && len<n)
882 *buf=0;
c91830cb
VZ
883
884 return len;
885}
886
887
888// swap 16bit String to 32bit MB
889size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
890{
891 size_t len=0;
892
893 while (*psz && (!buf || len < n))
894 {
895 char cc[4];
896
b5153fd8
VZ
897 // cast is ok for WC_UTF16
898 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
899 if (pa == (size_t)-1)
900 return pa;
901
902 if (buf)
903 {
904 *buf++ = cc[3];
905 *buf++ = cc[2];
906 *buf++ = cc[1];
907 *buf++ = cc[0];
908 }
909 len += sizeof(wxUint32);
910 psz += pa;
911 }
b5153fd8
VZ
912
913 if (buf && len<=n-sizeof(wxUint32))
914 *(wxUint32*)buf=0;
c91830cb
VZ
915
916 return len;
917}
918
919#else // WC_UTF16
920
921
922// copy 32bit MB to 32bit String
923size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
924{
925 size_t len=0;
926
927 while (*(wxUint32*)psz && (!buf || len < n))
928 {
929 if (buf)
930 *buf++ = *(wxUint32*)psz;
931 len++;
932 psz += sizeof(wxUint32);
933 }
b5153fd8
VZ
934
935 if (buf && len<n)
936 *buf=0;
c91830cb
VZ
937
938 return len;
939}
940
941
942// copy 32bit String to 32bit MB
943size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
944{
945 size_t len=0;
946
947 while (*psz && (!buf || len < n))
948 {
949 if (buf)
950 {
951 *(wxUint32*)buf = *psz;
952 buf += sizeof(wxUint32);
953 }
954
955 len += sizeof(wxUint32);
956 psz++;
957 }
958
b5153fd8
VZ
959 if (buf && len<=n-sizeof(wxUint32))
960 *(wxUint32*)buf=0;
c91830cb
VZ
961
962 return len;
963}
964
965
966// swap 32bit MB to 32bit String
967size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
968{
969 size_t len=0;
970
971 while (*(wxUint32*)psz && (!buf || len < n))
972 {
973 if (buf)
974 {
975 ((char *)buf)[0] = psz[3];
976 ((char *)buf)[1] = psz[2];
977 ((char *)buf)[2] = psz[1];
978 ((char *)buf)[3] = psz[0];
979 buf++;
980 }
981 len++;
982 psz += sizeof(wxUint32);
983 }
b5153fd8
VZ
984
985 if (buf && len<n)
986 *buf=0;
c91830cb
VZ
987
988 return len;
989}
990
991
992// swap 32bit String to 32bit MB
993size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
994{
995 size_t len=0;
996
997 while (*psz && (!buf || len < n))
998 {
999 if (buf)
1000 {
1001 *buf++ = ((char *)psz)[3];
1002 *buf++ = ((char *)psz)[2];
1003 *buf++ = ((char *)psz)[1];
1004 *buf++ = ((char *)psz)[0];
1005 }
1006 len += sizeof(wxUint32);
1007 psz++;
1008 }
b5153fd8
VZ
1009
1010 if (buf && len<=n-sizeof(wxUint32))
1011 *(wxUint32*)buf=0;
c91830cb
VZ
1012
1013 return len;
1014}
1015
1016
1017#endif // WC_UTF16
1018
1019
36acb880
VZ
1020// ============================================================================
1021// The classes doing conversion using the iconv_xxx() functions
1022// ============================================================================
3caec1bb 1023
b040e242 1024#ifdef HAVE_ICONV
3a0d76bc 1025
3caec1bb
VS
1026// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
1027// if output buffer is _exactly_ as big as needed. Such case is (unless there's
1028// yet another bug in glibc) the only case when iconv() returns with (size_t)-1
1029// (which means error) and says there are 0 bytes left in the input buffer --
1030// when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
1031// this alternative test for iconv() failure.
1032// [This bug does not appear in glibc 2.2.]
1033#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1034#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1035 (errno != E2BIG || bufLeft != 0))
1036#else
1037#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1038#endif
1039
ab217dba 1040#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880
VZ
1041
1042// ----------------------------------------------------------------------------
e95354ec 1043// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1044// ----------------------------------------------------------------------------
1045
e95354ec 1046class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1047{
1048public:
e95354ec
VZ
1049 wxMBConv_iconv(const wxChar *name);
1050 virtual ~wxMBConv_iconv();
36acb880 1051
bde4baac
VZ
1052 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1053 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1054
e95354ec 1055 bool IsOk() const
36acb880
VZ
1056 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1057
1058protected:
1059 // the iconv handlers used to translate from multibyte to wide char and in
1060 // the other direction
1061 iconv_t m2w,
1062 w2m;
1063
1064private:
e95354ec 1065 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
1066 // available on this machine, it will remain NULL
1067 static const char *ms_wcCharsetName;
1068
1069 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1070 // different endian-ness than the native one
405d8f46 1071 static bool ms_wcNeedsSwap;
36acb880
VZ
1072};
1073
e95354ec
VZ
1074const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1075bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1076
e95354ec 1077wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 1078{
04c79127
RR
1079 // Do it the hard way
1080 char cname[100];
1081 for (size_t i = 0; i < wxStrlen(name)+1; i++)
1082 cname[i] = (char) name[i];
1083
36acb880
VZ
1084 // check for charset that represents wchar_t:
1085 if (ms_wcCharsetName == NULL)
f1339c56 1086 {
e95354ec 1087 ms_wcNeedsSwap = false;
dccce9ea 1088
36acb880
VZ
1089 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1090 ms_wcCharsetName = WC_NAME_BEST;
04c79127 1091 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 1092
36acb880
VZ
1093 if (m2w == (iconv_t)-1)
1094 {
1095 // try charset w/o bytesex info (e.g. "UCS4")
1096 // and check for bytesex ourselves:
1097 ms_wcCharsetName = WC_NAME;
04c79127 1098 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
1099
1100 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
1101 if (m2w == (iconv_t)-1)
1102 {
36acb880 1103 ms_wcCharsetName = "WCHAR_T";
04c79127 1104 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 1105 }
3a0d76bc 1106
36acb880
VZ
1107 if (m2w != (iconv_t)-1)
1108 {
1109 char buf[2], *bufPtr;
1110 wchar_t wbuf[2], *wbufPtr;
1111 size_t insz, outsz;
1112 size_t res;
1113
1114 buf[0] = 'A';
1115 buf[1] = 0;
1116 wbuf[0] = 0;
1117 insz = 2;
1118 outsz = SIZEOF_WCHAR_T * 2;
1119 wbufPtr = wbuf;
1120 bufPtr = buf;
1121
1122 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1123 (char**)&wbufPtr, &outsz);
1124
1125 if (ICONV_FAILED(res, insz))
3a0d76bc 1126 {
36acb880
VZ
1127 ms_wcCharsetName = NULL;
1128 wxLogLastError(wxT("iconv"));
2b5f62a0 1129 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
1130 }
1131 else
1132 {
36acb880 1133 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
1134 }
1135 }
36acb880
VZ
1136 else
1137 {
1138 ms_wcCharsetName = NULL;
373658eb 1139
77ffb593 1140 // VS: we must not output an error here, since wxWidgets will safely
957686c8
VS
1141 // fall back to using wxEncodingConverter.
1142 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1143 //wxLogError(
36acb880 1144 }
3a0d76bc 1145 }
36acb880 1146 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 1147 }
36acb880 1148 else // we already have ms_wcCharsetName
3caec1bb 1149 {
04c79127 1150 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 1151 }
dccce9ea 1152
36acb880
VZ
1153 // NB: don't ever pass NULL to iconv_open(), it may crash!
1154 if ( ms_wcCharsetName )
f1339c56 1155 {
04c79127 1156 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 1157 }
405d8f46
VZ
1158 else
1159 {
1160 w2m = (iconv_t)-1;
1161 }
36acb880 1162}
3caec1bb 1163
e95354ec 1164wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
1165{
1166 if ( m2w != (iconv_t)-1 )
1167 iconv_close(m2w);
1168 if ( w2m != (iconv_t)-1 )
1169 iconv_close(w2m);
1170}
3a0d76bc 1171
bde4baac 1172size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880
VZ
1173{
1174 size_t inbuf = strlen(psz);
1175 size_t outbuf = n * SIZEOF_WCHAR_T;
1176 size_t res, cres;
1177 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1178 wchar_t *bufPtr = buf;
1179 const char *pszPtr = psz;
1180
1181 if (buf)
1182 {
1183 // have destination buffer, convert there
1184 cres = iconv(m2w,
1185 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1186 (char**)&bufPtr, &outbuf);
1187 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1188
36acb880 1189 if (ms_wcNeedsSwap)
3a0d76bc 1190 {
36acb880
VZ
1191 // convert to native endianness
1192 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 1193 }
adb45366 1194
49dd9820
VS
1195 // NB: iconv was given only strlen(psz) characters on input, and so
1196 // it couldn't convert the trailing zero. Let's do it ourselves
1197 // if there's some room left for it in the output buffer.
1198 if (res < n)
1199 buf[res] = 0;
36acb880
VZ
1200 }
1201 else
1202 {
1203 // no destination buffer... convert using temp buffer
1204 // to calculate destination buffer requirement
1205 wchar_t tbuf[8];
1206 res = 0;
1207 do {
1208 bufPtr = tbuf;
1209 outbuf = 8*SIZEOF_WCHAR_T;
1210
1211 cres = iconv(m2w,
1212 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1213 (char**)&bufPtr, &outbuf );
1214
1215 res += 8-(outbuf/SIZEOF_WCHAR_T);
1216 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1217 }
dccce9ea 1218
36acb880 1219 if (ICONV_FAILED(cres, inbuf))
f1339c56 1220 {
36acb880
VZ
1221 //VS: it is ok if iconv fails, hence trace only
1222 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1223 return (size_t)-1;
1224 }
1225
1226 return res;
1227}
1228
bde4baac 1229size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1230{
f8d791e0 1231 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1232 size_t outbuf = n;
1233 size_t res, cres;
3a0d76bc 1234
36acb880 1235 wchar_t *tmpbuf = 0;
3caec1bb 1236
36acb880
VZ
1237 if (ms_wcNeedsSwap)
1238 {
1239 // need to copy to temp buffer to switch endianness
1240 // this absolutely doesn't rock!
1241 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1242 // could be in read-only memory, or be accessed in some other thread)
1243 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1244 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1245 WC_BSWAP(tmpbuf, inbuf)
1246 psz=tmpbuf;
1247 }
3a0d76bc 1248
36acb880
VZ
1249 if (buf)
1250 {
1251 // have destination buffer, convert there
1252 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1253
36acb880 1254 res = n-outbuf;
adb45366 1255
49dd9820
VS
1256 // NB: iconv was given only wcslen(psz) characters on input, and so
1257 // it couldn't convert the trailing zero. Let's do it ourselves
1258 // if there's some room left for it in the output buffer.
1259 if (res < n)
1260 buf[0] = 0;
36acb880
VZ
1261 }
1262 else
1263 {
1264 // no destination buffer... convert using temp buffer
1265 // to calculate destination buffer requirement
1266 char tbuf[16];
1267 res = 0;
1268 do {
1269 buf = tbuf; outbuf = 16;
1270
1271 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1272
36acb880
VZ
1273 res += 16 - outbuf;
1274 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1275 }
dccce9ea 1276
36acb880
VZ
1277 if (ms_wcNeedsSwap)
1278 {
1279 free(tmpbuf);
1280 }
dccce9ea 1281
36acb880
VZ
1282 if (ICONV_FAILED(cres, inbuf))
1283 {
1284 //VS: it is ok if iconv fails, hence trace only
1285 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1286 return (size_t)-1;
1287 }
1288
1289 return res;
1290}
1291
b040e242 1292#endif // HAVE_ICONV
36acb880 1293
e95354ec 1294
36acb880
VZ
1295// ============================================================================
1296// Win32 conversion classes
1297// ============================================================================
1cd52418 1298
e95354ec 1299#ifdef wxHAVE_WIN32_MB2WC
373658eb 1300
8b04d4c4 1301// from utils.cpp
d775fa82 1302#if wxUSE_FONTMAP
8b04d4c4
VZ
1303extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1304extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1305#endif
373658eb 1306
e95354ec 1307class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1308{
1309public:
bde4baac
VZ
1310 wxMBConv_win32()
1311 {
1312 m_CodePage = CP_ACP;
1313 }
1314
7608a683 1315#if wxUSE_FONTMAP
e95354ec 1316 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1317 {
1318 m_CodePage = wxCharsetToCodepage(name);
1319 }
dccce9ea 1320
e95354ec 1321 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1322 {
1323 m_CodePage = wxEncodingToCodepage(encoding);
1324 }
7608a683 1325#endif
8b04d4c4 1326
bde4baac 1327 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1328 {
02272c9c
VZ
1329 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1330 // the behaviour is not compatible with the Unix version (using iconv)
1331 // and break the library itself, e.g. wxTextInputStream::NextChar()
1332 // wouldn't work if reading an incomplete MB char didn't result in an
1333 // error
2b5f62a0
VZ
1334 const size_t len = ::MultiByteToWideChar
1335 (
1336 m_CodePage, // code page
02272c9c 1337 MB_ERR_INVALID_CHARS, // flags: fall on error
2b5f62a0
VZ
1338 psz, // input string
1339 -1, // its length (NUL-terminated)
b4da152e 1340 buf, // output string
2b5f62a0
VZ
1341 buf ? n : 0 // size of output buffer
1342 );
1343
03a991bc
VZ
1344 // note that it returns count of written chars for buf != NULL and size
1345 // of the needed buffer for buf == NULL so in either case the length of
1346 // the string (which never includes the terminating NUL) is one less
1347 return len ? len - 1 : (size_t)-1;
f1339c56 1348 }
dccce9ea 1349
13dd924a 1350 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1351 {
13dd924a
VZ
1352 /*
1353 we have a problem here: by default, WideCharToMultiByte() may
1354 replace characters unrepresentable in the target code page with bad
1355 quality approximations such as turning "1/2" symbol (U+00BD) into
1356 "1" for the code pages which don't have it and we, obviously, want
1357 to avoid this at any price
d775fa82 1358
13dd924a
VZ
1359 the trouble is that this function does it _silently_, i.e. it won't
1360 even tell us whether it did or not... Win98/2000 and higher provide
1361 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1362 we have to resort to a round trip, i.e. check that converting back
1363 results in the same string -- this is, of course, expensive but
1364 otherwise we simply can't be sure to not garble the data.
1365 */
1366
1367 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1368 // it doesn't work with CJK encodings (which we test for rather roughly
1369 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1370 // supporting it
907173e5
WS
1371 BOOL usedDef wxDUMMY_INITIALIZE(false);
1372 BOOL *pUsedDef;
13dd924a
VZ
1373 int flags;
1374 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1375 {
1376 // it's our lucky day
1377 flags = WC_NO_BEST_FIT_CHARS;
1378 pUsedDef = &usedDef;
1379 }
1380 else // old system or unsupported encoding
1381 {
1382 flags = 0;
1383 pUsedDef = NULL;
1384 }
1385
2b5f62a0
VZ
1386 const size_t len = ::WideCharToMultiByte
1387 (
1388 m_CodePage, // code page
13dd924a
VZ
1389 flags, // either none or no best fit
1390 pwz, // input string
2b5f62a0
VZ
1391 -1, // it is (wide) NUL-terminated
1392 buf, // output buffer
1393 buf ? n : 0, // and its size
1394 NULL, // default "replacement" char
13dd924a 1395 pUsedDef // [out] was it used?
2b5f62a0
VZ
1396 );
1397
13dd924a
VZ
1398 if ( !len )
1399 {
1400 // function totally failed
1401 return (size_t)-1;
1402 }
1403
1404 // if we were really converting, check if we succeeded
1405 if ( buf )
1406 {
1407 if ( flags )
1408 {
1409 // check if the conversion failed, i.e. if any replacements
1410 // were done
1411 if ( usedDef )
1412 return (size_t)-1;
1413 }
1414 else // we must resort to double tripping...
1415 {
1416 wxWCharBuffer wcBuf(n);
1417 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1418 wcscmp(wcBuf, pwz) != 0 )
1419 {
1420 // we didn't obtain the same thing we started from, hence
1421 // the conversion was lossy and we consider that it failed
1422 return (size_t)-1;
1423 }
1424 }
1425 }
1426
03a991bc 1427 // see the comment above for the reason of "len - 1"
13dd924a 1428 return len - 1;
f1339c56 1429 }
dccce9ea 1430
13dd924a
VZ
1431 bool IsOk() const { return m_CodePage != -1; }
1432
1433private:
1434 static bool CanUseNoBestFit()
1435 {
1436 static int s_isWin98Or2k = -1;
1437
1438 if ( s_isWin98Or2k == -1 )
1439 {
1440 int verMaj, verMin;
1441 switch ( wxGetOsVersion(&verMaj, &verMin) )
1442 {
1443 case wxWIN95:
1444 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1445 break;
1446
1447 case wxWINDOWS_NT:
1448 s_isWin98Or2k = verMaj >= 5;
1449 break;
1450
1451 default:
1452 // unknown, be conseravtive by default
1453 s_isWin98Or2k = 0;
1454 }
1455
1456 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1457 }
1458
1459 return s_isWin98Or2k == 1;
1460 }
f1339c56 1461
b1d66b54 1462 long m_CodePage;
1cd52418 1463};
e95354ec
VZ
1464
1465#endif // wxHAVE_WIN32_MB2WC
1466
f7e98dee
RN
1467// ============================================================================
1468// Cocoa conversion classes
1469// ============================================================================
1470
1471#if defined(__WXCOCOA__)
1472
ecd9653b 1473// RN: There is no UTF-32 support in either Core Foundation or
f7e98dee
RN
1474// Cocoa. Strangely enough, internally Core Foundation uses
1475// UTF-32 quite a bit - it's just not public (yet).
1476
1477#include <CoreFoundation/CFString.h>
1478#include <CoreFoundation/CFStringEncodingExt.h>
1479
1480CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b
WS
1481{
1482 CFStringEncoding enc = 0 ;
1483 if ( encoding == wxFONTENCODING_DEFAULT )
1484 {
f7e98dee 1485#if wxUSE_GUI
ecd9653b 1486 encoding = wxFont::GetDefaultEncoding() ;
f7e98dee 1487#else
ecd9653b 1488 encoding = wxLocale::GetSystemEncoding() ;
f7e98dee 1489#endif
ecd9653b
WS
1490 }
1491 else switch( encoding)
1492 {
1493 case wxFONTENCODING_ISO8859_1 :
1494 enc = kCFStringEncodingISOLatin1 ;
1495 break ;
1496 case wxFONTENCODING_ISO8859_2 :
1497 enc = kCFStringEncodingISOLatin2;
1498 break ;
1499 case wxFONTENCODING_ISO8859_3 :
1500 enc = kCFStringEncodingISOLatin3 ;
1501 break ;
1502 case wxFONTENCODING_ISO8859_4 :
1503 enc = kCFStringEncodingISOLatin4;
1504 break ;
1505 case wxFONTENCODING_ISO8859_5 :
1506 enc = kCFStringEncodingISOLatinCyrillic;
1507 break ;
1508 case wxFONTENCODING_ISO8859_6 :
1509 enc = kCFStringEncodingISOLatinArabic;
1510 break ;
1511 case wxFONTENCODING_ISO8859_7 :
1512 enc = kCFStringEncodingISOLatinGreek;
1513 break ;
1514 case wxFONTENCODING_ISO8859_8 :
1515 enc = kCFStringEncodingISOLatinHebrew;
1516 break ;
1517 case wxFONTENCODING_ISO8859_9 :
1518 enc = kCFStringEncodingISOLatin5;
1519 break ;
1520 case wxFONTENCODING_ISO8859_10 :
1521 enc = kCFStringEncodingISOLatin6;
1522 break ;
1523 case wxFONTENCODING_ISO8859_11 :
1524 enc = kCFStringEncodingISOLatinThai;
1525 break ;
1526 case wxFONTENCODING_ISO8859_13 :
1527 enc = kCFStringEncodingISOLatin7;
1528 break ;
1529 case wxFONTENCODING_ISO8859_14 :
1530 enc = kCFStringEncodingISOLatin8;
1531 break ;
1532 case wxFONTENCODING_ISO8859_15 :
1533 enc = kCFStringEncodingISOLatin9;
1534 break ;
1535
1536 case wxFONTENCODING_KOI8 :
1537 enc = kCFStringEncodingKOI8_R;
1538 break ;
1539 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1540 enc = kCFStringEncodingDOSRussian;
1541 break ;
1542
1543// case wxFONTENCODING_BULGARIAN :
1544// enc = ;
1545// break ;
1546
1547 case wxFONTENCODING_CP437 :
1548 enc =kCFStringEncodingDOSLatinUS ;
1549 break ;
1550 case wxFONTENCODING_CP850 :
1551 enc = kCFStringEncodingDOSLatin1;
1552 break ;
1553 case wxFONTENCODING_CP852 :
1554 enc = kCFStringEncodingDOSLatin2;
1555 break ;
1556 case wxFONTENCODING_CP855 :
1557 enc = kCFStringEncodingDOSCyrillic;
1558 break ;
1559 case wxFONTENCODING_CP866 :
1560 enc =kCFStringEncodingDOSRussian ;
1561 break ;
1562 case wxFONTENCODING_CP874 :
1563 enc = kCFStringEncodingDOSThai;
1564 break ;
1565 case wxFONTENCODING_CP932 :
1566 enc = kCFStringEncodingDOSJapanese;
1567 break ;
1568 case wxFONTENCODING_CP936 :
1569 enc =kCFStringEncodingDOSChineseSimplif ;
1570 break ;
1571 case wxFONTENCODING_CP949 :
1572 enc = kCFStringEncodingDOSKorean;
1573 break ;
1574 case wxFONTENCODING_CP950 :
1575 enc = kCFStringEncodingDOSChineseTrad;
1576 break ;
1577
1578 case wxFONTENCODING_CP1250 :
1579 enc = kCFStringEncodingWindowsLatin2;
1580 break ;
1581 case wxFONTENCODING_CP1251 :
1582 enc =kCFStringEncodingWindowsCyrillic ;
1583 break ;
1584 case wxFONTENCODING_CP1252 :
1585 enc =kCFStringEncodingWindowsLatin1 ;
1586 break ;
1587 case wxFONTENCODING_CP1253 :
1588 enc = kCFStringEncodingWindowsGreek;
1589 break ;
1590 case wxFONTENCODING_CP1254 :
1591 enc = kCFStringEncodingWindowsLatin5;
1592 break ;
1593 case wxFONTENCODING_CP1255 :
1594 enc =kCFStringEncodingWindowsHebrew ;
1595 break ;
1596 case wxFONTENCODING_CP1256 :
1597 enc =kCFStringEncodingWindowsArabic ;
1598 break ;
1599 case wxFONTENCODING_CP1257 :
1600 enc = kCFStringEncodingWindowsBalticRim;
1601 break ;
1602 case wxFONTENCODING_UTF7 :
1603 enc = kCFStringEncodingNonLossyASCII ;
1604 break ;
1605 case wxFONTENCODING_UTF8 :
1606 enc = kCFStringEncodingUTF8 ;
1607 break ;
1608 case wxFONTENCODING_EUC_JP :
1609 enc = kCFStringEncodingEUC_JP;
1610 break ;
1611 case wxFONTENCODING_UTF16 :
f7e98dee 1612 enc = kCFStringEncodingUnicode ;
ecd9653b 1613 break ;
f7e98dee
RN
1614 case wxFONTENCODING_MACROMAN :
1615 enc = kCFStringEncodingMacRoman ;
1616 break ;
1617 case wxFONTENCODING_MACJAPANESE :
1618 enc = kCFStringEncodingMacJapanese ;
1619 break ;
1620 case wxFONTENCODING_MACCHINESETRAD :
1621 enc = kCFStringEncodingMacChineseTrad ;
1622 break ;
1623 case wxFONTENCODING_MACKOREAN :
1624 enc = kCFStringEncodingMacKorean ;
1625 break ;
1626 case wxFONTENCODING_MACARABIC :
1627 enc = kCFStringEncodingMacArabic ;
1628 break ;
1629 case wxFONTENCODING_MACHEBREW :
1630 enc = kCFStringEncodingMacHebrew ;
1631 break ;
1632 case wxFONTENCODING_MACGREEK :
1633 enc = kCFStringEncodingMacGreek ;
1634 break ;
1635 case wxFONTENCODING_MACCYRILLIC :
1636 enc = kCFStringEncodingMacCyrillic ;
1637 break ;
1638 case wxFONTENCODING_MACDEVANAGARI :
1639 enc = kCFStringEncodingMacDevanagari ;
1640 break ;
1641 case wxFONTENCODING_MACGURMUKHI :
1642 enc = kCFStringEncodingMacGurmukhi ;
1643 break ;
1644 case wxFONTENCODING_MACGUJARATI :
1645 enc = kCFStringEncodingMacGujarati ;
1646 break ;
1647 case wxFONTENCODING_MACORIYA :
1648 enc = kCFStringEncodingMacOriya ;
1649 break ;
1650 case wxFONTENCODING_MACBENGALI :
1651 enc = kCFStringEncodingMacBengali ;
1652 break ;
1653 case wxFONTENCODING_MACTAMIL :
1654 enc = kCFStringEncodingMacTamil ;
1655 break ;
1656 case wxFONTENCODING_MACTELUGU :
1657 enc = kCFStringEncodingMacTelugu ;
1658 break ;
1659 case wxFONTENCODING_MACKANNADA :
1660 enc = kCFStringEncodingMacKannada ;
1661 break ;
1662 case wxFONTENCODING_MACMALAJALAM :
1663 enc = kCFStringEncodingMacMalayalam ;
1664 break ;
1665 case wxFONTENCODING_MACSINHALESE :
1666 enc = kCFStringEncodingMacSinhalese ;
1667 break ;
1668 case wxFONTENCODING_MACBURMESE :
1669 enc = kCFStringEncodingMacBurmese ;
1670 break ;
1671 case wxFONTENCODING_MACKHMER :
1672 enc = kCFStringEncodingMacKhmer ;
1673 break ;
1674 case wxFONTENCODING_MACTHAI :
1675 enc = kCFStringEncodingMacThai ;
1676 break ;
1677 case wxFONTENCODING_MACLAOTIAN :
1678 enc = kCFStringEncodingMacLaotian ;
1679 break ;
1680 case wxFONTENCODING_MACGEORGIAN :
1681 enc = kCFStringEncodingMacGeorgian ;
1682 break ;
1683 case wxFONTENCODING_MACARMENIAN :
1684 enc = kCFStringEncodingMacArmenian ;
1685 break ;
1686 case wxFONTENCODING_MACCHINESESIMP :
1687 enc = kCFStringEncodingMacChineseSimp ;
1688 break ;
1689 case wxFONTENCODING_MACTIBETAN :
1690 enc = kCFStringEncodingMacTibetan ;
1691 break ;
1692 case wxFONTENCODING_MACMONGOLIAN :
1693 enc = kCFStringEncodingMacMongolian ;
1694 break ;
1695 case wxFONTENCODING_MACETHIOPIC :
1696 enc = kCFStringEncodingMacEthiopic ;
1697 break ;
1698 case wxFONTENCODING_MACCENTRALEUR :
1699 enc = kCFStringEncodingMacCentralEurRoman ;
1700 break ;
1701 case wxFONTENCODING_MACVIATNAMESE :
1702 enc = kCFStringEncodingMacVietnamese ;
1703 break ;
1704 case wxFONTENCODING_MACARABICEXT :
1705 enc = kCFStringEncodingMacExtArabic ;
1706 break ;
1707 case wxFONTENCODING_MACSYMBOL :
1708 enc = kCFStringEncodingMacSymbol ;
1709 break ;
1710 case wxFONTENCODING_MACDINGBATS :
1711 enc = kCFStringEncodingMacDingbats ;
1712 break ;
1713 case wxFONTENCODING_MACTURKISH :
1714 enc = kCFStringEncodingMacTurkish ;
1715 break ;
1716 case wxFONTENCODING_MACCROATIAN :
1717 enc = kCFStringEncodingMacCroatian ;
1718 break ;
1719 case wxFONTENCODING_MACICELANDIC :
1720 enc = kCFStringEncodingMacIcelandic ;
1721 break ;
1722 case wxFONTENCODING_MACROMANIAN :
1723 enc = kCFStringEncodingMacRomanian ;
1724 break ;
1725 case wxFONTENCODING_MACCELTIC :
1726 enc = kCFStringEncodingMacCeltic ;
1727 break ;
1728 case wxFONTENCODING_MACGAELIC :
1729 enc = kCFStringEncodingMacGaelic ;
1730 break ;
ecd9653b
WS
1731// case wxFONTENCODING_MACKEYBOARD :
1732// enc = kCFStringEncodingMacKeyboardGlyphs ;
1733// break ;
1734 default :
1735 // because gcc is picky
1736 break ;
1737 } ;
1738 return enc ;
f7e98dee
RN
1739}
1740
1741wxFontEncoding wxFontEncFromCFStringEnc(CFStringEncoding encoding)
ecd9653b
WS
1742{
1743 wxFontEncoding enc = wxFONTENCODING_DEFAULT ;
1744
1745 switch( encoding)
1746 {
1747 case kCFStringEncodingISOLatin1 :
1748 enc = wxFONTENCODING_ISO8859_1 ;
1749 break ;
1750 case kCFStringEncodingISOLatin2 :
1751 enc = wxFONTENCODING_ISO8859_2;
1752 break ;
1753 case kCFStringEncodingISOLatin3 :
1754 enc = wxFONTENCODING_ISO8859_3 ;
1755 break ;
1756 case kCFStringEncodingISOLatin4 :
1757 enc = wxFONTENCODING_ISO8859_4;
1758 break ;
1759 case kCFStringEncodingISOLatinCyrillic :
1760 enc = wxFONTENCODING_ISO8859_5;
1761 break ;
1762 case kCFStringEncodingISOLatinArabic :
1763 enc = wxFONTENCODING_ISO8859_6;
1764 break ;
1765 case kCFStringEncodingISOLatinGreek :
1766 enc = wxFONTENCODING_ISO8859_7;
1767 break ;
1768 case kCFStringEncodingISOLatinHebrew :
1769 enc = wxFONTENCODING_ISO8859_8;
1770 break ;
1771 case kCFStringEncodingISOLatin5 :
1772 enc = wxFONTENCODING_ISO8859_9;
1773 break ;
1774 case kCFStringEncodingISOLatin6 :
1775 enc = wxFONTENCODING_ISO8859_10;
1776 break ;
1777 case kCFStringEncodingISOLatin7 :
1778 enc = wxFONTENCODING_ISO8859_13;
1779 break ;
1780 case kCFStringEncodingISOLatin8 :
1781 enc = wxFONTENCODING_ISO8859_14;
1782 break ;
1783 case kCFStringEncodingISOLatin9 :
1784 enc =wxFONTENCODING_ISO8859_15 ;
1785 break ;
1786
1787 case kCFStringEncodingKOI8_R :
1788 enc = wxFONTENCODING_KOI8;
1789 break ;
1790
1791// case :
1792// enc = wxFONTENCODING_BULGARIAN;
1793// break ;
1794
1795 case kCFStringEncodingDOSLatinUS :
1796 enc = wxFONTENCODING_CP437;
1797 break ;
1798 case kCFStringEncodingDOSLatin1 :
1799 enc = wxFONTENCODING_CP850;
1800 break ;
1801 case kCFStringEncodingDOSLatin2 :
1802 enc =wxFONTENCODING_CP852 ;
1803 break ;
1804 case kCFStringEncodingDOSCyrillic :
1805 enc = wxFONTENCODING_CP855;
1806 break ;
1807 case kCFStringEncodingDOSRussian :
1808 enc = wxFONTENCODING_CP866;
1809 break ;
1810 case kCFStringEncodingDOSThai :
1811 enc =wxFONTENCODING_CP874 ;
1812 break ;
1813 case kCFStringEncodingDOSJapanese :
1814 enc = wxFONTENCODING_CP932;
1815 break ;
1816 case kCFStringEncodingDOSChineseSimplif :
1817 enc = wxFONTENCODING_CP936;
1818 break ;
1819 case kCFStringEncodingDOSKorean :
1820 enc = wxFONTENCODING_CP949;
1821 break ;
1822 case kCFStringEncodingDOSChineseTrad :
1823 enc = wxFONTENCODING_CP950;
1824 break ;
1825
1826 case kCFStringEncodingWindowsLatin2 :
1827 enc = wxFONTENCODING_CP1250;
1828 break ;
1829 case kCFStringEncodingWindowsCyrillic :
1830 enc = wxFONTENCODING_CP1251;
1831 break ;
1832 case kCFStringEncodingWindowsLatin1 :
1833 enc = wxFONTENCODING_CP1252;
1834 break ;
1835 case kCFStringEncodingWindowsGreek :
1836 enc = wxFONTENCODING_CP1253;
1837 break ;
1838 case kCFStringEncodingWindowsLatin5 :
1839 enc = wxFONTENCODING_CP1254;
1840 break ;
1841 case kCFStringEncodingWindowsHebrew :
1842 enc = wxFONTENCODING_CP1255;
1843 break ;
1844 case kCFStringEncodingWindowsArabic :
1845 enc = wxFONTENCODING_CP1256;
1846 break ;
1847 case kCFStringEncodingWindowsBalticRim :
1848 enc =wxFONTENCODING_CP1257 ;
1849 break ;
1850 case kCFStringEncodingEUC_JP :
1851 enc = wxFONTENCODING_EUC_JP;
1852 break ;
f7e98dee
RN
1853 case kCFStringEncodingUnicode :
1854 enc = wxFONTENCODING_UTF16;
1855 break;
1856 case kCFStringEncodingMacRoman :
1857 enc = wxFONTENCODING_MACROMAN ;
1858 break ;
1859 case kCFStringEncodingMacJapanese :
1860 enc = wxFONTENCODING_MACJAPANESE ;
1861 break ;
1862 case kCFStringEncodingMacChineseTrad :
1863 enc = wxFONTENCODING_MACCHINESETRAD ;
1864 break ;
1865 case kCFStringEncodingMacKorean :
1866 enc = wxFONTENCODING_MACKOREAN ;
1867 break ;
1868 case kCFStringEncodingMacArabic :
1869 enc =wxFONTENCODING_MACARABIC ;
1870 break ;
1871 case kCFStringEncodingMacHebrew :
1872 enc = wxFONTENCODING_MACHEBREW ;
1873 break ;
1874 case kCFStringEncodingMacGreek :
1875 enc = wxFONTENCODING_MACGREEK ;
1876 break ;
1877 case kCFStringEncodingMacCyrillic :
1878 enc = wxFONTENCODING_MACCYRILLIC ;
1879 break ;
1880 case kCFStringEncodingMacDevanagari :
1881 enc = wxFONTENCODING_MACDEVANAGARI ;
1882 break ;
1883 case kCFStringEncodingMacGurmukhi :
1884 enc = wxFONTENCODING_MACGURMUKHI ;
1885 break ;
1886 case kCFStringEncodingMacGujarati :
1887 enc = wxFONTENCODING_MACGUJARATI ;
1888 break ;
1889 case kCFStringEncodingMacOriya :
1890 enc =wxFONTENCODING_MACORIYA ;
1891 break ;
1892 case kCFStringEncodingMacBengali :
1893 enc =wxFONTENCODING_MACBENGALI ;
1894 break ;
1895 case kCFStringEncodingMacTamil :
1896 enc = wxFONTENCODING_MACTAMIL ;
1897 break ;
1898 case kCFStringEncodingMacTelugu :
1899 enc = wxFONTENCODING_MACTELUGU ;
1900 break ;
1901 case kCFStringEncodingMacKannada :
1902 enc = wxFONTENCODING_MACKANNADA ;
1903 break ;
1904 case kCFStringEncodingMacMalayalam :
1905 enc = wxFONTENCODING_MACMALAJALAM ;
1906 break ;
1907 case kCFStringEncodingMacSinhalese :
1908 enc = wxFONTENCODING_MACSINHALESE ;
1909 break ;
1910 case kCFStringEncodingMacBurmese :
1911 enc = wxFONTENCODING_MACBURMESE ;
1912 break ;
1913 case kCFStringEncodingMacKhmer :
1914 enc = wxFONTENCODING_MACKHMER ;
1915 break ;
1916 case kCFStringEncodingMacThai :
1917 enc = wxFONTENCODING_MACTHAI ;
1918 break ;
1919 case kCFStringEncodingMacLaotian :
1920 enc = wxFONTENCODING_MACLAOTIAN ;
1921 break ;
1922 case kCFStringEncodingMacGeorgian :
1923 enc = wxFONTENCODING_MACGEORGIAN ;
1924 break ;
1925 case kCFStringEncodingMacArmenian :
1926 enc = wxFONTENCODING_MACARMENIAN ;
1927 break ;
1928 case kCFStringEncodingMacChineseSimp :
1929 enc = wxFONTENCODING_MACCHINESESIMP ;
1930 break ;
1931 case kCFStringEncodingMacTibetan :
1932 enc = wxFONTENCODING_MACTIBETAN ;
1933 break ;
1934 case kCFStringEncodingMacMongolian :
1935 enc = wxFONTENCODING_MACMONGOLIAN ;
1936 break ;
1937 case kCFStringEncodingMacEthiopic :
1938 enc = wxFONTENCODING_MACETHIOPIC ;
1939 break ;
1940 case kCFStringEncodingMacCentralEurRoman:
1941 enc = wxFONTENCODING_MACCENTRALEUR ;
1942 break ;
1943 case kCFStringEncodingMacVietnamese:
1944 enc = wxFONTENCODING_MACVIATNAMESE ;
1945 break ;
1946 case kCFStringEncodingMacExtArabic :
1947 enc = wxFONTENCODING_MACARABICEXT ;
1948 break ;
1949 case kCFStringEncodingMacSymbol :
1950 enc = wxFONTENCODING_MACSYMBOL ;
1951 break ;
1952 case kCFStringEncodingMacDingbats :
1953 enc = wxFONTENCODING_MACDINGBATS ;
1954 break ;
1955 case kCFStringEncodingMacTurkish :
1956 enc = wxFONTENCODING_MACTURKISH ;
1957 break ;
1958 case kCFStringEncodingMacCroatian :
1959 enc = wxFONTENCODING_MACCROATIAN ;
1960 break ;
1961 case kCFStringEncodingMacIcelandic :
1962 enc = wxFONTENCODING_MACICELANDIC ;
1963 break ;
1964 case kCFStringEncodingMacRomanian :
1965 enc = wxFONTENCODING_MACROMANIAN ;
1966 break ;
1967 case kCFStringEncodingMacCeltic :
1968 enc = wxFONTENCODING_MACCELTIC ;
1969 break ;
1970 case kCFStringEncodingMacGaelic :
1971 enc = wxFONTENCODING_MACGAELIC ;
1972 break ;
1973// case kCFStringEncodingMacKeyboardGlyphs :
1974// enc = wxFONTENCODING_MACKEYBOARD ;
ecd9653b
WS
1975// break ;
1976 } ;
1977 return enc ;
f7e98dee
RN
1978}
1979
1980class wxMBConv_cocoa : public wxMBConv
1981{
1982public:
1983 wxMBConv_cocoa()
1984 {
1985 Init(CFStringGetSystemEncoding()) ;
1986 }
1987
1988 wxMBConv_cocoa(const wxChar* name)
1989 {
1990 Init( wxCFStringEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
1991 }
1992
1993 wxMBConv_cocoa(wxFontEncoding encoding)
1994 {
1995 Init( wxCFStringEncFromFontEnc(encoding) );
1996 }
1997
1998 ~wxMBConv_cocoa()
1999 {
2000 }
2001
2002 void Init( CFStringEncoding encoding)
2003 {
2004 m_char_encoding = encoding ;
2005 m_unicode_encoding = kCFStringEncodingUnicode;
2006 }
2007
2008 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2009 {
2010 wxASSERT(szUnConv);
ecd9653b 2011
f7e98dee
RN
2012 size_t nBufSize = strlen(szUnConv) + 1;
2013 size_t nRealOutSize;
2014
ecd9653b
WS
2015 UniChar* szUniCharBuffer = (UniChar*) szOut;
2016 wchar_t* szConvBuffer = szOut;
2017
f7e98dee
RN
2018 if (szConvBuffer == NULL && nOutSize != 0)
2019 {
2020 szConvBuffer = new wchar_t[nOutSize] ;
2021 }
2022
2023#if SIZEOF_WCHAR_T == 4
2024 szUniCharBuffer = new UniChar[nOutSize];
2025#endif
2026
2027 CFDataRef theData = CFDataCreateWithBytesNoCopy (
ecd9653b
WS
2028 NULL, //allocator
2029 (const UInt8*)szUnConv,
f7e98dee 2030 nBufSize - 1,
ecd9653b
WS
2031 NULL //deallocator
2032 );
f7e98dee
RN
2033
2034 wxASSERT(theData);
2035
2036 CFStringRef theString = CFStringCreateFromExternalRepresentation (
2037 NULL,
2038 theData,
ecd9653b 2039 m_char_encoding
f7e98dee
RN
2040 );
2041
2042 wxASSERT(theString);
2043
2044 if (nOutSize == 0)
2045 {
2046 nRealOutSize = CFStringGetLength(theString) + 1;
2047 CFRelease(theString);
2048 return nRealOutSize - 1;
2049 }
ecd9653b 2050
f7e98dee 2051 CFRange theRange = { 0, CFStringGetLength(theString) };
ecd9653b 2052
f7e98dee 2053 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
ecd9653b
WS
2054
2055
f7e98dee 2056 nRealOutSize = (CFStringGetLength(theString) + 1);
ecd9653b 2057
f7e98dee 2058 CFRelease(theString);
ecd9653b 2059
f7e98dee
RN
2060 szUniCharBuffer[nRealOutSize-1] = '\0' ;
2061
2062#if SIZEOF_WCHAR_T == 4
2063 wxMBConvUTF16 converter ;
2064 converter.MB2WC(szConvBuffer , (const char*)szUniCharBuffer , nRealOutSize ) ;
2065 delete[] szUniCharBuffer;
2066#endif
2067 if ( szOut == NULL )
2068 delete [] szConvBuffer;
2069
2070 return nRealOutSize ;
2071 }
2072
2073 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2074 {
2075 size_t nBufSize = wxWcslen(szUnConv) + 1;
2076 size_t nRealOutSize;
2077 char* szBuffer = szOut;
2078 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2079
f7e98dee
RN
2080 if (szOut == NULL)
2081 {
2082 // worst case
03ff8fda 2083 nRealOutSize = wxString::WorstEncodingCase(nBufSize - 1, *this)+1 ;
f7e98dee
RN
2084 szBuffer = new char[ nRealOutSize ] ;
2085 }
2086 else
2087 nRealOutSize = nOutSize;
2088
2089#if SIZEOF_WCHAR_T == 4
2090 wxMBConvUTF16BE converter ;
2091 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2092 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2093 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2094 nBufSize /= sizeof(UniChar);
2095 ++nBufSize;
2096#endif
2097
2098 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2099 NULL, //allocator
2100 szUniBuffer,
2101 nBufSize,
2102 NULL //deallocator
2103 );
ecd9653b 2104
f7e98dee 2105 wxASSERT(theString);
ecd9653b 2106
f7e98dee
RN
2107 //Note that CER puts a BOM when converting to unicode
2108 //so we may want to check and use getchars instead in that case
2109 CFDataRef theData = CFStringCreateExternalRepresentation(
2110 NULL, //allocator
2111 theString,
2112 m_char_encoding,
2113 0 //what to put in characters that can't be converted -
2114 //0 tells CFString to return NULL if it meets such a character
2115 );
2116
2117 if(!theData)
2118 return (size_t)-1;
ecd9653b 2119
f7e98dee 2120 CFRelease(theString);
ecd9653b 2121
f7e98dee
RN
2122 nRealOutSize = CFDataGetLength(theData);
2123
2124 if ( szOut == NULL )
2125 delete[] szBuffer;
2126
2127 if(nOutSize == 0)
2128 {
2129//TODO: This gets flagged as a non-malloced address by the debugger...
2130//#if SIZEOF_WCHAR_T == 4
2131// delete[] szUniBuffer;
2132//#endif
2133 CFRelease(theData);
2134 return nRealOutSize - 1;
2135 }
ecd9653b 2136
f7e98dee
RN
2137 CFRange theRange = {0, CFDataGetLength(theData) };
2138 CFDataGetBytes(theData, theRange, (UInt8*) szBuffer);
ecd9653b
WS
2139
2140 CFRelease(theData);
2141
f7e98dee
RN
2142//TODO: This gets flagged as a non-malloced address by the debugger...
2143//#if SIZEOF_WCHAR_T == 4
2144// delete[] szUniBuffer;
2145//#endif
2146 return nRealOutSize - 1;
2147 }
2148
2149 bool IsOk() const
ecd9653b 2150 {
f7e98dee 2151 //TODO: check for invalid en/de/coding
ecd9653b 2152 return true;
f7e98dee
RN
2153 }
2154
2155private:
2156 CFStringEncoding m_char_encoding ;
2157 CFStringEncoding m_unicode_encoding ;
2158};
2159
2160#endif // defined(__WXCOCOA__)
2161
335d31e0
SC
2162// ============================================================================
2163// Mac conversion classes
2164// ============================================================================
2165
2166#if defined(__WXMAC__) && defined(TARGET_CARBON)
2167
2168class wxMBConv_mac : public wxMBConv
2169{
2170public:
2171 wxMBConv_mac()
2172 {
2173 Init(CFStringGetSystemEncoding()) ;
2174 }
2175
2176 wxMBConv_mac(const wxChar* name)
2177 {
d775fa82 2178 Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
335d31e0
SC
2179 }
2180
2181 wxMBConv_mac(wxFontEncoding encoding)
2182 {
d775fa82
WS
2183 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2184 }
2185
2186 ~wxMBConv_mac()
2187 {
2188 OSStatus status = noErr ;
2189 status = TECDisposeConverter(m_MB2WC_converter);
2190 status = TECDisposeConverter(m_WC2MB_converter);
2191 }
2192
2193
2194 void Init( TextEncodingBase encoding)
2195 {
2196 OSStatus status = noErr ;
2197 m_char_encoding = encoding ;
2198 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2199
2200 status = TECCreateConverter(&m_MB2WC_converter,
2201 m_char_encoding,
2202 m_unicode_encoding);
2203 status = TECCreateConverter(&m_WC2MB_converter,
2204 m_unicode_encoding,
2205 m_char_encoding);
2206 }
2207
335d31e0
SC
2208 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2209 {
d775fa82
WS
2210 OSStatus status = noErr ;
2211 ByteCount byteOutLen ;
2212 ByteCount byteInLen = strlen(psz) ;
2213 wchar_t *tbuf = NULL ;
2214 UniChar* ubuf = NULL ;
2215 size_t res = 0 ;
2216
2217 if (buf == NULL)
2218 {
2219 n = byteInLen ;
2220 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2221 }
2222 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
f3a355ce 2223#if SIZEOF_WCHAR_T == 4
d775fa82 2224 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2225#else
d775fa82 2226 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2227#endif
d775fa82
WS
2228 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2229 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
f3a355ce 2230#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2231 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2232 // is not properly terminated we get random characters at the end
2233 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d775fa82
WS
2234 wxMBConvUTF16BE converter ;
2235 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2236 free( ubuf ) ;
f3a355ce 2237#else
d775fa82 2238 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2239#endif
d775fa82
WS
2240 if ( buf == NULL )
2241 free(tbuf) ;
335d31e0 2242
335d31e0
SC
2243 if ( buf && res < n)
2244 buf[res] = 0;
2245
d775fa82 2246 return res ;
335d31e0
SC
2247 }
2248
2249 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82
WS
2250 {
2251 OSStatus status = noErr ;
2252 ByteCount byteOutLen ;
2253 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2254
2255 char *tbuf = NULL ;
2256
2257 if (buf == NULL)
2258 {
2259 // worst case
03ff8fda 2260 n = wxString::WorstEncodingCase(byteInLen / SIZEOF_WCHAR_T, *this) + SIZEOF_WCHAR_T;
d775fa82
WS
2261 tbuf = (char*) malloc( n ) ;
2262 }
2263
2264 ByteCount byteBufferLen = n ;
2265 UniChar* ubuf = NULL ;
f3a355ce 2266#if SIZEOF_WCHAR_T == 4
d775fa82
WS
2267 wxMBConvUTF16BE converter ;
2268 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2269 byteInLen = unicharlen ;
2270 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2271 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce 2272#else
d775fa82 2273 ubuf = (UniChar*) psz ;
f3a355ce 2274#endif
d775fa82
WS
2275 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2276 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
f3a355ce 2277#if SIZEOF_WCHAR_T == 4
d775fa82 2278 free( ubuf ) ;
f3a355ce 2279#endif
d775fa82
WS
2280 if ( buf == NULL )
2281 free(tbuf) ;
335d31e0 2282
d775fa82 2283 size_t res = byteOutLen ;
335d31e0
SC
2284 if ( buf && res < n)
2285 buf[res] = 0;
2286
d775fa82 2287 return res ;
335d31e0
SC
2288 }
2289
2290 bool IsOk() const
2291 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2292
2293private:
d775fa82
WS
2294 TECObjectRef m_MB2WC_converter ;
2295 TECObjectRef m_WC2MB_converter ;
2296
2297 TextEncodingBase m_char_encoding ;
2298 TextEncodingBase m_unicode_encoding ;
335d31e0
SC
2299};
2300
2301#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 2302
36acb880
VZ
2303// ============================================================================
2304// wxEncodingConverter based conversion classes
2305// ============================================================================
2306
1e6feb95 2307#if wxUSE_FONTMAP
1cd52418 2308
e95354ec 2309class wxMBConv_wxwin : public wxMBConv
1cd52418 2310{
8b04d4c4
VZ
2311private:
2312 void Init()
2313 {
2314 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2315 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2316 }
2317
6001e347 2318public:
f1339c56
RR
2319 // temporarily just use wxEncodingConverter stuff,
2320 // so that it works while a better implementation is built
e95354ec 2321 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
2322 {
2323 if (name)
e95354ec 2324 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
2325 else
2326 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 2327
8b04d4c4
VZ
2328 Init();
2329 }
2330
e95354ec 2331 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
2332 {
2333 m_enc = enc;
2334
2335 Init();
f1339c56 2336 }
dccce9ea 2337
bde4baac 2338 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
2339 {
2340 size_t inbuf = strlen(psz);
dccce9ea 2341 if (buf)
4def3b35 2342 m2w.Convert(psz,buf);
f1339c56
RR
2343 return inbuf;
2344 }
dccce9ea 2345
bde4baac 2346 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 2347 {
f8d791e0 2348 const size_t inbuf = wxWcslen(psz);
f1339c56
RR
2349 if (buf)
2350 w2m.Convert(psz,buf);
dccce9ea 2351
f1339c56
RR
2352 return inbuf;
2353 }
dccce9ea 2354
e95354ec 2355 bool IsOk() const { return m_ok; }
f1339c56
RR
2356
2357public:
8b04d4c4 2358 wxFontEncoding m_enc;
f1339c56 2359 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
2360
2361 // were we initialized successfully?
2362 bool m_ok;
fc7a2a60 2363
e95354ec 2364 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 2365};
6001e347 2366
1e6feb95
VZ
2367#endif // wxUSE_FONTMAP
2368
36acb880
VZ
2369// ============================================================================
2370// wxCSConv implementation
2371// ============================================================================
2372
8b04d4c4 2373void wxCSConv::Init()
6001e347 2374{
e95354ec
VZ
2375 m_name = NULL;
2376 m_convReal = NULL;
2377 m_deferred = true;
2378}
2379
8b04d4c4
VZ
2380wxCSConv::wxCSConv(const wxChar *charset)
2381{
2382 Init();
82713003 2383
e95354ec
VZ
2384 if ( charset )
2385 {
e95354ec
VZ
2386 SetName(charset);
2387 }
bda3d86a
VZ
2388
2389 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
2390}
2391
8b04d4c4
VZ
2392wxCSConv::wxCSConv(wxFontEncoding encoding)
2393{
bda3d86a 2394 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
2395 {
2396 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2397
2398 encoding = wxFONTENCODING_SYSTEM;
2399 }
2400
8b04d4c4
VZ
2401 Init();
2402
bda3d86a 2403 m_encoding = encoding;
8b04d4c4
VZ
2404}
2405
6001e347
RR
2406wxCSConv::~wxCSConv()
2407{
65e50848
JS
2408 Clear();
2409}
2410
54380f29 2411wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 2412 : wxMBConv()
54380f29 2413{
8b04d4c4
VZ
2414 Init();
2415
54380f29 2416 SetName(conv.m_name);
8b04d4c4 2417 m_encoding = conv.m_encoding;
54380f29
GD
2418}
2419
2420wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2421{
2422 Clear();
8b04d4c4 2423
54380f29 2424 SetName(conv.m_name);
8b04d4c4
VZ
2425 m_encoding = conv.m_encoding;
2426
54380f29
GD
2427 return *this;
2428}
2429
65e50848
JS
2430void wxCSConv::Clear()
2431{
8b04d4c4 2432 free(m_name);
e95354ec 2433 delete m_convReal;
8b04d4c4 2434
65e50848 2435 m_name = NULL;
e95354ec 2436 m_convReal = NULL;
6001e347
RR
2437}
2438
2439void wxCSConv::SetName(const wxChar *charset)
2440{
f1339c56
RR
2441 if (charset)
2442 {
2443 m_name = wxStrdup(charset);
e95354ec 2444 m_deferred = true;
f1339c56 2445 }
6001e347
RR
2446}
2447
e95354ec
VZ
2448wxMBConv *wxCSConv::DoCreate() const
2449{
c547282d
VZ
2450 // check for the special case of ASCII or ISO8859-1 charset: as we have
2451 // special knowledge of it anyhow, we don't need to create a special
2452 // conversion object
2453 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 2454 {
e95354ec
VZ
2455 // don't convert at all
2456 return NULL;
2457 }
dccce9ea 2458
e95354ec
VZ
2459 // we trust OS to do conversion better than we can so try external
2460 // conversion methods first
2461 //
2462 // the full order is:
2463 // 1. OS conversion (iconv() under Unix or Win32 API)
2464 // 2. hard coded conversions for UTF
2465 // 3. wxEncodingConverter as fall back
2466
2467 // step (1)
2468#ifdef HAVE_ICONV
c547282d 2469#if !wxUSE_FONTMAP
e95354ec 2470 if ( m_name )
c547282d 2471#endif // !wxUSE_FONTMAP
e95354ec 2472 {
c547282d
VZ
2473 wxString name(m_name);
2474
2475#if wxUSE_FONTMAP
2476 if ( name.empty() )
2477 name = wxFontMapper::Get()->GetEncodingName(m_encoding);
2478#endif // wxUSE_FONTMAP
2479
2480 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
e95354ec
VZ
2481 if ( conv->IsOk() )
2482 return conv;
2483
2484 delete conv;
2485 }
2486#endif // HAVE_ICONV
2487
2488#ifdef wxHAVE_WIN32_MB2WC
2489 {
7608a683 2490#if wxUSE_FONTMAP
e95354ec
VZ
2491 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2492 : new wxMBConv_win32(m_encoding);
2493 if ( conv->IsOk() )
2494 return conv;
2495
2496 delete conv;
7608a683
WS
2497#else
2498 return NULL;
2499#endif
e95354ec
VZ
2500 }
2501#endif // wxHAVE_WIN32_MB2WC
d775fa82
WS
2502#if defined(__WXMAC__)
2503 {
2504 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
2505 {
2506
2507 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2508 : new wxMBConv_mac(m_encoding);
2509 if ( conv->IsOk() )
f7e98dee
RN
2510 return conv;
2511
2512 delete conv;
2513 }
2514 }
2515#endif
2516#if defined(__WXCOCOA__)
2517 {
2518 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2519 {
2520
2521 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2522 : new wxMBConv_cocoa(m_encoding);
2523 if ( conv->IsOk() )
d775fa82
WS
2524 return conv;
2525
2526 delete conv;
2527 }
335d31e0
SC
2528 }
2529#endif
e95354ec
VZ
2530 // step (2)
2531 wxFontEncoding enc = m_encoding;
2532#if wxUSE_FONTMAP
c547282d
VZ
2533 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2534 {
2535 // use "false" to suppress interactive dialogs -- we can be called from
2536 // anywhere and popping up a dialog from here is the last thing we want to
2537 // do
2538 enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
2539 }
e95354ec
VZ
2540#endif // wxUSE_FONTMAP
2541
2542 switch ( enc )
2543 {
2544 case wxFONTENCODING_UTF7:
2545 return new wxMBConvUTF7;
2546
2547 case wxFONTENCODING_UTF8:
2548 return new wxMBConvUTF8;
2549
e95354ec
VZ
2550 case wxFONTENCODING_UTF16BE:
2551 return new wxMBConvUTF16BE;
2552
2553 case wxFONTENCODING_UTF16LE:
2554 return new wxMBConvUTF16LE;
2555
e95354ec
VZ
2556 case wxFONTENCODING_UTF32BE:
2557 return new wxMBConvUTF32BE;
2558
2559 case wxFONTENCODING_UTF32LE:
2560 return new wxMBConvUTF32LE;
2561
2562 default:
2563 // nothing to do but put here to suppress gcc warnings
2564 ;
2565 }
2566
2567 // step (3)
2568#if wxUSE_FONTMAP
2569 {
2570 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2571 : new wxMBConv_wxwin(m_encoding);
2572 if ( conv->IsOk() )
2573 return conv;
2574
2575 delete conv;
2576 }
2577#endif // wxUSE_FONTMAP
2578
a58d4f4d
VS
2579 // NB: This is a hack to prevent deadlock. What could otherwise happen
2580 // in Unicode build: wxConvLocal creation ends up being here
2581 // because of some failure and logs the error. But wxLog will try to
2582 // attach timestamp, for which it will need wxConvLocal (to convert
2583 // time to char* and then wchar_t*), but that fails, tries to log
2584 // error, but wxLog has a (already locked) critical section that
2585 // guards static buffer.
2586 static bool alreadyLoggingError = false;
2587 if (!alreadyLoggingError)
2588 {
2589 alreadyLoggingError = true;
2590 wxLogError(_("Cannot convert from the charset '%s'!"),
2591 m_name ? m_name
e95354ec
VZ
2592 :
2593#if wxUSE_FONTMAP
2594 wxFontMapper::GetEncodingDescription(m_encoding).c_str()
2595#else // !wxUSE_FONTMAP
2596 wxString::Format(_("encoding %s"), m_encoding).c_str()
2597#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2598 );
a58d4f4d
VS
2599 alreadyLoggingError = false;
2600 }
e95354ec
VZ
2601
2602 return NULL;
2603}
2604
2605void wxCSConv::CreateConvIfNeeded() const
2606{
2607 if ( m_deferred )
2608 {
2609 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
2610
2611#if wxUSE_INTL
2612 // if we don't have neither the name nor the encoding, use the default
2613 // encoding for this system
2614 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2615 {
4d312c22 2616 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
2617 }
2618#endif // wxUSE_INTL
2619
e95354ec
VZ
2620 self->m_convReal = DoCreate();
2621 self->m_deferred = false;
6001e347 2622 }
6001e347
RR
2623}
2624
2625size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2626{
e95354ec 2627 CreateConvIfNeeded();
dccce9ea 2628
e95354ec
VZ
2629 if (m_convReal)
2630 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
2631
2632 // latin-1 (direct)
4def3b35 2633 size_t len = strlen(psz);
dccce9ea 2634
f1339c56
RR
2635 if (buf)
2636 {
4def3b35 2637 for (size_t c = 0; c <= len; c++)
f1339c56
RR
2638 buf[c] = (unsigned char)(psz[c]);
2639 }
dccce9ea 2640
f1339c56 2641 return len;
6001e347
RR
2642}
2643
2644size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2645{
e95354ec 2646 CreateConvIfNeeded();
dccce9ea 2647
e95354ec
VZ
2648 if (m_convReal)
2649 return m_convReal->WC2MB(buf, psz, n);
1cd52418 2650
f1339c56 2651 // latin-1 (direct)
f8d791e0 2652 const size_t len = wxWcslen(psz);
f1339c56
RR
2653 if (buf)
2654 {
4def3b35 2655 for (size_t c = 0; c <= len; c++)
24642831
VS
2656 {
2657 if (psz[c] > 0xFF)
2658 return (size_t)-1;
907173e5 2659 buf[c] = (char)psz[c];
24642831
VS
2660 }
2661 }
2662 else
2663 {
2664 for (size_t c = 0; c <= len; c++)
2665 {
2666 if (psz[c] > 0xFF)
2667 return (size_t)-1;
2668 }
f1339c56 2669 }
dccce9ea 2670
f1339c56 2671 return len;
6001e347
RR
2672}
2673
bde4baac
VZ
2674// ----------------------------------------------------------------------------
2675// globals
2676// ----------------------------------------------------------------------------
2677
2678#ifdef __WINDOWS__
2679 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
2680#elif defined(__WXMAC__) && !defined(__MACH__)
2681 static wxMBConv_mac wxConvLibcObj ;
bde4baac 2682#else
dcc8fac0 2683 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
2684#endif
2685
2686static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2687static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2688static wxMBConvUTF7 wxConvUTF7Obj;
2689static wxMBConvUTF8 wxConvUTF8Obj;
2690
2691
2692WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2693WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2694WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2695WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2696WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2697WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2698
2699#else // !wxUSE_WCHAR_T
2700
2701// stand-ins in absence of wchar_t
2702WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2703 wxConvISO8859_1,
2704 wxConvLocal,
2705 wxConvUTF8;
2706
2707#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
2708
2709