]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
re-renamed DoCreate() to XmDoCreateTLW() to avoid virtual function hiding in other...
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
f6bcfd97
BP
15// ============================================================================
16// declarations
17// ============================================================================
18
19// ----------------------------------------------------------------------------
20// headers
21// ----------------------------------------------------------------------------
22
6001e347
RR
23// For compilers that support precompilation, includes "wx.h".
24#include "wx/wxprec.h"
25
26#ifdef __BORLANDC__
27 #pragma hdrstop
28#endif
29
373658eb
VZ
30#ifndef WX_PRECOMP
31 #include "wx/intl.h"
32 #include "wx/log.h"
33#endif // WX_PRECOMP
34
bde4baac
VZ
35#include "wx/strconv.h"
36
37#if wxUSE_WCHAR_T
38
7608a683 39#ifdef __WINDOWS__
532d575b 40 #include "wx/msw/private.h"
13dd924a 41 #include "wx/msw/missing.h"
0a1c1e62
GRG
42#endif
43
1c193821 44#ifndef __WXWINCE__
1cd52418 45#include <errno.h>
1c193821
JS
46#endif
47
6001e347
RR
48#include <ctype.h>
49#include <string.h>
50#include <stdlib.h>
51
e95354ec
VZ
52#if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54#endif // __WIN32__ but !__WXMICROWIN__
55
6001e347 56#ifdef __SALFORDC__
373658eb 57 #include <clib.h>
6001e347
RR
58#endif
59
b040e242 60#ifdef HAVE_ICONV
373658eb 61 #include <iconv.h>
b1d547eb 62 #include "wx/thread.h"
1cd52418 63#endif
1cd52418 64
373658eb
VZ
65#include "wx/encconv.h"
66#include "wx/fontmap.h"
7608a683 67#include "wx/utils.h"
373658eb 68
335d31e0 69#ifdef __WXMAC__
40ba2f3b 70#ifndef __DARWIN__
4227afa4
SC
71#include <ATSUnicode.h>
72#include <TextCommon.h>
73#include <TextEncodingConverter.h>
40ba2f3b 74#endif
335d31e0
SC
75
76#include "wx/mac/private.h" // includes mac headers
77#endif
ce6f8d6f
VZ
78
79#define TRACE_STRCONV _T("strconv")
80
373658eb
VZ
81// ============================================================================
82// implementation
83// ============================================================================
84
85// ----------------------------------------------------------------------------
c91830cb 86// UTF-16 en/decoding to/from UCS-4
373658eb 87// ----------------------------------------------------------------------------
6001e347 88
b0a6bb75 89
c91830cb 90static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 91{
dccce9ea 92 if (input<=0xffff)
4def3b35 93 {
999836aa
VZ
94 if (output)
95 *output = (wxUint16) input;
4def3b35 96 return 1;
dccce9ea
VZ
97 }
98 else if (input>=0x110000)
4def3b35
VS
99 {
100 return (size_t)-1;
dccce9ea
VZ
101 }
102 else
4def3b35 103 {
dccce9ea 104 if (output)
4def3b35 105 {
c91830cb 106 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
999836aa 107 *output = (wxUint16) ((input&0x3ff)+0xdc00);
4def3b35
VS
108 }
109 return 2;
1cd52418 110 }
1cd52418
OK
111}
112
c91830cb 113static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 114{
dccce9ea 115 if ((*input<0xd800) || (*input>0xdfff))
4def3b35
VS
116 {
117 output = *input;
118 return 1;
dccce9ea 119 }
cdb14ecb 120 else if ((input[1]<0xdc00) || (input[1]>0xdfff))
4def3b35
VS
121 {
122 output = *input;
123 return (size_t)-1;
dccce9ea
VZ
124 }
125 else
4def3b35
VS
126 {
127 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
128 return 2;
129 }
1cd52418
OK
130}
131
b0a6bb75 132
f6bcfd97 133// ----------------------------------------------------------------------------
6001e347 134// wxMBConv
f6bcfd97 135// ----------------------------------------------------------------------------
2c53a80a
WS
136
137wxMBConv::~wxMBConv()
138{
139 // nothing to do here (necessary for Darwin linking probably)
140}
6001e347 141
6001e347
RR
142const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
143{
2b5f62a0 144 if ( psz )
6001e347 145 {
2b5f62a0
VZ
146 // calculate the length of the buffer needed first
147 size_t nLen = MB2WC(NULL, psz, 0);
148 if ( nLen != (size_t)-1 )
149 {
150 // now do the actual conversion
151 wxWCharBuffer buf(nLen);
635f33ce
VS
152 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
153 if ( nLen != (size_t)-1 )
154 {
155 return buf;
156 }
2b5f62a0 157 }
f6bcfd97 158 }
2b5f62a0
VZ
159
160 wxWCharBuffer buf((wchar_t *)NULL);
161
162 return buf;
6001e347
RR
163}
164
e5cceba0 165const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 166{
2b5f62a0
VZ
167 if ( pwz )
168 {
169 size_t nLen = WC2MB(NULL, pwz, 0);
170 if ( nLen != (size_t)-1 )
171 {
c91830cb 172 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
635f33ce
VS
173 nLen = WC2MB(buf.data(), pwz, nLen + 4);
174 if ( nLen != (size_t)-1 )
175 {
176 return buf;
177 }
2b5f62a0
VZ
178 }
179 }
180
181 wxCharBuffer buf((char *)NULL);
e5cceba0 182
e5cceba0 183 return buf;
6001e347
RR
184}
185
f5fb6871 186const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 187{
f5fb6871
RN
188 wxASSERT(pOutSize != NULL);
189
e4e3bbb4
RN
190 const char* szEnd = szString + nStringLen + 1;
191 const char* szPos = szString;
192 const char* szStart = szPos;
193
194 size_t nActualLength = 0;
f5fb6871
RN
195 size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
196
197 wxWCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
198
199 //Convert the string until the length() is reached, continuing the
200 //loop every time a null character is reached
201 while(szPos != szEnd)
202 {
203 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
204
205 //Get the length of the current (sub)string
206 size_t nLen = MB2WC(NULL, szPos, 0);
207
208 //Invalid conversion?
209 if( nLen == (size_t)-1 )
f5fb6871
RN
210 {
211 *pOutSize = 0;
212 theBuffer.data()[0u] = wxT('\0');
213 return theBuffer;
214 }
215
e4e3bbb4
RN
216
217 //Increase the actual length (+1 for current null character)
218 nActualLength += nLen + 1;
219
f5fb6871
RN
220 //if buffer too big, realloc the buffer
221 if (nActualLength > (nCurrentSize+1))
222 {
223 wxWCharBuffer theNewBuffer(nCurrentSize << 1);
224 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
225 theBuffer = theNewBuffer;
226 nCurrentSize <<= 1;
227 }
228
229 //Convert the current (sub)string
230 if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 231 {
f5fb6871
RN
232 *pOutSize = 0;
233 theBuffer.data()[0u] = wxT('\0');
234 return theBuffer;
e4e3bbb4
RN
235 }
236
237 //Increment to next (sub)string
3103e8a9
JS
238 //Note that we have to use strlen instead of nLen here
239 //because XX2XX gives us the size of the output buffer,
240 //which is not necessarily the length of the string
e4e3bbb4
RN
241 szPos += strlen(szPos) + 1;
242 }
243
f5fb6871
RN
244 //success - return actual length and the buffer
245 *pOutSize = nActualLength;
3698ae71 246 return theBuffer;
e4e3bbb4
RN
247}
248
f5fb6871 249const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 250{
f5fb6871
RN
251 wxASSERT(pOutSize != NULL);
252
e4e3bbb4
RN
253 const wchar_t* szEnd = szString + nStringLen + 1;
254 const wchar_t* szPos = szString;
255 const wchar_t* szStart = szPos;
256
257 size_t nActualLength = 0;
f5fb6871
RN
258 size_t nCurrentSize = nStringLen << 2; //try * 4 first
259
260 wxCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
261
262 //Convert the string until the length() is reached, continuing the
263 //loop every time a null character is reached
264 while(szPos != szEnd)
265 {
266 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
267
268 //Get the length of the current (sub)string
269 size_t nLen = WC2MB(NULL, szPos, 0);
270
271 //Invalid conversion?
272 if( nLen == (size_t)-1 )
f5fb6871
RN
273 {
274 *pOutSize = 0;
275 theBuffer.data()[0u] = wxT('\0');
276 return theBuffer;
277 }
e4e3bbb4
RN
278
279 //Increase the actual length (+1 for current null character)
280 nActualLength += nLen + 1;
3698ae71 281
f5fb6871
RN
282 //if buffer too big, realloc the buffer
283 if (nActualLength > (nCurrentSize+1))
284 {
285 wxCharBuffer theNewBuffer(nCurrentSize << 1);
286 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
287 theBuffer = theNewBuffer;
288 nCurrentSize <<= 1;
289 }
290
291 //Convert the current (sub)string
292 if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 293 {
f5fb6871
RN
294 *pOutSize = 0;
295 theBuffer.data()[0u] = wxT('\0');
296 return theBuffer;
e4e3bbb4
RN
297 }
298
299 //Increment to next (sub)string
3103e8a9
JS
300 //Note that we have to use wxWcslen instead of nLen here
301 //because XX2XX gives us the size of the output buffer,
302 //which is not necessarily the length of the string
e4e3bbb4
RN
303 szPos += wxWcslen(szPos) + 1;
304 }
305
f5fb6871
RN
306 //success - return actual length and the buffer
307 *pOutSize = nActualLength;
3698ae71 308 return theBuffer;
e4e3bbb4
RN
309}
310
6001e347 311// ----------------------------------------------------------------------------
bde4baac 312// wxMBConvLibc
6001e347
RR
313// ----------------------------------------------------------------------------
314
bde4baac
VZ
315size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
316{
317 return wxMB2WC(buf, psz, n);
318}
319
320size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
321{
322 return wxWC2MB(buf, psz, n);
323}
e1bfe89e 324
66bf0099 325#ifdef __UNIX__
c12b7f79 326
e1bfe89e 327// ----------------------------------------------------------------------------
532d575b 328// wxConvBrokenFileNames
e1bfe89e
RR
329// ----------------------------------------------------------------------------
330
845905d5 331wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
ea8ce907 332{
845905d5
MW
333 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
334 || wxStricmp(charset, _T("UTF8")) == 0 )
335 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
336 else
337 m_conv = new wxCSConv(charset);
ea8ce907
RR
338}
339
c12b7f79
VZ
340size_t
341wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
342 const char *psz,
343 size_t outputSize) const
e1bfe89e 344{
c12b7f79 345 return m_conv->MB2WC( outputBuf, psz, outputSize );
e1bfe89e
RR
346}
347
c12b7f79
VZ
348size_t
349wxConvBrokenFileNames::WC2MB(char *outputBuf,
350 const wchar_t *psz,
351 size_t outputSize) const
e1bfe89e 352{
c12b7f79 353 return m_conv->WC2MB( outputBuf, psz, outputSize );
e1bfe89e
RR
354}
355
66bf0099 356#endif
c12b7f79 357
bde4baac 358// ----------------------------------------------------------------------------
3698ae71 359// UTF-7
bde4baac 360// ----------------------------------------------------------------------------
6001e347 361
15f2ee32 362// Implementation (C) 2004 Fredrik Roubert
6001e347 363
15f2ee32
RN
364//
365// BASE64 decoding table
366//
// Maps a byte to its 6-bit BASE64 value; 0xff marks bytes which are not
// BASE64 digits. Indexed by unsigned char, 16 entries per row.
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0' - '9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x4f: 'A' - 'O'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50 - 0x5f: 'P' - 'Z'
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x6f: 'a' - 'o'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70 - 0x7f: 'p' - 'z'
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff: never BASE64 digits
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
402
403size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
404{
15f2ee32
RN
405 size_t len = 0;
406
407 while (*psz && ((!buf) || (len < n)))
408 {
409 unsigned char cc = *psz++;
410 if (cc != '+')
411 {
412 // plain ASCII char
413 if (buf)
414 *buf++ = cc;
415 len++;
416 }
417 else if (*psz == '-')
418 {
419 // encoded plus sign
420 if (buf)
421 *buf++ = cc;
422 len++;
423 psz++;
424 }
425 else
426 {
427 // BASE64 encoded string
428 bool lsb;
429 unsigned char c;
430 unsigned int d, l;
431 for (lsb = false, d = 0, l = 0;
432 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
433 {
434 d <<= 6;
435 d += cc;
436 for (l += 6; l >= 8; lsb = !lsb)
437 {
6356d52a 438 c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
439 if (lsb)
440 {
441 if (buf)
442 *buf++ |= c;
443 len ++;
444 }
445 else
446 if (buf)
6356d52a 447 *buf = (wchar_t)(c << 8);
15f2ee32
RN
448 }
449 }
450 if (*psz == '-')
451 psz++;
452 }
453 }
454 if (buf && (len < n))
455 *buf = 0;
456 return len;
6001e347
RR
457}
458
15f2ee32
RN
459//
460// BASE64 encoding table
461//
// Maps a 6-bit value (0 - 63) to its BASE64 digit.
static const unsigned char utf7enb64[] =
{
    // 0 - 25
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 26 - 51
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // 52 - 61
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 62, 63
    '+', '/'
};
473
474//
475// UTF-7 encoding table
476//
477// 0 - Set D (directly encoded characters)
478// 1 - Set O (optional direct characters)
479// 2 - whitespace characters (optional)
480// 3 - special characters
481//
// Class of each ASCII character, indexed by its code (see the legend of
// values 0 - 3 given for this table); one row per 16 characters.
static const unsigned char utf7encode[128] =
{
    /* 0x00 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    /* 0x10 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x20 */ 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    /* 0x40 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    /* 0x60 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
493
667e5b3e 494size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32
RN
495{
496
497
498 size_t len = 0;
499
500 while (*psz && ((!buf) || (len < n)))
501 {
502 wchar_t cc = *psz++;
503 if (cc < 0x80 && utf7encode[cc] < 1)
504 {
505 // plain ASCII char
506 if (buf)
507 *buf++ = (char)cc;
508 len++;
509 }
510#ifndef WC_UTF16
79c78d42 511 else if (((wxUint32)cc) > 0xffff)
b2c13097 512 {
15f2ee32
RN
513 // no surrogate pair generation (yet?)
514 return (size_t)-1;
515 }
516#endif
517 else
518 {
519 if (buf)
520 *buf++ = '+';
521 len++;
522 if (cc != '+')
523 {
524 // BASE64 encode string
525 unsigned int lsb, d, l;
526 for (d = 0, l = 0;; psz++)
527 {
528 for (lsb = 0; lsb < 2; lsb ++)
529 {
530 d <<= 8;
531 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
532
533 for (l += 8; l >= 6; )
534 {
535 l -= 6;
536 if (buf)
537 *buf++ = utf7enb64[(d >> l) % 64];
538 len++;
539 }
540 }
541 cc = *psz;
542 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
543 break;
544 }
545 if (l != 0)
546 {
547 if (buf)
548 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
549 len++;
550 }
551 }
552 if (buf)
553 *buf++ = '-';
554 len++;
555 }
556 }
557 if (buf && (len < n))
558 *buf = 0;
559 return len;
6001e347
RR
560}
561
f6bcfd97 562// ----------------------------------------------------------------------------
6001e347 563// UTF-8
f6bcfd97 564// ----------------------------------------------------------------------------
6001e347 565
dccce9ea 566static wxUint32 utf8_max[]=
4def3b35 567 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347 568
3698ae71
VZ
569// boundaries of the private use area we use to (temporarily) remap invalid
570// characters invalid in a UTF-8 encoded string
ea8ce907
RR
571const wxUint32 wxUnicodePUA = 0x100000;
572const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
573
6001e347
RR
574size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
575{
4def3b35
VS
576 size_t len = 0;
577
dccce9ea 578 while (*psz && ((!buf) || (len < n)))
4def3b35 579 {
ea8ce907
RR
580 const char *opsz = psz;
581 bool invalid = false;
4def3b35
VS
582 unsigned char cc = *psz++, fc = cc;
583 unsigned cnt;
dccce9ea 584 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 585 fc <<= 1;
dccce9ea 586 if (!cnt)
4def3b35
VS
587 {
588 // plain ASCII char
dccce9ea 589 if (buf)
4def3b35
VS
590 *buf++ = cc;
591 len++;
561488ef
MW
592
593 // escape the escape character for octal escapes
594 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
595 && cc == '\\' && (!buf || len < n))
596 {
597 if (buf)
598 *buf++ = cc;
599 len++;
600 }
dccce9ea
VZ
601 }
602 else
4def3b35
VS
603 {
604 cnt--;
dccce9ea 605 if (!cnt)
4def3b35
VS
606 {
607 // invalid UTF-8 sequence
ea8ce907 608 invalid = true;
dccce9ea
VZ
609 }
610 else
4def3b35
VS
611 {
612 unsigned ocnt = cnt - 1;
613 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 614 while (cnt--)
4def3b35 615 {
ea8ce907 616 cc = *psz;
dccce9ea 617 if ((cc & 0xC0) != 0x80)
4def3b35
VS
618 {
619 // invalid UTF-8 sequence
ea8ce907
RR
620 invalid = true;
621 break;
4def3b35 622 }
ea8ce907 623 psz++;
4def3b35
VS
624 res = (res << 6) | (cc & 0x3f);
625 }
ea8ce907 626 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
627 {
628 // illegal UTF-8 encoding
ea8ce907 629 invalid = true;
4def3b35 630 }
ea8ce907
RR
631 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
632 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
633 {
634 // if one of our PUA characters turns up externally
635 // it must also be treated as an illegal sequence
636 // (a bit like you have to escape an escape character)
637 invalid = true;
638 }
639 else
640 {
1cd52418 641#ifdef WC_UTF16
ea8ce907
RR
642 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
643 size_t pa = encode_utf16(res, (wxUint16 *)buf);
644 if (pa == (size_t)-1)
645 {
646 invalid = true;
647 }
648 else
649 {
650 if (buf)
651 buf += pa;
652 len += pa;
653 }
373658eb 654#else // !WC_UTF16
ea8ce907
RR
655 if (buf)
656 *buf++ = res;
657 len++;
373658eb 658#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
659 }
660 }
661 if (invalid)
662 {
663 if (m_options & MAP_INVALID_UTF8_TO_PUA)
664 {
665 while (opsz < psz && (!buf || len < n))
666 {
667#ifdef WC_UTF16
668 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
669 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
670 wxASSERT(pa != (size_t)-1);
671 if (buf)
672 buf += pa;
673 opsz++;
674 len += pa;
675#else
676 if (buf)
677 *buf++ = wxUnicodePUA + (unsigned char)*opsz;
678 opsz++;
679 len++;
680#endif
681 }
682 }
3698ae71 683 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
684 {
685 while (opsz < psz && (!buf || len < n))
686 {
3698ae71
VZ
687 if ( buf && len + 3 < n )
688 {
17a1ebd1 689 unsigned char on = *opsz;
3698ae71 690 *buf++ = L'\\';
17a1ebd1
VZ
691 *buf++ = (wchar_t)( L'0' + on / 0100 );
692 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
693 *buf++ = (wchar_t)( L'0' + on % 010 );
3698ae71 694 }
ea8ce907
RR
695 opsz++;
696 len += 4;
697 }
698 }
3698ae71 699 else // MAP_INVALID_UTF8_NOT
ea8ce907
RR
700 {
701 return (size_t)-1;
702 }
4def3b35
VS
703 }
704 }
6001e347 705 }
dccce9ea 706 if (buf && (len < n))
4def3b35
VS
707 *buf = 0;
708 return len;
6001e347
RR
709}
710
3698ae71
VZ
// Returns true if wch is one of the octal digits L'0' to L'7'.
static inline bool isoctal(wchar_t wch)
{
    if (wch < L'0')
        return false;

    return wch <= L'7';
}
715
6001e347
RR
716size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
717{
4def3b35 718 size_t len = 0;
6001e347 719
dccce9ea 720 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
721 {
722 wxUint32 cc;
1cd52418 723#ifdef WC_UTF16
b5153fd8
VZ
724 // cast is ok for WC_UTF16
725 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 726 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 727#else
4def3b35
VS
728 cc=(*psz++) & 0x7fffffff;
729#endif
3698ae71
VZ
730
731 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
732 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 733 {
dccce9ea 734 if (buf)
ea8ce907 735 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 736 len++;
3698ae71 737 }
561488ef
MW
738 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
739 && cc == L'\\' && psz[0] == L'\\' )
740 {
741 if (buf)
742 *buf++ = (char)cc;
743 psz++;
744 len++;
745 }
3698ae71
VZ
746 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
747 cc == L'\\' &&
748 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 749 {
dccce9ea 750 if (buf)
3698ae71 751 {
b2c13097
WS
752 *buf++ = (char) ((psz[0] - L'0')*0100 +
753 (psz[1] - L'0')*010 +
754 (psz[2] - L'0'));
3698ae71
VZ
755 }
756
757 psz += 3;
ea8ce907
RR
758 len++;
759 }
760 else
761 {
762 unsigned cnt;
763 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
764 if (!cnt)
4def3b35 765 {
ea8ce907
RR
766 // plain ASCII char
767 if (buf)
768 *buf++ = (char) cc;
769 len++;
770 }
771
772 else
773 {
774 len += cnt + 1;
775 if (buf)
776 {
777 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
778 while (cnt--)
779 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
780 }
4def3b35
VS
781 }
782 }
6001e347 783 }
4def3b35 784
3698ae71
VZ
785 if (buf && (len<n))
786 *buf = 0;
adb45366 787
4def3b35 788 return len;
6001e347
RR
789}
790
c91830cb
VZ
791// ----------------------------------------------------------------------------
792// UTF-16
793// ----------------------------------------------------------------------------
794
// "straight" names the UTF-16 converter whose functions copy the 16 bit
// units unchanged, "swap" the one whose functions exchange their 2 bytes;
// which of the LE/BE classes is which depends on WORDS_BIGENDIAN.
#ifndef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16LE
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
#else
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#endif
802
803
c91830cb
VZ
804#ifdef WC_UTF16
805
c91830cb
VZ
806// copy 16bit MB to 16bit String
807size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
808{
809 size_t len=0;
810
811 while (*(wxUint16*)psz && (!buf || len < n))
812 {
813 if (buf)
814 *buf++ = *(wxUint16*)psz;
815 len++;
816
817 psz += sizeof(wxUint16);
818 }
819 if (buf && len<n) *buf=0;
820
821 return len;
822}
823
824
825// copy 16bit String to 16bit MB
826size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
827{
828 size_t len=0;
829
830 while (*psz && (!buf || len < n))
831 {
832 if (buf)
833 {
834 *(wxUint16*)buf = *psz;
835 buf += sizeof(wxUint16);
836 }
837 len += sizeof(wxUint16);
838 psz++;
839 }
840 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
841
842 return len;
843}
844
845
846// swap 16bit MB to 16bit String
847size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
848{
849 size_t len=0;
850
851 while (*(wxUint16*)psz && (!buf || len < n))
852 {
853 if (buf)
854 {
855 ((char *)buf)[0] = psz[1];
856 ((char *)buf)[1] = psz[0];
857 buf++;
858 }
859 len++;
860 psz += sizeof(wxUint16);
861 }
862 if (buf && len<n) *buf=0;
863
864 return len;
865}
866
867
868// swap 16bit MB to 16bit String
869size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
870{
871 size_t len=0;
872
873 while (*psz && (!buf || len < n))
874 {
875 if (buf)
876 {
877 *buf++ = ((char*)psz)[1];
878 *buf++ = ((char*)psz)[0];
879 }
880 len += sizeof(wxUint16);
881 psz++;
882 }
883 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
884
885 return len;
886}
887
888
889#else // WC_UTF16
890
891
892// copy 16bit MB to 32bit String
893size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
894{
895 size_t len=0;
896
897 while (*(wxUint16*)psz && (!buf || len < n))
898 {
899 wxUint32 cc;
900 size_t pa=decode_utf16((wxUint16*)psz, cc);
901 if (pa == (size_t)-1)
902 return pa;
903
904 if (buf)
905 *buf++ = cc;
906 len++;
907 psz += pa * sizeof(wxUint16);
908 }
909 if (buf && len<n) *buf=0;
910
911 return len;
912}
913
914
915// copy 32bit String to 16bit MB
916size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
917{
918 size_t len=0;
919
920 while (*psz && (!buf || len < n))
921 {
922 wxUint16 cc[2];
923 size_t pa=encode_utf16(*psz, cc);
924
925 if (pa == (size_t)-1)
926 return pa;
927
928 if (buf)
929 {
69b80d28 930 *(wxUint16*)buf = cc[0];
b5153fd8 931 buf += sizeof(wxUint16);
c91830cb 932 if (pa > 1)
69b80d28
VZ
933 {
934 *(wxUint16*)buf = cc[1];
935 buf += sizeof(wxUint16);
936 }
c91830cb
VZ
937 }
938
939 len += pa*sizeof(wxUint16);
940 psz++;
941 }
942 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
943
944 return len;
945}
946
947
948// swap 16bit MB to 32bit String
949size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
950{
951 size_t len=0;
952
953 while (*(wxUint16*)psz && (!buf || len < n))
954 {
955 wxUint32 cc;
956 char tmp[4];
957 tmp[0]=psz[1]; tmp[1]=psz[0];
958 tmp[2]=psz[3]; tmp[3]=psz[2];
959
960 size_t pa=decode_utf16((wxUint16*)tmp, cc);
961 if (pa == (size_t)-1)
962 return pa;
963
964 if (buf)
965 *buf++ = cc;
966
967 len++;
968 psz += pa * sizeof(wxUint16);
969 }
970 if (buf && len<n) *buf=0;
971
972 return len;
973}
974
975
976// swap 32bit String to 16bit MB
977size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
978{
979 size_t len=0;
980
981 while (*psz && (!buf || len < n))
982 {
983 wxUint16 cc[2];
984 size_t pa=encode_utf16(*psz, cc);
985
986 if (pa == (size_t)-1)
987 return pa;
988
989 if (buf)
990 {
991 *buf++ = ((char*)cc)[1];
992 *buf++ = ((char*)cc)[0];
993 if (pa > 1)
994 {
995 *buf++ = ((char*)cc)[3];
996 *buf++ = ((char*)cc)[2];
997 }
998 }
999
1000 len += pa*sizeof(wxUint16);
1001 psz++;
1002 }
1003 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1004
1005 return len;
1006}
1007
1008#endif // WC_UTF16
1009
1010
1011// ----------------------------------------------------------------------------
1012// UTF-32
1013// ----------------------------------------------------------------------------
1014
// "straight" names the UTF-32 converter whose functions copy the 32 bit
// units unchanged, "swap" the one whose functions reverse their 4 bytes;
// which of the LE/BE classes is which depends on WORDS_BIGENDIAN.
#ifndef WORDS_BIGENDIAN
#define wxMBConvUTF32straight wxMBConvUTF32LE
#define wxMBConvUTF32swap     wxMBConvUTF32BE
#else
#define wxMBConvUTF32straight wxMBConvUTF32BE
#define wxMBConvUTF32swap     wxMBConvUTF32LE
#endif
1022
1023
1024WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1025WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1026
1027
1028#ifdef WC_UTF16
1029
1030// copy 32bit MB to 16bit String
1031size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1032{
1033 size_t len=0;
1034
1035 while (*(wxUint32*)psz && (!buf || len < n))
1036 {
1037 wxUint16 cc[2];
1038
1039 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1040 if (pa == (size_t)-1)
1041 return pa;
1042
1043 if (buf)
1044 {
1045 *buf++ = cc[0];
1046 if (pa > 1)
1047 *buf++ = cc[1];
1048 }
1049 len += pa;
1050 psz += sizeof(wxUint32);
1051 }
1052 if (buf && len<n) *buf=0;
1053
1054 return len;
1055}
1056
1057
1058// copy 16bit String to 32bit MB
1059size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1060{
1061 size_t len=0;
1062
1063 while (*psz && (!buf || len < n))
1064 {
1065 wxUint32 cc;
1066
b5153fd8
VZ
1067 // cast is ok for WC_UTF16
1068 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
1069 if (pa == (size_t)-1)
1070 return pa;
1071
1072 if (buf)
1073 {
1074 *(wxUint32*)buf = cc;
1075 buf += sizeof(wxUint32);
1076 }
1077 len += sizeof(wxUint32);
1078 psz += pa;
1079 }
b5153fd8
VZ
1080
1081 if (buf && len<=n-sizeof(wxUint32))
1082 *(wxUint32*)buf=0;
c91830cb
VZ
1083
1084 return len;
1085}
1086
1087
1088
1089// swap 32bit MB to 16bit String
1090size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1091{
1092 size_t len=0;
1093
1094 while (*(wxUint32*)psz && (!buf || len < n))
1095 {
1096 char tmp[4];
1097 tmp[0] = psz[3]; tmp[1] = psz[2];
1098 tmp[2] = psz[1]; tmp[3] = psz[0];
1099
1100
1101 wxUint16 cc[2];
1102
1103 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1104 if (pa == (size_t)-1)
1105 return pa;
1106
1107 if (buf)
1108 {
1109 *buf++ = cc[0];
1110 if (pa > 1)
1111 *buf++ = cc[1];
1112 }
1113 len += pa;
1114 psz += sizeof(wxUint32);
1115 }
b5153fd8
VZ
1116
1117 if (buf && len<n)
1118 *buf=0;
c91830cb
VZ
1119
1120 return len;
1121}
1122
1123
1124// swap 16bit String to 32bit MB
1125size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1126{
1127 size_t len=0;
1128
1129 while (*psz && (!buf || len < n))
1130 {
1131 char cc[4];
1132
b5153fd8
VZ
1133 // cast is ok for WC_UTF16
1134 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
1135 if (pa == (size_t)-1)
1136 return pa;
1137
1138 if (buf)
1139 {
1140 *buf++ = cc[3];
1141 *buf++ = cc[2];
1142 *buf++ = cc[1];
1143 *buf++ = cc[0];
1144 }
1145 len += sizeof(wxUint32);
1146 psz += pa;
1147 }
b5153fd8
VZ
1148
1149 if (buf && len<=n-sizeof(wxUint32))
1150 *(wxUint32*)buf=0;
c91830cb
VZ
1151
1152 return len;
1153}
1154
1155#else // WC_UTF16
1156
1157
1158// copy 32bit MB to 32bit String
1159size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1160{
1161 size_t len=0;
1162
1163 while (*(wxUint32*)psz && (!buf || len < n))
1164 {
1165 if (buf)
1166 *buf++ = *(wxUint32*)psz;
1167 len++;
1168 psz += sizeof(wxUint32);
1169 }
b5153fd8
VZ
1170
1171 if (buf && len<n)
1172 *buf=0;
c91830cb
VZ
1173
1174 return len;
1175}
1176
1177
1178// copy 32bit String to 32bit MB
1179size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1180{
1181 size_t len=0;
1182
1183 while (*psz && (!buf || len < n))
1184 {
1185 if (buf)
1186 {
1187 *(wxUint32*)buf = *psz;
1188 buf += sizeof(wxUint32);
1189 }
1190
1191 len += sizeof(wxUint32);
1192 psz++;
1193 }
1194
b5153fd8
VZ
1195 if (buf && len<=n-sizeof(wxUint32))
1196 *(wxUint32*)buf=0;
c91830cb
VZ
1197
1198 return len;
1199}
1200
1201
1202// swap 32bit MB to 32bit String
1203size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1204{
1205 size_t len=0;
1206
1207 while (*(wxUint32*)psz && (!buf || len < n))
1208 {
1209 if (buf)
1210 {
1211 ((char *)buf)[0] = psz[3];
1212 ((char *)buf)[1] = psz[2];
1213 ((char *)buf)[2] = psz[1];
1214 ((char *)buf)[3] = psz[0];
1215 buf++;
1216 }
1217 len++;
1218 psz += sizeof(wxUint32);
1219 }
b5153fd8
VZ
1220
1221 if (buf && len<n)
1222 *buf=0;
c91830cb
VZ
1223
1224 return len;
1225}
1226
1227
1228// swap 32bit String to 32bit MB
1229size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1230{
1231 size_t len=0;
1232
1233 while (*psz && (!buf || len < n))
1234 {
1235 if (buf)
1236 {
1237 *buf++ = ((char *)psz)[3];
1238 *buf++ = ((char *)psz)[2];
1239 *buf++ = ((char *)psz)[1];
1240 *buf++ = ((char *)psz)[0];
1241 }
1242 len += sizeof(wxUint32);
1243 psz++;
1244 }
b5153fd8
VZ
1245
1246 if (buf && len<=n-sizeof(wxUint32))
1247 *(wxUint32*)buf=0;
c91830cb
VZ
1248
1249 return len;
1250}
1251
1252
1253#endif // WC_UTF16
1254
1255
36acb880
VZ
1256// ============================================================================
1257// The classes doing conversion using the iconv_xxx() functions
1258// ============================================================================
3caec1bb 1259
b040e242 1260#ifdef HAVE_ICONV
3a0d76bc 1261
b1d547eb
VS
1262// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1263// E2BIG if output buffer is _exactly_ as big as needed. Such case is
1264// (unless there's yet another bug in glibc) the only case when iconv()
1265// returns with (size_t)-1 (which means error) and says there are 0 bytes
1266// left in the input buffer -- when _real_ error occurs,
1267// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1268// iconv() failure.
3caec1bb
VS
1269// [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft): true if an iconv() call which returned cres
// and left bufLeft bytes of input unconsumed must be treated as a failure
1270#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1271#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1272 (errno != E2BIG || bufLeft != 0))
1273#else
1274#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1275#endif
1276
// cast the address of the input pointer to the type used for the second
// argument of iconv() in this file
ab217dba 1277#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880 1278
74a7eb0b
VZ
// value the descriptors returned by iconv_open() are compared with to detect
// that opening the conversion failed
1279#define ICONV_T_INVALID ((iconv_t)-1)
1280
// WC_BSWAP is the byte swapping macro matching the size of wchar_t and WC_ENC
// the wxFontEncoding used to look up the iconv names of the wchar_t charset
1281#if SIZEOF_WCHAR_T == 4
1282 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1283 #define WC_ENC wxFONTENCODING_UTF32
1284#elif SIZEOF_WCHAR_T == 2
1285 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1286 #define WC_ENC wxFONTENCODING_UTF16
1287#else // sizeof(wchar_t) != 2 nor 4
1288 // does this ever happen?
1289 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1290#endif
1291
36acb880 1292// ----------------------------------------------------------------------------
e95354ec 1293// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1294// ----------------------------------------------------------------------------
1295
// wxMBConv implementation converting between the charset given to the
// constructor and wchar_t using a pair of iconv descriptors
e95354ec 1296class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1297{
1298public:
e95354ec
VZ
// name is the charset name, it is passed to iconv_open() after conversion
// to ASCII; use IsOk() to check if the converter could be created
1299 wxMBConv_iconv(const wxChar *name);
1300 virtual ~wxMBConv_iconv();
36acb880 1301
bde4baac
VZ
// both functions return (size_t)-1 if iconv() reports a failure
1302 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1303 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1304
// true only if the descriptors for both directions are valid
e95354ec 1305 bool IsOk() const
74a7eb0b 1306 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
36acb880
VZ
1307
1308protected:
1309 // the iconv handlers used to translate from multibyte to wide char and in
1310 // the other direction
1311 iconv_t m2w,
1312 w2m;
b1d547eb
VS
1313#if wxUSE_THREADS
1314 // guards access to m2w and w2m objects
1315 wxMutex m_iconvMutex;
1316#endif
36acb880
VZ
1317
1318private:
e95354ec 1319 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880 1320 // available on this machine, it will remain empty
74a7eb0b 1321 static wxString ms_wcCharsetName;
36acb880
VZ
1322
1323 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1324 // different endian-ness than the native one
405d8f46 1325 static bool ms_wcNeedsSwap;
36acb880
VZ
1326};
1327
8f115891
MW
1328// make the constructor available for unit testing
1329WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1330{
1331 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1332 if ( !result->IsOk() )
1333 {
1334 delete result;
1335 return 0;
1336 }
1337 return result;
1338}
1339
// definitions of the static members: the charset name starts empty and is
// filled in by the constructor, no byte swapping is assumed until then
422e411e 1340wxString wxMBConv_iconv::ms_wcCharsetName;
e95354ec 1341bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1342
e95354ec 1343wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 1344{
0331b385
VZ
1345 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1346 // names for the charsets
200a9923 1347 const wxCharBuffer cname(wxString(name).ToAscii());
04c79127 1348
36acb880 1349 // check for charset that represents wchar_t:
74a7eb0b 1350 if ( ms_wcCharsetName.empty() )
f1339c56 1351 {
74a7eb0b
VZ
1352#if wxUSE_FONTMAP
1353 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1354#else // !wxUSE_FONTMAP
1355 static const wxChar *names[] =
36acb880 1356 {
74a7eb0b
VZ
1357#if SIZEOF_WCHAR_T == 4
1358 _T("UCS-4"),
1359#elif SIZEOF_WCHAR_T = 2
1360 _T("UCS-2"),
1361#endif
1362 NULL
1363 };
1364#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
36acb880 1365
74a7eb0b
VZ
1366 for ( ; *names; ++names )
1367 {
17a1ebd1 1368 const wxString nameCS(*names);
74a7eb0b
VZ
1369
1370 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
17a1ebd1 1371 wxString nameXE(nameCS);
74a7eb0b
VZ
1372 #ifdef WORDS_BIGENDIAN
1373 nameXE += _T("BE");
1374 #else // little endian
1375 nameXE += _T("LE");
1376 #endif
1377
1378 m2w = iconv_open(nameXE.ToAscii(), cname);
1379 if ( m2w == ICONV_T_INVALID )
3a0d76bc 1380 {
74a7eb0b 1381 // try charset w/o bytesex info (e.g. "UCS4")
17a1ebd1 1382 m2w = iconv_open(nameCS.ToAscii(), cname);
3a0d76bc 1383
74a7eb0b
VZ
1384 // and check for bytesex ourselves:
1385 if ( m2w != ICONV_T_INVALID )
3a0d76bc 1386 {
74a7eb0b
VZ
1387 char buf[2], *bufPtr;
1388 wchar_t wbuf[2], *wbufPtr;
1389 size_t insz, outsz;
1390 size_t res;
1391
1392 buf[0] = 'A';
1393 buf[1] = 0;
1394 wbuf[0] = 0;
1395 insz = 2;
1396 outsz = SIZEOF_WCHAR_T * 2;
1397 wbufPtr = wbuf;
1398 bufPtr = buf;
1399
1400 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1401 (char**)&wbufPtr, &outsz);
1402
1403 if (ICONV_FAILED(res, insz))
1404 {
1405 wxLogLastError(wxT("iconv"));
422e411e 1406 wxLogError(_("Conversion to charset '%s' doesn't work."),
17a1ebd1 1407 nameCS.c_str());
74a7eb0b
VZ
1408 }
1409 else // ok, can convert to this encoding, remember it
1410 {
17a1ebd1 1411 ms_wcCharsetName = nameCS;
74a7eb0b
VZ
1412 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1413 }
3a0d76bc
VS
1414 }
1415 }
74a7eb0b 1416 else // use charset not requiring byte swapping
36acb880 1417 {
74a7eb0b 1418 ms_wcCharsetName = nameXE;
36acb880 1419 }
3a0d76bc 1420 }
74a7eb0b 1421
0944fceb 1422 wxLogTrace(TRACE_STRCONV,
74a7eb0b 1423 wxT("iconv wchar_t charset is \"%s\"%s"),
cae8f1bf 1424 ms_wcCharsetName.empty() ? _T("<none>")
74a7eb0b
VZ
1425 : ms_wcCharsetName.c_str(),
1426 ms_wcNeedsSwap ? _T(" (needs swap)")
1427 : _T(""));
3a0d76bc 1428 }
36acb880 1429 else // we already have ms_wcCharsetName
3caec1bb 1430 {
74a7eb0b 1431 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
f1339c56 1432 }
dccce9ea 1433
74a7eb0b 1434 if ( ms_wcCharsetName.empty() )
f1339c56 1435 {
74a7eb0b 1436 w2m = ICONV_T_INVALID;
36acb880 1437 }
405d8f46
VZ
1438 else
1439 {
74a7eb0b
VZ
1440 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1441 if ( w2m == ICONV_T_INVALID )
1442 {
1443 wxLogTrace(TRACE_STRCONV,
1444 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
422e411e 1445 ms_wcCharsetName.c_str(), cname.data());
74a7eb0b 1446 }
405d8f46 1447 }
36acb880 1448}
3caec1bb 1449
e95354ec 1450wxMBConv_iconv::~wxMBConv_iconv()
36acb880 1451{
74a7eb0b 1452 if ( m2w != ICONV_T_INVALID )
36acb880 1453 iconv_close(m2w);
74a7eb0b 1454 if ( w2m != ICONV_T_INVALID )
36acb880
VZ
1455 iconv_close(w2m);
1456}
3a0d76bc 1457
bde4baac 1458size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1459{
b1d547eb
VS
1460#if wxUSE_THREADS
1461 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1462 // Unfortunately there is a couple of global wxCSConv objects such as
1463 // wxConvLocal that are used all over wx code, so we have to make sure
1464 // the handle is used by at most one thread at the time. Otherwise
1465 // only a few wx classes would be safe to use from non-main threads
1466 // as MB<->WC conversion would fail "randomly".
1467 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1468#endif
3698ae71 1469
36acb880
VZ
1470 size_t inbuf = strlen(psz);
1471 size_t outbuf = n * SIZEOF_WCHAR_T;
1472 size_t res, cres;
1473 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1474 wchar_t *bufPtr = buf;
1475 const char *pszPtr = psz;
1476
1477 if (buf)
1478 {
1479 // have destination buffer, convert there
1480 cres = iconv(m2w,
1481 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1482 (char**)&bufPtr, &outbuf);
1483 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1484
36acb880 1485 if (ms_wcNeedsSwap)
3a0d76bc 1486 {
36acb880 1487 // convert to native endianness
17a1ebd1
VZ
1488 for ( unsigned i = 0; i < res; i++ )
1489 buf[n] = WC_BSWAP(buf[i]);
3a0d76bc 1490 }
adb45366 1491
49dd9820
VS
1492 // NB: iconv was given only strlen(psz) characters on input, and so
1493 // it couldn't convert the trailing zero. Let's do it ourselves
1494 // if there's some room left for it in the output buffer.
1495 if (res < n)
1496 buf[res] = 0;
36acb880
VZ
1497 }
1498 else
1499 {
1500 // no destination buffer... convert using temp buffer
1501 // to calculate destination buffer requirement
1502 wchar_t tbuf[8];
1503 res = 0;
1504 do {
1505 bufPtr = tbuf;
1506 outbuf = 8*SIZEOF_WCHAR_T;
1507
1508 cres = iconv(m2w,
1509 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1510 (char**)&bufPtr, &outbuf );
1511
1512 res += 8-(outbuf/SIZEOF_WCHAR_T);
1513 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1514 }
dccce9ea 1515
36acb880 1516 if (ICONV_FAILED(cres, inbuf))
f1339c56 1517 {
36acb880 1518 //VS: it is ok if iconv fails, hence trace only
ce6f8d6f 1519 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
36acb880
VZ
1520 return (size_t)-1;
1521 }
1522
1523 return res;
1524}
1525
bde4baac 1526size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1527{
b1d547eb
VS
1528#if wxUSE_THREADS
1529 // NB: explained in MB2WC
1530 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1531#endif
3698ae71 1532
156162ec
MW
1533 size_t inlen = wxWcslen(psz);
1534 size_t inbuf = inlen * SIZEOF_WCHAR_T;
36acb880
VZ
1535 size_t outbuf = n;
1536 size_t res, cres;
3a0d76bc 1537
36acb880 1538 wchar_t *tmpbuf = 0;
3caec1bb 1539
36acb880
VZ
1540 if (ms_wcNeedsSwap)
1541 {
1542 // need to copy to temp buffer to switch endianness
74a7eb0b 1543 // (doing WC_BSWAP twice on the original buffer won't help, as it
36acb880 1544 // could be in read-only memory, or be accessed in some other thread)
74a7eb0b 1545 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
17a1ebd1
VZ
1546 for ( size_t i = 0; i < inlen; i++ )
1547 tmpbuf[n] = WC_BSWAP(psz[i]);
156162ec 1548 tmpbuf[inlen] = L'\0';
74a7eb0b 1549 psz = tmpbuf;
36acb880 1550 }
3a0d76bc 1551
36acb880
VZ
1552 if (buf)
1553 {
1554 // have destination buffer, convert there
1555 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1556
36acb880 1557 res = n-outbuf;
adb45366 1558
49dd9820
VS
1559 // NB: iconv was given only wcslen(psz) characters on input, and so
1560 // it couldn't convert the trailing zero. Let's do it ourselves
1561 // if there's some room left for it in the output buffer.
1562 if (res < n)
1563 buf[0] = 0;
36acb880
VZ
1564 }
1565 else
1566 {
1567 // no destination buffer... convert using temp buffer
1568 // to calculate destination buffer requirement
1569 char tbuf[16];
1570 res = 0;
1571 do {
1572 buf = tbuf; outbuf = 16;
1573
1574 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1575
36acb880
VZ
1576 res += 16 - outbuf;
1577 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1578 }
dccce9ea 1579
36acb880
VZ
1580 if (ms_wcNeedsSwap)
1581 {
1582 free(tmpbuf);
1583 }
dccce9ea 1584
36acb880
VZ
1585 if (ICONV_FAILED(cres, inbuf))
1586 {
1587 //VS: it is ok if iconv fails, hence trace only
ce6f8d6f 1588 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
36acb880
VZ
1589 return (size_t)-1;
1590 }
1591
1592 return res;
1593}
1594
b040e242 1595#endif // HAVE_ICONV
36acb880 1596
e95354ec 1597
36acb880
VZ
1598// ============================================================================
1599// Win32 conversion classes
1600// ============================================================================
1cd52418 1601
e95354ec 1602#ifdef wxHAVE_WIN32_MB2WC
373658eb 1603
8b04d4c4 1604// from utils.cpp
d775fa82 1605#if wxUSE_FONTMAP
8b04d4c4
VZ
1606extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1607extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1608#endif
373658eb 1609
e95354ec 1610class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1611{
1612public:
bde4baac
VZ
1613 wxMBConv_win32()
1614 {
1615 m_CodePage = CP_ACP;
1616 }
1617
7608a683 1618#if wxUSE_FONTMAP
e95354ec 1619 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1620 {
1621 m_CodePage = wxCharsetToCodepage(name);
1622 }
dccce9ea 1623
e95354ec 1624 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1625 {
1626 m_CodePage = wxEncodingToCodepage(encoding);
1627 }
7608a683 1628#endif
8b04d4c4 1629
bde4baac 1630 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1631 {
02272c9c
VZ
1632 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1633 // the behaviour is not compatible with the Unix version (using iconv)
1634 // and break the library itself, e.g. wxTextInputStream::NextChar()
1635 // wouldn't work if reading an incomplete MB char didn't result in an
1636 // error
667e5b3e
VZ
1637 //
1638 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1639 // an error (tested under Windows Server 2003) and apparently it is
1640 // done on purpose, i.e. the function accepts any input in this case
1641 // and although I'd prefer to return error on ill-formed output, our
1642 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1643 // explicitly ill-formed according to RFC 2152) neither so we don't
1644 // even have any fallback here...
1645 int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1646
2b5f62a0
VZ
1647 const size_t len = ::MultiByteToWideChar
1648 (
1649 m_CodePage, // code page
667e5b3e 1650 flags, // flags: fall on error
2b5f62a0
VZ
1651 psz, // input string
1652 -1, // its length (NUL-terminated)
b4da152e 1653 buf, // output string
2b5f62a0
VZ
1654 buf ? n : 0 // size of output buffer
1655 );
1656
03a991bc
VZ
1657 // note that it returns count of written chars for buf != NULL and size
1658 // of the needed buffer for buf == NULL so in either case the length of
1659 // the string (which never includes the terminating NUL) is one less
1660 return len ? len - 1 : (size_t)-1;
f1339c56 1661 }
dccce9ea 1662
13dd924a 1663 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1664 {
13dd924a
VZ
1665 /*
1666 we have a problem here: by default, WideCharToMultiByte() may
1667 replace characters unrepresentable in the target code page with bad
1668 quality approximations such as turning "1/2" symbol (U+00BD) into
1669 "1" for the code pages which don't have it and we, obviously, want
1670 to avoid this at any price
d775fa82 1671
13dd924a
VZ
1672 the trouble is that this function does it _silently_, i.e. it won't
1673 even tell us whether it did or not... Win98/2000 and higher provide
1674 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1675 we have to resort to a round trip, i.e. check that converting back
1676 results in the same string -- this is, of course, expensive but
1677 otherwise we simply can't be sure to not garble the data.
1678 */
1679
1680 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1681 // it doesn't work with CJK encodings (which we test for rather roughly
1682 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1683 // supporting it
907173e5
WS
1684 BOOL usedDef wxDUMMY_INITIALIZE(false);
1685 BOOL *pUsedDef;
13dd924a
VZ
1686 int flags;
1687 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1688 {
1689 // it's our lucky day
1690 flags = WC_NO_BEST_FIT_CHARS;
1691 pUsedDef = &usedDef;
1692 }
1693 else // old system or unsupported encoding
1694 {
1695 flags = 0;
1696 pUsedDef = NULL;
1697 }
1698
2b5f62a0
VZ
1699 const size_t len = ::WideCharToMultiByte
1700 (
1701 m_CodePage, // code page
13dd924a
VZ
1702 flags, // either none or no best fit
1703 pwz, // input string
2b5f62a0
VZ
1704 -1, // it is (wide) NUL-terminated
1705 buf, // output buffer
1706 buf ? n : 0, // and its size
1707 NULL, // default "replacement" char
13dd924a 1708 pUsedDef // [out] was it used?
2b5f62a0
VZ
1709 );
1710
13dd924a
VZ
1711 if ( !len )
1712 {
1713 // function totally failed
1714 return (size_t)-1;
1715 }
1716
1717 // if we were really converting, check if we succeeded
1718 if ( buf )
1719 {
1720 if ( flags )
1721 {
1722 // check if the conversion failed, i.e. if any replacements
1723 // were done
1724 if ( usedDef )
1725 return (size_t)-1;
1726 }
1727 else // we must resort to double tripping...
1728 {
1729 wxWCharBuffer wcBuf(n);
1730 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1731 wcscmp(wcBuf, pwz) != 0 )
1732 {
1733 // we didn't obtain the same thing we started from, hence
1734 // the conversion was lossy and we consider that it failed
1735 return (size_t)-1;
1736 }
1737 }
1738 }
1739
03a991bc 1740 // see the comment above for the reason of "len - 1"
13dd924a 1741 return len - 1;
f1339c56 1742 }
dccce9ea 1743
13dd924a
VZ
1744 bool IsOk() const { return m_CodePage != -1; }
1745
1746private:
1747 static bool CanUseNoBestFit()
1748 {
1749 static int s_isWin98Or2k = -1;
1750
1751 if ( s_isWin98Or2k == -1 )
1752 {
1753 int verMaj, verMin;
1754 switch ( wxGetOsVersion(&verMaj, &verMin) )
1755 {
1756 case wxWIN95:
1757 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1758 break;
1759
1760 case wxWINDOWS_NT:
1761 s_isWin98Or2k = verMaj >= 5;
1762 break;
1763
1764 default:
1765 // unknown, be conseravtive by default
1766 s_isWin98Or2k = 0;
1767 }
1768
1769 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1770 }
1771
1772 return s_isWin98Or2k == 1;
1773 }
f1339c56 1774
b1d66b54 1775 long m_CodePage;
1cd52418 1776};
e95354ec
VZ
1777
1778#endif // wxHAVE_WIN32_MB2WC
1779
f7e98dee
RN
1780// ============================================================================
1781// Cocoa conversion classes
1782// ============================================================================
1783
1784#if defined(__WXCOCOA__)
1785
ecd9653b 1786// RN: There is no UTF-32 support in either Core Foundation or
f7e98dee
RN
1787// Cocoa. Strangely enough, Core Foundation uses UTF-32 internally
1788// quite a bit - it's just not public (yet).
1789
1790#include <CoreFoundation/CFString.h>
1791#include <CoreFoundation/CFStringEncodingExt.h>
1792
1793CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 1794{
638357a0 1795 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ecd9653b
WS
1796 if ( encoding == wxFONTENCODING_DEFAULT )
1797 {
638357a0 1798 enc = CFStringGetSystemEncoding();
ecd9653b
WS
1799 }
1800 else switch( encoding)
1801 {
1802 case wxFONTENCODING_ISO8859_1 :
1803 enc = kCFStringEncodingISOLatin1 ;
1804 break ;
1805 case wxFONTENCODING_ISO8859_2 :
1806 enc = kCFStringEncodingISOLatin2;
1807 break ;
1808 case wxFONTENCODING_ISO8859_3 :
1809 enc = kCFStringEncodingISOLatin3 ;
1810 break ;
1811 case wxFONTENCODING_ISO8859_4 :
1812 enc = kCFStringEncodingISOLatin4;
1813 break ;
1814 case wxFONTENCODING_ISO8859_5 :
1815 enc = kCFStringEncodingISOLatinCyrillic;
1816 break ;
1817 case wxFONTENCODING_ISO8859_6 :
1818 enc = kCFStringEncodingISOLatinArabic;
1819 break ;
1820 case wxFONTENCODING_ISO8859_7 :
1821 enc = kCFStringEncodingISOLatinGreek;
1822 break ;
1823 case wxFONTENCODING_ISO8859_8 :
1824 enc = kCFStringEncodingISOLatinHebrew;
1825 break ;
1826 case wxFONTENCODING_ISO8859_9 :
1827 enc = kCFStringEncodingISOLatin5;
1828 break ;
1829 case wxFONTENCODING_ISO8859_10 :
1830 enc = kCFStringEncodingISOLatin6;
1831 break ;
1832 case wxFONTENCODING_ISO8859_11 :
1833 enc = kCFStringEncodingISOLatinThai;
1834 break ;
1835 case wxFONTENCODING_ISO8859_13 :
1836 enc = kCFStringEncodingISOLatin7;
1837 break ;
1838 case wxFONTENCODING_ISO8859_14 :
1839 enc = kCFStringEncodingISOLatin8;
1840 break ;
1841 case wxFONTENCODING_ISO8859_15 :
1842 enc = kCFStringEncodingISOLatin9;
1843 break ;
1844
1845 case wxFONTENCODING_KOI8 :
1846 enc = kCFStringEncodingKOI8_R;
1847 break ;
1848 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1849 enc = kCFStringEncodingDOSRussian;
1850 break ;
1851
1852// case wxFONTENCODING_BULGARIAN :
1853// enc = ;
1854// break ;
1855
1856 case wxFONTENCODING_CP437 :
1857 enc =kCFStringEncodingDOSLatinUS ;
1858 break ;
1859 case wxFONTENCODING_CP850 :
1860 enc = kCFStringEncodingDOSLatin1;
1861 break ;
1862 case wxFONTENCODING_CP852 :
1863 enc = kCFStringEncodingDOSLatin2;
1864 break ;
1865 case wxFONTENCODING_CP855 :
1866 enc = kCFStringEncodingDOSCyrillic;
1867 break ;
1868 case wxFONTENCODING_CP866 :
1869 enc =kCFStringEncodingDOSRussian ;
1870 break ;
1871 case wxFONTENCODING_CP874 :
1872 enc = kCFStringEncodingDOSThai;
1873 break ;
1874 case wxFONTENCODING_CP932 :
1875 enc = kCFStringEncodingDOSJapanese;
1876 break ;
1877 case wxFONTENCODING_CP936 :
1878 enc =kCFStringEncodingDOSChineseSimplif ;
1879 break ;
1880 case wxFONTENCODING_CP949 :
1881 enc = kCFStringEncodingDOSKorean;
1882 break ;
1883 case wxFONTENCODING_CP950 :
1884 enc = kCFStringEncodingDOSChineseTrad;
1885 break ;
ecd9653b
WS
1886 case wxFONTENCODING_CP1250 :
1887 enc = kCFStringEncodingWindowsLatin2;
1888 break ;
1889 case wxFONTENCODING_CP1251 :
1890 enc =kCFStringEncodingWindowsCyrillic ;
1891 break ;
1892 case wxFONTENCODING_CP1252 :
1893 enc =kCFStringEncodingWindowsLatin1 ;
1894 break ;
1895 case wxFONTENCODING_CP1253 :
1896 enc = kCFStringEncodingWindowsGreek;
1897 break ;
1898 case wxFONTENCODING_CP1254 :
1899 enc = kCFStringEncodingWindowsLatin5;
1900 break ;
1901 case wxFONTENCODING_CP1255 :
1902 enc =kCFStringEncodingWindowsHebrew ;
1903 break ;
1904 case wxFONTENCODING_CP1256 :
1905 enc =kCFStringEncodingWindowsArabic ;
1906 break ;
1907 case wxFONTENCODING_CP1257 :
1908 enc = kCFStringEncodingWindowsBalticRim;
1909 break ;
638357a0
RN
1910// This only really encodes to UTF7 (if that) evidently
1911// case wxFONTENCODING_UTF7 :
1912// enc = kCFStringEncodingNonLossyASCII ;
1913// break ;
ecd9653b
WS
1914 case wxFONTENCODING_UTF8 :
1915 enc = kCFStringEncodingUTF8 ;
1916 break ;
1917 case wxFONTENCODING_EUC_JP :
1918 enc = kCFStringEncodingEUC_JP;
1919 break ;
1920 case wxFONTENCODING_UTF16 :
f7e98dee 1921 enc = kCFStringEncodingUnicode ;
ecd9653b 1922 break ;
f7e98dee
RN
1923 case wxFONTENCODING_MACROMAN :
1924 enc = kCFStringEncodingMacRoman ;
1925 break ;
1926 case wxFONTENCODING_MACJAPANESE :
1927 enc = kCFStringEncodingMacJapanese ;
1928 break ;
1929 case wxFONTENCODING_MACCHINESETRAD :
1930 enc = kCFStringEncodingMacChineseTrad ;
1931 break ;
1932 case wxFONTENCODING_MACKOREAN :
1933 enc = kCFStringEncodingMacKorean ;
1934 break ;
1935 case wxFONTENCODING_MACARABIC :
1936 enc = kCFStringEncodingMacArabic ;
1937 break ;
1938 case wxFONTENCODING_MACHEBREW :
1939 enc = kCFStringEncodingMacHebrew ;
1940 break ;
1941 case wxFONTENCODING_MACGREEK :
1942 enc = kCFStringEncodingMacGreek ;
1943 break ;
1944 case wxFONTENCODING_MACCYRILLIC :
1945 enc = kCFStringEncodingMacCyrillic ;
1946 break ;
1947 case wxFONTENCODING_MACDEVANAGARI :
1948 enc = kCFStringEncodingMacDevanagari ;
1949 break ;
1950 case wxFONTENCODING_MACGURMUKHI :
1951 enc = kCFStringEncodingMacGurmukhi ;
1952 break ;
1953 case wxFONTENCODING_MACGUJARATI :
1954 enc = kCFStringEncodingMacGujarati ;
1955 break ;
1956 case wxFONTENCODING_MACORIYA :
1957 enc = kCFStringEncodingMacOriya ;
1958 break ;
1959 case wxFONTENCODING_MACBENGALI :
1960 enc = kCFStringEncodingMacBengali ;
1961 break ;
1962 case wxFONTENCODING_MACTAMIL :
1963 enc = kCFStringEncodingMacTamil ;
1964 break ;
1965 case wxFONTENCODING_MACTELUGU :
1966 enc = kCFStringEncodingMacTelugu ;
1967 break ;
1968 case wxFONTENCODING_MACKANNADA :
1969 enc = kCFStringEncodingMacKannada ;
1970 break ;
1971 case wxFONTENCODING_MACMALAJALAM :
1972 enc = kCFStringEncodingMacMalayalam ;
1973 break ;
1974 case wxFONTENCODING_MACSINHALESE :
1975 enc = kCFStringEncodingMacSinhalese ;
1976 break ;
1977 case wxFONTENCODING_MACBURMESE :
1978 enc = kCFStringEncodingMacBurmese ;
1979 break ;
1980 case wxFONTENCODING_MACKHMER :
1981 enc = kCFStringEncodingMacKhmer ;
1982 break ;
1983 case wxFONTENCODING_MACTHAI :
1984 enc = kCFStringEncodingMacThai ;
1985 break ;
1986 case wxFONTENCODING_MACLAOTIAN :
1987 enc = kCFStringEncodingMacLaotian ;
1988 break ;
1989 case wxFONTENCODING_MACGEORGIAN :
1990 enc = kCFStringEncodingMacGeorgian ;
1991 break ;
1992 case wxFONTENCODING_MACARMENIAN :
1993 enc = kCFStringEncodingMacArmenian ;
1994 break ;
1995 case wxFONTENCODING_MACCHINESESIMP :
1996 enc = kCFStringEncodingMacChineseSimp ;
1997 break ;
1998 case wxFONTENCODING_MACTIBETAN :
1999 enc = kCFStringEncodingMacTibetan ;
2000 break ;
2001 case wxFONTENCODING_MACMONGOLIAN :
2002 enc = kCFStringEncodingMacMongolian ;
2003 break ;
2004 case wxFONTENCODING_MACETHIOPIC :
2005 enc = kCFStringEncodingMacEthiopic ;
2006 break ;
2007 case wxFONTENCODING_MACCENTRALEUR :
2008 enc = kCFStringEncodingMacCentralEurRoman ;
2009 break ;
2010 case wxFONTENCODING_MACVIATNAMESE :
2011 enc = kCFStringEncodingMacVietnamese ;
2012 break ;
2013 case wxFONTENCODING_MACARABICEXT :
2014 enc = kCFStringEncodingMacExtArabic ;
2015 break ;
2016 case wxFONTENCODING_MACSYMBOL :
2017 enc = kCFStringEncodingMacSymbol ;
2018 break ;
2019 case wxFONTENCODING_MACDINGBATS :
2020 enc = kCFStringEncodingMacDingbats ;
2021 break ;
2022 case wxFONTENCODING_MACTURKISH :
2023 enc = kCFStringEncodingMacTurkish ;
2024 break ;
2025 case wxFONTENCODING_MACCROATIAN :
2026 enc = kCFStringEncodingMacCroatian ;
2027 break ;
2028 case wxFONTENCODING_MACICELANDIC :
2029 enc = kCFStringEncodingMacIcelandic ;
2030 break ;
2031 case wxFONTENCODING_MACROMANIAN :
2032 enc = kCFStringEncodingMacRomanian ;
2033 break ;
2034 case wxFONTENCODING_MACCELTIC :
2035 enc = kCFStringEncodingMacCeltic ;
2036 break ;
2037 case wxFONTENCODING_MACGAELIC :
2038 enc = kCFStringEncodingMacGaelic ;
2039 break ;
ecd9653b
WS
2040// case wxFONTENCODING_MACKEYBOARD :
2041// enc = kCFStringEncodingMacKeyboardGlyphs ;
2042// break ;
2043 default :
2044 // because gcc is picky
2045 break ;
2046 } ;
2047 return enc ;
f7e98dee
RN
2048}
2049
f7e98dee
RN
2050class wxMBConv_cocoa : public wxMBConv
2051{
2052public:
2053 wxMBConv_cocoa()
2054 {
2055 Init(CFStringGetSystemEncoding()) ;
2056 }
2057
a6900d10 2058#if wxUSE_FONTMAP
f7e98dee
RN
2059 wxMBConv_cocoa(const wxChar* name)
2060 {
267e11c5 2061 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
f7e98dee 2062 }
a6900d10 2063#endif
f7e98dee
RN
2064
2065 wxMBConv_cocoa(wxFontEncoding encoding)
2066 {
2067 Init( wxCFStringEncFromFontEnc(encoding) );
2068 }
2069
2070 ~wxMBConv_cocoa()
2071 {
2072 }
2073
2074 void Init( CFStringEncoding encoding)
2075 {
638357a0 2076 m_encoding = encoding ;
f7e98dee
RN
2077 }
2078
2079 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2080 {
2081 wxASSERT(szUnConv);
ecd9653b 2082
638357a0
RN
2083 CFStringRef theString = CFStringCreateWithBytes (
2084 NULL, //the allocator
2085 (const UInt8*)szUnConv,
2086 strlen(szUnConv),
2087 m_encoding,
2088 false //no BOM/external representation
f7e98dee
RN
2089 );
2090
2091 wxASSERT(theString);
2092
638357a0
RN
2093 size_t nOutLength = CFStringGetLength(theString);
2094
2095 if (szOut == NULL)
f7e98dee 2096 {
f7e98dee 2097 CFRelease(theString);
638357a0 2098 return nOutLength;
f7e98dee 2099 }
ecd9653b 2100
638357a0 2101 CFRange theRange = { 0, nOutSize };
ecd9653b 2102
638357a0
RN
2103#if SIZEOF_WCHAR_T == 4
2104 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2105#endif
3698ae71 2106
f7e98dee 2107 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
3698ae71 2108
f7e98dee 2109 CFRelease(theString);
ecd9653b 2110
638357a0 2111 szUniCharBuffer[nOutLength] = '\0' ;
f7e98dee
RN
2112
2113#if SIZEOF_WCHAR_T == 4
2114 wxMBConvUTF16 converter ;
638357a0 2115 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
f7e98dee
RN
2116 delete[] szUniCharBuffer;
2117#endif
3698ae71 2118
638357a0 2119 return nOutLength;
f7e98dee
RN
2120 }
2121
2122 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2123 {
638357a0 2124 wxASSERT(szUnConv);
3698ae71 2125
f7e98dee 2126 size_t nRealOutSize;
638357a0 2127 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 2128 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2129
f7e98dee 2130#if SIZEOF_WCHAR_T == 4
d9d488cf 2131 wxMBConvUTF16 converter ;
f7e98dee
RN
2132 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2133 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2134 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2135 nBufSize /= sizeof(UniChar);
f7e98dee
RN
2136#endif
2137
2138 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2139 NULL, //allocator
2140 szUniBuffer,
2141 nBufSize,
638357a0 2142 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 2143 );
ecd9653b 2144
f7e98dee 2145 wxASSERT(theString);
ecd9653b 2146
f7e98dee 2147 //Note that CER puts a BOM when converting to unicode
638357a0
RN
2148 //so we check and use getchars instead in that case
2149 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 2150 {
638357a0
RN
2151 if (szOut != NULL)
2152 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
3698ae71 2153
638357a0
RN
2154 nRealOutSize = CFStringGetLength(theString) + 1;
2155 }
2156 else
2157 {
2158 CFStringGetBytes(
2159 theString,
2160 CFRangeMake(0, CFStringGetLength(theString)),
2161 m_encoding,
2162 0, //what to put in characters that can't be converted -
2163 //0 tells CFString to return NULL if it meets such a character
2164 false, //not an external representation
2165 (UInt8*) szOut,
3698ae71 2166 nOutSize,
638357a0
RN
2167 (CFIndex*) &nRealOutSize
2168 );
f7e98dee 2169 }
ecd9653b 2170
638357a0 2171 CFRelease(theString);
ecd9653b 2172
638357a0
RN
2173#if SIZEOF_WCHAR_T == 4
2174 delete[] szUniBuffer;
2175#endif
ecd9653b 2176
f7e98dee
RN
2177 return nRealOutSize - 1;
2178 }
2179
2180 bool IsOk() const
ecd9653b 2181 {
3698ae71 2182 return m_encoding != kCFStringEncodingInvalidId &&
638357a0 2183 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2184 }
2185
2186private:
638357a0 2187 CFStringEncoding m_encoding ;
f7e98dee
RN
2188};
2189
2190#endif // defined(__WXCOCOA__)
2191
335d31e0
SC
2192// ============================================================================
2193// Mac conversion classes
2194// ============================================================================
2195
2196#if defined(__WXMAC__) && defined(TARGET_CARBON)
2197
2198class wxMBConv_mac : public wxMBConv
2199{
2200public:
2201 wxMBConv_mac()
2202 {
2203 Init(CFStringGetSystemEncoding()) ;
2204 }
2205
2d1659cf 2206#if wxUSE_FONTMAP
335d31e0
SC
2207 wxMBConv_mac(const wxChar* name)
2208 {
267e11c5 2209 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
335d31e0 2210 }
2d1659cf 2211#endif
335d31e0
SC
2212
2213 wxMBConv_mac(wxFontEncoding encoding)
2214 {
d775fa82
WS
2215 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2216 }
2217
2218 ~wxMBConv_mac()
2219 {
2220 OSStatus status = noErr ;
2221 status = TECDisposeConverter(m_MB2WC_converter);
2222 status = TECDisposeConverter(m_WC2MB_converter);
2223 }
2224
2225
2226 void Init( TextEncodingBase encoding)
2227 {
2228 OSStatus status = noErr ;
2229 m_char_encoding = encoding ;
2230 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2231
2232 status = TECCreateConverter(&m_MB2WC_converter,
2233 m_char_encoding,
2234 m_unicode_encoding);
2235 status = TECCreateConverter(&m_WC2MB_converter,
2236 m_unicode_encoding,
2237 m_char_encoding);
2238 }
2239
335d31e0
SC
2240 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2241 {
d775fa82
WS
2242 OSStatus status = noErr ;
2243 ByteCount byteOutLen ;
2244 ByteCount byteInLen = strlen(psz) ;
2245 wchar_t *tbuf = NULL ;
2246 UniChar* ubuf = NULL ;
2247 size_t res = 0 ;
2248
2249 if (buf == NULL)
2250 {
638357a0 2251 //apple specs say at least 32
c543817b 2252 n = wxMax( 32 , byteInLen ) ;
d775fa82
WS
2253 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2254 }
2255 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
f3a355ce 2256#if SIZEOF_WCHAR_T == 4
d775fa82 2257 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2258#else
d775fa82 2259 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2260#endif
d775fa82
WS
2261 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2262 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
f3a355ce 2263#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2264 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2265 // is not properly terminated we get random characters at the end
2266 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d9d488cf 2267 wxMBConvUTF16 converter ;
d775fa82
WS
2268 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2269 free( ubuf ) ;
f3a355ce 2270#else
d775fa82 2271 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2272#endif
d775fa82
WS
2273 if ( buf == NULL )
2274 free(tbuf) ;
335d31e0 2275
335d31e0
SC
2276 if ( buf && res < n)
2277 buf[res] = 0;
2278
d775fa82 2279 return res ;
335d31e0
SC
2280 }
2281
2282 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82
WS
2283 {
2284 OSStatus status = noErr ;
2285 ByteCount byteOutLen ;
2286 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2287
2288 char *tbuf = NULL ;
2289
2290 if (buf == NULL)
2291 {
638357a0 2292 //apple specs say at least 32
c543817b 2293 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2294 tbuf = (char*) malloc( n ) ;
2295 }
2296
2297 ByteCount byteBufferLen = n ;
2298 UniChar* ubuf = NULL ;
f3a355ce 2299#if SIZEOF_WCHAR_T == 4
d9d488cf 2300 wxMBConvUTF16 converter ;
d775fa82
WS
2301 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2302 byteInLen = unicharlen ;
2303 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2304 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce 2305#else
d775fa82 2306 ubuf = (UniChar*) psz ;
f3a355ce 2307#endif
d775fa82
WS
2308 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2309 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
f3a355ce 2310#if SIZEOF_WCHAR_T == 4
d775fa82 2311 free( ubuf ) ;
f3a355ce 2312#endif
d775fa82
WS
2313 if ( buf == NULL )
2314 free(tbuf) ;
335d31e0 2315
d775fa82 2316 size_t res = byteOutLen ;
335d31e0 2317 if ( buf && res < n)
638357a0 2318 {
335d31e0 2319 buf[res] = 0;
3698ae71 2320
638357a0
RN
2321 //we need to double-trip to verify it didn't insert any ? in place
2322 //of bogus characters
2323 wxWCharBuffer wcBuf(n);
2324 size_t pszlen = wxWcslen(psz);
2325 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2326 wxWcslen(wcBuf) != pszlen ||
2327 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2328 {
2329 // we didn't obtain the same thing we started from, hence
2330 // the conversion was lossy and we consider that it failed
2331 return (size_t)-1;
2332 }
2333 }
335d31e0 2334
d775fa82 2335 return res ;
335d31e0
SC
2336 }
2337
2338 bool IsOk() const
2339 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2340
2341private:
d775fa82
WS
2342 TECObjectRef m_MB2WC_converter ;
2343 TECObjectRef m_WC2MB_converter ;
2344
2345 TextEncodingBase m_char_encoding ;
2346 TextEncodingBase m_unicode_encoding ;
335d31e0
SC
2347};
2348
2349#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 2350
36acb880
VZ
2351// ============================================================================
2352// wxEncodingConverter based conversion classes
2353// ============================================================================
2354
1e6feb95 2355#if wxUSE_FONTMAP
1cd52418 2356
e95354ec 2357class wxMBConv_wxwin : public wxMBConv
1cd52418 2358{
8b04d4c4
VZ
2359private:
2360 void Init()
2361 {
2362 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2363 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2364 }
2365
6001e347 2366public:
f1339c56
RR
2367 // temporarily just use wxEncodingConverter stuff,
2368 // so that it works while a better implementation is built
e95354ec 2369 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
2370 {
2371 if (name)
267e11c5 2372 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
2373 else
2374 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 2375
8b04d4c4
VZ
2376 Init();
2377 }
2378
e95354ec 2379 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
2380 {
2381 m_enc = enc;
2382
2383 Init();
f1339c56 2384 }
dccce9ea 2385
bde4baac 2386 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
2387 {
2388 size_t inbuf = strlen(psz);
dccce9ea 2389 if (buf)
c643a977
VS
2390 {
2391 if (!m2w.Convert(psz,buf))
2392 return (size_t)-1;
2393 }
f1339c56
RR
2394 return inbuf;
2395 }
dccce9ea 2396
bde4baac 2397 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 2398 {
f8d791e0 2399 const size_t inbuf = wxWcslen(psz);
f1339c56 2400 if (buf)
c643a977
VS
2401 {
2402 if (!w2m.Convert(psz,buf))
2403 return (size_t)-1;
2404 }
dccce9ea 2405
f1339c56
RR
2406 return inbuf;
2407 }
dccce9ea 2408
e95354ec 2409 bool IsOk() const { return m_ok; }
f1339c56
RR
2410
2411public:
8b04d4c4 2412 wxFontEncoding m_enc;
f1339c56 2413 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
2414
2415 // were we initialized successfully?
2416 bool m_ok;
fc7a2a60 2417
e95354ec 2418 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 2419};
6001e347 2420
8f115891
MW
2421// make the constructors available for unit testing
2422WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2423{
2424 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2425 if ( !result->IsOk() )
2426 {
2427 delete result;
2428 return 0;
2429 }
2430 return result;
2431}
2432
1e6feb95
VZ
2433#endif // wxUSE_FONTMAP
2434
36acb880
VZ
2435// ============================================================================
2436// wxCSConv implementation
2437// ============================================================================
2438
8b04d4c4 2439void wxCSConv::Init()
6001e347 2440{
e95354ec
VZ
2441 m_name = NULL;
2442 m_convReal = NULL;
2443 m_deferred = true;
2444}
2445
8b04d4c4
VZ
2446wxCSConv::wxCSConv(const wxChar *charset)
2447{
2448 Init();
82713003 2449
e95354ec
VZ
2450 if ( charset )
2451 {
e95354ec
VZ
2452 SetName(charset);
2453 }
bda3d86a
VZ
2454
2455 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
2456}
2457
8b04d4c4
VZ
2458wxCSConv::wxCSConv(wxFontEncoding encoding)
2459{
bda3d86a 2460 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
2461 {
2462 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2463
2464 encoding = wxFONTENCODING_SYSTEM;
2465 }
2466
8b04d4c4
VZ
2467 Init();
2468
bda3d86a 2469 m_encoding = encoding;
8b04d4c4
VZ
2470}
2471
6001e347
RR
2472wxCSConv::~wxCSConv()
2473{
65e50848
JS
2474 Clear();
2475}
2476
54380f29 2477wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 2478 : wxMBConv()
54380f29 2479{
8b04d4c4
VZ
2480 Init();
2481
54380f29 2482 SetName(conv.m_name);
8b04d4c4 2483 m_encoding = conv.m_encoding;
54380f29
GD
2484}
2485
2486wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2487{
2488 Clear();
8b04d4c4 2489
54380f29 2490 SetName(conv.m_name);
8b04d4c4
VZ
2491 m_encoding = conv.m_encoding;
2492
54380f29
GD
2493 return *this;
2494}
2495
65e50848
JS
2496void wxCSConv::Clear()
2497{
8b04d4c4 2498 free(m_name);
e95354ec 2499 delete m_convReal;
8b04d4c4 2500
65e50848 2501 m_name = NULL;
e95354ec 2502 m_convReal = NULL;
6001e347
RR
2503}
2504
2505void wxCSConv::SetName(const wxChar *charset)
2506{
f1339c56
RR
2507 if (charset)
2508 {
2509 m_name = wxStrdup(charset);
e95354ec 2510 m_deferred = true;
f1339c56 2511 }
6001e347
RR
2512}
2513
8b3eb85d
VZ
#if wxUSE_FONTMAP
#include "wx/hashmap.h"

// map from the font encoding to a charset name: DoCreate() stores here the
// name for which iconv conversion could be created or an empty string if no
// name worked for this encoding
WX_DECLARE_HASH_MAP( wxFontEncoding,
                     wxString,
                     wxIntegerHash,
                     wxIntegerEqual,
                     wxEncodingNameCache );

static wxEncodingNameCache gs_nameCache;
#endif // wxUSE_FONTMAP
2522
e95354ec
VZ
2523wxMBConv *wxCSConv::DoCreate() const
2524{
ce6f8d6f
VZ
2525#if wxUSE_FONTMAP
2526 wxLogTrace(TRACE_STRCONV,
2527 wxT("creating conversion for %s"),
2528 (m_name ? m_name
2529 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2530#endif // wxUSE_FONTMAP
2531
c547282d
VZ
2532 // check for the special case of ASCII or ISO8859-1 charset: as we have
2533 // special knowledge of it anyhow, we don't need to create a special
2534 // conversion object
2535 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 2536 {
e95354ec
VZ
2537 // don't convert at all
2538 return NULL;
2539 }
dccce9ea 2540
e95354ec
VZ
2541 // we trust OS to do conversion better than we can so try external
2542 // conversion methods first
2543 //
2544 // the full order is:
2545 // 1. OS conversion (iconv() under Unix or Win32 API)
2546 // 2. hard coded conversions for UTF
2547 // 3. wxEncodingConverter as fall back
2548
2549 // step (1)
2550#ifdef HAVE_ICONV
c547282d 2551#if !wxUSE_FONTMAP
e95354ec 2552 if ( m_name )
c547282d 2553#endif // !wxUSE_FONTMAP
e95354ec 2554 {
c547282d 2555 wxString name(m_name);
8b3eb85d
VZ
2556 wxFontEncoding encoding(m_encoding);
2557
2558 if ( !name.empty() )
2559 {
2560 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2561 if ( conv->IsOk() )
2562 return conv;
2563
2564 delete conv;
c547282d
VZ
2565
2566#if wxUSE_FONTMAP
8b3eb85d
VZ
2567 encoding =
2568 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
c547282d 2569#endif // wxUSE_FONTMAP
8b3eb85d
VZ
2570 }
2571#if wxUSE_FONTMAP
2572 {
2573 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2574 if ( it != gs_nameCache.end() )
2575 {
2576 if ( it->second.empty() )
2577 return NULL;
c547282d 2578
8b3eb85d
VZ
2579 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2580 if ( conv->IsOk() )
2581 return conv;
e95354ec 2582
8b3eb85d
VZ
2583 delete conv;
2584 }
2585
2586 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2587
2588 for ( ; *names; ++names )
2589 {
2590 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2591 if ( conv->IsOk() )
2592 {
2593 gs_nameCache[encoding] = *names;
2594 return conv;
2595 }
2596
2597 delete conv;
2598 }
2599
40711af8 2600 gs_nameCache[encoding] = _T(""); // cache the failure
8b3eb85d
VZ
2601 }
2602#endif // wxUSE_FONTMAP
e95354ec
VZ
2603 }
2604#endif // HAVE_ICONV
2605
2606#ifdef wxHAVE_WIN32_MB2WC
2607 {
7608a683 2608#if wxUSE_FONTMAP
e95354ec
VZ
2609 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2610 : new wxMBConv_win32(m_encoding);
2611 if ( conv->IsOk() )
2612 return conv;
2613
2614 delete conv;
7608a683
WS
2615#else
2616 return NULL;
2617#endif
e95354ec
VZ
2618 }
2619#endif // wxHAVE_WIN32_MB2WC
d775fa82
WS
2620#if defined(__WXMAC__)
2621 {
5c3c8676 2622 // leave UTF16 and UTF32 to the built-ins of wx
3698ae71 2623 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
5c3c8676 2624 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
d775fa82
WS
2625 {
2626
2d1659cf 2627#if wxUSE_FONTMAP
d775fa82
WS
2628 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2629 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
2630#else
2631 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2632#endif
d775fa82 2633 if ( conv->IsOk() )
f7e98dee
RN
2634 return conv;
2635
2636 delete conv;
2637 }
2638 }
2639#endif
2640#if defined(__WXCOCOA__)
2641 {
2642 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2643 {
2644
a6900d10 2645#if wxUSE_FONTMAP
f7e98dee
RN
2646 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2647 : new wxMBConv_cocoa(m_encoding);
a6900d10
RN
2648#else
2649 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2650#endif
f7e98dee 2651 if ( conv->IsOk() )
d775fa82
WS
2652 return conv;
2653
2654 delete conv;
2655 }
335d31e0
SC
2656 }
2657#endif
e95354ec
VZ
2658 // step (2)
2659 wxFontEncoding enc = m_encoding;
2660#if wxUSE_FONTMAP
c547282d
VZ
2661 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2662 {
2663 // use "false" to suppress interactive dialogs -- we can be called from
2664 // anywhere and popping up a dialog from here is the last thing we want to
2665 // do
267e11c5 2666 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 2667 }
e95354ec
VZ
2668#endif // wxUSE_FONTMAP
2669
2670 switch ( enc )
2671 {
2672 case wxFONTENCODING_UTF7:
2673 return new wxMBConvUTF7;
2674
2675 case wxFONTENCODING_UTF8:
2676 return new wxMBConvUTF8;
2677
e95354ec
VZ
2678 case wxFONTENCODING_UTF16BE:
2679 return new wxMBConvUTF16BE;
2680
2681 case wxFONTENCODING_UTF16LE:
2682 return new wxMBConvUTF16LE;
2683
e95354ec
VZ
2684 case wxFONTENCODING_UTF32BE:
2685 return new wxMBConvUTF32BE;
2686
2687 case wxFONTENCODING_UTF32LE:
2688 return new wxMBConvUTF32LE;
2689
2690 default:
2691 // nothing to do but put here to suppress gcc warnings
2692 ;
2693 }
2694
2695 // step (3)
2696#if wxUSE_FONTMAP
2697 {
2698 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2699 : new wxMBConv_wxwin(m_encoding);
2700 if ( conv->IsOk() )
2701 return conv;
2702
2703 delete conv;
2704 }
2705#endif // wxUSE_FONTMAP
2706
a58d4f4d
VS
2707 // NB: This is a hack to prevent deadlock. What could otherwise happen
2708 // in Unicode build: wxConvLocal creation ends up being here
2709 // because of some failure and logs the error. But wxLog will try to
2710 // attach timestamp, for which it will need wxConvLocal (to convert
2711 // time to char* and then wchar_t*), but that fails, tries to log
2712 // error, but wxLog has a (already locked) critical section that
2713 // guards static buffer.
2714 static bool alreadyLoggingError = false;
2715 if (!alreadyLoggingError)
2716 {
2717 alreadyLoggingError = true;
2718 wxLogError(_("Cannot convert from the charset '%s'!"),
2719 m_name ? m_name
e95354ec
VZ
2720 :
2721#if wxUSE_FONTMAP
267e11c5 2722 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
e95354ec
VZ
2723#else // !wxUSE_FONTMAP
2724 wxString::Format(_("encoding %s"), m_encoding).c_str()
2725#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2726 );
a58d4f4d
VS
2727 alreadyLoggingError = false;
2728 }
e95354ec
VZ
2729
2730 return NULL;
2731}
2732
2733void wxCSConv::CreateConvIfNeeded() const
2734{
2735 if ( m_deferred )
2736 {
2737 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
2738
2739#if wxUSE_INTL
2740 // if we don't have neither the name nor the encoding, use the default
2741 // encoding for this system
2742 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2743 {
4d312c22 2744 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
2745 }
2746#endif // wxUSE_INTL
2747
e95354ec
VZ
2748 self->m_convReal = DoCreate();
2749 self->m_deferred = false;
6001e347 2750 }
6001e347
RR
2751}
2752
2753size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2754{
e95354ec 2755 CreateConvIfNeeded();
dccce9ea 2756
e95354ec
VZ
2757 if (m_convReal)
2758 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
2759
2760 // latin-1 (direct)
4def3b35 2761 size_t len = strlen(psz);
dccce9ea 2762
f1339c56
RR
2763 if (buf)
2764 {
4def3b35 2765 for (size_t c = 0; c <= len; c++)
f1339c56
RR
2766 buf[c] = (unsigned char)(psz[c]);
2767 }
dccce9ea 2768
f1339c56 2769 return len;
6001e347
RR
2770}
2771
2772size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2773{
e95354ec 2774 CreateConvIfNeeded();
dccce9ea 2775
e95354ec
VZ
2776 if (m_convReal)
2777 return m_convReal->WC2MB(buf, psz, n);
1cd52418 2778
f1339c56 2779 // latin-1 (direct)
f8d791e0 2780 const size_t len = wxWcslen(psz);
f1339c56
RR
2781 if (buf)
2782 {
4def3b35 2783 for (size_t c = 0; c <= len; c++)
24642831
VS
2784 {
2785 if (psz[c] > 0xFF)
2786 return (size_t)-1;
907173e5 2787 buf[c] = (char)psz[c];
24642831
VS
2788 }
2789 }
2790 else
2791 {
2792 for (size_t c = 0; c <= len; c++)
2793 {
2794 if (psz[c] > 0xFF)
2795 return (size_t)-1;
2796 }
f1339c56 2797 }
dccce9ea 2798
f1339c56 2799 return len;
6001e347
RR
2800}
2801
bde4baac
VZ
2802// ----------------------------------------------------------------------------
2803// globals
2804// ----------------------------------------------------------------------------
2805
2806#ifdef __WINDOWS__
2807 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
2808#elif defined(__WXMAC__) && !defined(__MACH__)
2809 static wxMBConv_mac wxConvLibcObj ;
bde4baac 2810#else
dcc8fac0 2811 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
2812#endif
2813
2814static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2815static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2816static wxMBConvUTF7 wxConvUTF7Obj;
2817static wxMBConvUTF8 wxConvUTF8Obj;
c12b7f79 2818
bde4baac
VZ
2819WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2820WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2821WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2822WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2823WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2824WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
f5a1953b
VZ
2825WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2826#ifdef __WXOSX__
ea8ce907 2827 wxConvUTF8Obj;
f5a1953b 2828#else
ea8ce907 2829 wxConvLibcObj;
f5a1953b
VZ
2830#endif
2831
bde4baac
VZ
2832
2833#else // !wxUSE_WCHAR_T
2834
2835// stand-ins in absence of wchar_t
2836WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2837 wxConvISO8859_1,
2838 wxConvLocal,
2839 wxConvUTF8;
2840
2841#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
2842
2843