]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
don't try to subclass tab control using the same window proc for our class, this...
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
f6bcfd97
BP
15// ============================================================================
16// declarations
17// ============================================================================
18
19// ----------------------------------------------------------------------------
20// headers
21// ----------------------------------------------------------------------------
22
14f355c2 23#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
24 #pragma implementation "strconv.h"
25#endif
26
27// For compilers that support precompilation, includes "wx.h".
28#include "wx/wxprec.h"
29
30#ifdef __BORLANDC__
31 #pragma hdrstop
32#endif
33
373658eb
VZ
34#ifndef WX_PRECOMP
35 #include "wx/intl.h"
36 #include "wx/log.h"
37#endif // WX_PRECOMP
38
bde4baac
VZ
39#include "wx/strconv.h"
40
41#if wxUSE_WCHAR_T
42
7608a683 43#ifdef __WINDOWS__
532d575b 44 #include "wx/msw/private.h"
13dd924a 45 #include "wx/msw/missing.h"
0a1c1e62
GRG
46#endif
47
1c193821 48#ifndef __WXWINCE__
1cd52418 49#include <errno.h>
1c193821
JS
50#endif
51
6001e347
RR
52#include <ctype.h>
53#include <string.h>
54#include <stdlib.h>
55
e95354ec
VZ
56#if defined(__WIN32__) && !defined(__WXMICROWIN__)
57 #define wxHAVE_WIN32_MB2WC
58#endif // __WIN32__ but !__WXMICROWIN__
59
373658eb
VZ
60// ----------------------------------------------------------------------------
61// headers
62// ----------------------------------------------------------------------------
7af284fd 63
6001e347 64#ifdef __SALFORDC__
373658eb 65 #include <clib.h>
6001e347
RR
66#endif
67
b040e242 68#ifdef HAVE_ICONV
373658eb 69 #include <iconv.h>
b1d547eb 70 #include "wx/thread.h"
1cd52418 71#endif
1cd52418 72
373658eb
VZ
73#include "wx/encconv.h"
74#include "wx/fontmap.h"
7608a683 75#include "wx/utils.h"
373658eb 76
335d31e0 77#ifdef __WXMAC__
4227afa4
SC
78#include <ATSUnicode.h>
79#include <TextCommon.h>
80#include <TextEncodingConverter.h>
335d31e0
SC
81
82#include "wx/mac/private.h" // includes mac headers
83#endif
373658eb
VZ
84// ----------------------------------------------------------------------------
85// macros
86// ----------------------------------------------------------------------------
3e61dfb0 87
1cd52418 88#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
3a0d76bc 89#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
1cd52418
OK
90
91#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
92 #define WC_NAME "UCS4"
93 #define WC_BSWAP BSWAP_UCS4
94 #ifdef WORDS_BIGENDIAN
95 #define WC_NAME_BEST "UCS-4BE"
96 #else
97 #define WC_NAME_BEST "UCS-4LE"
98 #endif
1cd52418 99#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
100 #define WC_NAME "UTF16"
101 #define WC_BSWAP BSWAP_UTF16
a3f2769e 102 #define WC_UTF16
3a0d76bc
VS
103 #ifdef WORDS_BIGENDIAN
104 #define WC_NAME_BEST "UTF-16BE"
105 #else
106 #define WC_NAME_BEST "UTF-16LE"
107 #endif
bab1e722 108#else // sizeof(wchar_t) != 2 nor 4
bde4baac
VZ
109 // does this ever happen?
110 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1cd52418
OK
111#endif
112
373658eb
VZ
113// ============================================================================
114// implementation
115// ============================================================================
116
117// ----------------------------------------------------------------------------
c91830cb 118// UTF-16 en/decoding to/from UCS-4
373658eb 119// ----------------------------------------------------------------------------
6001e347 120
b0a6bb75 121
c91830cb 122static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 123{
dccce9ea 124 if (input<=0xffff)
4def3b35 125 {
999836aa
VZ
126 if (output)
127 *output = (wxUint16) input;
4def3b35 128 return 1;
dccce9ea
VZ
129 }
130 else if (input>=0x110000)
4def3b35
VS
131 {
132 return (size_t)-1;
dccce9ea
VZ
133 }
134 else
4def3b35 135 {
dccce9ea 136 if (output)
4def3b35 137 {
c91830cb 138 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
999836aa 139 *output = (wxUint16) ((input&0x3ff)+0xdc00);
4def3b35
VS
140 }
141 return 2;
1cd52418 142 }
1cd52418
OK
143}
144
c91830cb 145static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 146{
dccce9ea 147 if ((*input<0xd800) || (*input>0xdfff))
4def3b35
VS
148 {
149 output = *input;
150 return 1;
dccce9ea 151 }
cdb14ecb 152 else if ((input[1]<0xdc00) || (input[1]>0xdfff))
4def3b35
VS
153 {
154 output = *input;
155 return (size_t)-1;
dccce9ea
VZ
156 }
157 else
4def3b35
VS
158 {
159 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
160 return 2;
161 }
1cd52418
OK
162}
163
b0a6bb75 164
f6bcfd97 165// ----------------------------------------------------------------------------
6001e347 166// wxMBConv
f6bcfd97 167// ----------------------------------------------------------------------------
2c53a80a
WS
168
169wxMBConv::~wxMBConv()
170{
171 // nothing to do here (necessary for Darwin linking probably)
172}
6001e347 173
6001e347
RR
174const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
175{
2b5f62a0 176 if ( psz )
6001e347 177 {
2b5f62a0
VZ
178 // calculate the length of the buffer needed first
179 size_t nLen = MB2WC(NULL, psz, 0);
180 if ( nLen != (size_t)-1 )
181 {
182 // now do the actual conversion
183 wxWCharBuffer buf(nLen);
635f33ce
VS
184 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
185 if ( nLen != (size_t)-1 )
186 {
187 return buf;
188 }
2b5f62a0 189 }
f6bcfd97 190 }
2b5f62a0
VZ
191
192 wxWCharBuffer buf((wchar_t *)NULL);
193
194 return buf;
6001e347
RR
195}
196
e5cceba0 197const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 198{
2b5f62a0
VZ
199 if ( pwz )
200 {
201 size_t nLen = WC2MB(NULL, pwz, 0);
202 if ( nLen != (size_t)-1 )
203 {
c91830cb 204 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
635f33ce
VS
205 nLen = WC2MB(buf.data(), pwz, nLen + 4);
206 if ( nLen != (size_t)-1 )
207 {
208 return buf;
209 }
2b5f62a0
VZ
210 }
211 }
212
213 wxCharBuffer buf((char *)NULL);
e5cceba0 214
e5cceba0 215 return buf;
6001e347
RR
216}
217
f5fb6871 218const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 219{
f5fb6871
RN
220 wxASSERT(pOutSize != NULL);
221
e4e3bbb4
RN
222 const char* szEnd = szString + nStringLen + 1;
223 const char* szPos = szString;
224 const char* szStart = szPos;
225
226 size_t nActualLength = 0;
f5fb6871
RN
227 size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
228
229 wxWCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
230
231 //Convert the string until the length() is reached, continuing the
232 //loop every time a null character is reached
233 while(szPos != szEnd)
234 {
235 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
236
237 //Get the length of the current (sub)string
238 size_t nLen = MB2WC(NULL, szPos, 0);
239
240 //Invalid conversion?
241 if( nLen == (size_t)-1 )
f5fb6871
RN
242 {
243 *pOutSize = 0;
244 theBuffer.data()[0u] = wxT('\0');
245 return theBuffer;
246 }
247
e4e3bbb4
RN
248
249 //Increase the actual length (+1 for current null character)
250 nActualLength += nLen + 1;
251
f5fb6871
RN
252 //if buffer too big, realloc the buffer
253 if (nActualLength > (nCurrentSize+1))
254 {
255 wxWCharBuffer theNewBuffer(nCurrentSize << 1);
256 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
257 theBuffer = theNewBuffer;
258 nCurrentSize <<= 1;
259 }
260
261 //Convert the current (sub)string
262 if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 263 {
f5fb6871
RN
264 *pOutSize = 0;
265 theBuffer.data()[0u] = wxT('\0');
266 return theBuffer;
e4e3bbb4
RN
267 }
268
269 //Increment to next (sub)string
3103e8a9
JS
270 //Note that we have to use strlen instead of nLen here
271 //because XX2XX gives us the size of the output buffer,
272 //which is not necessarily the length of the string
e4e3bbb4
RN
273 szPos += strlen(szPos) + 1;
274 }
275
f5fb6871
RN
276 //success - return actual length and the buffer
277 *pOutSize = nActualLength;
3698ae71 278 return theBuffer;
e4e3bbb4
RN
279}
280
f5fb6871 281const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 282{
f5fb6871
RN
283 wxASSERT(pOutSize != NULL);
284
e4e3bbb4
RN
285 const wchar_t* szEnd = szString + nStringLen + 1;
286 const wchar_t* szPos = szString;
287 const wchar_t* szStart = szPos;
288
289 size_t nActualLength = 0;
f5fb6871
RN
290 size_t nCurrentSize = nStringLen << 2; //try * 4 first
291
292 wxCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
293
294 //Convert the string until the length() is reached, continuing the
295 //loop every time a null character is reached
296 while(szPos != szEnd)
297 {
298 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
299
300 //Get the length of the current (sub)string
301 size_t nLen = WC2MB(NULL, szPos, 0);
302
303 //Invalid conversion?
304 if( nLen == (size_t)-1 )
f5fb6871
RN
305 {
306 *pOutSize = 0;
307 theBuffer.data()[0u] = wxT('\0');
308 return theBuffer;
309 }
e4e3bbb4
RN
310
311 //Increase the actual length (+1 for current null character)
312 nActualLength += nLen + 1;
3698ae71 313
f5fb6871
RN
314 //if buffer too big, realloc the buffer
315 if (nActualLength > (nCurrentSize+1))
316 {
317 wxCharBuffer theNewBuffer(nCurrentSize << 1);
318 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
319 theBuffer = theNewBuffer;
320 nCurrentSize <<= 1;
321 }
322
323 //Convert the current (sub)string
324 if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 325 {
f5fb6871
RN
326 *pOutSize = 0;
327 theBuffer.data()[0u] = wxT('\0');
328 return theBuffer;
e4e3bbb4
RN
329 }
330
331 //Increment to next (sub)string
3103e8a9
JS
332 //Note that we have to use wxWcslen instead of nLen here
333 //because XX2XX gives us the size of the output buffer,
334 //which is not necessarily the length of the string
e4e3bbb4
RN
335 szPos += wxWcslen(szPos) + 1;
336 }
337
f5fb6871
RN
338 //success - return actual length and the buffer
339 *pOutSize = nActualLength;
3698ae71 340 return theBuffer;
e4e3bbb4
RN
341}
342
6001e347 343// ----------------------------------------------------------------------------
bde4baac 344// wxMBConvLibc
6001e347
RR
345// ----------------------------------------------------------------------------
346
bde4baac
VZ
347size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
348{
349 return wxMB2WC(buf, psz, n);
350}
351
352size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
353{
354 return wxWC2MB(buf, psz, n);
355}
e1bfe89e 356
66bf0099 357#ifdef __UNIX__
c12b7f79 358
e1bfe89e 359// ----------------------------------------------------------------------------
532d575b 360// wxConvBrokenFileNames
e1bfe89e
RR
361// ----------------------------------------------------------------------------
362
845905d5 363wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
ea8ce907 364{
845905d5
MW
365 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
366 || wxStricmp(charset, _T("UTF8")) == 0 )
367 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
368 else
369 m_conv = new wxCSConv(charset);
ea8ce907
RR
370}
371
c12b7f79
VZ
372size_t
373wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
374 const char *psz,
375 size_t outputSize) const
e1bfe89e 376{
c12b7f79 377 return m_conv->MB2WC( outputBuf, psz, outputSize );
e1bfe89e
RR
378}
379
c12b7f79
VZ
380size_t
381wxConvBrokenFileNames::WC2MB(char *outputBuf,
382 const wchar_t *psz,
383 size_t outputSize) const
e1bfe89e 384{
c12b7f79 385 return m_conv->WC2MB( outputBuf, psz, outputSize );
e1bfe89e
RR
386}
387
66bf0099 388#endif
c12b7f79 389
bde4baac 390// ----------------------------------------------------------------------------
3698ae71 391// UTF-7
bde4baac 392// ----------------------------------------------------------------------------
6001e347 393
15f2ee32 394// Implementation (C) 2004 Fredrik Roubert
6001e347 395
15f2ee32
RN
396//
397// BASE64 decoding table
398//
// Indexed by byte value: the 6 bit value of a BASE64 digit, or 0xff for
// every byte which is not a BASE64 digit.
static const unsigned char utf7unb64[] =
{
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
434
435size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
436{
15f2ee32
RN
437 size_t len = 0;
438
439 while (*psz && ((!buf) || (len < n)))
440 {
441 unsigned char cc = *psz++;
442 if (cc != '+')
443 {
444 // plain ASCII char
445 if (buf)
446 *buf++ = cc;
447 len++;
448 }
449 else if (*psz == '-')
450 {
451 // encoded plus sign
452 if (buf)
453 *buf++ = cc;
454 len++;
455 psz++;
456 }
457 else
458 {
459 // BASE64 encoded string
460 bool lsb;
461 unsigned char c;
462 unsigned int d, l;
463 for (lsb = false, d = 0, l = 0;
464 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
465 {
466 d <<= 6;
467 d += cc;
468 for (l += 6; l >= 8; lsb = !lsb)
469 {
6356d52a 470 c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
471 if (lsb)
472 {
473 if (buf)
474 *buf++ |= c;
475 len ++;
476 }
477 else
478 if (buf)
6356d52a 479 *buf = (wchar_t)(c << 8);
15f2ee32
RN
480 }
481 }
482 if (*psz == '-')
483 psz++;
484 }
485 }
486 if (buf && (len < n))
487 *buf = 0;
488 return len;
6001e347
RR
489}
490
15f2ee32
RN
491//
492// BASE64 encoding table
493//
// Indexed by a 6 bit value: the corresponding BASE64 digit.
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3',
    '4', '5', '6', '7', '8', '9', '+', '/'
};
505
506//
507// UTF-7 encoding table
508//
509// 0 - Set D (directly encoded characters)
510// 1 - Set O (optional direct characters)
511// 2 - whitespace characters (optional)
512// 3 - special characters
513//
// Indexed by ASCII code (0..127): the class (0 to 3) of the character.
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
525
667e5b3e 526size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32
RN
527{
528
529
530 size_t len = 0;
531
532 while (*psz && ((!buf) || (len < n)))
533 {
534 wchar_t cc = *psz++;
535 if (cc < 0x80 && utf7encode[cc] < 1)
536 {
537 // plain ASCII char
538 if (buf)
539 *buf++ = (char)cc;
540 len++;
541 }
542#ifndef WC_UTF16
79c78d42 543 else if (((wxUint32)cc) > 0xffff)
b2c13097 544 {
15f2ee32
RN
545 // no surrogate pair generation (yet?)
546 return (size_t)-1;
547 }
548#endif
549 else
550 {
551 if (buf)
552 *buf++ = '+';
553 len++;
554 if (cc != '+')
555 {
556 // BASE64 encode string
557 unsigned int lsb, d, l;
558 for (d = 0, l = 0;; psz++)
559 {
560 for (lsb = 0; lsb < 2; lsb ++)
561 {
562 d <<= 8;
563 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
564
565 for (l += 8; l >= 6; )
566 {
567 l -= 6;
568 if (buf)
569 *buf++ = utf7enb64[(d >> l) % 64];
570 len++;
571 }
572 }
573 cc = *psz;
574 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
575 break;
576 }
577 if (l != 0)
578 {
579 if (buf)
580 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
581 len++;
582 }
583 }
584 if (buf)
585 *buf++ = '-';
586 len++;
587 }
588 }
589 if (buf && (len < n))
590 *buf = 0;
591 return len;
6001e347
RR
592}
593
f6bcfd97 594// ----------------------------------------------------------------------------
6001e347 595// UTF-8
f6bcfd97 596// ----------------------------------------------------------------------------
6001e347 597
dccce9ea 598static wxUint32 utf8_max[]=
4def3b35 599 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347 600
3698ae71
VZ
601// boundaries of the private use area we use to (temporarily) remap invalid
602// characters invalid in a UTF-8 encoded string
ea8ce907
RR
603const wxUint32 wxUnicodePUA = 0x100000;
604const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
605
6001e347
RR
606size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
607{
4def3b35
VS
608 size_t len = 0;
609
dccce9ea 610 while (*psz && ((!buf) || (len < n)))
4def3b35 611 {
ea8ce907
RR
612 const char *opsz = psz;
613 bool invalid = false;
4def3b35
VS
614 unsigned char cc = *psz++, fc = cc;
615 unsigned cnt;
dccce9ea 616 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 617 fc <<= 1;
dccce9ea 618 if (!cnt)
4def3b35
VS
619 {
620 // plain ASCII char
dccce9ea 621 if (buf)
4def3b35
VS
622 *buf++ = cc;
623 len++;
561488ef
MW
624
625 // escape the escape character for octal escapes
626 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
627 && cc == '\\' && (!buf || len < n))
628 {
629 if (buf)
630 *buf++ = cc;
631 len++;
632 }
dccce9ea
VZ
633 }
634 else
4def3b35
VS
635 {
636 cnt--;
dccce9ea 637 if (!cnt)
4def3b35
VS
638 {
639 // invalid UTF-8 sequence
ea8ce907 640 invalid = true;
dccce9ea
VZ
641 }
642 else
4def3b35
VS
643 {
644 unsigned ocnt = cnt - 1;
645 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 646 while (cnt--)
4def3b35 647 {
ea8ce907 648 cc = *psz;
dccce9ea 649 if ((cc & 0xC0) != 0x80)
4def3b35
VS
650 {
651 // invalid UTF-8 sequence
ea8ce907
RR
652 invalid = true;
653 break;
4def3b35 654 }
ea8ce907 655 psz++;
4def3b35
VS
656 res = (res << 6) | (cc & 0x3f);
657 }
ea8ce907 658 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
659 {
660 // illegal UTF-8 encoding
ea8ce907 661 invalid = true;
4def3b35 662 }
ea8ce907
RR
663 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
664 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
665 {
666 // if one of our PUA characters turns up externally
667 // it must also be treated as an illegal sequence
668 // (a bit like you have to escape an escape character)
669 invalid = true;
670 }
671 else
672 {
1cd52418 673#ifdef WC_UTF16
ea8ce907
RR
674 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
675 size_t pa = encode_utf16(res, (wxUint16 *)buf);
676 if (pa == (size_t)-1)
677 {
678 invalid = true;
679 }
680 else
681 {
682 if (buf)
683 buf += pa;
684 len += pa;
685 }
373658eb 686#else // !WC_UTF16
ea8ce907
RR
687 if (buf)
688 *buf++ = res;
689 len++;
373658eb 690#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
691 }
692 }
693 if (invalid)
694 {
695 if (m_options & MAP_INVALID_UTF8_TO_PUA)
696 {
697 while (opsz < psz && (!buf || len < n))
698 {
699#ifdef WC_UTF16
700 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
701 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
702 wxASSERT(pa != (size_t)-1);
703 if (buf)
704 buf += pa;
705 opsz++;
706 len += pa;
707#else
708 if (buf)
709 *buf++ = wxUnicodePUA + (unsigned char)*opsz;
710 opsz++;
711 len++;
712#endif
713 }
714 }
3698ae71 715 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
716 {
717 while (opsz < psz && (!buf || len < n))
718 {
3698ae71
VZ
719 if ( buf && len + 3 < n )
720 {
721 unsigned char n = *opsz;
722 *buf++ = L'\\';
b2c13097
WS
723 *buf++ = (wchar_t)( L'0' + n / 0100 );
724 *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
725 *buf++ = (wchar_t)( L'0' + n % 010 );
3698ae71 726 }
ea8ce907
RR
727 opsz++;
728 len += 4;
729 }
730 }
3698ae71 731 else // MAP_INVALID_UTF8_NOT
ea8ce907
RR
732 {
733 return (size_t)-1;
734 }
4def3b35
VS
735 }
736 }
6001e347 737 }
dccce9ea 738 if (buf && (len < n))
4def3b35
VS
739 *buf = 0;
740 return len;
6001e347
RR
741}
742
3698ae71
VZ
// Return true if wch is one of the octal digits L'0' to L'7'.
static inline bool isoctal(wchar_t wch)
{
    return L'0' <= wch && wch <= L'7';
}
747
6001e347
RR
748size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
749{
4def3b35 750 size_t len = 0;
6001e347 751
dccce9ea 752 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
753 {
754 wxUint32 cc;
1cd52418 755#ifdef WC_UTF16
b5153fd8
VZ
756 // cast is ok for WC_UTF16
757 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 758 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 759#else
4def3b35
VS
760 cc=(*psz++) & 0x7fffffff;
761#endif
3698ae71
VZ
762
763 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
764 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 765 {
dccce9ea 766 if (buf)
ea8ce907 767 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 768 len++;
3698ae71 769 }
561488ef
MW
770 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
771 && cc == L'\\' && psz[0] == L'\\' )
772 {
773 if (buf)
774 *buf++ = (char)cc;
775 psz++;
776 len++;
777 }
3698ae71
VZ
778 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
779 cc == L'\\' &&
780 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 781 {
dccce9ea 782 if (buf)
3698ae71 783 {
b2c13097
WS
784 *buf++ = (char) ((psz[0] - L'0')*0100 +
785 (psz[1] - L'0')*010 +
786 (psz[2] - L'0'));
3698ae71
VZ
787 }
788
789 psz += 3;
ea8ce907
RR
790 len++;
791 }
792 else
793 {
794 unsigned cnt;
795 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
796 if (!cnt)
4def3b35 797 {
ea8ce907
RR
798 // plain ASCII char
799 if (buf)
800 *buf++ = (char) cc;
801 len++;
802 }
803
804 else
805 {
806 len += cnt + 1;
807 if (buf)
808 {
809 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
810 while (cnt--)
811 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
812 }
4def3b35
VS
813 }
814 }
6001e347 815 }
4def3b35 816
3698ae71
VZ
817 if (buf && (len<n))
818 *buf = 0;
adb45366 819
4def3b35 820 return len;
6001e347
RR
821}
822
c91830cb
VZ
823// ----------------------------------------------------------------------------
824// UTF-16
825// ----------------------------------------------------------------------------
826
// "straight" is the UTF-16 converter for the native byte order, "swap" the
// one for the opposite byte order
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
834
835
c91830cb
VZ
836#ifdef WC_UTF16
837
c91830cb
VZ
838// copy 16bit MB to 16bit String
839size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
840{
841 size_t len=0;
842
843 while (*(wxUint16*)psz && (!buf || len < n))
844 {
845 if (buf)
846 *buf++ = *(wxUint16*)psz;
847 len++;
848
849 psz += sizeof(wxUint16);
850 }
851 if (buf && len<n) *buf=0;
852
853 return len;
854}
855
856
857// copy 16bit String to 16bit MB
858size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
859{
860 size_t len=0;
861
862 while (*psz && (!buf || len < n))
863 {
864 if (buf)
865 {
866 *(wxUint16*)buf = *psz;
867 buf += sizeof(wxUint16);
868 }
869 len += sizeof(wxUint16);
870 psz++;
871 }
872 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
873
874 return len;
875}
876
877
878// swap 16bit MB to 16bit String
879size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
880{
881 size_t len=0;
882
883 while (*(wxUint16*)psz && (!buf || len < n))
884 {
885 if (buf)
886 {
887 ((char *)buf)[0] = psz[1];
888 ((char *)buf)[1] = psz[0];
889 buf++;
890 }
891 len++;
892 psz += sizeof(wxUint16);
893 }
894 if (buf && len<n) *buf=0;
895
896 return len;
897}
898
899
900// swap 16bit MB to 16bit String
901size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
902{
903 size_t len=0;
904
905 while (*psz && (!buf || len < n))
906 {
907 if (buf)
908 {
909 *buf++ = ((char*)psz)[1];
910 *buf++ = ((char*)psz)[0];
911 }
912 len += sizeof(wxUint16);
913 psz++;
914 }
915 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
916
917 return len;
918}
919
920
921#else // WC_UTF16
922
923
924// copy 16bit MB to 32bit String
925size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
926{
927 size_t len=0;
928
929 while (*(wxUint16*)psz && (!buf || len < n))
930 {
931 wxUint32 cc;
932 size_t pa=decode_utf16((wxUint16*)psz, cc);
933 if (pa == (size_t)-1)
934 return pa;
935
936 if (buf)
937 *buf++ = cc;
938 len++;
939 psz += pa * sizeof(wxUint16);
940 }
941 if (buf && len<n) *buf=0;
942
943 return len;
944}
945
946
947// copy 32bit String to 16bit MB
948size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
949{
950 size_t len=0;
951
952 while (*psz && (!buf || len < n))
953 {
954 wxUint16 cc[2];
955 size_t pa=encode_utf16(*psz, cc);
956
957 if (pa == (size_t)-1)
958 return pa;
959
960 if (buf)
961 {
69b80d28 962 *(wxUint16*)buf = cc[0];
b5153fd8 963 buf += sizeof(wxUint16);
c91830cb 964 if (pa > 1)
69b80d28
VZ
965 {
966 *(wxUint16*)buf = cc[1];
967 buf += sizeof(wxUint16);
968 }
c91830cb
VZ
969 }
970
971 len += pa*sizeof(wxUint16);
972 psz++;
973 }
974 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
975
976 return len;
977}
978
979
980// swap 16bit MB to 32bit String
981size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
982{
983 size_t len=0;
984
985 while (*(wxUint16*)psz && (!buf || len < n))
986 {
987 wxUint32 cc;
988 char tmp[4];
989 tmp[0]=psz[1]; tmp[1]=psz[0];
990 tmp[2]=psz[3]; tmp[3]=psz[2];
991
992 size_t pa=decode_utf16((wxUint16*)tmp, cc);
993 if (pa == (size_t)-1)
994 return pa;
995
996 if (buf)
997 *buf++ = cc;
998
999 len++;
1000 psz += pa * sizeof(wxUint16);
1001 }
1002 if (buf && len<n) *buf=0;
1003
1004 return len;
1005}
1006
1007
1008// swap 32bit String to 16bit MB
1009size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1010{
1011 size_t len=0;
1012
1013 while (*psz && (!buf || len < n))
1014 {
1015 wxUint16 cc[2];
1016 size_t pa=encode_utf16(*psz, cc);
1017
1018 if (pa == (size_t)-1)
1019 return pa;
1020
1021 if (buf)
1022 {
1023 *buf++ = ((char*)cc)[1];
1024 *buf++ = ((char*)cc)[0];
1025 if (pa > 1)
1026 {
1027 *buf++ = ((char*)cc)[3];
1028 *buf++ = ((char*)cc)[2];
1029 }
1030 }
1031
1032 len += pa*sizeof(wxUint16);
1033 psz++;
1034 }
1035 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1036
1037 return len;
1038}
1039
1040#endif // WC_UTF16
1041
1042
1043// ----------------------------------------------------------------------------
1044// UTF-32
1045// ----------------------------------------------------------------------------
1046
1047#ifdef WORDS_BIGENDIAN
1048#define wxMBConvUTF32straight wxMBConvUTF32BE
1049#define wxMBConvUTF32swap wxMBConvUTF32LE
1050#else
1051#define wxMBConvUTF32swap wxMBConvUTF32BE
1052#define wxMBConvUTF32straight wxMBConvUTF32LE
1053#endif
1054
1055
1056WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1057WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1058
1059
1060#ifdef WC_UTF16
1061
1062// copy 32bit MB to 16bit String
1063size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1064{
1065 size_t len=0;
1066
1067 while (*(wxUint32*)psz && (!buf || len < n))
1068 {
1069 wxUint16 cc[2];
1070
1071 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1072 if (pa == (size_t)-1)
1073 return pa;
1074
1075 if (buf)
1076 {
1077 *buf++ = cc[0];
1078 if (pa > 1)
1079 *buf++ = cc[1];
1080 }
1081 len += pa;
1082 psz += sizeof(wxUint32);
1083 }
1084 if (buf && len<n) *buf=0;
1085
1086 return len;
1087}
1088
1089
1090// copy 16bit String to 32bit MB
1091size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1092{
1093 size_t len=0;
1094
1095 while (*psz && (!buf || len < n))
1096 {
1097 wxUint32 cc;
1098
b5153fd8
VZ
1099 // cast is ok for WC_UTF16
1100 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
1101 if (pa == (size_t)-1)
1102 return pa;
1103
1104 if (buf)
1105 {
1106 *(wxUint32*)buf = cc;
1107 buf += sizeof(wxUint32);
1108 }
1109 len += sizeof(wxUint32);
1110 psz += pa;
1111 }
b5153fd8
VZ
1112
1113 if (buf && len<=n-sizeof(wxUint32))
1114 *(wxUint32*)buf=0;
c91830cb
VZ
1115
1116 return len;
1117}
1118
1119
1120
1121// swap 32bit MB to 16bit String
1122size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1123{
1124 size_t len=0;
1125
1126 while (*(wxUint32*)psz && (!buf || len < n))
1127 {
1128 char tmp[4];
1129 tmp[0] = psz[3]; tmp[1] = psz[2];
1130 tmp[2] = psz[1]; tmp[3] = psz[0];
1131
1132
1133 wxUint16 cc[2];
1134
1135 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1136 if (pa == (size_t)-1)
1137 return pa;
1138
1139 if (buf)
1140 {
1141 *buf++ = cc[0];
1142 if (pa > 1)
1143 *buf++ = cc[1];
1144 }
1145 len += pa;
1146 psz += sizeof(wxUint32);
1147 }
b5153fd8
VZ
1148
1149 if (buf && len<n)
1150 *buf=0;
c91830cb
VZ
1151
1152 return len;
1153}
1154
1155
1156// swap 16bit String to 32bit MB
1157size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1158{
1159 size_t len=0;
1160
1161 while (*psz && (!buf || len < n))
1162 {
1163 char cc[4];
1164
b5153fd8
VZ
1165 // cast is ok for WC_UTF16
1166 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
1167 if (pa == (size_t)-1)
1168 return pa;
1169
1170 if (buf)
1171 {
1172 *buf++ = cc[3];
1173 *buf++ = cc[2];
1174 *buf++ = cc[1];
1175 *buf++ = cc[0];
1176 }
1177 len += sizeof(wxUint32);
1178 psz += pa;
1179 }
b5153fd8
VZ
1180
1181 if (buf && len<=n-sizeof(wxUint32))
1182 *(wxUint32*)buf=0;
c91830cb
VZ
1183
1184 return len;
1185}
1186
1187#else // WC_UTF16
1188
1189
1190// copy 32bit MB to 32bit String
1191size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1192{
1193 size_t len=0;
1194
1195 while (*(wxUint32*)psz && (!buf || len < n))
1196 {
1197 if (buf)
1198 *buf++ = *(wxUint32*)psz;
1199 len++;
1200 psz += sizeof(wxUint32);
1201 }
b5153fd8
VZ
1202
1203 if (buf && len<n)
1204 *buf=0;
c91830cb
VZ
1205
1206 return len;
1207}
1208
1209
1210// copy 32bit String to 32bit MB
1211size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1212{
1213 size_t len=0;
1214
1215 while (*psz && (!buf || len < n))
1216 {
1217 if (buf)
1218 {
1219 *(wxUint32*)buf = *psz;
1220 buf += sizeof(wxUint32);
1221 }
1222
1223 len += sizeof(wxUint32);
1224 psz++;
1225 }
1226
b5153fd8
VZ
1227 if (buf && len<=n-sizeof(wxUint32))
1228 *(wxUint32*)buf=0;
c91830cb
VZ
1229
1230 return len;
1231}
1232
1233
1234// swap 32bit MB to 32bit String
1235size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1236{
1237 size_t len=0;
1238
1239 while (*(wxUint32*)psz && (!buf || len < n))
1240 {
1241 if (buf)
1242 {
1243 ((char *)buf)[0] = psz[3];
1244 ((char *)buf)[1] = psz[2];
1245 ((char *)buf)[2] = psz[1];
1246 ((char *)buf)[3] = psz[0];
1247 buf++;
1248 }
1249 len++;
1250 psz += sizeof(wxUint32);
1251 }
b5153fd8
VZ
1252
1253 if (buf && len<n)
1254 *buf=0;
c91830cb
VZ
1255
1256 return len;
1257}
1258
1259
1260// swap 32bit String to 32bit MB
1261size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1262{
1263 size_t len=0;
1264
1265 while (*psz && (!buf || len < n))
1266 {
1267 if (buf)
1268 {
1269 *buf++ = ((char *)psz)[3];
1270 *buf++ = ((char *)psz)[2];
1271 *buf++ = ((char *)psz)[1];
1272 *buf++ = ((char *)psz)[0];
1273 }
1274 len += sizeof(wxUint32);
1275 psz++;
1276 }
b5153fd8
VZ
1277
1278 if (buf && len<=n-sizeof(wxUint32))
1279 *(wxUint32*)buf=0;
c91830cb
VZ
1280
1281 return len;
1282}
1283
1284
1285#endif // WC_UTF16
1286
1287
36acb880
VZ
1288// ============================================================================
1289// The classes doing conversion using the iconv_xxx() functions
1290// ============================================================================
3caec1bb 1291
b040e242 1292#ifdef HAVE_ICONV
3a0d76bc 1293
b1d547eb
VS
1294// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1295// E2BIG if output buffer is _exactly_ as big as needed. Such case is
1296// (unless there's yet another bug in glibc) the only case when iconv()
1297// returns with (size_t)-1 (which means error) and says there are 0 bytes
1298// left in the input buffer -- when _real_ error occurs,
1299// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1300// iconv() failure.
3caec1bb
VS
1301// [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft) tests the result of an iconv() call: cres is
// its return value and bufLeft the number of input bytes it left unconverted.
// The arguments are parenthesized so that arbitrary expressions can be passed.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
    // old glibc: E2BIG with nothing left in the input is not a real error
    #define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                         (errno != E2BIG || (bufLeft) != 0))
#else
    #define ICONV_FAILED(cres, bufLeft) ((cres) == (size_t)-1)
#endif

// cast the address of the input pointer to the type of the second iconv()
// parameter; ICONV_CONST is defined outside of this section (presumably by
// the build configuration, as either "const" or nothing)
#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880
VZ
1310
1311// ----------------------------------------------------------------------------
e95354ec 1312// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1313// ----------------------------------------------------------------------------
1314
e95354ec 1315class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1316{
1317public:
e95354ec
VZ
1318 wxMBConv_iconv(const wxChar *name);
1319 virtual ~wxMBConv_iconv();
36acb880 1320
bde4baac
VZ
1321 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1322 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1323
e95354ec 1324 bool IsOk() const
36acb880
VZ
1325 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1326
1327protected:
1328 // the iconv handlers used to translate from multibyte to wide char and in
1329 // the other direction
1330 iconv_t m2w,
1331 w2m;
b1d547eb
VS
1332#if wxUSE_THREADS
1333 // guards access to m2w and w2m objects
1334 wxMutex m_iconvMutex;
1335#endif
36acb880
VZ
1336
1337private:
e95354ec 1338 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
1339 // available on this machine, it will remain NULL
1340 static const char *ms_wcCharsetName;
1341
1342 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1343 // different endian-ness than the native one
405d8f46 1344 static bool ms_wcNeedsSwap;
36acb880
VZ
1345};
1346
e95354ec
VZ
1347const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1348bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1349
e95354ec 1350wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 1351{
04c79127
RR
1352 // Do it the hard way
1353 char cname[100];
1354 for (size_t i = 0; i < wxStrlen(name)+1; i++)
1355 cname[i] = (char) name[i];
1356
36acb880
VZ
1357 // check for charset that represents wchar_t:
1358 if (ms_wcCharsetName == NULL)
f1339c56 1359 {
e95354ec 1360 ms_wcNeedsSwap = false;
dccce9ea 1361
36acb880
VZ
1362 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1363 ms_wcCharsetName = WC_NAME_BEST;
04c79127 1364 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 1365
36acb880
VZ
1366 if (m2w == (iconv_t)-1)
1367 {
1368 // try charset w/o bytesex info (e.g. "UCS4")
1369 // and check for bytesex ourselves:
1370 ms_wcCharsetName = WC_NAME;
04c79127 1371 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
1372
1373 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
1374 if (m2w == (iconv_t)-1)
1375 {
36acb880 1376 ms_wcCharsetName = "WCHAR_T";
04c79127 1377 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 1378 }
3a0d76bc 1379
36acb880
VZ
1380 if (m2w != (iconv_t)-1)
1381 {
1382 char buf[2], *bufPtr;
1383 wchar_t wbuf[2], *wbufPtr;
1384 size_t insz, outsz;
1385 size_t res;
1386
1387 buf[0] = 'A';
1388 buf[1] = 0;
1389 wbuf[0] = 0;
1390 insz = 2;
1391 outsz = SIZEOF_WCHAR_T * 2;
1392 wbufPtr = wbuf;
1393 bufPtr = buf;
1394
1395 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1396 (char**)&wbufPtr, &outsz);
1397
1398 if (ICONV_FAILED(res, insz))
3a0d76bc 1399 {
36acb880
VZ
1400 ms_wcCharsetName = NULL;
1401 wxLogLastError(wxT("iconv"));
2b5f62a0 1402 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
1403 }
1404 else
1405 {
36acb880 1406 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
1407 }
1408 }
36acb880
VZ
1409 else
1410 {
1411 ms_wcCharsetName = NULL;
373658eb 1412
77ffb593 1413 // VS: we must not output an error here, since wxWidgets will safely
957686c8
VS
1414 // fall back to using wxEncodingConverter.
1415 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1416 //wxLogError(
36acb880 1417 }
3a0d76bc 1418 }
36acb880 1419 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 1420 }
36acb880 1421 else // we already have ms_wcCharsetName
3caec1bb 1422 {
04c79127 1423 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 1424 }
dccce9ea 1425
36acb880
VZ
1426 // NB: don't ever pass NULL to iconv_open(), it may crash!
1427 if ( ms_wcCharsetName )
f1339c56 1428 {
04c79127 1429 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 1430 }
405d8f46
VZ
1431 else
1432 {
1433 w2m = (iconv_t)-1;
1434 }
36acb880 1435}
3caec1bb 1436
e95354ec 1437wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
1438{
1439 if ( m2w != (iconv_t)-1 )
1440 iconv_close(m2w);
1441 if ( w2m != (iconv_t)-1 )
1442 iconv_close(w2m);
1443}
3a0d76bc 1444
bde4baac 1445size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1446{
b1d547eb
VS
1447#if wxUSE_THREADS
1448 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1449 // Unfortunately there is a couple of global wxCSConv objects such as
1450 // wxConvLocal that are used all over wx code, so we have to make sure
1451 // the handle is used by at most one thread at the time. Otherwise
1452 // only a few wx classes would be safe to use from non-main threads
1453 // as MB<->WC conversion would fail "randomly".
1454 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1455#endif
3698ae71 1456
36acb880
VZ
1457 size_t inbuf = strlen(psz);
1458 size_t outbuf = n * SIZEOF_WCHAR_T;
1459 size_t res, cres;
1460 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1461 wchar_t *bufPtr = buf;
1462 const char *pszPtr = psz;
1463
1464 if (buf)
1465 {
1466 // have destination buffer, convert there
1467 cres = iconv(m2w,
1468 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1469 (char**)&bufPtr, &outbuf);
1470 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1471
36acb880 1472 if (ms_wcNeedsSwap)
3a0d76bc 1473 {
36acb880
VZ
1474 // convert to native endianness
1475 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 1476 }
adb45366 1477
49dd9820
VS
1478 // NB: iconv was given only strlen(psz) characters on input, and so
1479 // it couldn't convert the trailing zero. Let's do it ourselves
1480 // if there's some room left for it in the output buffer.
1481 if (res < n)
1482 buf[res] = 0;
36acb880
VZ
1483 }
1484 else
1485 {
1486 // no destination buffer... convert using temp buffer
1487 // to calculate destination buffer requirement
1488 wchar_t tbuf[8];
1489 res = 0;
1490 do {
1491 bufPtr = tbuf;
1492 outbuf = 8*SIZEOF_WCHAR_T;
1493
1494 cres = iconv(m2w,
1495 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1496 (char**)&bufPtr, &outbuf );
1497
1498 res += 8-(outbuf/SIZEOF_WCHAR_T);
1499 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1500 }
dccce9ea 1501
36acb880 1502 if (ICONV_FAILED(cres, inbuf))
f1339c56 1503 {
36acb880
VZ
1504 //VS: it is ok if iconv fails, hence trace only
1505 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1506 return (size_t)-1;
1507 }
1508
1509 return res;
1510}
1511
bde4baac 1512size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1513{
b1d547eb
VS
1514#if wxUSE_THREADS
1515 // NB: explained in MB2WC
1516 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1517#endif
3698ae71 1518
f8d791e0 1519 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1520 size_t outbuf = n;
1521 size_t res, cres;
3a0d76bc 1522
36acb880 1523 wchar_t *tmpbuf = 0;
3caec1bb 1524
36acb880
VZ
1525 if (ms_wcNeedsSwap)
1526 {
1527 // need to copy to temp buffer to switch endianness
1528 // this absolutely doesn't rock!
1529 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1530 // could be in read-only memory, or be accessed in some other thread)
1531 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1532 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1533 WC_BSWAP(tmpbuf, inbuf)
1534 psz=tmpbuf;
1535 }
3a0d76bc 1536
36acb880
VZ
1537 if (buf)
1538 {
1539 // have destination buffer, convert there
1540 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1541
36acb880 1542 res = n-outbuf;
adb45366 1543
49dd9820
VS
1544 // NB: iconv was given only wcslen(psz) characters on input, and so
1545 // it couldn't convert the trailing zero. Let's do it ourselves
1546 // if there's some room left for it in the output buffer.
1547 if (res < n)
1548 buf[0] = 0;
36acb880
VZ
1549 }
1550 else
1551 {
1552 // no destination buffer... convert using temp buffer
1553 // to calculate destination buffer requirement
1554 char tbuf[16];
1555 res = 0;
1556 do {
1557 buf = tbuf; outbuf = 16;
1558
1559 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1560
36acb880
VZ
1561 res += 16 - outbuf;
1562 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1563 }
dccce9ea 1564
36acb880
VZ
1565 if (ms_wcNeedsSwap)
1566 {
1567 free(tmpbuf);
1568 }
dccce9ea 1569
36acb880
VZ
1570 if (ICONV_FAILED(cres, inbuf))
1571 {
1572 //VS: it is ok if iconv fails, hence trace only
1573 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1574 return (size_t)-1;
1575 }
1576
1577 return res;
1578}
1579
b040e242 1580#endif // HAVE_ICONV
36acb880 1581
e95354ec 1582
36acb880
VZ
1583// ============================================================================
1584// Win32 conversion classes
1585// ============================================================================
1cd52418 1586
e95354ec 1587#ifdef wxHAVE_WIN32_MB2WC
373658eb 1588
8b04d4c4 1589// from utils.cpp
d775fa82 1590#if wxUSE_FONTMAP
8b04d4c4
VZ
1591extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1592extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1593#endif
373658eb 1594
e95354ec 1595class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1596{
1597public:
bde4baac
VZ
1598 wxMBConv_win32()
1599 {
1600 m_CodePage = CP_ACP;
1601 }
1602
7608a683 1603#if wxUSE_FONTMAP
e95354ec 1604 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1605 {
1606 m_CodePage = wxCharsetToCodepage(name);
1607 }
dccce9ea 1608
e95354ec 1609 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1610 {
1611 m_CodePage = wxEncodingToCodepage(encoding);
1612 }
7608a683 1613#endif
8b04d4c4 1614
bde4baac 1615 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1616 {
02272c9c
VZ
1617 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1618 // the behaviour is not compatible with the Unix version (using iconv)
1619 // and break the library itself, e.g. wxTextInputStream::NextChar()
1620 // wouldn't work if reading an incomplete MB char didn't result in an
1621 // error
667e5b3e
VZ
1622 //
1623 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1624 // an error (tested under Windows Server 2003) and apparently it is
1625 // done on purpose, i.e. the function accepts any input in this case
1626 // and although I'd prefer to return error on ill-formed output, our
1627 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1628 // explicitly ill-formed according to RFC 2152) neither so we don't
1629 // even have any fallback here...
1630 int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1631
2b5f62a0
VZ
1632 const size_t len = ::MultiByteToWideChar
1633 (
1634 m_CodePage, // code page
667e5b3e 1635 flags, // flags: fall on error
2b5f62a0
VZ
1636 psz, // input string
1637 -1, // its length (NUL-terminated)
b4da152e 1638 buf, // output string
2b5f62a0
VZ
1639 buf ? n : 0 // size of output buffer
1640 );
1641
03a991bc
VZ
1642 // note that it returns count of written chars for buf != NULL and size
1643 // of the needed buffer for buf == NULL so in either case the length of
1644 // the string (which never includes the terminating NUL) is one less
1645 return len ? len - 1 : (size_t)-1;
f1339c56 1646 }
dccce9ea 1647
13dd924a 1648 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1649 {
13dd924a
VZ
1650 /*
1651 we have a problem here: by default, WideCharToMultiByte() may
1652 replace characters unrepresentable in the target code page with bad
1653 quality approximations such as turning "1/2" symbol (U+00BD) into
1654 "1" for the code pages which don't have it and we, obviously, want
1655 to avoid this at any price
d775fa82 1656
13dd924a
VZ
1657 the trouble is that this function does it _silently_, i.e. it won't
1658 even tell us whether it did or not... Win98/2000 and higher provide
1659 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1660 we have to resort to a round trip, i.e. check that converting back
1661 results in the same string -- this is, of course, expensive but
1662 otherwise we simply can't be sure to not garble the data.
1663 */
1664
1665 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1666 // it doesn't work with CJK encodings (which we test for rather roughly
1667 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1668 // supporting it
907173e5
WS
1669 BOOL usedDef wxDUMMY_INITIALIZE(false);
1670 BOOL *pUsedDef;
13dd924a
VZ
1671 int flags;
1672 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1673 {
1674 // it's our lucky day
1675 flags = WC_NO_BEST_FIT_CHARS;
1676 pUsedDef = &usedDef;
1677 }
1678 else // old system or unsupported encoding
1679 {
1680 flags = 0;
1681 pUsedDef = NULL;
1682 }
1683
2b5f62a0
VZ
1684 const size_t len = ::WideCharToMultiByte
1685 (
1686 m_CodePage, // code page
13dd924a
VZ
1687 flags, // either none or no best fit
1688 pwz, // input string
2b5f62a0
VZ
1689 -1, // it is (wide) NUL-terminated
1690 buf, // output buffer
1691 buf ? n : 0, // and its size
1692 NULL, // default "replacement" char
13dd924a 1693 pUsedDef // [out] was it used?
2b5f62a0
VZ
1694 );
1695
13dd924a
VZ
1696 if ( !len )
1697 {
1698 // function totally failed
1699 return (size_t)-1;
1700 }
1701
1702 // if we were really converting, check if we succeeded
1703 if ( buf )
1704 {
1705 if ( flags )
1706 {
1707 // check if the conversion failed, i.e. if any replacements
1708 // were done
1709 if ( usedDef )
1710 return (size_t)-1;
1711 }
1712 else // we must resort to double tripping...
1713 {
1714 wxWCharBuffer wcBuf(n);
1715 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1716 wcscmp(wcBuf, pwz) != 0 )
1717 {
1718 // we didn't obtain the same thing we started from, hence
1719 // the conversion was lossy and we consider that it failed
1720 return (size_t)-1;
1721 }
1722 }
1723 }
1724
03a991bc 1725 // see the comment above for the reason of "len - 1"
13dd924a 1726 return len - 1;
f1339c56 1727 }
dccce9ea 1728
13dd924a
VZ
1729 bool IsOk() const { return m_CodePage != -1; }
1730
1731private:
1732 static bool CanUseNoBestFit()
1733 {
1734 static int s_isWin98Or2k = -1;
1735
1736 if ( s_isWin98Or2k == -1 )
1737 {
1738 int verMaj, verMin;
1739 switch ( wxGetOsVersion(&verMaj, &verMin) )
1740 {
1741 case wxWIN95:
1742 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1743 break;
1744
1745 case wxWINDOWS_NT:
1746 s_isWin98Or2k = verMaj >= 5;
1747 break;
1748
1749 default:
1750 // unknown, be conseravtive by default
1751 s_isWin98Or2k = 0;
1752 }
1753
1754 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1755 }
1756
1757 return s_isWin98Or2k == 1;
1758 }
f1339c56 1759
b1d66b54 1760 long m_CodePage;
1cd52418 1761};
e95354ec
VZ
1762
1763#endif // wxHAVE_WIN32_MB2WC
1764
f7e98dee
RN
1765// ============================================================================
1766// Cocoa conversion classes
1767// ============================================================================
1768
1769#if defined(__WXCOCOA__)
1770
ecd9653b 1771// RN: There is no UTF-32 support in either Core Foundation or
f7e98dee
RN
1772// Cocoa. Strangely enough, internally Core Foundation uses
1773// UTF 32 internally quite a bit - its just not public (yet).
1774
1775#include <CoreFoundation/CFString.h>
1776#include <CoreFoundation/CFStringEncodingExt.h>
1777
1778CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 1779{
638357a0 1780 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ecd9653b
WS
1781 if ( encoding == wxFONTENCODING_DEFAULT )
1782 {
638357a0 1783 enc = CFStringGetSystemEncoding();
ecd9653b
WS
1784 }
1785 else switch( encoding)
1786 {
1787 case wxFONTENCODING_ISO8859_1 :
1788 enc = kCFStringEncodingISOLatin1 ;
1789 break ;
1790 case wxFONTENCODING_ISO8859_2 :
1791 enc = kCFStringEncodingISOLatin2;
1792 break ;
1793 case wxFONTENCODING_ISO8859_3 :
1794 enc = kCFStringEncodingISOLatin3 ;
1795 break ;
1796 case wxFONTENCODING_ISO8859_4 :
1797 enc = kCFStringEncodingISOLatin4;
1798 break ;
1799 case wxFONTENCODING_ISO8859_5 :
1800 enc = kCFStringEncodingISOLatinCyrillic;
1801 break ;
1802 case wxFONTENCODING_ISO8859_6 :
1803 enc = kCFStringEncodingISOLatinArabic;
1804 break ;
1805 case wxFONTENCODING_ISO8859_7 :
1806 enc = kCFStringEncodingISOLatinGreek;
1807 break ;
1808 case wxFONTENCODING_ISO8859_8 :
1809 enc = kCFStringEncodingISOLatinHebrew;
1810 break ;
1811 case wxFONTENCODING_ISO8859_9 :
1812 enc = kCFStringEncodingISOLatin5;
1813 break ;
1814 case wxFONTENCODING_ISO8859_10 :
1815 enc = kCFStringEncodingISOLatin6;
1816 break ;
1817 case wxFONTENCODING_ISO8859_11 :
1818 enc = kCFStringEncodingISOLatinThai;
1819 break ;
1820 case wxFONTENCODING_ISO8859_13 :
1821 enc = kCFStringEncodingISOLatin7;
1822 break ;
1823 case wxFONTENCODING_ISO8859_14 :
1824 enc = kCFStringEncodingISOLatin8;
1825 break ;
1826 case wxFONTENCODING_ISO8859_15 :
1827 enc = kCFStringEncodingISOLatin9;
1828 break ;
1829
1830 case wxFONTENCODING_KOI8 :
1831 enc = kCFStringEncodingKOI8_R;
1832 break ;
1833 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1834 enc = kCFStringEncodingDOSRussian;
1835 break ;
1836
1837// case wxFONTENCODING_BULGARIAN :
1838// enc = ;
1839// break ;
1840
1841 case wxFONTENCODING_CP437 :
1842 enc =kCFStringEncodingDOSLatinUS ;
1843 break ;
1844 case wxFONTENCODING_CP850 :
1845 enc = kCFStringEncodingDOSLatin1;
1846 break ;
1847 case wxFONTENCODING_CP852 :
1848 enc = kCFStringEncodingDOSLatin2;
1849 break ;
1850 case wxFONTENCODING_CP855 :
1851 enc = kCFStringEncodingDOSCyrillic;
1852 break ;
1853 case wxFONTENCODING_CP866 :
1854 enc =kCFStringEncodingDOSRussian ;
1855 break ;
1856 case wxFONTENCODING_CP874 :
1857 enc = kCFStringEncodingDOSThai;
1858 break ;
1859 case wxFONTENCODING_CP932 :
1860 enc = kCFStringEncodingDOSJapanese;
1861 break ;
1862 case wxFONTENCODING_CP936 :
1863 enc =kCFStringEncodingDOSChineseSimplif ;
1864 break ;
1865 case wxFONTENCODING_CP949 :
1866 enc = kCFStringEncodingDOSKorean;
1867 break ;
1868 case wxFONTENCODING_CP950 :
1869 enc = kCFStringEncodingDOSChineseTrad;
1870 break ;
ecd9653b
WS
1871 case wxFONTENCODING_CP1250 :
1872 enc = kCFStringEncodingWindowsLatin2;
1873 break ;
1874 case wxFONTENCODING_CP1251 :
1875 enc =kCFStringEncodingWindowsCyrillic ;
1876 break ;
1877 case wxFONTENCODING_CP1252 :
1878 enc =kCFStringEncodingWindowsLatin1 ;
1879 break ;
1880 case wxFONTENCODING_CP1253 :
1881 enc = kCFStringEncodingWindowsGreek;
1882 break ;
1883 case wxFONTENCODING_CP1254 :
1884 enc = kCFStringEncodingWindowsLatin5;
1885 break ;
1886 case wxFONTENCODING_CP1255 :
1887 enc =kCFStringEncodingWindowsHebrew ;
1888 break ;
1889 case wxFONTENCODING_CP1256 :
1890 enc =kCFStringEncodingWindowsArabic ;
1891 break ;
1892 case wxFONTENCODING_CP1257 :
1893 enc = kCFStringEncodingWindowsBalticRim;
1894 break ;
638357a0
RN
1895// This only really encodes to UTF7 (if that) evidently
1896// case wxFONTENCODING_UTF7 :
1897// enc = kCFStringEncodingNonLossyASCII ;
1898// break ;
ecd9653b
WS
1899 case wxFONTENCODING_UTF8 :
1900 enc = kCFStringEncodingUTF8 ;
1901 break ;
1902 case wxFONTENCODING_EUC_JP :
1903 enc = kCFStringEncodingEUC_JP;
1904 break ;
1905 case wxFONTENCODING_UTF16 :
f7e98dee 1906 enc = kCFStringEncodingUnicode ;
ecd9653b 1907 break ;
f7e98dee
RN
1908 case wxFONTENCODING_MACROMAN :
1909 enc = kCFStringEncodingMacRoman ;
1910 break ;
1911 case wxFONTENCODING_MACJAPANESE :
1912 enc = kCFStringEncodingMacJapanese ;
1913 break ;
1914 case wxFONTENCODING_MACCHINESETRAD :
1915 enc = kCFStringEncodingMacChineseTrad ;
1916 break ;
1917 case wxFONTENCODING_MACKOREAN :
1918 enc = kCFStringEncodingMacKorean ;
1919 break ;
1920 case wxFONTENCODING_MACARABIC :
1921 enc = kCFStringEncodingMacArabic ;
1922 break ;
1923 case wxFONTENCODING_MACHEBREW :
1924 enc = kCFStringEncodingMacHebrew ;
1925 break ;
1926 case wxFONTENCODING_MACGREEK :
1927 enc = kCFStringEncodingMacGreek ;
1928 break ;
1929 case wxFONTENCODING_MACCYRILLIC :
1930 enc = kCFStringEncodingMacCyrillic ;
1931 break ;
1932 case wxFONTENCODING_MACDEVANAGARI :
1933 enc = kCFStringEncodingMacDevanagari ;
1934 break ;
1935 case wxFONTENCODING_MACGURMUKHI :
1936 enc = kCFStringEncodingMacGurmukhi ;
1937 break ;
1938 case wxFONTENCODING_MACGUJARATI :
1939 enc = kCFStringEncodingMacGujarati ;
1940 break ;
1941 case wxFONTENCODING_MACORIYA :
1942 enc = kCFStringEncodingMacOriya ;
1943 break ;
1944 case wxFONTENCODING_MACBENGALI :
1945 enc = kCFStringEncodingMacBengali ;
1946 break ;
1947 case wxFONTENCODING_MACTAMIL :
1948 enc = kCFStringEncodingMacTamil ;
1949 break ;
1950 case wxFONTENCODING_MACTELUGU :
1951 enc = kCFStringEncodingMacTelugu ;
1952 break ;
1953 case wxFONTENCODING_MACKANNADA :
1954 enc = kCFStringEncodingMacKannada ;
1955 break ;
1956 case wxFONTENCODING_MACMALAJALAM :
1957 enc = kCFStringEncodingMacMalayalam ;
1958 break ;
1959 case wxFONTENCODING_MACSINHALESE :
1960 enc = kCFStringEncodingMacSinhalese ;
1961 break ;
1962 case wxFONTENCODING_MACBURMESE :
1963 enc = kCFStringEncodingMacBurmese ;
1964 break ;
1965 case wxFONTENCODING_MACKHMER :
1966 enc = kCFStringEncodingMacKhmer ;
1967 break ;
1968 case wxFONTENCODING_MACTHAI :
1969 enc = kCFStringEncodingMacThai ;
1970 break ;
1971 case wxFONTENCODING_MACLAOTIAN :
1972 enc = kCFStringEncodingMacLaotian ;
1973 break ;
1974 case wxFONTENCODING_MACGEORGIAN :
1975 enc = kCFStringEncodingMacGeorgian ;
1976 break ;
1977 case wxFONTENCODING_MACARMENIAN :
1978 enc = kCFStringEncodingMacArmenian ;
1979 break ;
1980 case wxFONTENCODING_MACCHINESESIMP :
1981 enc = kCFStringEncodingMacChineseSimp ;
1982 break ;
1983 case wxFONTENCODING_MACTIBETAN :
1984 enc = kCFStringEncodingMacTibetan ;
1985 break ;
1986 case wxFONTENCODING_MACMONGOLIAN :
1987 enc = kCFStringEncodingMacMongolian ;
1988 break ;
1989 case wxFONTENCODING_MACETHIOPIC :
1990 enc = kCFStringEncodingMacEthiopic ;
1991 break ;
1992 case wxFONTENCODING_MACCENTRALEUR :
1993 enc = kCFStringEncodingMacCentralEurRoman ;
1994 break ;
1995 case wxFONTENCODING_MACVIATNAMESE :
1996 enc = kCFStringEncodingMacVietnamese ;
1997 break ;
1998 case wxFONTENCODING_MACARABICEXT :
1999 enc = kCFStringEncodingMacExtArabic ;
2000 break ;
2001 case wxFONTENCODING_MACSYMBOL :
2002 enc = kCFStringEncodingMacSymbol ;
2003 break ;
2004 case wxFONTENCODING_MACDINGBATS :
2005 enc = kCFStringEncodingMacDingbats ;
2006 break ;
2007 case wxFONTENCODING_MACTURKISH :
2008 enc = kCFStringEncodingMacTurkish ;
2009 break ;
2010 case wxFONTENCODING_MACCROATIAN :
2011 enc = kCFStringEncodingMacCroatian ;
2012 break ;
2013 case wxFONTENCODING_MACICELANDIC :
2014 enc = kCFStringEncodingMacIcelandic ;
2015 break ;
2016 case wxFONTENCODING_MACROMANIAN :
2017 enc = kCFStringEncodingMacRomanian ;
2018 break ;
2019 case wxFONTENCODING_MACCELTIC :
2020 enc = kCFStringEncodingMacCeltic ;
2021 break ;
2022 case wxFONTENCODING_MACGAELIC :
2023 enc = kCFStringEncodingMacGaelic ;
2024 break ;
ecd9653b
WS
2025// case wxFONTENCODING_MACKEYBOARD :
2026// enc = kCFStringEncodingMacKeyboardGlyphs ;
2027// break ;
2028 default :
2029 // because gcc is picky
2030 break ;
2031 } ;
2032 return enc ;
f7e98dee
RN
2033}
2034
f7e98dee
RN
2035class wxMBConv_cocoa : public wxMBConv
2036{
2037public:
2038 wxMBConv_cocoa()
2039 {
2040 Init(CFStringGetSystemEncoding()) ;
2041 }
2042
a6900d10 2043#if wxUSE_FONTMAP
f7e98dee
RN
2044 wxMBConv_cocoa(const wxChar* name)
2045 {
267e11c5 2046 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
f7e98dee 2047 }
a6900d10 2048#endif
f7e98dee
RN
2049
2050 wxMBConv_cocoa(wxFontEncoding encoding)
2051 {
2052 Init( wxCFStringEncFromFontEnc(encoding) );
2053 }
2054
2055 ~wxMBConv_cocoa()
2056 {
2057 }
2058
2059 void Init( CFStringEncoding encoding)
2060 {
638357a0 2061 m_encoding = encoding ;
f7e98dee
RN
2062 }
2063
2064 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2065 {
2066 wxASSERT(szUnConv);
ecd9653b 2067
638357a0
RN
2068 CFStringRef theString = CFStringCreateWithBytes (
2069 NULL, //the allocator
2070 (const UInt8*)szUnConv,
2071 strlen(szUnConv),
2072 m_encoding,
2073 false //no BOM/external representation
f7e98dee
RN
2074 );
2075
2076 wxASSERT(theString);
2077
638357a0
RN
2078 size_t nOutLength = CFStringGetLength(theString);
2079
2080 if (szOut == NULL)
f7e98dee 2081 {
f7e98dee 2082 CFRelease(theString);
638357a0 2083 return nOutLength;
f7e98dee 2084 }
ecd9653b 2085
638357a0 2086 CFRange theRange = { 0, nOutSize };
ecd9653b 2087
638357a0
RN
2088#if SIZEOF_WCHAR_T == 4
2089 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2090#endif
3698ae71 2091
f7e98dee 2092 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
3698ae71 2093
f7e98dee 2094 CFRelease(theString);
ecd9653b 2095
638357a0 2096 szUniCharBuffer[nOutLength] = '\0' ;
f7e98dee
RN
2097
2098#if SIZEOF_WCHAR_T == 4
2099 wxMBConvUTF16 converter ;
638357a0 2100 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
f7e98dee
RN
2101 delete[] szUniCharBuffer;
2102#endif
3698ae71 2103
638357a0 2104 return nOutLength;
f7e98dee
RN
2105 }
2106
2107 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2108 {
638357a0 2109 wxASSERT(szUnConv);
3698ae71 2110
f7e98dee 2111 size_t nRealOutSize;
638357a0 2112 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 2113 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2114
f7e98dee 2115#if SIZEOF_WCHAR_T == 4
d9d488cf 2116 wxMBConvUTF16 converter ;
f7e98dee
RN
2117 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2118 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2119 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2120 nBufSize /= sizeof(UniChar);
f7e98dee
RN
2121#endif
2122
2123 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2124 NULL, //allocator
2125 szUniBuffer,
2126 nBufSize,
638357a0 2127 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 2128 );
ecd9653b 2129
f7e98dee 2130 wxASSERT(theString);
ecd9653b 2131
f7e98dee 2132 //Note that CER puts a BOM when converting to unicode
638357a0
RN
2133 //so we check and use getchars instead in that case
2134 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 2135 {
638357a0
RN
2136 if (szOut != NULL)
2137 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
3698ae71 2138
638357a0
RN
2139 nRealOutSize = CFStringGetLength(theString) + 1;
2140 }
2141 else
2142 {
2143 CFStringGetBytes(
2144 theString,
2145 CFRangeMake(0, CFStringGetLength(theString)),
2146 m_encoding,
2147 0, //what to put in characters that can't be converted -
2148 //0 tells CFString to return NULL if it meets such a character
2149 false, //not an external representation
2150 (UInt8*) szOut,
3698ae71 2151 nOutSize,
638357a0
RN
2152 (CFIndex*) &nRealOutSize
2153 );
f7e98dee 2154 }
ecd9653b 2155
638357a0 2156 CFRelease(theString);
ecd9653b 2157
638357a0
RN
2158#if SIZEOF_WCHAR_T == 4
2159 delete[] szUniBuffer;
2160#endif
ecd9653b 2161
f7e98dee
RN
2162 return nRealOutSize - 1;
2163 }
2164
2165 bool IsOk() const
ecd9653b 2166 {
3698ae71 2167 return m_encoding != kCFStringEncodingInvalidId &&
638357a0 2168 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2169 }
2170
2171private:
638357a0 2172 CFStringEncoding m_encoding ;
f7e98dee
RN
2173};
2174
2175#endif // defined(__WXCOCOA__)
2176
335d31e0
SC
2177// ============================================================================
2178// Mac conversion classes
2179// ============================================================================
2180
2181#if defined(__WXMAC__) && defined(TARGET_CARBON)
2182
2183class wxMBConv_mac : public wxMBConv
2184{
2185public:
2186 wxMBConv_mac()
2187 {
2188 Init(CFStringGetSystemEncoding()) ;
2189 }
2190
2d1659cf 2191#if wxUSE_FONTMAP
335d31e0
SC
2192 wxMBConv_mac(const wxChar* name)
2193 {
267e11c5 2194 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
335d31e0 2195 }
2d1659cf 2196#endif
335d31e0
SC
2197
2198 wxMBConv_mac(wxFontEncoding encoding)
2199 {
d775fa82
WS
2200 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2201 }
2202
2203 ~wxMBConv_mac()
2204 {
2205 OSStatus status = noErr ;
2206 status = TECDisposeConverter(m_MB2WC_converter);
2207 status = TECDisposeConverter(m_WC2MB_converter);
2208 }
2209
2210
2211 void Init( TextEncodingBase encoding)
2212 {
2213 OSStatus status = noErr ;
2214 m_char_encoding = encoding ;
2215 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2216
2217 status = TECCreateConverter(&m_MB2WC_converter,
2218 m_char_encoding,
2219 m_unicode_encoding);
2220 status = TECCreateConverter(&m_WC2MB_converter,
2221 m_unicode_encoding,
2222 m_char_encoding);
2223 }
2224
335d31e0
SC
2225 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2226 {
d775fa82
WS
2227 OSStatus status = noErr ;
2228 ByteCount byteOutLen ;
2229 ByteCount byteInLen = strlen(psz) ;
2230 wchar_t *tbuf = NULL ;
2231 UniChar* ubuf = NULL ;
2232 size_t res = 0 ;
2233
2234 if (buf == NULL)
2235 {
638357a0 2236 //apple specs say at least 32
c543817b 2237 n = wxMax( 32 , byteInLen ) ;
d775fa82
WS
2238 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2239 }
2240 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
f3a355ce 2241#if SIZEOF_WCHAR_T == 4
d775fa82 2242 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2243#else
d775fa82 2244 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2245#endif
d775fa82
WS
2246 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2247 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
f3a355ce 2248#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2249 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2250 // is not properly terminated we get random characters at the end
2251 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d9d488cf 2252 wxMBConvUTF16 converter ;
d775fa82
WS
2253 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2254 free( ubuf ) ;
f3a355ce 2255#else
d775fa82 2256 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2257#endif
d775fa82
WS
2258 if ( buf == NULL )
2259 free(tbuf) ;
335d31e0 2260
335d31e0
SC
2261 if ( buf && res < n)
2262 buf[res] = 0;
2263
d775fa82 2264 return res ;
335d31e0
SC
2265 }
2266
2267 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82
WS
2268 {
2269 OSStatus status = noErr ;
2270 ByteCount byteOutLen ;
2271 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2272
2273 char *tbuf = NULL ;
2274
2275 if (buf == NULL)
2276 {
638357a0 2277 //apple specs say at least 32
c543817b 2278 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2279 tbuf = (char*) malloc( n ) ;
2280 }
2281
2282 ByteCount byteBufferLen = n ;
2283 UniChar* ubuf = NULL ;
f3a355ce 2284#if SIZEOF_WCHAR_T == 4
d9d488cf 2285 wxMBConvUTF16 converter ;
d775fa82
WS
2286 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2287 byteInLen = unicharlen ;
2288 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2289 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce 2290#else
d775fa82 2291 ubuf = (UniChar*) psz ;
f3a355ce 2292#endif
d775fa82
WS
2293 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2294 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
f3a355ce 2295#if SIZEOF_WCHAR_T == 4
d775fa82 2296 free( ubuf ) ;
f3a355ce 2297#endif
d775fa82
WS
2298 if ( buf == NULL )
2299 free(tbuf) ;
335d31e0 2300
d775fa82 2301 size_t res = byteOutLen ;
335d31e0 2302 if ( buf && res < n)
638357a0 2303 {
335d31e0 2304 buf[res] = 0;
3698ae71 2305
638357a0
RN
2306 //we need to double-trip to verify it didn't insert any ? in place
2307 //of bogus characters
2308 wxWCharBuffer wcBuf(n);
2309 size_t pszlen = wxWcslen(psz);
2310 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2311 wxWcslen(wcBuf) != pszlen ||
2312 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2313 {
2314 // we didn't obtain the same thing we started from, hence
2315 // the conversion was lossy and we consider that it failed
2316 return (size_t)-1;
2317 }
2318 }
335d31e0 2319
d775fa82 2320 return res ;
335d31e0
SC
2321 }
2322
2323 bool IsOk() const
2324 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2325
2326private:
d775fa82
WS
2327 TECObjectRef m_MB2WC_converter ;
2328 TECObjectRef m_WC2MB_converter ;
2329
2330 TextEncodingBase m_char_encoding ;
2331 TextEncodingBase m_unicode_encoding ;
335d31e0
SC
2332};
2333
2334#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 2335
36acb880
VZ
2336// ============================================================================
2337// wxEncodingConverter based conversion classes
2338// ============================================================================
2339
1e6feb95 2340#if wxUSE_FONTMAP
1cd52418 2341
e95354ec 2342class wxMBConv_wxwin : public wxMBConv
1cd52418 2343{
8b04d4c4
VZ
2344private:
2345 void Init()
2346 {
2347 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2348 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2349 }
2350
6001e347 2351public:
f1339c56
RR
2352 // temporarily just use wxEncodingConverter stuff,
2353 // so that it works while a better implementation is built
e95354ec 2354 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
2355 {
2356 if (name)
267e11c5 2357 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
2358 else
2359 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 2360
8b04d4c4
VZ
2361 Init();
2362 }
2363
e95354ec 2364 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
2365 {
2366 m_enc = enc;
2367
2368 Init();
f1339c56 2369 }
dccce9ea 2370
bde4baac 2371 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
2372 {
2373 size_t inbuf = strlen(psz);
dccce9ea 2374 if (buf)
c643a977
VS
2375 {
2376 if (!m2w.Convert(psz,buf))
2377 return (size_t)-1;
2378 }
f1339c56
RR
2379 return inbuf;
2380 }
dccce9ea 2381
bde4baac 2382 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 2383 {
f8d791e0 2384 const size_t inbuf = wxWcslen(psz);
f1339c56 2385 if (buf)
c643a977
VS
2386 {
2387 if (!w2m.Convert(psz,buf))
2388 return (size_t)-1;
2389 }
dccce9ea 2390
f1339c56
RR
2391 return inbuf;
2392 }
dccce9ea 2393
e95354ec 2394 bool IsOk() const { return m_ok; }
f1339c56
RR
2395
2396public:
8b04d4c4 2397 wxFontEncoding m_enc;
f1339c56 2398 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
2399
2400 // were we initialized successfully?
2401 bool m_ok;
fc7a2a60 2402
e95354ec 2403 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 2404};
6001e347 2405
1e6feb95
VZ
2406#endif // wxUSE_FONTMAP
2407
36acb880
VZ
2408// ============================================================================
2409// wxCSConv implementation
2410// ============================================================================
2411
8b04d4c4 2412void wxCSConv::Init()
6001e347 2413{
e95354ec
VZ
2414 m_name = NULL;
2415 m_convReal = NULL;
2416 m_deferred = true;
2417}
2418
8b04d4c4
VZ
2419wxCSConv::wxCSConv(const wxChar *charset)
2420{
2421 Init();
82713003 2422
e95354ec
VZ
2423 if ( charset )
2424 {
e95354ec
VZ
2425 SetName(charset);
2426 }
bda3d86a
VZ
2427
2428 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
2429}
2430
8b04d4c4
VZ
2431wxCSConv::wxCSConv(wxFontEncoding encoding)
2432{
bda3d86a 2433 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
2434 {
2435 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2436
2437 encoding = wxFONTENCODING_SYSTEM;
2438 }
2439
8b04d4c4
VZ
2440 Init();
2441
bda3d86a 2442 m_encoding = encoding;
8b04d4c4
VZ
2443}
2444
6001e347
RR
2445wxCSConv::~wxCSConv()
2446{
65e50848
JS
2447 Clear();
2448}
2449
54380f29 2450wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 2451 : wxMBConv()
54380f29 2452{
8b04d4c4
VZ
2453 Init();
2454
54380f29 2455 SetName(conv.m_name);
8b04d4c4 2456 m_encoding = conv.m_encoding;
54380f29
GD
2457}
2458
2459wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2460{
2461 Clear();
8b04d4c4 2462
54380f29 2463 SetName(conv.m_name);
8b04d4c4
VZ
2464 m_encoding = conv.m_encoding;
2465
54380f29
GD
2466 return *this;
2467}
2468
65e50848
JS
2469void wxCSConv::Clear()
2470{
8b04d4c4 2471 free(m_name);
e95354ec 2472 delete m_convReal;
8b04d4c4 2473
65e50848 2474 m_name = NULL;
e95354ec 2475 m_convReal = NULL;
6001e347
RR
2476}
2477
2478void wxCSConv::SetName(const wxChar *charset)
2479{
f1339c56
RR
2480 if (charset)
2481 {
2482 m_name = wxStrdup(charset);
e95354ec 2483 m_deferred = true;
f1339c56 2484 }
6001e347
RR
2485}
2486
e95354ec
VZ
2487wxMBConv *wxCSConv::DoCreate() const
2488{
c547282d
VZ
2489 // check for the special case of ASCII or ISO8859-1 charset: as we have
2490 // special knowledge of it anyhow, we don't need to create a special
2491 // conversion object
2492 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 2493 {
e95354ec
VZ
2494 // don't convert at all
2495 return NULL;
2496 }
dccce9ea 2497
e95354ec
VZ
2498 // we trust OS to do conversion better than we can so try external
2499 // conversion methods first
2500 //
2501 // the full order is:
2502 // 1. OS conversion (iconv() under Unix or Win32 API)
2503 // 2. hard coded conversions for UTF
2504 // 3. wxEncodingConverter as fall back
2505
2506 // step (1)
2507#ifdef HAVE_ICONV
c547282d 2508#if !wxUSE_FONTMAP
e95354ec 2509 if ( m_name )
c547282d 2510#endif // !wxUSE_FONTMAP
e95354ec 2511 {
c547282d
VZ
2512 wxString name(m_name);
2513
2514#if wxUSE_FONTMAP
2515 if ( name.empty() )
d0ee33f5 2516 name = wxFontMapperBase::GetEncodingName(m_encoding);
c547282d
VZ
2517#endif // wxUSE_FONTMAP
2518
2519 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
e95354ec
VZ
2520 if ( conv->IsOk() )
2521 return conv;
2522
2523 delete conv;
2524 }
2525#endif // HAVE_ICONV
2526
2527#ifdef wxHAVE_WIN32_MB2WC
2528 {
7608a683 2529#if wxUSE_FONTMAP
e95354ec
VZ
2530 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2531 : new wxMBConv_win32(m_encoding);
2532 if ( conv->IsOk() )
2533 return conv;
2534
2535 delete conv;
7608a683
WS
2536#else
2537 return NULL;
2538#endif
e95354ec
VZ
2539 }
2540#endif // wxHAVE_WIN32_MB2WC
d775fa82
WS
2541#if defined(__WXMAC__)
2542 {
5c3c8676 2543 // leave UTF16 and UTF32 to the built-ins of wx
3698ae71 2544 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
5c3c8676 2545 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
d775fa82
WS
2546 {
2547
2d1659cf 2548#if wxUSE_FONTMAP
d775fa82
WS
2549 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2550 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
2551#else
2552 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2553#endif
d775fa82 2554 if ( conv->IsOk() )
f7e98dee
RN
2555 return conv;
2556
2557 delete conv;
2558 }
2559 }
2560#endif
2561#if defined(__WXCOCOA__)
2562 {
2563 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2564 {
2565
a6900d10 2566#if wxUSE_FONTMAP
f7e98dee
RN
2567 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2568 : new wxMBConv_cocoa(m_encoding);
a6900d10
RN
2569#else
2570 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2571#endif
f7e98dee 2572 if ( conv->IsOk() )
d775fa82
WS
2573 return conv;
2574
2575 delete conv;
2576 }
335d31e0
SC
2577 }
2578#endif
e95354ec
VZ
2579 // step (2)
2580 wxFontEncoding enc = m_encoding;
2581#if wxUSE_FONTMAP
c547282d
VZ
2582 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2583 {
2584 // use "false" to suppress interactive dialogs -- we can be called from
2585 // anywhere and popping up a dialog from here is the last thing we want to
2586 // do
267e11c5 2587 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 2588 }
e95354ec
VZ
2589#endif // wxUSE_FONTMAP
2590
2591 switch ( enc )
2592 {
2593 case wxFONTENCODING_UTF7:
2594 return new wxMBConvUTF7;
2595
2596 case wxFONTENCODING_UTF8:
2597 return new wxMBConvUTF8;
2598
e95354ec
VZ
2599 case wxFONTENCODING_UTF16BE:
2600 return new wxMBConvUTF16BE;
2601
2602 case wxFONTENCODING_UTF16LE:
2603 return new wxMBConvUTF16LE;
2604
e95354ec
VZ
2605 case wxFONTENCODING_UTF32BE:
2606 return new wxMBConvUTF32BE;
2607
2608 case wxFONTENCODING_UTF32LE:
2609 return new wxMBConvUTF32LE;
2610
2611 default:
2612 // nothing to do but put here to suppress gcc warnings
2613 ;
2614 }
2615
2616 // step (3)
2617#if wxUSE_FONTMAP
2618 {
2619 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2620 : new wxMBConv_wxwin(m_encoding);
2621 if ( conv->IsOk() )
2622 return conv;
2623
2624 delete conv;
2625 }
2626#endif // wxUSE_FONTMAP
2627
a58d4f4d
VS
2628 // NB: This is a hack to prevent deadlock. What could otherwise happen
2629 // in Unicode build: wxConvLocal creation ends up being here
2630 // because of some failure and logs the error. But wxLog will try to
2631 // attach timestamp, for which it will need wxConvLocal (to convert
2632 // time to char* and then wchar_t*), but that fails, tries to log
2633 // error, but wxLog has a (already locked) critical section that
2634 // guards static buffer.
2635 static bool alreadyLoggingError = false;
2636 if (!alreadyLoggingError)
2637 {
2638 alreadyLoggingError = true;
2639 wxLogError(_("Cannot convert from the charset '%s'!"),
2640 m_name ? m_name
e95354ec
VZ
2641 :
2642#if wxUSE_FONTMAP
267e11c5 2643 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
e95354ec
VZ
2644#else // !wxUSE_FONTMAP
2645 wxString::Format(_("encoding %s"), m_encoding).c_str()
2646#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2647 );
a58d4f4d
VS
2648 alreadyLoggingError = false;
2649 }
e95354ec
VZ
2650
2651 return NULL;
2652}
2653
2654void wxCSConv::CreateConvIfNeeded() const
2655{
2656 if ( m_deferred )
2657 {
2658 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
2659
2660#if wxUSE_INTL
2661 // if we don't have neither the name nor the encoding, use the default
2662 // encoding for this system
2663 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2664 {
4d312c22 2665 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
2666 }
2667#endif // wxUSE_INTL
2668
e95354ec
VZ
2669 self->m_convReal = DoCreate();
2670 self->m_deferred = false;
6001e347 2671 }
6001e347
RR
2672}
2673
2674size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2675{
e95354ec 2676 CreateConvIfNeeded();
dccce9ea 2677
e95354ec
VZ
2678 if (m_convReal)
2679 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
2680
2681 // latin-1 (direct)
4def3b35 2682 size_t len = strlen(psz);
dccce9ea 2683
f1339c56
RR
2684 if (buf)
2685 {
4def3b35 2686 for (size_t c = 0; c <= len; c++)
f1339c56
RR
2687 buf[c] = (unsigned char)(psz[c]);
2688 }
dccce9ea 2689
f1339c56 2690 return len;
6001e347
RR
2691}
2692
2693size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2694{
e95354ec 2695 CreateConvIfNeeded();
dccce9ea 2696
e95354ec
VZ
2697 if (m_convReal)
2698 return m_convReal->WC2MB(buf, psz, n);
1cd52418 2699
f1339c56 2700 // latin-1 (direct)
f8d791e0 2701 const size_t len = wxWcslen(psz);
f1339c56
RR
2702 if (buf)
2703 {
4def3b35 2704 for (size_t c = 0; c <= len; c++)
24642831
VS
2705 {
2706 if (psz[c] > 0xFF)
2707 return (size_t)-1;
907173e5 2708 buf[c] = (char)psz[c];
24642831
VS
2709 }
2710 }
2711 else
2712 {
2713 for (size_t c = 0; c <= len; c++)
2714 {
2715 if (psz[c] > 0xFF)
2716 return (size_t)-1;
2717 }
f1339c56 2718 }
dccce9ea 2719
f1339c56 2720 return len;
6001e347
RR
2721}
2722
bde4baac
VZ
2723// ----------------------------------------------------------------------------
2724// globals
2725// ----------------------------------------------------------------------------
2726
2727#ifdef __WINDOWS__
2728 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
2729#elif defined(__WXMAC__) && !defined(__MACH__)
2730 static wxMBConv_mac wxConvLibcObj ;
bde4baac 2731#else
dcc8fac0 2732 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
2733#endif
2734
2735static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2736static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2737static wxMBConvUTF7 wxConvUTF7Obj;
2738static wxMBConvUTF8 wxConvUTF8Obj;
c12b7f79 2739
bde4baac
VZ
2740WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2741WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2742WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2743WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2744WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2745WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
f5a1953b
VZ
2746WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2747#ifdef __WXOSX__
ea8ce907 2748 wxConvUTF8Obj;
f5a1953b 2749#else
ea8ce907 2750 wxConvLibcObj;
f5a1953b
VZ
2751#endif
2752
bde4baac
VZ
2753
2754#else // !wxUSE_WCHAR_T
2755
2756// stand-ins in absence of wchar_t
2757WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2758 wxConvISO8859_1,
2759 wxConvLocal,
2760 wxConvUTF8;
2761
2762#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
2763
2764