]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
Fix for decoding of utf-16 surrogates. Also remove #include that's not needed
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
f6bcfd97
BP
15// ============================================================================
16// declarations
17// ============================================================================
18
19// ----------------------------------------------------------------------------
20// headers
21// ----------------------------------------------------------------------------
22
14f355c2 23#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
24 #pragma implementation "strconv.h"
25#endif
26
27// For compilers that support precompilation, includes "wx.h".
28#include "wx/wxprec.h"
29
30#ifdef __BORLANDC__
31 #pragma hdrstop
32#endif
33
373658eb
VZ
34#ifndef WX_PRECOMP
35 #include "wx/intl.h"
36 #include "wx/log.h"
37#endif // WX_PRECOMP
38
bde4baac
VZ
39#include "wx/strconv.h"
40
41#if wxUSE_WCHAR_T
42
0a1c1e62 43#ifdef __WXMSW__
373658eb 44 #include "wx/msw/private.h"
7608a683
WS
45#endif
46
47#ifdef __WINDOWS__
13dd924a 48 #include "wx/msw/missing.h"
0a1c1e62
GRG
49#endif
50
1c193821 51#ifndef __WXWINCE__
1cd52418 52#include <errno.h>
1c193821
JS
53#endif
54
6001e347
RR
55#include <ctype.h>
56#include <string.h>
57#include <stdlib.h>
58
e95354ec
VZ
59#if defined(__WIN32__) && !defined(__WXMICROWIN__)
60 #define wxHAVE_WIN32_MB2WC
61#endif // __WIN32__ but !__WXMICROWIN__
62
373658eb
VZ
63// ----------------------------------------------------------------------------
64// headers
65// ----------------------------------------------------------------------------
7af284fd 66
6001e347 67#ifdef __SALFORDC__
373658eb 68 #include <clib.h>
6001e347
RR
69#endif
70
b040e242 71#ifdef HAVE_ICONV
373658eb 72 #include <iconv.h>
b1d547eb 73 #include "wx/thread.h"
1cd52418 74#endif
1cd52418 75
373658eb
VZ
76#include "wx/encconv.h"
77#include "wx/fontmap.h"
7608a683 78#include "wx/utils.h"
373658eb 79
335d31e0 80#ifdef __WXMAC__
4227afa4
SC
81#include <ATSUnicode.h>
82#include <TextCommon.h>
83#include <TextEncodingConverter.h>
335d31e0
SC
84
85#include "wx/mac/private.h" // includes mac headers
86#endif
373658eb
VZ
87// ----------------------------------------------------------------------------
88// macros
89// ----------------------------------------------------------------------------
3e61dfb0 90
1cd52418 91#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
3a0d76bc 92#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
1cd52418
OK
93
94#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
95 #define WC_NAME "UCS4"
96 #define WC_BSWAP BSWAP_UCS4
97 #ifdef WORDS_BIGENDIAN
98 #define WC_NAME_BEST "UCS-4BE"
99 #else
100 #define WC_NAME_BEST "UCS-4LE"
101 #endif
1cd52418 102#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
103 #define WC_NAME "UTF16"
104 #define WC_BSWAP BSWAP_UTF16
a3f2769e 105 #define WC_UTF16
3a0d76bc
VS
106 #ifdef WORDS_BIGENDIAN
107 #define WC_NAME_BEST "UTF-16BE"
108 #else
109 #define WC_NAME_BEST "UTF-16LE"
110 #endif
bab1e722 111#else // sizeof(wchar_t) != 2 nor 4
bde4baac
VZ
112 // does this ever happen?
113 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1cd52418
OK
114#endif
115
373658eb
VZ
116// ============================================================================
117// implementation
118// ============================================================================
119
120// ----------------------------------------------------------------------------
c91830cb 121// UTF-16 en/decoding to/from UCS-4
373658eb 122// ----------------------------------------------------------------------------
6001e347 123
b0a6bb75 124
c91830cb 125static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 126{
dccce9ea 127 if (input<=0xffff)
4def3b35 128 {
999836aa
VZ
129 if (output)
130 *output = (wxUint16) input;
4def3b35 131 return 1;
dccce9ea
VZ
132 }
133 else if (input>=0x110000)
4def3b35
VS
134 {
135 return (size_t)-1;
dccce9ea
VZ
136 }
137 else
4def3b35 138 {
dccce9ea 139 if (output)
4def3b35 140 {
c91830cb 141 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
999836aa 142 *output = (wxUint16) ((input&0x3ff)+0xdc00);
4def3b35
VS
143 }
144 return 2;
1cd52418 145 }
1cd52418
OK
146}
147
c91830cb 148static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 149{
dccce9ea 150 if ((*input<0xd800) || (*input>0xdfff))
4def3b35
VS
151 {
152 output = *input;
153 return 1;
dccce9ea 154 }
cdb14ecb 155 else if ((input[1]<0xdc00) || (input[1]>0xdfff))
4def3b35
VS
156 {
157 output = *input;
158 return (size_t)-1;
dccce9ea
VZ
159 }
160 else
4def3b35
VS
161 {
162 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
163 return 2;
164 }
1cd52418
OK
165}
166
b0a6bb75 167
f6bcfd97 168// ----------------------------------------------------------------------------
6001e347 169// wxMBConv
f6bcfd97 170// ----------------------------------------------------------------------------
2c53a80a
WS
171
172wxMBConv::~wxMBConv()
173{
174 // nothing to do here (necessary for Darwin linking probably)
175}
6001e347 176
6001e347
RR
177const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
178{
2b5f62a0 179 if ( psz )
6001e347 180 {
2b5f62a0
VZ
181 // calculate the length of the buffer needed first
182 size_t nLen = MB2WC(NULL, psz, 0);
183 if ( nLen != (size_t)-1 )
184 {
185 // now do the actual conversion
186 wxWCharBuffer buf(nLen);
635f33ce
VS
187 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
188 if ( nLen != (size_t)-1 )
189 {
190 return buf;
191 }
2b5f62a0 192 }
f6bcfd97 193 }
2b5f62a0
VZ
194
195 wxWCharBuffer buf((wchar_t *)NULL);
196
197 return buf;
6001e347
RR
198}
199
e5cceba0 200const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 201{
2b5f62a0
VZ
202 if ( pwz )
203 {
204 size_t nLen = WC2MB(NULL, pwz, 0);
205 if ( nLen != (size_t)-1 )
206 {
c91830cb 207 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
635f33ce
VS
208 nLen = WC2MB(buf.data(), pwz, nLen + 4);
209 if ( nLen != (size_t)-1 )
210 {
211 return buf;
212 }
2b5f62a0
VZ
213 }
214 }
215
216 wxCharBuffer buf((char *)NULL);
e5cceba0 217
e5cceba0 218 return buf;
6001e347
RR
219}
220
f5fb6871 221const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 222{
f5fb6871
RN
223 wxASSERT(pOutSize != NULL);
224
e4e3bbb4
RN
225 const char* szEnd = szString + nStringLen + 1;
226 const char* szPos = szString;
227 const char* szStart = szPos;
228
229 size_t nActualLength = 0;
f5fb6871
RN
230 size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
231
232 wxWCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
233
234 //Convert the string until the length() is reached, continuing the
235 //loop every time a null character is reached
236 while(szPos != szEnd)
237 {
238 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
239
240 //Get the length of the current (sub)string
241 size_t nLen = MB2WC(NULL, szPos, 0);
242
243 //Invalid conversion?
244 if( nLen == (size_t)-1 )
f5fb6871
RN
245 {
246 *pOutSize = 0;
247 theBuffer.data()[0u] = wxT('\0');
248 return theBuffer;
249 }
250
e4e3bbb4
RN
251
252 //Increase the actual length (+1 for current null character)
253 nActualLength += nLen + 1;
254
f5fb6871
RN
255 //if buffer too big, realloc the buffer
256 if (nActualLength > (nCurrentSize+1))
257 {
258 wxWCharBuffer theNewBuffer(nCurrentSize << 1);
259 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
260 theBuffer = theNewBuffer;
261 nCurrentSize <<= 1;
262 }
263
264 //Convert the current (sub)string
265 if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 266 {
f5fb6871
RN
267 *pOutSize = 0;
268 theBuffer.data()[0u] = wxT('\0');
269 return theBuffer;
e4e3bbb4
RN
270 }
271
272 //Increment to next (sub)string
273 //Note that we have to use strlen here instead of nLen
274 //here because XX2XX gives us the size of the output buffer,
275 //not neccessarly the length of the string
276 szPos += strlen(szPos) + 1;
277 }
278
f5fb6871
RN
279 //success - return actual length and the buffer
280 *pOutSize = nActualLength;
3698ae71 281 return theBuffer;
e4e3bbb4
RN
282}
283
f5fb6871 284const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 285{
f5fb6871
RN
286 wxASSERT(pOutSize != NULL);
287
e4e3bbb4
RN
288 const wchar_t* szEnd = szString + nStringLen + 1;
289 const wchar_t* szPos = szString;
290 const wchar_t* szStart = szPos;
291
292 size_t nActualLength = 0;
f5fb6871
RN
293 size_t nCurrentSize = nStringLen << 2; //try * 4 first
294
295 wxCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
296
297 //Convert the string until the length() is reached, continuing the
298 //loop every time a null character is reached
299 while(szPos != szEnd)
300 {
301 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
302
303 //Get the length of the current (sub)string
304 size_t nLen = WC2MB(NULL, szPos, 0);
305
306 //Invalid conversion?
307 if( nLen == (size_t)-1 )
f5fb6871
RN
308 {
309 *pOutSize = 0;
310 theBuffer.data()[0u] = wxT('\0');
311 return theBuffer;
312 }
e4e3bbb4
RN
313
314 //Increase the actual length (+1 for current null character)
315 nActualLength += nLen + 1;
3698ae71 316
f5fb6871
RN
317 //if buffer too big, realloc the buffer
318 if (nActualLength > (nCurrentSize+1))
319 {
320 wxCharBuffer theNewBuffer(nCurrentSize << 1);
321 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
322 theBuffer = theNewBuffer;
323 nCurrentSize <<= 1;
324 }
325
326 //Convert the current (sub)string
327 if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 328 {
f5fb6871
RN
329 *pOutSize = 0;
330 theBuffer.data()[0u] = wxT('\0');
331 return theBuffer;
e4e3bbb4
RN
332 }
333
334 //Increment to next (sub)string
335 //Note that we have to use wxWcslen here instead of nLen
336 //here because XX2XX gives us the size of the output buffer,
337 //not neccessarly the length of the string
338 szPos += wxWcslen(szPos) + 1;
339 }
340
f5fb6871
RN
341 //success - return actual length and the buffer
342 *pOutSize = nActualLength;
3698ae71 343 return theBuffer;
e4e3bbb4
RN
344}
345
6001e347 346// ----------------------------------------------------------------------------
bde4baac 347// wxMBConvLibc
6001e347
RR
348// ----------------------------------------------------------------------------
349
bde4baac
VZ
350size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
351{
352 return wxMB2WC(buf, psz, n);
353}
354
355size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
356{
357 return wxWC2MB(buf, psz, n);
358}
e1bfe89e 359
66bf0099 360#ifdef __UNIX__
c12b7f79 361
e1bfe89e 362// ----------------------------------------------------------------------------
66bf0099 363// wxConvBrokenFileNames
e1bfe89e
RR
364// ----------------------------------------------------------------------------
365
c12b7f79 366wxConvBrokenFileNames::wxConvBrokenFileNames()
ea8ce907 367{
c12b7f79
VZ
368 // decide which conversion to use for the file names
369
370 // (1) this variable exists for the sole purpose of specifying the encoding
371 // of the filenames for GTK+ programs, so use it if it is set
372 const wxChar *encName = wxGetenv(_T("G_FILENAME_ENCODING"));
373 if ( encName )
374 {
375 m_conv = new wxCSConv(encName);
376 }
377 else // no G_FILENAME_ENCODING
378 {
379 // (2) if a non default locale is set, assume that the user wants his
380 // filenames in this locale too
381 switch ( wxLocale::GetSystemEncoding() )
382 {
383 default:
384 m_conv = new wxMBConvLibc;
385 break;
386
387 // (3) finally use UTF-8 by default
388 case wxFONTENCODING_SYSTEM:
389 case wxFONTENCODING_UTF8:
390 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
391 break;
392 }
393 }
ea8ce907
RR
394}
395
c12b7f79
VZ
396size_t
397wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
398 const char *psz,
399 size_t outputSize) const
e1bfe89e 400{
c12b7f79 401 return m_conv->MB2WC( outputBuf, psz, outputSize );
e1bfe89e
RR
402}
403
c12b7f79
VZ
404size_t
405wxConvBrokenFileNames::WC2MB(char *outputBuf,
406 const wchar_t *psz,
407 size_t outputSize) const
e1bfe89e 408{
c12b7f79 409 return m_conv->WC2MB( outputBuf, psz, outputSize );
e1bfe89e
RR
410}
411
66bf0099 412#endif
c12b7f79 413
bde4baac 414// ----------------------------------------------------------------------------
3698ae71 415// UTF-7
bde4baac 416// ----------------------------------------------------------------------------
6001e347 417
15f2ee32 418// Implementation (C) 2004 Fredrik Roubert
6001e347 419
15f2ee32
RN
420//
421// BASE64 decoding table
422//
// maps a byte to the 6 bit value it stands for in BASE64 or to 0xff if the
// byte is not a BASE64 character; one row per 16 byte values
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0' - '9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x4f: 'A' - 'O'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50 - 0x5f: 'P' - 'Z'
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x6f: 'a' - 'o'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70 - 0x7f: 'p' - 'z'
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff: never valid
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
458
459size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
460{
15f2ee32
RN
461 size_t len = 0;
462
463 while (*psz && ((!buf) || (len < n)))
464 {
465 unsigned char cc = *psz++;
466 if (cc != '+')
467 {
468 // plain ASCII char
469 if (buf)
470 *buf++ = cc;
471 len++;
472 }
473 else if (*psz == '-')
474 {
475 // encoded plus sign
476 if (buf)
477 *buf++ = cc;
478 len++;
479 psz++;
480 }
481 else
482 {
483 // BASE64 encoded string
484 bool lsb;
485 unsigned char c;
486 unsigned int d, l;
487 for (lsb = false, d = 0, l = 0;
488 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
489 {
490 d <<= 6;
491 d += cc;
492 for (l += 6; l >= 8; lsb = !lsb)
493 {
6356d52a 494 c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
495 if (lsb)
496 {
497 if (buf)
498 *buf++ |= c;
499 len ++;
500 }
501 else
502 if (buf)
6356d52a 503 *buf = (wchar_t)(c << 8);
15f2ee32
RN
504 }
505 }
506 if (*psz == '-')
507 psz++;
508 }
509 }
510 if (buf && (len < n))
511 *buf = 0;
512 return len;
6001e347
RR
513}
514
15f2ee32
RN
515//
516// BASE64 encoding table
517//
// maps a 6 bit value to the BASE64 character representing it
static const unsigned char utf7enb64[] =
{
    // 0 - 25
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 26 - 51
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // 52 - 61
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 62, 63
    '+', '/'
};
529
530//
531// UTF-7 encoding table
532//
533// 0 - Set D (directly encoded characters)
534// 1 - Set O (optional direct characters)
535// 2 - whitespace characters (optional)
536// 3 - special characters
537//
// class (0 - 3, see the legend above) of each of the 128 ASCII characters,
// one row per 16 characters
static const unsigned char utf7encode[128] =
{
    /* 0x00 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    /* 0x10 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x20 */ 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    /* 0x40 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    /* 0x60 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
549
667e5b3e 550size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32
RN
551{
552
553
554 size_t len = 0;
555
556 while (*psz && ((!buf) || (len < n)))
557 {
558 wchar_t cc = *psz++;
559 if (cc < 0x80 && utf7encode[cc] < 1)
560 {
561 // plain ASCII char
562 if (buf)
563 *buf++ = (char)cc;
564 len++;
565 }
566#ifndef WC_UTF16
79c78d42 567 else if (((wxUint32)cc) > 0xffff)
b2c13097 568 {
15f2ee32
RN
569 // no surrogate pair generation (yet?)
570 return (size_t)-1;
571 }
572#endif
573 else
574 {
575 if (buf)
576 *buf++ = '+';
577 len++;
578 if (cc != '+')
579 {
580 // BASE64 encode string
581 unsigned int lsb, d, l;
582 for (d = 0, l = 0;; psz++)
583 {
584 for (lsb = 0; lsb < 2; lsb ++)
585 {
586 d <<= 8;
587 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
588
589 for (l += 8; l >= 6; )
590 {
591 l -= 6;
592 if (buf)
593 *buf++ = utf7enb64[(d >> l) % 64];
594 len++;
595 }
596 }
597 cc = *psz;
598 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
599 break;
600 }
601 if (l != 0)
602 {
603 if (buf)
604 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
605 len++;
606 }
607 }
608 if (buf)
609 *buf++ = '-';
610 len++;
611 }
612 }
613 if (buf && (len < n))
614 *buf = 0;
615 return len;
6001e347
RR
616}
617
f6bcfd97 618// ----------------------------------------------------------------------------
6001e347 619// UTF-8
f6bcfd97 620// ----------------------------------------------------------------------------
6001e347 621
dccce9ea 622static wxUint32 utf8_max[]=
4def3b35 623 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347 624
3698ae71
VZ
// boundaries of the private use area we use to (temporarily) remap the bytes
// which are invalid in a UTF-8 encoded string
ea8ce907
RR
627const wxUint32 wxUnicodePUA = 0x100000;
628const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
629
6001e347
RR
630size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
631{
4def3b35
VS
632 size_t len = 0;
633
dccce9ea 634 while (*psz && ((!buf) || (len < n)))
4def3b35 635 {
ea8ce907
RR
636 const char *opsz = psz;
637 bool invalid = false;
4def3b35
VS
638 unsigned char cc = *psz++, fc = cc;
639 unsigned cnt;
dccce9ea 640 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 641 fc <<= 1;
dccce9ea 642 if (!cnt)
4def3b35
VS
643 {
644 // plain ASCII char
dccce9ea 645 if (buf)
4def3b35
VS
646 *buf++ = cc;
647 len++;
dccce9ea
VZ
648 }
649 else
4def3b35
VS
650 {
651 cnt--;
dccce9ea 652 if (!cnt)
4def3b35
VS
653 {
654 // invalid UTF-8 sequence
ea8ce907 655 invalid = true;
dccce9ea
VZ
656 }
657 else
4def3b35
VS
658 {
659 unsigned ocnt = cnt - 1;
660 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 661 while (cnt--)
4def3b35 662 {
ea8ce907 663 cc = *psz;
dccce9ea 664 if ((cc & 0xC0) != 0x80)
4def3b35
VS
665 {
666 // invalid UTF-8 sequence
ea8ce907
RR
667 invalid = true;
668 break;
4def3b35 669 }
ea8ce907 670 psz++;
4def3b35
VS
671 res = (res << 6) | (cc & 0x3f);
672 }
ea8ce907 673 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
674 {
675 // illegal UTF-8 encoding
ea8ce907 676 invalid = true;
4def3b35 677 }
ea8ce907
RR
678 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
679 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
680 {
681 // if one of our PUA characters turns up externally
682 // it must also be treated as an illegal sequence
683 // (a bit like you have to escape an escape character)
684 invalid = true;
685 }
686 else
687 {
1cd52418 688#ifdef WC_UTF16
ea8ce907
RR
689 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
690 size_t pa = encode_utf16(res, (wxUint16 *)buf);
691 if (pa == (size_t)-1)
692 {
693 invalid = true;
694 }
695 else
696 {
697 if (buf)
698 buf += pa;
699 len += pa;
700 }
373658eb 701#else // !WC_UTF16
ea8ce907
RR
702 if (buf)
703 *buf++ = res;
704 len++;
373658eb 705#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
706 }
707 }
708 if (invalid)
709 {
710 if (m_options & MAP_INVALID_UTF8_TO_PUA)
711 {
712 while (opsz < psz && (!buf || len < n))
713 {
714#ifdef WC_UTF16
715 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
716 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
717 wxASSERT(pa != (size_t)-1);
718 if (buf)
719 buf += pa;
720 opsz++;
721 len += pa;
722#else
723 if (buf)
724 *buf++ = wxUnicodePUA + (unsigned char)*opsz;
725 opsz++;
726 len++;
727#endif
728 }
729 }
3698ae71 730 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
731 {
732 while (opsz < psz && (!buf || len < n))
733 {
3698ae71
VZ
734 if ( buf && len + 3 < n )
735 {
736 unsigned char n = *opsz;
737 *buf++ = L'\\';
b2c13097
WS
738 *buf++ = (wchar_t)( L'0' + n / 0100 );
739 *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
740 *buf++ = (wchar_t)( L'0' + n % 010 );
3698ae71 741 }
ea8ce907
RR
742 opsz++;
743 len += 4;
744 }
745 }
3698ae71 746 else // MAP_INVALID_UTF8_NOT
ea8ce907
RR
747 {
748 return (size_t)-1;
749 }
4def3b35
VS
750 }
751 }
6001e347 752 }
dccce9ea 753 if (buf && (len < n))
4def3b35
VS
754 *buf = 0;
755 return len;
6001e347
RR
756}
757
3698ae71
VZ
// returns true if wch is one of the octal digits L'0' to L'7'
static inline bool isoctal(wchar_t wch)
{
    return wch >= L'0' && wch <= L'7';
}
762
6001e347
RR
763size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
764{
4def3b35 765 size_t len = 0;
6001e347 766
dccce9ea 767 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
768 {
769 wxUint32 cc;
1cd52418 770#ifdef WC_UTF16
b5153fd8
VZ
771 // cast is ok for WC_UTF16
772 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 773 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 774#else
4def3b35
VS
775 cc=(*psz++) & 0x7fffffff;
776#endif
3698ae71
VZ
777
778 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
779 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 780 {
dccce9ea 781 if (buf)
ea8ce907 782 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 783 len++;
3698ae71
VZ
784 }
785 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
786 cc == L'\\' &&
787 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 788 {
dccce9ea 789 if (buf)
3698ae71 790 {
b2c13097
WS
791 *buf++ = (char) ((psz[0] - L'0')*0100 +
792 (psz[1] - L'0')*010 +
793 (psz[2] - L'0'));
3698ae71
VZ
794 }
795
796 psz += 3;
ea8ce907
RR
797 len++;
798 }
799 else
800 {
801 unsigned cnt;
802 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
803 if (!cnt)
4def3b35 804 {
ea8ce907
RR
805 // plain ASCII char
806 if (buf)
807 *buf++ = (char) cc;
808 len++;
809 }
810
811 else
812 {
813 len += cnt + 1;
814 if (buf)
815 {
816 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
817 while (cnt--)
818 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
819 }
4def3b35
VS
820 }
821 }
6001e347 822 }
4def3b35 823
3698ae71
VZ
824 if (buf && (len<n))
825 *buf = 0;
adb45366 826
4def3b35 827 return len;
6001e347
RR
828}
829
c91830cb
VZ
830// ----------------------------------------------------------------------------
831// UTF-16
832// ----------------------------------------------------------------------------
833
834#ifdef WORDS_BIGENDIAN
bde4baac
VZ
835 #define wxMBConvUTF16straight wxMBConvUTF16BE
836 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 837#else
bde4baac
VZ
838 #define wxMBConvUTF16swap wxMBConvUTF16BE
839 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
840#endif
841
842
c91830cb
VZ
843#ifdef WC_UTF16
844
c91830cb
VZ
845// copy 16bit MB to 16bit String
846size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
847{
848 size_t len=0;
849
850 while (*(wxUint16*)psz && (!buf || len < n))
851 {
852 if (buf)
853 *buf++ = *(wxUint16*)psz;
854 len++;
855
856 psz += sizeof(wxUint16);
857 }
858 if (buf && len<n) *buf=0;
859
860 return len;
861}
862
863
864// copy 16bit String to 16bit MB
865size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
866{
867 size_t len=0;
868
869 while (*psz && (!buf || len < n))
870 {
871 if (buf)
872 {
873 *(wxUint16*)buf = *psz;
874 buf += sizeof(wxUint16);
875 }
876 len += sizeof(wxUint16);
877 psz++;
878 }
879 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
880
881 return len;
882}
883
884
885// swap 16bit MB to 16bit String
886size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
887{
888 size_t len=0;
889
890 while (*(wxUint16*)psz && (!buf || len < n))
891 {
892 if (buf)
893 {
894 ((char *)buf)[0] = psz[1];
895 ((char *)buf)[1] = psz[0];
896 buf++;
897 }
898 len++;
899 psz += sizeof(wxUint16);
900 }
901 if (buf && len<n) *buf=0;
902
903 return len;
904}
905
906
// swap 16bit String to 16bit MB
908size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
909{
910 size_t len=0;
911
912 while (*psz && (!buf || len < n))
913 {
914 if (buf)
915 {
916 *buf++ = ((char*)psz)[1];
917 *buf++ = ((char*)psz)[0];
918 }
919 len += sizeof(wxUint16);
920 psz++;
921 }
922 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
923
924 return len;
925}
926
927
928#else // WC_UTF16
929
930
931// copy 16bit MB to 32bit String
932size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
933{
934 size_t len=0;
935
936 while (*(wxUint16*)psz && (!buf || len < n))
937 {
938 wxUint32 cc;
939 size_t pa=decode_utf16((wxUint16*)psz, cc);
940 if (pa == (size_t)-1)
941 return pa;
942
943 if (buf)
944 *buf++ = cc;
945 len++;
946 psz += pa * sizeof(wxUint16);
947 }
948 if (buf && len<n) *buf=0;
949
950 return len;
951}
952
953
954// copy 32bit String to 16bit MB
955size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
956{
957 size_t len=0;
958
959 while (*psz && (!buf || len < n))
960 {
961 wxUint16 cc[2];
962 size_t pa=encode_utf16(*psz, cc);
963
964 if (pa == (size_t)-1)
965 return pa;
966
967 if (buf)
968 {
69b80d28 969 *(wxUint16*)buf = cc[0];
b5153fd8 970 buf += sizeof(wxUint16);
c91830cb 971 if (pa > 1)
69b80d28
VZ
972 {
973 *(wxUint16*)buf = cc[1];
974 buf += sizeof(wxUint16);
975 }
c91830cb
VZ
976 }
977
978 len += pa*sizeof(wxUint16);
979 psz++;
980 }
981 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
982
983 return len;
984}
985
986
987// swap 16bit MB to 32bit String
988size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
989{
990 size_t len=0;
991
992 while (*(wxUint16*)psz && (!buf || len < n))
993 {
994 wxUint32 cc;
995 char tmp[4];
996 tmp[0]=psz[1]; tmp[1]=psz[0];
997 tmp[2]=psz[3]; tmp[3]=psz[2];
998
999 size_t pa=decode_utf16((wxUint16*)tmp, cc);
1000 if (pa == (size_t)-1)
1001 return pa;
1002
1003 if (buf)
1004 *buf++ = cc;
1005
1006 len++;
1007 psz += pa * sizeof(wxUint16);
1008 }
1009 if (buf && len<n) *buf=0;
1010
1011 return len;
1012}
1013
1014
1015// swap 32bit String to 16bit MB
1016size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1017{
1018 size_t len=0;
1019
1020 while (*psz && (!buf || len < n))
1021 {
1022 wxUint16 cc[2];
1023 size_t pa=encode_utf16(*psz, cc);
1024
1025 if (pa == (size_t)-1)
1026 return pa;
1027
1028 if (buf)
1029 {
1030 *buf++ = ((char*)cc)[1];
1031 *buf++ = ((char*)cc)[0];
1032 if (pa > 1)
1033 {
1034 *buf++ = ((char*)cc)[3];
1035 *buf++ = ((char*)cc)[2];
1036 }
1037 }
1038
1039 len += pa*sizeof(wxUint16);
1040 psz++;
1041 }
1042 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1043
1044 return len;
1045}
1046
1047#endif // WC_UTF16
1048
1049
1050// ----------------------------------------------------------------------------
1051// UTF-32
1052// ----------------------------------------------------------------------------
1053
1054#ifdef WORDS_BIGENDIAN
1055#define wxMBConvUTF32straight wxMBConvUTF32BE
1056#define wxMBConvUTF32swap wxMBConvUTF32LE
1057#else
1058#define wxMBConvUTF32swap wxMBConvUTF32BE
1059#define wxMBConvUTF32straight wxMBConvUTF32LE
1060#endif
1061
1062
1063WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1064WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1065
1066
1067#ifdef WC_UTF16
1068
1069// copy 32bit MB to 16bit String
1070size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1071{
1072 size_t len=0;
1073
1074 while (*(wxUint32*)psz && (!buf || len < n))
1075 {
1076 wxUint16 cc[2];
1077
1078 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1079 if (pa == (size_t)-1)
1080 return pa;
1081
1082 if (buf)
1083 {
1084 *buf++ = cc[0];
1085 if (pa > 1)
1086 *buf++ = cc[1];
1087 }
1088 len += pa;
1089 psz += sizeof(wxUint32);
1090 }
1091 if (buf && len<n) *buf=0;
1092
1093 return len;
1094}
1095
1096
1097// copy 16bit String to 32bit MB
1098size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1099{
1100 size_t len=0;
1101
1102 while (*psz && (!buf || len < n))
1103 {
1104 wxUint32 cc;
1105
b5153fd8
VZ
1106 // cast is ok for WC_UTF16
1107 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
1108 if (pa == (size_t)-1)
1109 return pa;
1110
1111 if (buf)
1112 {
1113 *(wxUint32*)buf = cc;
1114 buf += sizeof(wxUint32);
1115 }
1116 len += sizeof(wxUint32);
1117 psz += pa;
1118 }
b5153fd8
VZ
1119
1120 if (buf && len<=n-sizeof(wxUint32))
1121 *(wxUint32*)buf=0;
c91830cb
VZ
1122
1123 return len;
1124}
1125
1126
1127
1128// swap 32bit MB to 16bit String
1129size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1130{
1131 size_t len=0;
1132
1133 while (*(wxUint32*)psz && (!buf || len < n))
1134 {
1135 char tmp[4];
1136 tmp[0] = psz[3]; tmp[1] = psz[2];
1137 tmp[2] = psz[1]; tmp[3] = psz[0];
1138
1139
1140 wxUint16 cc[2];
1141
1142 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1143 if (pa == (size_t)-1)
1144 return pa;
1145
1146 if (buf)
1147 {
1148 *buf++ = cc[0];
1149 if (pa > 1)
1150 *buf++ = cc[1];
1151 }
1152 len += pa;
1153 psz += sizeof(wxUint32);
1154 }
b5153fd8
VZ
1155
1156 if (buf && len<n)
1157 *buf=0;
c91830cb
VZ
1158
1159 return len;
1160}
1161
1162
1163// swap 16bit String to 32bit MB
1164size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1165{
1166 size_t len=0;
1167
1168 while (*psz && (!buf || len < n))
1169 {
1170 char cc[4];
1171
b5153fd8
VZ
1172 // cast is ok for WC_UTF16
1173 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
1174 if (pa == (size_t)-1)
1175 return pa;
1176
1177 if (buf)
1178 {
1179 *buf++ = cc[3];
1180 *buf++ = cc[2];
1181 *buf++ = cc[1];
1182 *buf++ = cc[0];
1183 }
1184 len += sizeof(wxUint32);
1185 psz += pa;
1186 }
b5153fd8
VZ
1187
1188 if (buf && len<=n-sizeof(wxUint32))
1189 *(wxUint32*)buf=0;
c91830cb
VZ
1190
1191 return len;
1192}
1193
1194#else // WC_UTF16
1195
1196
1197// copy 32bit MB to 32bit String
1198size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1199{
1200 size_t len=0;
1201
1202 while (*(wxUint32*)psz && (!buf || len < n))
1203 {
1204 if (buf)
1205 *buf++ = *(wxUint32*)psz;
1206 len++;
1207 psz += sizeof(wxUint32);
1208 }
b5153fd8
VZ
1209
1210 if (buf && len<n)
1211 *buf=0;
c91830cb
VZ
1212
1213 return len;
1214}
1215
1216
1217// copy 32bit String to 32bit MB
1218size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1219{
1220 size_t len=0;
1221
1222 while (*psz && (!buf || len < n))
1223 {
1224 if (buf)
1225 {
1226 *(wxUint32*)buf = *psz;
1227 buf += sizeof(wxUint32);
1228 }
1229
1230 len += sizeof(wxUint32);
1231 psz++;
1232 }
1233
b5153fd8
VZ
1234 if (buf && len<=n-sizeof(wxUint32))
1235 *(wxUint32*)buf=0;
c91830cb
VZ
1236
1237 return len;
1238}
1239
1240
1241// swap 32bit MB to 32bit String
1242size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1243{
1244 size_t len=0;
1245
1246 while (*(wxUint32*)psz && (!buf || len < n))
1247 {
1248 if (buf)
1249 {
1250 ((char *)buf)[0] = psz[3];
1251 ((char *)buf)[1] = psz[2];
1252 ((char *)buf)[2] = psz[1];
1253 ((char *)buf)[3] = psz[0];
1254 buf++;
1255 }
1256 len++;
1257 psz += sizeof(wxUint32);
1258 }
b5153fd8
VZ
1259
1260 if (buf && len<n)
1261 *buf=0;
c91830cb
VZ
1262
1263 return len;
1264}
1265
1266
1267// swap 32bit String to 32bit MB
1268size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1269{
1270 size_t len=0;
1271
1272 while (*psz && (!buf || len < n))
1273 {
1274 if (buf)
1275 {
1276 *buf++ = ((char *)psz)[3];
1277 *buf++ = ((char *)psz)[2];
1278 *buf++ = ((char *)psz)[1];
1279 *buf++ = ((char *)psz)[0];
1280 }
1281 len += sizeof(wxUint32);
1282 psz++;
1283 }
b5153fd8
VZ
1284
1285 if (buf && len<=n-sizeof(wxUint32))
1286 *(wxUint32*)buf=0;
c91830cb
VZ
1287
1288 return len;
1289}
1290
1291
1292#endif // WC_UTF16
1293
1294
36acb880
VZ
1295// ============================================================================
1296// The classes doing conversion using the iconv_xxx() functions
1297// ============================================================================
3caec1bb 1298
b040e242 1299#ifdef HAVE_ICONV
3a0d76bc 1300
b1d547eb
VS
1301// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1302// E2BIG if output buffer is _exactly_ as big as needed. Such case is
1303// (unless there's yet another bug in glibc) the only case when iconv()
1304// returns with (size_t)-1 (which means error) and says there are 0 bytes
1305// left in the input buffer -- when _real_ error occurs,
1306// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1307// iconv() failure.
3caec1bb
VS
1308// [This bug does not appear in glibc 2.2.]
1309#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1310#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1311 (errno != E2BIG || bufLeft != 0))
1312#else
1313#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1314#endif
1315
ab217dba 1316#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880
VZ
1317
1318// ----------------------------------------------------------------------------
e95354ec 1319// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1320// ----------------------------------------------------------------------------
1321
e95354ec 1322class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1323{
1324public:
e95354ec
VZ
1325 wxMBConv_iconv(const wxChar *name);
1326 virtual ~wxMBConv_iconv();
36acb880 1327
bde4baac
VZ
1328 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1329 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1330
e95354ec 1331 bool IsOk() const
36acb880
VZ
1332 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1333
1334protected:
1335 // the iconv handlers used to translate from multibyte to wide char and in
1336 // the other direction
1337 iconv_t m2w,
1338 w2m;
b1d547eb
VS
1339#if wxUSE_THREADS
1340 // guards access to m2w and w2m objects
1341 wxMutex m_iconvMutex;
1342#endif
36acb880
VZ
1343
1344private:
e95354ec 1345 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
1346 // available on this machine, it will remain NULL
1347 static const char *ms_wcCharsetName;
1348
1349 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1350 // different endian-ness than the native one
405d8f46 1351 static bool ms_wcNeedsSwap;
36acb880
VZ
1352};
1353
e95354ec
VZ
1354const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1355bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1356
e95354ec 1357wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 1358{
04c79127
RR
1359 // Do it the hard way
1360 char cname[100];
1361 for (size_t i = 0; i < wxStrlen(name)+1; i++)
1362 cname[i] = (char) name[i];
1363
36acb880
VZ
1364 // check for charset that represents wchar_t:
1365 if (ms_wcCharsetName == NULL)
f1339c56 1366 {
e95354ec 1367 ms_wcNeedsSwap = false;
dccce9ea 1368
36acb880
VZ
1369 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1370 ms_wcCharsetName = WC_NAME_BEST;
04c79127 1371 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 1372
36acb880
VZ
1373 if (m2w == (iconv_t)-1)
1374 {
1375 // try charset w/o bytesex info (e.g. "UCS4")
1376 // and check for bytesex ourselves:
1377 ms_wcCharsetName = WC_NAME;
04c79127 1378 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
1379
1380 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
1381 if (m2w == (iconv_t)-1)
1382 {
36acb880 1383 ms_wcCharsetName = "WCHAR_T";
04c79127 1384 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 1385 }
3a0d76bc 1386
36acb880
VZ
1387 if (m2w != (iconv_t)-1)
1388 {
1389 char buf[2], *bufPtr;
1390 wchar_t wbuf[2], *wbufPtr;
1391 size_t insz, outsz;
1392 size_t res;
1393
1394 buf[0] = 'A';
1395 buf[1] = 0;
1396 wbuf[0] = 0;
1397 insz = 2;
1398 outsz = SIZEOF_WCHAR_T * 2;
1399 wbufPtr = wbuf;
1400 bufPtr = buf;
1401
1402 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1403 (char**)&wbufPtr, &outsz);
1404
1405 if (ICONV_FAILED(res, insz))
3a0d76bc 1406 {
36acb880
VZ
1407 ms_wcCharsetName = NULL;
1408 wxLogLastError(wxT("iconv"));
2b5f62a0 1409 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
1410 }
1411 else
1412 {
36acb880 1413 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
1414 }
1415 }
36acb880
VZ
1416 else
1417 {
1418 ms_wcCharsetName = NULL;
373658eb 1419
77ffb593 1420 // VS: we must not output an error here, since wxWidgets will safely
957686c8
VS
1421 // fall back to using wxEncodingConverter.
1422 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1423 //wxLogError(
36acb880 1424 }
3a0d76bc 1425 }
36acb880 1426 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 1427 }
36acb880 1428 else // we already have ms_wcCharsetName
3caec1bb 1429 {
04c79127 1430 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 1431 }
dccce9ea 1432
36acb880
VZ
1433 // NB: don't ever pass NULL to iconv_open(), it may crash!
1434 if ( ms_wcCharsetName )
f1339c56 1435 {
04c79127 1436 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 1437 }
405d8f46
VZ
1438 else
1439 {
1440 w2m = (iconv_t)-1;
1441 }
36acb880 1442}
3caec1bb 1443
e95354ec 1444wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
1445{
1446 if ( m2w != (iconv_t)-1 )
1447 iconv_close(m2w);
1448 if ( w2m != (iconv_t)-1 )
1449 iconv_close(w2m);
1450}
3a0d76bc 1451
bde4baac 1452size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1453{
b1d547eb
VS
1454#if wxUSE_THREADS
1455 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1456 // Unfortunately there is a couple of global wxCSConv objects such as
1457 // wxConvLocal that are used all over wx code, so we have to make sure
1458 // the handle is used by at most one thread at the time. Otherwise
1459 // only a few wx classes would be safe to use from non-main threads
1460 // as MB<->WC conversion would fail "randomly".
1461 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1462#endif
3698ae71 1463
36acb880
VZ
1464 size_t inbuf = strlen(psz);
1465 size_t outbuf = n * SIZEOF_WCHAR_T;
1466 size_t res, cres;
1467 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1468 wchar_t *bufPtr = buf;
1469 const char *pszPtr = psz;
1470
1471 if (buf)
1472 {
1473 // have destination buffer, convert there
1474 cres = iconv(m2w,
1475 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1476 (char**)&bufPtr, &outbuf);
1477 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1478
36acb880 1479 if (ms_wcNeedsSwap)
3a0d76bc 1480 {
36acb880
VZ
1481 // convert to native endianness
1482 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 1483 }
adb45366 1484
49dd9820
VS
1485 // NB: iconv was given only strlen(psz) characters on input, and so
1486 // it couldn't convert the trailing zero. Let's do it ourselves
1487 // if there's some room left for it in the output buffer.
1488 if (res < n)
1489 buf[res] = 0;
36acb880
VZ
1490 }
1491 else
1492 {
1493 // no destination buffer... convert using temp buffer
1494 // to calculate destination buffer requirement
1495 wchar_t tbuf[8];
1496 res = 0;
1497 do {
1498 bufPtr = tbuf;
1499 outbuf = 8*SIZEOF_WCHAR_T;
1500
1501 cres = iconv(m2w,
1502 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1503 (char**)&bufPtr, &outbuf );
1504
1505 res += 8-(outbuf/SIZEOF_WCHAR_T);
1506 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1507 }
dccce9ea 1508
36acb880 1509 if (ICONV_FAILED(cres, inbuf))
f1339c56 1510 {
36acb880
VZ
1511 //VS: it is ok if iconv fails, hence trace only
1512 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1513 return (size_t)-1;
1514 }
1515
1516 return res;
1517}
1518
bde4baac 1519size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1520{
b1d547eb
VS
1521#if wxUSE_THREADS
1522 // NB: explained in MB2WC
1523 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1524#endif
3698ae71 1525
f8d791e0 1526 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1527 size_t outbuf = n;
1528 size_t res, cres;
3a0d76bc 1529
36acb880 1530 wchar_t *tmpbuf = 0;
3caec1bb 1531
36acb880
VZ
1532 if (ms_wcNeedsSwap)
1533 {
1534 // need to copy to temp buffer to switch endianness
1535 // this absolutely doesn't rock!
1536 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1537 // could be in read-only memory, or be accessed in some other thread)
1538 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1539 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1540 WC_BSWAP(tmpbuf, inbuf)
1541 psz=tmpbuf;
1542 }
3a0d76bc 1543
36acb880
VZ
1544 if (buf)
1545 {
1546 // have destination buffer, convert there
1547 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1548
36acb880 1549 res = n-outbuf;
adb45366 1550
49dd9820
VS
1551 // NB: iconv was given only wcslen(psz) characters on input, and so
1552 // it couldn't convert the trailing zero. Let's do it ourselves
1553 // if there's some room left for it in the output buffer.
1554 if (res < n)
1555 buf[0] = 0;
36acb880
VZ
1556 }
1557 else
1558 {
1559 // no destination buffer... convert using temp buffer
1560 // to calculate destination buffer requirement
1561 char tbuf[16];
1562 res = 0;
1563 do {
1564 buf = tbuf; outbuf = 16;
1565
1566 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1567
36acb880
VZ
1568 res += 16 - outbuf;
1569 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1570 }
dccce9ea 1571
36acb880
VZ
1572 if (ms_wcNeedsSwap)
1573 {
1574 free(tmpbuf);
1575 }
dccce9ea 1576
36acb880
VZ
1577 if (ICONV_FAILED(cres, inbuf))
1578 {
1579 //VS: it is ok if iconv fails, hence trace only
1580 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1581 return (size_t)-1;
1582 }
1583
1584 return res;
1585}
1586
b040e242 1587#endif // HAVE_ICONV
36acb880 1588
e95354ec 1589
36acb880
VZ
1590// ============================================================================
1591// Win32 conversion classes
1592// ============================================================================
1cd52418 1593
e95354ec 1594#ifdef wxHAVE_WIN32_MB2WC
373658eb 1595
8b04d4c4 1596// from utils.cpp
d775fa82 1597#if wxUSE_FONTMAP
8b04d4c4
VZ
1598extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1599extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1600#endif
373658eb 1601
e95354ec 1602class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1603{
1604public:
bde4baac
VZ
1605 wxMBConv_win32()
1606 {
1607 m_CodePage = CP_ACP;
1608 }
1609
7608a683 1610#if wxUSE_FONTMAP
e95354ec 1611 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1612 {
1613 m_CodePage = wxCharsetToCodepage(name);
1614 }
dccce9ea 1615
e95354ec 1616 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1617 {
1618 m_CodePage = wxEncodingToCodepage(encoding);
1619 }
7608a683 1620#endif
8b04d4c4 1621
bde4baac 1622 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1623 {
02272c9c
VZ
1624 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1625 // the behaviour is not compatible with the Unix version (using iconv)
1626 // and break the library itself, e.g. wxTextInputStream::NextChar()
1627 // wouldn't work if reading an incomplete MB char didn't result in an
1628 // error
667e5b3e
VZ
1629 //
1630 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1631 // an error (tested under Windows Server 2003) and apparently it is
1632 // done on purpose, i.e. the function accepts any input in this case
1633 // and although I'd prefer to return error on ill-formed output, our
1634 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1635 // explicitly ill-formed according to RFC 2152) neither so we don't
1636 // even have any fallback here...
1637 int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1638
2b5f62a0
VZ
1639 const size_t len = ::MultiByteToWideChar
1640 (
1641 m_CodePage, // code page
667e5b3e 1642 flags, // flags: fall on error
2b5f62a0
VZ
1643 psz, // input string
1644 -1, // its length (NUL-terminated)
b4da152e 1645 buf, // output string
2b5f62a0
VZ
1646 buf ? n : 0 // size of output buffer
1647 );
1648
03a991bc
VZ
1649 // note that it returns count of written chars for buf != NULL and size
1650 // of the needed buffer for buf == NULL so in either case the length of
1651 // the string (which never includes the terminating NUL) is one less
1652 return len ? len - 1 : (size_t)-1;
f1339c56 1653 }
dccce9ea 1654
13dd924a 1655 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1656 {
13dd924a
VZ
1657 /*
1658 we have a problem here: by default, WideCharToMultiByte() may
1659 replace characters unrepresentable in the target code page with bad
1660 quality approximations such as turning "1/2" symbol (U+00BD) into
1661 "1" for the code pages which don't have it and we, obviously, want
1662 to avoid this at any price
d775fa82 1663
13dd924a
VZ
1664 the trouble is that this function does it _silently_, i.e. it won't
1665 even tell us whether it did or not... Win98/2000 and higher provide
1666 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1667 we have to resort to a round trip, i.e. check that converting back
1668 results in the same string -- this is, of course, expensive but
1669 otherwise we simply can't be sure to not garble the data.
1670 */
1671
1672 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1673 // it doesn't work with CJK encodings (which we test for rather roughly
1674 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1675 // supporting it
907173e5
WS
1676 BOOL usedDef wxDUMMY_INITIALIZE(false);
1677 BOOL *pUsedDef;
13dd924a
VZ
1678 int flags;
1679 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1680 {
1681 // it's our lucky day
1682 flags = WC_NO_BEST_FIT_CHARS;
1683 pUsedDef = &usedDef;
1684 }
1685 else // old system or unsupported encoding
1686 {
1687 flags = 0;
1688 pUsedDef = NULL;
1689 }
1690
2b5f62a0
VZ
1691 const size_t len = ::WideCharToMultiByte
1692 (
1693 m_CodePage, // code page
13dd924a
VZ
1694 flags, // either none or no best fit
1695 pwz, // input string
2b5f62a0
VZ
1696 -1, // it is (wide) NUL-terminated
1697 buf, // output buffer
1698 buf ? n : 0, // and its size
1699 NULL, // default "replacement" char
13dd924a 1700 pUsedDef // [out] was it used?
2b5f62a0
VZ
1701 );
1702
13dd924a
VZ
1703 if ( !len )
1704 {
1705 // function totally failed
1706 return (size_t)-1;
1707 }
1708
1709 // if we were really converting, check if we succeeded
1710 if ( buf )
1711 {
1712 if ( flags )
1713 {
1714 // check if the conversion failed, i.e. if any replacements
1715 // were done
1716 if ( usedDef )
1717 return (size_t)-1;
1718 }
1719 else // we must resort to double tripping...
1720 {
1721 wxWCharBuffer wcBuf(n);
1722 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1723 wcscmp(wcBuf, pwz) != 0 )
1724 {
1725 // we didn't obtain the same thing we started from, hence
1726 // the conversion was lossy and we consider that it failed
1727 return (size_t)-1;
1728 }
1729 }
1730 }
1731
03a991bc 1732 // see the comment above for the reason of "len - 1"
13dd924a 1733 return len - 1;
f1339c56 1734 }
dccce9ea 1735
13dd924a
VZ
1736 bool IsOk() const { return m_CodePage != -1; }
1737
1738private:
1739 static bool CanUseNoBestFit()
1740 {
1741 static int s_isWin98Or2k = -1;
1742
1743 if ( s_isWin98Or2k == -1 )
1744 {
1745 int verMaj, verMin;
1746 switch ( wxGetOsVersion(&verMaj, &verMin) )
1747 {
1748 case wxWIN95:
1749 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1750 break;
1751
1752 case wxWINDOWS_NT:
1753 s_isWin98Or2k = verMaj >= 5;
1754 break;
1755
1756 default:
1757 // unknown, be conseravtive by default
1758 s_isWin98Or2k = 0;
1759 }
1760
1761 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1762 }
1763
1764 return s_isWin98Or2k == 1;
1765 }
f1339c56 1766
b1d66b54 1767 long m_CodePage;
1cd52418 1768};
e95354ec
VZ
1769
1770#endif // wxHAVE_WIN32_MB2WC
1771
f7e98dee
RN
1772// ============================================================================
1773// Cocoa conversion classes
1774// ============================================================================
1775
1776#if defined(__WXCOCOA__)
1777
// RN: There is no UTF-32 support in either Core Foundation or
// Cocoa. Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
1781
1782#include <CoreFoundation/CFString.h>
1783#include <CoreFoundation/CFStringEncodingExt.h>
1784
1785CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 1786{
638357a0 1787 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ecd9653b
WS
1788 if ( encoding == wxFONTENCODING_DEFAULT )
1789 {
638357a0 1790 enc = CFStringGetSystemEncoding();
ecd9653b
WS
1791 }
1792 else switch( encoding)
1793 {
1794 case wxFONTENCODING_ISO8859_1 :
1795 enc = kCFStringEncodingISOLatin1 ;
1796 break ;
1797 case wxFONTENCODING_ISO8859_2 :
1798 enc = kCFStringEncodingISOLatin2;
1799 break ;
1800 case wxFONTENCODING_ISO8859_3 :
1801 enc = kCFStringEncodingISOLatin3 ;
1802 break ;
1803 case wxFONTENCODING_ISO8859_4 :
1804 enc = kCFStringEncodingISOLatin4;
1805 break ;
1806 case wxFONTENCODING_ISO8859_5 :
1807 enc = kCFStringEncodingISOLatinCyrillic;
1808 break ;
1809 case wxFONTENCODING_ISO8859_6 :
1810 enc = kCFStringEncodingISOLatinArabic;
1811 break ;
1812 case wxFONTENCODING_ISO8859_7 :
1813 enc = kCFStringEncodingISOLatinGreek;
1814 break ;
1815 case wxFONTENCODING_ISO8859_8 :
1816 enc = kCFStringEncodingISOLatinHebrew;
1817 break ;
1818 case wxFONTENCODING_ISO8859_9 :
1819 enc = kCFStringEncodingISOLatin5;
1820 break ;
1821 case wxFONTENCODING_ISO8859_10 :
1822 enc = kCFStringEncodingISOLatin6;
1823 break ;
1824 case wxFONTENCODING_ISO8859_11 :
1825 enc = kCFStringEncodingISOLatinThai;
1826 break ;
1827 case wxFONTENCODING_ISO8859_13 :
1828 enc = kCFStringEncodingISOLatin7;
1829 break ;
1830 case wxFONTENCODING_ISO8859_14 :
1831 enc = kCFStringEncodingISOLatin8;
1832 break ;
1833 case wxFONTENCODING_ISO8859_15 :
1834 enc = kCFStringEncodingISOLatin9;
1835 break ;
1836
1837 case wxFONTENCODING_KOI8 :
1838 enc = kCFStringEncodingKOI8_R;
1839 break ;
1840 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1841 enc = kCFStringEncodingDOSRussian;
1842 break ;
1843
1844// case wxFONTENCODING_BULGARIAN :
1845// enc = ;
1846// break ;
1847
1848 case wxFONTENCODING_CP437 :
1849 enc =kCFStringEncodingDOSLatinUS ;
1850 break ;
1851 case wxFONTENCODING_CP850 :
1852 enc = kCFStringEncodingDOSLatin1;
1853 break ;
1854 case wxFONTENCODING_CP852 :
1855 enc = kCFStringEncodingDOSLatin2;
1856 break ;
1857 case wxFONTENCODING_CP855 :
1858 enc = kCFStringEncodingDOSCyrillic;
1859 break ;
1860 case wxFONTENCODING_CP866 :
1861 enc =kCFStringEncodingDOSRussian ;
1862 break ;
1863 case wxFONTENCODING_CP874 :
1864 enc = kCFStringEncodingDOSThai;
1865 break ;
1866 case wxFONTENCODING_CP932 :
1867 enc = kCFStringEncodingDOSJapanese;
1868 break ;
1869 case wxFONTENCODING_CP936 :
1870 enc =kCFStringEncodingDOSChineseSimplif ;
1871 break ;
1872 case wxFONTENCODING_CP949 :
1873 enc = kCFStringEncodingDOSKorean;
1874 break ;
1875 case wxFONTENCODING_CP950 :
1876 enc = kCFStringEncodingDOSChineseTrad;
1877 break ;
ecd9653b
WS
1878 case wxFONTENCODING_CP1250 :
1879 enc = kCFStringEncodingWindowsLatin2;
1880 break ;
1881 case wxFONTENCODING_CP1251 :
1882 enc =kCFStringEncodingWindowsCyrillic ;
1883 break ;
1884 case wxFONTENCODING_CP1252 :
1885 enc =kCFStringEncodingWindowsLatin1 ;
1886 break ;
1887 case wxFONTENCODING_CP1253 :
1888 enc = kCFStringEncodingWindowsGreek;
1889 break ;
1890 case wxFONTENCODING_CP1254 :
1891 enc = kCFStringEncodingWindowsLatin5;
1892 break ;
1893 case wxFONTENCODING_CP1255 :
1894 enc =kCFStringEncodingWindowsHebrew ;
1895 break ;
1896 case wxFONTENCODING_CP1256 :
1897 enc =kCFStringEncodingWindowsArabic ;
1898 break ;
1899 case wxFONTENCODING_CP1257 :
1900 enc = kCFStringEncodingWindowsBalticRim;
1901 break ;
638357a0
RN
1902// This only really encodes to UTF7 (if that) evidently
1903// case wxFONTENCODING_UTF7 :
1904// enc = kCFStringEncodingNonLossyASCII ;
1905// break ;
ecd9653b
WS
1906 case wxFONTENCODING_UTF8 :
1907 enc = kCFStringEncodingUTF8 ;
1908 break ;
1909 case wxFONTENCODING_EUC_JP :
1910 enc = kCFStringEncodingEUC_JP;
1911 break ;
1912 case wxFONTENCODING_UTF16 :
f7e98dee 1913 enc = kCFStringEncodingUnicode ;
ecd9653b 1914 break ;
f7e98dee
RN
1915 case wxFONTENCODING_MACROMAN :
1916 enc = kCFStringEncodingMacRoman ;
1917 break ;
1918 case wxFONTENCODING_MACJAPANESE :
1919 enc = kCFStringEncodingMacJapanese ;
1920 break ;
1921 case wxFONTENCODING_MACCHINESETRAD :
1922 enc = kCFStringEncodingMacChineseTrad ;
1923 break ;
1924 case wxFONTENCODING_MACKOREAN :
1925 enc = kCFStringEncodingMacKorean ;
1926 break ;
1927 case wxFONTENCODING_MACARABIC :
1928 enc = kCFStringEncodingMacArabic ;
1929 break ;
1930 case wxFONTENCODING_MACHEBREW :
1931 enc = kCFStringEncodingMacHebrew ;
1932 break ;
1933 case wxFONTENCODING_MACGREEK :
1934 enc = kCFStringEncodingMacGreek ;
1935 break ;
1936 case wxFONTENCODING_MACCYRILLIC :
1937 enc = kCFStringEncodingMacCyrillic ;
1938 break ;
1939 case wxFONTENCODING_MACDEVANAGARI :
1940 enc = kCFStringEncodingMacDevanagari ;
1941 break ;
1942 case wxFONTENCODING_MACGURMUKHI :
1943 enc = kCFStringEncodingMacGurmukhi ;
1944 break ;
1945 case wxFONTENCODING_MACGUJARATI :
1946 enc = kCFStringEncodingMacGujarati ;
1947 break ;
1948 case wxFONTENCODING_MACORIYA :
1949 enc = kCFStringEncodingMacOriya ;
1950 break ;
1951 case wxFONTENCODING_MACBENGALI :
1952 enc = kCFStringEncodingMacBengali ;
1953 break ;
1954 case wxFONTENCODING_MACTAMIL :
1955 enc = kCFStringEncodingMacTamil ;
1956 break ;
1957 case wxFONTENCODING_MACTELUGU :
1958 enc = kCFStringEncodingMacTelugu ;
1959 break ;
1960 case wxFONTENCODING_MACKANNADA :
1961 enc = kCFStringEncodingMacKannada ;
1962 break ;
1963 case wxFONTENCODING_MACMALAJALAM :
1964 enc = kCFStringEncodingMacMalayalam ;
1965 break ;
1966 case wxFONTENCODING_MACSINHALESE :
1967 enc = kCFStringEncodingMacSinhalese ;
1968 break ;
1969 case wxFONTENCODING_MACBURMESE :
1970 enc = kCFStringEncodingMacBurmese ;
1971 break ;
1972 case wxFONTENCODING_MACKHMER :
1973 enc = kCFStringEncodingMacKhmer ;
1974 break ;
1975 case wxFONTENCODING_MACTHAI :
1976 enc = kCFStringEncodingMacThai ;
1977 break ;
1978 case wxFONTENCODING_MACLAOTIAN :
1979 enc = kCFStringEncodingMacLaotian ;
1980 break ;
1981 case wxFONTENCODING_MACGEORGIAN :
1982 enc = kCFStringEncodingMacGeorgian ;
1983 break ;
1984 case wxFONTENCODING_MACARMENIAN :
1985 enc = kCFStringEncodingMacArmenian ;
1986 break ;
1987 case wxFONTENCODING_MACCHINESESIMP :
1988 enc = kCFStringEncodingMacChineseSimp ;
1989 break ;
1990 case wxFONTENCODING_MACTIBETAN :
1991 enc = kCFStringEncodingMacTibetan ;
1992 break ;
1993 case wxFONTENCODING_MACMONGOLIAN :
1994 enc = kCFStringEncodingMacMongolian ;
1995 break ;
1996 case wxFONTENCODING_MACETHIOPIC :
1997 enc = kCFStringEncodingMacEthiopic ;
1998 break ;
1999 case wxFONTENCODING_MACCENTRALEUR :
2000 enc = kCFStringEncodingMacCentralEurRoman ;
2001 break ;
2002 case wxFONTENCODING_MACVIATNAMESE :
2003 enc = kCFStringEncodingMacVietnamese ;
2004 break ;
2005 case wxFONTENCODING_MACARABICEXT :
2006 enc = kCFStringEncodingMacExtArabic ;
2007 break ;
2008 case wxFONTENCODING_MACSYMBOL :
2009 enc = kCFStringEncodingMacSymbol ;
2010 break ;
2011 case wxFONTENCODING_MACDINGBATS :
2012 enc = kCFStringEncodingMacDingbats ;
2013 break ;
2014 case wxFONTENCODING_MACTURKISH :
2015 enc = kCFStringEncodingMacTurkish ;
2016 break ;
2017 case wxFONTENCODING_MACCROATIAN :
2018 enc = kCFStringEncodingMacCroatian ;
2019 break ;
2020 case wxFONTENCODING_MACICELANDIC :
2021 enc = kCFStringEncodingMacIcelandic ;
2022 break ;
2023 case wxFONTENCODING_MACROMANIAN :
2024 enc = kCFStringEncodingMacRomanian ;
2025 break ;
2026 case wxFONTENCODING_MACCELTIC :
2027 enc = kCFStringEncodingMacCeltic ;
2028 break ;
2029 case wxFONTENCODING_MACGAELIC :
2030 enc = kCFStringEncodingMacGaelic ;
2031 break ;
ecd9653b
WS
2032// case wxFONTENCODING_MACKEYBOARD :
2033// enc = kCFStringEncodingMacKeyboardGlyphs ;
2034// break ;
2035 default :
2036 // because gcc is picky
2037 break ;
2038 } ;
2039 return enc ;
f7e98dee
RN
2040}
2041
f7e98dee
RN
2042class wxMBConv_cocoa : public wxMBConv
2043{
2044public:
2045 wxMBConv_cocoa()
2046 {
2047 Init(CFStringGetSystemEncoding()) ;
2048 }
2049
a6900d10 2050#if wxUSE_FONTMAP
f7e98dee
RN
2051 wxMBConv_cocoa(const wxChar* name)
2052 {
267e11c5 2053 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
f7e98dee 2054 }
a6900d10 2055#endif
f7e98dee
RN
2056
2057 wxMBConv_cocoa(wxFontEncoding encoding)
2058 {
2059 Init( wxCFStringEncFromFontEnc(encoding) );
2060 }
2061
2062 ~wxMBConv_cocoa()
2063 {
2064 }
2065
2066 void Init( CFStringEncoding encoding)
2067 {
638357a0 2068 m_encoding = encoding ;
f7e98dee
RN
2069 }
2070
2071 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2072 {
2073 wxASSERT(szUnConv);
ecd9653b 2074
638357a0
RN
2075 CFStringRef theString = CFStringCreateWithBytes (
2076 NULL, //the allocator
2077 (const UInt8*)szUnConv,
2078 strlen(szUnConv),
2079 m_encoding,
2080 false //no BOM/external representation
f7e98dee
RN
2081 );
2082
2083 wxASSERT(theString);
2084
638357a0
RN
2085 size_t nOutLength = CFStringGetLength(theString);
2086
2087 if (szOut == NULL)
f7e98dee 2088 {
f7e98dee 2089 CFRelease(theString);
638357a0 2090 return nOutLength;
f7e98dee 2091 }
ecd9653b 2092
638357a0 2093 CFRange theRange = { 0, nOutSize };
ecd9653b 2094
638357a0
RN
2095#if SIZEOF_WCHAR_T == 4
2096 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2097#endif
3698ae71 2098
f7e98dee 2099 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
3698ae71 2100
f7e98dee 2101 CFRelease(theString);
ecd9653b 2102
638357a0 2103 szUniCharBuffer[nOutLength] = '\0' ;
f7e98dee
RN
2104
2105#if SIZEOF_WCHAR_T == 4
2106 wxMBConvUTF16 converter ;
638357a0 2107 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
f7e98dee
RN
2108 delete[] szUniCharBuffer;
2109#endif
3698ae71 2110
638357a0 2111 return nOutLength;
f7e98dee
RN
2112 }
2113
2114 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2115 {
638357a0 2116 wxASSERT(szUnConv);
3698ae71 2117
f7e98dee 2118 size_t nRealOutSize;
638357a0 2119 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 2120 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2121
f7e98dee
RN
2122#if SIZEOF_WCHAR_T == 4
2123 wxMBConvUTF16BE converter ;
2124 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2125 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2126 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2127 nBufSize /= sizeof(UniChar);
f7e98dee
RN
2128#endif
2129
2130 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2131 NULL, //allocator
2132 szUniBuffer,
2133 nBufSize,
638357a0 2134 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 2135 );
ecd9653b 2136
f7e98dee 2137 wxASSERT(theString);
ecd9653b 2138
f7e98dee 2139 //Note that CER puts a BOM when converting to unicode
638357a0
RN
2140 //so we check and use getchars instead in that case
2141 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 2142 {
638357a0
RN
2143 if (szOut != NULL)
2144 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
3698ae71 2145
638357a0
RN
2146 nRealOutSize = CFStringGetLength(theString) + 1;
2147 }
2148 else
2149 {
2150 CFStringGetBytes(
2151 theString,
2152 CFRangeMake(0, CFStringGetLength(theString)),
2153 m_encoding,
2154 0, //what to put in characters that can't be converted -
2155 //0 tells CFString to return NULL if it meets such a character
2156 false, //not an external representation
2157 (UInt8*) szOut,
3698ae71 2158 nOutSize,
638357a0
RN
2159 (CFIndex*) &nRealOutSize
2160 );
f7e98dee 2161 }
ecd9653b 2162
638357a0 2163 CFRelease(theString);
ecd9653b 2164
638357a0
RN
2165#if SIZEOF_WCHAR_T == 4
2166 delete[] szUniBuffer;
2167#endif
ecd9653b 2168
f7e98dee
RN
2169 return nRealOutSize - 1;
2170 }
2171
2172 bool IsOk() const
ecd9653b 2173 {
3698ae71 2174 return m_encoding != kCFStringEncodingInvalidId &&
638357a0 2175 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2176 }
2177
2178private:
638357a0 2179 CFStringEncoding m_encoding ;
f7e98dee
RN
2180};
2181
2182#endif // defined(__WXCOCOA__)
2183
335d31e0
SC
2184// ============================================================================
2185// Mac conversion classes
2186// ============================================================================
2187
2188#if defined(__WXMAC__) && defined(TARGET_CARBON)
2189
2190class wxMBConv_mac : public wxMBConv
2191{
2192public:
2193 wxMBConv_mac()
2194 {
2195 Init(CFStringGetSystemEncoding()) ;
2196 }
2197
2d1659cf 2198#if wxUSE_FONTMAP
335d31e0
SC
2199 wxMBConv_mac(const wxChar* name)
2200 {
267e11c5 2201 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
335d31e0 2202 }
2d1659cf 2203#endif
335d31e0
SC
2204
2205 wxMBConv_mac(wxFontEncoding encoding)
2206 {
d775fa82
WS
2207 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2208 }
2209
2210 ~wxMBConv_mac()
2211 {
2212 OSStatus status = noErr ;
2213 status = TECDisposeConverter(m_MB2WC_converter);
2214 status = TECDisposeConverter(m_WC2MB_converter);
2215 }
2216
2217
2218 void Init( TextEncodingBase encoding)
2219 {
2220 OSStatus status = noErr ;
2221 m_char_encoding = encoding ;
2222 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2223
2224 status = TECCreateConverter(&m_MB2WC_converter,
2225 m_char_encoding,
2226 m_unicode_encoding);
2227 status = TECCreateConverter(&m_WC2MB_converter,
2228 m_unicode_encoding,
2229 m_char_encoding);
2230 }
2231
335d31e0
SC
2232 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2233 {
d775fa82
WS
2234 OSStatus status = noErr ;
2235 ByteCount byteOutLen ;
2236 ByteCount byteInLen = strlen(psz) ;
2237 wchar_t *tbuf = NULL ;
2238 UniChar* ubuf = NULL ;
2239 size_t res = 0 ;
2240
2241 if (buf == NULL)
2242 {
638357a0 2243 //apple specs say at least 32
c543817b 2244 n = wxMax( 32 , byteInLen ) ;
d775fa82
WS
2245 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2246 }
2247 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
f3a355ce 2248#if SIZEOF_WCHAR_T == 4
d775fa82 2249 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2250#else
d775fa82 2251 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2252#endif
d775fa82
WS
2253 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2254 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
f3a355ce 2255#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2256 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2257 // is not properly terminated we get random characters at the end
2258 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d775fa82
WS
2259 wxMBConvUTF16BE converter ;
2260 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2261 free( ubuf ) ;
f3a355ce 2262#else
d775fa82 2263 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2264#endif
d775fa82
WS
2265 if ( buf == NULL )
2266 free(tbuf) ;
335d31e0 2267
335d31e0
SC
2268 if ( buf && res < n)
2269 buf[res] = 0;
2270
d775fa82 2271 return res ;
335d31e0
SC
2272 }
2273
2274 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82
WS
2275 {
2276 OSStatus status = noErr ;
2277 ByteCount byteOutLen ;
2278 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2279
2280 char *tbuf = NULL ;
2281
2282 if (buf == NULL)
2283 {
638357a0 2284 //apple specs say at least 32
c543817b 2285 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2286 tbuf = (char*) malloc( n ) ;
2287 }
2288
2289 ByteCount byteBufferLen = n ;
2290 UniChar* ubuf = NULL ;
f3a355ce 2291#if SIZEOF_WCHAR_T == 4
d775fa82
WS
2292 wxMBConvUTF16BE converter ;
2293 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2294 byteInLen = unicharlen ;
2295 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2296 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce 2297#else
d775fa82 2298 ubuf = (UniChar*) psz ;
f3a355ce 2299#endif
d775fa82
WS
2300 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2301 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
f3a355ce 2302#if SIZEOF_WCHAR_T == 4
d775fa82 2303 free( ubuf ) ;
f3a355ce 2304#endif
d775fa82
WS
2305 if ( buf == NULL )
2306 free(tbuf) ;
335d31e0 2307
d775fa82 2308 size_t res = byteOutLen ;
335d31e0 2309 if ( buf && res < n)
638357a0 2310 {
335d31e0 2311 buf[res] = 0;
3698ae71 2312
638357a0
RN
2313 //we need to double-trip to verify it didn't insert any ? in place
2314 //of bogus characters
2315 wxWCharBuffer wcBuf(n);
2316 size_t pszlen = wxWcslen(psz);
2317 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2318 wxWcslen(wcBuf) != pszlen ||
2319 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2320 {
2321 // we didn't obtain the same thing we started from, hence
2322 // the conversion was lossy and we consider that it failed
2323 return (size_t)-1;
2324 }
2325 }
335d31e0 2326
d775fa82 2327 return res ;
335d31e0
SC
2328 }
2329
2330 bool IsOk() const
2331 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2332
2333private:
d775fa82
WS
2334 TECObjectRef m_MB2WC_converter ;
2335 TECObjectRef m_WC2MB_converter ;
2336
2337 TextEncodingBase m_char_encoding ;
2338 TextEncodingBase m_unicode_encoding ;
335d31e0
SC
2339};
2340
2341#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 2342
36acb880
VZ
2343// ============================================================================
2344// wxEncodingConverter based conversion classes
2345// ============================================================================
2346
1e6feb95 2347#if wxUSE_FONTMAP
1cd52418 2348
e95354ec 2349class wxMBConv_wxwin : public wxMBConv
1cd52418 2350{
8b04d4c4
VZ
2351private:
2352 void Init()
2353 {
2354 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2355 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2356 }
2357
6001e347 2358public:
f1339c56
RR
2359 // temporarily just use wxEncodingConverter stuff,
2360 // so that it works while a better implementation is built
e95354ec 2361 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
2362 {
2363 if (name)
267e11c5 2364 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
2365 else
2366 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 2367
8b04d4c4
VZ
2368 Init();
2369 }
2370
e95354ec 2371 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
2372 {
2373 m_enc = enc;
2374
2375 Init();
f1339c56 2376 }
dccce9ea 2377
bde4baac 2378 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
2379 {
2380 size_t inbuf = strlen(psz);
dccce9ea 2381 if (buf)
c643a977
VS
2382 {
2383 if (!m2w.Convert(psz,buf))
2384 return (size_t)-1;
2385 }
f1339c56
RR
2386 return inbuf;
2387 }
dccce9ea 2388
bde4baac 2389 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 2390 {
f8d791e0 2391 const size_t inbuf = wxWcslen(psz);
f1339c56 2392 if (buf)
c643a977
VS
2393 {
2394 if (!w2m.Convert(psz,buf))
2395 return (size_t)-1;
2396 }
dccce9ea 2397
f1339c56
RR
2398 return inbuf;
2399 }
dccce9ea 2400
e95354ec 2401 bool IsOk() const { return m_ok; }
f1339c56
RR
2402
2403public:
8b04d4c4 2404 wxFontEncoding m_enc;
f1339c56 2405 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
2406
2407 // were we initialized successfully?
2408 bool m_ok;
fc7a2a60 2409
e95354ec 2410 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 2411};
6001e347 2412
1e6feb95
VZ
2413#endif // wxUSE_FONTMAP
2414
36acb880
VZ
2415// ============================================================================
2416// wxCSConv implementation
2417// ============================================================================
2418
8b04d4c4 2419void wxCSConv::Init()
6001e347 2420{
e95354ec
VZ
2421 m_name = NULL;
2422 m_convReal = NULL;
2423 m_deferred = true;
2424}
2425
8b04d4c4
VZ
2426wxCSConv::wxCSConv(const wxChar *charset)
2427{
2428 Init();
82713003 2429
e95354ec
VZ
2430 if ( charset )
2431 {
e95354ec
VZ
2432 SetName(charset);
2433 }
bda3d86a
VZ
2434
2435 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
2436}
2437
8b04d4c4
VZ
2438wxCSConv::wxCSConv(wxFontEncoding encoding)
2439{
bda3d86a 2440 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
2441 {
2442 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2443
2444 encoding = wxFONTENCODING_SYSTEM;
2445 }
2446
8b04d4c4
VZ
2447 Init();
2448
bda3d86a 2449 m_encoding = encoding;
8b04d4c4
VZ
2450}
2451
6001e347
RR
2452wxCSConv::~wxCSConv()
2453{
65e50848
JS
2454 Clear();
2455}
2456
54380f29 2457wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 2458 : wxMBConv()
54380f29 2459{
8b04d4c4
VZ
2460 Init();
2461
54380f29 2462 SetName(conv.m_name);
8b04d4c4 2463 m_encoding = conv.m_encoding;
54380f29
GD
2464}
2465
2466wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2467{
2468 Clear();
8b04d4c4 2469
54380f29 2470 SetName(conv.m_name);
8b04d4c4
VZ
2471 m_encoding = conv.m_encoding;
2472
54380f29
GD
2473 return *this;
2474}
2475
65e50848
JS
2476void wxCSConv::Clear()
2477{
8b04d4c4 2478 free(m_name);
e95354ec 2479 delete m_convReal;
8b04d4c4 2480
65e50848 2481 m_name = NULL;
e95354ec 2482 m_convReal = NULL;
6001e347
RR
2483}
2484
2485void wxCSConv::SetName(const wxChar *charset)
2486{
f1339c56
RR
2487 if (charset)
2488 {
2489 m_name = wxStrdup(charset);
e95354ec 2490 m_deferred = true;
f1339c56 2491 }
6001e347
RR
2492}
2493
e95354ec
VZ
2494wxMBConv *wxCSConv::DoCreate() const
2495{
c547282d
VZ
2496 // check for the special case of ASCII or ISO8859-1 charset: as we have
2497 // special knowledge of it anyhow, we don't need to create a special
2498 // conversion object
2499 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 2500 {
e95354ec
VZ
2501 // don't convert at all
2502 return NULL;
2503 }
dccce9ea 2504
e95354ec
VZ
2505 // we trust OS to do conversion better than we can so try external
2506 // conversion methods first
2507 //
2508 // the full order is:
2509 // 1. OS conversion (iconv() under Unix or Win32 API)
2510 // 2. hard coded conversions for UTF
2511 // 3. wxEncodingConverter as fall back
2512
2513 // step (1)
2514#ifdef HAVE_ICONV
c547282d 2515#if !wxUSE_FONTMAP
e95354ec 2516 if ( m_name )
c547282d 2517#endif // !wxUSE_FONTMAP
e95354ec 2518 {
c547282d
VZ
2519 wxString name(m_name);
2520
2521#if wxUSE_FONTMAP
2522 if ( name.empty() )
267e11c5 2523 name = wxFontMapperBase::Get()->GetEncodingName(m_encoding);
c547282d
VZ
2524#endif // wxUSE_FONTMAP
2525
2526 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
e95354ec
VZ
2527 if ( conv->IsOk() )
2528 return conv;
2529
2530 delete conv;
2531 }
2532#endif // HAVE_ICONV
2533
2534#ifdef wxHAVE_WIN32_MB2WC
2535 {
7608a683 2536#if wxUSE_FONTMAP
e95354ec
VZ
2537 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2538 : new wxMBConv_win32(m_encoding);
2539 if ( conv->IsOk() )
2540 return conv;
2541
2542 delete conv;
7608a683
WS
2543#else
2544 return NULL;
2545#endif
e95354ec
VZ
2546 }
2547#endif // wxHAVE_WIN32_MB2WC
d775fa82
WS
2548#if defined(__WXMAC__)
2549 {
5c3c8676 2550 // leave UTF16 and UTF32 to the built-ins of wx
3698ae71 2551 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
5c3c8676 2552 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
d775fa82
WS
2553 {
2554
2d1659cf 2555#if wxUSE_FONTMAP
d775fa82
WS
2556 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2557 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
2558#else
2559 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2560#endif
d775fa82 2561 if ( conv->IsOk() )
f7e98dee
RN
2562 return conv;
2563
2564 delete conv;
2565 }
2566 }
2567#endif
2568#if defined(__WXCOCOA__)
2569 {
2570 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2571 {
2572
a6900d10 2573#if wxUSE_FONTMAP
f7e98dee
RN
2574 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2575 : new wxMBConv_cocoa(m_encoding);
a6900d10
RN
2576#else
2577 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2578#endif
f7e98dee 2579 if ( conv->IsOk() )
d775fa82
WS
2580 return conv;
2581
2582 delete conv;
2583 }
335d31e0
SC
2584 }
2585#endif
e95354ec
VZ
2586 // step (2)
2587 wxFontEncoding enc = m_encoding;
2588#if wxUSE_FONTMAP
c547282d
VZ
2589 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2590 {
2591 // use "false" to suppress interactive dialogs -- we can be called from
2592 // anywhere and popping up a dialog from here is the last thing we want to
2593 // do
267e11c5 2594 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 2595 }
e95354ec
VZ
2596#endif // wxUSE_FONTMAP
2597
2598 switch ( enc )
2599 {
2600 case wxFONTENCODING_UTF7:
2601 return new wxMBConvUTF7;
2602
2603 case wxFONTENCODING_UTF8:
2604 return new wxMBConvUTF8;
2605
e95354ec
VZ
2606 case wxFONTENCODING_UTF16BE:
2607 return new wxMBConvUTF16BE;
2608
2609 case wxFONTENCODING_UTF16LE:
2610 return new wxMBConvUTF16LE;
2611
e95354ec
VZ
2612 case wxFONTENCODING_UTF32BE:
2613 return new wxMBConvUTF32BE;
2614
2615 case wxFONTENCODING_UTF32LE:
2616 return new wxMBConvUTF32LE;
2617
2618 default:
2619 // nothing to do but put here to suppress gcc warnings
2620 ;
2621 }
2622
2623 // step (3)
2624#if wxUSE_FONTMAP
2625 {
2626 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2627 : new wxMBConv_wxwin(m_encoding);
2628 if ( conv->IsOk() )
2629 return conv;
2630
2631 delete conv;
2632 }
2633#endif // wxUSE_FONTMAP
2634
a58d4f4d
VS
2635 // NB: This is a hack to prevent deadlock. What could otherwise happen
2636 // in Unicode build: wxConvLocal creation ends up being here
2637 // because of some failure and logs the error. But wxLog will try to
2638 // attach timestamp, for which it will need wxConvLocal (to convert
2639 // time to char* and then wchar_t*), but that fails, tries to log
2640 // error, but wxLog has a (already locked) critical section that
2641 // guards static buffer.
2642 static bool alreadyLoggingError = false;
2643 if (!alreadyLoggingError)
2644 {
2645 alreadyLoggingError = true;
2646 wxLogError(_("Cannot convert from the charset '%s'!"),
2647 m_name ? m_name
e95354ec
VZ
2648 :
2649#if wxUSE_FONTMAP
267e11c5 2650 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
e95354ec
VZ
2651#else // !wxUSE_FONTMAP
2652 wxString::Format(_("encoding %s"), m_encoding).c_str()
2653#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2654 );
a58d4f4d
VS
2655 alreadyLoggingError = false;
2656 }
e95354ec
VZ
2657
2658 return NULL;
2659}
2660
2661void wxCSConv::CreateConvIfNeeded() const
2662{
2663 if ( m_deferred )
2664 {
2665 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
2666
2667#if wxUSE_INTL
2668 // if we don't have neither the name nor the encoding, use the default
2669 // encoding for this system
2670 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2671 {
4d312c22 2672 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
2673 }
2674#endif // wxUSE_INTL
2675
e95354ec
VZ
2676 self->m_convReal = DoCreate();
2677 self->m_deferred = false;
6001e347 2678 }
6001e347
RR
2679}
2680
2681size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2682{
e95354ec 2683 CreateConvIfNeeded();
dccce9ea 2684
e95354ec
VZ
2685 if (m_convReal)
2686 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
2687
2688 // latin-1 (direct)
4def3b35 2689 size_t len = strlen(psz);
dccce9ea 2690
f1339c56
RR
2691 if (buf)
2692 {
4def3b35 2693 for (size_t c = 0; c <= len; c++)
f1339c56
RR
2694 buf[c] = (unsigned char)(psz[c]);
2695 }
dccce9ea 2696
f1339c56 2697 return len;
6001e347
RR
2698}
2699
2700size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2701{
e95354ec 2702 CreateConvIfNeeded();
dccce9ea 2703
e95354ec
VZ
2704 if (m_convReal)
2705 return m_convReal->WC2MB(buf, psz, n);
1cd52418 2706
f1339c56 2707 // latin-1 (direct)
f8d791e0 2708 const size_t len = wxWcslen(psz);
f1339c56
RR
2709 if (buf)
2710 {
4def3b35 2711 for (size_t c = 0; c <= len; c++)
24642831
VS
2712 {
2713 if (psz[c] > 0xFF)
2714 return (size_t)-1;
907173e5 2715 buf[c] = (char)psz[c];
24642831
VS
2716 }
2717 }
2718 else
2719 {
2720 for (size_t c = 0; c <= len; c++)
2721 {
2722 if (psz[c] > 0xFF)
2723 return (size_t)-1;
2724 }
f1339c56 2725 }
dccce9ea 2726
f1339c56 2727 return len;
6001e347
RR
2728}
2729
bde4baac
VZ
2730// ----------------------------------------------------------------------------
2731// globals
2732// ----------------------------------------------------------------------------
2733
// The converter objects backing the global references and pointers defined
// below. The type of wxConvLibcObj depends on the platform: wxMBConv_win32
// under __WINDOWS__, wxMBConv_mac under __WXMAC__ without __MACH__ and
// wxMBConvLibc everywhere else.
2734#ifdef __WINDOWS__
2735 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
2736#elif defined(__WXMAC__) && !defined(__MACH__)
2737 static wxMBConv_mac wxConvLibcObj ;
bde4baac 2738#else
dcc8fac0 2739 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
2740#endif
2741
2742static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2743static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2744static wxMBConvUTF7 wxConvUTF7Obj;
2745static wxMBConvUTF8 wxConvUTF8Obj;
c12b7f79 2746
bde4baac
VZ
2747WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2748WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2749WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2750WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2751WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2752WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
f5a1953b
VZ
// wxConvFileName points to the UTF-8 converter under __WXOSX__ and to the
// libc one on all the other platforms.
2753WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2754#ifdef __WXOSX__
ea8ce907 2755 wxConvUTF8Obj;
f5a1953b 2756#else
ea8ce907 2757 wxConvLibcObj;
f5a1953b
VZ
2758#endif
2759
bde4baac
VZ
2760
2761#else // !wxUSE_WCHAR_T
2762
2763// stand-ins in absence of wchar_t
// Without wchar_t support the global converters are plain wxMBConv objects
// rather than references to the specialized converter objects.
2764WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2765 wxConvISO8859_1,
2766 wxConvLocal,
2767 wxConvUTF8;
2768
2769#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
2770
2771