]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
move code ignoring VK_SPACE and VK_RETURN WM_CHAR messages to MSWDefWindowProc()...
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
f6bcfd97
BP
15// ============================================================================
16// declarations
17// ============================================================================
18
19// ----------------------------------------------------------------------------
20// headers
21// ----------------------------------------------------------------------------
22
14f355c2 23#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
24 #pragma implementation "strconv.h"
25#endif
26
27// For compilers that support precompilation, includes "wx.h".
28#include "wx/wxprec.h"
29
30#ifdef __BORLANDC__
31 #pragma hdrstop
32#endif
33
373658eb
VZ
34#ifndef WX_PRECOMP
35 #include "wx/intl.h"
36 #include "wx/log.h"
37#endif // WX_PRECOMP
38
bde4baac
VZ
39#include "wx/strconv.h"
40
41#if wxUSE_WCHAR_T
42
0a1c1e62 43#ifdef __WXMSW__
373658eb 44 #include "wx/msw/private.h"
7608a683
WS
45#endif
46
47#ifdef __WINDOWS__
13dd924a 48 #include "wx/msw/missing.h"
0a1c1e62
GRG
49#endif
50
1c193821 51#ifndef __WXWINCE__
1cd52418 52#include <errno.h>
1c193821
JS
53#endif
54
6001e347
RR
55#include <ctype.h>
56#include <string.h>
57#include <stdlib.h>
58
e95354ec
VZ
59#if defined(__WIN32__) && !defined(__WXMICROWIN__)
60 #define wxHAVE_WIN32_MB2WC
61#endif // __WIN32__ but !__WXMICROWIN__
62
373658eb
VZ
63// ----------------------------------------------------------------------------
64// headers
65// ----------------------------------------------------------------------------
7af284fd 66
6001e347 67#ifdef __SALFORDC__
373658eb 68 #include <clib.h>
6001e347
RR
69#endif
70
b040e242 71#ifdef HAVE_ICONV
373658eb 72 #include <iconv.h>
b1d547eb 73 #include "wx/thread.h"
1cd52418 74#endif
1cd52418 75
373658eb
VZ
76#include "wx/encconv.h"
77#include "wx/fontmap.h"
7608a683 78#include "wx/utils.h"
373658eb 79
335d31e0 80#ifdef __WXMAC__
4227afa4
SC
81#include <ATSUnicode.h>
82#include <TextCommon.h>
83#include <TextEncodingConverter.h>
335d31e0
SC
84
85#include "wx/mac/private.h" // includes mac headers
86#endif
373658eb
VZ
87// ----------------------------------------------------------------------------
88// macros
89// ----------------------------------------------------------------------------
3e61dfb0 90
1cd52418 91#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
3a0d76bc 92#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
1cd52418
OK
93
94#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
95 #define WC_NAME "UCS4"
96 #define WC_BSWAP BSWAP_UCS4
97 #ifdef WORDS_BIGENDIAN
98 #define WC_NAME_BEST "UCS-4BE"
99 #else
100 #define WC_NAME_BEST "UCS-4LE"
101 #endif
1cd52418 102#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
103 #define WC_NAME "UTF16"
104 #define WC_BSWAP BSWAP_UTF16
a3f2769e 105 #define WC_UTF16
3a0d76bc
VS
106 #ifdef WORDS_BIGENDIAN
107 #define WC_NAME_BEST "UTF-16BE"
108 #else
109 #define WC_NAME_BEST "UTF-16LE"
110 #endif
bab1e722 111#else // sizeof(wchar_t) != 2 nor 4
bde4baac
VZ
112 // does this ever happen?
113 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1cd52418
OK
114#endif
115
373658eb
VZ
116// ============================================================================
117// implementation
118// ============================================================================
119
120// ----------------------------------------------------------------------------
c91830cb 121// UTF-16 en/decoding to/from UCS-4
373658eb 122// ----------------------------------------------------------------------------
6001e347 123
b0a6bb75 124
c91830cb 125static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 126{
dccce9ea 127 if (input<=0xffff)
4def3b35 128 {
999836aa
VZ
129 if (output)
130 *output = (wxUint16) input;
4def3b35 131 return 1;
dccce9ea
VZ
132 }
133 else if (input>=0x110000)
4def3b35
VS
134 {
135 return (size_t)-1;
dccce9ea
VZ
136 }
137 else
4def3b35 138 {
dccce9ea 139 if (output)
4def3b35 140 {
c91830cb 141 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
999836aa 142 *output = (wxUint16) ((input&0x3ff)+0xdc00);
4def3b35
VS
143 }
144 return 2;
1cd52418 145 }
1cd52418
OK
146}
147
c91830cb 148static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 149{
dccce9ea 150 if ((*input<0xd800) || (*input>0xdfff))
4def3b35
VS
151 {
152 output = *input;
153 return 1;
dccce9ea 154 }
cdb14ecb 155 else if ((input[1]<0xdc00) || (input[1]>0xdfff))
4def3b35
VS
156 {
157 output = *input;
158 return (size_t)-1;
dccce9ea
VZ
159 }
160 else
4def3b35
VS
161 {
162 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
163 return 2;
164 }
1cd52418
OK
165}
166
b0a6bb75 167
f6bcfd97 168// ----------------------------------------------------------------------------
6001e347 169// wxMBConv
f6bcfd97 170// ----------------------------------------------------------------------------
2c53a80a
WS
171
172wxMBConv::~wxMBConv()
173{
174 // nothing to do here (necessary for Darwin linking probably)
175}
6001e347 176
6001e347
RR
177const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
178{
2b5f62a0 179 if ( psz )
6001e347 180 {
2b5f62a0
VZ
181 // calculate the length of the buffer needed first
182 size_t nLen = MB2WC(NULL, psz, 0);
183 if ( nLen != (size_t)-1 )
184 {
185 // now do the actual conversion
186 wxWCharBuffer buf(nLen);
635f33ce
VS
187 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
188 if ( nLen != (size_t)-1 )
189 {
190 return buf;
191 }
2b5f62a0 192 }
f6bcfd97 193 }
2b5f62a0
VZ
194
195 wxWCharBuffer buf((wchar_t *)NULL);
196
197 return buf;
6001e347
RR
198}
199
e5cceba0 200const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 201{
2b5f62a0
VZ
202 if ( pwz )
203 {
204 size_t nLen = WC2MB(NULL, pwz, 0);
205 if ( nLen != (size_t)-1 )
206 {
c91830cb 207 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
635f33ce
VS
208 nLen = WC2MB(buf.data(), pwz, nLen + 4);
209 if ( nLen != (size_t)-1 )
210 {
211 return buf;
212 }
2b5f62a0
VZ
213 }
214 }
215
216 wxCharBuffer buf((char *)NULL);
e5cceba0 217
e5cceba0 218 return buf;
6001e347
RR
219}
220
f5fb6871 221const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 222{
f5fb6871
RN
223 wxASSERT(pOutSize != NULL);
224
e4e3bbb4
RN
225 const char* szEnd = szString + nStringLen + 1;
226 const char* szPos = szString;
227 const char* szStart = szPos;
228
229 size_t nActualLength = 0;
f5fb6871
RN
230 size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
231
232 wxWCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
233
234 //Convert the string until the length() is reached, continuing the
235 //loop every time a null character is reached
236 while(szPos != szEnd)
237 {
238 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
239
240 //Get the length of the current (sub)string
241 size_t nLen = MB2WC(NULL, szPos, 0);
242
243 //Invalid conversion?
244 if( nLen == (size_t)-1 )
f5fb6871
RN
245 {
246 *pOutSize = 0;
247 theBuffer.data()[0u] = wxT('\0');
248 return theBuffer;
249 }
250
e4e3bbb4
RN
251
252 //Increase the actual length (+1 for current null character)
253 nActualLength += nLen + 1;
254
f5fb6871
RN
255 //if buffer too big, realloc the buffer
256 if (nActualLength > (nCurrentSize+1))
257 {
258 wxWCharBuffer theNewBuffer(nCurrentSize << 1);
259 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
260 theBuffer = theNewBuffer;
261 nCurrentSize <<= 1;
262 }
263
264 //Convert the current (sub)string
265 if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 266 {
f5fb6871
RN
267 *pOutSize = 0;
268 theBuffer.data()[0u] = wxT('\0');
269 return theBuffer;
e4e3bbb4
RN
270 }
271
272 //Increment to next (sub)string
273 //Note that we have to use strlen here instead of nLen
274 //here because XX2XX gives us the size of the output buffer,
275 //not neccessarly the length of the string
276 szPos += strlen(szPos) + 1;
277 }
278
f5fb6871
RN
279 //success - return actual length and the buffer
280 *pOutSize = nActualLength;
3698ae71 281 return theBuffer;
e4e3bbb4
RN
282}
283
f5fb6871 284const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 285{
f5fb6871
RN
286 wxASSERT(pOutSize != NULL);
287
e4e3bbb4
RN
288 const wchar_t* szEnd = szString + nStringLen + 1;
289 const wchar_t* szPos = szString;
290 const wchar_t* szStart = szPos;
291
292 size_t nActualLength = 0;
f5fb6871
RN
293 size_t nCurrentSize = nStringLen << 2; //try * 4 first
294
295 wxCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
296
297 //Convert the string until the length() is reached, continuing the
298 //loop every time a null character is reached
299 while(szPos != szEnd)
300 {
301 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
302
303 //Get the length of the current (sub)string
304 size_t nLen = WC2MB(NULL, szPos, 0);
305
306 //Invalid conversion?
307 if( nLen == (size_t)-1 )
f5fb6871
RN
308 {
309 *pOutSize = 0;
310 theBuffer.data()[0u] = wxT('\0');
311 return theBuffer;
312 }
e4e3bbb4
RN
313
314 //Increase the actual length (+1 for current null character)
315 nActualLength += nLen + 1;
3698ae71 316
f5fb6871
RN
317 //if buffer too big, realloc the buffer
318 if (nActualLength > (nCurrentSize+1))
319 {
320 wxCharBuffer theNewBuffer(nCurrentSize << 1);
321 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
322 theBuffer = theNewBuffer;
323 nCurrentSize <<= 1;
324 }
325
326 //Convert the current (sub)string
327 if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 328 {
f5fb6871
RN
329 *pOutSize = 0;
330 theBuffer.data()[0u] = wxT('\0');
331 return theBuffer;
e4e3bbb4
RN
332 }
333
334 //Increment to next (sub)string
335 //Note that we have to use wxWcslen here instead of nLen
336 //here because XX2XX gives us the size of the output buffer,
337 //not neccessarly the length of the string
338 szPos += wxWcslen(szPos) + 1;
339 }
340
f5fb6871
RN
341 //success - return actual length and the buffer
342 *pOutSize = nActualLength;
3698ae71 343 return theBuffer;
e4e3bbb4
RN
344}
345
6001e347 346// ----------------------------------------------------------------------------
bde4baac 347// wxMBConvLibc
6001e347
RR
348// ----------------------------------------------------------------------------
349
bde4baac
VZ
350size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
351{
352 return wxMB2WC(buf, psz, n);
353}
354
355size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
356{
357 return wxWC2MB(buf, psz, n);
358}
e1bfe89e 359
66bf0099 360#ifdef __UNIX__
c12b7f79 361
e1bfe89e 362// ----------------------------------------------------------------------------
66bf0099 363// wxConvBrokenFileNames
e1bfe89e
RR
364// ----------------------------------------------------------------------------
365
c12b7f79 366wxConvBrokenFileNames::wxConvBrokenFileNames()
ea8ce907 367{
c12b7f79
VZ
368 // decide which conversion to use for the file names
369
370 // (1) this variable exists for the sole purpose of specifying the encoding
371 // of the filenames for GTK+ programs, so use it if it is set
914955aa
MW
372 wxString encName(wxGetenv(_T("G_FILENAME_ENCODING")));
373 encName.MakeUpper();
374 if ( !encName.empty() && encName != _T("UTF-8") && encName != _T("UTF8") )
c12b7f79
VZ
375 {
376 m_conv = new wxCSConv(encName);
377 }
378 else // no G_FILENAME_ENCODING
379 {
914955aa
MW
380 if ( encName.empty() )
381 encName = wxLocale::GetSystemEncodingName().Upper();
382
c12b7f79
VZ
383 // (2) if a non default locale is set, assume that the user wants his
384 // filenames in this locale too
914955aa
MW
385 if ( !encName.empty() && encName != _T("UTF-8") && encName != _T("UTF8") )
386 {
387 wxSetEnv(_T("G_FILENAME_ENCODING"), encName);
388 m_conv = new wxMBConvLibc;
389 }
390 else
c12b7f79 391 {
c12b7f79 392 // (3) finally use UTF-8 by default
914955aa 393 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
c12b7f79
VZ
394 }
395 }
ea8ce907
RR
396}
397
c12b7f79
VZ
398size_t
399wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
400 const char *psz,
401 size_t outputSize) const
e1bfe89e 402{
c12b7f79 403 return m_conv->MB2WC( outputBuf, psz, outputSize );
e1bfe89e
RR
404}
405
c12b7f79
VZ
406size_t
407wxConvBrokenFileNames::WC2MB(char *outputBuf,
408 const wchar_t *psz,
409 size_t outputSize) const
e1bfe89e 410{
c12b7f79 411 return m_conv->WC2MB( outputBuf, psz, outputSize );
e1bfe89e
RR
412}
413
66bf0099 414#endif
c12b7f79 415
bde4baac 416// ----------------------------------------------------------------------------
3698ae71 417// UTF-7
bde4baac 418// ----------------------------------------------------------------------------
6001e347 419
15f2ee32 420// Implementation (C) 2004 Fredrik Roubert
6001e347 421
15f2ee32
RN
//
// BASE64 decoding table: indexed by the character code, gives the 6 bit
// value of a BASE64 digit or 0xff for any character which is not one
//
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0'..'9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x5f: 'A'..'Z'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x7f: 'a'..'z'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
460
461size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
462{
15f2ee32
RN
463 size_t len = 0;
464
465 while (*psz && ((!buf) || (len < n)))
466 {
467 unsigned char cc = *psz++;
468 if (cc != '+')
469 {
470 // plain ASCII char
471 if (buf)
472 *buf++ = cc;
473 len++;
474 }
475 else if (*psz == '-')
476 {
477 // encoded plus sign
478 if (buf)
479 *buf++ = cc;
480 len++;
481 psz++;
482 }
483 else
484 {
485 // BASE64 encoded string
486 bool lsb;
487 unsigned char c;
488 unsigned int d, l;
489 for (lsb = false, d = 0, l = 0;
490 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
491 {
492 d <<= 6;
493 d += cc;
494 for (l += 6; l >= 8; lsb = !lsb)
495 {
6356d52a 496 c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
497 if (lsb)
498 {
499 if (buf)
500 *buf++ |= c;
501 len ++;
502 }
503 else
504 if (buf)
6356d52a 505 *buf = (wchar_t)(c << 8);
15f2ee32
RN
506 }
507 }
508 if (*psz == '-')
509 psz++;
510 }
511 }
512 if (buf && (len < n))
513 *buf = 0;
514 return len;
6001e347
RR
515}
516
15f2ee32
RN
//
// BASE64 encoding table: the digit used for each 6 bit value
//
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    '+', '/'
};
531
//
// UTF-7 encoding table, indexed by the ASCII code of the character
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    // 0x00 - 0x1f
    3, 3, 3, 3, 3, 3, 3, 3,
    3, 2, 2, 3, 3, 2, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3,
    // 0x20 - 0x3f
    2, 1, 1, 1, 1, 1, 1, 0,
    0, 0, 1, 3, 0, 0, 0, 3,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 1, 1, 1, 0,
    // 0x40 - 0x5f
    1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 3, 1, 1, 1,
    // 0x60 - 0x7f
    1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 1, 1, 3, 3
};
551
667e5b3e 552size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32
RN
553{
554
555
556 size_t len = 0;
557
558 while (*psz && ((!buf) || (len < n)))
559 {
560 wchar_t cc = *psz++;
561 if (cc < 0x80 && utf7encode[cc] < 1)
562 {
563 // plain ASCII char
564 if (buf)
565 *buf++ = (char)cc;
566 len++;
567 }
568#ifndef WC_UTF16
79c78d42 569 else if (((wxUint32)cc) > 0xffff)
b2c13097 570 {
15f2ee32
RN
571 // no surrogate pair generation (yet?)
572 return (size_t)-1;
573 }
574#endif
575 else
576 {
577 if (buf)
578 *buf++ = '+';
579 len++;
580 if (cc != '+')
581 {
582 // BASE64 encode string
583 unsigned int lsb, d, l;
584 for (d = 0, l = 0;; psz++)
585 {
586 for (lsb = 0; lsb < 2; lsb ++)
587 {
588 d <<= 8;
589 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
590
591 for (l += 8; l >= 6; )
592 {
593 l -= 6;
594 if (buf)
595 *buf++ = utf7enb64[(d >> l) % 64];
596 len++;
597 }
598 }
599 cc = *psz;
600 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
601 break;
602 }
603 if (l != 0)
604 {
605 if (buf)
606 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
607 len++;
608 }
609 }
610 if (buf)
611 *buf++ = '-';
612 len++;
613 }
614 }
615 if (buf && (len < n))
616 *buf = 0;
617 return len;
6001e347
RR
618}
619
f6bcfd97 620// ----------------------------------------------------------------------------
6001e347 621// UTF-8
f6bcfd97 622// ----------------------------------------------------------------------------
6001e347 623
dccce9ea 624static wxUint32 utf8_max[]=
4def3b35 625 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347 626
3698ae71
VZ
627// boundaries of the private use area we use to (temporarily) remap invalid
628// characters invalid in a UTF-8 encoded string
ea8ce907
RR
629const wxUint32 wxUnicodePUA = 0x100000;
630const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
631
6001e347
RR
632size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
633{
4def3b35
VS
634 size_t len = 0;
635
dccce9ea 636 while (*psz && ((!buf) || (len < n)))
4def3b35 637 {
ea8ce907
RR
638 const char *opsz = psz;
639 bool invalid = false;
4def3b35
VS
640 unsigned char cc = *psz++, fc = cc;
641 unsigned cnt;
dccce9ea 642 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 643 fc <<= 1;
dccce9ea 644 if (!cnt)
4def3b35
VS
645 {
646 // plain ASCII char
dccce9ea 647 if (buf)
4def3b35
VS
648 *buf++ = cc;
649 len++;
561488ef
MW
650
651 // escape the escape character for octal escapes
652 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
653 && cc == '\\' && (!buf || len < n))
654 {
655 if (buf)
656 *buf++ = cc;
657 len++;
658 }
dccce9ea
VZ
659 }
660 else
4def3b35
VS
661 {
662 cnt--;
dccce9ea 663 if (!cnt)
4def3b35
VS
664 {
665 // invalid UTF-8 sequence
ea8ce907 666 invalid = true;
dccce9ea
VZ
667 }
668 else
4def3b35
VS
669 {
670 unsigned ocnt = cnt - 1;
671 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 672 while (cnt--)
4def3b35 673 {
ea8ce907 674 cc = *psz;
dccce9ea 675 if ((cc & 0xC0) != 0x80)
4def3b35
VS
676 {
677 // invalid UTF-8 sequence
ea8ce907
RR
678 invalid = true;
679 break;
4def3b35 680 }
ea8ce907 681 psz++;
4def3b35
VS
682 res = (res << 6) | (cc & 0x3f);
683 }
ea8ce907 684 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
685 {
686 // illegal UTF-8 encoding
ea8ce907 687 invalid = true;
4def3b35 688 }
ea8ce907
RR
689 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
690 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
691 {
692 // if one of our PUA characters turns up externally
693 // it must also be treated as an illegal sequence
694 // (a bit like you have to escape an escape character)
695 invalid = true;
696 }
697 else
698 {
1cd52418 699#ifdef WC_UTF16
ea8ce907
RR
700 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
701 size_t pa = encode_utf16(res, (wxUint16 *)buf);
702 if (pa == (size_t)-1)
703 {
704 invalid = true;
705 }
706 else
707 {
708 if (buf)
709 buf += pa;
710 len += pa;
711 }
373658eb 712#else // !WC_UTF16
ea8ce907
RR
713 if (buf)
714 *buf++ = res;
715 len++;
373658eb 716#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
717 }
718 }
719 if (invalid)
720 {
721 if (m_options & MAP_INVALID_UTF8_TO_PUA)
722 {
723 while (opsz < psz && (!buf || len < n))
724 {
725#ifdef WC_UTF16
726 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
727 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
728 wxASSERT(pa != (size_t)-1);
729 if (buf)
730 buf += pa;
731 opsz++;
732 len += pa;
733#else
734 if (buf)
735 *buf++ = wxUnicodePUA + (unsigned char)*opsz;
736 opsz++;
737 len++;
738#endif
739 }
740 }
3698ae71 741 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
742 {
743 while (opsz < psz && (!buf || len < n))
744 {
3698ae71
VZ
745 if ( buf && len + 3 < n )
746 {
747 unsigned char n = *opsz;
748 *buf++ = L'\\';
b2c13097
WS
749 *buf++ = (wchar_t)( L'0' + n / 0100 );
750 *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
751 *buf++ = (wchar_t)( L'0' + n % 010 );
3698ae71 752 }
ea8ce907
RR
753 opsz++;
754 len += 4;
755 }
756 }
3698ae71 757 else // MAP_INVALID_UTF8_NOT
ea8ce907
RR
758 {
759 return (size_t)-1;
760 }
4def3b35
VS
761 }
762 }
6001e347 763 }
dccce9ea 764 if (buf && (len < n))
4def3b35
VS
765 *buf = 0;
766 return len;
6001e347
RR
767}
768
3698ae71
VZ
// true if wch is one of the octal digits L'0'..L'7'
static inline bool isoctal(wchar_t wch)
{
    if (wch < L'0')
        return false;

    return wch <= L'7';
}
773
6001e347
RR
774size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
775{
4def3b35 776 size_t len = 0;
6001e347 777
dccce9ea 778 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
779 {
780 wxUint32 cc;
1cd52418 781#ifdef WC_UTF16
b5153fd8
VZ
782 // cast is ok for WC_UTF16
783 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 784 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 785#else
4def3b35
VS
786 cc=(*psz++) & 0x7fffffff;
787#endif
3698ae71
VZ
788
789 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
790 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 791 {
dccce9ea 792 if (buf)
ea8ce907 793 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 794 len++;
3698ae71 795 }
561488ef
MW
796 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
797 && cc == L'\\' && psz[0] == L'\\' )
798 {
799 if (buf)
800 *buf++ = (char)cc;
801 psz++;
802 len++;
803 }
3698ae71
VZ
804 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
805 cc == L'\\' &&
806 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 807 {
dccce9ea 808 if (buf)
3698ae71 809 {
b2c13097
WS
810 *buf++ = (char) ((psz[0] - L'0')*0100 +
811 (psz[1] - L'0')*010 +
812 (psz[2] - L'0'));
3698ae71
VZ
813 }
814
815 psz += 3;
ea8ce907
RR
816 len++;
817 }
818 else
819 {
820 unsigned cnt;
821 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
822 if (!cnt)
4def3b35 823 {
ea8ce907
RR
824 // plain ASCII char
825 if (buf)
826 *buf++ = (char) cc;
827 len++;
828 }
829
830 else
831 {
832 len += cnt + 1;
833 if (buf)
834 {
835 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
836 while (cnt--)
837 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
838 }
4def3b35
VS
839 }
840 }
6001e347 841 }
4def3b35 842
3698ae71
VZ
843 if (buf && (len<n))
844 *buf = 0;
adb45366 845
4def3b35 846 return len;
6001e347
RR
847}
848
c91830cb
VZ
849// ----------------------------------------------------------------------------
850// UTF-16
851// ----------------------------------------------------------------------------
852
853#ifdef WORDS_BIGENDIAN
bde4baac
VZ
854 #define wxMBConvUTF16straight wxMBConvUTF16BE
855 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 856#else
bde4baac
VZ
857 #define wxMBConvUTF16swap wxMBConvUTF16BE
858 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
859#endif
860
861
c91830cb
VZ
862#ifdef WC_UTF16
863
c91830cb
VZ
864// copy 16bit MB to 16bit String
865size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
866{
867 size_t len=0;
868
869 while (*(wxUint16*)psz && (!buf || len < n))
870 {
871 if (buf)
872 *buf++ = *(wxUint16*)psz;
873 len++;
874
875 psz += sizeof(wxUint16);
876 }
877 if (buf && len<n) *buf=0;
878
879 return len;
880}
881
882
883// copy 16bit String to 16bit MB
884size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
885{
886 size_t len=0;
887
888 while (*psz && (!buf || len < n))
889 {
890 if (buf)
891 {
892 *(wxUint16*)buf = *psz;
893 buf += sizeof(wxUint16);
894 }
895 len += sizeof(wxUint16);
896 psz++;
897 }
898 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
899
900 return len;
901}
902
903
904// swap 16bit MB to 16bit String
905size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
906{
907 size_t len=0;
908
909 while (*(wxUint16*)psz && (!buf || len < n))
910 {
911 if (buf)
912 {
913 ((char *)buf)[0] = psz[1];
914 ((char *)buf)[1] = psz[0];
915 buf++;
916 }
917 len++;
918 psz += sizeof(wxUint16);
919 }
920 if (buf && len<n) *buf=0;
921
922 return len;
923}
924
925
926// swap 16bit MB to 16bit String
927size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
928{
929 size_t len=0;
930
931 while (*psz && (!buf || len < n))
932 {
933 if (buf)
934 {
935 *buf++ = ((char*)psz)[1];
936 *buf++ = ((char*)psz)[0];
937 }
938 len += sizeof(wxUint16);
939 psz++;
940 }
941 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
942
943 return len;
944}
945
946
947#else // WC_UTF16
948
949
950// copy 16bit MB to 32bit String
951size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
952{
953 size_t len=0;
954
955 while (*(wxUint16*)psz && (!buf || len < n))
956 {
957 wxUint32 cc;
958 size_t pa=decode_utf16((wxUint16*)psz, cc);
959 if (pa == (size_t)-1)
960 return pa;
961
962 if (buf)
963 *buf++ = cc;
964 len++;
965 psz += pa * sizeof(wxUint16);
966 }
967 if (buf && len<n) *buf=0;
968
969 return len;
970}
971
972
973// copy 32bit String to 16bit MB
974size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
975{
976 size_t len=0;
977
978 while (*psz && (!buf || len < n))
979 {
980 wxUint16 cc[2];
981 size_t pa=encode_utf16(*psz, cc);
982
983 if (pa == (size_t)-1)
984 return pa;
985
986 if (buf)
987 {
69b80d28 988 *(wxUint16*)buf = cc[0];
b5153fd8 989 buf += sizeof(wxUint16);
c91830cb 990 if (pa > 1)
69b80d28
VZ
991 {
992 *(wxUint16*)buf = cc[1];
993 buf += sizeof(wxUint16);
994 }
c91830cb
VZ
995 }
996
997 len += pa*sizeof(wxUint16);
998 psz++;
999 }
1000 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1001
1002 return len;
1003}
1004
1005
1006// swap 16bit MB to 32bit String
1007size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1008{
1009 size_t len=0;
1010
1011 while (*(wxUint16*)psz && (!buf || len < n))
1012 {
1013 wxUint32 cc;
1014 char tmp[4];
1015 tmp[0]=psz[1]; tmp[1]=psz[0];
1016 tmp[2]=psz[3]; tmp[3]=psz[2];
1017
1018 size_t pa=decode_utf16((wxUint16*)tmp, cc);
1019 if (pa == (size_t)-1)
1020 return pa;
1021
1022 if (buf)
1023 *buf++ = cc;
1024
1025 len++;
1026 psz += pa * sizeof(wxUint16);
1027 }
1028 if (buf && len<n) *buf=0;
1029
1030 return len;
1031}
1032
1033
1034// swap 32bit String to 16bit MB
1035size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1036{
1037 size_t len=0;
1038
1039 while (*psz && (!buf || len < n))
1040 {
1041 wxUint16 cc[2];
1042 size_t pa=encode_utf16(*psz, cc);
1043
1044 if (pa == (size_t)-1)
1045 return pa;
1046
1047 if (buf)
1048 {
1049 *buf++ = ((char*)cc)[1];
1050 *buf++ = ((char*)cc)[0];
1051 if (pa > 1)
1052 {
1053 *buf++ = ((char*)cc)[3];
1054 *buf++ = ((char*)cc)[2];
1055 }
1056 }
1057
1058 len += pa*sizeof(wxUint16);
1059 psz++;
1060 }
1061 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1062
1063 return len;
1064}
1065
1066#endif // WC_UTF16
1067
1068
1069// ----------------------------------------------------------------------------
1070// UTF-32
1071// ----------------------------------------------------------------------------
1072
1073#ifdef WORDS_BIGENDIAN
1074#define wxMBConvUTF32straight wxMBConvUTF32BE
1075#define wxMBConvUTF32swap wxMBConvUTF32LE
1076#else
1077#define wxMBConvUTF32swap wxMBConvUTF32BE
1078#define wxMBConvUTF32straight wxMBConvUTF32LE
1079#endif
1080
1081
1082WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1083WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1084
1085
1086#ifdef WC_UTF16
1087
1088// copy 32bit MB to 16bit String
1089size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1090{
1091 size_t len=0;
1092
1093 while (*(wxUint32*)psz && (!buf || len < n))
1094 {
1095 wxUint16 cc[2];
1096
1097 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1098 if (pa == (size_t)-1)
1099 return pa;
1100
1101 if (buf)
1102 {
1103 *buf++ = cc[0];
1104 if (pa > 1)
1105 *buf++ = cc[1];
1106 }
1107 len += pa;
1108 psz += sizeof(wxUint32);
1109 }
1110 if (buf && len<n) *buf=0;
1111
1112 return len;
1113}
1114
1115
1116// copy 16bit String to 32bit MB
1117size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1118{
1119 size_t len=0;
1120
1121 while (*psz && (!buf || len < n))
1122 {
1123 wxUint32 cc;
1124
b5153fd8
VZ
1125 // cast is ok for WC_UTF16
1126 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
1127 if (pa == (size_t)-1)
1128 return pa;
1129
1130 if (buf)
1131 {
1132 *(wxUint32*)buf = cc;
1133 buf += sizeof(wxUint32);
1134 }
1135 len += sizeof(wxUint32);
1136 psz += pa;
1137 }
b5153fd8
VZ
1138
1139 if (buf && len<=n-sizeof(wxUint32))
1140 *(wxUint32*)buf=0;
c91830cb
VZ
1141
1142 return len;
1143}
1144
1145
1146
1147// swap 32bit MB to 16bit String
1148size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1149{
1150 size_t len=0;
1151
1152 while (*(wxUint32*)psz && (!buf || len < n))
1153 {
1154 char tmp[4];
1155 tmp[0] = psz[3]; tmp[1] = psz[2];
1156 tmp[2] = psz[1]; tmp[3] = psz[0];
1157
1158
1159 wxUint16 cc[2];
1160
1161 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1162 if (pa == (size_t)-1)
1163 return pa;
1164
1165 if (buf)
1166 {
1167 *buf++ = cc[0];
1168 if (pa > 1)
1169 *buf++ = cc[1];
1170 }
1171 len += pa;
1172 psz += sizeof(wxUint32);
1173 }
b5153fd8
VZ
1174
1175 if (buf && len<n)
1176 *buf=0;
c91830cb
VZ
1177
1178 return len;
1179}
1180
1181
1182// swap 16bit String to 32bit MB
1183size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1184{
1185 size_t len=0;
1186
1187 while (*psz && (!buf || len < n))
1188 {
1189 char cc[4];
1190
b5153fd8
VZ
1191 // cast is ok for WC_UTF16
1192 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
1193 if (pa == (size_t)-1)
1194 return pa;
1195
1196 if (buf)
1197 {
1198 *buf++ = cc[3];
1199 *buf++ = cc[2];
1200 *buf++ = cc[1];
1201 *buf++ = cc[0];
1202 }
1203 len += sizeof(wxUint32);
1204 psz += pa;
1205 }
b5153fd8
VZ
1206
1207 if (buf && len<=n-sizeof(wxUint32))
1208 *(wxUint32*)buf=0;
c91830cb
VZ
1209
1210 return len;
1211}
1212
1213#else // WC_UTF16
1214
1215
1216// copy 32bit MB to 32bit String
1217size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1218{
1219 size_t len=0;
1220
1221 while (*(wxUint32*)psz && (!buf || len < n))
1222 {
1223 if (buf)
1224 *buf++ = *(wxUint32*)psz;
1225 len++;
1226 psz += sizeof(wxUint32);
1227 }
b5153fd8
VZ
1228
1229 if (buf && len<n)
1230 *buf=0;
c91830cb
VZ
1231
1232 return len;
1233}
1234
1235
1236// copy 32bit String to 32bit MB
1237size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1238{
1239 size_t len=0;
1240
1241 while (*psz && (!buf || len < n))
1242 {
1243 if (buf)
1244 {
1245 *(wxUint32*)buf = *psz;
1246 buf += sizeof(wxUint32);
1247 }
1248
1249 len += sizeof(wxUint32);
1250 psz++;
1251 }
1252
b5153fd8
VZ
1253 if (buf && len<=n-sizeof(wxUint32))
1254 *(wxUint32*)buf=0;
c91830cb
VZ
1255
1256 return len;
1257}
1258
1259
1260// swap 32bit MB to 32bit String
1261size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1262{
1263 size_t len=0;
1264
1265 while (*(wxUint32*)psz && (!buf || len < n))
1266 {
1267 if (buf)
1268 {
1269 ((char *)buf)[0] = psz[3];
1270 ((char *)buf)[1] = psz[2];
1271 ((char *)buf)[2] = psz[1];
1272 ((char *)buf)[3] = psz[0];
1273 buf++;
1274 }
1275 len++;
1276 psz += sizeof(wxUint32);
1277 }
b5153fd8
VZ
1278
1279 if (buf && len<n)
1280 *buf=0;
c91830cb
VZ
1281
1282 return len;
1283}
1284
1285
1286// swap 32bit String to 32bit MB
1287size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1288{
1289 size_t len=0;
1290
1291 while (*psz && (!buf || len < n))
1292 {
1293 if (buf)
1294 {
1295 *buf++ = ((char *)psz)[3];
1296 *buf++ = ((char *)psz)[2];
1297 *buf++ = ((char *)psz)[1];
1298 *buf++ = ((char *)psz)[0];
1299 }
1300 len += sizeof(wxUint32);
1301 psz++;
1302 }
b5153fd8
VZ
1303
1304 if (buf && len<=n-sizeof(wxUint32))
1305 *(wxUint32*)buf=0;
c91830cb
VZ
1306
1307 return len;
1308}
1309
1310
1311#endif // WC_UTF16
1312
1313
36acb880
VZ
1314// ============================================================================
1315// The classes doing conversion using the iconv_xxx() functions
1316// ============================================================================
3caec1bb 1317
b040e242 1318#ifdef HAVE_ICONV
3a0d76bc 1319
b1d547eb
VS
1320// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1321// E2BIG if output buffer is _exactly_ as big as needed. Such case is
1322// (unless there's yet another bug in glibc) the only case when iconv()
1323// returns with (size_t)-1 (which means error) and says there are 0 bytes
1324// left in the input buffer -- when _real_ error occurs,
1325// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1326// iconv() failure.
3caec1bb
VS
1327// [This bug does not appear in glibc 2.2.]
1328#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1329#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1330 (errno != E2BIG || bufLeft != 0))
1331#else
1332#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1333#endif
1334
ab217dba 1335#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880
VZ
1336
1337// ----------------------------------------------------------------------------
e95354ec 1338// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1339// ----------------------------------------------------------------------------
1340
e95354ec 1341class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1342{
1343public:
e95354ec
VZ
1344 wxMBConv_iconv(const wxChar *name);
1345 virtual ~wxMBConv_iconv();
36acb880 1346
bde4baac
VZ
1347 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1348 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1349
e95354ec 1350 bool IsOk() const
36acb880
VZ
1351 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1352
1353protected:
1354 // the iconv handlers used to translate from multibyte to wide char and in
1355 // the other direction
1356 iconv_t m2w,
1357 w2m;
b1d547eb
VS
1358#if wxUSE_THREADS
1359 // guards access to m2w and w2m objects
1360 wxMutex m_iconvMutex;
1361#endif
36acb880
VZ
1362
1363private:
e95354ec 1364 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
1365 // available on this machine, it will remain NULL
1366 static const char *ms_wcCharsetName;
1367
1368 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1369 // different endian-ness than the native one
405d8f46 1370 static bool ms_wcNeedsSwap;
36acb880
VZ
1371};
1372
e95354ec
VZ
1373const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1374bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1375
e95354ec 1376wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 1377{
04c79127
RR
1378 // Do it the hard way
1379 char cname[100];
1380 for (size_t i = 0; i < wxStrlen(name)+1; i++)
1381 cname[i] = (char) name[i];
1382
36acb880
VZ
1383 // check for charset that represents wchar_t:
1384 if (ms_wcCharsetName == NULL)
f1339c56 1385 {
e95354ec 1386 ms_wcNeedsSwap = false;
dccce9ea 1387
36acb880
VZ
1388 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1389 ms_wcCharsetName = WC_NAME_BEST;
04c79127 1390 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 1391
36acb880
VZ
1392 if (m2w == (iconv_t)-1)
1393 {
1394 // try charset w/o bytesex info (e.g. "UCS4")
1395 // and check for bytesex ourselves:
1396 ms_wcCharsetName = WC_NAME;
04c79127 1397 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
1398
1399 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
1400 if (m2w == (iconv_t)-1)
1401 {
36acb880 1402 ms_wcCharsetName = "WCHAR_T";
04c79127 1403 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 1404 }
3a0d76bc 1405
36acb880
VZ
1406 if (m2w != (iconv_t)-1)
1407 {
1408 char buf[2], *bufPtr;
1409 wchar_t wbuf[2], *wbufPtr;
1410 size_t insz, outsz;
1411 size_t res;
1412
1413 buf[0] = 'A';
1414 buf[1] = 0;
1415 wbuf[0] = 0;
1416 insz = 2;
1417 outsz = SIZEOF_WCHAR_T * 2;
1418 wbufPtr = wbuf;
1419 bufPtr = buf;
1420
1421 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1422 (char**)&wbufPtr, &outsz);
1423
1424 if (ICONV_FAILED(res, insz))
3a0d76bc 1425 {
36acb880
VZ
1426 ms_wcCharsetName = NULL;
1427 wxLogLastError(wxT("iconv"));
2b5f62a0 1428 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
1429 }
1430 else
1431 {
36acb880 1432 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
1433 }
1434 }
36acb880
VZ
1435 else
1436 {
1437 ms_wcCharsetName = NULL;
373658eb 1438
77ffb593 1439 // VS: we must not output an error here, since wxWidgets will safely
957686c8
VS
1440 // fall back to using wxEncodingConverter.
1441 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1442 //wxLogError(
36acb880 1443 }
3a0d76bc 1444 }
36acb880 1445 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 1446 }
36acb880 1447 else // we already have ms_wcCharsetName
3caec1bb 1448 {
04c79127 1449 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 1450 }
dccce9ea 1451
36acb880
VZ
1452 // NB: don't ever pass NULL to iconv_open(), it may crash!
1453 if ( ms_wcCharsetName )
f1339c56 1454 {
04c79127 1455 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 1456 }
405d8f46
VZ
1457 else
1458 {
1459 w2m = (iconv_t)-1;
1460 }
36acb880 1461}
3caec1bb 1462
e95354ec 1463wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
1464{
1465 if ( m2w != (iconv_t)-1 )
1466 iconv_close(m2w);
1467 if ( w2m != (iconv_t)-1 )
1468 iconv_close(w2m);
1469}
3a0d76bc 1470
bde4baac 1471size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1472{
b1d547eb
VS
1473#if wxUSE_THREADS
1474 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1475 // Unfortunately there is a couple of global wxCSConv objects such as
1476 // wxConvLocal that are used all over wx code, so we have to make sure
1477 // the handle is used by at most one thread at the time. Otherwise
1478 // only a few wx classes would be safe to use from non-main threads
1479 // as MB<->WC conversion would fail "randomly".
1480 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1481#endif
3698ae71 1482
36acb880
VZ
1483 size_t inbuf = strlen(psz);
1484 size_t outbuf = n * SIZEOF_WCHAR_T;
1485 size_t res, cres;
1486 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1487 wchar_t *bufPtr = buf;
1488 const char *pszPtr = psz;
1489
1490 if (buf)
1491 {
1492 // have destination buffer, convert there
1493 cres = iconv(m2w,
1494 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1495 (char**)&bufPtr, &outbuf);
1496 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1497
36acb880 1498 if (ms_wcNeedsSwap)
3a0d76bc 1499 {
36acb880
VZ
1500 // convert to native endianness
1501 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 1502 }
adb45366 1503
49dd9820
VS
1504 // NB: iconv was given only strlen(psz) characters on input, and so
1505 // it couldn't convert the trailing zero. Let's do it ourselves
1506 // if there's some room left for it in the output buffer.
1507 if (res < n)
1508 buf[res] = 0;
36acb880
VZ
1509 }
1510 else
1511 {
1512 // no destination buffer... convert using temp buffer
1513 // to calculate destination buffer requirement
1514 wchar_t tbuf[8];
1515 res = 0;
1516 do {
1517 bufPtr = tbuf;
1518 outbuf = 8*SIZEOF_WCHAR_T;
1519
1520 cres = iconv(m2w,
1521 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1522 (char**)&bufPtr, &outbuf );
1523
1524 res += 8-(outbuf/SIZEOF_WCHAR_T);
1525 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1526 }
dccce9ea 1527
36acb880 1528 if (ICONV_FAILED(cres, inbuf))
f1339c56 1529 {
36acb880
VZ
1530 //VS: it is ok if iconv fails, hence trace only
1531 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1532 return (size_t)-1;
1533 }
1534
1535 return res;
1536}
1537
bde4baac 1538size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1539{
b1d547eb
VS
1540#if wxUSE_THREADS
1541 // NB: explained in MB2WC
1542 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1543#endif
3698ae71 1544
f8d791e0 1545 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1546 size_t outbuf = n;
1547 size_t res, cres;
3a0d76bc 1548
36acb880 1549 wchar_t *tmpbuf = 0;
3caec1bb 1550
36acb880
VZ
1551 if (ms_wcNeedsSwap)
1552 {
1553 // need to copy to temp buffer to switch endianness
1554 // this absolutely doesn't rock!
1555 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1556 // could be in read-only memory, or be accessed in some other thread)
1557 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1558 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1559 WC_BSWAP(tmpbuf, inbuf)
1560 psz=tmpbuf;
1561 }
3a0d76bc 1562
36acb880
VZ
1563 if (buf)
1564 {
1565 // have destination buffer, convert there
1566 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1567
36acb880 1568 res = n-outbuf;
adb45366 1569
49dd9820
VS
1570 // NB: iconv was given only wcslen(psz) characters on input, and so
1571 // it couldn't convert the trailing zero. Let's do it ourselves
1572 // if there's some room left for it in the output buffer.
1573 if (res < n)
1574 buf[0] = 0;
36acb880
VZ
1575 }
1576 else
1577 {
1578 // no destination buffer... convert using temp buffer
1579 // to calculate destination buffer requirement
1580 char tbuf[16];
1581 res = 0;
1582 do {
1583 buf = tbuf; outbuf = 16;
1584
1585 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1586
36acb880
VZ
1587 res += 16 - outbuf;
1588 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1589 }
dccce9ea 1590
36acb880
VZ
1591 if (ms_wcNeedsSwap)
1592 {
1593 free(tmpbuf);
1594 }
dccce9ea 1595
36acb880
VZ
1596 if (ICONV_FAILED(cres, inbuf))
1597 {
1598 //VS: it is ok if iconv fails, hence trace only
1599 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1600 return (size_t)-1;
1601 }
1602
1603 return res;
1604}
1605
b040e242 1606#endif // HAVE_ICONV
36acb880 1607
e95354ec 1608
36acb880
VZ
1609// ============================================================================
1610// Win32 conversion classes
1611// ============================================================================
1cd52418 1612
e95354ec 1613#ifdef wxHAVE_WIN32_MB2WC
373658eb 1614
8b04d4c4 1615// from utils.cpp
d775fa82 1616#if wxUSE_FONTMAP
8b04d4c4
VZ
1617extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1618extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1619#endif
373658eb 1620
e95354ec 1621class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1622{
1623public:
bde4baac
VZ
1624 wxMBConv_win32()
1625 {
1626 m_CodePage = CP_ACP;
1627 }
1628
7608a683 1629#if wxUSE_FONTMAP
e95354ec 1630 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1631 {
1632 m_CodePage = wxCharsetToCodepage(name);
1633 }
dccce9ea 1634
e95354ec 1635 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1636 {
1637 m_CodePage = wxEncodingToCodepage(encoding);
1638 }
7608a683 1639#endif
8b04d4c4 1640
bde4baac 1641 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1642 {
02272c9c
VZ
1643 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1644 // the behaviour is not compatible with the Unix version (using iconv)
1645 // and break the library itself, e.g. wxTextInputStream::NextChar()
1646 // wouldn't work if reading an incomplete MB char didn't result in an
1647 // error
667e5b3e
VZ
1648 //
1649 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1650 // an error (tested under Windows Server 2003) and apparently it is
1651 // done on purpose, i.e. the function accepts any input in this case
1652 // and although I'd prefer to return error on ill-formed output, our
1653 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1654 // explicitly ill-formed according to RFC 2152) neither so we don't
1655 // even have any fallback here...
1656 int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1657
2b5f62a0
VZ
1658 const size_t len = ::MultiByteToWideChar
1659 (
1660 m_CodePage, // code page
667e5b3e 1661 flags, // flags: fall on error
2b5f62a0
VZ
1662 psz, // input string
1663 -1, // its length (NUL-terminated)
b4da152e 1664 buf, // output string
2b5f62a0
VZ
1665 buf ? n : 0 // size of output buffer
1666 );
1667
03a991bc
VZ
1668 // note that it returns count of written chars for buf != NULL and size
1669 // of the needed buffer for buf == NULL so in either case the length of
1670 // the string (which never includes the terminating NUL) is one less
1671 return len ? len - 1 : (size_t)-1;
f1339c56 1672 }
dccce9ea 1673
13dd924a 1674 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1675 {
13dd924a
VZ
1676 /*
1677 we have a problem here: by default, WideCharToMultiByte() may
1678 replace characters unrepresentable in the target code page with bad
1679 quality approximations such as turning "1/2" symbol (U+00BD) into
1680 "1" for the code pages which don't have it and we, obviously, want
1681 to avoid this at any price
d775fa82 1682
13dd924a
VZ
1683 the trouble is that this function does it _silently_, i.e. it won't
1684 even tell us whether it did or not... Win98/2000 and higher provide
1685 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1686 we have to resort to a round trip, i.e. check that converting back
1687 results in the same string -- this is, of course, expensive but
1688 otherwise we simply can't be sure to not garble the data.
1689 */
1690
1691 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1692 // it doesn't work with CJK encodings (which we test for rather roughly
1693 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1694 // supporting it
907173e5
WS
1695 BOOL usedDef wxDUMMY_INITIALIZE(false);
1696 BOOL *pUsedDef;
13dd924a
VZ
1697 int flags;
1698 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1699 {
1700 // it's our lucky day
1701 flags = WC_NO_BEST_FIT_CHARS;
1702 pUsedDef = &usedDef;
1703 }
1704 else // old system or unsupported encoding
1705 {
1706 flags = 0;
1707 pUsedDef = NULL;
1708 }
1709
2b5f62a0
VZ
1710 const size_t len = ::WideCharToMultiByte
1711 (
1712 m_CodePage, // code page
13dd924a
VZ
1713 flags, // either none or no best fit
1714 pwz, // input string
2b5f62a0
VZ
1715 -1, // it is (wide) NUL-terminated
1716 buf, // output buffer
1717 buf ? n : 0, // and its size
1718 NULL, // default "replacement" char
13dd924a 1719 pUsedDef // [out] was it used?
2b5f62a0
VZ
1720 );
1721
13dd924a
VZ
1722 if ( !len )
1723 {
1724 // function totally failed
1725 return (size_t)-1;
1726 }
1727
1728 // if we were really converting, check if we succeeded
1729 if ( buf )
1730 {
1731 if ( flags )
1732 {
1733 // check if the conversion failed, i.e. if any replacements
1734 // were done
1735 if ( usedDef )
1736 return (size_t)-1;
1737 }
1738 else // we must resort to double tripping...
1739 {
1740 wxWCharBuffer wcBuf(n);
1741 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1742 wcscmp(wcBuf, pwz) != 0 )
1743 {
1744 // we didn't obtain the same thing we started from, hence
1745 // the conversion was lossy and we consider that it failed
1746 return (size_t)-1;
1747 }
1748 }
1749 }
1750
03a991bc 1751 // see the comment above for the reason of "len - 1"
13dd924a 1752 return len - 1;
f1339c56 1753 }
dccce9ea 1754
13dd924a
VZ
1755 bool IsOk() const { return m_CodePage != -1; }
1756
1757private:
1758 static bool CanUseNoBestFit()
1759 {
1760 static int s_isWin98Or2k = -1;
1761
1762 if ( s_isWin98Or2k == -1 )
1763 {
1764 int verMaj, verMin;
1765 switch ( wxGetOsVersion(&verMaj, &verMin) )
1766 {
1767 case wxWIN95:
1768 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1769 break;
1770
1771 case wxWINDOWS_NT:
1772 s_isWin98Or2k = verMaj >= 5;
1773 break;
1774
1775 default:
1776 // unknown, be conseravtive by default
1777 s_isWin98Or2k = 0;
1778 }
1779
1780 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1781 }
1782
1783 return s_isWin98Or2k == 1;
1784 }
f1339c56 1785
b1d66b54 1786 long m_CodePage;
1cd52418 1787};
e95354ec
VZ
1788
1789#endif // wxHAVE_WIN32_MB2WC
1790
f7e98dee
RN
1791// ============================================================================
1792// Cocoa conversion classes
1793// ============================================================================
1794
1795#if defined(__WXCOCOA__)
1796
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
1800
1801#include <CoreFoundation/CFString.h>
1802#include <CoreFoundation/CFStringEncodingExt.h>
1803
1804CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 1805{
638357a0 1806 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ecd9653b
WS
1807 if ( encoding == wxFONTENCODING_DEFAULT )
1808 {
638357a0 1809 enc = CFStringGetSystemEncoding();
ecd9653b
WS
1810 }
1811 else switch( encoding)
1812 {
1813 case wxFONTENCODING_ISO8859_1 :
1814 enc = kCFStringEncodingISOLatin1 ;
1815 break ;
1816 case wxFONTENCODING_ISO8859_2 :
1817 enc = kCFStringEncodingISOLatin2;
1818 break ;
1819 case wxFONTENCODING_ISO8859_3 :
1820 enc = kCFStringEncodingISOLatin3 ;
1821 break ;
1822 case wxFONTENCODING_ISO8859_4 :
1823 enc = kCFStringEncodingISOLatin4;
1824 break ;
1825 case wxFONTENCODING_ISO8859_5 :
1826 enc = kCFStringEncodingISOLatinCyrillic;
1827 break ;
1828 case wxFONTENCODING_ISO8859_6 :
1829 enc = kCFStringEncodingISOLatinArabic;
1830 break ;
1831 case wxFONTENCODING_ISO8859_7 :
1832 enc = kCFStringEncodingISOLatinGreek;
1833 break ;
1834 case wxFONTENCODING_ISO8859_8 :
1835 enc = kCFStringEncodingISOLatinHebrew;
1836 break ;
1837 case wxFONTENCODING_ISO8859_9 :
1838 enc = kCFStringEncodingISOLatin5;
1839 break ;
1840 case wxFONTENCODING_ISO8859_10 :
1841 enc = kCFStringEncodingISOLatin6;
1842 break ;
1843 case wxFONTENCODING_ISO8859_11 :
1844 enc = kCFStringEncodingISOLatinThai;
1845 break ;
1846 case wxFONTENCODING_ISO8859_13 :
1847 enc = kCFStringEncodingISOLatin7;
1848 break ;
1849 case wxFONTENCODING_ISO8859_14 :
1850 enc = kCFStringEncodingISOLatin8;
1851 break ;
1852 case wxFONTENCODING_ISO8859_15 :
1853 enc = kCFStringEncodingISOLatin9;
1854 break ;
1855
1856 case wxFONTENCODING_KOI8 :
1857 enc = kCFStringEncodingKOI8_R;
1858 break ;
1859 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1860 enc = kCFStringEncodingDOSRussian;
1861 break ;
1862
1863// case wxFONTENCODING_BULGARIAN :
1864// enc = ;
1865// break ;
1866
1867 case wxFONTENCODING_CP437 :
1868 enc =kCFStringEncodingDOSLatinUS ;
1869 break ;
1870 case wxFONTENCODING_CP850 :
1871 enc = kCFStringEncodingDOSLatin1;
1872 break ;
1873 case wxFONTENCODING_CP852 :
1874 enc = kCFStringEncodingDOSLatin2;
1875 break ;
1876 case wxFONTENCODING_CP855 :
1877 enc = kCFStringEncodingDOSCyrillic;
1878 break ;
1879 case wxFONTENCODING_CP866 :
1880 enc =kCFStringEncodingDOSRussian ;
1881 break ;
1882 case wxFONTENCODING_CP874 :
1883 enc = kCFStringEncodingDOSThai;
1884 break ;
1885 case wxFONTENCODING_CP932 :
1886 enc = kCFStringEncodingDOSJapanese;
1887 break ;
1888 case wxFONTENCODING_CP936 :
1889 enc =kCFStringEncodingDOSChineseSimplif ;
1890 break ;
1891 case wxFONTENCODING_CP949 :
1892 enc = kCFStringEncodingDOSKorean;
1893 break ;
1894 case wxFONTENCODING_CP950 :
1895 enc = kCFStringEncodingDOSChineseTrad;
1896 break ;
ecd9653b
WS
1897 case wxFONTENCODING_CP1250 :
1898 enc = kCFStringEncodingWindowsLatin2;
1899 break ;
1900 case wxFONTENCODING_CP1251 :
1901 enc =kCFStringEncodingWindowsCyrillic ;
1902 break ;
1903 case wxFONTENCODING_CP1252 :
1904 enc =kCFStringEncodingWindowsLatin1 ;
1905 break ;
1906 case wxFONTENCODING_CP1253 :
1907 enc = kCFStringEncodingWindowsGreek;
1908 break ;
1909 case wxFONTENCODING_CP1254 :
1910 enc = kCFStringEncodingWindowsLatin5;
1911 break ;
1912 case wxFONTENCODING_CP1255 :
1913 enc =kCFStringEncodingWindowsHebrew ;
1914 break ;
1915 case wxFONTENCODING_CP1256 :
1916 enc =kCFStringEncodingWindowsArabic ;
1917 break ;
1918 case wxFONTENCODING_CP1257 :
1919 enc = kCFStringEncodingWindowsBalticRim;
1920 break ;
638357a0
RN
1921// This only really encodes to UTF7 (if that) evidently
1922// case wxFONTENCODING_UTF7 :
1923// enc = kCFStringEncodingNonLossyASCII ;
1924// break ;
ecd9653b
WS
1925 case wxFONTENCODING_UTF8 :
1926 enc = kCFStringEncodingUTF8 ;
1927 break ;
1928 case wxFONTENCODING_EUC_JP :
1929 enc = kCFStringEncodingEUC_JP;
1930 break ;
1931 case wxFONTENCODING_UTF16 :
f7e98dee 1932 enc = kCFStringEncodingUnicode ;
ecd9653b 1933 break ;
f7e98dee
RN
1934 case wxFONTENCODING_MACROMAN :
1935 enc = kCFStringEncodingMacRoman ;
1936 break ;
1937 case wxFONTENCODING_MACJAPANESE :
1938 enc = kCFStringEncodingMacJapanese ;
1939 break ;
1940 case wxFONTENCODING_MACCHINESETRAD :
1941 enc = kCFStringEncodingMacChineseTrad ;
1942 break ;
1943 case wxFONTENCODING_MACKOREAN :
1944 enc = kCFStringEncodingMacKorean ;
1945 break ;
1946 case wxFONTENCODING_MACARABIC :
1947 enc = kCFStringEncodingMacArabic ;
1948 break ;
1949 case wxFONTENCODING_MACHEBREW :
1950 enc = kCFStringEncodingMacHebrew ;
1951 break ;
1952 case wxFONTENCODING_MACGREEK :
1953 enc = kCFStringEncodingMacGreek ;
1954 break ;
1955 case wxFONTENCODING_MACCYRILLIC :
1956 enc = kCFStringEncodingMacCyrillic ;
1957 break ;
1958 case wxFONTENCODING_MACDEVANAGARI :
1959 enc = kCFStringEncodingMacDevanagari ;
1960 break ;
1961 case wxFONTENCODING_MACGURMUKHI :
1962 enc = kCFStringEncodingMacGurmukhi ;
1963 break ;
1964 case wxFONTENCODING_MACGUJARATI :
1965 enc = kCFStringEncodingMacGujarati ;
1966 break ;
1967 case wxFONTENCODING_MACORIYA :
1968 enc = kCFStringEncodingMacOriya ;
1969 break ;
1970 case wxFONTENCODING_MACBENGALI :
1971 enc = kCFStringEncodingMacBengali ;
1972 break ;
1973 case wxFONTENCODING_MACTAMIL :
1974 enc = kCFStringEncodingMacTamil ;
1975 break ;
1976 case wxFONTENCODING_MACTELUGU :
1977 enc = kCFStringEncodingMacTelugu ;
1978 break ;
1979 case wxFONTENCODING_MACKANNADA :
1980 enc = kCFStringEncodingMacKannada ;
1981 break ;
1982 case wxFONTENCODING_MACMALAJALAM :
1983 enc = kCFStringEncodingMacMalayalam ;
1984 break ;
1985 case wxFONTENCODING_MACSINHALESE :
1986 enc = kCFStringEncodingMacSinhalese ;
1987 break ;
1988 case wxFONTENCODING_MACBURMESE :
1989 enc = kCFStringEncodingMacBurmese ;
1990 break ;
1991 case wxFONTENCODING_MACKHMER :
1992 enc = kCFStringEncodingMacKhmer ;
1993 break ;
1994 case wxFONTENCODING_MACTHAI :
1995 enc = kCFStringEncodingMacThai ;
1996 break ;
1997 case wxFONTENCODING_MACLAOTIAN :
1998 enc = kCFStringEncodingMacLaotian ;
1999 break ;
2000 case wxFONTENCODING_MACGEORGIAN :
2001 enc = kCFStringEncodingMacGeorgian ;
2002 break ;
2003 case wxFONTENCODING_MACARMENIAN :
2004 enc = kCFStringEncodingMacArmenian ;
2005 break ;
2006 case wxFONTENCODING_MACCHINESESIMP :
2007 enc = kCFStringEncodingMacChineseSimp ;
2008 break ;
2009 case wxFONTENCODING_MACTIBETAN :
2010 enc = kCFStringEncodingMacTibetan ;
2011 break ;
2012 case wxFONTENCODING_MACMONGOLIAN :
2013 enc = kCFStringEncodingMacMongolian ;
2014 break ;
2015 case wxFONTENCODING_MACETHIOPIC :
2016 enc = kCFStringEncodingMacEthiopic ;
2017 break ;
2018 case wxFONTENCODING_MACCENTRALEUR :
2019 enc = kCFStringEncodingMacCentralEurRoman ;
2020 break ;
2021 case wxFONTENCODING_MACVIATNAMESE :
2022 enc = kCFStringEncodingMacVietnamese ;
2023 break ;
2024 case wxFONTENCODING_MACARABICEXT :
2025 enc = kCFStringEncodingMacExtArabic ;
2026 break ;
2027 case wxFONTENCODING_MACSYMBOL :
2028 enc = kCFStringEncodingMacSymbol ;
2029 break ;
2030 case wxFONTENCODING_MACDINGBATS :
2031 enc = kCFStringEncodingMacDingbats ;
2032 break ;
2033 case wxFONTENCODING_MACTURKISH :
2034 enc = kCFStringEncodingMacTurkish ;
2035 break ;
2036 case wxFONTENCODING_MACCROATIAN :
2037 enc = kCFStringEncodingMacCroatian ;
2038 break ;
2039 case wxFONTENCODING_MACICELANDIC :
2040 enc = kCFStringEncodingMacIcelandic ;
2041 break ;
2042 case wxFONTENCODING_MACROMANIAN :
2043 enc = kCFStringEncodingMacRomanian ;
2044 break ;
2045 case wxFONTENCODING_MACCELTIC :
2046 enc = kCFStringEncodingMacCeltic ;
2047 break ;
2048 case wxFONTENCODING_MACGAELIC :
2049 enc = kCFStringEncodingMacGaelic ;
2050 break ;
ecd9653b
WS
2051// case wxFONTENCODING_MACKEYBOARD :
2052// enc = kCFStringEncodingMacKeyboardGlyphs ;
2053// break ;
2054 default :
2055 // because gcc is picky
2056 break ;
2057 } ;
2058 return enc ;
f7e98dee
RN
2059}
2060
f7e98dee
RN
2061class wxMBConv_cocoa : public wxMBConv
2062{
2063public:
2064 wxMBConv_cocoa()
2065 {
2066 Init(CFStringGetSystemEncoding()) ;
2067 }
2068
a6900d10 2069#if wxUSE_FONTMAP
f7e98dee
RN
2070 wxMBConv_cocoa(const wxChar* name)
2071 {
267e11c5 2072 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
f7e98dee 2073 }
a6900d10 2074#endif
f7e98dee
RN
2075
2076 wxMBConv_cocoa(wxFontEncoding encoding)
2077 {
2078 Init( wxCFStringEncFromFontEnc(encoding) );
2079 }
2080
2081 ~wxMBConv_cocoa()
2082 {
2083 }
2084
2085 void Init( CFStringEncoding encoding)
2086 {
638357a0 2087 m_encoding = encoding ;
f7e98dee
RN
2088 }
2089
2090 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2091 {
2092 wxASSERT(szUnConv);
ecd9653b 2093
638357a0
RN
2094 CFStringRef theString = CFStringCreateWithBytes (
2095 NULL, //the allocator
2096 (const UInt8*)szUnConv,
2097 strlen(szUnConv),
2098 m_encoding,
2099 false //no BOM/external representation
f7e98dee
RN
2100 );
2101
2102 wxASSERT(theString);
2103
638357a0
RN
2104 size_t nOutLength = CFStringGetLength(theString);
2105
2106 if (szOut == NULL)
f7e98dee 2107 {
f7e98dee 2108 CFRelease(theString);
638357a0 2109 return nOutLength;
f7e98dee 2110 }
ecd9653b 2111
638357a0 2112 CFRange theRange = { 0, nOutSize };
ecd9653b 2113
638357a0
RN
2114#if SIZEOF_WCHAR_T == 4
2115 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2116#endif
3698ae71 2117
f7e98dee 2118 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
3698ae71 2119
f7e98dee 2120 CFRelease(theString);
ecd9653b 2121
638357a0 2122 szUniCharBuffer[nOutLength] = '\0' ;
f7e98dee
RN
2123
2124#if SIZEOF_WCHAR_T == 4
2125 wxMBConvUTF16 converter ;
638357a0 2126 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
f7e98dee
RN
2127 delete[] szUniCharBuffer;
2128#endif
3698ae71 2129
638357a0 2130 return nOutLength;
f7e98dee
RN
2131 }
2132
2133 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2134 {
638357a0 2135 wxASSERT(szUnConv);
3698ae71 2136
f7e98dee 2137 size_t nRealOutSize;
638357a0 2138 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 2139 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2140
f7e98dee
RN
2141#if SIZEOF_WCHAR_T == 4
2142 wxMBConvUTF16BE converter ;
2143 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2144 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2145 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2146 nBufSize /= sizeof(UniChar);
f7e98dee
RN
2147#endif
2148
2149 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2150 NULL, //allocator
2151 szUniBuffer,
2152 nBufSize,
638357a0 2153 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 2154 );
ecd9653b 2155
f7e98dee 2156 wxASSERT(theString);
ecd9653b 2157
f7e98dee 2158 //Note that CER puts a BOM when converting to unicode
638357a0
RN
2159 //so we check and use getchars instead in that case
2160 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 2161 {
638357a0
RN
2162 if (szOut != NULL)
2163 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
3698ae71 2164
638357a0
RN
2165 nRealOutSize = CFStringGetLength(theString) + 1;
2166 }
2167 else
2168 {
2169 CFStringGetBytes(
2170 theString,
2171 CFRangeMake(0, CFStringGetLength(theString)),
2172 m_encoding,
2173 0, //what to put in characters that can't be converted -
2174 //0 tells CFString to return NULL if it meets such a character
2175 false, //not an external representation
2176 (UInt8*) szOut,
3698ae71 2177 nOutSize,
638357a0
RN
2178 (CFIndex*) &nRealOutSize
2179 );
f7e98dee 2180 }
ecd9653b 2181
638357a0 2182 CFRelease(theString);
ecd9653b 2183
638357a0
RN
2184#if SIZEOF_WCHAR_T == 4
2185 delete[] szUniBuffer;
2186#endif
ecd9653b 2187
f7e98dee
RN
2188 return nRealOutSize - 1;
2189 }
2190
2191 bool IsOk() const
ecd9653b 2192 {
3698ae71 2193 return m_encoding != kCFStringEncodingInvalidId &&
638357a0 2194 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2195 }
2196
2197private:
638357a0 2198 CFStringEncoding m_encoding ;
f7e98dee
RN
2199};
2200
2201#endif // defined(__WXCOCOA__)
2202
335d31e0
SC
2203// ============================================================================
2204// Mac conversion classes
2205// ============================================================================
2206
2207#if defined(__WXMAC__) && defined(TARGET_CARBON)
2208
2209class wxMBConv_mac : public wxMBConv
2210{
2211public:
2212 wxMBConv_mac()
2213 {
2214 Init(CFStringGetSystemEncoding()) ;
2215 }
2216
2d1659cf 2217#if wxUSE_FONTMAP
335d31e0
SC
2218 wxMBConv_mac(const wxChar* name)
2219 {
267e11c5 2220 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
335d31e0 2221 }
2d1659cf 2222#endif
335d31e0
SC
2223
2224 wxMBConv_mac(wxFontEncoding encoding)
2225 {
d775fa82
WS
2226 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2227 }
2228
2229 ~wxMBConv_mac()
2230 {
2231 OSStatus status = noErr ;
2232 status = TECDisposeConverter(m_MB2WC_converter);
2233 status = TECDisposeConverter(m_WC2MB_converter);
2234 }
2235
2236
2237 void Init( TextEncodingBase encoding)
2238 {
2239 OSStatus status = noErr ;
2240 m_char_encoding = encoding ;
2241 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2242
2243 status = TECCreateConverter(&m_MB2WC_converter,
2244 m_char_encoding,
2245 m_unicode_encoding);
2246 status = TECCreateConverter(&m_WC2MB_converter,
2247 m_unicode_encoding,
2248 m_char_encoding);
2249 }
2250
335d31e0
SC
2251 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2252 {
d775fa82
WS
2253 OSStatus status = noErr ;
2254 ByteCount byteOutLen ;
2255 ByteCount byteInLen = strlen(psz) ;
2256 wchar_t *tbuf = NULL ;
2257 UniChar* ubuf = NULL ;
2258 size_t res = 0 ;
2259
2260 if (buf == NULL)
2261 {
638357a0 2262 //apple specs say at least 32
c543817b 2263 n = wxMax( 32 , byteInLen ) ;
d775fa82
WS
2264 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2265 }
2266 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
f3a355ce 2267#if SIZEOF_WCHAR_T == 4
d775fa82 2268 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2269#else
d775fa82 2270 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2271#endif
d775fa82
WS
2272 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2273 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
f3a355ce 2274#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2275 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2276 // is not properly terminated we get random characters at the end
2277 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d775fa82
WS
2278 wxMBConvUTF16BE converter ;
2279 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2280 free( ubuf ) ;
f3a355ce 2281#else
d775fa82 2282 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2283#endif
d775fa82
WS
2284 if ( buf == NULL )
2285 free(tbuf) ;
335d31e0 2286
335d31e0
SC
2287 if ( buf && res < n)
2288 buf[res] = 0;
2289
d775fa82 2290 return res ;
335d31e0
SC
2291 }
2292
2293 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82
WS
2294 {
2295 OSStatus status = noErr ;
2296 ByteCount byteOutLen ;
2297 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2298
2299 char *tbuf = NULL ;
2300
2301 if (buf == NULL)
2302 {
638357a0 2303 //apple specs say at least 32
c543817b 2304 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2305 tbuf = (char*) malloc( n ) ;
2306 }
2307
2308 ByteCount byteBufferLen = n ;
2309 UniChar* ubuf = NULL ;
f3a355ce 2310#if SIZEOF_WCHAR_T == 4
d775fa82
WS
2311 wxMBConvUTF16BE converter ;
2312 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2313 byteInLen = unicharlen ;
2314 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2315 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce 2316#else
d775fa82 2317 ubuf = (UniChar*) psz ;
f3a355ce 2318#endif
d775fa82
WS
2319 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2320 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
f3a355ce 2321#if SIZEOF_WCHAR_T == 4
d775fa82 2322 free( ubuf ) ;
f3a355ce 2323#endif
d775fa82
WS
2324 if ( buf == NULL )
2325 free(tbuf) ;
335d31e0 2326
d775fa82 2327 size_t res = byteOutLen ;
335d31e0 2328 if ( buf && res < n)
638357a0 2329 {
335d31e0 2330 buf[res] = 0;
3698ae71 2331
638357a0
RN
2332 //we need to double-trip to verify it didn't insert any ? in place
2333 //of bogus characters
2334 wxWCharBuffer wcBuf(n);
2335 size_t pszlen = wxWcslen(psz);
2336 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2337 wxWcslen(wcBuf) != pszlen ||
2338 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2339 {
2340 // we didn't obtain the same thing we started from, hence
2341 // the conversion was lossy and we consider that it failed
2342 return (size_t)-1;
2343 }
2344 }
335d31e0 2345
d775fa82 2346 return res ;
335d31e0
SC
2347 }
2348
2349 bool IsOk() const
2350 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2351
2352private:
d775fa82
WS
2353 TECObjectRef m_MB2WC_converter ;
2354 TECObjectRef m_WC2MB_converter ;
2355
2356 TextEncodingBase m_char_encoding ;
2357 TextEncodingBase m_unicode_encoding ;
335d31e0
SC
2358};
2359
2360#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 2361
36acb880
VZ
2362// ============================================================================
2363// wxEncodingConverter based conversion classes
2364// ============================================================================
2365
1e6feb95 2366#if wxUSE_FONTMAP
1cd52418 2367
e95354ec 2368class wxMBConv_wxwin : public wxMBConv
1cd52418 2369{
8b04d4c4
VZ
2370private:
2371 void Init()
2372 {
2373 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2374 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2375 }
2376
6001e347 2377public:
f1339c56
RR
2378 // temporarily just use wxEncodingConverter stuff,
2379 // so that it works while a better implementation is built
e95354ec 2380 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
2381 {
2382 if (name)
267e11c5 2383 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
2384 else
2385 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 2386
8b04d4c4
VZ
2387 Init();
2388 }
2389
e95354ec 2390 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
2391 {
2392 m_enc = enc;
2393
2394 Init();
f1339c56 2395 }
dccce9ea 2396
bde4baac 2397 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
2398 {
2399 size_t inbuf = strlen(psz);
dccce9ea 2400 if (buf)
c643a977
VS
2401 {
2402 if (!m2w.Convert(psz,buf))
2403 return (size_t)-1;
2404 }
f1339c56
RR
2405 return inbuf;
2406 }
dccce9ea 2407
bde4baac 2408 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 2409 {
f8d791e0 2410 const size_t inbuf = wxWcslen(psz);
f1339c56 2411 if (buf)
c643a977
VS
2412 {
2413 if (!w2m.Convert(psz,buf))
2414 return (size_t)-1;
2415 }
dccce9ea 2416
f1339c56
RR
2417 return inbuf;
2418 }
dccce9ea 2419
e95354ec 2420 bool IsOk() const { return m_ok; }
f1339c56
RR
2421
2422public:
8b04d4c4 2423 wxFontEncoding m_enc;
f1339c56 2424 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
2425
2426 // were we initialized successfully?
2427 bool m_ok;
fc7a2a60 2428
e95354ec 2429 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 2430};
6001e347 2431
1e6feb95
VZ
2432#endif // wxUSE_FONTMAP
2433
36acb880
VZ
2434// ============================================================================
2435// wxCSConv implementation
2436// ============================================================================
2437
8b04d4c4 2438void wxCSConv::Init()
6001e347 2439{
e95354ec
VZ
2440 m_name = NULL;
2441 m_convReal = NULL;
2442 m_deferred = true;
2443}
2444
8b04d4c4
VZ
2445wxCSConv::wxCSConv(const wxChar *charset)
2446{
2447 Init();
82713003 2448
e95354ec
VZ
2449 if ( charset )
2450 {
e95354ec
VZ
2451 SetName(charset);
2452 }
bda3d86a
VZ
2453
2454 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
2455}
2456
8b04d4c4
VZ
2457wxCSConv::wxCSConv(wxFontEncoding encoding)
2458{
bda3d86a 2459 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
2460 {
2461 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2462
2463 encoding = wxFONTENCODING_SYSTEM;
2464 }
2465
8b04d4c4
VZ
2466 Init();
2467
bda3d86a 2468 m_encoding = encoding;
8b04d4c4
VZ
2469}
2470
6001e347
RR
2471wxCSConv::~wxCSConv()
2472{
65e50848
JS
2473 Clear();
2474}
2475
54380f29 2476wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 2477 : wxMBConv()
54380f29 2478{
8b04d4c4
VZ
2479 Init();
2480
54380f29 2481 SetName(conv.m_name);
8b04d4c4 2482 m_encoding = conv.m_encoding;
54380f29
GD
2483}
2484
2485wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2486{
2487 Clear();
8b04d4c4 2488
54380f29 2489 SetName(conv.m_name);
8b04d4c4
VZ
2490 m_encoding = conv.m_encoding;
2491
54380f29
GD
2492 return *this;
2493}
2494
65e50848
JS
2495void wxCSConv::Clear()
2496{
8b04d4c4 2497 free(m_name);
e95354ec 2498 delete m_convReal;
8b04d4c4 2499
65e50848 2500 m_name = NULL;
e95354ec 2501 m_convReal = NULL;
6001e347
RR
2502}
2503
2504void wxCSConv::SetName(const wxChar *charset)
2505{
f1339c56
RR
2506 if (charset)
2507 {
2508 m_name = wxStrdup(charset);
e95354ec 2509 m_deferred = true;
f1339c56 2510 }
6001e347
RR
2511}
2512
e95354ec
VZ
2513wxMBConv *wxCSConv::DoCreate() const
2514{
c547282d
VZ
2515 // check for the special case of ASCII or ISO8859-1 charset: as we have
2516 // special knowledge of it anyhow, we don't need to create a special
2517 // conversion object
2518 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 2519 {
e95354ec
VZ
2520 // don't convert at all
2521 return NULL;
2522 }
dccce9ea 2523
e95354ec
VZ
2524 // we trust OS to do conversion better than we can so try external
2525 // conversion methods first
2526 //
2527 // the full order is:
2528 // 1. OS conversion (iconv() under Unix or Win32 API)
2529 // 2. hard coded conversions for UTF
2530 // 3. wxEncodingConverter as fall back
2531
2532 // step (1)
2533#ifdef HAVE_ICONV
c547282d 2534#if !wxUSE_FONTMAP
e95354ec 2535 if ( m_name )
c547282d 2536#endif // !wxUSE_FONTMAP
e95354ec 2537 {
c547282d
VZ
2538 wxString name(m_name);
2539
2540#if wxUSE_FONTMAP
2541 if ( name.empty() )
267e11c5 2542 name = wxFontMapperBase::Get()->GetEncodingName(m_encoding);
c547282d
VZ
2543#endif // wxUSE_FONTMAP
2544
2545 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
e95354ec
VZ
2546 if ( conv->IsOk() )
2547 return conv;
2548
2549 delete conv;
2550 }
2551#endif // HAVE_ICONV
2552
2553#ifdef wxHAVE_WIN32_MB2WC
2554 {
7608a683 2555#if wxUSE_FONTMAP
e95354ec
VZ
2556 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2557 : new wxMBConv_win32(m_encoding);
2558 if ( conv->IsOk() )
2559 return conv;
2560
2561 delete conv;
7608a683
WS
2562#else
2563 return NULL;
2564#endif
e95354ec
VZ
2565 }
2566#endif // wxHAVE_WIN32_MB2WC
d775fa82
WS
2567#if defined(__WXMAC__)
2568 {
5c3c8676 2569 // leave UTF16 and UTF32 to the built-ins of wx
3698ae71 2570 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
5c3c8676 2571 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
d775fa82
WS
2572 {
2573
2d1659cf 2574#if wxUSE_FONTMAP
d775fa82
WS
2575 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2576 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
2577#else
2578 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2579#endif
d775fa82 2580 if ( conv->IsOk() )
f7e98dee
RN
2581 return conv;
2582
2583 delete conv;
2584 }
2585 }
2586#endif
2587#if defined(__WXCOCOA__)
2588 {
2589 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2590 {
2591
a6900d10 2592#if wxUSE_FONTMAP
f7e98dee
RN
2593 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2594 : new wxMBConv_cocoa(m_encoding);
a6900d10
RN
2595#else
2596 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2597#endif
f7e98dee 2598 if ( conv->IsOk() )
d775fa82
WS
2599 return conv;
2600
2601 delete conv;
2602 }
335d31e0
SC
2603 }
2604#endif
e95354ec
VZ
2605 // step (2)
2606 wxFontEncoding enc = m_encoding;
2607#if wxUSE_FONTMAP
c547282d
VZ
2608 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2609 {
2610 // use "false" to suppress interactive dialogs -- we can be called from
2611 // anywhere and popping up a dialog from here is the last thing we want to
2612 // do
267e11c5 2613 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 2614 }
e95354ec
VZ
2615#endif // wxUSE_FONTMAP
2616
2617 switch ( enc )
2618 {
2619 case wxFONTENCODING_UTF7:
2620 return new wxMBConvUTF7;
2621
2622 case wxFONTENCODING_UTF8:
2623 return new wxMBConvUTF8;
2624
e95354ec
VZ
2625 case wxFONTENCODING_UTF16BE:
2626 return new wxMBConvUTF16BE;
2627
2628 case wxFONTENCODING_UTF16LE:
2629 return new wxMBConvUTF16LE;
2630
e95354ec
VZ
2631 case wxFONTENCODING_UTF32BE:
2632 return new wxMBConvUTF32BE;
2633
2634 case wxFONTENCODING_UTF32LE:
2635 return new wxMBConvUTF32LE;
2636
2637 default:
2638 // nothing to do but put here to suppress gcc warnings
2639 ;
2640 }
2641
2642 // step (3)
2643#if wxUSE_FONTMAP
2644 {
2645 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2646 : new wxMBConv_wxwin(m_encoding);
2647 if ( conv->IsOk() )
2648 return conv;
2649
2650 delete conv;
2651 }
2652#endif // wxUSE_FONTMAP
2653
a58d4f4d
VS
2654 // NB: This is a hack to prevent deadlock. What could otherwise happen
2655 // in Unicode build: wxConvLocal creation ends up being here
2656 // because of some failure and logs the error. But wxLog will try to
2657 // attach timestamp, for which it will need wxConvLocal (to convert
2658 // time to char* and then wchar_t*), but that fails, tries to log
2659 // error, but wxLog has a (already locked) critical section that
2660 // guards static buffer.
2661 static bool alreadyLoggingError = false;
2662 if (!alreadyLoggingError)
2663 {
2664 alreadyLoggingError = true;
2665 wxLogError(_("Cannot convert from the charset '%s'!"),
2666 m_name ? m_name
e95354ec
VZ
2667 :
2668#if wxUSE_FONTMAP
267e11c5 2669 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
e95354ec
VZ
2670#else // !wxUSE_FONTMAP
2671 wxString::Format(_("encoding %s"), m_encoding).c_str()
2672#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2673 );
a58d4f4d
VS
2674 alreadyLoggingError = false;
2675 }
e95354ec
VZ
2676
2677 return NULL;
2678}
2679
2680void wxCSConv::CreateConvIfNeeded() const
2681{
2682 if ( m_deferred )
2683 {
2684 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
2685
2686#if wxUSE_INTL
2687 // if we don't have neither the name nor the encoding, use the default
2688 // encoding for this system
2689 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2690 {
4d312c22 2691 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
2692 }
2693#endif // wxUSE_INTL
2694
e95354ec
VZ
2695 self->m_convReal = DoCreate();
2696 self->m_deferred = false;
6001e347 2697 }
6001e347
RR
2698}
2699
2700size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2701{
e95354ec 2702 CreateConvIfNeeded();
dccce9ea 2703
e95354ec
VZ
2704 if (m_convReal)
2705 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
2706
2707 // latin-1 (direct)
4def3b35 2708 size_t len = strlen(psz);
dccce9ea 2709
f1339c56
RR
2710 if (buf)
2711 {
4def3b35 2712 for (size_t c = 0; c <= len; c++)
f1339c56
RR
2713 buf[c] = (unsigned char)(psz[c]);
2714 }
dccce9ea 2715
f1339c56 2716 return len;
6001e347
RR
2717}
2718
2719size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2720{
e95354ec 2721 CreateConvIfNeeded();
dccce9ea 2722
e95354ec
VZ
2723 if (m_convReal)
2724 return m_convReal->WC2MB(buf, psz, n);
1cd52418 2725
f1339c56 2726 // latin-1 (direct)
f8d791e0 2727 const size_t len = wxWcslen(psz);
f1339c56
RR
2728 if (buf)
2729 {
4def3b35 2730 for (size_t c = 0; c <= len; c++)
24642831
VS
2731 {
2732 if (psz[c] > 0xFF)
2733 return (size_t)-1;
907173e5 2734 buf[c] = (char)psz[c];
24642831
VS
2735 }
2736 }
2737 else
2738 {
2739 for (size_t c = 0; c <= len; c++)
2740 {
2741 if (psz[c] > 0xFF)
2742 return (size_t)-1;
2743 }
f1339c56 2744 }
dccce9ea 2745
f1339c56 2746 return len;
6001e347
RR
2747}
2748
bde4baac
VZ
2749// ----------------------------------------------------------------------------
2750// globals
2751// ----------------------------------------------------------------------------
2752
2753#ifdef __WINDOWS__
2754 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
2755#elif defined(__WXMAC__) && !defined(__MACH__)
2756 static wxMBConv_mac wxConvLibcObj ;
bde4baac 2757#else
dcc8fac0 2758 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
2759#endif
2760
2761static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2762static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2763static wxMBConvUTF7 wxConvUTF7Obj;
2764static wxMBConvUTF8 wxConvUTF8Obj;
c12b7f79 2765
bde4baac
VZ
2766WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2767WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2768WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2769WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2770WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2771WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
f5a1953b
VZ
2772WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2773#ifdef __WXOSX__
ea8ce907 2774 wxConvUTF8Obj;
f5a1953b 2775#else
ea8ce907 2776 wxConvLibcObj;
f5a1953b
VZ
2777#endif
2778
bde4baac
VZ
2779
2780#else // !wxUSE_WCHAR_T
2781
2782// stand-ins in absence of wchar_t
2783WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2784 wxConvISO8859_1,
2785 wxConvLocal,
2786 wxConvUTF8;
2787
2788#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
2789
2790