]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
Cross-hair cursor appears as an I-beam on WinXP for some reason;
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
f6bcfd97
BP
15// ============================================================================
16// declarations
17// ============================================================================
18
19// ----------------------------------------------------------------------------
20// headers
21// ----------------------------------------------------------------------------
22
14f355c2 23#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
24 #pragma implementation "strconv.h"
25#endif
26
27// For compilers that support precompilation, includes "wx.h".
28#include "wx/wxprec.h"
29
30#ifdef __BORLANDC__
31 #pragma hdrstop
32#endif
33
373658eb
VZ
34#ifndef WX_PRECOMP
35 #include "wx/intl.h"
36 #include "wx/log.h"
37#endif // WX_PRECOMP
38
bde4baac
VZ
39#include "wx/strconv.h"
40
41#if wxUSE_WCHAR_T
42
0a1c1e62 43#ifdef __WXMSW__
373658eb 44 #include "wx/msw/private.h"
7608a683
WS
45#endif
46
47#ifdef __WINDOWS__
13dd924a 48 #include "wx/msw/missing.h"
0a1c1e62
GRG
49#endif
50
1c193821 51#ifndef __WXWINCE__
1cd52418 52#include <errno.h>
1c193821
JS
53#endif
54
6001e347
RR
55#include <ctype.h>
56#include <string.h>
57#include <stdlib.h>
58
e95354ec
VZ
59#if defined(__WIN32__) && !defined(__WXMICROWIN__)
60 #define wxHAVE_WIN32_MB2WC
61#endif // __WIN32__ but !__WXMICROWIN__
62
373658eb
VZ
63// ----------------------------------------------------------------------------
64// headers
65// ----------------------------------------------------------------------------
7af284fd 66
6001e347 67#ifdef __SALFORDC__
373658eb 68 #include <clib.h>
6001e347
RR
69#endif
70
b040e242 71#ifdef HAVE_ICONV
373658eb 72 #include <iconv.h>
b1d547eb 73 #include "wx/thread.h"
1cd52418 74#endif
1cd52418 75
373658eb
VZ
76#include "wx/encconv.h"
77#include "wx/fontmap.h"
7608a683 78#include "wx/utils.h"
373658eb 79
335d31e0 80#ifdef __WXMAC__
4227afa4
SC
81#include <ATSUnicode.h>
82#include <TextCommon.h>
83#include <TextEncodingConverter.h>
335d31e0
SC
84
85#include "wx/mac/private.h" // includes mac headers
86#endif
373658eb
VZ
87// ----------------------------------------------------------------------------
88// macros
89// ----------------------------------------------------------------------------
3e61dfb0 90
// In-place byte swapping helpers: each one applies the corresponding
// wxUINTnn_SWAP_ALWAYS macro to the first len elements of str.
// NOTE(review): these expand to a bare brace block and use their arguments
// unparenthesised -- pass only simple expressions.
1cd52418 91#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
3a0d76bc 92#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
1cd52418
OK
93
// Select the encoding names and byte swapping macro matching the size of
// wchar_t; WC_UTF16 is additionally defined when wchar_t is 16 bits wide.
94#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
95 #define WC_NAME "UCS4"
96 #define WC_BSWAP BSWAP_UCS4
97 #ifdef WORDS_BIGENDIAN
98 #define WC_NAME_BEST "UCS-4BE"
99 #else
100 #define WC_NAME_BEST "UCS-4LE"
101 #endif
1cd52418 102#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
103 #define WC_NAME "UTF16"
104 #define WC_BSWAP BSWAP_UTF16
a3f2769e 105 #define WC_UTF16
3a0d76bc
VS
106 #ifdef WORDS_BIGENDIAN
107 #define WC_NAME_BEST "UTF-16BE"
108 #else
109 #define WC_NAME_BEST "UTF-16LE"
110 #endif
bab1e722 111#else // sizeof(wchar_t) != 2 nor 4
bde4baac
VZ
112 // does this ever happen?
113 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1cd52418
OK
114#endif
115
373658eb
VZ
116// ============================================================================
117// implementation
118// ============================================================================
119
120// ----------------------------------------------------------------------------
c91830cb 121// UTF-16 en/decoding to/from UCS-4
373658eb 122// ----------------------------------------------------------------------------
6001e347 123
b0a6bb75 124
c91830cb 125static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 126{
dccce9ea 127 if (input<=0xffff)
4def3b35 128 {
999836aa
VZ
129 if (output)
130 *output = (wxUint16) input;
4def3b35 131 return 1;
dccce9ea
VZ
132 }
133 else if (input>=0x110000)
4def3b35
VS
134 {
135 return (size_t)-1;
dccce9ea
VZ
136 }
137 else
4def3b35 138 {
dccce9ea 139 if (output)
4def3b35 140 {
c91830cb 141 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
999836aa 142 *output = (wxUint16) ((input&0x3ff)+0xdc00);
4def3b35
VS
143 }
144 return 2;
1cd52418 145 }
1cd52418
OK
146}
147
c91830cb 148static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 149{
dccce9ea 150 if ((*input<0xd800) || (*input>0xdfff))
4def3b35
VS
151 {
152 output = *input;
153 return 1;
dccce9ea 154 }
cdb14ecb 155 else if ((input[1]<0xdc00) || (input[1]>0xdfff))
4def3b35
VS
156 {
157 output = *input;
158 return (size_t)-1;
dccce9ea
VZ
159 }
160 else
4def3b35
VS
161 {
162 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
163 return 2;
164 }
1cd52418
OK
165}
166
b0a6bb75 167
f6bcfd97 168// ----------------------------------------------------------------------------
6001e347 169// wxMBConv
f6bcfd97 170// ----------------------------------------------------------------------------
2c53a80a
WS
171
172wxMBConv::~wxMBConv()
173{
174 // nothing to do here (necessary for Darwin linking probably)
175}
6001e347 176
6001e347
RR
177const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
178{
2b5f62a0 179 if ( psz )
6001e347 180 {
2b5f62a0
VZ
181 // calculate the length of the buffer needed first
182 size_t nLen = MB2WC(NULL, psz, 0);
183 if ( nLen != (size_t)-1 )
184 {
185 // now do the actual conversion
186 wxWCharBuffer buf(nLen);
635f33ce
VS
187 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
188 if ( nLen != (size_t)-1 )
189 {
190 return buf;
191 }
2b5f62a0 192 }
f6bcfd97 193 }
2b5f62a0
VZ
194
195 wxWCharBuffer buf((wchar_t *)NULL);
196
197 return buf;
6001e347
RR
198}
199
e5cceba0 200const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 201{
2b5f62a0
VZ
202 if ( pwz )
203 {
204 size_t nLen = WC2MB(NULL, pwz, 0);
205 if ( nLen != (size_t)-1 )
206 {
c91830cb 207 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
635f33ce
VS
208 nLen = WC2MB(buf.data(), pwz, nLen + 4);
209 if ( nLen != (size_t)-1 )
210 {
211 return buf;
212 }
2b5f62a0
VZ
213 }
214 }
215
216 wxCharBuffer buf((char *)NULL);
e5cceba0 217
e5cceba0 218 return buf;
6001e347
RR
219}
220
f5fb6871 221const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 222{
f5fb6871
RN
223 wxASSERT(pOutSize != NULL);
224
e4e3bbb4
RN
225 const char* szEnd = szString + nStringLen + 1;
226 const char* szPos = szString;
227 const char* szStart = szPos;
228
229 size_t nActualLength = 0;
f5fb6871
RN
230 size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
231
232 wxWCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
233
234 //Convert the string until the length() is reached, continuing the
235 //loop every time a null character is reached
236 while(szPos != szEnd)
237 {
238 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
239
240 //Get the length of the current (sub)string
241 size_t nLen = MB2WC(NULL, szPos, 0);
242
243 //Invalid conversion?
244 if( nLen == (size_t)-1 )
f5fb6871
RN
245 {
246 *pOutSize = 0;
247 theBuffer.data()[0u] = wxT('\0');
248 return theBuffer;
249 }
250
e4e3bbb4
RN
251
252 //Increase the actual length (+1 for current null character)
253 nActualLength += nLen + 1;
254
f5fb6871
RN
255 //if buffer too big, realloc the buffer
256 if (nActualLength > (nCurrentSize+1))
257 {
258 wxWCharBuffer theNewBuffer(nCurrentSize << 1);
259 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
260 theBuffer = theNewBuffer;
261 nCurrentSize <<= 1;
262 }
263
264 //Convert the current (sub)string
265 if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 266 {
f5fb6871
RN
267 *pOutSize = 0;
268 theBuffer.data()[0u] = wxT('\0');
269 return theBuffer;
e4e3bbb4
RN
270 }
271
272 //Increment to next (sub)string
273 //Note that we have to use strlen here instead of nLen
274 //here because XX2XX gives us the size of the output buffer,
275 //not neccessarly the length of the string
276 szPos += strlen(szPos) + 1;
277 }
278
f5fb6871
RN
279 //success - return actual length and the buffer
280 *pOutSize = nActualLength;
3698ae71 281 return theBuffer;
e4e3bbb4
RN
282}
283
f5fb6871 284const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 285{
f5fb6871
RN
286 wxASSERT(pOutSize != NULL);
287
e4e3bbb4
RN
288 const wchar_t* szEnd = szString + nStringLen + 1;
289 const wchar_t* szPos = szString;
290 const wchar_t* szStart = szPos;
291
292 size_t nActualLength = 0;
f5fb6871
RN
293 size_t nCurrentSize = nStringLen << 2; //try * 4 first
294
295 wxCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
296
297 //Convert the string until the length() is reached, continuing the
298 //loop every time a null character is reached
299 while(szPos != szEnd)
300 {
301 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
302
303 //Get the length of the current (sub)string
304 size_t nLen = WC2MB(NULL, szPos, 0);
305
306 //Invalid conversion?
307 if( nLen == (size_t)-1 )
f5fb6871
RN
308 {
309 *pOutSize = 0;
310 theBuffer.data()[0u] = wxT('\0');
311 return theBuffer;
312 }
e4e3bbb4
RN
313
314 //Increase the actual length (+1 for current null character)
315 nActualLength += nLen + 1;
3698ae71 316
f5fb6871
RN
317 //if buffer too big, realloc the buffer
318 if (nActualLength > (nCurrentSize+1))
319 {
320 wxCharBuffer theNewBuffer(nCurrentSize << 1);
321 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
322 theBuffer = theNewBuffer;
323 nCurrentSize <<= 1;
324 }
325
326 //Convert the current (sub)string
327 if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 328 {
f5fb6871
RN
329 *pOutSize = 0;
330 theBuffer.data()[0u] = wxT('\0');
331 return theBuffer;
e4e3bbb4
RN
332 }
333
334 //Increment to next (sub)string
335 //Note that we have to use wxWcslen here instead of nLen
336 //here because XX2XX gives us the size of the output buffer,
337 //not neccessarly the length of the string
338 szPos += wxWcslen(szPos) + 1;
339 }
340
f5fb6871
RN
341 //success - return actual length and the buffer
342 *pOutSize = nActualLength;
3698ae71 343 return theBuffer;
e4e3bbb4
RN
344}
345
6001e347 346// ----------------------------------------------------------------------------
bde4baac 347// wxMBConvLibc
6001e347
RR
348// ----------------------------------------------------------------------------
349
bde4baac
VZ
350size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
351{
352 return wxMB2WC(buf, psz, n);
353}
354
355size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
356{
357 return wxWC2MB(buf, psz, n);
358}
e1bfe89e 359
66bf0099 360#ifdef __UNIX__
c12b7f79 361
e1bfe89e 362// ----------------------------------------------------------------------------
66bf0099 363// wxConvBrokenFileNames
e1bfe89e
RR
364// ----------------------------------------------------------------------------
365
c12b7f79 366wxConvBrokenFileNames::wxConvBrokenFileNames()
ea8ce907 367{
c12b7f79
VZ
368 // decide which conversion to use for the file names
369
370 // (1) this variable exists for the sole purpose of specifying the encoding
371 // of the filenames for GTK+ programs, so use it if it is set
914955aa
MW
372 wxString encName(wxGetenv(_T("G_FILENAME_ENCODING")));
373 encName.MakeUpper();
374 if ( !encName.empty() && encName != _T("UTF-8") && encName != _T("UTF8") )
c12b7f79
VZ
375 {
376 m_conv = new wxCSConv(encName);
377 }
378 else // no G_FILENAME_ENCODING
379 {
914955aa
MW
380 if ( encName.empty() )
381 encName = wxLocale::GetSystemEncodingName().Upper();
382
c12b7f79
VZ
383 // (2) if a non default locale is set, assume that the user wants his
384 // filenames in this locale too
914955aa
MW
385 if ( !encName.empty() && encName != _T("UTF-8") && encName != _T("UTF8") )
386 {
387 wxSetEnv(_T("G_FILENAME_ENCODING"), encName);
388 m_conv = new wxMBConvLibc;
389 }
390 else
c12b7f79 391 {
c12b7f79 392 // (3) finally use UTF-8 by default
914955aa 393 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
c12b7f79
VZ
394 }
395 }
ea8ce907
RR
396}
397
c12b7f79
VZ
398size_t
399wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
400 const char *psz,
401 size_t outputSize) const
e1bfe89e 402{
c12b7f79 403 return m_conv->MB2WC( outputBuf, psz, outputSize );
e1bfe89e
RR
404}
405
c12b7f79
VZ
406size_t
407wxConvBrokenFileNames::WC2MB(char *outputBuf,
408 const wchar_t *psz,
409 size_t outputSize) const
e1bfe89e 410{
c12b7f79 411 return m_conv->WC2MB( outputBuf, psz, outputSize );
e1bfe89e
RR
412}
413
66bf0099 414#endif
c12b7f79 415
bde4baac 416// ----------------------------------------------------------------------------
3698ae71 417// UTF-7
bde4baac 418// ----------------------------------------------------------------------------
6001e347 419
15f2ee32 420// Implementation (C) 2004 Fredrik Roubert
6001e347 421
15f2ee32
RN
422//
423// BASE64 decoding table
424//
// Indexed by byte value: gives the 6 bit value of a BASE64 character or 0xff
// for the bytes which are not BASE64 characters (the UTF-7 decoder stops its
// BASE64 run at the first 0xff entry).
425static const unsigned char utf7unb64[] =
6001e347 426{
15f2ee32
RN
427 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
428 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
429 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
430 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
431 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
432 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
433 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
434 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
435 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
436 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
437 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
438 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
439 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
440 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
441 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
442 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
443 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
444 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
445 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
446 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
447 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
448 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
449 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
450 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
451 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
452 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
453 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
454 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
455 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
456 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
457 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
458 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
459};
460
461size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
462{
15f2ee32
RN
463 size_t len = 0;
464
465 while (*psz && ((!buf) || (len < n)))
466 {
467 unsigned char cc = *psz++;
468 if (cc != '+')
469 {
470 // plain ASCII char
471 if (buf)
472 *buf++ = cc;
473 len++;
474 }
475 else if (*psz == '-')
476 {
477 // encoded plus sign
478 if (buf)
479 *buf++ = cc;
480 len++;
481 psz++;
482 }
483 else
484 {
485 // BASE64 encoded string
486 bool lsb;
487 unsigned char c;
488 unsigned int d, l;
489 for (lsb = false, d = 0, l = 0;
490 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
491 {
492 d <<= 6;
493 d += cc;
494 for (l += 6; l >= 8; lsb = !lsb)
495 {
6356d52a 496 c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
497 if (lsb)
498 {
499 if (buf)
500 *buf++ |= c;
501 len ++;
502 }
503 else
504 if (buf)
6356d52a 505 *buf = (wchar_t)(c << 8);
15f2ee32
RN
506 }
507 }
508 if (*psz == '-')
509 psz++;
510 }
511 }
512 if (buf && (len < n))
513 *buf = 0;
514 return len;
6001e347
RR
515}
516
15f2ee32
RN
517//
518// BASE64 encoding table
519//
// Indexed by a 6 bit value: gives the BASE64 character representing it.
520static const unsigned char utf7enb64[] =
521{
522 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
523 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
524 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
525 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
526 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
527 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
528 'w', 'x', 'y', 'z', '0', '1', '2', '3',
529 '4', '5', '6', '7', '8', '9', '+', '/'
530};
531
532//
533// UTF-7 encoding table
534//
535// 0 - Set D (directly encoded characters)
536// 1 - Set O (optional direct characters)
537// 2 - whitespace characters (optional)
538// 3 - special characters
539//
// Indexed by a 7 bit character code: gives the class (0..3, see the legend
// above) of the character. The UTF-7 encoder in this file copies only the
// characters of class 0 directly and BASE64-encodes all the others.
540static const unsigned char utf7encode[128] =
6001e347 541{
15f2ee32
RN
542 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
543 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
544 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
545 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
546 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
547 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
548 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
549 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
550};
551
667e5b3e 552size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32
RN
553{
554
555
556 size_t len = 0;
557
558 while (*psz && ((!buf) || (len < n)))
559 {
560 wchar_t cc = *psz++;
561 if (cc < 0x80 && utf7encode[cc] < 1)
562 {
563 // plain ASCII char
564 if (buf)
565 *buf++ = (char)cc;
566 len++;
567 }
568#ifndef WC_UTF16
79c78d42 569 else if (((wxUint32)cc) > 0xffff)
b2c13097 570 {
15f2ee32
RN
571 // no surrogate pair generation (yet?)
572 return (size_t)-1;
573 }
574#endif
575 else
576 {
577 if (buf)
578 *buf++ = '+';
579 len++;
580 if (cc != '+')
581 {
582 // BASE64 encode string
583 unsigned int lsb, d, l;
584 for (d = 0, l = 0;; psz++)
585 {
586 for (lsb = 0; lsb < 2; lsb ++)
587 {
588 d <<= 8;
589 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
590
591 for (l += 8; l >= 6; )
592 {
593 l -= 6;
594 if (buf)
595 *buf++ = utf7enb64[(d >> l) % 64];
596 len++;
597 }
598 }
599 cc = *psz;
600 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
601 break;
602 }
603 if (l != 0)
604 {
605 if (buf)
606 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
607 len++;
608 }
609 }
610 if (buf)
611 *buf++ = '-';
612 len++;
613 }
614 }
615 if (buf && (len < n))
616 *buf = 0;
617 return len;
6001e347
RR
618}
619
f6bcfd97 620// ----------------------------------------------------------------------------
6001e347 621// UTF-8
f6bcfd97 622// ----------------------------------------------------------------------------
6001e347 623
dccce9ea 624static wxUint32 utf8_max[]=
4def3b35 625 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347 626
3698ae71
VZ
627// boundaries of the private use area we use to (temporarily) remap invalid
628// characters invalid in a UTF-8 encoded string
ea8ce907
RR
629const wxUint32 wxUnicodePUA = 0x100000;
630const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
631
6001e347
RR
632size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
633{
4def3b35
VS
634 size_t len = 0;
635
dccce9ea 636 while (*psz && ((!buf) || (len < n)))
4def3b35 637 {
ea8ce907
RR
638 const char *opsz = psz;
639 bool invalid = false;
4def3b35
VS
640 unsigned char cc = *psz++, fc = cc;
641 unsigned cnt;
dccce9ea 642 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 643 fc <<= 1;
dccce9ea 644 if (!cnt)
4def3b35
VS
645 {
646 // plain ASCII char
dccce9ea 647 if (buf)
4def3b35
VS
648 *buf++ = cc;
649 len++;
dccce9ea
VZ
650 }
651 else
4def3b35
VS
652 {
653 cnt--;
dccce9ea 654 if (!cnt)
4def3b35
VS
655 {
656 // invalid UTF-8 sequence
ea8ce907 657 invalid = true;
dccce9ea
VZ
658 }
659 else
4def3b35
VS
660 {
661 unsigned ocnt = cnt - 1;
662 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 663 while (cnt--)
4def3b35 664 {
ea8ce907 665 cc = *psz;
dccce9ea 666 if ((cc & 0xC0) != 0x80)
4def3b35
VS
667 {
668 // invalid UTF-8 sequence
ea8ce907
RR
669 invalid = true;
670 break;
4def3b35 671 }
ea8ce907 672 psz++;
4def3b35
VS
673 res = (res << 6) | (cc & 0x3f);
674 }
ea8ce907 675 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
676 {
677 // illegal UTF-8 encoding
ea8ce907 678 invalid = true;
4def3b35 679 }
ea8ce907
RR
680 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
681 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
682 {
683 // if one of our PUA characters turns up externally
684 // it must also be treated as an illegal sequence
685 // (a bit like you have to escape an escape character)
686 invalid = true;
687 }
688 else
689 {
1cd52418 690#ifdef WC_UTF16
ea8ce907
RR
691 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
692 size_t pa = encode_utf16(res, (wxUint16 *)buf);
693 if (pa == (size_t)-1)
694 {
695 invalid = true;
696 }
697 else
698 {
699 if (buf)
700 buf += pa;
701 len += pa;
702 }
373658eb 703#else // !WC_UTF16
ea8ce907
RR
704 if (buf)
705 *buf++ = res;
706 len++;
373658eb 707#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
708 }
709 }
710 if (invalid)
711 {
712 if (m_options & MAP_INVALID_UTF8_TO_PUA)
713 {
714 while (opsz < psz && (!buf || len < n))
715 {
716#ifdef WC_UTF16
717 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
718 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
719 wxASSERT(pa != (size_t)-1);
720 if (buf)
721 buf += pa;
722 opsz++;
723 len += pa;
724#else
725 if (buf)
726 *buf++ = wxUnicodePUA + (unsigned char)*opsz;
727 opsz++;
728 len++;
729#endif
730 }
731 }
3698ae71 732 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
733 {
734 while (opsz < psz && (!buf || len < n))
735 {
3698ae71
VZ
736 if ( buf && len + 3 < n )
737 {
738 unsigned char n = *opsz;
739 *buf++ = L'\\';
b2c13097
WS
740 *buf++ = (wchar_t)( L'0' + n / 0100 );
741 *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
742 *buf++ = (wchar_t)( L'0' + n % 010 );
3698ae71 743 }
ea8ce907
RR
744 opsz++;
745 len += 4;
746 }
747 }
3698ae71 748 else // MAP_INVALID_UTF8_NOT
ea8ce907
RR
749 {
750 return (size_t)-1;
751 }
4def3b35
VS
752 }
753 }
6001e347 754 }
dccce9ea 755 if (buf && (len < n))
4def3b35
VS
756 *buf = 0;
757 return len;
6001e347
RR
758}
759
3698ae71
VZ
// Returns true if wch is one of the octal digits '0'..'7'.
static inline bool isoctal(wchar_t wch)
{
    return wch >= L'0' && wch <= L'7';
}
764
6001e347
RR
765size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
766{
4def3b35 767 size_t len = 0;
6001e347 768
dccce9ea 769 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
770 {
771 wxUint32 cc;
1cd52418 772#ifdef WC_UTF16
b5153fd8
VZ
773 // cast is ok for WC_UTF16
774 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 775 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 776#else
4def3b35
VS
777 cc=(*psz++) & 0x7fffffff;
778#endif
3698ae71
VZ
779
780 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
781 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 782 {
dccce9ea 783 if (buf)
ea8ce907 784 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 785 len++;
3698ae71
VZ
786 }
787 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
788 cc == L'\\' &&
789 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 790 {
dccce9ea 791 if (buf)
3698ae71 792 {
b2c13097
WS
793 *buf++ = (char) ((psz[0] - L'0')*0100 +
794 (psz[1] - L'0')*010 +
795 (psz[2] - L'0'));
3698ae71
VZ
796 }
797
798 psz += 3;
ea8ce907
RR
799 len++;
800 }
801 else
802 {
803 unsigned cnt;
804 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
805 if (!cnt)
4def3b35 806 {
ea8ce907
RR
807 // plain ASCII char
808 if (buf)
809 *buf++ = (char) cc;
810 len++;
811 }
812
813 else
814 {
815 len += cnt + 1;
816 if (buf)
817 {
818 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
819 while (cnt--)
820 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
821 }
4def3b35
VS
822 }
823 }
6001e347 824 }
4def3b35 825
3698ae71
VZ
826 if (buf && (len<n))
827 *buf = 0;
adb45366 828
4def3b35 829 return len;
6001e347
RR
830}
831
c91830cb
VZ
832// ----------------------------------------------------------------------------
833// UTF-16
834// ----------------------------------------------------------------------------
835
// The "straight" name is an alias for the UTF-16 converter class whose byte
// order is the same as the one of this platform (as given by
// WORDS_BIGENDIAN) and the "swap" name for the class using the opposite one.
836#ifdef WORDS_BIGENDIAN
bde4baac
VZ
837 #define wxMBConvUTF16straight wxMBConvUTF16BE
838 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 839#else
bde4baac
VZ
840 #define wxMBConvUTF16swap wxMBConvUTF16BE
841 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
842#endif
843
844
c91830cb
VZ
845#ifdef WC_UTF16
846
c91830cb
VZ
847// copy 16bit MB to 16bit String
848size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
849{
850 size_t len=0;
851
852 while (*(wxUint16*)psz && (!buf || len < n))
853 {
854 if (buf)
855 *buf++ = *(wxUint16*)psz;
856 len++;
857
858 psz += sizeof(wxUint16);
859 }
860 if (buf && len<n) *buf=0;
861
862 return len;
863}
864
865
866// copy 16bit String to 16bit MB
867size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
868{
869 size_t len=0;
870
871 while (*psz && (!buf || len < n))
872 {
873 if (buf)
874 {
875 *(wxUint16*)buf = *psz;
876 buf += sizeof(wxUint16);
877 }
878 len += sizeof(wxUint16);
879 psz++;
880 }
881 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
882
883 return len;
884}
885
886
887// swap 16bit MB to 16bit String
888size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
889{
890 size_t len=0;
891
892 while (*(wxUint16*)psz && (!buf || len < n))
893 {
894 if (buf)
895 {
896 ((char *)buf)[0] = psz[1];
897 ((char *)buf)[1] = psz[0];
898 buf++;
899 }
900 len++;
901 psz += sizeof(wxUint16);
902 }
903 if (buf && len<n) *buf=0;
904
905 return len;
906}
907
908
909// swap 16bit MB to 16bit String
910size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
911{
912 size_t len=0;
913
914 while (*psz && (!buf || len < n))
915 {
916 if (buf)
917 {
918 *buf++ = ((char*)psz)[1];
919 *buf++ = ((char*)psz)[0];
920 }
921 len += sizeof(wxUint16);
922 psz++;
923 }
924 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
925
926 return len;
927}
928
929
930#else // WC_UTF16
931
932
933// copy 16bit MB to 32bit String
934size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
935{
936 size_t len=0;
937
938 while (*(wxUint16*)psz && (!buf || len < n))
939 {
940 wxUint32 cc;
941 size_t pa=decode_utf16((wxUint16*)psz, cc);
942 if (pa == (size_t)-1)
943 return pa;
944
945 if (buf)
946 *buf++ = cc;
947 len++;
948 psz += pa * sizeof(wxUint16);
949 }
950 if (buf && len<n) *buf=0;
951
952 return len;
953}
954
955
956// copy 32bit String to 16bit MB
957size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
958{
959 size_t len=0;
960
961 while (*psz && (!buf || len < n))
962 {
963 wxUint16 cc[2];
964 size_t pa=encode_utf16(*psz, cc);
965
966 if (pa == (size_t)-1)
967 return pa;
968
969 if (buf)
970 {
69b80d28 971 *(wxUint16*)buf = cc[0];
b5153fd8 972 buf += sizeof(wxUint16);
c91830cb 973 if (pa > 1)
69b80d28
VZ
974 {
975 *(wxUint16*)buf = cc[1];
976 buf += sizeof(wxUint16);
977 }
c91830cb
VZ
978 }
979
980 len += pa*sizeof(wxUint16);
981 psz++;
982 }
983 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
984
985 return len;
986}
987
988
989// swap 16bit MB to 32bit String
990size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
991{
992 size_t len=0;
993
994 while (*(wxUint16*)psz && (!buf || len < n))
995 {
996 wxUint32 cc;
997 char tmp[4];
998 tmp[0]=psz[1]; tmp[1]=psz[0];
999 tmp[2]=psz[3]; tmp[3]=psz[2];
1000
1001 size_t pa=decode_utf16((wxUint16*)tmp, cc);
1002 if (pa == (size_t)-1)
1003 return pa;
1004
1005 if (buf)
1006 *buf++ = cc;
1007
1008 len++;
1009 psz += pa * sizeof(wxUint16);
1010 }
1011 if (buf && len<n) *buf=0;
1012
1013 return len;
1014}
1015
1016
1017// swap 32bit String to 16bit MB
1018size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1019{
1020 size_t len=0;
1021
1022 while (*psz && (!buf || len < n))
1023 {
1024 wxUint16 cc[2];
1025 size_t pa=encode_utf16(*psz, cc);
1026
1027 if (pa == (size_t)-1)
1028 return pa;
1029
1030 if (buf)
1031 {
1032 *buf++ = ((char*)cc)[1];
1033 *buf++ = ((char*)cc)[0];
1034 if (pa > 1)
1035 {
1036 *buf++ = ((char*)cc)[3];
1037 *buf++ = ((char*)cc)[2];
1038 }
1039 }
1040
1041 len += pa*sizeof(wxUint16);
1042 psz++;
1043 }
1044 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1045
1046 return len;
1047}
1048
1049#endif // WC_UTF16
1050
1051
1052// ----------------------------------------------------------------------------
1053// UTF-32
1054// ----------------------------------------------------------------------------
1055
// The "straight" name is an alias for the UTF-32 converter class whose byte
// order is the same as the one of this platform (as given by
// WORDS_BIGENDIAN) and the "swap" name for the class using the opposite one.
1056#ifdef WORDS_BIGENDIAN
1057#define wxMBConvUTF32straight wxMBConvUTF32BE
1058#define wxMBConvUTF32swap wxMBConvUTF32LE
1059#else
1060#define wxMBConvUTF32swap wxMBConvUTF32BE
1061#define wxMBConvUTF32straight wxMBConvUTF32LE
1062#endif
1063
1064
// Definitions of the global UTF-32 converter objects, one per byte order.
// NOTE(review): WXDLLIMPEXP_DATA_BASE presumably only adds the DLL
// export/import decoration to the type -- confirm against its definition.
1065WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1066WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1067
1068
1069#ifdef WC_UTF16
1070
1071// copy 32bit MB to 16bit String
1072size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1073{
1074 size_t len=0;
1075
1076 while (*(wxUint32*)psz && (!buf || len < n))
1077 {
1078 wxUint16 cc[2];
1079
1080 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1081 if (pa == (size_t)-1)
1082 return pa;
1083
1084 if (buf)
1085 {
1086 *buf++ = cc[0];
1087 if (pa > 1)
1088 *buf++ = cc[1];
1089 }
1090 len += pa;
1091 psz += sizeof(wxUint32);
1092 }
1093 if (buf && len<n) *buf=0;
1094
1095 return len;
1096}
1097
1098
1099// copy 16bit String to 32bit MB
1100size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1101{
1102 size_t len=0;
1103
1104 while (*psz && (!buf || len < n))
1105 {
1106 wxUint32 cc;
1107
b5153fd8
VZ
1108 // cast is ok for WC_UTF16
1109 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
1110 if (pa == (size_t)-1)
1111 return pa;
1112
1113 if (buf)
1114 {
1115 *(wxUint32*)buf = cc;
1116 buf += sizeof(wxUint32);
1117 }
1118 len += sizeof(wxUint32);
1119 psz += pa;
1120 }
b5153fd8
VZ
1121
1122 if (buf && len<=n-sizeof(wxUint32))
1123 *(wxUint32*)buf=0;
c91830cb
VZ
1124
1125 return len;
1126}
1127
1128
1129
1130// swap 32bit MB to 16bit String
1131size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1132{
1133 size_t len=0;
1134
1135 while (*(wxUint32*)psz && (!buf || len < n))
1136 {
1137 char tmp[4];
1138 tmp[0] = psz[3]; tmp[1] = psz[2];
1139 tmp[2] = psz[1]; tmp[3] = psz[0];
1140
1141
1142 wxUint16 cc[2];
1143
1144 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1145 if (pa == (size_t)-1)
1146 return pa;
1147
1148 if (buf)
1149 {
1150 *buf++ = cc[0];
1151 if (pa > 1)
1152 *buf++ = cc[1];
1153 }
1154 len += pa;
1155 psz += sizeof(wxUint32);
1156 }
b5153fd8
VZ
1157
1158 if (buf && len<n)
1159 *buf=0;
c91830cb
VZ
1160
1161 return len;
1162}
1163
1164
1165// swap 16bit String to 32bit MB
1166size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1167{
1168 size_t len=0;
1169
1170 while (*psz && (!buf || len < n))
1171 {
1172 char cc[4];
1173
b5153fd8
VZ
1174 // cast is ok for WC_UTF16
1175 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
1176 if (pa == (size_t)-1)
1177 return pa;
1178
1179 if (buf)
1180 {
1181 *buf++ = cc[3];
1182 *buf++ = cc[2];
1183 *buf++ = cc[1];
1184 *buf++ = cc[0];
1185 }
1186 len += sizeof(wxUint32);
1187 psz += pa;
1188 }
b5153fd8
VZ
1189
1190 if (buf && len<=n-sizeof(wxUint32))
1191 *(wxUint32*)buf=0;
c91830cb
VZ
1192
1193 return len;
1194}
1195
1196#else // WC_UTF16
1197
1198
1199// copy 32bit MB to 32bit String
1200size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1201{
1202 size_t len=0;
1203
1204 while (*(wxUint32*)psz && (!buf || len < n))
1205 {
1206 if (buf)
1207 *buf++ = *(wxUint32*)psz;
1208 len++;
1209 psz += sizeof(wxUint32);
1210 }
b5153fd8
VZ
1211
1212 if (buf && len<n)
1213 *buf=0;
c91830cb
VZ
1214
1215 return len;
1216}
1217
1218
1219// copy 32bit String to 32bit MB
1220size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1221{
1222 size_t len=0;
1223
1224 while (*psz && (!buf || len < n))
1225 {
1226 if (buf)
1227 {
1228 *(wxUint32*)buf = *psz;
1229 buf += sizeof(wxUint32);
1230 }
1231
1232 len += sizeof(wxUint32);
1233 psz++;
1234 }
1235
b5153fd8
VZ
1236 if (buf && len<=n-sizeof(wxUint32))
1237 *(wxUint32*)buf=0;
c91830cb
VZ
1238
1239 return len;
1240}
1241
1242
1243// swap 32bit MB to 32bit String
1244size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1245{
1246 size_t len=0;
1247
1248 while (*(wxUint32*)psz && (!buf || len < n))
1249 {
1250 if (buf)
1251 {
1252 ((char *)buf)[0] = psz[3];
1253 ((char *)buf)[1] = psz[2];
1254 ((char *)buf)[2] = psz[1];
1255 ((char *)buf)[3] = psz[0];
1256 buf++;
1257 }
1258 len++;
1259 psz += sizeof(wxUint32);
1260 }
b5153fd8
VZ
1261
1262 if (buf && len<n)
1263 *buf=0;
c91830cb
VZ
1264
1265 return len;
1266}
1267
1268
1269// swap 32bit String to 32bit MB
1270size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1271{
1272 size_t len=0;
1273
1274 while (*psz && (!buf || len < n))
1275 {
1276 if (buf)
1277 {
1278 *buf++ = ((char *)psz)[3];
1279 *buf++ = ((char *)psz)[2];
1280 *buf++ = ((char *)psz)[1];
1281 *buf++ = ((char *)psz)[0];
1282 }
1283 len += sizeof(wxUint32);
1284 psz++;
1285 }
b5153fd8
VZ
1286
1287 if (buf && len<=n-sizeof(wxUint32))
1288 *(wxUint32*)buf=0;
c91830cb
VZ
1289
1290 return len;
1291}
1292
1293
1294#endif // WC_UTF16
1295
1296
36acb880
VZ
1297// ============================================================================
1298// The classes doing conversion using the iconv_xxx() functions
1299// ============================================================================
3caec1bb 1300
b040e242 1301#ifdef HAVE_ICONV
3a0d76bc 1302
b1d547eb
VS
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if output buffer is _exactly_ as big as needed. Such case is
//     (unless there's yet another bug in glibc) the only case when iconv()
//     returns with (size_t)-1 (which means error) and says there are 0 bytes
//     left in the input buffer -- when _real_ error occurs,
//     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
//     iconv() failure.
//     [This bug does not appear in glibc 2.2.]
//
// NB: the macro arguments are parenthesized so that passing an expression
//     (and not just a simple variable) can't change the meaning of the test
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft) ((cres) == (size_t)-1)
#endif

// cast the input pointer to whatever type this platform's iconv() expects
// (ICONV_CONST is defined elsewhere)
#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880
VZ
1319
1320// ----------------------------------------------------------------------------
e95354ec 1321// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1322// ----------------------------------------------------------------------------
1323
e95354ec 1324class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1325{
1326public:
e95354ec
VZ
1327 wxMBConv_iconv(const wxChar *name);
1328 virtual ~wxMBConv_iconv();
36acb880 1329
bde4baac
VZ
1330 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1331 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1332
e95354ec 1333 bool IsOk() const
36acb880
VZ
1334 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1335
1336protected:
1337 // the iconv handlers used to translate from multibyte to wide char and in
1338 // the other direction
1339 iconv_t m2w,
1340 w2m;
b1d547eb
VS
1341#if wxUSE_THREADS
1342 // guards access to m2w and w2m objects
1343 wxMutex m_iconvMutex;
1344#endif
36acb880
VZ
1345
1346private:
e95354ec 1347 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
1348 // available on this machine, it will remain NULL
1349 static const char *ms_wcCharsetName;
1350
1351 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1352 // different endian-ness than the native one
405d8f46 1353 static bool ms_wcNeedsSwap;
36acb880
VZ
1354};
1355
e95354ec
VZ
1356const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1357bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1358
e95354ec 1359wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 1360{
04c79127
RR
1361 // Do it the hard way
1362 char cname[100];
1363 for (size_t i = 0; i < wxStrlen(name)+1; i++)
1364 cname[i] = (char) name[i];
1365
36acb880
VZ
1366 // check for charset that represents wchar_t:
1367 if (ms_wcCharsetName == NULL)
f1339c56 1368 {
e95354ec 1369 ms_wcNeedsSwap = false;
dccce9ea 1370
36acb880
VZ
1371 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1372 ms_wcCharsetName = WC_NAME_BEST;
04c79127 1373 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 1374
36acb880
VZ
1375 if (m2w == (iconv_t)-1)
1376 {
1377 // try charset w/o bytesex info (e.g. "UCS4")
1378 // and check for bytesex ourselves:
1379 ms_wcCharsetName = WC_NAME;
04c79127 1380 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
1381
1382 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
1383 if (m2w == (iconv_t)-1)
1384 {
36acb880 1385 ms_wcCharsetName = "WCHAR_T";
04c79127 1386 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 1387 }
3a0d76bc 1388
36acb880
VZ
1389 if (m2w != (iconv_t)-1)
1390 {
1391 char buf[2], *bufPtr;
1392 wchar_t wbuf[2], *wbufPtr;
1393 size_t insz, outsz;
1394 size_t res;
1395
1396 buf[0] = 'A';
1397 buf[1] = 0;
1398 wbuf[0] = 0;
1399 insz = 2;
1400 outsz = SIZEOF_WCHAR_T * 2;
1401 wbufPtr = wbuf;
1402 bufPtr = buf;
1403
1404 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1405 (char**)&wbufPtr, &outsz);
1406
1407 if (ICONV_FAILED(res, insz))
3a0d76bc 1408 {
36acb880
VZ
1409 ms_wcCharsetName = NULL;
1410 wxLogLastError(wxT("iconv"));
2b5f62a0 1411 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
1412 }
1413 else
1414 {
36acb880 1415 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
1416 }
1417 }
36acb880
VZ
1418 else
1419 {
1420 ms_wcCharsetName = NULL;
373658eb 1421
77ffb593 1422 // VS: we must not output an error here, since wxWidgets will safely
957686c8
VS
1423 // fall back to using wxEncodingConverter.
1424 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1425 //wxLogError(
36acb880 1426 }
3a0d76bc 1427 }
36acb880 1428 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 1429 }
36acb880 1430 else // we already have ms_wcCharsetName
3caec1bb 1431 {
04c79127 1432 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 1433 }
dccce9ea 1434
36acb880
VZ
1435 // NB: don't ever pass NULL to iconv_open(), it may crash!
1436 if ( ms_wcCharsetName )
f1339c56 1437 {
04c79127 1438 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 1439 }
405d8f46
VZ
1440 else
1441 {
1442 w2m = (iconv_t)-1;
1443 }
36acb880 1444}
3caec1bb 1445
e95354ec 1446wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
1447{
1448 if ( m2w != (iconv_t)-1 )
1449 iconv_close(m2w);
1450 if ( w2m != (iconv_t)-1 )
1451 iconv_close(w2m);
1452}
3a0d76bc 1453
bde4baac 1454size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1455{
b1d547eb
VS
1456#if wxUSE_THREADS
1457 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1458 // Unfortunately there is a couple of global wxCSConv objects such as
1459 // wxConvLocal that are used all over wx code, so we have to make sure
1460 // the handle is used by at most one thread at the time. Otherwise
1461 // only a few wx classes would be safe to use from non-main threads
1462 // as MB<->WC conversion would fail "randomly".
1463 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1464#endif
3698ae71 1465
36acb880
VZ
1466 size_t inbuf = strlen(psz);
1467 size_t outbuf = n * SIZEOF_WCHAR_T;
1468 size_t res, cres;
1469 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1470 wchar_t *bufPtr = buf;
1471 const char *pszPtr = psz;
1472
1473 if (buf)
1474 {
1475 // have destination buffer, convert there
1476 cres = iconv(m2w,
1477 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1478 (char**)&bufPtr, &outbuf);
1479 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1480
36acb880 1481 if (ms_wcNeedsSwap)
3a0d76bc 1482 {
36acb880
VZ
1483 // convert to native endianness
1484 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 1485 }
adb45366 1486
49dd9820
VS
1487 // NB: iconv was given only strlen(psz) characters on input, and so
1488 // it couldn't convert the trailing zero. Let's do it ourselves
1489 // if there's some room left for it in the output buffer.
1490 if (res < n)
1491 buf[res] = 0;
36acb880
VZ
1492 }
1493 else
1494 {
1495 // no destination buffer... convert using temp buffer
1496 // to calculate destination buffer requirement
1497 wchar_t tbuf[8];
1498 res = 0;
1499 do {
1500 bufPtr = tbuf;
1501 outbuf = 8*SIZEOF_WCHAR_T;
1502
1503 cres = iconv(m2w,
1504 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1505 (char**)&bufPtr, &outbuf );
1506
1507 res += 8-(outbuf/SIZEOF_WCHAR_T);
1508 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1509 }
dccce9ea 1510
36acb880 1511 if (ICONV_FAILED(cres, inbuf))
f1339c56 1512 {
36acb880
VZ
1513 //VS: it is ok if iconv fails, hence trace only
1514 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1515 return (size_t)-1;
1516 }
1517
1518 return res;
1519}
1520
bde4baac 1521size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1522{
b1d547eb
VS
1523#if wxUSE_THREADS
1524 // NB: explained in MB2WC
1525 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1526#endif
3698ae71 1527
f8d791e0 1528 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1529 size_t outbuf = n;
1530 size_t res, cres;
3a0d76bc 1531
36acb880 1532 wchar_t *tmpbuf = 0;
3caec1bb 1533
36acb880
VZ
1534 if (ms_wcNeedsSwap)
1535 {
1536 // need to copy to temp buffer to switch endianness
1537 // this absolutely doesn't rock!
1538 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1539 // could be in read-only memory, or be accessed in some other thread)
1540 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1541 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1542 WC_BSWAP(tmpbuf, inbuf)
1543 psz=tmpbuf;
1544 }
3a0d76bc 1545
36acb880
VZ
1546 if (buf)
1547 {
1548 // have destination buffer, convert there
1549 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1550
36acb880 1551 res = n-outbuf;
adb45366 1552
49dd9820
VS
1553 // NB: iconv was given only wcslen(psz) characters on input, and so
1554 // it couldn't convert the trailing zero. Let's do it ourselves
1555 // if there's some room left for it in the output buffer.
1556 if (res < n)
1557 buf[0] = 0;
36acb880
VZ
1558 }
1559 else
1560 {
1561 // no destination buffer... convert using temp buffer
1562 // to calculate destination buffer requirement
1563 char tbuf[16];
1564 res = 0;
1565 do {
1566 buf = tbuf; outbuf = 16;
1567
1568 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1569
36acb880
VZ
1570 res += 16 - outbuf;
1571 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1572 }
dccce9ea 1573
36acb880
VZ
1574 if (ms_wcNeedsSwap)
1575 {
1576 free(tmpbuf);
1577 }
dccce9ea 1578
36acb880
VZ
1579 if (ICONV_FAILED(cres, inbuf))
1580 {
1581 //VS: it is ok if iconv fails, hence trace only
1582 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1583 return (size_t)-1;
1584 }
1585
1586 return res;
1587}
1588
b040e242 1589#endif // HAVE_ICONV
36acb880 1590
e95354ec 1591
36acb880
VZ
1592// ============================================================================
1593// Win32 conversion classes
1594// ============================================================================
1cd52418 1595
e95354ec 1596#ifdef wxHAVE_WIN32_MB2WC
373658eb 1597
8b04d4c4 1598// from utils.cpp
d775fa82 1599#if wxUSE_FONTMAP
8b04d4c4
VZ
1600extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1601extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1602#endif
373658eb 1603
e95354ec 1604class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1605{
1606public:
bde4baac
VZ
1607 wxMBConv_win32()
1608 {
1609 m_CodePage = CP_ACP;
1610 }
1611
7608a683 1612#if wxUSE_FONTMAP
e95354ec 1613 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1614 {
1615 m_CodePage = wxCharsetToCodepage(name);
1616 }
dccce9ea 1617
e95354ec 1618 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1619 {
1620 m_CodePage = wxEncodingToCodepage(encoding);
1621 }
7608a683 1622#endif
8b04d4c4 1623
bde4baac 1624 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1625 {
02272c9c
VZ
1626 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1627 // the behaviour is not compatible with the Unix version (using iconv)
1628 // and break the library itself, e.g. wxTextInputStream::NextChar()
1629 // wouldn't work if reading an incomplete MB char didn't result in an
1630 // error
667e5b3e
VZ
1631 //
1632 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1633 // an error (tested under Windows Server 2003) and apparently it is
1634 // done on purpose, i.e. the function accepts any input in this case
1635 // and although I'd prefer to return error on ill-formed output, our
1636 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1637 // explicitly ill-formed according to RFC 2152) neither so we don't
1638 // even have any fallback here...
1639 int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1640
2b5f62a0
VZ
1641 const size_t len = ::MultiByteToWideChar
1642 (
1643 m_CodePage, // code page
667e5b3e 1644 flags, // flags: fall on error
2b5f62a0
VZ
1645 psz, // input string
1646 -1, // its length (NUL-terminated)
b4da152e 1647 buf, // output string
2b5f62a0
VZ
1648 buf ? n : 0 // size of output buffer
1649 );
1650
03a991bc
VZ
1651 // note that it returns count of written chars for buf != NULL and size
1652 // of the needed buffer for buf == NULL so in either case the length of
1653 // the string (which never includes the terminating NUL) is one less
1654 return len ? len - 1 : (size_t)-1;
f1339c56 1655 }
dccce9ea 1656
13dd924a 1657 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1658 {
13dd924a
VZ
1659 /*
1660 we have a problem here: by default, WideCharToMultiByte() may
1661 replace characters unrepresentable in the target code page with bad
1662 quality approximations such as turning "1/2" symbol (U+00BD) into
1663 "1" for the code pages which don't have it and we, obviously, want
1664 to avoid this at any price
d775fa82 1665
13dd924a
VZ
1666 the trouble is that this function does it _silently_, i.e. it won't
1667 even tell us whether it did or not... Win98/2000 and higher provide
1668 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1669 we have to resort to a round trip, i.e. check that converting back
1670 results in the same string -- this is, of course, expensive but
1671 otherwise we simply can't be sure to not garble the data.
1672 */
1673
1674 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1675 // it doesn't work with CJK encodings (which we test for rather roughly
1676 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1677 // supporting it
907173e5
WS
1678 BOOL usedDef wxDUMMY_INITIALIZE(false);
1679 BOOL *pUsedDef;
13dd924a
VZ
1680 int flags;
1681 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1682 {
1683 // it's our lucky day
1684 flags = WC_NO_BEST_FIT_CHARS;
1685 pUsedDef = &usedDef;
1686 }
1687 else // old system or unsupported encoding
1688 {
1689 flags = 0;
1690 pUsedDef = NULL;
1691 }
1692
2b5f62a0
VZ
1693 const size_t len = ::WideCharToMultiByte
1694 (
1695 m_CodePage, // code page
13dd924a
VZ
1696 flags, // either none or no best fit
1697 pwz, // input string
2b5f62a0
VZ
1698 -1, // it is (wide) NUL-terminated
1699 buf, // output buffer
1700 buf ? n : 0, // and its size
1701 NULL, // default "replacement" char
13dd924a 1702 pUsedDef // [out] was it used?
2b5f62a0
VZ
1703 );
1704
13dd924a
VZ
1705 if ( !len )
1706 {
1707 // function totally failed
1708 return (size_t)-1;
1709 }
1710
1711 // if we were really converting, check if we succeeded
1712 if ( buf )
1713 {
1714 if ( flags )
1715 {
1716 // check if the conversion failed, i.e. if any replacements
1717 // were done
1718 if ( usedDef )
1719 return (size_t)-1;
1720 }
1721 else // we must resort to double tripping...
1722 {
1723 wxWCharBuffer wcBuf(n);
1724 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1725 wcscmp(wcBuf, pwz) != 0 )
1726 {
1727 // we didn't obtain the same thing we started from, hence
1728 // the conversion was lossy and we consider that it failed
1729 return (size_t)-1;
1730 }
1731 }
1732 }
1733
03a991bc 1734 // see the comment above for the reason of "len - 1"
13dd924a 1735 return len - 1;
f1339c56 1736 }
dccce9ea 1737
13dd924a
VZ
1738 bool IsOk() const { return m_CodePage != -1; }
1739
1740private:
1741 static bool CanUseNoBestFit()
1742 {
1743 static int s_isWin98Or2k = -1;
1744
1745 if ( s_isWin98Or2k == -1 )
1746 {
1747 int verMaj, verMin;
1748 switch ( wxGetOsVersion(&verMaj, &verMin) )
1749 {
1750 case wxWIN95:
1751 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1752 break;
1753
1754 case wxWINDOWS_NT:
1755 s_isWin98Or2k = verMaj >= 5;
1756 break;
1757
1758 default:
1759 // unknown, be conseravtive by default
1760 s_isWin98Or2k = 0;
1761 }
1762
1763 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1764 }
1765
1766 return s_isWin98Or2k == 1;
1767 }
f1339c56 1768
b1d66b54 1769 long m_CodePage;
1cd52418 1770};
e95354ec
VZ
1771
1772#endif // wxHAVE_WIN32_MB2WC
1773
f7e98dee
RN
1774// ============================================================================
1775// Cocoa conversion classes
1776// ============================================================================
1777
1778#if defined(__WXCOCOA__)
1779
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1783
1784#include <CoreFoundation/CFString.h>
1785#include <CoreFoundation/CFStringEncodingExt.h>
1786
1787CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 1788{
638357a0 1789 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ecd9653b
WS
1790 if ( encoding == wxFONTENCODING_DEFAULT )
1791 {
638357a0 1792 enc = CFStringGetSystemEncoding();
ecd9653b
WS
1793 }
1794 else switch( encoding)
1795 {
1796 case wxFONTENCODING_ISO8859_1 :
1797 enc = kCFStringEncodingISOLatin1 ;
1798 break ;
1799 case wxFONTENCODING_ISO8859_2 :
1800 enc = kCFStringEncodingISOLatin2;
1801 break ;
1802 case wxFONTENCODING_ISO8859_3 :
1803 enc = kCFStringEncodingISOLatin3 ;
1804 break ;
1805 case wxFONTENCODING_ISO8859_4 :
1806 enc = kCFStringEncodingISOLatin4;
1807 break ;
1808 case wxFONTENCODING_ISO8859_5 :
1809 enc = kCFStringEncodingISOLatinCyrillic;
1810 break ;
1811 case wxFONTENCODING_ISO8859_6 :
1812 enc = kCFStringEncodingISOLatinArabic;
1813 break ;
1814 case wxFONTENCODING_ISO8859_7 :
1815 enc = kCFStringEncodingISOLatinGreek;
1816 break ;
1817 case wxFONTENCODING_ISO8859_8 :
1818 enc = kCFStringEncodingISOLatinHebrew;
1819 break ;
1820 case wxFONTENCODING_ISO8859_9 :
1821 enc = kCFStringEncodingISOLatin5;
1822 break ;
1823 case wxFONTENCODING_ISO8859_10 :
1824 enc = kCFStringEncodingISOLatin6;
1825 break ;
1826 case wxFONTENCODING_ISO8859_11 :
1827 enc = kCFStringEncodingISOLatinThai;
1828 break ;
1829 case wxFONTENCODING_ISO8859_13 :
1830 enc = kCFStringEncodingISOLatin7;
1831 break ;
1832 case wxFONTENCODING_ISO8859_14 :
1833 enc = kCFStringEncodingISOLatin8;
1834 break ;
1835 case wxFONTENCODING_ISO8859_15 :
1836 enc = kCFStringEncodingISOLatin9;
1837 break ;
1838
1839 case wxFONTENCODING_KOI8 :
1840 enc = kCFStringEncodingKOI8_R;
1841 break ;
1842 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1843 enc = kCFStringEncodingDOSRussian;
1844 break ;
1845
1846// case wxFONTENCODING_BULGARIAN :
1847// enc = ;
1848// break ;
1849
1850 case wxFONTENCODING_CP437 :
1851 enc =kCFStringEncodingDOSLatinUS ;
1852 break ;
1853 case wxFONTENCODING_CP850 :
1854 enc = kCFStringEncodingDOSLatin1;
1855 break ;
1856 case wxFONTENCODING_CP852 :
1857 enc = kCFStringEncodingDOSLatin2;
1858 break ;
1859 case wxFONTENCODING_CP855 :
1860 enc = kCFStringEncodingDOSCyrillic;
1861 break ;
1862 case wxFONTENCODING_CP866 :
1863 enc =kCFStringEncodingDOSRussian ;
1864 break ;
1865 case wxFONTENCODING_CP874 :
1866 enc = kCFStringEncodingDOSThai;
1867 break ;
1868 case wxFONTENCODING_CP932 :
1869 enc = kCFStringEncodingDOSJapanese;
1870 break ;
1871 case wxFONTENCODING_CP936 :
1872 enc =kCFStringEncodingDOSChineseSimplif ;
1873 break ;
1874 case wxFONTENCODING_CP949 :
1875 enc = kCFStringEncodingDOSKorean;
1876 break ;
1877 case wxFONTENCODING_CP950 :
1878 enc = kCFStringEncodingDOSChineseTrad;
1879 break ;
ecd9653b
WS
1880 case wxFONTENCODING_CP1250 :
1881 enc = kCFStringEncodingWindowsLatin2;
1882 break ;
1883 case wxFONTENCODING_CP1251 :
1884 enc =kCFStringEncodingWindowsCyrillic ;
1885 break ;
1886 case wxFONTENCODING_CP1252 :
1887 enc =kCFStringEncodingWindowsLatin1 ;
1888 break ;
1889 case wxFONTENCODING_CP1253 :
1890 enc = kCFStringEncodingWindowsGreek;
1891 break ;
1892 case wxFONTENCODING_CP1254 :
1893 enc = kCFStringEncodingWindowsLatin5;
1894 break ;
1895 case wxFONTENCODING_CP1255 :
1896 enc =kCFStringEncodingWindowsHebrew ;
1897 break ;
1898 case wxFONTENCODING_CP1256 :
1899 enc =kCFStringEncodingWindowsArabic ;
1900 break ;
1901 case wxFONTENCODING_CP1257 :
1902 enc = kCFStringEncodingWindowsBalticRim;
1903 break ;
638357a0
RN
1904// This only really encodes to UTF7 (if that) evidently
1905// case wxFONTENCODING_UTF7 :
1906// enc = kCFStringEncodingNonLossyASCII ;
1907// break ;
ecd9653b
WS
1908 case wxFONTENCODING_UTF8 :
1909 enc = kCFStringEncodingUTF8 ;
1910 break ;
1911 case wxFONTENCODING_EUC_JP :
1912 enc = kCFStringEncodingEUC_JP;
1913 break ;
1914 case wxFONTENCODING_UTF16 :
f7e98dee 1915 enc = kCFStringEncodingUnicode ;
ecd9653b 1916 break ;
f7e98dee
RN
1917 case wxFONTENCODING_MACROMAN :
1918 enc = kCFStringEncodingMacRoman ;
1919 break ;
1920 case wxFONTENCODING_MACJAPANESE :
1921 enc = kCFStringEncodingMacJapanese ;
1922 break ;
1923 case wxFONTENCODING_MACCHINESETRAD :
1924 enc = kCFStringEncodingMacChineseTrad ;
1925 break ;
1926 case wxFONTENCODING_MACKOREAN :
1927 enc = kCFStringEncodingMacKorean ;
1928 break ;
1929 case wxFONTENCODING_MACARABIC :
1930 enc = kCFStringEncodingMacArabic ;
1931 break ;
1932 case wxFONTENCODING_MACHEBREW :
1933 enc = kCFStringEncodingMacHebrew ;
1934 break ;
1935 case wxFONTENCODING_MACGREEK :
1936 enc = kCFStringEncodingMacGreek ;
1937 break ;
1938 case wxFONTENCODING_MACCYRILLIC :
1939 enc = kCFStringEncodingMacCyrillic ;
1940 break ;
1941 case wxFONTENCODING_MACDEVANAGARI :
1942 enc = kCFStringEncodingMacDevanagari ;
1943 break ;
1944 case wxFONTENCODING_MACGURMUKHI :
1945 enc = kCFStringEncodingMacGurmukhi ;
1946 break ;
1947 case wxFONTENCODING_MACGUJARATI :
1948 enc = kCFStringEncodingMacGujarati ;
1949 break ;
1950 case wxFONTENCODING_MACORIYA :
1951 enc = kCFStringEncodingMacOriya ;
1952 break ;
1953 case wxFONTENCODING_MACBENGALI :
1954 enc = kCFStringEncodingMacBengali ;
1955 break ;
1956 case wxFONTENCODING_MACTAMIL :
1957 enc = kCFStringEncodingMacTamil ;
1958 break ;
1959 case wxFONTENCODING_MACTELUGU :
1960 enc = kCFStringEncodingMacTelugu ;
1961 break ;
1962 case wxFONTENCODING_MACKANNADA :
1963 enc = kCFStringEncodingMacKannada ;
1964 break ;
1965 case wxFONTENCODING_MACMALAJALAM :
1966 enc = kCFStringEncodingMacMalayalam ;
1967 break ;
1968 case wxFONTENCODING_MACSINHALESE :
1969 enc = kCFStringEncodingMacSinhalese ;
1970 break ;
1971 case wxFONTENCODING_MACBURMESE :
1972 enc = kCFStringEncodingMacBurmese ;
1973 break ;
1974 case wxFONTENCODING_MACKHMER :
1975 enc = kCFStringEncodingMacKhmer ;
1976 break ;
1977 case wxFONTENCODING_MACTHAI :
1978 enc = kCFStringEncodingMacThai ;
1979 break ;
1980 case wxFONTENCODING_MACLAOTIAN :
1981 enc = kCFStringEncodingMacLaotian ;
1982 break ;
1983 case wxFONTENCODING_MACGEORGIAN :
1984 enc = kCFStringEncodingMacGeorgian ;
1985 break ;
1986 case wxFONTENCODING_MACARMENIAN :
1987 enc = kCFStringEncodingMacArmenian ;
1988 break ;
1989 case wxFONTENCODING_MACCHINESESIMP :
1990 enc = kCFStringEncodingMacChineseSimp ;
1991 break ;
1992 case wxFONTENCODING_MACTIBETAN :
1993 enc = kCFStringEncodingMacTibetan ;
1994 break ;
1995 case wxFONTENCODING_MACMONGOLIAN :
1996 enc = kCFStringEncodingMacMongolian ;
1997 break ;
1998 case wxFONTENCODING_MACETHIOPIC :
1999 enc = kCFStringEncodingMacEthiopic ;
2000 break ;
2001 case wxFONTENCODING_MACCENTRALEUR :
2002 enc = kCFStringEncodingMacCentralEurRoman ;
2003 break ;
2004 case wxFONTENCODING_MACVIATNAMESE :
2005 enc = kCFStringEncodingMacVietnamese ;
2006 break ;
2007 case wxFONTENCODING_MACARABICEXT :
2008 enc = kCFStringEncodingMacExtArabic ;
2009 break ;
2010 case wxFONTENCODING_MACSYMBOL :
2011 enc = kCFStringEncodingMacSymbol ;
2012 break ;
2013 case wxFONTENCODING_MACDINGBATS :
2014 enc = kCFStringEncodingMacDingbats ;
2015 break ;
2016 case wxFONTENCODING_MACTURKISH :
2017 enc = kCFStringEncodingMacTurkish ;
2018 break ;
2019 case wxFONTENCODING_MACCROATIAN :
2020 enc = kCFStringEncodingMacCroatian ;
2021 break ;
2022 case wxFONTENCODING_MACICELANDIC :
2023 enc = kCFStringEncodingMacIcelandic ;
2024 break ;
2025 case wxFONTENCODING_MACROMANIAN :
2026 enc = kCFStringEncodingMacRomanian ;
2027 break ;
2028 case wxFONTENCODING_MACCELTIC :
2029 enc = kCFStringEncodingMacCeltic ;
2030 break ;
2031 case wxFONTENCODING_MACGAELIC :
2032 enc = kCFStringEncodingMacGaelic ;
2033 break ;
ecd9653b
WS
2034// case wxFONTENCODING_MACKEYBOARD :
2035// enc = kCFStringEncodingMacKeyboardGlyphs ;
2036// break ;
2037 default :
2038 // because gcc is picky
2039 break ;
2040 } ;
2041 return enc ;
f7e98dee
RN
2042}
2043
f7e98dee
RN
2044class wxMBConv_cocoa : public wxMBConv
2045{
2046public:
2047 wxMBConv_cocoa()
2048 {
2049 Init(CFStringGetSystemEncoding()) ;
2050 }
2051
a6900d10 2052#if wxUSE_FONTMAP
f7e98dee
RN
2053 wxMBConv_cocoa(const wxChar* name)
2054 {
267e11c5 2055 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
f7e98dee 2056 }
a6900d10 2057#endif
f7e98dee
RN
2058
2059 wxMBConv_cocoa(wxFontEncoding encoding)
2060 {
2061 Init( wxCFStringEncFromFontEnc(encoding) );
2062 }
2063
2064 ~wxMBConv_cocoa()
2065 {
2066 }
2067
2068 void Init( CFStringEncoding encoding)
2069 {
638357a0 2070 m_encoding = encoding ;
f7e98dee
RN
2071 }
2072
2073 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2074 {
2075 wxASSERT(szUnConv);
ecd9653b 2076
638357a0
RN
2077 CFStringRef theString = CFStringCreateWithBytes (
2078 NULL, //the allocator
2079 (const UInt8*)szUnConv,
2080 strlen(szUnConv),
2081 m_encoding,
2082 false //no BOM/external representation
f7e98dee
RN
2083 );
2084
2085 wxASSERT(theString);
2086
638357a0
RN
2087 size_t nOutLength = CFStringGetLength(theString);
2088
2089 if (szOut == NULL)
f7e98dee 2090 {
f7e98dee 2091 CFRelease(theString);
638357a0 2092 return nOutLength;
f7e98dee 2093 }
ecd9653b 2094
638357a0 2095 CFRange theRange = { 0, nOutSize };
ecd9653b 2096
638357a0
RN
2097#if SIZEOF_WCHAR_T == 4
2098 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2099#endif
3698ae71 2100
f7e98dee 2101 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
3698ae71 2102
f7e98dee 2103 CFRelease(theString);
ecd9653b 2104
638357a0 2105 szUniCharBuffer[nOutLength] = '\0' ;
f7e98dee
RN
2106
2107#if SIZEOF_WCHAR_T == 4
2108 wxMBConvUTF16 converter ;
638357a0 2109 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
f7e98dee
RN
2110 delete[] szUniCharBuffer;
2111#endif
3698ae71 2112
638357a0 2113 return nOutLength;
f7e98dee
RN
2114 }
2115
2116 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2117 {
638357a0 2118 wxASSERT(szUnConv);
3698ae71 2119
f7e98dee 2120 size_t nRealOutSize;
638357a0 2121 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 2122 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2123
f7e98dee
RN
2124#if SIZEOF_WCHAR_T == 4
2125 wxMBConvUTF16BE converter ;
2126 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2127 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2128 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2129 nBufSize /= sizeof(UniChar);
f7e98dee
RN
2130#endif
2131
2132 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2133 NULL, //allocator
2134 szUniBuffer,
2135 nBufSize,
638357a0 2136 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 2137 );
ecd9653b 2138
f7e98dee 2139 wxASSERT(theString);
ecd9653b 2140
f7e98dee 2141 //Note that CER puts a BOM when converting to unicode
638357a0
RN
2142 //so we check and use getchars instead in that case
2143 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 2144 {
638357a0
RN
2145 if (szOut != NULL)
2146 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
3698ae71 2147
638357a0
RN
2148 nRealOutSize = CFStringGetLength(theString) + 1;
2149 }
2150 else
2151 {
2152 CFStringGetBytes(
2153 theString,
2154 CFRangeMake(0, CFStringGetLength(theString)),
2155 m_encoding,
2156 0, //what to put in characters that can't be converted -
2157 //0 tells CFString to return NULL if it meets such a character
2158 false, //not an external representation
2159 (UInt8*) szOut,
3698ae71 2160 nOutSize,
638357a0
RN
2161 (CFIndex*) &nRealOutSize
2162 );
f7e98dee 2163 }
ecd9653b 2164
638357a0 2165 CFRelease(theString);
ecd9653b 2166
638357a0
RN
2167#if SIZEOF_WCHAR_T == 4
2168 delete[] szUniBuffer;
2169#endif
ecd9653b 2170
f7e98dee
RN
2171 return nRealOutSize - 1;
2172 }
2173
2174 bool IsOk() const
ecd9653b 2175 {
3698ae71 2176 return m_encoding != kCFStringEncodingInvalidId &&
638357a0 2177 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2178 }
2179
2180private:
638357a0 2181 CFStringEncoding m_encoding ;
f7e98dee
RN
2182};
2183
2184#endif // defined(__WXCOCOA__)
2185
335d31e0
SC
2186// ============================================================================
2187// Mac conversion classes
2188// ============================================================================
2189
2190#if defined(__WXMAC__) && defined(TARGET_CARBON)
2191
2192class wxMBConv_mac : public wxMBConv
2193{
2194public:
2195 wxMBConv_mac()
2196 {
2197 Init(CFStringGetSystemEncoding()) ;
2198 }
2199
2d1659cf 2200#if wxUSE_FONTMAP
335d31e0
SC
2201 wxMBConv_mac(const wxChar* name)
2202 {
267e11c5 2203 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
335d31e0 2204 }
2d1659cf 2205#endif
335d31e0
SC
2206
2207 wxMBConv_mac(wxFontEncoding encoding)
2208 {
d775fa82
WS
2209 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2210 }
2211
2212 ~wxMBConv_mac()
2213 {
2214 OSStatus status = noErr ;
2215 status = TECDisposeConverter(m_MB2WC_converter);
2216 status = TECDisposeConverter(m_WC2MB_converter);
2217 }
2218
2219
2220 void Init( TextEncodingBase encoding)
2221 {
2222 OSStatus status = noErr ;
2223 m_char_encoding = encoding ;
2224 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2225
2226 status = TECCreateConverter(&m_MB2WC_converter,
2227 m_char_encoding,
2228 m_unicode_encoding);
2229 status = TECCreateConverter(&m_WC2MB_converter,
2230 m_unicode_encoding,
2231 m_char_encoding);
2232 }
2233
335d31e0
SC
2234 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2235 {
d775fa82
WS
2236 OSStatus status = noErr ;
2237 ByteCount byteOutLen ;
2238 ByteCount byteInLen = strlen(psz) ;
2239 wchar_t *tbuf = NULL ;
2240 UniChar* ubuf = NULL ;
2241 size_t res = 0 ;
2242
2243 if (buf == NULL)
2244 {
638357a0 2245 //apple specs say at least 32
c543817b 2246 n = wxMax( 32 , byteInLen ) ;
d775fa82
WS
2247 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2248 }
2249 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
f3a355ce 2250#if SIZEOF_WCHAR_T == 4
d775fa82 2251 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2252#else
d775fa82 2253 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2254#endif
d775fa82
WS
2255 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2256 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
f3a355ce 2257#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2258 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2259 // is not properly terminated we get random characters at the end
2260 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d775fa82
WS
2261 wxMBConvUTF16BE converter ;
2262 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2263 free( ubuf ) ;
f3a355ce 2264#else
d775fa82 2265 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2266#endif
d775fa82
WS
2267 if ( buf == NULL )
2268 free(tbuf) ;
335d31e0 2269
335d31e0
SC
2270 if ( buf && res < n)
2271 buf[res] = 0;
2272
d775fa82 2273 return res ;
335d31e0
SC
2274 }
2275
2276 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82
WS
2277 {
2278 OSStatus status = noErr ;
2279 ByteCount byteOutLen ;
2280 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2281
2282 char *tbuf = NULL ;
2283
2284 if (buf == NULL)
2285 {
638357a0 2286 //apple specs say at least 32
c543817b 2287 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2288 tbuf = (char*) malloc( n ) ;
2289 }
2290
2291 ByteCount byteBufferLen = n ;
2292 UniChar* ubuf = NULL ;
f3a355ce 2293#if SIZEOF_WCHAR_T == 4
d775fa82
WS
2294 wxMBConvUTF16BE converter ;
2295 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2296 byteInLen = unicharlen ;
2297 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2298 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce 2299#else
d775fa82 2300 ubuf = (UniChar*) psz ;
f3a355ce 2301#endif
d775fa82
WS
2302 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2303 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
f3a355ce 2304#if SIZEOF_WCHAR_T == 4
d775fa82 2305 free( ubuf ) ;
f3a355ce 2306#endif
d775fa82
WS
2307 if ( buf == NULL )
2308 free(tbuf) ;
335d31e0 2309
d775fa82 2310 size_t res = byteOutLen ;
335d31e0 2311 if ( buf && res < n)
638357a0 2312 {
335d31e0 2313 buf[res] = 0;
3698ae71 2314
638357a0
RN
2315 //we need to double-trip to verify it didn't insert any ? in place
2316 //of bogus characters
2317 wxWCharBuffer wcBuf(n);
2318 size_t pszlen = wxWcslen(psz);
2319 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2320 wxWcslen(wcBuf) != pszlen ||
2321 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2322 {
2323 // we didn't obtain the same thing we started from, hence
2324 // the conversion was lossy and we consider that it failed
2325 return (size_t)-1;
2326 }
2327 }
335d31e0 2328
d775fa82 2329 return res ;
335d31e0
SC
2330 }
2331
2332 bool IsOk() const
2333 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2334
2335private:
d775fa82
WS
2336 TECObjectRef m_MB2WC_converter ;
2337 TECObjectRef m_WC2MB_converter ;
2338
2339 TextEncodingBase m_char_encoding ;
2340 TextEncodingBase m_unicode_encoding ;
335d31e0
SC
2341};
2342
2343#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 2344
36acb880
VZ
2345// ============================================================================
2346// wxEncodingConverter based conversion classes
2347// ============================================================================
2348
1e6feb95 2349#if wxUSE_FONTMAP
1cd52418 2350
e95354ec 2351class wxMBConv_wxwin : public wxMBConv
1cd52418 2352{
8b04d4c4
VZ
2353private:
2354 void Init()
2355 {
2356 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2357 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2358 }
2359
6001e347 2360public:
f1339c56
RR
2361 // temporarily just use wxEncodingConverter stuff,
2362 // so that it works while a better implementation is built
e95354ec 2363 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
2364 {
2365 if (name)
267e11c5 2366 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
2367 else
2368 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 2369
8b04d4c4
VZ
2370 Init();
2371 }
2372
e95354ec 2373 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
2374 {
2375 m_enc = enc;
2376
2377 Init();
f1339c56 2378 }
dccce9ea 2379
bde4baac 2380 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
2381 {
2382 size_t inbuf = strlen(psz);
dccce9ea 2383 if (buf)
c643a977
VS
2384 {
2385 if (!m2w.Convert(psz,buf))
2386 return (size_t)-1;
2387 }
f1339c56
RR
2388 return inbuf;
2389 }
dccce9ea 2390
bde4baac 2391 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 2392 {
f8d791e0 2393 const size_t inbuf = wxWcslen(psz);
f1339c56 2394 if (buf)
c643a977
VS
2395 {
2396 if (!w2m.Convert(psz,buf))
2397 return (size_t)-1;
2398 }
dccce9ea 2399
f1339c56
RR
2400 return inbuf;
2401 }
dccce9ea 2402
e95354ec 2403 bool IsOk() const { return m_ok; }
f1339c56
RR
2404
2405public:
8b04d4c4 2406 wxFontEncoding m_enc;
f1339c56 2407 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
2408
2409 // were we initialized successfully?
2410 bool m_ok;
fc7a2a60 2411
e95354ec 2412 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 2413};
6001e347 2414
1e6feb95
VZ
2415#endif // wxUSE_FONTMAP
2416
36acb880
VZ
2417// ============================================================================
2418// wxCSConv implementation
2419// ============================================================================
2420
8b04d4c4 2421void wxCSConv::Init()
6001e347 2422{
e95354ec
VZ
2423 m_name = NULL;
2424 m_convReal = NULL;
2425 m_deferred = true;
2426}
2427
8b04d4c4
VZ
2428wxCSConv::wxCSConv(const wxChar *charset)
2429{
2430 Init();
82713003 2431
e95354ec
VZ
2432 if ( charset )
2433 {
e95354ec
VZ
2434 SetName(charset);
2435 }
bda3d86a
VZ
2436
2437 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
2438}
2439
8b04d4c4
VZ
2440wxCSConv::wxCSConv(wxFontEncoding encoding)
2441{
bda3d86a 2442 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
2443 {
2444 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2445
2446 encoding = wxFONTENCODING_SYSTEM;
2447 }
2448
8b04d4c4
VZ
2449 Init();
2450
bda3d86a 2451 m_encoding = encoding;
8b04d4c4
VZ
2452}
2453
6001e347
RR
2454wxCSConv::~wxCSConv()
2455{
65e50848
JS
2456 Clear();
2457}
2458
54380f29 2459wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 2460 : wxMBConv()
54380f29 2461{
8b04d4c4
VZ
2462 Init();
2463
54380f29 2464 SetName(conv.m_name);
8b04d4c4 2465 m_encoding = conv.m_encoding;
54380f29
GD
2466}
2467
2468wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2469{
2470 Clear();
8b04d4c4 2471
54380f29 2472 SetName(conv.m_name);
8b04d4c4
VZ
2473 m_encoding = conv.m_encoding;
2474
54380f29
GD
2475 return *this;
2476}
2477
65e50848
JS
2478void wxCSConv::Clear()
2479{
8b04d4c4 2480 free(m_name);
e95354ec 2481 delete m_convReal;
8b04d4c4 2482
65e50848 2483 m_name = NULL;
e95354ec 2484 m_convReal = NULL;
6001e347
RR
2485}
2486
2487void wxCSConv::SetName(const wxChar *charset)
2488{
f1339c56
RR
2489 if (charset)
2490 {
2491 m_name = wxStrdup(charset);
e95354ec 2492 m_deferred = true;
f1339c56 2493 }
6001e347
RR
2494}
2495
e95354ec
VZ
2496wxMBConv *wxCSConv::DoCreate() const
2497{
c547282d
VZ
2498 // check for the special case of ASCII or ISO8859-1 charset: as we have
2499 // special knowledge of it anyhow, we don't need to create a special
2500 // conversion object
2501 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 2502 {
e95354ec
VZ
2503 // don't convert at all
2504 return NULL;
2505 }
dccce9ea 2506
e95354ec
VZ
2507 // we trust OS to do conversion better than we can so try external
2508 // conversion methods first
2509 //
2510 // the full order is:
2511 // 1. OS conversion (iconv() under Unix or Win32 API)
2512 // 2. hard coded conversions for UTF
2513 // 3. wxEncodingConverter as fall back
2514
2515 // step (1)
2516#ifdef HAVE_ICONV
c547282d 2517#if !wxUSE_FONTMAP
e95354ec 2518 if ( m_name )
c547282d 2519#endif // !wxUSE_FONTMAP
e95354ec 2520 {
c547282d
VZ
2521 wxString name(m_name);
2522
2523#if wxUSE_FONTMAP
2524 if ( name.empty() )
267e11c5 2525 name = wxFontMapperBase::Get()->GetEncodingName(m_encoding);
c547282d
VZ
2526#endif // wxUSE_FONTMAP
2527
2528 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
e95354ec
VZ
2529 if ( conv->IsOk() )
2530 return conv;
2531
2532 delete conv;
2533 }
2534#endif // HAVE_ICONV
2535
2536#ifdef wxHAVE_WIN32_MB2WC
2537 {
7608a683 2538#if wxUSE_FONTMAP
e95354ec
VZ
2539 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2540 : new wxMBConv_win32(m_encoding);
2541 if ( conv->IsOk() )
2542 return conv;
2543
2544 delete conv;
7608a683
WS
2545#else
2546 return NULL;
2547#endif
e95354ec
VZ
2548 }
2549#endif // wxHAVE_WIN32_MB2WC
d775fa82
WS
2550#if defined(__WXMAC__)
2551 {
5c3c8676 2552 // leave UTF16 and UTF32 to the built-ins of wx
3698ae71 2553 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
5c3c8676 2554 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
d775fa82
WS
2555 {
2556
2d1659cf 2557#if wxUSE_FONTMAP
d775fa82
WS
2558 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2559 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
2560#else
2561 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2562#endif
d775fa82 2563 if ( conv->IsOk() )
f7e98dee
RN
2564 return conv;
2565
2566 delete conv;
2567 }
2568 }
2569#endif
2570#if defined(__WXCOCOA__)
2571 {
2572 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2573 {
2574
a6900d10 2575#if wxUSE_FONTMAP
f7e98dee
RN
2576 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2577 : new wxMBConv_cocoa(m_encoding);
a6900d10
RN
2578#else
2579 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2580#endif
f7e98dee 2581 if ( conv->IsOk() )
d775fa82
WS
2582 return conv;
2583
2584 delete conv;
2585 }
335d31e0
SC
2586 }
2587#endif
e95354ec
VZ
2588 // step (2)
2589 wxFontEncoding enc = m_encoding;
2590#if wxUSE_FONTMAP
c547282d
VZ
2591 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2592 {
2593 // use "false" to suppress interactive dialogs -- we can be called from
2594 // anywhere and popping up a dialog from here is the last thing we want to
2595 // do
267e11c5 2596 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 2597 }
e95354ec
VZ
2598#endif // wxUSE_FONTMAP
2599
2600 switch ( enc )
2601 {
2602 case wxFONTENCODING_UTF7:
2603 return new wxMBConvUTF7;
2604
2605 case wxFONTENCODING_UTF8:
2606 return new wxMBConvUTF8;
2607
e95354ec
VZ
2608 case wxFONTENCODING_UTF16BE:
2609 return new wxMBConvUTF16BE;
2610
2611 case wxFONTENCODING_UTF16LE:
2612 return new wxMBConvUTF16LE;
2613
e95354ec
VZ
2614 case wxFONTENCODING_UTF32BE:
2615 return new wxMBConvUTF32BE;
2616
2617 case wxFONTENCODING_UTF32LE:
2618 return new wxMBConvUTF32LE;
2619
2620 default:
2621 // nothing to do but put here to suppress gcc warnings
2622 ;
2623 }
2624
2625 // step (3)
2626#if wxUSE_FONTMAP
2627 {
2628 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2629 : new wxMBConv_wxwin(m_encoding);
2630 if ( conv->IsOk() )
2631 return conv;
2632
2633 delete conv;
2634 }
2635#endif // wxUSE_FONTMAP
2636
a58d4f4d
VS
2637 // NB: This is a hack to prevent deadlock. What could otherwise happen
2638 // in Unicode build: wxConvLocal creation ends up being here
2639 // because of some failure and logs the error. But wxLog will try to
2640 // attach timestamp, for which it will need wxConvLocal (to convert
2641 // time to char* and then wchar_t*), but that fails, tries to log
2642 // error, but wxLog has a (already locked) critical section that
2643 // guards static buffer.
2644 static bool alreadyLoggingError = false;
2645 if (!alreadyLoggingError)
2646 {
2647 alreadyLoggingError = true;
2648 wxLogError(_("Cannot convert from the charset '%s'!"),
2649 m_name ? m_name
e95354ec
VZ
2650 :
2651#if wxUSE_FONTMAP
267e11c5 2652 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
e95354ec
VZ
2653#else // !wxUSE_FONTMAP
2654 wxString::Format(_("encoding %s"), m_encoding).c_str()
2655#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2656 );
a58d4f4d
VS
2657 alreadyLoggingError = false;
2658 }
e95354ec
VZ
2659
2660 return NULL;
2661}
2662
2663void wxCSConv::CreateConvIfNeeded() const
2664{
2665 if ( m_deferred )
2666 {
2667 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
2668
2669#if wxUSE_INTL
2670 // if we don't have neither the name nor the encoding, use the default
2671 // encoding for this system
2672 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2673 {
4d312c22 2674 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
2675 }
2676#endif // wxUSE_INTL
2677
e95354ec
VZ
2678 self->m_convReal = DoCreate();
2679 self->m_deferred = false;
6001e347 2680 }
6001e347
RR
2681}
2682
2683size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2684{
e95354ec 2685 CreateConvIfNeeded();
dccce9ea 2686
e95354ec
VZ
2687 if (m_convReal)
2688 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
2689
2690 // latin-1 (direct)
4def3b35 2691 size_t len = strlen(psz);
dccce9ea 2692
f1339c56
RR
2693 if (buf)
2694 {
4def3b35 2695 for (size_t c = 0; c <= len; c++)
f1339c56
RR
2696 buf[c] = (unsigned char)(psz[c]);
2697 }
dccce9ea 2698
f1339c56 2699 return len;
6001e347
RR
2700}
2701
2702size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2703{
e95354ec 2704 CreateConvIfNeeded();
dccce9ea 2705
e95354ec
VZ
2706 if (m_convReal)
2707 return m_convReal->WC2MB(buf, psz, n);
1cd52418 2708
f1339c56 2709 // latin-1 (direct)
f8d791e0 2710 const size_t len = wxWcslen(psz);
f1339c56
RR
2711 if (buf)
2712 {
4def3b35 2713 for (size_t c = 0; c <= len; c++)
24642831
VS
2714 {
2715 if (psz[c] > 0xFF)
2716 return (size_t)-1;
907173e5 2717 buf[c] = (char)psz[c];
24642831
VS
2718 }
2719 }
2720 else
2721 {
2722 for (size_t c = 0; c <= len; c++)
2723 {
2724 if (psz[c] > 0xFF)
2725 return (size_t)-1;
2726 }
f1339c56 2727 }
dccce9ea 2728
f1339c56 2729 return len;
6001e347
RR
2730}
2731
bde4baac
VZ
2732// ----------------------------------------------------------------------------
2733// globals
2734// ----------------------------------------------------------------------------
2735
2736#ifdef __WINDOWS__
2737 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
2738#elif defined(__WXMAC__) && !defined(__MACH__)
2739 static wxMBConv_mac wxConvLibcObj ;
bde4baac 2740#else
dcc8fac0 2741 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
2742#endif
2743
2744static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2745static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2746static wxMBConvUTF7 wxConvUTF7Obj;
2747static wxMBConvUTF8 wxConvUTF8Obj;
c12b7f79 2748
bde4baac
VZ
2749WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2750WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2751WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2752WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2753WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2754WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
f5a1953b
VZ
2755WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2756#ifdef __WXOSX__
ea8ce907 2757 wxConvUTF8Obj;
f5a1953b 2758#else
ea8ce907 2759 wxConvLibcObj;
f5a1953b
VZ
2760#endif
2761
bde4baac
VZ
2762
2763#else // !wxUSE_WCHAR_T
2764
2765// stand-ins in absence of wchar_t
2766WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2767 wxConvISO8859_1,
2768 wxConvLocal,
2769 wxConvUTF8;
2770
2771#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
2772
2773