]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
destroying native and wx controls
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
f6bcfd97
BP
15// ============================================================================
16// declarations
17// ============================================================================
18
19// ----------------------------------------------------------------------------
20// headers
21// ----------------------------------------------------------------------------
22
14f355c2 23#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
24 #pragma implementation "strconv.h"
25#endif
26
27// For compilers that support precompilation, includes "wx.h".
28#include "wx/wxprec.h"
29
30#ifdef __BORLANDC__
31 #pragma hdrstop
32#endif
33
373658eb
VZ
34#ifndef WX_PRECOMP
35 #include "wx/intl.h"
36 #include "wx/log.h"
37#endif // WX_PRECOMP
38
bde4baac
VZ
39#include "wx/strconv.h"
40
41#if wxUSE_WCHAR_T
42
0a1c1e62 43#ifdef __WXMSW__
373658eb 44 #include "wx/msw/private.h"
7608a683
WS
45#endif
46
47#ifdef __WINDOWS__
13dd924a 48 #include "wx/msw/missing.h"
0a1c1e62
GRG
49#endif
50
1c193821 51#ifndef __WXWINCE__
1cd52418 52#include <errno.h>
1c193821
JS
53#endif
54
6001e347
RR
55#include <ctype.h>
56#include <string.h>
57#include <stdlib.h>
ea8ce907
RR
58#ifdef HAVE_LANGINFO_H
59 #include <langinfo.h>
60#endif
6001e347 61
e95354ec
VZ
62#if defined(__WIN32__) && !defined(__WXMICROWIN__)
63 #define wxHAVE_WIN32_MB2WC
64#endif // __WIN32__ but !__WXMICROWIN__
65
373658eb
VZ
66// ----------------------------------------------------------------------------
67// headers
68// ----------------------------------------------------------------------------
7af284fd 69
6001e347 70#ifdef __SALFORDC__
373658eb 71 #include <clib.h>
6001e347
RR
72#endif
73
b040e242 74#ifdef HAVE_ICONV
373658eb 75 #include <iconv.h>
b1d547eb 76 #include "wx/thread.h"
1cd52418 77#endif
1cd52418 78
373658eb
VZ
79#include "wx/encconv.h"
80#include "wx/fontmap.h"
7608a683 81#include "wx/utils.h"
373658eb 82
335d31e0 83#ifdef __WXMAC__
4227afa4
SC
84#include <ATSUnicode.h>
85#include <TextCommon.h>
86#include <TextEncodingConverter.h>
335d31e0
SC
87
88#include "wx/mac/private.h" // includes mac headers
89#endif
373658eb
VZ
90// ----------------------------------------------------------------------------
91// macros
92// ----------------------------------------------------------------------------
3e61dfb0 93
1cd52418 94#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
3a0d76bc 95#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
1cd52418
OK
96
97#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
98 #define WC_NAME "UCS4"
99 #define WC_BSWAP BSWAP_UCS4
100 #ifdef WORDS_BIGENDIAN
101 #define WC_NAME_BEST "UCS-4BE"
102 #else
103 #define WC_NAME_BEST "UCS-4LE"
104 #endif
1cd52418 105#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
106 #define WC_NAME "UTF16"
107 #define WC_BSWAP BSWAP_UTF16
a3f2769e 108 #define WC_UTF16
3a0d76bc
VS
109 #ifdef WORDS_BIGENDIAN
110 #define WC_NAME_BEST "UTF-16BE"
111 #else
112 #define WC_NAME_BEST "UTF-16LE"
113 #endif
bab1e722 114#else // sizeof(wchar_t) != 2 nor 4
bde4baac
VZ
115 // does this ever happen?
116 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1cd52418
OK
117#endif
118
373658eb
VZ
119// ============================================================================
120// implementation
121// ============================================================================
122
123// ----------------------------------------------------------------------------
c91830cb 124// UTF-16 en/decoding to/from UCS-4
373658eb 125// ----------------------------------------------------------------------------
6001e347 126
b0a6bb75 127
c91830cb 128static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 129{
dccce9ea 130 if (input<=0xffff)
4def3b35 131 {
999836aa
VZ
132 if (output)
133 *output = (wxUint16) input;
4def3b35 134 return 1;
dccce9ea
VZ
135 }
136 else if (input>=0x110000)
4def3b35
VS
137 {
138 return (size_t)-1;
dccce9ea
VZ
139 }
140 else
4def3b35 141 {
dccce9ea 142 if (output)
4def3b35 143 {
c91830cb 144 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
999836aa 145 *output = (wxUint16) ((input&0x3ff)+0xdc00);
4def3b35
VS
146 }
147 return 2;
1cd52418 148 }
1cd52418
OK
149}
150
c91830cb 151static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 152{
dccce9ea 153 if ((*input<0xd800) || (*input>0xdfff))
4def3b35
VS
154 {
155 output = *input;
156 return 1;
dccce9ea
VZ
157 }
158 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
4def3b35
VS
159 {
160 output = *input;
161 return (size_t)-1;
dccce9ea
VZ
162 }
163 else
4def3b35
VS
164 {
165 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
166 return 2;
167 }
1cd52418
OK
168}
169
b0a6bb75 170
f6bcfd97 171// ----------------------------------------------------------------------------
6001e347 172// wxMBConv
f6bcfd97 173// ----------------------------------------------------------------------------
2c53a80a
WS
174
175wxMBConv::~wxMBConv()
176{
177 // nothing to do here (necessary for Darwin linking probably)
178}
6001e347 179
6001e347
RR
180const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
181{
2b5f62a0 182 if ( psz )
6001e347 183 {
2b5f62a0
VZ
184 // calculate the length of the buffer needed first
185 size_t nLen = MB2WC(NULL, psz, 0);
186 if ( nLen != (size_t)-1 )
187 {
188 // now do the actual conversion
189 wxWCharBuffer buf(nLen);
635f33ce
VS
190 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
191 if ( nLen != (size_t)-1 )
192 {
193 return buf;
194 }
2b5f62a0 195 }
f6bcfd97 196 }
2b5f62a0
VZ
197
198 wxWCharBuffer buf((wchar_t *)NULL);
199
200 return buf;
6001e347
RR
201}
202
e5cceba0 203const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 204{
2b5f62a0
VZ
205 if ( pwz )
206 {
207 size_t nLen = WC2MB(NULL, pwz, 0);
208 if ( nLen != (size_t)-1 )
209 {
c91830cb 210 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
635f33ce
VS
211 nLen = WC2MB(buf.data(), pwz, nLen + 4);
212 if ( nLen != (size_t)-1 )
213 {
214 return buf;
215 }
2b5f62a0
VZ
216 }
217 }
218
219 wxCharBuffer buf((char *)NULL);
e5cceba0 220
e5cceba0 221 return buf;
6001e347
RR
222}
223
f5fb6871 224const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 225{
f5fb6871
RN
226 wxASSERT(pOutSize != NULL);
227
e4e3bbb4
RN
228 const char* szEnd = szString + nStringLen + 1;
229 const char* szPos = szString;
230 const char* szStart = szPos;
231
232 size_t nActualLength = 0;
f5fb6871
RN
233 size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
234
235 wxWCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
236
237 //Convert the string until the length() is reached, continuing the
238 //loop every time a null character is reached
239 while(szPos != szEnd)
240 {
241 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
242
243 //Get the length of the current (sub)string
244 size_t nLen = MB2WC(NULL, szPos, 0);
245
246 //Invalid conversion?
247 if( nLen == (size_t)-1 )
f5fb6871
RN
248 {
249 *pOutSize = 0;
250 theBuffer.data()[0u] = wxT('\0');
251 return theBuffer;
252 }
253
e4e3bbb4
RN
254
255 //Increase the actual length (+1 for current null character)
256 nActualLength += nLen + 1;
257
f5fb6871
RN
258 //if buffer too big, realloc the buffer
259 if (nActualLength > (nCurrentSize+1))
260 {
261 wxWCharBuffer theNewBuffer(nCurrentSize << 1);
262 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
263 theBuffer = theNewBuffer;
264 nCurrentSize <<= 1;
265 }
266
267 //Convert the current (sub)string
268 if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 269 {
f5fb6871
RN
270 *pOutSize = 0;
271 theBuffer.data()[0u] = wxT('\0');
272 return theBuffer;
e4e3bbb4
RN
273 }
274
275 //Increment to next (sub)string
276 //Note that we have to use strlen here instead of nLen
277 //here because XX2XX gives us the size of the output buffer,
278 //not neccessarly the length of the string
279 szPos += strlen(szPos) + 1;
280 }
281
f5fb6871
RN
282 //success - return actual length and the buffer
283 *pOutSize = nActualLength;
3698ae71 284 return theBuffer;
e4e3bbb4
RN
285}
286
f5fb6871 287const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 288{
f5fb6871
RN
289 wxASSERT(pOutSize != NULL);
290
e4e3bbb4
RN
291 const wchar_t* szEnd = szString + nStringLen + 1;
292 const wchar_t* szPos = szString;
293 const wchar_t* szStart = szPos;
294
295 size_t nActualLength = 0;
f5fb6871
RN
296 size_t nCurrentSize = nStringLen << 2; //try * 4 first
297
298 wxCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
299
300 //Convert the string until the length() is reached, continuing the
301 //loop every time a null character is reached
302 while(szPos != szEnd)
303 {
304 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
305
306 //Get the length of the current (sub)string
307 size_t nLen = WC2MB(NULL, szPos, 0);
308
309 //Invalid conversion?
310 if( nLen == (size_t)-1 )
f5fb6871
RN
311 {
312 *pOutSize = 0;
313 theBuffer.data()[0u] = wxT('\0');
314 return theBuffer;
315 }
e4e3bbb4
RN
316
317 //Increase the actual length (+1 for current null character)
318 nActualLength += nLen + 1;
3698ae71 319
f5fb6871
RN
320 //if buffer too big, realloc the buffer
321 if (nActualLength > (nCurrentSize+1))
322 {
323 wxCharBuffer theNewBuffer(nCurrentSize << 1);
324 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
325 theBuffer = theNewBuffer;
326 nCurrentSize <<= 1;
327 }
328
329 //Convert the current (sub)string
330 if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 331 {
f5fb6871
RN
332 *pOutSize = 0;
333 theBuffer.data()[0u] = wxT('\0');
334 return theBuffer;
e4e3bbb4
RN
335 }
336
337 //Increment to next (sub)string
338 //Note that we have to use wxWcslen here instead of nLen
339 //here because XX2XX gives us the size of the output buffer,
340 //not neccessarly the length of the string
341 szPos += wxWcslen(szPos) + 1;
342 }
343
f5fb6871
RN
344 //success - return actual length and the buffer
345 *pOutSize = nActualLength;
3698ae71 346 return theBuffer;
e4e3bbb4
RN
347}
348
6001e347 349// ----------------------------------------------------------------------------
bde4baac 350// wxMBConvLibc
6001e347
RR
351// ----------------------------------------------------------------------------
352
bde4baac
VZ
353size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
354{
355 return wxMB2WC(buf, psz, n);
356}
357
358size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
359{
360 return wxWC2MB(buf, psz, n);
361}
e1bfe89e 362
c12b7f79
VZ
363#ifdef __WXGTK20__
364
e1bfe89e
RR
365// ----------------------------------------------------------------------------
366// wxConvBrokenFileNames is made for GTK2 in Unicode mode when
367// files are accidentally written in an encoding which is not
368// the system encoding. Typically, the system encoding will be
3698ae71 369// UTF8 but there might be files stored in ISO8859-1 on disk.
e1bfe89e
RR
370// ----------------------------------------------------------------------------
371
c12b7f79 372class wxConvBrokenFileNames : public wxMBConv
e1bfe89e
RR
373{
374public:
c12b7f79
VZ
375 wxConvBrokenFileNames();
376 virtual ~wxConvBrokenFileNames() { delete m_conv; }
377
e1bfe89e
RR
378 virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
379 virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const;
c12b7f79 380
ea8ce907 381private:
c12b7f79
VZ
382 // the conversion object we forward to
383 wxMBConv *m_conv;
e1bfe89e
RR
384};
385
c12b7f79 386wxConvBrokenFileNames::wxConvBrokenFileNames()
ea8ce907 387{
c12b7f79
VZ
388 // decide which conversion to use for the file names
389
390 // (1) this variable exists for the sole purpose of specifying the encoding
391 // of the filenames for GTK+ programs, so use it if it is set
392 const wxChar *encName = wxGetenv(_T("G_FILENAME_ENCODING"));
393 if ( encName )
394 {
395 m_conv = new wxCSConv(encName);
396 }
397 else // no G_FILENAME_ENCODING
398 {
399 // (2) if a non default locale is set, assume that the user wants his
400 // filenames in this locale too
401 switch ( wxLocale::GetSystemEncoding() )
402 {
403 default:
404 m_conv = new wxMBConvLibc;
405 break;
406
407 // (3) finally use UTF-8 by default
408 case wxFONTENCODING_SYSTEM:
409 case wxFONTENCODING_UTF8:
410 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
411 break;
412 }
413 }
ea8ce907
RR
414}
415
c12b7f79
VZ
416size_t
417wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
418 const char *psz,
419 size_t outputSize) const
e1bfe89e 420{
c12b7f79 421 return m_conv->MB2WC( outputBuf, psz, outputSize );
e1bfe89e
RR
422}
423
c12b7f79
VZ
424size_t
425wxConvBrokenFileNames::WC2MB(char *outputBuf,
426 const wchar_t *psz,
427 size_t outputSize) const
e1bfe89e 428{
c12b7f79 429 return m_conv->WC2MB( outputBuf, psz, outputSize );
e1bfe89e
RR
430}
431
c12b7f79
VZ
432#endif // __WXGTK20__
433
bde4baac 434// ----------------------------------------------------------------------------
3698ae71 435// UTF-7
bde4baac 436// ----------------------------------------------------------------------------
6001e347 437
15f2ee32 438// Implementation (C) 2004 Fredrik Roubert
6001e347 439
15f2ee32
RN
440//
441// BASE64 decoding table
442//
// Maps a byte to its 6 bit BASE64 value: 'A'-'Z' give 0x00-0x19, 'a'-'z'
// give 0x1a-0x33, '0'-'9' give 0x34-0x3d, '+' gives 0x3e and '/' gives 0x3f.
// Every other byte maps to 0xff. One row per 16 byte values.
static const unsigned char utf7unb64[] =
{
    /* 0x00 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x10 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x20 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    /* 0x30 */ 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x40 */ 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    /* 0x50 */ 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x60 */ 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    /* 0x70 */ 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x80 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x90 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xa0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xb0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xc0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xd0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xe0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xf0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
478
479size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
480{
15f2ee32
RN
481 size_t len = 0;
482
483 while (*psz && ((!buf) || (len < n)))
484 {
485 unsigned char cc = *psz++;
486 if (cc != '+')
487 {
488 // plain ASCII char
489 if (buf)
490 *buf++ = cc;
491 len++;
492 }
493 else if (*psz == '-')
494 {
495 // encoded plus sign
496 if (buf)
497 *buf++ = cc;
498 len++;
499 psz++;
500 }
501 else
502 {
503 // BASE64 encoded string
504 bool lsb;
505 unsigned char c;
506 unsigned int d, l;
507 for (lsb = false, d = 0, l = 0;
508 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
509 {
510 d <<= 6;
511 d += cc;
512 for (l += 6; l >= 8; lsb = !lsb)
513 {
6356d52a 514 c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
515 if (lsb)
516 {
517 if (buf)
518 *buf++ |= c;
519 len ++;
520 }
521 else
522 if (buf)
6356d52a 523 *buf = (wchar_t)(c << 8);
15f2ee32
RN
524 }
525 }
526 if (*psz == '-')
527 psz++;
528 }
529 }
530 if (buf && (len < n))
531 *buf = 0;
532 return len;
6001e347
RR
533}
534
15f2ee32
RN
535//
536// BASE64 encoding table
537//
// Maps a 6 bit value (0-63) to the BASE64 character representing it.
static const unsigned char utf7enb64[] =
{
    /*  0-15 */ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    /* 16-31 */ 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    /* 32-47 */ 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    /* 48-63 */ 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
};
549
550//
551// UTF-7 encoding table
552//
553// 0 - Set D (directly encoded characters)
554// 1 - Set O (optional direct characters)
555// 2 - whitespace characters (optional)
556// 3 - special characters
557//
// Classification of the 128 ASCII characters for UTF-7 encoding:
//  0 - Set D (directly encoded characters)
//  1 - Set O (optional direct characters)
//  2 - whitespace characters (optional)
//  3 - special characters
static const unsigned char utf7encode[128] =
{
    /* 0x00 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    /* 0x10 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x20 */ 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    /* 0x40 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    /* 0x60 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
569
667e5b3e 570size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32
RN
571{
572
573
574 size_t len = 0;
575
576 while (*psz && ((!buf) || (len < n)))
577 {
578 wchar_t cc = *psz++;
579 if (cc < 0x80 && utf7encode[cc] < 1)
580 {
581 // plain ASCII char
582 if (buf)
583 *buf++ = (char)cc;
584 len++;
585 }
586#ifndef WC_UTF16
79c78d42 587 else if (((wxUint32)cc) > 0xffff)
6e394fc6 588 {
15f2ee32
RN
589 // no surrogate pair generation (yet?)
590 return (size_t)-1;
591 }
592#endif
593 else
594 {
595 if (buf)
596 *buf++ = '+';
597 len++;
598 if (cc != '+')
599 {
600 // BASE64 encode string
601 unsigned int lsb, d, l;
602 for (d = 0, l = 0;; psz++)
603 {
604 for (lsb = 0; lsb < 2; lsb ++)
605 {
606 d <<= 8;
607 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
608
609 for (l += 8; l >= 6; )
610 {
611 l -= 6;
612 if (buf)
613 *buf++ = utf7enb64[(d >> l) % 64];
614 len++;
615 }
616 }
617 cc = *psz;
618 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
619 break;
620 }
621 if (l != 0)
622 {
623 if (buf)
624 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
625 len++;
626 }
627 }
628 if (buf)
629 *buf++ = '-';
630 len++;
631 }
632 }
633 if (buf && (len < n))
634 *buf = 0;
635 return len;
6001e347
RR
636}
637
f6bcfd97 638// ----------------------------------------------------------------------------
6001e347 639// UTF-8
f6bcfd97 640// ----------------------------------------------------------------------------
6001e347 641
dccce9ea 642static wxUint32 utf8_max[]=
4def3b35 643 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347 644
3698ae71
VZ
// boundaries of the private use area to which bytes which are invalid in a
// UTF-8 encoded string are (temporarily) remapped
ea8ce907
RR
647const wxUint32 wxUnicodePUA = 0x100000;
648const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
649
6001e347
RR
650size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
651{
4def3b35
VS
652 size_t len = 0;
653
dccce9ea 654 while (*psz && ((!buf) || (len < n)))
4def3b35 655 {
ea8ce907
RR
656 const char *opsz = psz;
657 bool invalid = false;
4def3b35
VS
658 unsigned char cc = *psz++, fc = cc;
659 unsigned cnt;
dccce9ea 660 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 661 fc <<= 1;
dccce9ea 662 if (!cnt)
4def3b35
VS
663 {
664 // plain ASCII char
dccce9ea 665 if (buf)
4def3b35
VS
666 *buf++ = cc;
667 len++;
dccce9ea
VZ
668 }
669 else
4def3b35
VS
670 {
671 cnt--;
dccce9ea 672 if (!cnt)
4def3b35
VS
673 {
674 // invalid UTF-8 sequence
ea8ce907 675 invalid = true;
dccce9ea
VZ
676 }
677 else
4def3b35
VS
678 {
679 unsigned ocnt = cnt - 1;
680 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 681 while (cnt--)
4def3b35 682 {
ea8ce907 683 cc = *psz;
dccce9ea 684 if ((cc & 0xC0) != 0x80)
4def3b35
VS
685 {
686 // invalid UTF-8 sequence
ea8ce907
RR
687 invalid = true;
688 break;
4def3b35 689 }
ea8ce907 690 psz++;
4def3b35
VS
691 res = (res << 6) | (cc & 0x3f);
692 }
ea8ce907 693 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
694 {
695 // illegal UTF-8 encoding
ea8ce907 696 invalid = true;
4def3b35 697 }
ea8ce907
RR
698 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
699 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
700 {
701 // if one of our PUA characters turns up externally
702 // it must also be treated as an illegal sequence
703 // (a bit like you have to escape an escape character)
704 invalid = true;
705 }
706 else
707 {
1cd52418 708#ifdef WC_UTF16
ea8ce907
RR
709 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
710 size_t pa = encode_utf16(res, (wxUint16 *)buf);
711 if (pa == (size_t)-1)
712 {
713 invalid = true;
714 }
715 else
716 {
717 if (buf)
718 buf += pa;
719 len += pa;
720 }
373658eb 721#else // !WC_UTF16
ea8ce907
RR
722 if (buf)
723 *buf++ = res;
724 len++;
373658eb 725#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
726 }
727 }
728 if (invalid)
729 {
730 if (m_options & MAP_INVALID_UTF8_TO_PUA)
731 {
732 while (opsz < psz && (!buf || len < n))
733 {
734#ifdef WC_UTF16
735 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
736 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
737 wxASSERT(pa != (size_t)-1);
738 if (buf)
739 buf += pa;
740 opsz++;
741 len += pa;
742#else
743 if (buf)
744 *buf++ = wxUnicodePUA + (unsigned char)*opsz;
745 opsz++;
746 len++;
747#endif
748 }
749 }
3698ae71 750 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
751 {
752 while (opsz < psz && (!buf || len < n))
753 {
3698ae71
VZ
754 if ( buf && len + 3 < n )
755 {
756 unsigned char n = *opsz;
757 *buf++ = L'\\';
758 *buf++ = L'0' + n / 0100;
759 *buf++ = L'0' + (n % 0100) / 010;
760 *buf++ = L'0' + n % 010;
761 }
ea8ce907
RR
762 opsz++;
763 len += 4;
764 }
765 }
3698ae71 766 else // MAP_INVALID_UTF8_NOT
ea8ce907
RR
767 {
768 return (size_t)-1;
769 }
4def3b35
VS
770 }
771 }
6001e347 772 }
dccce9ea 773 if (buf && (len < n))
4def3b35
VS
774 *buf = 0;
775 return len;
6001e347
RR
776}
777
3698ae71
VZ
// Returns true if wch is one of the octal digits L'0' to L'7'.
static inline bool isoctal(wchar_t wch)
{
    if ( wch < L'0' )
        return false;

    return wch <= L'7';
}
782
6001e347
RR
783size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
784{
4def3b35 785 size_t len = 0;
6001e347 786
dccce9ea 787 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
788 {
789 wxUint32 cc;
1cd52418 790#ifdef WC_UTF16
b5153fd8
VZ
791 // cast is ok for WC_UTF16
792 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 793 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 794#else
4def3b35
VS
795 cc=(*psz++) & 0x7fffffff;
796#endif
3698ae71
VZ
797
798 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
799 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 800 {
dccce9ea 801 if (buf)
ea8ce907 802 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 803 len++;
3698ae71
VZ
804 }
805 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
806 cc == L'\\' &&
807 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 808 {
dccce9ea 809 if (buf)
3698ae71
VZ
810 {
811 *buf++ = (char) (psz[0] - L'0')*0100 +
812 (psz[1] - L'0')*010 +
813 (psz[2] - L'0');
814 }
815
816 psz += 3;
ea8ce907
RR
817 len++;
818 }
819 else
820 {
821 unsigned cnt;
822 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
823 if (!cnt)
4def3b35 824 {
ea8ce907
RR
825 // plain ASCII char
826 if (buf)
827 *buf++ = (char) cc;
828 len++;
829 }
830
831 else
832 {
833 len += cnt + 1;
834 if (buf)
835 {
836 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
837 while (cnt--)
838 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
839 }
4def3b35
VS
840 }
841 }
6001e347 842 }
4def3b35 843
3698ae71
VZ
844 if (buf && (len<n))
845 *buf = 0;
adb45366 846
4def3b35 847 return len;
6001e347
RR
848}
849
c91830cb
VZ
850// ----------------------------------------------------------------------------
851// UTF-16
852// ----------------------------------------------------------------------------
853
854#ifdef WORDS_BIGENDIAN
bde4baac
VZ
855 #define wxMBConvUTF16straight wxMBConvUTF16BE
856 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 857#else
bde4baac
VZ
858 #define wxMBConvUTF16swap wxMBConvUTF16BE
859 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
860#endif
861
862
c91830cb
VZ
863#ifdef WC_UTF16
864
c91830cb
VZ
865// copy 16bit MB to 16bit String
866size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
867{
868 size_t len=0;
869
870 while (*(wxUint16*)psz && (!buf || len < n))
871 {
872 if (buf)
873 *buf++ = *(wxUint16*)psz;
874 len++;
875
876 psz += sizeof(wxUint16);
877 }
878 if (buf && len<n) *buf=0;
879
880 return len;
881}
882
883
884// copy 16bit String to 16bit MB
885size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
886{
887 size_t len=0;
888
889 while (*psz && (!buf || len < n))
890 {
891 if (buf)
892 {
893 *(wxUint16*)buf = *psz;
894 buf += sizeof(wxUint16);
895 }
896 len += sizeof(wxUint16);
897 psz++;
898 }
899 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
900
901 return len;
902}
903
904
905// swap 16bit MB to 16bit String
906size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
907{
908 size_t len=0;
909
910 while (*(wxUint16*)psz && (!buf || len < n))
911 {
912 if (buf)
913 {
914 ((char *)buf)[0] = psz[1];
915 ((char *)buf)[1] = psz[0];
916 buf++;
917 }
918 len++;
919 psz += sizeof(wxUint16);
920 }
921 if (buf && len<n) *buf=0;
922
923 return len;
924}
925
926
927// swap 16bit MB to 16bit String
928size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
929{
930 size_t len=0;
931
932 while (*psz && (!buf || len < n))
933 {
934 if (buf)
935 {
936 *buf++ = ((char*)psz)[1];
937 *buf++ = ((char*)psz)[0];
938 }
939 len += sizeof(wxUint16);
940 psz++;
941 }
942 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
943
944 return len;
945}
946
947
948#else // WC_UTF16
949
950
951// copy 16bit MB to 32bit String
952size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
953{
954 size_t len=0;
955
956 while (*(wxUint16*)psz && (!buf || len < n))
957 {
958 wxUint32 cc;
959 size_t pa=decode_utf16((wxUint16*)psz, cc);
960 if (pa == (size_t)-1)
961 return pa;
962
963 if (buf)
964 *buf++ = cc;
965 len++;
966 psz += pa * sizeof(wxUint16);
967 }
968 if (buf && len<n) *buf=0;
969
970 return len;
971}
972
973
974// copy 32bit String to 16bit MB
975size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
976{
977 size_t len=0;
978
979 while (*psz && (!buf || len < n))
980 {
981 wxUint16 cc[2];
982 size_t pa=encode_utf16(*psz, cc);
983
984 if (pa == (size_t)-1)
985 return pa;
986
987 if (buf)
988 {
69b80d28 989 *(wxUint16*)buf = cc[0];
b5153fd8 990 buf += sizeof(wxUint16);
c91830cb 991 if (pa > 1)
69b80d28
VZ
992 {
993 *(wxUint16*)buf = cc[1];
994 buf += sizeof(wxUint16);
995 }
c91830cb
VZ
996 }
997
998 len += pa*sizeof(wxUint16);
999 psz++;
1000 }
1001 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1002
1003 return len;
1004}
1005
1006
1007// swap 16bit MB to 32bit String
1008size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1009{
1010 size_t len=0;
1011
1012 while (*(wxUint16*)psz && (!buf || len < n))
1013 {
1014 wxUint32 cc;
1015 char tmp[4];
1016 tmp[0]=psz[1]; tmp[1]=psz[0];
1017 tmp[2]=psz[3]; tmp[3]=psz[2];
1018
1019 size_t pa=decode_utf16((wxUint16*)tmp, cc);
1020 if (pa == (size_t)-1)
1021 return pa;
1022
1023 if (buf)
1024 *buf++ = cc;
1025
1026 len++;
1027 psz += pa * sizeof(wxUint16);
1028 }
1029 if (buf && len<n) *buf=0;
1030
1031 return len;
1032}
1033
1034
1035// swap 32bit String to 16bit MB
1036size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1037{
1038 size_t len=0;
1039
1040 while (*psz && (!buf || len < n))
1041 {
1042 wxUint16 cc[2];
1043 size_t pa=encode_utf16(*psz, cc);
1044
1045 if (pa == (size_t)-1)
1046 return pa;
1047
1048 if (buf)
1049 {
1050 *buf++ = ((char*)cc)[1];
1051 *buf++ = ((char*)cc)[0];
1052 if (pa > 1)
1053 {
1054 *buf++ = ((char*)cc)[3];
1055 *buf++ = ((char*)cc)[2];
1056 }
1057 }
1058
1059 len += pa*sizeof(wxUint16);
1060 psz++;
1061 }
1062 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1063
1064 return len;
1065}
1066
1067#endif // WC_UTF16
1068
1069
1070// ----------------------------------------------------------------------------
1071// UTF-32
1072// ----------------------------------------------------------------------------
1073
1074#ifdef WORDS_BIGENDIAN
1075#define wxMBConvUTF32straight wxMBConvUTF32BE
1076#define wxMBConvUTF32swap wxMBConvUTF32LE
1077#else
1078#define wxMBConvUTF32swap wxMBConvUTF32BE
1079#define wxMBConvUTF32straight wxMBConvUTF32LE
1080#endif
1081
1082
1083WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1084WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1085
1086
1087#ifdef WC_UTF16
1088
1089// copy 32bit MB to 16bit String
1090size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1091{
1092 size_t len=0;
1093
1094 while (*(wxUint32*)psz && (!buf || len < n))
1095 {
1096 wxUint16 cc[2];
1097
1098 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1099 if (pa == (size_t)-1)
1100 return pa;
1101
1102 if (buf)
1103 {
1104 *buf++ = cc[0];
1105 if (pa > 1)
1106 *buf++ = cc[1];
1107 }
1108 len += pa;
1109 psz += sizeof(wxUint32);
1110 }
1111 if (buf && len<n) *buf=0;
1112
1113 return len;
1114}
1115
1116
1117// copy 16bit String to 32bit MB
1118size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1119{
1120 size_t len=0;
1121
1122 while (*psz && (!buf || len < n))
1123 {
1124 wxUint32 cc;
1125
b5153fd8
VZ
1126 // cast is ok for WC_UTF16
1127 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
1128 if (pa == (size_t)-1)
1129 return pa;
1130
1131 if (buf)
1132 {
1133 *(wxUint32*)buf = cc;
1134 buf += sizeof(wxUint32);
1135 }
1136 len += sizeof(wxUint32);
1137 psz += pa;
1138 }
b5153fd8
VZ
1139
1140 if (buf && len<=n-sizeof(wxUint32))
1141 *(wxUint32*)buf=0;
c91830cb
VZ
1142
1143 return len;
1144}
1145
1146
1147
1148// swap 32bit MB to 16bit String
1149size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1150{
1151 size_t len=0;
1152
1153 while (*(wxUint32*)psz && (!buf || len < n))
1154 {
1155 char tmp[4];
1156 tmp[0] = psz[3]; tmp[1] = psz[2];
1157 tmp[2] = psz[1]; tmp[3] = psz[0];
1158
1159
1160 wxUint16 cc[2];
1161
1162 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1163 if (pa == (size_t)-1)
1164 return pa;
1165
1166 if (buf)
1167 {
1168 *buf++ = cc[0];
1169 if (pa > 1)
1170 *buf++ = cc[1];
1171 }
1172 len += pa;
1173 psz += sizeof(wxUint32);
1174 }
b5153fd8
VZ
1175
1176 if (buf && len<n)
1177 *buf=0;
c91830cb
VZ
1178
1179 return len;
1180}
1181
1182
1183// swap 16bit String to 32bit MB
1184size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1185{
1186 size_t len=0;
1187
1188 while (*psz && (!buf || len < n))
1189 {
1190 char cc[4];
1191
b5153fd8
VZ
1192 // cast is ok for WC_UTF16
1193 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
1194 if (pa == (size_t)-1)
1195 return pa;
1196
1197 if (buf)
1198 {
1199 *buf++ = cc[3];
1200 *buf++ = cc[2];
1201 *buf++ = cc[1];
1202 *buf++ = cc[0];
1203 }
1204 len += sizeof(wxUint32);
1205 psz += pa;
1206 }
b5153fd8
VZ
1207
1208 if (buf && len<=n-sizeof(wxUint32))
1209 *(wxUint32*)buf=0;
c91830cb
VZ
1210
1211 return len;
1212}
1213
1214#else // WC_UTF16
1215
1216
1217// copy 32bit MB to 32bit String
1218size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1219{
1220 size_t len=0;
1221
1222 while (*(wxUint32*)psz && (!buf || len < n))
1223 {
1224 if (buf)
1225 *buf++ = *(wxUint32*)psz;
1226 len++;
1227 psz += sizeof(wxUint32);
1228 }
b5153fd8
VZ
1229
1230 if (buf && len<n)
1231 *buf=0;
c91830cb
VZ
1232
1233 return len;
1234}
1235
1236
1237// copy 32bit String to 32bit MB
1238size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1239{
1240 size_t len=0;
1241
1242 while (*psz && (!buf || len < n))
1243 {
1244 if (buf)
1245 {
1246 *(wxUint32*)buf = *psz;
1247 buf += sizeof(wxUint32);
1248 }
1249
1250 len += sizeof(wxUint32);
1251 psz++;
1252 }
1253
b5153fd8
VZ
1254 if (buf && len<=n-sizeof(wxUint32))
1255 *(wxUint32*)buf=0;
c91830cb
VZ
1256
1257 return len;
1258}
1259
1260
1261// swap 32bit MB to 32bit String
1262size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1263{
1264 size_t len=0;
1265
1266 while (*(wxUint32*)psz && (!buf || len < n))
1267 {
1268 if (buf)
1269 {
1270 ((char *)buf)[0] = psz[3];
1271 ((char *)buf)[1] = psz[2];
1272 ((char *)buf)[2] = psz[1];
1273 ((char *)buf)[3] = psz[0];
1274 buf++;
1275 }
1276 len++;
1277 psz += sizeof(wxUint32);
1278 }
b5153fd8
VZ
1279
1280 if (buf && len<n)
1281 *buf=0;
c91830cb
VZ
1282
1283 return len;
1284}
1285
1286
1287// swap 32bit String to 32bit MB
1288size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1289{
1290 size_t len=0;
1291
1292 while (*psz && (!buf || len < n))
1293 {
1294 if (buf)
1295 {
1296 *buf++ = ((char *)psz)[3];
1297 *buf++ = ((char *)psz)[2];
1298 *buf++ = ((char *)psz)[1];
1299 *buf++ = ((char *)psz)[0];
1300 }
1301 len += sizeof(wxUint32);
1302 psz++;
1303 }
b5153fd8
VZ
1304
1305 if (buf && len<=n-sizeof(wxUint32))
1306 *(wxUint32*)buf=0;
c91830cb
VZ
1307
1308 return len;
1309}
1310
1311
1312#endif // WC_UTF16
1313
1314
36acb880
VZ
1315// ============================================================================
1316// The classes doing conversion using the iconv_xxx() functions
1317// ============================================================================
3caec1bb 1318
b040e242 1319#ifdef HAVE_ICONV
3a0d76bc 1320
b1d547eb
VS
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if output buffer is _exactly_ as big as needed. Such case is
//     (unless there's yet another bug in glibc) the only case when iconv()
//     returns with (size_t)-1 (which means error) and says there are 0 bytes
//     left in the input buffer -- when _real_ error occurs,
//     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
//     iconv() failure.
//     [This bug does not appear in glibc 2.2.]
//
// NB: the macro arguments are parenthesised so that arbitrary expressions can
//     be passed without being torn apart by operator precedence
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// casts the address of the input pointer to the type of the second iconv()
// argument, ICONV_CONST accounts for its constness
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
36acb880
VZ
1337
1338// ----------------------------------------------------------------------------
e95354ec 1339// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1340// ----------------------------------------------------------------------------
1341
e95354ec 1342class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1343{
1344public:
e95354ec
VZ
1345 wxMBConv_iconv(const wxChar *name);
1346 virtual ~wxMBConv_iconv();
36acb880 1347
bde4baac
VZ
1348 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1349 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1350
e95354ec 1351 bool IsOk() const
36acb880
VZ
1352 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1353
1354protected:
1355 // the iconv handlers used to translate from multibyte to wide char and in
1356 // the other direction
1357 iconv_t m2w,
1358 w2m;
b1d547eb
VS
1359#if wxUSE_THREADS
1360 // guards access to m2w and w2m objects
1361 wxMutex m_iconvMutex;
1362#endif
36acb880
VZ
1363
1364private:
e95354ec 1365 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
1366 // available on this machine, it will remain NULL
1367 static const char *ms_wcCharsetName;
1368
1369 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1370 // different endian-ness than the native one
405d8f46 1371 static bool ms_wcNeedsSwap;
36acb880
VZ
1372};
1373
e95354ec
VZ
1374const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1375bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1376
e95354ec 1377wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 1378{
04c79127
RR
1379 // Do it the hard way
1380 char cname[100];
1381 for (size_t i = 0; i < wxStrlen(name)+1; i++)
1382 cname[i] = (char) name[i];
1383
36acb880
VZ
1384 // check for charset that represents wchar_t:
1385 if (ms_wcCharsetName == NULL)
f1339c56 1386 {
e95354ec 1387 ms_wcNeedsSwap = false;
dccce9ea 1388
36acb880
VZ
1389 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1390 ms_wcCharsetName = WC_NAME_BEST;
04c79127 1391 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 1392
36acb880
VZ
1393 if (m2w == (iconv_t)-1)
1394 {
1395 // try charset w/o bytesex info (e.g. "UCS4")
1396 // and check for bytesex ourselves:
1397 ms_wcCharsetName = WC_NAME;
04c79127 1398 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
1399
1400 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
1401 if (m2w == (iconv_t)-1)
1402 {
36acb880 1403 ms_wcCharsetName = "WCHAR_T";
04c79127 1404 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 1405 }
3a0d76bc 1406
36acb880
VZ
1407 if (m2w != (iconv_t)-1)
1408 {
1409 char buf[2], *bufPtr;
1410 wchar_t wbuf[2], *wbufPtr;
1411 size_t insz, outsz;
1412 size_t res;
1413
1414 buf[0] = 'A';
1415 buf[1] = 0;
1416 wbuf[0] = 0;
1417 insz = 2;
1418 outsz = SIZEOF_WCHAR_T * 2;
1419 wbufPtr = wbuf;
1420 bufPtr = buf;
1421
1422 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1423 (char**)&wbufPtr, &outsz);
1424
1425 if (ICONV_FAILED(res, insz))
3a0d76bc 1426 {
36acb880
VZ
1427 ms_wcCharsetName = NULL;
1428 wxLogLastError(wxT("iconv"));
2b5f62a0 1429 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
1430 }
1431 else
1432 {
36acb880 1433 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
1434 }
1435 }
36acb880
VZ
1436 else
1437 {
1438 ms_wcCharsetName = NULL;
373658eb 1439
77ffb593 1440 // VS: we must not output an error here, since wxWidgets will safely
957686c8
VS
1441 // fall back to using wxEncodingConverter.
1442 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1443 //wxLogError(
36acb880 1444 }
3a0d76bc 1445 }
36acb880 1446 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 1447 }
36acb880 1448 else // we already have ms_wcCharsetName
3caec1bb 1449 {
04c79127 1450 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 1451 }
dccce9ea 1452
36acb880
VZ
1453 // NB: don't ever pass NULL to iconv_open(), it may crash!
1454 if ( ms_wcCharsetName )
f1339c56 1455 {
04c79127 1456 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 1457 }
405d8f46
VZ
1458 else
1459 {
1460 w2m = (iconv_t)-1;
1461 }
36acb880 1462}
3caec1bb 1463
e95354ec 1464wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
1465{
1466 if ( m2w != (iconv_t)-1 )
1467 iconv_close(m2w);
1468 if ( w2m != (iconv_t)-1 )
1469 iconv_close(w2m);
1470}
3a0d76bc 1471
bde4baac 1472size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1473{
b1d547eb
VS
1474#if wxUSE_THREADS
1475 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1476 // Unfortunately there is a couple of global wxCSConv objects such as
1477 // wxConvLocal that are used all over wx code, so we have to make sure
1478 // the handle is used by at most one thread at the time. Otherwise
1479 // only a few wx classes would be safe to use from non-main threads
1480 // as MB<->WC conversion would fail "randomly".
1481 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1482#endif
3698ae71 1483
36acb880
VZ
1484 size_t inbuf = strlen(psz);
1485 size_t outbuf = n * SIZEOF_WCHAR_T;
1486 size_t res, cres;
1487 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1488 wchar_t *bufPtr = buf;
1489 const char *pszPtr = psz;
1490
1491 if (buf)
1492 {
1493 // have destination buffer, convert there
1494 cres = iconv(m2w,
1495 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1496 (char**)&bufPtr, &outbuf);
1497 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1498
36acb880 1499 if (ms_wcNeedsSwap)
3a0d76bc 1500 {
36acb880
VZ
1501 // convert to native endianness
1502 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 1503 }
adb45366 1504
49dd9820
VS
1505 // NB: iconv was given only strlen(psz) characters on input, and so
1506 // it couldn't convert the trailing zero. Let's do it ourselves
1507 // if there's some room left for it in the output buffer.
1508 if (res < n)
1509 buf[res] = 0;
36acb880
VZ
1510 }
1511 else
1512 {
1513 // no destination buffer... convert using temp buffer
1514 // to calculate destination buffer requirement
1515 wchar_t tbuf[8];
1516 res = 0;
1517 do {
1518 bufPtr = tbuf;
1519 outbuf = 8*SIZEOF_WCHAR_T;
1520
1521 cres = iconv(m2w,
1522 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1523 (char**)&bufPtr, &outbuf );
1524
1525 res += 8-(outbuf/SIZEOF_WCHAR_T);
1526 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1527 }
dccce9ea 1528
36acb880 1529 if (ICONV_FAILED(cres, inbuf))
f1339c56 1530 {
36acb880
VZ
1531 //VS: it is ok if iconv fails, hence trace only
1532 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1533 return (size_t)-1;
1534 }
1535
1536 return res;
1537}
1538
bde4baac 1539size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1540{
b1d547eb
VS
1541#if wxUSE_THREADS
1542 // NB: explained in MB2WC
1543 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1544#endif
3698ae71 1545
f8d791e0 1546 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1547 size_t outbuf = n;
1548 size_t res, cres;
3a0d76bc 1549
36acb880 1550 wchar_t *tmpbuf = 0;
3caec1bb 1551
36acb880
VZ
1552 if (ms_wcNeedsSwap)
1553 {
1554 // need to copy to temp buffer to switch endianness
1555 // this absolutely doesn't rock!
1556 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1557 // could be in read-only memory, or be accessed in some other thread)
1558 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1559 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1560 WC_BSWAP(tmpbuf, inbuf)
1561 psz=tmpbuf;
1562 }
3a0d76bc 1563
36acb880
VZ
1564 if (buf)
1565 {
1566 // have destination buffer, convert there
1567 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1568
36acb880 1569 res = n-outbuf;
adb45366 1570
49dd9820
VS
1571 // NB: iconv was given only wcslen(psz) characters on input, and so
1572 // it couldn't convert the trailing zero. Let's do it ourselves
1573 // if there's some room left for it in the output buffer.
1574 if (res < n)
1575 buf[0] = 0;
36acb880
VZ
1576 }
1577 else
1578 {
1579 // no destination buffer... convert using temp buffer
1580 // to calculate destination buffer requirement
1581 char tbuf[16];
1582 res = 0;
1583 do {
1584 buf = tbuf; outbuf = 16;
1585
1586 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1587
36acb880
VZ
1588 res += 16 - outbuf;
1589 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1590 }
dccce9ea 1591
36acb880
VZ
1592 if (ms_wcNeedsSwap)
1593 {
1594 free(tmpbuf);
1595 }
dccce9ea 1596
36acb880
VZ
1597 if (ICONV_FAILED(cres, inbuf))
1598 {
1599 //VS: it is ok if iconv fails, hence trace only
1600 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1601 return (size_t)-1;
1602 }
1603
1604 return res;
1605}
1606
b040e242 1607#endif // HAVE_ICONV
36acb880 1608
e95354ec 1609
36acb880
VZ
1610// ============================================================================
1611// Win32 conversion classes
1612// ============================================================================
1cd52418 1613
#ifdef wxHAVE_WIN32_MB2WC

// from utils.cpp
#if wxUSE_FONTMAP
extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
#endif

// wxMBConv implementation using the Win32 MultiByteToWideChar() and
// WideCharToMultiByte() functions with the code page stored in m_CodePage
class wxMBConv_win32 : public wxMBConv
{
public:
    // default ctor uses the ANSI code page
    wxMBConv_win32()
    {
        m_CodePage = CP_ACP;
    }

#if wxUSE_FONTMAP
    // ctors looking up the code page from the charset name or font encoding
    wxMBConv_win32(const wxChar* name)
    {
        m_CodePage = wxCharsetToCodepage(name);
    }

    wxMBConv_win32(wxFontEncoding encoding)
    {
        m_CodePage = wxEncodingToCodepage(encoding);
    }
#endif

    // returns the length of the converted string, not counting the
    // terminating NUL, or (size_t)-1 if the conversion failed
    size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
    {
        // note that we have to use MB_ERR_INVALID_CHARS flag as without it
        // the behaviour is not compatible with the Unix version (using iconv)
        // and breaks the library itself, e.g. wxTextInputStream::NextChar()
        // wouldn't work if reading an incomplete MB char didn't result in an
        // error
        //
        // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
        // an error (tested under Windows Server 2003) and apparently it is
        // done on purpose, i.e. the function accepts any input in this case
        // and although I'd prefer to return error on ill-formed output, our
        // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
        // explicitly ill-formed according to RFC 2152) either so we don't
        // even have any fallback here...
        int flags;
        if ( m_CodePage == CP_UTF7 )
            flags = 0;
        else
            flags = MB_ERR_INVALID_CHARS;

        const size_t count = ::MultiByteToWideChar
                               (
                                m_CodePage,     // code page
                                flags,          // flags: fail on error
                                psz,            // input string
                                -1,             // its length (NUL-terminated)
                                buf,            // output string
                                buf ? n : 0     // size of output buffer
                               );

        // 0 means that the function failed
        if ( !count )
            return (size_t)-1;

        // note that it returns count of written chars for buf != NULL and size
        // of the needed buffer for buf == NULL so in either case the length of
        // the string (which never includes the terminating NUL) is one less
        return count - 1;
    }

    // returns the length of the converted string, not counting the
    // terminating NUL, or (size_t)-1 if the conversion failed or was lossy
    size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
    {
        /*
            we have a problem here: by default, WideCharToMultiByte() may
            replace characters unrepresentable in the target code page with bad
            quality approximations such as turning "1/2" symbol (U+00BD) into
            "1" for the code pages which don't have it and we, obviously, want
            to avoid this at any price

            the trouble is that this function does it _silently_, i.e. it won't
            even tell us whether it did or not... Win98/2000 and higher provide
            WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
            we have to resort to a round trip, i.e. check that converting back
            results in the same string -- this is, of course, expensive but
            otherwise we simply can't be sure to not garble the data.
         */

        // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
        // it doesn't work with CJK encodings (which we test for rather roughly
        // here...) nor with UTF-7/8 nor, of course, with Windows versions not
        // supporting it
        const bool noBestFit = CanUseNoBestFit() && m_CodePage < 50000;

        BOOL usedDef wxDUMMY_INITIALIZE(false);

        // on an old system or with an unsupported encoding we can use
        // neither the flag nor the "default char used" output parameter
        const int flags = noBestFit ? WC_NO_BEST_FIT_CHARS : 0;
        BOOL * const pUsedDef = noBestFit ? &usedDef : NULL;

        const size_t count = ::WideCharToMultiByte
                               (
                                m_CodePage,     // code page
                                flags,          // either none or no best fit
                                pwz,            // input string
                                -1,             // it is (wide) NUL-terminated
                                buf,            // output buffer
                                buf ? n : 0,    // and its size
                                NULL,           // default "replacement" char
                                pUsedDef        // [out] was it used?
                               );

        if ( !count )
        {
            // function totally failed
            return (size_t)-1;
        }

        // the length never includes the terminating NUL, hence "- 1" here
        // and below
        if ( !buf )
        {
            // we were only computing the length, nothing to check
            return count - 1;
        }

        // we were really converting, check if we succeeded
        if ( flags )
        {
            // the conversion failed if any replacements were done
            if ( usedDef )
                return (size_t)-1;
        }
        else // we must resort to double tripping...
        {
            wxWCharBuffer wcBuf(n);
            if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
                    wcscmp(wcBuf, pwz) != 0 )
            {
                // we didn't obtain the same thing we started from, hence
                // the conversion was lossy and we consider that it failed
                return (size_t)-1;
            }
        }

        return count - 1;
    }

    bool IsOk() const { return m_CodePage != -1; }

private:
    // checks (once, the result is cached) whether the OS version is recent
    // enough for WC_NO_BEST_FIT_CHARS to be used
    static bool CanUseNoBestFit()
    {
        static int s_isWin98Or2k = -1;

        if ( s_isWin98Or2k == -1 )
        {
            int verMaj, verMin;
            const int os = wxGetOsVersion(&verMaj, &verMin);

            if ( os == wxWIN95 )
            {
                s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
            }
            else if ( os == wxWINDOWS_NT )
            {
                s_isWin98Or2k = verMaj >= 5;
            }
            else
            {
                // unknown, be conservative by default
                s_isWin98Or2k = 0;
            }

            wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
        }

        return s_isWin98Or2k == 1;
    }

    long m_CodePage;
};

#endif // wxHAVE_WIN32_MB2WC
1791
f7e98dee
RN
1792// ============================================================================
1793// Cocoa conversion classes
1794// ============================================================================
1795
1796#if defined(__WXCOCOA__)
1797
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1801
1802#include <CoreFoundation/CFString.h>
1803#include <CoreFoundation/CFStringEncodingExt.h>
1804
1805CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 1806{
638357a0 1807 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ecd9653b
WS
1808 if ( encoding == wxFONTENCODING_DEFAULT )
1809 {
638357a0 1810 enc = CFStringGetSystemEncoding();
ecd9653b
WS
1811 }
1812 else switch( encoding)
1813 {
1814 case wxFONTENCODING_ISO8859_1 :
1815 enc = kCFStringEncodingISOLatin1 ;
1816 break ;
1817 case wxFONTENCODING_ISO8859_2 :
1818 enc = kCFStringEncodingISOLatin2;
1819 break ;
1820 case wxFONTENCODING_ISO8859_3 :
1821 enc = kCFStringEncodingISOLatin3 ;
1822 break ;
1823 case wxFONTENCODING_ISO8859_4 :
1824 enc = kCFStringEncodingISOLatin4;
1825 break ;
1826 case wxFONTENCODING_ISO8859_5 :
1827 enc = kCFStringEncodingISOLatinCyrillic;
1828 break ;
1829 case wxFONTENCODING_ISO8859_6 :
1830 enc = kCFStringEncodingISOLatinArabic;
1831 break ;
1832 case wxFONTENCODING_ISO8859_7 :
1833 enc = kCFStringEncodingISOLatinGreek;
1834 break ;
1835 case wxFONTENCODING_ISO8859_8 :
1836 enc = kCFStringEncodingISOLatinHebrew;
1837 break ;
1838 case wxFONTENCODING_ISO8859_9 :
1839 enc = kCFStringEncodingISOLatin5;
1840 break ;
1841 case wxFONTENCODING_ISO8859_10 :
1842 enc = kCFStringEncodingISOLatin6;
1843 break ;
1844 case wxFONTENCODING_ISO8859_11 :
1845 enc = kCFStringEncodingISOLatinThai;
1846 break ;
1847 case wxFONTENCODING_ISO8859_13 :
1848 enc = kCFStringEncodingISOLatin7;
1849 break ;
1850 case wxFONTENCODING_ISO8859_14 :
1851 enc = kCFStringEncodingISOLatin8;
1852 break ;
1853 case wxFONTENCODING_ISO8859_15 :
1854 enc = kCFStringEncodingISOLatin9;
1855 break ;
1856
1857 case wxFONTENCODING_KOI8 :
1858 enc = kCFStringEncodingKOI8_R;
1859 break ;
1860 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1861 enc = kCFStringEncodingDOSRussian;
1862 break ;
1863
1864// case wxFONTENCODING_BULGARIAN :
1865// enc = ;
1866// break ;
1867
1868 case wxFONTENCODING_CP437 :
1869 enc =kCFStringEncodingDOSLatinUS ;
1870 break ;
1871 case wxFONTENCODING_CP850 :
1872 enc = kCFStringEncodingDOSLatin1;
1873 break ;
1874 case wxFONTENCODING_CP852 :
1875 enc = kCFStringEncodingDOSLatin2;
1876 break ;
1877 case wxFONTENCODING_CP855 :
1878 enc = kCFStringEncodingDOSCyrillic;
1879 break ;
1880 case wxFONTENCODING_CP866 :
1881 enc =kCFStringEncodingDOSRussian ;
1882 break ;
1883 case wxFONTENCODING_CP874 :
1884 enc = kCFStringEncodingDOSThai;
1885 break ;
1886 case wxFONTENCODING_CP932 :
1887 enc = kCFStringEncodingDOSJapanese;
1888 break ;
1889 case wxFONTENCODING_CP936 :
1890 enc =kCFStringEncodingDOSChineseSimplif ;
1891 break ;
1892 case wxFONTENCODING_CP949 :
1893 enc = kCFStringEncodingDOSKorean;
1894 break ;
1895 case wxFONTENCODING_CP950 :
1896 enc = kCFStringEncodingDOSChineseTrad;
1897 break ;
ecd9653b
WS
1898 case wxFONTENCODING_CP1250 :
1899 enc = kCFStringEncodingWindowsLatin2;
1900 break ;
1901 case wxFONTENCODING_CP1251 :
1902 enc =kCFStringEncodingWindowsCyrillic ;
1903 break ;
1904 case wxFONTENCODING_CP1252 :
1905 enc =kCFStringEncodingWindowsLatin1 ;
1906 break ;
1907 case wxFONTENCODING_CP1253 :
1908 enc = kCFStringEncodingWindowsGreek;
1909 break ;
1910 case wxFONTENCODING_CP1254 :
1911 enc = kCFStringEncodingWindowsLatin5;
1912 break ;
1913 case wxFONTENCODING_CP1255 :
1914 enc =kCFStringEncodingWindowsHebrew ;
1915 break ;
1916 case wxFONTENCODING_CP1256 :
1917 enc =kCFStringEncodingWindowsArabic ;
1918 break ;
1919 case wxFONTENCODING_CP1257 :
1920 enc = kCFStringEncodingWindowsBalticRim;
1921 break ;
638357a0
RN
1922// This only really encodes to UTF7 (if that) evidently
1923// case wxFONTENCODING_UTF7 :
1924// enc = kCFStringEncodingNonLossyASCII ;
1925// break ;
ecd9653b
WS
1926 case wxFONTENCODING_UTF8 :
1927 enc = kCFStringEncodingUTF8 ;
1928 break ;
1929 case wxFONTENCODING_EUC_JP :
1930 enc = kCFStringEncodingEUC_JP;
1931 break ;
1932 case wxFONTENCODING_UTF16 :
f7e98dee 1933 enc = kCFStringEncodingUnicode ;
ecd9653b 1934 break ;
f7e98dee
RN
1935 case wxFONTENCODING_MACROMAN :
1936 enc = kCFStringEncodingMacRoman ;
1937 break ;
1938 case wxFONTENCODING_MACJAPANESE :
1939 enc = kCFStringEncodingMacJapanese ;
1940 break ;
1941 case wxFONTENCODING_MACCHINESETRAD :
1942 enc = kCFStringEncodingMacChineseTrad ;
1943 break ;
1944 case wxFONTENCODING_MACKOREAN :
1945 enc = kCFStringEncodingMacKorean ;
1946 break ;
1947 case wxFONTENCODING_MACARABIC :
1948 enc = kCFStringEncodingMacArabic ;
1949 break ;
1950 case wxFONTENCODING_MACHEBREW :
1951 enc = kCFStringEncodingMacHebrew ;
1952 break ;
1953 case wxFONTENCODING_MACGREEK :
1954 enc = kCFStringEncodingMacGreek ;
1955 break ;
1956 case wxFONTENCODING_MACCYRILLIC :
1957 enc = kCFStringEncodingMacCyrillic ;
1958 break ;
1959 case wxFONTENCODING_MACDEVANAGARI :
1960 enc = kCFStringEncodingMacDevanagari ;
1961 break ;
1962 case wxFONTENCODING_MACGURMUKHI :
1963 enc = kCFStringEncodingMacGurmukhi ;
1964 break ;
1965 case wxFONTENCODING_MACGUJARATI :
1966 enc = kCFStringEncodingMacGujarati ;
1967 break ;
1968 case wxFONTENCODING_MACORIYA :
1969 enc = kCFStringEncodingMacOriya ;
1970 break ;
1971 case wxFONTENCODING_MACBENGALI :
1972 enc = kCFStringEncodingMacBengali ;
1973 break ;
1974 case wxFONTENCODING_MACTAMIL :
1975 enc = kCFStringEncodingMacTamil ;
1976 break ;
1977 case wxFONTENCODING_MACTELUGU :
1978 enc = kCFStringEncodingMacTelugu ;
1979 break ;
1980 case wxFONTENCODING_MACKANNADA :
1981 enc = kCFStringEncodingMacKannada ;
1982 break ;
1983 case wxFONTENCODING_MACMALAJALAM :
1984 enc = kCFStringEncodingMacMalayalam ;
1985 break ;
1986 case wxFONTENCODING_MACSINHALESE :
1987 enc = kCFStringEncodingMacSinhalese ;
1988 break ;
1989 case wxFONTENCODING_MACBURMESE :
1990 enc = kCFStringEncodingMacBurmese ;
1991 break ;
1992 case wxFONTENCODING_MACKHMER :
1993 enc = kCFStringEncodingMacKhmer ;
1994 break ;
1995 case wxFONTENCODING_MACTHAI :
1996 enc = kCFStringEncodingMacThai ;
1997 break ;
1998 case wxFONTENCODING_MACLAOTIAN :
1999 enc = kCFStringEncodingMacLaotian ;
2000 break ;
2001 case wxFONTENCODING_MACGEORGIAN :
2002 enc = kCFStringEncodingMacGeorgian ;
2003 break ;
2004 case wxFONTENCODING_MACARMENIAN :
2005 enc = kCFStringEncodingMacArmenian ;
2006 break ;
2007 case wxFONTENCODING_MACCHINESESIMP :
2008 enc = kCFStringEncodingMacChineseSimp ;
2009 break ;
2010 case wxFONTENCODING_MACTIBETAN :
2011 enc = kCFStringEncodingMacTibetan ;
2012 break ;
2013 case wxFONTENCODING_MACMONGOLIAN :
2014 enc = kCFStringEncodingMacMongolian ;
2015 break ;
2016 case wxFONTENCODING_MACETHIOPIC :
2017 enc = kCFStringEncodingMacEthiopic ;
2018 break ;
2019 case wxFONTENCODING_MACCENTRALEUR :
2020 enc = kCFStringEncodingMacCentralEurRoman ;
2021 break ;
2022 case wxFONTENCODING_MACVIATNAMESE :
2023 enc = kCFStringEncodingMacVietnamese ;
2024 break ;
2025 case wxFONTENCODING_MACARABICEXT :
2026 enc = kCFStringEncodingMacExtArabic ;
2027 break ;
2028 case wxFONTENCODING_MACSYMBOL :
2029 enc = kCFStringEncodingMacSymbol ;
2030 break ;
2031 case wxFONTENCODING_MACDINGBATS :
2032 enc = kCFStringEncodingMacDingbats ;
2033 break ;
2034 case wxFONTENCODING_MACTURKISH :
2035 enc = kCFStringEncodingMacTurkish ;
2036 break ;
2037 case wxFONTENCODING_MACCROATIAN :
2038 enc = kCFStringEncodingMacCroatian ;
2039 break ;
2040 case wxFONTENCODING_MACICELANDIC :
2041 enc = kCFStringEncodingMacIcelandic ;
2042 break ;
2043 case wxFONTENCODING_MACROMANIAN :
2044 enc = kCFStringEncodingMacRomanian ;
2045 break ;
2046 case wxFONTENCODING_MACCELTIC :
2047 enc = kCFStringEncodingMacCeltic ;
2048 break ;
2049 case wxFONTENCODING_MACGAELIC :
2050 enc = kCFStringEncodingMacGaelic ;
2051 break ;
ecd9653b
WS
2052// case wxFONTENCODING_MACKEYBOARD :
2053// enc = kCFStringEncodingMacKeyboardGlyphs ;
2054// break ;
2055 default :
2056 // because gcc is picky
2057 break ;
2058 } ;
2059 return enc ;
f7e98dee
RN
2060}
2061
f7e98dee
RN
2062class wxMBConv_cocoa : public wxMBConv
2063{
2064public:
2065 wxMBConv_cocoa()
2066 {
2067 Init(CFStringGetSystemEncoding()) ;
2068 }
2069
a6900d10 2070#if wxUSE_FONTMAP
f7e98dee
RN
2071 wxMBConv_cocoa(const wxChar* name)
2072 {
267e11c5 2073 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
f7e98dee 2074 }
a6900d10 2075#endif
f7e98dee
RN
2076
2077 wxMBConv_cocoa(wxFontEncoding encoding)
2078 {
2079 Init( wxCFStringEncFromFontEnc(encoding) );
2080 }
2081
2082 ~wxMBConv_cocoa()
2083 {
2084 }
2085
2086 void Init( CFStringEncoding encoding)
2087 {
638357a0 2088 m_encoding = encoding ;
f7e98dee
RN
2089 }
2090
2091 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2092 {
2093 wxASSERT(szUnConv);
ecd9653b 2094
638357a0
RN
2095 CFStringRef theString = CFStringCreateWithBytes (
2096 NULL, //the allocator
2097 (const UInt8*)szUnConv,
2098 strlen(szUnConv),
2099 m_encoding,
2100 false //no BOM/external representation
f7e98dee
RN
2101 );
2102
2103 wxASSERT(theString);
2104
638357a0
RN
2105 size_t nOutLength = CFStringGetLength(theString);
2106
2107 if (szOut == NULL)
f7e98dee 2108 {
f7e98dee 2109 CFRelease(theString);
638357a0 2110 return nOutLength;
f7e98dee 2111 }
ecd9653b 2112
638357a0 2113 CFRange theRange = { 0, nOutSize };
ecd9653b 2114
638357a0
RN
2115#if SIZEOF_WCHAR_T == 4
2116 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2117#endif
3698ae71 2118
f7e98dee 2119 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
3698ae71 2120
f7e98dee 2121 CFRelease(theString);
ecd9653b 2122
638357a0 2123 szUniCharBuffer[nOutLength] = '\0' ;
f7e98dee
RN
2124
2125#if SIZEOF_WCHAR_T == 4
2126 wxMBConvUTF16 converter ;
638357a0 2127 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
f7e98dee
RN
2128 delete[] szUniCharBuffer;
2129#endif
3698ae71 2130
638357a0 2131 return nOutLength;
f7e98dee
RN
2132 }
2133
2134 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2135 {
638357a0 2136 wxASSERT(szUnConv);
3698ae71 2137
f7e98dee 2138 size_t nRealOutSize;
638357a0 2139 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 2140 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2141
f7e98dee
RN
2142#if SIZEOF_WCHAR_T == 4
2143 wxMBConvUTF16BE converter ;
2144 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2145 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2146 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2147 nBufSize /= sizeof(UniChar);
f7e98dee
RN
2148#endif
2149
2150 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2151 NULL, //allocator
2152 szUniBuffer,
2153 nBufSize,
638357a0 2154 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 2155 );
ecd9653b 2156
f7e98dee 2157 wxASSERT(theString);
ecd9653b 2158
f7e98dee 2159 //Note that CER puts a BOM when converting to unicode
638357a0
RN
2160 //so we check and use getchars instead in that case
2161 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 2162 {
638357a0
RN
2163 if (szOut != NULL)
2164 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
3698ae71 2165
638357a0
RN
2166 nRealOutSize = CFStringGetLength(theString) + 1;
2167 }
2168 else
2169 {
2170 CFStringGetBytes(
2171 theString,
2172 CFRangeMake(0, CFStringGetLength(theString)),
2173 m_encoding,
2174 0, //what to put in characters that can't be converted -
2175 //0 tells CFString to return NULL if it meets such a character
2176 false, //not an external representation
2177 (UInt8*) szOut,
3698ae71 2178 nOutSize,
638357a0
RN
2179 (CFIndex*) &nRealOutSize
2180 );
f7e98dee 2181 }
ecd9653b 2182
638357a0 2183 CFRelease(theString);
ecd9653b 2184
638357a0
RN
2185#if SIZEOF_WCHAR_T == 4
2186 delete[] szUniBuffer;
2187#endif
ecd9653b 2188
f7e98dee
RN
2189 return nRealOutSize - 1;
2190 }
2191
2192 bool IsOk() const
ecd9653b 2193 {
3698ae71 2194 return m_encoding != kCFStringEncodingInvalidId &&
638357a0 2195 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2196 }
2197
2198private:
638357a0 2199 CFStringEncoding m_encoding ;
f7e98dee
RN
2200};
2201
2202#endif // defined(__WXCOCOA__)
2203
335d31e0
SC
2204// ============================================================================
2205// Mac conversion classes
2206// ============================================================================
2207
2208#if defined(__WXMAC__) && defined(TARGET_CARBON)
2209
2210class wxMBConv_mac : public wxMBConv
2211{
2212public:
2213 wxMBConv_mac()
2214 {
2215 Init(CFStringGetSystemEncoding()) ;
2216 }
2217
2d1659cf 2218#if wxUSE_FONTMAP
335d31e0
SC
2219 wxMBConv_mac(const wxChar* name)
2220 {
267e11c5 2221 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
335d31e0 2222 }
2d1659cf 2223#endif
335d31e0
SC
2224
2225 wxMBConv_mac(wxFontEncoding encoding)
2226 {
d775fa82
WS
2227 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2228 }
2229
2230 ~wxMBConv_mac()
2231 {
2232 OSStatus status = noErr ;
2233 status = TECDisposeConverter(m_MB2WC_converter);
2234 status = TECDisposeConverter(m_WC2MB_converter);
2235 }
2236
2237
2238 void Init( TextEncodingBase encoding)
2239 {
2240 OSStatus status = noErr ;
2241 m_char_encoding = encoding ;
2242 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2243
2244 status = TECCreateConverter(&m_MB2WC_converter,
2245 m_char_encoding,
2246 m_unicode_encoding);
2247 status = TECCreateConverter(&m_WC2MB_converter,
2248 m_unicode_encoding,
2249 m_char_encoding);
2250 }
2251
335d31e0
SC
2252 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2253 {
d775fa82
WS
2254 OSStatus status = noErr ;
2255 ByteCount byteOutLen ;
2256 ByteCount byteInLen = strlen(psz) ;
2257 wchar_t *tbuf = NULL ;
2258 UniChar* ubuf = NULL ;
2259 size_t res = 0 ;
2260
2261 if (buf == NULL)
2262 {
638357a0 2263 //apple specs say at least 32
c543817b 2264 n = wxMax( 32 , byteInLen ) ;
d775fa82
WS
2265 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2266 }
2267 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
f3a355ce 2268#if SIZEOF_WCHAR_T == 4
d775fa82 2269 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2270#else
d775fa82 2271 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2272#endif
d775fa82
WS
2273 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2274 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
f3a355ce 2275#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2276 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2277 // is not properly terminated we get random characters at the end
2278 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d775fa82
WS
2279 wxMBConvUTF16BE converter ;
2280 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2281 free( ubuf ) ;
f3a355ce 2282#else
d775fa82 2283 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2284#endif
d775fa82
WS
2285 if ( buf == NULL )
2286 free(tbuf) ;
335d31e0 2287
335d31e0
SC
2288 if ( buf && res < n)
2289 buf[res] = 0;
2290
d775fa82 2291 return res ;
335d31e0
SC
2292 }
2293
2294 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82
WS
2295 {
2296 OSStatus status = noErr ;
2297 ByteCount byteOutLen ;
2298 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2299
2300 char *tbuf = NULL ;
2301
2302 if (buf == NULL)
2303 {
638357a0 2304 //apple specs say at least 32
c543817b 2305 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2306 tbuf = (char*) malloc( n ) ;
2307 }
2308
2309 ByteCount byteBufferLen = n ;
2310 UniChar* ubuf = NULL ;
f3a355ce 2311#if SIZEOF_WCHAR_T == 4
d775fa82
WS
2312 wxMBConvUTF16BE converter ;
2313 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2314 byteInLen = unicharlen ;
2315 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2316 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce 2317#else
d775fa82 2318 ubuf = (UniChar*) psz ;
f3a355ce 2319#endif
d775fa82
WS
2320 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2321 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
f3a355ce 2322#if SIZEOF_WCHAR_T == 4
d775fa82 2323 free( ubuf ) ;
f3a355ce 2324#endif
d775fa82
WS
2325 if ( buf == NULL )
2326 free(tbuf) ;
335d31e0 2327
d775fa82 2328 size_t res = byteOutLen ;
335d31e0 2329 if ( buf && res < n)
638357a0 2330 {
335d31e0 2331 buf[res] = 0;
3698ae71 2332
638357a0
RN
2333 //we need to double-trip to verify it didn't insert any ? in place
2334 //of bogus characters
2335 wxWCharBuffer wcBuf(n);
2336 size_t pszlen = wxWcslen(psz);
2337 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2338 wxWcslen(wcBuf) != pszlen ||
2339 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2340 {
2341 // we didn't obtain the same thing we started from, hence
2342 // the conversion was lossy and we consider that it failed
2343 return (size_t)-1;
2344 }
2345 }
335d31e0 2346
d775fa82 2347 return res ;
335d31e0
SC
2348 }
2349
2350 bool IsOk() const
2351 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2352
2353private:
d775fa82
WS
2354 TECObjectRef m_MB2WC_converter ;
2355 TECObjectRef m_WC2MB_converter ;
2356
2357 TextEncodingBase m_char_encoding ;
2358 TextEncodingBase m_unicode_encoding ;
335d31e0
SC
2359};
2360
2361#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 2362
36acb880
VZ
2363// ============================================================================
2364// wxEncodingConverter based conversion classes
2365// ============================================================================
2366
1e6feb95 2367#if wxUSE_FONTMAP
1cd52418 2368
e95354ec 2369class wxMBConv_wxwin : public wxMBConv
1cd52418 2370{
8b04d4c4
VZ
2371private:
2372 void Init()
2373 {
2374 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2375 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2376 }
2377
6001e347 2378public:
f1339c56
RR
2379 // temporarily just use wxEncodingConverter stuff,
2380 // so that it works while a better implementation is built
e95354ec 2381 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
2382 {
2383 if (name)
267e11c5 2384 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
2385 else
2386 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 2387
8b04d4c4
VZ
2388 Init();
2389 }
2390
e95354ec 2391 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
2392 {
2393 m_enc = enc;
2394
2395 Init();
f1339c56 2396 }
dccce9ea 2397
bde4baac 2398 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
2399 {
2400 size_t inbuf = strlen(psz);
dccce9ea 2401 if (buf)
c643a977
VS
2402 {
2403 if (!m2w.Convert(psz,buf))
2404 return (size_t)-1;
2405 }
f1339c56
RR
2406 return inbuf;
2407 }
dccce9ea 2408
bde4baac 2409 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 2410 {
f8d791e0 2411 const size_t inbuf = wxWcslen(psz);
f1339c56 2412 if (buf)
c643a977
VS
2413 {
2414 if (!w2m.Convert(psz,buf))
2415 return (size_t)-1;
2416 }
dccce9ea 2417
f1339c56
RR
2418 return inbuf;
2419 }
dccce9ea 2420
e95354ec 2421 bool IsOk() const { return m_ok; }
f1339c56
RR
2422
2423public:
8b04d4c4 2424 wxFontEncoding m_enc;
f1339c56 2425 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
2426
2427 // were we initialized successfully?
2428 bool m_ok;
fc7a2a60 2429
e95354ec 2430 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 2431};
6001e347 2432
1e6feb95
VZ
2433#endif // wxUSE_FONTMAP
2434
36acb880
VZ
2435// ============================================================================
2436// wxCSConv implementation
2437// ============================================================================
2438
8b04d4c4 2439void wxCSConv::Init()
6001e347 2440{
e95354ec
VZ
2441 m_name = NULL;
2442 m_convReal = NULL;
2443 m_deferred = true;
2444}
2445
8b04d4c4
VZ
2446wxCSConv::wxCSConv(const wxChar *charset)
2447{
2448 Init();
82713003 2449
e95354ec
VZ
2450 if ( charset )
2451 {
e95354ec
VZ
2452 SetName(charset);
2453 }
bda3d86a
VZ
2454
2455 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
2456}
2457
8b04d4c4
VZ
2458wxCSConv::wxCSConv(wxFontEncoding encoding)
2459{
bda3d86a 2460 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
2461 {
2462 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2463
2464 encoding = wxFONTENCODING_SYSTEM;
2465 }
2466
8b04d4c4
VZ
2467 Init();
2468
bda3d86a 2469 m_encoding = encoding;
8b04d4c4
VZ
2470}
2471
6001e347
RR
2472wxCSConv::~wxCSConv()
2473{
65e50848
JS
2474 Clear();
2475}
2476
54380f29 2477wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 2478 : wxMBConv()
54380f29 2479{
8b04d4c4
VZ
2480 Init();
2481
54380f29 2482 SetName(conv.m_name);
8b04d4c4 2483 m_encoding = conv.m_encoding;
54380f29
GD
2484}
2485
2486wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2487{
2488 Clear();
8b04d4c4 2489
54380f29 2490 SetName(conv.m_name);
8b04d4c4
VZ
2491 m_encoding = conv.m_encoding;
2492
54380f29
GD
2493 return *this;
2494}
2495
65e50848
JS
2496void wxCSConv::Clear()
2497{
8b04d4c4 2498 free(m_name);
e95354ec 2499 delete m_convReal;
8b04d4c4 2500
65e50848 2501 m_name = NULL;
e95354ec 2502 m_convReal = NULL;
6001e347
RR
2503}
2504
2505void wxCSConv::SetName(const wxChar *charset)
2506{
f1339c56
RR
2507 if (charset)
2508 {
2509 m_name = wxStrdup(charset);
e95354ec 2510 m_deferred = true;
f1339c56 2511 }
6001e347
RR
2512}
2513
e95354ec
VZ
2514wxMBConv *wxCSConv::DoCreate() const
2515{
c547282d
VZ
2516 // check for the special case of ASCII or ISO8859-1 charset: as we have
2517 // special knowledge of it anyhow, we don't need to create a special
2518 // conversion object
2519 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 2520 {
e95354ec
VZ
2521 // don't convert at all
2522 return NULL;
2523 }
dccce9ea 2524
e95354ec
VZ
2525 // we trust OS to do conversion better than we can so try external
2526 // conversion methods first
2527 //
2528 // the full order is:
2529 // 1. OS conversion (iconv() under Unix or Win32 API)
2530 // 2. hard coded conversions for UTF
2531 // 3. wxEncodingConverter as fall back
2532
2533 // step (1)
2534#ifdef HAVE_ICONV
c547282d 2535#if !wxUSE_FONTMAP
e95354ec 2536 if ( m_name )
c547282d 2537#endif // !wxUSE_FONTMAP
e95354ec 2538 {
c547282d
VZ
2539 wxString name(m_name);
2540
2541#if wxUSE_FONTMAP
2542 if ( name.empty() )
267e11c5 2543 name = wxFontMapperBase::Get()->GetEncodingName(m_encoding);
c547282d
VZ
2544#endif // wxUSE_FONTMAP
2545
2546 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
e95354ec
VZ
2547 if ( conv->IsOk() )
2548 return conv;
2549
2550 delete conv;
2551 }
2552#endif // HAVE_ICONV
2553
2554#ifdef wxHAVE_WIN32_MB2WC
2555 {
7608a683 2556#if wxUSE_FONTMAP
e95354ec
VZ
2557 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2558 : new wxMBConv_win32(m_encoding);
2559 if ( conv->IsOk() )
2560 return conv;
2561
2562 delete conv;
7608a683
WS
2563#else
2564 return NULL;
2565#endif
e95354ec
VZ
2566 }
2567#endif // wxHAVE_WIN32_MB2WC
d775fa82
WS
2568#if defined(__WXMAC__)
2569 {
5c3c8676 2570 // leave UTF16 and UTF32 to the built-ins of wx
3698ae71 2571 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
5c3c8676 2572 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
d775fa82
WS
2573 {
2574
2d1659cf 2575#if wxUSE_FONTMAP
d775fa82
WS
2576 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2577 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
2578#else
2579 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2580#endif
d775fa82 2581 if ( conv->IsOk() )
f7e98dee
RN
2582 return conv;
2583
2584 delete conv;
2585 }
2586 }
2587#endif
2588#if defined(__WXCOCOA__)
2589 {
2590 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2591 {
2592
a6900d10 2593#if wxUSE_FONTMAP
f7e98dee
RN
2594 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2595 : new wxMBConv_cocoa(m_encoding);
a6900d10
RN
2596#else
2597 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2598#endif
f7e98dee 2599 if ( conv->IsOk() )
d775fa82
WS
2600 return conv;
2601
2602 delete conv;
2603 }
335d31e0
SC
2604 }
2605#endif
e95354ec
VZ
2606 // step (2)
2607 wxFontEncoding enc = m_encoding;
2608#if wxUSE_FONTMAP
c547282d
VZ
2609 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2610 {
2611 // use "false" to suppress interactive dialogs -- we can be called from
2612 // anywhere and popping up a dialog from here is the last thing we want to
2613 // do
267e11c5 2614 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 2615 }
e95354ec
VZ
2616#endif // wxUSE_FONTMAP
2617
2618 switch ( enc )
2619 {
2620 case wxFONTENCODING_UTF7:
2621 return new wxMBConvUTF7;
2622
2623 case wxFONTENCODING_UTF8:
2624 return new wxMBConvUTF8;
2625
e95354ec
VZ
2626 case wxFONTENCODING_UTF16BE:
2627 return new wxMBConvUTF16BE;
2628
2629 case wxFONTENCODING_UTF16LE:
2630 return new wxMBConvUTF16LE;
2631
e95354ec
VZ
2632 case wxFONTENCODING_UTF32BE:
2633 return new wxMBConvUTF32BE;
2634
2635 case wxFONTENCODING_UTF32LE:
2636 return new wxMBConvUTF32LE;
2637
2638 default:
2639 // nothing to do but put here to suppress gcc warnings
2640 ;
2641 }
2642
2643 // step (3)
2644#if wxUSE_FONTMAP
2645 {
2646 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2647 : new wxMBConv_wxwin(m_encoding);
2648 if ( conv->IsOk() )
2649 return conv;
2650
2651 delete conv;
2652 }
2653#endif // wxUSE_FONTMAP
2654
a58d4f4d
VS
2655 // NB: This is a hack to prevent deadlock. What could otherwise happen
2656 // in Unicode build: wxConvLocal creation ends up being here
2657 // because of some failure and logs the error. But wxLog will try to
2658 // attach timestamp, for which it will need wxConvLocal (to convert
2659 // time to char* and then wchar_t*), but that fails, tries to log
2660 // error, but wxLog has a (already locked) critical section that
2661 // guards static buffer.
2662 static bool alreadyLoggingError = false;
2663 if (!alreadyLoggingError)
2664 {
2665 alreadyLoggingError = true;
2666 wxLogError(_("Cannot convert from the charset '%s'!"),
2667 m_name ? m_name
e95354ec
VZ
2668 :
2669#if wxUSE_FONTMAP
267e11c5 2670 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
e95354ec
VZ
2671#else // !wxUSE_FONTMAP
2672 wxString::Format(_("encoding %s"), m_encoding).c_str()
2673#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2674 );
a58d4f4d
VS
2675 alreadyLoggingError = false;
2676 }
e95354ec
VZ
2677
2678 return NULL;
2679}
2680
2681void wxCSConv::CreateConvIfNeeded() const
2682{
2683 if ( m_deferred )
2684 {
2685 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
2686
2687#if wxUSE_INTL
2688 // if we don't have neither the name nor the encoding, use the default
2689 // encoding for this system
2690 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2691 {
4d312c22 2692 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
2693 }
2694#endif // wxUSE_INTL
2695
e95354ec
VZ
2696 self->m_convReal = DoCreate();
2697 self->m_deferred = false;
6001e347 2698 }
6001e347
RR
2699}
2700
2701size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2702{
e95354ec 2703 CreateConvIfNeeded();
dccce9ea 2704
e95354ec
VZ
2705 if (m_convReal)
2706 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
2707
2708 // latin-1 (direct)
4def3b35 2709 size_t len = strlen(psz);
dccce9ea 2710
f1339c56
RR
2711 if (buf)
2712 {
4def3b35 2713 for (size_t c = 0; c <= len; c++)
f1339c56
RR
2714 buf[c] = (unsigned char)(psz[c]);
2715 }
dccce9ea 2716
f1339c56 2717 return len;
6001e347
RR
2718}
2719
2720size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2721{
e95354ec 2722 CreateConvIfNeeded();
dccce9ea 2723
e95354ec
VZ
2724 if (m_convReal)
2725 return m_convReal->WC2MB(buf, psz, n);
1cd52418 2726
f1339c56 2727 // latin-1 (direct)
f8d791e0 2728 const size_t len = wxWcslen(psz);
f1339c56
RR
2729 if (buf)
2730 {
4def3b35 2731 for (size_t c = 0; c <= len; c++)
24642831
VS
2732 {
2733 if (psz[c] > 0xFF)
2734 return (size_t)-1;
907173e5 2735 buf[c] = (char)psz[c];
24642831
VS
2736 }
2737 }
2738 else
2739 {
2740 for (size_t c = 0; c <= len; c++)
2741 {
2742 if (psz[c] > 0xFF)
2743 return (size_t)-1;
2744 }
f1339c56 2745 }
dccce9ea 2746
f1339c56 2747 return len;
6001e347
RR
2748}
2749
bde4baac
VZ
2750// ----------------------------------------------------------------------------
2751// globals
2752// ----------------------------------------------------------------------------
2753
2754#ifdef __WINDOWS__
2755 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
2756#elif defined(__WXMAC__) && !defined(__MACH__)
2757 static wxMBConv_mac wxConvLibcObj ;
bde4baac 2758#else
dcc8fac0 2759 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
2760#endif
2761
2762static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2763static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2764static wxMBConvUTF7 wxConvUTF7Obj;
2765static wxMBConvUTF8 wxConvUTF8Obj;
c12b7f79
VZ
2766
2767#ifdef __WXGTK20__
2768 static wxConvBrokenFileNames wxConvBrokenFileNamesObj;
2769#endif
bde4baac 2770
bde4baac
VZ
2771WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2772WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2773WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2774WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2775WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2776WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
f5a1953b
VZ
2777WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2778#ifdef __WXOSX__
ea8ce907
RR
2779 wxConvUTF8Obj;
2780#elif __WXGTK20__
2781 wxConvBrokenFileNamesObj;
f5a1953b 2782#else
ea8ce907 2783 wxConvLibcObj;
f5a1953b
VZ
2784#endif
2785
bde4baac
VZ
2786
2787#else // !wxUSE_WCHAR_T
2788
2789// stand-ins in absence of wchar_t
2790WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2791 wxConvISO8859_1,
2792 wxConvLocal,
2793 wxConvUTF8;
2794
2795#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
2796
2797