]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
use popen() instead of wxExecute(), it works inside wxYield() unlike the latter
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
f6bcfd97
BP
15// ============================================================================
16// declarations
17// ============================================================================
18
19// ----------------------------------------------------------------------------
20// headers
21// ----------------------------------------------------------------------------
22
14f355c2 23#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
24 #pragma implementation "strconv.h"
25#endif
26
27// For compilers that support precompilation, includes "wx.h".
28#include "wx/wxprec.h"
29
30#ifdef __BORLANDC__
31 #pragma hdrstop
32#endif
33
373658eb
VZ
34#ifndef WX_PRECOMP
35 #include "wx/intl.h"
36 #include "wx/log.h"
37#endif // WX_PRECOMP
38
bde4baac
VZ
39#include "wx/strconv.h"
40
41#if wxUSE_WCHAR_T
42
0a1c1e62 43#ifdef __WXMSW__
373658eb 44 #include "wx/msw/private.h"
7608a683
WS
45#endif
46
47#ifdef __WINDOWS__
13dd924a 48 #include "wx/msw/missing.h"
0a1c1e62
GRG
49#endif
50
1c193821 51#ifndef __WXWINCE__
1cd52418 52#include <errno.h>
1c193821
JS
53#endif
54
6001e347
RR
55#include <ctype.h>
56#include <string.h>
57#include <stdlib.h>
ea8ce907
RR
58#ifdef HAVE_LANGINFO_H
59 #include <langinfo.h>
60#endif
6001e347 61
e95354ec
VZ
62#if defined(__WIN32__) && !defined(__WXMICROWIN__)
63 #define wxHAVE_WIN32_MB2WC
64#endif // __WIN32__ but !__WXMICROWIN__
65
373658eb
VZ
66// ----------------------------------------------------------------------------
67// headers
68// ----------------------------------------------------------------------------
7af284fd 69
6001e347 70#ifdef __SALFORDC__
373658eb 71 #include <clib.h>
6001e347
RR
72#endif
73
b040e242 74#ifdef HAVE_ICONV
373658eb 75 #include <iconv.h>
b1d547eb 76 #include "wx/thread.h"
1cd52418 77#endif
1cd52418 78
373658eb
VZ
79#include "wx/encconv.h"
80#include "wx/fontmap.h"
7608a683 81#include "wx/utils.h"
373658eb 82
335d31e0 83#ifdef __WXMAC__
4227afa4
SC
84#include <ATSUnicode.h>
85#include <TextCommon.h>
86#include <TextEncodingConverter.h>
335d31e0
SC
87
88#include "wx/mac/private.h" // includes mac headers
89#endif
373658eb
VZ
90// ----------------------------------------------------------------------------
91// macros
92// ----------------------------------------------------------------------------
3e61dfb0 93
1cd52418 94#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
3a0d76bc 95#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
1cd52418
OK
96
97#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
98 #define WC_NAME "UCS4"
99 #define WC_BSWAP BSWAP_UCS4
100 #ifdef WORDS_BIGENDIAN
101 #define WC_NAME_BEST "UCS-4BE"
102 #else
103 #define WC_NAME_BEST "UCS-4LE"
104 #endif
1cd52418 105#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
106 #define WC_NAME "UTF16"
107 #define WC_BSWAP BSWAP_UTF16
a3f2769e 108 #define WC_UTF16
3a0d76bc
VS
109 #ifdef WORDS_BIGENDIAN
110 #define WC_NAME_BEST "UTF-16BE"
111 #else
112 #define WC_NAME_BEST "UTF-16LE"
113 #endif
bab1e722 114#else // sizeof(wchar_t) != 2 nor 4
bde4baac
VZ
115 // does this ever happen?
116 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1cd52418
OK
117#endif
118
373658eb
VZ
119// ============================================================================
120// implementation
121// ============================================================================
122
123// ----------------------------------------------------------------------------
c91830cb 124// UTF-16 en/decoding to/from UCS-4
373658eb 125// ----------------------------------------------------------------------------
6001e347 126
b0a6bb75 127
c91830cb 128static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 129{
dccce9ea 130 if (input<=0xffff)
4def3b35 131 {
999836aa
VZ
132 if (output)
133 *output = (wxUint16) input;
4def3b35 134 return 1;
dccce9ea
VZ
135 }
136 else if (input>=0x110000)
4def3b35
VS
137 {
138 return (size_t)-1;
dccce9ea
VZ
139 }
140 else
4def3b35 141 {
dccce9ea 142 if (output)
4def3b35 143 {
c91830cb 144 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
999836aa 145 *output = (wxUint16) ((input&0x3ff)+0xdc00);
4def3b35
VS
146 }
147 return 2;
1cd52418 148 }
1cd52418
OK
149}
150
c91830cb 151static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 152{
dccce9ea 153 if ((*input<0xd800) || (*input>0xdfff))
4def3b35
VS
154 {
155 output = *input;
156 return 1;
dccce9ea
VZ
157 }
158 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
4def3b35
VS
159 {
160 output = *input;
161 return (size_t)-1;
dccce9ea
VZ
162 }
163 else
4def3b35
VS
164 {
165 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
166 return 2;
167 }
1cd52418
OK
168}
169
b0a6bb75 170
f6bcfd97 171// ----------------------------------------------------------------------------
6001e347 172// wxMBConv
f6bcfd97 173// ----------------------------------------------------------------------------
2c53a80a
WS
174
175wxMBConv::~wxMBConv()
176{
177 // nothing to do here (necessary for Darwin linking probably)
178}
6001e347 179
6001e347
RR
180const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
181{
2b5f62a0 182 if ( psz )
6001e347 183 {
2b5f62a0
VZ
184 // calculate the length of the buffer needed first
185 size_t nLen = MB2WC(NULL, psz, 0);
186 if ( nLen != (size_t)-1 )
187 {
188 // now do the actual conversion
189 wxWCharBuffer buf(nLen);
635f33ce
VS
190 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
191 if ( nLen != (size_t)-1 )
192 {
193 return buf;
194 }
2b5f62a0 195 }
f6bcfd97 196 }
2b5f62a0
VZ
197
198 wxWCharBuffer buf((wchar_t *)NULL);
199
200 return buf;
6001e347
RR
201}
202
e5cceba0 203const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 204{
2b5f62a0
VZ
205 if ( pwz )
206 {
207 size_t nLen = WC2MB(NULL, pwz, 0);
208 if ( nLen != (size_t)-1 )
209 {
c91830cb 210 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
635f33ce
VS
211 nLen = WC2MB(buf.data(), pwz, nLen + 4);
212 if ( nLen != (size_t)-1 )
213 {
214 return buf;
215 }
2b5f62a0
VZ
216 }
217 }
218
219 wxCharBuffer buf((char *)NULL);
e5cceba0 220
e5cceba0 221 return buf;
6001e347
RR
222}
223
f5fb6871 224const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 225{
f5fb6871
RN
226 wxASSERT(pOutSize != NULL);
227
e4e3bbb4
RN
228 const char* szEnd = szString + nStringLen + 1;
229 const char* szPos = szString;
230 const char* szStart = szPos;
231
232 size_t nActualLength = 0;
f5fb6871
RN
233 size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
234
235 wxWCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
236
237 //Convert the string until the length() is reached, continuing the
238 //loop every time a null character is reached
239 while(szPos != szEnd)
240 {
241 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
242
243 //Get the length of the current (sub)string
244 size_t nLen = MB2WC(NULL, szPos, 0);
245
246 //Invalid conversion?
247 if( nLen == (size_t)-1 )
f5fb6871
RN
248 {
249 *pOutSize = 0;
250 theBuffer.data()[0u] = wxT('\0');
251 return theBuffer;
252 }
253
e4e3bbb4
RN
254
255 //Increase the actual length (+1 for current null character)
256 nActualLength += nLen + 1;
257
f5fb6871
RN
258 //if buffer too big, realloc the buffer
259 if (nActualLength > (nCurrentSize+1))
260 {
261 wxWCharBuffer theNewBuffer(nCurrentSize << 1);
262 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
263 theBuffer = theNewBuffer;
264 nCurrentSize <<= 1;
265 }
266
267 //Convert the current (sub)string
268 if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 269 {
f5fb6871
RN
270 *pOutSize = 0;
271 theBuffer.data()[0u] = wxT('\0');
272 return theBuffer;
e4e3bbb4
RN
273 }
274
275 //Increment to next (sub)string
276 //Note that we have to use strlen here instead of nLen
277 //here because XX2XX gives us the size of the output buffer,
278 //not neccessarly the length of the string
279 szPos += strlen(szPos) + 1;
280 }
281
f5fb6871
RN
282 //success - return actual length and the buffer
283 *pOutSize = nActualLength;
3698ae71 284 return theBuffer;
e4e3bbb4
RN
285}
286
f5fb6871 287const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 288{
f5fb6871
RN
289 wxASSERT(pOutSize != NULL);
290
e4e3bbb4
RN
291 const wchar_t* szEnd = szString + nStringLen + 1;
292 const wchar_t* szPos = szString;
293 const wchar_t* szStart = szPos;
294
295 size_t nActualLength = 0;
f5fb6871
RN
296 size_t nCurrentSize = nStringLen << 2; //try * 4 first
297
298 wxCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
299
300 //Convert the string until the length() is reached, continuing the
301 //loop every time a null character is reached
302 while(szPos != szEnd)
303 {
304 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
305
306 //Get the length of the current (sub)string
307 size_t nLen = WC2MB(NULL, szPos, 0);
308
309 //Invalid conversion?
310 if( nLen == (size_t)-1 )
f5fb6871
RN
311 {
312 *pOutSize = 0;
313 theBuffer.data()[0u] = wxT('\0');
314 return theBuffer;
315 }
e4e3bbb4
RN
316
317 //Increase the actual length (+1 for current null character)
318 nActualLength += nLen + 1;
3698ae71 319
f5fb6871
RN
320 //if buffer too big, realloc the buffer
321 if (nActualLength > (nCurrentSize+1))
322 {
323 wxCharBuffer theNewBuffer(nCurrentSize << 1);
324 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
325 theBuffer = theNewBuffer;
326 nCurrentSize <<= 1;
327 }
328
329 //Convert the current (sub)string
330 if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 331 {
f5fb6871
RN
332 *pOutSize = 0;
333 theBuffer.data()[0u] = wxT('\0');
334 return theBuffer;
e4e3bbb4
RN
335 }
336
337 //Increment to next (sub)string
338 //Note that we have to use wxWcslen here instead of nLen
339 //here because XX2XX gives us the size of the output buffer,
340 //not neccessarly the length of the string
341 szPos += wxWcslen(szPos) + 1;
342 }
343
f5fb6871
RN
344 //success - return actual length and the buffer
345 *pOutSize = nActualLength;
3698ae71 346 return theBuffer;
e4e3bbb4
RN
347}
348
6001e347 349// ----------------------------------------------------------------------------
bde4baac 350// wxMBConvLibc
6001e347
RR
351// ----------------------------------------------------------------------------
352
bde4baac
VZ
353size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
354{
355 return wxMB2WC(buf, psz, n);
356}
357
358size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
359{
360 return wxWC2MB(buf, psz, n);
361}
e1bfe89e
RR
362
363// ----------------------------------------------------------------------------
364// wxConvBrokenFileNames is made for GTK2 in Unicode mode when
365// files are accidentally written in an encoding which is not
366// the system encoding. Typically, the system encoding will be
3698ae71 367// UTF8 but there might be files stored in ISO8859-1 on disk.
e1bfe89e
RR
368// ----------------------------------------------------------------------------
369
370class wxConvBrokenFileNames: public wxMBConvLibc
371{
372public:
ea8ce907 373 wxConvBrokenFileNames() : m_utf8conv(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL) { }
e1bfe89e
RR
374 virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
375 virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const;
ea8ce907
RR
376 inline bool UseUTF8() const;
377private:
378 wxMBConvUTF8 m_utf8conv;
e1bfe89e
RR
379};
380
ea8ce907
RR
381bool wxConvBrokenFileNames::UseUTF8() const
382{
383#if defined HAVE_LANGINFO_H && defined CODESET
384 char *codeset = nl_langinfo(CODESET);
385 return strcmp(codeset, "UTF-8") == 0;
386#else
387 return false;
388#endif
389}
390
e1bfe89e
RR
391size_t wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const
392{
ea8ce907
RR
393 if (UseUTF8())
394 return m_utf8conv.MB2WC( outputBuf, psz, outputSize );
e1bfe89e 395 else
ea8ce907 396 return wxMBConvLibc::MB2WC( outputBuf, psz, outputSize );
e1bfe89e
RR
397}
398
399size_t wxConvBrokenFileNames::WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const
400{
ea8ce907
RR
401 if (UseUTF8())
402 return m_utf8conv.WC2MB( outputBuf, psz, outputSize );
403 else
404 return wxMBConvLibc::WC2MB( outputBuf, psz, outputSize );
e1bfe89e
RR
405}
406
bde4baac 407// ----------------------------------------------------------------------------
3698ae71 408// UTF-7
bde4baac 409// ----------------------------------------------------------------------------
6001e347 410
15f2ee32 411// Implementation (C) 2004 Fredrik Roubert
6001e347 412
15f2ee32
RN
413//
414// BASE64 decoding table
415//
// Maps a byte to its 6 bit BASE64 value, or to 0xff if the byte is not a
// BASE64 digit. One row per 16 byte values.
static const unsigned char utf7unb64[] =
{
    /* 0x00 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x10 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x20 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    /* 0x30 */ 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x40 */ 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    /* 0x50 */ 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x60 */ 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    /* 0x70 */ 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x80 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x90 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xa0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xb0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xc0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xd0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xe0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xf0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
451
452size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
453{
15f2ee32
RN
454 size_t len = 0;
455
456 while (*psz && ((!buf) || (len < n)))
457 {
458 unsigned char cc = *psz++;
459 if (cc != '+')
460 {
461 // plain ASCII char
462 if (buf)
463 *buf++ = cc;
464 len++;
465 }
466 else if (*psz == '-')
467 {
468 // encoded plus sign
469 if (buf)
470 *buf++ = cc;
471 len++;
472 psz++;
473 }
474 else
475 {
476 // BASE64 encoded string
477 bool lsb;
478 unsigned char c;
479 unsigned int d, l;
480 for (lsb = false, d = 0, l = 0;
481 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
482 {
483 d <<= 6;
484 d += cc;
485 for (l += 6; l >= 8; lsb = !lsb)
486 {
6356d52a 487 c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
488 if (lsb)
489 {
490 if (buf)
491 *buf++ |= c;
492 len ++;
493 }
494 else
495 if (buf)
6356d52a 496 *buf = (wchar_t)(c << 8);
15f2ee32
RN
497 }
498 }
499 if (*psz == '-')
500 psz++;
501 }
502 }
503 if (buf && (len < n))
504 *buf = 0;
505 return len;
6001e347
RR
506}
507
15f2ee32
RN
508//
509// BASE64 encoding table
510//
// Maps a 6 bit value (0..63) to the corresponding BASE64 digit.
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
};
522
523//
524// UTF-7 encoding table
525//
526// 0 - Set D (directly encoded characters)
527// 1 - Set O (optional direct characters)
528// 2 - whitespace characters (optional)
529// 3 - special characters
530//
// Encoding class of each 7 bit character, indexed by character code:
// 0 = directly encoded, 1 = optional direct, 2 = whitespace, 3 = special.
// Two rows per 16 character codes.
static const unsigned char utf7encode[128] =
{
    /* 0x00 */ 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x08 */ 3, 2, 2, 3, 3, 2, 3, 3,
    /* 0x10 */ 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x18 */ 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x20 */ 2, 1, 1, 1, 1, 1, 1, 0,
    /* 0x28 */ 0, 0, 1, 3, 0, 0, 0, 3,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x38 */ 0, 0, 0, 1, 1, 1, 1, 0,
    /* 0x40 */ 1, 0, 0, 0, 0, 0, 0, 0,
    /* 0x48 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x58 */ 0, 0, 0, 1, 3, 1, 1, 1,
    /* 0x60 */ 1, 0, 0, 0, 0, 0, 0, 0,
    /* 0x68 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x78 */ 0, 0, 0, 1, 1, 1, 3, 3
};
542
667e5b3e 543size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32
RN
544{
545
546
547 size_t len = 0;
548
549 while (*psz && ((!buf) || (len < n)))
550 {
551 wchar_t cc = *psz++;
552 if (cc < 0x80 && utf7encode[cc] < 1)
553 {
554 // plain ASCII char
555 if (buf)
556 *buf++ = (char)cc;
557 len++;
558 }
559#ifndef WC_UTF16
79c78d42 560 else if (((wxUint32)cc) > 0xffff)
6e394fc6 561 {
15f2ee32
RN
562 // no surrogate pair generation (yet?)
563 return (size_t)-1;
564 }
565#endif
566 else
567 {
568 if (buf)
569 *buf++ = '+';
570 len++;
571 if (cc != '+')
572 {
573 // BASE64 encode string
574 unsigned int lsb, d, l;
575 for (d = 0, l = 0;; psz++)
576 {
577 for (lsb = 0; lsb < 2; lsb ++)
578 {
579 d <<= 8;
580 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
581
582 for (l += 8; l >= 6; )
583 {
584 l -= 6;
585 if (buf)
586 *buf++ = utf7enb64[(d >> l) % 64];
587 len++;
588 }
589 }
590 cc = *psz;
591 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
592 break;
593 }
594 if (l != 0)
595 {
596 if (buf)
597 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
598 len++;
599 }
600 }
601 if (buf)
602 *buf++ = '-';
603 len++;
604 }
605 }
606 if (buf && (len < n))
607 *buf = 0;
608 return len;
6001e347
RR
609}
610
f6bcfd97 611// ----------------------------------------------------------------------------
6001e347 612// UTF-8
f6bcfd97 613// ----------------------------------------------------------------------------
6001e347 614
dccce9ea 615static wxUint32 utf8_max[]=
4def3b35 616 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347 617
3698ae71
VZ
618// boundaries of the private use area we use to (temporarily) remap invalid
619// characters invalid in a UTF-8 encoded string
ea8ce907
RR
620const wxUint32 wxUnicodePUA = 0x100000;
621const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
622
6001e347
RR
623size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
624{
4def3b35
VS
625 size_t len = 0;
626
dccce9ea 627 while (*psz && ((!buf) || (len < n)))
4def3b35 628 {
ea8ce907
RR
629 const char *opsz = psz;
630 bool invalid = false;
4def3b35
VS
631 unsigned char cc = *psz++, fc = cc;
632 unsigned cnt;
dccce9ea 633 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 634 fc <<= 1;
dccce9ea 635 if (!cnt)
4def3b35
VS
636 {
637 // plain ASCII char
dccce9ea 638 if (buf)
4def3b35
VS
639 *buf++ = cc;
640 len++;
dccce9ea
VZ
641 }
642 else
4def3b35
VS
643 {
644 cnt--;
dccce9ea 645 if (!cnt)
4def3b35
VS
646 {
647 // invalid UTF-8 sequence
ea8ce907 648 invalid = true;
dccce9ea
VZ
649 }
650 else
4def3b35
VS
651 {
652 unsigned ocnt = cnt - 1;
653 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 654 while (cnt--)
4def3b35 655 {
ea8ce907 656 cc = *psz;
dccce9ea 657 if ((cc & 0xC0) != 0x80)
4def3b35
VS
658 {
659 // invalid UTF-8 sequence
ea8ce907
RR
660 invalid = true;
661 break;
4def3b35 662 }
ea8ce907 663 psz++;
4def3b35
VS
664 res = (res << 6) | (cc & 0x3f);
665 }
ea8ce907 666 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
667 {
668 // illegal UTF-8 encoding
ea8ce907 669 invalid = true;
4def3b35 670 }
ea8ce907
RR
671 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
672 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
673 {
674 // if one of our PUA characters turns up externally
675 // it must also be treated as an illegal sequence
676 // (a bit like you have to escape an escape character)
677 invalid = true;
678 }
679 else
680 {
1cd52418 681#ifdef WC_UTF16
ea8ce907
RR
682 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
683 size_t pa = encode_utf16(res, (wxUint16 *)buf);
684 if (pa == (size_t)-1)
685 {
686 invalid = true;
687 }
688 else
689 {
690 if (buf)
691 buf += pa;
692 len += pa;
693 }
373658eb 694#else // !WC_UTF16
ea8ce907
RR
695 if (buf)
696 *buf++ = res;
697 len++;
373658eb 698#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
699 }
700 }
701 if (invalid)
702 {
703 if (m_options & MAP_INVALID_UTF8_TO_PUA)
704 {
705 while (opsz < psz && (!buf || len < n))
706 {
707#ifdef WC_UTF16
708 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
709 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
710 wxASSERT(pa != (size_t)-1);
711 if (buf)
712 buf += pa;
713 opsz++;
714 len += pa;
715#else
716 if (buf)
717 *buf++ = wxUnicodePUA + (unsigned char)*opsz;
718 opsz++;
719 len++;
720#endif
721 }
722 }
3698ae71 723 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
724 {
725 while (opsz < psz && (!buf || len < n))
726 {
3698ae71
VZ
727 if ( buf && len + 3 < n )
728 {
729 unsigned char n = *opsz;
730 *buf++ = L'\\';
731 *buf++ = L'0' + n / 0100;
732 *buf++ = L'0' + (n % 0100) / 010;
733 *buf++ = L'0' + n % 010;
734 }
ea8ce907
RR
735 opsz++;
736 len += 4;
737 }
738 }
3698ae71 739 else // MAP_INVALID_UTF8_NOT
ea8ce907
RR
740 {
741 return (size_t)-1;
742 }
4def3b35
VS
743 }
744 }
6001e347 745 }
dccce9ea 746 if (buf && (len < n))
4def3b35
VS
747 *buf = 0;
748 return len;
6001e347
RR
749}
750
3698ae71
VZ
// Returns true if wch is one of the octal digits '0' to '7'.
static inline bool isoctal(wchar_t wch)
{
    return wch >= L'0' && wch <= L'7';
}
755
6001e347
RR
756size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
757{
4def3b35 758 size_t len = 0;
6001e347 759
dccce9ea 760 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
761 {
762 wxUint32 cc;
1cd52418 763#ifdef WC_UTF16
b5153fd8
VZ
764 // cast is ok for WC_UTF16
765 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 766 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 767#else
4def3b35
VS
768 cc=(*psz++) & 0x7fffffff;
769#endif
3698ae71
VZ
770
771 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
772 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 773 {
dccce9ea 774 if (buf)
ea8ce907 775 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 776 len++;
3698ae71
VZ
777 }
778 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
779 cc == L'\\' &&
780 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 781 {
dccce9ea 782 if (buf)
3698ae71
VZ
783 {
784 *buf++ = (char) (psz[0] - L'0')*0100 +
785 (psz[1] - L'0')*010 +
786 (psz[2] - L'0');
787 }
788
789 psz += 3;
ea8ce907
RR
790 len++;
791 }
792 else
793 {
794 unsigned cnt;
795 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
796 if (!cnt)
4def3b35 797 {
ea8ce907
RR
798 // plain ASCII char
799 if (buf)
800 *buf++ = (char) cc;
801 len++;
802 }
803
804 else
805 {
806 len += cnt + 1;
807 if (buf)
808 {
809 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
810 while (cnt--)
811 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
812 }
4def3b35
VS
813 }
814 }
6001e347 815 }
4def3b35 816
3698ae71
VZ
817 if (buf && (len<n))
818 *buf = 0;
adb45366 819
4def3b35 820 return len;
6001e347
RR
821}
822
c91830cb
VZ
823// ----------------------------------------------------------------------------
824// UTF-16
825// ----------------------------------------------------------------------------
826
827#ifdef WORDS_BIGENDIAN
bde4baac
VZ
828 #define wxMBConvUTF16straight wxMBConvUTF16BE
829 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 830#else
bde4baac
VZ
831 #define wxMBConvUTF16swap wxMBConvUTF16BE
832 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
833#endif
834
835
c91830cb
VZ
836#ifdef WC_UTF16
837
c91830cb
VZ
838// copy 16bit MB to 16bit String
839size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
840{
841 size_t len=0;
842
843 while (*(wxUint16*)psz && (!buf || len < n))
844 {
845 if (buf)
846 *buf++ = *(wxUint16*)psz;
847 len++;
848
849 psz += sizeof(wxUint16);
850 }
851 if (buf && len<n) *buf=0;
852
853 return len;
854}
855
856
857// copy 16bit String to 16bit MB
858size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
859{
860 size_t len=0;
861
862 while (*psz && (!buf || len < n))
863 {
864 if (buf)
865 {
866 *(wxUint16*)buf = *psz;
867 buf += sizeof(wxUint16);
868 }
869 len += sizeof(wxUint16);
870 psz++;
871 }
872 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
873
874 return len;
875}
876
877
878// swap 16bit MB to 16bit String
879size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
880{
881 size_t len=0;
882
883 while (*(wxUint16*)psz && (!buf || len < n))
884 {
885 if (buf)
886 {
887 ((char *)buf)[0] = psz[1];
888 ((char *)buf)[1] = psz[0];
889 buf++;
890 }
891 len++;
892 psz += sizeof(wxUint16);
893 }
894 if (buf && len<n) *buf=0;
895
896 return len;
897}
898
899
900// swap 16bit MB to 16bit String
901size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
902{
903 size_t len=0;
904
905 while (*psz && (!buf || len < n))
906 {
907 if (buf)
908 {
909 *buf++ = ((char*)psz)[1];
910 *buf++ = ((char*)psz)[0];
911 }
912 len += sizeof(wxUint16);
913 psz++;
914 }
915 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
916
917 return len;
918}
919
920
921#else // WC_UTF16
922
923
924// copy 16bit MB to 32bit String
925size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
926{
927 size_t len=0;
928
929 while (*(wxUint16*)psz && (!buf || len < n))
930 {
931 wxUint32 cc;
932 size_t pa=decode_utf16((wxUint16*)psz, cc);
933 if (pa == (size_t)-1)
934 return pa;
935
936 if (buf)
937 *buf++ = cc;
938 len++;
939 psz += pa * sizeof(wxUint16);
940 }
941 if (buf && len<n) *buf=0;
942
943 return len;
944}
945
946
947// copy 32bit String to 16bit MB
948size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
949{
950 size_t len=0;
951
952 while (*psz && (!buf || len < n))
953 {
954 wxUint16 cc[2];
955 size_t pa=encode_utf16(*psz, cc);
956
957 if (pa == (size_t)-1)
958 return pa;
959
960 if (buf)
961 {
69b80d28 962 *(wxUint16*)buf = cc[0];
b5153fd8 963 buf += sizeof(wxUint16);
c91830cb 964 if (pa > 1)
69b80d28
VZ
965 {
966 *(wxUint16*)buf = cc[1];
967 buf += sizeof(wxUint16);
968 }
c91830cb
VZ
969 }
970
971 len += pa*sizeof(wxUint16);
972 psz++;
973 }
974 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
975
976 return len;
977}
978
979
980// swap 16bit MB to 32bit String
981size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
982{
983 size_t len=0;
984
985 while (*(wxUint16*)psz && (!buf || len < n))
986 {
987 wxUint32 cc;
988 char tmp[4];
989 tmp[0]=psz[1]; tmp[1]=psz[0];
990 tmp[2]=psz[3]; tmp[3]=psz[2];
991
992 size_t pa=decode_utf16((wxUint16*)tmp, cc);
993 if (pa == (size_t)-1)
994 return pa;
995
996 if (buf)
997 *buf++ = cc;
998
999 len++;
1000 psz += pa * sizeof(wxUint16);
1001 }
1002 if (buf && len<n) *buf=0;
1003
1004 return len;
1005}
1006
1007
1008// swap 32bit String to 16bit MB
1009size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1010{
1011 size_t len=0;
1012
1013 while (*psz && (!buf || len < n))
1014 {
1015 wxUint16 cc[2];
1016 size_t pa=encode_utf16(*psz, cc);
1017
1018 if (pa == (size_t)-1)
1019 return pa;
1020
1021 if (buf)
1022 {
1023 *buf++ = ((char*)cc)[1];
1024 *buf++ = ((char*)cc)[0];
1025 if (pa > 1)
1026 {
1027 *buf++ = ((char*)cc)[3];
1028 *buf++ = ((char*)cc)[2];
1029 }
1030 }
1031
1032 len += pa*sizeof(wxUint16);
1033 psz++;
1034 }
1035 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1036
1037 return len;
1038}
1039
1040#endif // WC_UTF16
1041
1042
1043// ----------------------------------------------------------------------------
1044// UTF-32
1045// ----------------------------------------------------------------------------
1046
1047#ifdef WORDS_BIGENDIAN
1048#define wxMBConvUTF32straight wxMBConvUTF32BE
1049#define wxMBConvUTF32swap wxMBConvUTF32LE
1050#else
1051#define wxMBConvUTF32swap wxMBConvUTF32BE
1052#define wxMBConvUTF32straight wxMBConvUTF32LE
1053#endif
1054
1055
1056WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1057WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1058
1059
1060#ifdef WC_UTF16
1061
1062// copy 32bit MB to 16bit String
1063size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1064{
1065 size_t len=0;
1066
1067 while (*(wxUint32*)psz && (!buf || len < n))
1068 {
1069 wxUint16 cc[2];
1070
1071 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1072 if (pa == (size_t)-1)
1073 return pa;
1074
1075 if (buf)
1076 {
1077 *buf++ = cc[0];
1078 if (pa > 1)
1079 *buf++ = cc[1];
1080 }
1081 len += pa;
1082 psz += sizeof(wxUint32);
1083 }
1084 if (buf && len<n) *buf=0;
1085
1086 return len;
1087}
1088
1089
1090// copy 16bit String to 32bit MB
1091size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1092{
1093 size_t len=0;
1094
1095 while (*psz && (!buf || len < n))
1096 {
1097 wxUint32 cc;
1098
b5153fd8
VZ
1099 // cast is ok for WC_UTF16
1100 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
1101 if (pa == (size_t)-1)
1102 return pa;
1103
1104 if (buf)
1105 {
1106 *(wxUint32*)buf = cc;
1107 buf += sizeof(wxUint32);
1108 }
1109 len += sizeof(wxUint32);
1110 psz += pa;
1111 }
b5153fd8
VZ
1112
1113 if (buf && len<=n-sizeof(wxUint32))
1114 *(wxUint32*)buf=0;
c91830cb
VZ
1115
1116 return len;
1117}
1118
1119
1120
1121// swap 32bit MB to 16bit String
1122size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1123{
1124 size_t len=0;
1125
1126 while (*(wxUint32*)psz && (!buf || len < n))
1127 {
1128 char tmp[4];
1129 tmp[0] = psz[3]; tmp[1] = psz[2];
1130 tmp[2] = psz[1]; tmp[3] = psz[0];
1131
1132
1133 wxUint16 cc[2];
1134
1135 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1136 if (pa == (size_t)-1)
1137 return pa;
1138
1139 if (buf)
1140 {
1141 *buf++ = cc[0];
1142 if (pa > 1)
1143 *buf++ = cc[1];
1144 }
1145 len += pa;
1146 psz += sizeof(wxUint32);
1147 }
b5153fd8
VZ
1148
1149 if (buf && len<n)
1150 *buf=0;
c91830cb
VZ
1151
1152 return len;
1153}
1154
1155
1156// swap 16bit String to 32bit MB
1157size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1158{
1159 size_t len=0;
1160
1161 while (*psz && (!buf || len < n))
1162 {
1163 char cc[4];
1164
b5153fd8
VZ
1165 // cast is ok for WC_UTF16
1166 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
1167 if (pa == (size_t)-1)
1168 return pa;
1169
1170 if (buf)
1171 {
1172 *buf++ = cc[3];
1173 *buf++ = cc[2];
1174 *buf++ = cc[1];
1175 *buf++ = cc[0];
1176 }
1177 len += sizeof(wxUint32);
1178 psz += pa;
1179 }
b5153fd8
VZ
1180
1181 if (buf && len<=n-sizeof(wxUint32))
1182 *(wxUint32*)buf=0;
c91830cb
VZ
1183
1184 return len;
1185}
1186
1187#else // WC_UTF16
1188
1189
1190// copy 32bit MB to 32bit String
1191size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1192{
1193 size_t len=0;
1194
1195 while (*(wxUint32*)psz && (!buf || len < n))
1196 {
1197 if (buf)
1198 *buf++ = *(wxUint32*)psz;
1199 len++;
1200 psz += sizeof(wxUint32);
1201 }
b5153fd8
VZ
1202
1203 if (buf && len<n)
1204 *buf=0;
c91830cb
VZ
1205
1206 return len;
1207}
1208
1209
1210// copy 32bit String to 32bit MB
1211size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1212{
1213 size_t len=0;
1214
1215 while (*psz && (!buf || len < n))
1216 {
1217 if (buf)
1218 {
1219 *(wxUint32*)buf = *psz;
1220 buf += sizeof(wxUint32);
1221 }
1222
1223 len += sizeof(wxUint32);
1224 psz++;
1225 }
1226
b5153fd8
VZ
1227 if (buf && len<=n-sizeof(wxUint32))
1228 *(wxUint32*)buf=0;
c91830cb
VZ
1229
1230 return len;
1231}
1232
1233
1234// swap 32bit MB to 32bit String
1235size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1236{
1237 size_t len=0;
1238
1239 while (*(wxUint32*)psz && (!buf || len < n))
1240 {
1241 if (buf)
1242 {
1243 ((char *)buf)[0] = psz[3];
1244 ((char *)buf)[1] = psz[2];
1245 ((char *)buf)[2] = psz[1];
1246 ((char *)buf)[3] = psz[0];
1247 buf++;
1248 }
1249 len++;
1250 psz += sizeof(wxUint32);
1251 }
b5153fd8
VZ
1252
1253 if (buf && len<n)
1254 *buf=0;
c91830cb
VZ
1255
1256 return len;
1257}
1258
1259
1260// swap 32bit String to 32bit MB
1261size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1262{
1263 size_t len=0;
1264
1265 while (*psz && (!buf || len < n))
1266 {
1267 if (buf)
1268 {
1269 *buf++ = ((char *)psz)[3];
1270 *buf++ = ((char *)psz)[2];
1271 *buf++ = ((char *)psz)[1];
1272 *buf++ = ((char *)psz)[0];
1273 }
1274 len += sizeof(wxUint32);
1275 psz++;
1276 }
b5153fd8
VZ
1277
1278 if (buf && len<=n-sizeof(wxUint32))
1279 *(wxUint32*)buf=0;
c91830cb
VZ
1280
1281 return len;
1282}
1283
1284
1285#endif // WC_UTF16
1286
1287
36acb880
VZ
1288// ============================================================================
1289// The classes doing conversion using the iconv_xxx() functions
1290// ============================================================================
3caec1bb 1291
b040e242 1292#ifdef HAVE_ICONV
3a0d76bc 1293
b1d547eb
VS
1294// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1295// E2BIG if output buffer is _exactly_ as big as needed. Such case is
1296// (unless there's yet another bug in glibc) the only case when iconv()
1297// returns with (size_t)-1 (which means error) and says there are 0 bytes
1298// left in the input buffer -- when _real_ error occurs,
1299// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1300// iconv() failure.
3caec1bb
VS
1301// [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft): true if an iconv() call returning cres and
// leaving bufLeft input bytes unconverted must be treated as a failure.
// The arguments are parenthesized so that arbitrary expressions can be passed.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft) ((cres) == (size_t)-1)
#endif

// cast the address of an input pointer to the type iconv() expects for its
// second argument (char ** or const char ** depending on the platform)
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
36acb880
VZ
1310
1311// ----------------------------------------------------------------------------
e95354ec 1312// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1313// ----------------------------------------------------------------------------
1314
e95354ec 1315class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1316{
1317public:
e95354ec
VZ
1318 wxMBConv_iconv(const wxChar *name);
1319 virtual ~wxMBConv_iconv();
36acb880 1320
bde4baac
VZ
1321 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1322 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1323
e95354ec 1324 bool IsOk() const
36acb880
VZ
1325 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1326
1327protected:
1328 // the iconv handlers used to translate from multibyte to wide char and in
1329 // the other direction
1330 iconv_t m2w,
1331 w2m;
b1d547eb
VS
1332#if wxUSE_THREADS
1333 // guards access to m2w and w2m objects
1334 wxMutex m_iconvMutex;
1335#endif
36acb880
VZ
1336
1337private:
e95354ec 1338 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
1339 // available on this machine, it will remain NULL
1340 static const char *ms_wcCharsetName;
1341
1342 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1343 // different endian-ness than the native one
405d8f46 1344 static bool ms_wcNeedsSwap;
36acb880
VZ
1345};
1346
e95354ec
VZ
1347const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1348bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1349
e95354ec 1350wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 1351{
04c79127
RR
1352 // Do it the hard way
1353 char cname[100];
1354 for (size_t i = 0; i < wxStrlen(name)+1; i++)
1355 cname[i] = (char) name[i];
1356
36acb880
VZ
1357 // check for charset that represents wchar_t:
1358 if (ms_wcCharsetName == NULL)
f1339c56 1359 {
e95354ec 1360 ms_wcNeedsSwap = false;
dccce9ea 1361
36acb880
VZ
1362 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1363 ms_wcCharsetName = WC_NAME_BEST;
04c79127 1364 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 1365
36acb880
VZ
1366 if (m2w == (iconv_t)-1)
1367 {
1368 // try charset w/o bytesex info (e.g. "UCS4")
1369 // and check for bytesex ourselves:
1370 ms_wcCharsetName = WC_NAME;
04c79127 1371 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
1372
1373 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
1374 if (m2w == (iconv_t)-1)
1375 {
36acb880 1376 ms_wcCharsetName = "WCHAR_T";
04c79127 1377 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 1378 }
3a0d76bc 1379
36acb880
VZ
1380 if (m2w != (iconv_t)-1)
1381 {
1382 char buf[2], *bufPtr;
1383 wchar_t wbuf[2], *wbufPtr;
1384 size_t insz, outsz;
1385 size_t res;
1386
1387 buf[0] = 'A';
1388 buf[1] = 0;
1389 wbuf[0] = 0;
1390 insz = 2;
1391 outsz = SIZEOF_WCHAR_T * 2;
1392 wbufPtr = wbuf;
1393 bufPtr = buf;
1394
1395 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1396 (char**)&wbufPtr, &outsz);
1397
1398 if (ICONV_FAILED(res, insz))
3a0d76bc 1399 {
36acb880
VZ
1400 ms_wcCharsetName = NULL;
1401 wxLogLastError(wxT("iconv"));
2b5f62a0 1402 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
1403 }
1404 else
1405 {
36acb880 1406 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
1407 }
1408 }
36acb880
VZ
1409 else
1410 {
1411 ms_wcCharsetName = NULL;
373658eb 1412
77ffb593 1413 // VS: we must not output an error here, since wxWidgets will safely
957686c8
VS
1414 // fall back to using wxEncodingConverter.
1415 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1416 //wxLogError(
36acb880 1417 }
3a0d76bc 1418 }
36acb880 1419 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 1420 }
36acb880 1421 else // we already have ms_wcCharsetName
3caec1bb 1422 {
04c79127 1423 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 1424 }
dccce9ea 1425
36acb880
VZ
1426 // NB: don't ever pass NULL to iconv_open(), it may crash!
1427 if ( ms_wcCharsetName )
f1339c56 1428 {
04c79127 1429 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 1430 }
405d8f46
VZ
1431 else
1432 {
1433 w2m = (iconv_t)-1;
1434 }
36acb880 1435}
3caec1bb 1436
e95354ec 1437wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
1438{
1439 if ( m2w != (iconv_t)-1 )
1440 iconv_close(m2w);
1441 if ( w2m != (iconv_t)-1 )
1442 iconv_close(w2m);
1443}
3a0d76bc 1444
bde4baac 1445size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1446{
b1d547eb
VS
1447#if wxUSE_THREADS
1448 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1449 // Unfortunately there is a couple of global wxCSConv objects such as
1450 // wxConvLocal that are used all over wx code, so we have to make sure
1451 // the handle is used by at most one thread at the time. Otherwise
1452 // only a few wx classes would be safe to use from non-main threads
1453 // as MB<->WC conversion would fail "randomly".
1454 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1455#endif
3698ae71 1456
36acb880
VZ
1457 size_t inbuf = strlen(psz);
1458 size_t outbuf = n * SIZEOF_WCHAR_T;
1459 size_t res, cres;
1460 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1461 wchar_t *bufPtr = buf;
1462 const char *pszPtr = psz;
1463
1464 if (buf)
1465 {
1466 // have destination buffer, convert there
1467 cres = iconv(m2w,
1468 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1469 (char**)&bufPtr, &outbuf);
1470 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1471
36acb880 1472 if (ms_wcNeedsSwap)
3a0d76bc 1473 {
36acb880
VZ
1474 // convert to native endianness
1475 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 1476 }
adb45366 1477
49dd9820
VS
1478 // NB: iconv was given only strlen(psz) characters on input, and so
1479 // it couldn't convert the trailing zero. Let's do it ourselves
1480 // if there's some room left for it in the output buffer.
1481 if (res < n)
1482 buf[res] = 0;
36acb880
VZ
1483 }
1484 else
1485 {
1486 // no destination buffer... convert using temp buffer
1487 // to calculate destination buffer requirement
1488 wchar_t tbuf[8];
1489 res = 0;
1490 do {
1491 bufPtr = tbuf;
1492 outbuf = 8*SIZEOF_WCHAR_T;
1493
1494 cres = iconv(m2w,
1495 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1496 (char**)&bufPtr, &outbuf );
1497
1498 res += 8-(outbuf/SIZEOF_WCHAR_T);
1499 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1500 }
dccce9ea 1501
36acb880 1502 if (ICONV_FAILED(cres, inbuf))
f1339c56 1503 {
36acb880
VZ
1504 //VS: it is ok if iconv fails, hence trace only
1505 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1506 return (size_t)-1;
1507 }
1508
1509 return res;
1510}
1511
bde4baac 1512size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1513{
b1d547eb
VS
1514#if wxUSE_THREADS
1515 // NB: explained in MB2WC
1516 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1517#endif
3698ae71 1518
f8d791e0 1519 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1520 size_t outbuf = n;
1521 size_t res, cres;
3a0d76bc 1522
36acb880 1523 wchar_t *tmpbuf = 0;
3caec1bb 1524
36acb880
VZ
1525 if (ms_wcNeedsSwap)
1526 {
1527 // need to copy to temp buffer to switch endianness
1528 // this absolutely doesn't rock!
1529 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1530 // could be in read-only memory, or be accessed in some other thread)
1531 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1532 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1533 WC_BSWAP(tmpbuf, inbuf)
1534 psz=tmpbuf;
1535 }
3a0d76bc 1536
36acb880
VZ
1537 if (buf)
1538 {
1539 // have destination buffer, convert there
1540 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1541
36acb880 1542 res = n-outbuf;
adb45366 1543
49dd9820
VS
1544 // NB: iconv was given only wcslen(psz) characters on input, and so
1545 // it couldn't convert the trailing zero. Let's do it ourselves
1546 // if there's some room left for it in the output buffer.
1547 if (res < n)
1548 buf[0] = 0;
36acb880
VZ
1549 }
1550 else
1551 {
1552 // no destination buffer... convert using temp buffer
1553 // to calculate destination buffer requirement
1554 char tbuf[16];
1555 res = 0;
1556 do {
1557 buf = tbuf; outbuf = 16;
1558
1559 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1560
36acb880
VZ
1561 res += 16 - outbuf;
1562 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1563 }
dccce9ea 1564
36acb880
VZ
1565 if (ms_wcNeedsSwap)
1566 {
1567 free(tmpbuf);
1568 }
dccce9ea 1569
36acb880
VZ
1570 if (ICONV_FAILED(cres, inbuf))
1571 {
1572 //VS: it is ok if iconv fails, hence trace only
1573 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1574 return (size_t)-1;
1575 }
1576
1577 return res;
1578}
1579
b040e242 1580#endif // HAVE_ICONV
36acb880 1581
e95354ec 1582
36acb880
VZ
1583// ============================================================================
1584// Win32 conversion classes
1585// ============================================================================
1cd52418 1586
e95354ec 1587#ifdef wxHAVE_WIN32_MB2WC
373658eb 1588
8b04d4c4 1589// from utils.cpp
d775fa82 1590#if wxUSE_FONTMAP
8b04d4c4
VZ
1591extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1592extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1593#endif
373658eb 1594
e95354ec 1595class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1596{
1597public:
bde4baac
VZ
1598 wxMBConv_win32()
1599 {
1600 m_CodePage = CP_ACP;
1601 }
1602
7608a683 1603#if wxUSE_FONTMAP
e95354ec 1604 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1605 {
1606 m_CodePage = wxCharsetToCodepage(name);
1607 }
dccce9ea 1608
e95354ec 1609 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1610 {
1611 m_CodePage = wxEncodingToCodepage(encoding);
1612 }
7608a683 1613#endif
8b04d4c4 1614
bde4baac 1615 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1616 {
02272c9c
VZ
1617 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1618 // the behaviour is not compatible with the Unix version (using iconv)
1619 // and break the library itself, e.g. wxTextInputStream::NextChar()
1620 // wouldn't work if reading an incomplete MB char didn't result in an
1621 // error
667e5b3e
VZ
1622 //
1623 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1624 // an error (tested under Windows Server 2003) and apparently it is
1625 // done on purpose, i.e. the function accepts any input in this case
1626 // and although I'd prefer to return error on ill-formed output, our
1627 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1628 // explicitly ill-formed according to RFC 2152) neither so we don't
1629 // even have any fallback here...
1630 int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1631
2b5f62a0
VZ
1632 const size_t len = ::MultiByteToWideChar
1633 (
1634 m_CodePage, // code page
667e5b3e 1635 flags, // flags: fall on error
2b5f62a0
VZ
1636 psz, // input string
1637 -1, // its length (NUL-terminated)
b4da152e 1638 buf, // output string
2b5f62a0
VZ
1639 buf ? n : 0 // size of output buffer
1640 );
1641
03a991bc
VZ
1642 // note that it returns count of written chars for buf != NULL and size
1643 // of the needed buffer for buf == NULL so in either case the length of
1644 // the string (which never includes the terminating NUL) is one less
1645 return len ? len - 1 : (size_t)-1;
f1339c56 1646 }
dccce9ea 1647
13dd924a 1648 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1649 {
13dd924a
VZ
1650 /*
1651 we have a problem here: by default, WideCharToMultiByte() may
1652 replace characters unrepresentable in the target code page with bad
1653 quality approximations such as turning "1/2" symbol (U+00BD) into
1654 "1" for the code pages which don't have it and we, obviously, want
1655 to avoid this at any price
d775fa82 1656
13dd924a
VZ
1657 the trouble is that this function does it _silently_, i.e. it won't
1658 even tell us whether it did or not... Win98/2000 and higher provide
1659 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1660 we have to resort to a round trip, i.e. check that converting back
1661 results in the same string -- this is, of course, expensive but
1662 otherwise we simply can't be sure to not garble the data.
1663 */
1664
1665 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1666 // it doesn't work with CJK encodings (which we test for rather roughly
1667 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1668 // supporting it
907173e5
WS
1669 BOOL usedDef wxDUMMY_INITIALIZE(false);
1670 BOOL *pUsedDef;
13dd924a
VZ
1671 int flags;
1672 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1673 {
1674 // it's our lucky day
1675 flags = WC_NO_BEST_FIT_CHARS;
1676 pUsedDef = &usedDef;
1677 }
1678 else // old system or unsupported encoding
1679 {
1680 flags = 0;
1681 pUsedDef = NULL;
1682 }
1683
2b5f62a0
VZ
1684 const size_t len = ::WideCharToMultiByte
1685 (
1686 m_CodePage, // code page
13dd924a
VZ
1687 flags, // either none or no best fit
1688 pwz, // input string
2b5f62a0
VZ
1689 -1, // it is (wide) NUL-terminated
1690 buf, // output buffer
1691 buf ? n : 0, // and its size
1692 NULL, // default "replacement" char
13dd924a 1693 pUsedDef // [out] was it used?
2b5f62a0
VZ
1694 );
1695
13dd924a
VZ
1696 if ( !len )
1697 {
1698 // function totally failed
1699 return (size_t)-1;
1700 }
1701
1702 // if we were really converting, check if we succeeded
1703 if ( buf )
1704 {
1705 if ( flags )
1706 {
1707 // check if the conversion failed, i.e. if any replacements
1708 // were done
1709 if ( usedDef )
1710 return (size_t)-1;
1711 }
1712 else // we must resort to double tripping...
1713 {
1714 wxWCharBuffer wcBuf(n);
1715 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1716 wcscmp(wcBuf, pwz) != 0 )
1717 {
1718 // we didn't obtain the same thing we started from, hence
1719 // the conversion was lossy and we consider that it failed
1720 return (size_t)-1;
1721 }
1722 }
1723 }
1724
03a991bc 1725 // see the comment above for the reason of "len - 1"
13dd924a 1726 return len - 1;
f1339c56 1727 }
dccce9ea 1728
13dd924a
VZ
1729 bool IsOk() const { return m_CodePage != -1; }
1730
1731private:
1732 static bool CanUseNoBestFit()
1733 {
1734 static int s_isWin98Or2k = -1;
1735
1736 if ( s_isWin98Or2k == -1 )
1737 {
1738 int verMaj, verMin;
1739 switch ( wxGetOsVersion(&verMaj, &verMin) )
1740 {
1741 case wxWIN95:
1742 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1743 break;
1744
1745 case wxWINDOWS_NT:
1746 s_isWin98Or2k = verMaj >= 5;
1747 break;
1748
1749 default:
1750 // unknown, be conseravtive by default
1751 s_isWin98Or2k = 0;
1752 }
1753
1754 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1755 }
1756
1757 return s_isWin98Or2k == 1;
1758 }
f1339c56 1759
b1d66b54 1760 long m_CodePage;
1cd52418 1761};
e95354ec
VZ
1762
1763#endif // wxHAVE_WIN32_MB2WC
1764
f7e98dee
RN
1765// ============================================================================
1766// Cocoa conversion classes
1767// ============================================================================
1768
1769#if defined(__WXCOCOA__)
1770
// RN:  There is no UTF-32 support in either Core Foundation or Cocoa.
//      Strangely enough, Core Foundation uses UTF-32 internally quite a
//      bit - it's just not public (yet).
1774
1775#include <CoreFoundation/CFString.h>
1776#include <CoreFoundation/CFStringEncodingExt.h>
1777
1778CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 1779{
638357a0 1780 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ecd9653b
WS
1781 if ( encoding == wxFONTENCODING_DEFAULT )
1782 {
638357a0 1783 enc = CFStringGetSystemEncoding();
ecd9653b
WS
1784 }
1785 else switch( encoding)
1786 {
1787 case wxFONTENCODING_ISO8859_1 :
1788 enc = kCFStringEncodingISOLatin1 ;
1789 break ;
1790 case wxFONTENCODING_ISO8859_2 :
1791 enc = kCFStringEncodingISOLatin2;
1792 break ;
1793 case wxFONTENCODING_ISO8859_3 :
1794 enc = kCFStringEncodingISOLatin3 ;
1795 break ;
1796 case wxFONTENCODING_ISO8859_4 :
1797 enc = kCFStringEncodingISOLatin4;
1798 break ;
1799 case wxFONTENCODING_ISO8859_5 :
1800 enc = kCFStringEncodingISOLatinCyrillic;
1801 break ;
1802 case wxFONTENCODING_ISO8859_6 :
1803 enc = kCFStringEncodingISOLatinArabic;
1804 break ;
1805 case wxFONTENCODING_ISO8859_7 :
1806 enc = kCFStringEncodingISOLatinGreek;
1807 break ;
1808 case wxFONTENCODING_ISO8859_8 :
1809 enc = kCFStringEncodingISOLatinHebrew;
1810 break ;
1811 case wxFONTENCODING_ISO8859_9 :
1812 enc = kCFStringEncodingISOLatin5;
1813 break ;
1814 case wxFONTENCODING_ISO8859_10 :
1815 enc = kCFStringEncodingISOLatin6;
1816 break ;
1817 case wxFONTENCODING_ISO8859_11 :
1818 enc = kCFStringEncodingISOLatinThai;
1819 break ;
1820 case wxFONTENCODING_ISO8859_13 :
1821 enc = kCFStringEncodingISOLatin7;
1822 break ;
1823 case wxFONTENCODING_ISO8859_14 :
1824 enc = kCFStringEncodingISOLatin8;
1825 break ;
1826 case wxFONTENCODING_ISO8859_15 :
1827 enc = kCFStringEncodingISOLatin9;
1828 break ;
1829
1830 case wxFONTENCODING_KOI8 :
1831 enc = kCFStringEncodingKOI8_R;
1832 break ;
1833 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1834 enc = kCFStringEncodingDOSRussian;
1835 break ;
1836
1837// case wxFONTENCODING_BULGARIAN :
1838// enc = ;
1839// break ;
1840
1841 case wxFONTENCODING_CP437 :
1842 enc =kCFStringEncodingDOSLatinUS ;
1843 break ;
1844 case wxFONTENCODING_CP850 :
1845 enc = kCFStringEncodingDOSLatin1;
1846 break ;
1847 case wxFONTENCODING_CP852 :
1848 enc = kCFStringEncodingDOSLatin2;
1849 break ;
1850 case wxFONTENCODING_CP855 :
1851 enc = kCFStringEncodingDOSCyrillic;
1852 break ;
1853 case wxFONTENCODING_CP866 :
1854 enc =kCFStringEncodingDOSRussian ;
1855 break ;
1856 case wxFONTENCODING_CP874 :
1857 enc = kCFStringEncodingDOSThai;
1858 break ;
1859 case wxFONTENCODING_CP932 :
1860 enc = kCFStringEncodingDOSJapanese;
1861 break ;
1862 case wxFONTENCODING_CP936 :
1863 enc =kCFStringEncodingDOSChineseSimplif ;
1864 break ;
1865 case wxFONTENCODING_CP949 :
1866 enc = kCFStringEncodingDOSKorean;
1867 break ;
1868 case wxFONTENCODING_CP950 :
1869 enc = kCFStringEncodingDOSChineseTrad;
1870 break ;
ecd9653b
WS
1871 case wxFONTENCODING_CP1250 :
1872 enc = kCFStringEncodingWindowsLatin2;
1873 break ;
1874 case wxFONTENCODING_CP1251 :
1875 enc =kCFStringEncodingWindowsCyrillic ;
1876 break ;
1877 case wxFONTENCODING_CP1252 :
1878 enc =kCFStringEncodingWindowsLatin1 ;
1879 break ;
1880 case wxFONTENCODING_CP1253 :
1881 enc = kCFStringEncodingWindowsGreek;
1882 break ;
1883 case wxFONTENCODING_CP1254 :
1884 enc = kCFStringEncodingWindowsLatin5;
1885 break ;
1886 case wxFONTENCODING_CP1255 :
1887 enc =kCFStringEncodingWindowsHebrew ;
1888 break ;
1889 case wxFONTENCODING_CP1256 :
1890 enc =kCFStringEncodingWindowsArabic ;
1891 break ;
1892 case wxFONTENCODING_CP1257 :
1893 enc = kCFStringEncodingWindowsBalticRim;
1894 break ;
638357a0
RN
1895// This only really encodes to UTF7 (if that) evidently
1896// case wxFONTENCODING_UTF7 :
1897// enc = kCFStringEncodingNonLossyASCII ;
1898// break ;
ecd9653b
WS
1899 case wxFONTENCODING_UTF8 :
1900 enc = kCFStringEncodingUTF8 ;
1901 break ;
1902 case wxFONTENCODING_EUC_JP :
1903 enc = kCFStringEncodingEUC_JP;
1904 break ;
1905 case wxFONTENCODING_UTF16 :
f7e98dee 1906 enc = kCFStringEncodingUnicode ;
ecd9653b 1907 break ;
f7e98dee
RN
1908 case wxFONTENCODING_MACROMAN :
1909 enc = kCFStringEncodingMacRoman ;
1910 break ;
1911 case wxFONTENCODING_MACJAPANESE :
1912 enc = kCFStringEncodingMacJapanese ;
1913 break ;
1914 case wxFONTENCODING_MACCHINESETRAD :
1915 enc = kCFStringEncodingMacChineseTrad ;
1916 break ;
1917 case wxFONTENCODING_MACKOREAN :
1918 enc = kCFStringEncodingMacKorean ;
1919 break ;
1920 case wxFONTENCODING_MACARABIC :
1921 enc = kCFStringEncodingMacArabic ;
1922 break ;
1923 case wxFONTENCODING_MACHEBREW :
1924 enc = kCFStringEncodingMacHebrew ;
1925 break ;
1926 case wxFONTENCODING_MACGREEK :
1927 enc = kCFStringEncodingMacGreek ;
1928 break ;
1929 case wxFONTENCODING_MACCYRILLIC :
1930 enc = kCFStringEncodingMacCyrillic ;
1931 break ;
1932 case wxFONTENCODING_MACDEVANAGARI :
1933 enc = kCFStringEncodingMacDevanagari ;
1934 break ;
1935 case wxFONTENCODING_MACGURMUKHI :
1936 enc = kCFStringEncodingMacGurmukhi ;
1937 break ;
1938 case wxFONTENCODING_MACGUJARATI :
1939 enc = kCFStringEncodingMacGujarati ;
1940 break ;
1941 case wxFONTENCODING_MACORIYA :
1942 enc = kCFStringEncodingMacOriya ;
1943 break ;
1944 case wxFONTENCODING_MACBENGALI :
1945 enc = kCFStringEncodingMacBengali ;
1946 break ;
1947 case wxFONTENCODING_MACTAMIL :
1948 enc = kCFStringEncodingMacTamil ;
1949 break ;
1950 case wxFONTENCODING_MACTELUGU :
1951 enc = kCFStringEncodingMacTelugu ;
1952 break ;
1953 case wxFONTENCODING_MACKANNADA :
1954 enc = kCFStringEncodingMacKannada ;
1955 break ;
1956 case wxFONTENCODING_MACMALAJALAM :
1957 enc = kCFStringEncodingMacMalayalam ;
1958 break ;
1959 case wxFONTENCODING_MACSINHALESE :
1960 enc = kCFStringEncodingMacSinhalese ;
1961 break ;
1962 case wxFONTENCODING_MACBURMESE :
1963 enc = kCFStringEncodingMacBurmese ;
1964 break ;
1965 case wxFONTENCODING_MACKHMER :
1966 enc = kCFStringEncodingMacKhmer ;
1967 break ;
1968 case wxFONTENCODING_MACTHAI :
1969 enc = kCFStringEncodingMacThai ;
1970 break ;
1971 case wxFONTENCODING_MACLAOTIAN :
1972 enc = kCFStringEncodingMacLaotian ;
1973 break ;
1974 case wxFONTENCODING_MACGEORGIAN :
1975 enc = kCFStringEncodingMacGeorgian ;
1976 break ;
1977 case wxFONTENCODING_MACARMENIAN :
1978 enc = kCFStringEncodingMacArmenian ;
1979 break ;
1980 case wxFONTENCODING_MACCHINESESIMP :
1981 enc = kCFStringEncodingMacChineseSimp ;
1982 break ;
1983 case wxFONTENCODING_MACTIBETAN :
1984 enc = kCFStringEncodingMacTibetan ;
1985 break ;
1986 case wxFONTENCODING_MACMONGOLIAN :
1987 enc = kCFStringEncodingMacMongolian ;
1988 break ;
1989 case wxFONTENCODING_MACETHIOPIC :
1990 enc = kCFStringEncodingMacEthiopic ;
1991 break ;
1992 case wxFONTENCODING_MACCENTRALEUR :
1993 enc = kCFStringEncodingMacCentralEurRoman ;
1994 break ;
1995 case wxFONTENCODING_MACVIATNAMESE :
1996 enc = kCFStringEncodingMacVietnamese ;
1997 break ;
1998 case wxFONTENCODING_MACARABICEXT :
1999 enc = kCFStringEncodingMacExtArabic ;
2000 break ;
2001 case wxFONTENCODING_MACSYMBOL :
2002 enc = kCFStringEncodingMacSymbol ;
2003 break ;
2004 case wxFONTENCODING_MACDINGBATS :
2005 enc = kCFStringEncodingMacDingbats ;
2006 break ;
2007 case wxFONTENCODING_MACTURKISH :
2008 enc = kCFStringEncodingMacTurkish ;
2009 break ;
2010 case wxFONTENCODING_MACCROATIAN :
2011 enc = kCFStringEncodingMacCroatian ;
2012 break ;
2013 case wxFONTENCODING_MACICELANDIC :
2014 enc = kCFStringEncodingMacIcelandic ;
2015 break ;
2016 case wxFONTENCODING_MACROMANIAN :
2017 enc = kCFStringEncodingMacRomanian ;
2018 break ;
2019 case wxFONTENCODING_MACCELTIC :
2020 enc = kCFStringEncodingMacCeltic ;
2021 break ;
2022 case wxFONTENCODING_MACGAELIC :
2023 enc = kCFStringEncodingMacGaelic ;
2024 break ;
ecd9653b
WS
2025// case wxFONTENCODING_MACKEYBOARD :
2026// enc = kCFStringEncodingMacKeyboardGlyphs ;
2027// break ;
2028 default :
2029 // because gcc is picky
2030 break ;
2031 } ;
2032 return enc ;
f7e98dee
RN
2033}
2034
f7e98dee
RN
2035class wxMBConv_cocoa : public wxMBConv
2036{
2037public:
2038 wxMBConv_cocoa()
2039 {
2040 Init(CFStringGetSystemEncoding()) ;
2041 }
2042
a6900d10 2043#if wxUSE_FONTMAP
f7e98dee
RN
2044 wxMBConv_cocoa(const wxChar* name)
2045 {
267e11c5 2046 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
f7e98dee 2047 }
a6900d10 2048#endif
f7e98dee
RN
2049
2050 wxMBConv_cocoa(wxFontEncoding encoding)
2051 {
2052 Init( wxCFStringEncFromFontEnc(encoding) );
2053 }
2054
2055 ~wxMBConv_cocoa()
2056 {
2057 }
2058
2059 void Init( CFStringEncoding encoding)
2060 {
638357a0 2061 m_encoding = encoding ;
f7e98dee
RN
2062 }
2063
2064 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2065 {
2066 wxASSERT(szUnConv);
ecd9653b 2067
638357a0
RN
2068 CFStringRef theString = CFStringCreateWithBytes (
2069 NULL, //the allocator
2070 (const UInt8*)szUnConv,
2071 strlen(szUnConv),
2072 m_encoding,
2073 false //no BOM/external representation
f7e98dee
RN
2074 );
2075
2076 wxASSERT(theString);
2077
638357a0
RN
2078 size_t nOutLength = CFStringGetLength(theString);
2079
2080 if (szOut == NULL)
f7e98dee 2081 {
f7e98dee 2082 CFRelease(theString);
638357a0 2083 return nOutLength;
f7e98dee 2084 }
ecd9653b 2085
638357a0 2086 CFRange theRange = { 0, nOutSize };
ecd9653b 2087
638357a0
RN
2088#if SIZEOF_WCHAR_T == 4
2089 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2090#endif
3698ae71 2091
f7e98dee 2092 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
3698ae71 2093
f7e98dee 2094 CFRelease(theString);
ecd9653b 2095
638357a0 2096 szUniCharBuffer[nOutLength] = '\0' ;
f7e98dee
RN
2097
2098#if SIZEOF_WCHAR_T == 4
2099 wxMBConvUTF16 converter ;
638357a0 2100 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
f7e98dee
RN
2101 delete[] szUniCharBuffer;
2102#endif
3698ae71 2103
638357a0 2104 return nOutLength;
f7e98dee
RN
2105 }
2106
2107 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2108 {
638357a0 2109 wxASSERT(szUnConv);
3698ae71 2110
f7e98dee 2111 size_t nRealOutSize;
638357a0 2112 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 2113 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2114
f7e98dee
RN
2115#if SIZEOF_WCHAR_T == 4
2116 wxMBConvUTF16BE converter ;
2117 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2118 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2119 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2120 nBufSize /= sizeof(UniChar);
f7e98dee
RN
2121#endif
2122
2123 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2124 NULL, //allocator
2125 szUniBuffer,
2126 nBufSize,
638357a0 2127 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 2128 );
ecd9653b 2129
f7e98dee 2130 wxASSERT(theString);
ecd9653b 2131
f7e98dee 2132 //Note that CER puts a BOM when converting to unicode
638357a0
RN
2133 //so we check and use getchars instead in that case
2134 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 2135 {
638357a0
RN
2136 if (szOut != NULL)
2137 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
3698ae71 2138
638357a0
RN
2139 nRealOutSize = CFStringGetLength(theString) + 1;
2140 }
2141 else
2142 {
2143 CFStringGetBytes(
2144 theString,
2145 CFRangeMake(0, CFStringGetLength(theString)),
2146 m_encoding,
2147 0, //what to put in characters that can't be converted -
2148 //0 tells CFString to return NULL if it meets such a character
2149 false, //not an external representation
2150 (UInt8*) szOut,
3698ae71 2151 nOutSize,
638357a0
RN
2152 (CFIndex*) &nRealOutSize
2153 );
f7e98dee 2154 }
ecd9653b 2155
638357a0 2156 CFRelease(theString);
ecd9653b 2157
638357a0
RN
2158#if SIZEOF_WCHAR_T == 4
2159 delete[] szUniBuffer;
2160#endif
ecd9653b 2161
f7e98dee
RN
2162 return nRealOutSize - 1;
2163 }
2164
2165 bool IsOk() const
ecd9653b 2166 {
3698ae71 2167 return m_encoding != kCFStringEncodingInvalidId &&
638357a0 2168 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2169 }
2170
2171private:
638357a0 2172 CFStringEncoding m_encoding ;
f7e98dee
RN
2173};
2174
2175#endif // defined(__WXCOCOA__)
2176
335d31e0
SC
2177// ============================================================================
2178// Mac conversion classes
2179// ============================================================================
2180
2181#if defined(__WXMAC__) && defined(TARGET_CARBON)
2182
2183class wxMBConv_mac : public wxMBConv
2184{
2185public:
2186 wxMBConv_mac()
2187 {
2188 Init(CFStringGetSystemEncoding()) ;
2189 }
2190
2d1659cf 2191#if wxUSE_FONTMAP
335d31e0
SC
2192 wxMBConv_mac(const wxChar* name)
2193 {
267e11c5 2194 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
335d31e0 2195 }
2d1659cf 2196#endif
335d31e0
SC
2197
2198 wxMBConv_mac(wxFontEncoding encoding)
2199 {
d775fa82
WS
2200 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2201 }
2202
2203 ~wxMBConv_mac()
2204 {
2205 OSStatus status = noErr ;
2206 status = TECDisposeConverter(m_MB2WC_converter);
2207 status = TECDisposeConverter(m_WC2MB_converter);
2208 }
2209
2210
2211 void Init( TextEncodingBase encoding)
2212 {
2213 OSStatus status = noErr ;
2214 m_char_encoding = encoding ;
2215 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2216
2217 status = TECCreateConverter(&m_MB2WC_converter,
2218 m_char_encoding,
2219 m_unicode_encoding);
2220 status = TECCreateConverter(&m_WC2MB_converter,
2221 m_unicode_encoding,
2222 m_char_encoding);
2223 }
2224
335d31e0
SC
2225 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2226 {
d775fa82
WS
2227 OSStatus status = noErr ;
2228 ByteCount byteOutLen ;
2229 ByteCount byteInLen = strlen(psz) ;
2230 wchar_t *tbuf = NULL ;
2231 UniChar* ubuf = NULL ;
2232 size_t res = 0 ;
2233
2234 if (buf == NULL)
2235 {
638357a0 2236 //apple specs say at least 32
c543817b 2237 n = wxMax( 32 , byteInLen ) ;
d775fa82
WS
2238 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2239 }
2240 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
f3a355ce 2241#if SIZEOF_WCHAR_T == 4
d775fa82 2242 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2243#else
d775fa82 2244 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2245#endif
d775fa82
WS
2246 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2247 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
f3a355ce 2248#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2249 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2250 // is not properly terminated we get random characters at the end
2251 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d775fa82
WS
2252 wxMBConvUTF16BE converter ;
2253 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2254 free( ubuf ) ;
f3a355ce 2255#else
d775fa82 2256 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2257#endif
d775fa82
WS
2258 if ( buf == NULL )
2259 free(tbuf) ;
335d31e0 2260
335d31e0
SC
2261 if ( buf && res < n)
2262 buf[res] = 0;
2263
d775fa82 2264 return res ;
335d31e0
SC
2265 }
2266
2267 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82
WS
2268 {
2269 OSStatus status = noErr ;
2270 ByteCount byteOutLen ;
2271 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2272
2273 char *tbuf = NULL ;
2274
2275 if (buf == NULL)
2276 {
638357a0 2277 //apple specs say at least 32
c543817b 2278 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2279 tbuf = (char*) malloc( n ) ;
2280 }
2281
2282 ByteCount byteBufferLen = n ;
2283 UniChar* ubuf = NULL ;
f3a355ce 2284#if SIZEOF_WCHAR_T == 4
d775fa82
WS
2285 wxMBConvUTF16BE converter ;
2286 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2287 byteInLen = unicharlen ;
2288 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2289 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce 2290#else
d775fa82 2291 ubuf = (UniChar*) psz ;
f3a355ce 2292#endif
d775fa82
WS
2293 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2294 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
f3a355ce 2295#if SIZEOF_WCHAR_T == 4
d775fa82 2296 free( ubuf ) ;
f3a355ce 2297#endif
d775fa82
WS
2298 if ( buf == NULL )
2299 free(tbuf) ;
335d31e0 2300
d775fa82 2301 size_t res = byteOutLen ;
335d31e0 2302 if ( buf && res < n)
638357a0 2303 {
335d31e0 2304 buf[res] = 0;
3698ae71 2305
638357a0
RN
2306 //we need to double-trip to verify it didn't insert any ? in place
2307 //of bogus characters
2308 wxWCharBuffer wcBuf(n);
2309 size_t pszlen = wxWcslen(psz);
2310 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2311 wxWcslen(wcBuf) != pszlen ||
2312 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2313 {
2314 // we didn't obtain the same thing we started from, hence
2315 // the conversion was lossy and we consider that it failed
2316 return (size_t)-1;
2317 }
2318 }
335d31e0 2319
d775fa82 2320 return res ;
335d31e0
SC
2321 }
2322
2323 bool IsOk() const
2324 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2325
2326private:
d775fa82
WS
2327 TECObjectRef m_MB2WC_converter ;
2328 TECObjectRef m_WC2MB_converter ;
2329
2330 TextEncodingBase m_char_encoding ;
2331 TextEncodingBase m_unicode_encoding ;
335d31e0
SC
2332};
2333
2334#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 2335
36acb880
VZ
2336// ============================================================================
2337// wxEncodingConverter based conversion classes
2338// ============================================================================
2339
1e6feb95 2340#if wxUSE_FONTMAP
1cd52418 2341
e95354ec 2342class wxMBConv_wxwin : public wxMBConv
1cd52418 2343{
8b04d4c4
VZ
2344private:
2345 void Init()
2346 {
2347 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2348 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2349 }
2350
6001e347 2351public:
f1339c56
RR
2352 // temporarily just use wxEncodingConverter stuff,
2353 // so that it works while a better implementation is built
e95354ec 2354 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
2355 {
2356 if (name)
267e11c5 2357 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
2358 else
2359 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 2360
8b04d4c4
VZ
2361 Init();
2362 }
2363
e95354ec 2364 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
2365 {
2366 m_enc = enc;
2367
2368 Init();
f1339c56 2369 }
dccce9ea 2370
bde4baac 2371 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
2372 {
2373 size_t inbuf = strlen(psz);
dccce9ea 2374 if (buf)
c643a977
VS
2375 {
2376 if (!m2w.Convert(psz,buf))
2377 return (size_t)-1;
2378 }
f1339c56
RR
2379 return inbuf;
2380 }
dccce9ea 2381
bde4baac 2382 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 2383 {
f8d791e0 2384 const size_t inbuf = wxWcslen(psz);
f1339c56 2385 if (buf)
c643a977
VS
2386 {
2387 if (!w2m.Convert(psz,buf))
2388 return (size_t)-1;
2389 }
dccce9ea 2390
f1339c56
RR
2391 return inbuf;
2392 }
dccce9ea 2393
e95354ec 2394 bool IsOk() const { return m_ok; }
f1339c56
RR
2395
2396public:
8b04d4c4 2397 wxFontEncoding m_enc;
f1339c56 2398 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
2399
2400 // were we initialized successfully?
2401 bool m_ok;
fc7a2a60 2402
e95354ec 2403 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 2404};
6001e347 2405
1e6feb95
VZ
2406#endif // wxUSE_FONTMAP
2407
36acb880
VZ
2408// ============================================================================
2409// wxCSConv implementation
2410// ============================================================================
2411
8b04d4c4 2412void wxCSConv::Init()
6001e347 2413{
e95354ec
VZ
2414 m_name = NULL;
2415 m_convReal = NULL;
2416 m_deferred = true;
2417}
2418
8b04d4c4
VZ
2419wxCSConv::wxCSConv(const wxChar *charset)
2420{
2421 Init();
82713003 2422
e95354ec
VZ
2423 if ( charset )
2424 {
e95354ec
VZ
2425 SetName(charset);
2426 }
bda3d86a
VZ
2427
2428 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
2429}
2430
8b04d4c4
VZ
2431wxCSConv::wxCSConv(wxFontEncoding encoding)
2432{
bda3d86a 2433 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
2434 {
2435 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2436
2437 encoding = wxFONTENCODING_SYSTEM;
2438 }
2439
8b04d4c4
VZ
2440 Init();
2441
bda3d86a 2442 m_encoding = encoding;
8b04d4c4
VZ
2443}
2444
6001e347
RR
2445wxCSConv::~wxCSConv()
2446{
65e50848
JS
2447 Clear();
2448}
2449
54380f29 2450wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 2451 : wxMBConv()
54380f29 2452{
8b04d4c4
VZ
2453 Init();
2454
54380f29 2455 SetName(conv.m_name);
8b04d4c4 2456 m_encoding = conv.m_encoding;
54380f29
GD
2457}
2458
2459wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2460{
2461 Clear();
8b04d4c4 2462
54380f29 2463 SetName(conv.m_name);
8b04d4c4
VZ
2464 m_encoding = conv.m_encoding;
2465
54380f29
GD
2466 return *this;
2467}
2468
65e50848
JS
2469void wxCSConv::Clear()
2470{
8b04d4c4 2471 free(m_name);
e95354ec 2472 delete m_convReal;
8b04d4c4 2473
65e50848 2474 m_name = NULL;
e95354ec 2475 m_convReal = NULL;
6001e347
RR
2476}
2477
2478void wxCSConv::SetName(const wxChar *charset)
2479{
f1339c56
RR
2480 if (charset)
2481 {
2482 m_name = wxStrdup(charset);
e95354ec 2483 m_deferred = true;
f1339c56 2484 }
6001e347
RR
2485}
2486
e95354ec
VZ
2487wxMBConv *wxCSConv::DoCreate() const
2488{
c547282d
VZ
2489 // check for the special case of ASCII or ISO8859-1 charset: as we have
2490 // special knowledge of it anyhow, we don't need to create a special
2491 // conversion object
2492 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 2493 {
e95354ec
VZ
2494 // don't convert at all
2495 return NULL;
2496 }
dccce9ea 2497
e95354ec
VZ
2498 // we trust OS to do conversion better than we can so try external
2499 // conversion methods first
2500 //
2501 // the full order is:
2502 // 1. OS conversion (iconv() under Unix or Win32 API)
2503 // 2. hard coded conversions for UTF
2504 // 3. wxEncodingConverter as fall back
2505
2506 // step (1)
2507#ifdef HAVE_ICONV
c547282d 2508#if !wxUSE_FONTMAP
e95354ec 2509 if ( m_name )
c547282d 2510#endif // !wxUSE_FONTMAP
e95354ec 2511 {
c547282d
VZ
2512 wxString name(m_name);
2513
2514#if wxUSE_FONTMAP
2515 if ( name.empty() )
267e11c5 2516 name = wxFontMapperBase::Get()->GetEncodingName(m_encoding);
c547282d
VZ
2517#endif // wxUSE_FONTMAP
2518
2519 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
e95354ec
VZ
2520 if ( conv->IsOk() )
2521 return conv;
2522
2523 delete conv;
2524 }
2525#endif // HAVE_ICONV
2526
2527#ifdef wxHAVE_WIN32_MB2WC
2528 {
7608a683 2529#if wxUSE_FONTMAP
e95354ec
VZ
2530 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2531 : new wxMBConv_win32(m_encoding);
2532 if ( conv->IsOk() )
2533 return conv;
2534
2535 delete conv;
7608a683
WS
2536#else
2537 return NULL;
2538#endif
e95354ec
VZ
2539 }
2540#endif // wxHAVE_WIN32_MB2WC
d775fa82
WS
2541#if defined(__WXMAC__)
2542 {
5c3c8676 2543 // leave UTF16 and UTF32 to the built-ins of wx
3698ae71 2544 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
5c3c8676 2545 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
d775fa82
WS
2546 {
2547
2d1659cf 2548#if wxUSE_FONTMAP
d775fa82
WS
2549 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2550 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
2551#else
2552 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2553#endif
d775fa82 2554 if ( conv->IsOk() )
f7e98dee
RN
2555 return conv;
2556
2557 delete conv;
2558 }
2559 }
2560#endif
2561#if defined(__WXCOCOA__)
2562 {
2563 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2564 {
2565
a6900d10 2566#if wxUSE_FONTMAP
f7e98dee
RN
2567 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2568 : new wxMBConv_cocoa(m_encoding);
a6900d10
RN
2569#else
2570 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2571#endif
f7e98dee 2572 if ( conv->IsOk() )
d775fa82
WS
2573 return conv;
2574
2575 delete conv;
2576 }
335d31e0
SC
2577 }
2578#endif
e95354ec
VZ
2579 // step (2)
2580 wxFontEncoding enc = m_encoding;
2581#if wxUSE_FONTMAP
c547282d
VZ
2582 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2583 {
2584 // use "false" to suppress interactive dialogs -- we can be called from
2585 // anywhere and popping up a dialog from here is the last thing we want to
2586 // do
267e11c5 2587 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 2588 }
e95354ec
VZ
2589#endif // wxUSE_FONTMAP
2590
2591 switch ( enc )
2592 {
2593 case wxFONTENCODING_UTF7:
2594 return new wxMBConvUTF7;
2595
2596 case wxFONTENCODING_UTF8:
2597 return new wxMBConvUTF8;
2598
e95354ec
VZ
2599 case wxFONTENCODING_UTF16BE:
2600 return new wxMBConvUTF16BE;
2601
2602 case wxFONTENCODING_UTF16LE:
2603 return new wxMBConvUTF16LE;
2604
e95354ec
VZ
2605 case wxFONTENCODING_UTF32BE:
2606 return new wxMBConvUTF32BE;
2607
2608 case wxFONTENCODING_UTF32LE:
2609 return new wxMBConvUTF32LE;
2610
2611 default:
2612 // nothing to do but put here to suppress gcc warnings
2613 ;
2614 }
2615
2616 // step (3)
2617#if wxUSE_FONTMAP
2618 {
2619 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2620 : new wxMBConv_wxwin(m_encoding);
2621 if ( conv->IsOk() )
2622 return conv;
2623
2624 delete conv;
2625 }
2626#endif // wxUSE_FONTMAP
2627
a58d4f4d
VS
2628 // NB: This is a hack to prevent deadlock. What could otherwise happen
2629 // in Unicode build: wxConvLocal creation ends up being here
2630 // because of some failure and logs the error. But wxLog will try to
2631 // attach timestamp, for which it will need wxConvLocal (to convert
2632 // time to char* and then wchar_t*), but that fails, tries to log
2633 // error, but wxLog has a (already locked) critical section that
2634 // guards static buffer.
2635 static bool alreadyLoggingError = false;
2636 if (!alreadyLoggingError)
2637 {
2638 alreadyLoggingError = true;
2639 wxLogError(_("Cannot convert from the charset '%s'!"),
2640 m_name ? m_name
e95354ec
VZ
2641 :
2642#if wxUSE_FONTMAP
267e11c5 2643 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
e95354ec
VZ
2644#else // !wxUSE_FONTMAP
2645 wxString::Format(_("encoding %s"), m_encoding).c_str()
2646#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2647 );
a58d4f4d
VS
2648 alreadyLoggingError = false;
2649 }
e95354ec
VZ
2650
2651 return NULL;
2652}
2653
2654void wxCSConv::CreateConvIfNeeded() const
2655{
2656 if ( m_deferred )
2657 {
2658 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
2659
2660#if wxUSE_INTL
2661 // if we don't have neither the name nor the encoding, use the default
2662 // encoding for this system
2663 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2664 {
4d312c22 2665 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
2666 }
2667#endif // wxUSE_INTL
2668
e95354ec
VZ
2669 self->m_convReal = DoCreate();
2670 self->m_deferred = false;
6001e347 2671 }
6001e347
RR
2672}
2673
2674size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2675{
e95354ec 2676 CreateConvIfNeeded();
dccce9ea 2677
e95354ec
VZ
2678 if (m_convReal)
2679 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
2680
2681 // latin-1 (direct)
4def3b35 2682 size_t len = strlen(psz);
dccce9ea 2683
f1339c56
RR
2684 if (buf)
2685 {
4def3b35 2686 for (size_t c = 0; c <= len; c++)
f1339c56
RR
2687 buf[c] = (unsigned char)(psz[c]);
2688 }
dccce9ea 2689
f1339c56 2690 return len;
6001e347
RR
2691}
2692
2693size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2694{
e95354ec 2695 CreateConvIfNeeded();
dccce9ea 2696
e95354ec
VZ
2697 if (m_convReal)
2698 return m_convReal->WC2MB(buf, psz, n);
1cd52418 2699
f1339c56 2700 // latin-1 (direct)
f8d791e0 2701 const size_t len = wxWcslen(psz);
f1339c56
RR
2702 if (buf)
2703 {
4def3b35 2704 for (size_t c = 0; c <= len; c++)
24642831
VS
2705 {
2706 if (psz[c] > 0xFF)
2707 return (size_t)-1;
907173e5 2708 buf[c] = (char)psz[c];
24642831
VS
2709 }
2710 }
2711 else
2712 {
2713 for (size_t c = 0; c <= len; c++)
2714 {
2715 if (psz[c] > 0xFF)
2716 return (size_t)-1;
2717 }
f1339c56 2718 }
dccce9ea 2719
f1339c56 2720 return len;
6001e347
RR
2721}
2722
bde4baac
VZ
2723// ----------------------------------------------------------------------------
2724// globals
2725// ----------------------------------------------------------------------------
2726
2727#ifdef __WINDOWS__
2728 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
2729#elif defined(__WXMAC__) && !defined(__MACH__)
2730 static wxMBConv_mac wxConvLibcObj ;
bde4baac 2731#else
dcc8fac0 2732 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
2733#endif
2734
2735static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2736static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2737static wxMBConvUTF7 wxConvUTF7Obj;
2738static wxMBConvUTF8 wxConvUTF8Obj;
ea8ce907 2739static wxConvBrokenFileNames wxConvBrokenFileNamesObj;
bde4baac 2740
bde4baac
VZ
2741WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2742WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2743WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2744WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2745WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2746WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
f5a1953b
VZ
2747WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2748#ifdef __WXOSX__
ea8ce907
RR
2749 wxConvUTF8Obj;
2750#elif __WXGTK20__
2751 wxConvBrokenFileNamesObj;
f5a1953b 2752#else
ea8ce907 2753 wxConvLibcObj;
f5a1953b
VZ
2754#endif
2755
bde4baac
VZ
2756
2757#else // !wxUSE_WCHAR_T
2758
2759// stand-ins in absence of wchar_t
2760WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2761 wxConvISO8859_1,
2762 wxConvLocal,
2763 wxConvUTF8;
2764
2765#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
2766
2767