]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
GetSystemEncodingName() cleanup: we don't need to check for US-ASCII synonyms here...
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
f6bcfd97
BP
15// ============================================================================
16// declarations
17// ============================================================================
18
19// ----------------------------------------------------------------------------
20// headers
21// ----------------------------------------------------------------------------
22
14f355c2 23#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
24 #pragma implementation "strconv.h"
25#endif
26
27// For compilers that support precompilation, includes "wx.h".
28#include "wx/wxprec.h"
29
30#ifdef __BORLANDC__
31 #pragma hdrstop
32#endif
33
373658eb
VZ
34#ifndef WX_PRECOMP
35 #include "wx/intl.h"
36 #include "wx/log.h"
37#endif // WX_PRECOMP
38
bde4baac
VZ
39#include "wx/strconv.h"
40
41#if wxUSE_WCHAR_T
42
7608a683 43#ifdef __WINDOWS__
532d575b 44 #include "wx/msw/private.h"
13dd924a 45 #include "wx/msw/missing.h"
0a1c1e62
GRG
46#endif
47
1c193821 48#ifndef __WXWINCE__
1cd52418 49#include <errno.h>
1c193821
JS
50#endif
51
6001e347
RR
52#include <ctype.h>
53#include <string.h>
54#include <stdlib.h>
55
e95354ec
VZ
56#if defined(__WIN32__) && !defined(__WXMICROWIN__)
57 #define wxHAVE_WIN32_MB2WC
58#endif // __WIN32__ but !__WXMICROWIN__
59
373658eb
VZ
60// ----------------------------------------------------------------------------
61// headers
62// ----------------------------------------------------------------------------
7af284fd 63
6001e347 64#ifdef __SALFORDC__
373658eb 65 #include <clib.h>
6001e347
RR
66#endif
67
b040e242 68#ifdef HAVE_ICONV
373658eb 69 #include <iconv.h>
b1d547eb 70 #include "wx/thread.h"
1cd52418 71#endif
1cd52418 72
373658eb
VZ
73#include "wx/encconv.h"
74#include "wx/fontmap.h"
7608a683 75#include "wx/utils.h"
373658eb 76
335d31e0 77#ifdef __WXMAC__
40ba2f3b 78#ifndef __DARWIN__
4227afa4
SC
79#include <ATSUnicode.h>
80#include <TextCommon.h>
81#include <TextEncodingConverter.h>
40ba2f3b 82#endif
335d31e0
SC
83
84#include "wx/mac/private.h" // includes mac headers
85#endif
ce6f8d6f
VZ
86
87#define TRACE_STRCONV _T("strconv")
88
373658eb
VZ
89// ----------------------------------------------------------------------------
90// macros
91// ----------------------------------------------------------------------------
3e61dfb0 92
1cd52418 93#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
3a0d76bc 94#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
1cd52418
OK
95
96#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
97 #define WC_NAME "UCS4"
98 #define WC_BSWAP BSWAP_UCS4
99 #ifdef WORDS_BIGENDIAN
100 #define WC_NAME_BEST "UCS-4BE"
101 #else
102 #define WC_NAME_BEST "UCS-4LE"
103 #endif
1cd52418 104#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
105 #define WC_NAME "UTF16"
106 #define WC_BSWAP BSWAP_UTF16
a3f2769e 107 #define WC_UTF16
3a0d76bc
VS
108 #ifdef WORDS_BIGENDIAN
109 #define WC_NAME_BEST "UTF-16BE"
110 #else
111 #define WC_NAME_BEST "UTF-16LE"
112 #endif
bab1e722 113#else // sizeof(wchar_t) != 2 nor 4
bde4baac
VZ
114 // does this ever happen?
115 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1cd52418
OK
116#endif
117
373658eb
VZ
118// ============================================================================
119// implementation
120// ============================================================================
121
122// ----------------------------------------------------------------------------
c91830cb 123// UTF-16 en/decoding to/from UCS-4
373658eb 124// ----------------------------------------------------------------------------
6001e347 125
b0a6bb75 126
c91830cb 127static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 128{
dccce9ea 129 if (input<=0xffff)
4def3b35 130 {
999836aa
VZ
131 if (output)
132 *output = (wxUint16) input;
4def3b35 133 return 1;
dccce9ea
VZ
134 }
135 else if (input>=0x110000)
4def3b35
VS
136 {
137 return (size_t)-1;
dccce9ea
VZ
138 }
139 else
4def3b35 140 {
dccce9ea 141 if (output)
4def3b35 142 {
c91830cb 143 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
999836aa 144 *output = (wxUint16) ((input&0x3ff)+0xdc00);
4def3b35
VS
145 }
146 return 2;
1cd52418 147 }
1cd52418
OK
148}
149
c91830cb 150static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 151{
dccce9ea 152 if ((*input<0xd800) || (*input>0xdfff))
4def3b35
VS
153 {
154 output = *input;
155 return 1;
dccce9ea 156 }
cdb14ecb 157 else if ((input[1]<0xdc00) || (input[1]>0xdfff))
4def3b35
VS
158 {
159 output = *input;
160 return (size_t)-1;
dccce9ea
VZ
161 }
162 else
4def3b35
VS
163 {
164 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
165 return 2;
166 }
1cd52418
OK
167}
168
b0a6bb75 169
f6bcfd97 170// ----------------------------------------------------------------------------
6001e347 171// wxMBConv
f6bcfd97 172// ----------------------------------------------------------------------------
2c53a80a
WS
173
// Out-of-line definition of the wxMBConv destructor; the body is
// intentionally empty.
174wxMBConv::~wxMBConv()
175{
176 // nothing to do here (necessary for Darwin linking probably)
177}
6001e347 178
6001e347
RR
179const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
180{
2b5f62a0 181 if ( psz )
6001e347 182 {
2b5f62a0
VZ
183 // calculate the length of the buffer needed first
184 size_t nLen = MB2WC(NULL, psz, 0);
185 if ( nLen != (size_t)-1 )
186 {
187 // now do the actual conversion
188 wxWCharBuffer buf(nLen);
635f33ce
VS
189 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
190 if ( nLen != (size_t)-1 )
191 {
192 return buf;
193 }
2b5f62a0 194 }
f6bcfd97 195 }
2b5f62a0
VZ
196
197 wxWCharBuffer buf((wchar_t *)NULL);
198
199 return buf;
6001e347
RR
200}
201
e5cceba0 202const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 203{
2b5f62a0
VZ
204 if ( pwz )
205 {
206 size_t nLen = WC2MB(NULL, pwz, 0);
207 if ( nLen != (size_t)-1 )
208 {
c91830cb 209 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
635f33ce
VS
210 nLen = WC2MB(buf.data(), pwz, nLen + 4);
211 if ( nLen != (size_t)-1 )
212 {
213 return buf;
214 }
2b5f62a0
VZ
215 }
216 }
217
218 wxCharBuffer buf((char *)NULL);
e5cceba0 219
e5cceba0 220 return buf;
6001e347
RR
221}
222
f5fb6871 223const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 224{
f5fb6871
RN
225 wxASSERT(pOutSize != NULL);
226
e4e3bbb4
RN
227 const char* szEnd = szString + nStringLen + 1;
228 const char* szPos = szString;
229 const char* szStart = szPos;
230
231 size_t nActualLength = 0;
f5fb6871
RN
232 size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
233
234 wxWCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
235
236 //Convert the string until the length() is reached, continuing the
237 //loop every time a null character is reached
238 while(szPos != szEnd)
239 {
240 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
241
242 //Get the length of the current (sub)string
243 size_t nLen = MB2WC(NULL, szPos, 0);
244
245 //Invalid conversion?
246 if( nLen == (size_t)-1 )
f5fb6871
RN
247 {
248 *pOutSize = 0;
249 theBuffer.data()[0u] = wxT('\0');
250 return theBuffer;
251 }
252
e4e3bbb4
RN
253
254 //Increase the actual length (+1 for current null character)
255 nActualLength += nLen + 1;
256
f5fb6871
RN
257 //if buffer too big, realloc the buffer
258 if (nActualLength > (nCurrentSize+1))
259 {
260 wxWCharBuffer theNewBuffer(nCurrentSize << 1);
261 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
262 theBuffer = theNewBuffer;
263 nCurrentSize <<= 1;
264 }
265
266 //Convert the current (sub)string
267 if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 268 {
f5fb6871
RN
269 *pOutSize = 0;
270 theBuffer.data()[0u] = wxT('\0');
271 return theBuffer;
e4e3bbb4
RN
272 }
273
274 //Increment to next (sub)string
3103e8a9
JS
275 //Note that we have to use strlen instead of nLen here
276 //because XX2XX gives us the size of the output buffer,
277 //which is not necessarily the length of the string
e4e3bbb4
RN
278 szPos += strlen(szPos) + 1;
279 }
280
f5fb6871
RN
281 //success - return actual length and the buffer
282 *pOutSize = nActualLength;
3698ae71 283 return theBuffer;
e4e3bbb4
RN
284}
285
f5fb6871 286const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 287{
f5fb6871
RN
288 wxASSERT(pOutSize != NULL);
289
e4e3bbb4
RN
290 const wchar_t* szEnd = szString + nStringLen + 1;
291 const wchar_t* szPos = szString;
292 const wchar_t* szStart = szPos;
293
294 size_t nActualLength = 0;
f5fb6871
RN
295 size_t nCurrentSize = nStringLen << 2; //try * 4 first
296
297 wxCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
298
299 //Convert the string until the length() is reached, continuing the
300 //loop every time a null character is reached
301 while(szPos != szEnd)
302 {
303 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
304
305 //Get the length of the current (sub)string
306 size_t nLen = WC2MB(NULL, szPos, 0);
307
308 //Invalid conversion?
309 if( nLen == (size_t)-1 )
f5fb6871
RN
310 {
311 *pOutSize = 0;
312 theBuffer.data()[0u] = wxT('\0');
313 return theBuffer;
314 }
e4e3bbb4
RN
315
316 //Increase the actual length (+1 for current null character)
317 nActualLength += nLen + 1;
3698ae71 318
f5fb6871
RN
319 //if buffer too big, realloc the buffer
320 if (nActualLength > (nCurrentSize+1))
321 {
322 wxCharBuffer theNewBuffer(nCurrentSize << 1);
323 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
324 theBuffer = theNewBuffer;
325 nCurrentSize <<= 1;
326 }
327
328 //Convert the current (sub)string
329 if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 330 {
f5fb6871
RN
331 *pOutSize = 0;
332 theBuffer.data()[0u] = wxT('\0');
333 return theBuffer;
e4e3bbb4
RN
334 }
335
336 //Increment to next (sub)string
3103e8a9
JS
337 //Note that we have to use wxWcslen instead of nLen here
338 //because XX2XX gives us the size of the output buffer,
339 //which is not necessarily the length of the string
e4e3bbb4
RN
340 szPos += wxWcslen(szPos) + 1;
341 }
342
f5fb6871
RN
343 //success - return actual length and the buffer
344 *pOutSize = nActualLength;
3698ae71 345 return theBuffer;
e4e3bbb4
RN
346}
347
6001e347 348// ----------------------------------------------------------------------------
bde4baac 349// wxMBConvLibc
6001e347
RR
350// ----------------------------------------------------------------------------
351
bde4baac
VZ
352size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
353{
354 return wxMB2WC(buf, psz, n);
355}
356
357size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
358{
359 return wxWC2MB(buf, psz, n);
360}
e1bfe89e 361
66bf0099 362#ifdef __UNIX__
c12b7f79 363
e1bfe89e 364// ----------------------------------------------------------------------------
532d575b 365// wxConvBrokenFileNames
e1bfe89e
RR
366// ----------------------------------------------------------------------------
367
845905d5 368wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
ea8ce907 369{
845905d5
MW
370 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
371 || wxStricmp(charset, _T("UTF8")) == 0 )
372 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
373 else
374 m_conv = new wxCSConv(charset);
ea8ce907
RR
375}
376
c12b7f79
VZ
377size_t
378wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
379 const char *psz,
380 size_t outputSize) const
e1bfe89e 381{
c12b7f79 382 return m_conv->MB2WC( outputBuf, psz, outputSize );
e1bfe89e
RR
383}
384
c12b7f79
VZ
385size_t
386wxConvBrokenFileNames::WC2MB(char *outputBuf,
387 const wchar_t *psz,
388 size_t outputSize) const
e1bfe89e 389{
c12b7f79 390 return m_conv->WC2MB( outputBuf, psz, outputSize );
e1bfe89e
RR
391}
392
66bf0099 393#endif
c12b7f79 394
bde4baac 395// ----------------------------------------------------------------------------
3698ae71 396// UTF-7
bde4baac 397// ----------------------------------------------------------------------------
6001e347 398
15f2ee32 399// Implementation (C) 2004 Fredrik Roubert
6001e347 400
15f2ee32
RN
401//
402// BASE64 decoding table
403//
// Indexed by byte value: gives the 6-bit value of a BASE64 alphabet
// character ('A'-'Z' -> 0x00-0x19, 'a'-'z' -> 0x1a-0x33, '0'-'9' ->
// 0x34-0x3d, '+' -> 0x3e, '/' -> 0x3f) and 0xff for every other byte.
404static const unsigned char utf7unb64[] =
6001e347 405{
15f2ee32
RN
406 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
407 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
408 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
409 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
410 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
411 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
412 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
413 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
414 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
415 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
416 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
417 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
418 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
419 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
420 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
421 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
422 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
423 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
424 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
425 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
426 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
427 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
428 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
429 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
430 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
431 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
432 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
433 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
434 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
435 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
436 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
437 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
438};
439
440size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
441{
15f2ee32
RN
442 size_t len = 0;
443
444 while (*psz && ((!buf) || (len < n)))
445 {
446 unsigned char cc = *psz++;
447 if (cc != '+')
448 {
449 // plain ASCII char
450 if (buf)
451 *buf++ = cc;
452 len++;
453 }
454 else if (*psz == '-')
455 {
456 // encoded plus sign
457 if (buf)
458 *buf++ = cc;
459 len++;
460 psz++;
461 }
462 else
463 {
464 // BASE64 encoded string
465 bool lsb;
466 unsigned char c;
467 unsigned int d, l;
468 for (lsb = false, d = 0, l = 0;
469 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
470 {
471 d <<= 6;
472 d += cc;
473 for (l += 6; l >= 8; lsb = !lsb)
474 {
6356d52a 475 c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
476 if (lsb)
477 {
478 if (buf)
479 *buf++ |= c;
480 len ++;
481 }
482 else
483 if (buf)
6356d52a 484 *buf = (wchar_t)(c << 8);
15f2ee32
RN
485 }
486 }
487 if (*psz == '-')
488 psz++;
489 }
490 }
491 if (buf && (len < n))
492 *buf = 0;
493 return len;
6001e347
RR
494}
495
15f2ee32
RN
496//
497// BASE64 encoding table
498//
// Indexed by a 6-bit value (0-63): gives the corresponding character of the
// BASE64 alphabet.
499static const unsigned char utf7enb64[] =
500{
501 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
502 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
503 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
504 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
505 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
506 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
507 'w', 'x', 'y', 'z', '0', '1', '2', '3',
508 '4', '5', '6', '7', '8', '9', '+', '/'
509};
510
511//
512// UTF-7 encoding table
513//
514// 0 - Set D (directly encoded characters)
515// 1 - Set O (optional direct characters)
516// 2 - whitespace characters (optional)
517// 3 - special characters
518//
// Indexed by ASCII code (0-127): gives the character class using the 0-3
// values listed above. The encoder in this file copies only class 0
// characters directly; all other classes are BASE64 encoded.
519static const unsigned char utf7encode[128] =
6001e347 520{
15f2ee32
RN
521 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
522 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
523 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
525 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
527 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
529};
530
667e5b3e 531size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32
RN
532{
533
534
535 size_t len = 0;
536
537 while (*psz && ((!buf) || (len < n)))
538 {
539 wchar_t cc = *psz++;
540 if (cc < 0x80 && utf7encode[cc] < 1)
541 {
542 // plain ASCII char
543 if (buf)
544 *buf++ = (char)cc;
545 len++;
546 }
547#ifndef WC_UTF16
79c78d42 548 else if (((wxUint32)cc) > 0xffff)
b2c13097 549 {
15f2ee32
RN
550 // no surrogate pair generation (yet?)
551 return (size_t)-1;
552 }
553#endif
554 else
555 {
556 if (buf)
557 *buf++ = '+';
558 len++;
559 if (cc != '+')
560 {
561 // BASE64 encode string
562 unsigned int lsb, d, l;
563 for (d = 0, l = 0;; psz++)
564 {
565 for (lsb = 0; lsb < 2; lsb ++)
566 {
567 d <<= 8;
568 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
569
570 for (l += 8; l >= 6; )
571 {
572 l -= 6;
573 if (buf)
574 *buf++ = utf7enb64[(d >> l) % 64];
575 len++;
576 }
577 }
578 cc = *psz;
579 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
580 break;
581 }
582 if (l != 0)
583 {
584 if (buf)
585 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
586 len++;
587 }
588 }
589 if (buf)
590 *buf++ = '-';
591 len++;
592 }
593 }
594 if (buf && (len < n))
595 *buf = 0;
596 return len;
6001e347
RR
597}
598
f6bcfd97 599// ----------------------------------------------------------------------------
6001e347 600// UTF-8
f6bcfd97 601// ----------------------------------------------------------------------------
6001e347 602
dccce9ea 603static wxUint32 utf8_max[]=
4def3b35 604 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347 605
3698ae71
VZ
606// boundaries of the private use area we use to (temporarily) remap bytes
607// that are invalid in a UTF-8 encoded string
ea8ce907
RR
// first code point of the remapping range
608const wxUint32 wxUnicodePUA = 0x100000;
// one past its last code point: the range has 256 entries, one per byte value
609const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
610
6001e347
RR
611size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
612{
4def3b35
VS
613 size_t len = 0;
614
dccce9ea 615 while (*psz && ((!buf) || (len < n)))
4def3b35 616 {
ea8ce907
RR
617 const char *opsz = psz;
618 bool invalid = false;
4def3b35
VS
619 unsigned char cc = *psz++, fc = cc;
620 unsigned cnt;
dccce9ea 621 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 622 fc <<= 1;
dccce9ea 623 if (!cnt)
4def3b35
VS
624 {
625 // plain ASCII char
dccce9ea 626 if (buf)
4def3b35
VS
627 *buf++ = cc;
628 len++;
561488ef
MW
629
630 // escape the escape character for octal escapes
631 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
632 && cc == '\\' && (!buf || len < n))
633 {
634 if (buf)
635 *buf++ = cc;
636 len++;
637 }
dccce9ea
VZ
638 }
639 else
4def3b35
VS
640 {
641 cnt--;
dccce9ea 642 if (!cnt)
4def3b35
VS
643 {
644 // invalid UTF-8 sequence
ea8ce907 645 invalid = true;
dccce9ea
VZ
646 }
647 else
4def3b35
VS
648 {
649 unsigned ocnt = cnt - 1;
650 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 651 while (cnt--)
4def3b35 652 {
ea8ce907 653 cc = *psz;
dccce9ea 654 if ((cc & 0xC0) != 0x80)
4def3b35
VS
655 {
656 // invalid UTF-8 sequence
ea8ce907
RR
657 invalid = true;
658 break;
4def3b35 659 }
ea8ce907 660 psz++;
4def3b35
VS
661 res = (res << 6) | (cc & 0x3f);
662 }
ea8ce907 663 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
664 {
665 // illegal UTF-8 encoding
ea8ce907 666 invalid = true;
4def3b35 667 }
ea8ce907
RR
668 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
669 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
670 {
671 // if one of our PUA characters turns up externally
672 // it must also be treated as an illegal sequence
673 // (a bit like you have to escape an escape character)
674 invalid = true;
675 }
676 else
677 {
1cd52418 678#ifdef WC_UTF16
ea8ce907
RR
679 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
680 size_t pa = encode_utf16(res, (wxUint16 *)buf);
681 if (pa == (size_t)-1)
682 {
683 invalid = true;
684 }
685 else
686 {
687 if (buf)
688 buf += pa;
689 len += pa;
690 }
373658eb 691#else // !WC_UTF16
ea8ce907
RR
692 if (buf)
693 *buf++ = res;
694 len++;
373658eb 695#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
696 }
697 }
698 if (invalid)
699 {
700 if (m_options & MAP_INVALID_UTF8_TO_PUA)
701 {
702 while (opsz < psz && (!buf || len < n))
703 {
704#ifdef WC_UTF16
705 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
706 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
707 wxASSERT(pa != (size_t)-1);
708 if (buf)
709 buf += pa;
710 opsz++;
711 len += pa;
712#else
713 if (buf)
714 *buf++ = wxUnicodePUA + (unsigned char)*opsz;
715 opsz++;
716 len++;
717#endif
718 }
719 }
3698ae71 720 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
721 {
722 while (opsz < psz && (!buf || len < n))
723 {
3698ae71
VZ
724 if ( buf && len + 3 < n )
725 {
726 unsigned char n = *opsz;
727 *buf++ = L'\\';
b2c13097
WS
728 *buf++ = (wchar_t)( L'0' + n / 0100 );
729 *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
730 *buf++ = (wchar_t)( L'0' + n % 010 );
3698ae71 731 }
ea8ce907
RR
732 opsz++;
733 len += 4;
734 }
735 }
3698ae71 736 else // MAP_INVALID_UTF8_NOT
ea8ce907
RR
737 {
738 return (size_t)-1;
739 }
4def3b35
VS
740 }
741 }
6001e347 742 }
dccce9ea 743 if (buf && (len < n))
4def3b35
VS
744 *buf = 0;
745 return len;
6001e347
RR
746}
747
3698ae71
VZ
// Return true if wch is one of the octal digits L'0' to L'7'.
static inline bool isoctal(wchar_t wch)
{
    if (wch < L'0')
        return false;

    return wch <= L'7';
}
752
6001e347
RR
753size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
754{
4def3b35 755 size_t len = 0;
6001e347 756
dccce9ea 757 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
758 {
759 wxUint32 cc;
1cd52418 760#ifdef WC_UTF16
b5153fd8
VZ
761 // cast is ok for WC_UTF16
762 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 763 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 764#else
4def3b35
VS
765 cc=(*psz++) & 0x7fffffff;
766#endif
3698ae71
VZ
767
768 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
769 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 770 {
dccce9ea 771 if (buf)
ea8ce907 772 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 773 len++;
3698ae71 774 }
561488ef
MW
775 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
776 && cc == L'\\' && psz[0] == L'\\' )
777 {
778 if (buf)
779 *buf++ = (char)cc;
780 psz++;
781 len++;
782 }
3698ae71
VZ
783 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
784 cc == L'\\' &&
785 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 786 {
dccce9ea 787 if (buf)
3698ae71 788 {
b2c13097
WS
789 *buf++ = (char) ((psz[0] - L'0')*0100 +
790 (psz[1] - L'0')*010 +
791 (psz[2] - L'0'));
3698ae71
VZ
792 }
793
794 psz += 3;
ea8ce907
RR
795 len++;
796 }
797 else
798 {
799 unsigned cnt;
800 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
801 if (!cnt)
4def3b35 802 {
ea8ce907
RR
803 // plain ASCII char
804 if (buf)
805 *buf++ = (char) cc;
806 len++;
807 }
808
809 else
810 {
811 len += cnt + 1;
812 if (buf)
813 {
814 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
815 while (cnt--)
816 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
817 }
4def3b35
VS
818 }
819 }
6001e347 820 }
4def3b35 821
3698ae71
VZ
822 if (buf && (len<n))
823 *buf = 0;
adb45366 824
4def3b35 825 return len;
6001e347
RR
826}
827
c91830cb
VZ
828// ----------------------------------------------------------------------------
829// UTF-16
830// ----------------------------------------------------------------------------
831
832#ifdef WORDS_BIGENDIAN
bde4baac
VZ
833 #define wxMBConvUTF16straight wxMBConvUTF16BE
834 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 835#else
bde4baac
VZ
836 #define wxMBConvUTF16swap wxMBConvUTF16BE
837 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
838#endif
839
840
c91830cb
VZ
841#ifdef WC_UTF16
842
c91830cb
VZ
843// copy 16bit MB to 16bit String
844size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
845{
846 size_t len=0;
847
848 while (*(wxUint16*)psz && (!buf || len < n))
849 {
850 if (buf)
851 *buf++ = *(wxUint16*)psz;
852 len++;
853
854 psz += sizeof(wxUint16);
855 }
856 if (buf && len<n) *buf=0;
857
858 return len;
859}
860
861
862// copy 16bit String to 16bit MB
863size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
864{
865 size_t len=0;
866
867 while (*psz && (!buf || len < n))
868 {
869 if (buf)
870 {
871 *(wxUint16*)buf = *psz;
872 buf += sizeof(wxUint16);
873 }
874 len += sizeof(wxUint16);
875 psz++;
876 }
877 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
878
879 return len;
880}
881
882
883// swap 16bit MB to 16bit String
884size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
885{
886 size_t len=0;
887
888 while (*(wxUint16*)psz && (!buf || len < n))
889 {
890 if (buf)
891 {
892 ((char *)buf)[0] = psz[1];
893 ((char *)buf)[1] = psz[0];
894 buf++;
895 }
896 len++;
897 psz += sizeof(wxUint16);
898 }
899 if (buf && len<n) *buf=0;
900
901 return len;
902}
903
904
905// swap 16bit MB to 16bit String
906size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
907{
908 size_t len=0;
909
910 while (*psz && (!buf || len < n))
911 {
912 if (buf)
913 {
914 *buf++ = ((char*)psz)[1];
915 *buf++ = ((char*)psz)[0];
916 }
917 len += sizeof(wxUint16);
918 psz++;
919 }
920 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
921
922 return len;
923}
924
925
926#else // WC_UTF16
927
928
929// copy 16bit MB to 32bit String
930size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
931{
932 size_t len=0;
933
934 while (*(wxUint16*)psz && (!buf || len < n))
935 {
936 wxUint32 cc;
937 size_t pa=decode_utf16((wxUint16*)psz, cc);
938 if (pa == (size_t)-1)
939 return pa;
940
941 if (buf)
942 *buf++ = cc;
943 len++;
944 psz += pa * sizeof(wxUint16);
945 }
946 if (buf && len<n) *buf=0;
947
948 return len;
949}
950
951
952// copy 32bit String to 16bit MB
953size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
954{
955 size_t len=0;
956
957 while (*psz && (!buf || len < n))
958 {
959 wxUint16 cc[2];
960 size_t pa=encode_utf16(*psz, cc);
961
962 if (pa == (size_t)-1)
963 return pa;
964
965 if (buf)
966 {
69b80d28 967 *(wxUint16*)buf = cc[0];
b5153fd8 968 buf += sizeof(wxUint16);
c91830cb 969 if (pa > 1)
69b80d28
VZ
970 {
971 *(wxUint16*)buf = cc[1];
972 buf += sizeof(wxUint16);
973 }
c91830cb
VZ
974 }
975
976 len += pa*sizeof(wxUint16);
977 psz++;
978 }
979 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
980
981 return len;
982}
983
984
985// swap 16bit MB to 32bit String
986size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
987{
988 size_t len=0;
989
990 while (*(wxUint16*)psz && (!buf || len < n))
991 {
992 wxUint32 cc;
993 char tmp[4];
994 tmp[0]=psz[1]; tmp[1]=psz[0];
995 tmp[2]=psz[3]; tmp[3]=psz[2];
996
997 size_t pa=decode_utf16((wxUint16*)tmp, cc);
998 if (pa == (size_t)-1)
999 return pa;
1000
1001 if (buf)
1002 *buf++ = cc;
1003
1004 len++;
1005 psz += pa * sizeof(wxUint16);
1006 }
1007 if (buf && len<n) *buf=0;
1008
1009 return len;
1010}
1011
1012
1013// swap 32bit String to 16bit MB
1014size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1015{
1016 size_t len=0;
1017
1018 while (*psz && (!buf || len < n))
1019 {
1020 wxUint16 cc[2];
1021 size_t pa=encode_utf16(*psz, cc);
1022
1023 if (pa == (size_t)-1)
1024 return pa;
1025
1026 if (buf)
1027 {
1028 *buf++ = ((char*)cc)[1];
1029 *buf++ = ((char*)cc)[0];
1030 if (pa > 1)
1031 {
1032 *buf++ = ((char*)cc)[3];
1033 *buf++ = ((char*)cc)[2];
1034 }
1035 }
1036
1037 len += pa*sizeof(wxUint16);
1038 psz++;
1039 }
1040 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1041
1042 return len;
1043}
1044
1045#endif // WC_UTF16
1046
1047
1048// ----------------------------------------------------------------------------
1049// UTF-32
1050// ----------------------------------------------------------------------------
1051
1052#ifdef WORDS_BIGENDIAN
1053#define wxMBConvUTF32straight wxMBConvUTF32BE
1054#define wxMBConvUTF32swap wxMBConvUTF32LE
1055#else
1056#define wxMBConvUTF32swap wxMBConvUTF32BE
1057#define wxMBConvUTF32straight wxMBConvUTF32LE
1058#endif
1059
1060
// global instances of the little and big endian UTF-32 converter classes
1061WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1062WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1063
1064
1065#ifdef WC_UTF16
1066
1067// copy 32bit MB to 16bit String
1068size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1069{
1070 size_t len=0;
1071
1072 while (*(wxUint32*)psz && (!buf || len < n))
1073 {
1074 wxUint16 cc[2];
1075
1076 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1077 if (pa == (size_t)-1)
1078 return pa;
1079
1080 if (buf)
1081 {
1082 *buf++ = cc[0];
1083 if (pa > 1)
1084 *buf++ = cc[1];
1085 }
1086 len += pa;
1087 psz += sizeof(wxUint32);
1088 }
1089 if (buf && len<n) *buf=0;
1090
1091 return len;
1092}
1093
1094
1095// copy 16bit String to 32bit MB
1096size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1097{
1098 size_t len=0;
1099
1100 while (*psz && (!buf || len < n))
1101 {
1102 wxUint32 cc;
1103
b5153fd8
VZ
1104 // cast is ok for WC_UTF16
1105 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
1106 if (pa == (size_t)-1)
1107 return pa;
1108
1109 if (buf)
1110 {
1111 *(wxUint32*)buf = cc;
1112 buf += sizeof(wxUint32);
1113 }
1114 len += sizeof(wxUint32);
1115 psz += pa;
1116 }
b5153fd8
VZ
1117
1118 if (buf && len<=n-sizeof(wxUint32))
1119 *(wxUint32*)buf=0;
c91830cb
VZ
1120
1121 return len;
1122}
1123
1124
1125
1126// swap 32bit MB to 16bit String
1127size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1128{
1129 size_t len=0;
1130
1131 while (*(wxUint32*)psz && (!buf || len < n))
1132 {
1133 char tmp[4];
1134 tmp[0] = psz[3]; tmp[1] = psz[2];
1135 tmp[2] = psz[1]; tmp[3] = psz[0];
1136
1137
1138 wxUint16 cc[2];
1139
1140 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1141 if (pa == (size_t)-1)
1142 return pa;
1143
1144 if (buf)
1145 {
1146 *buf++ = cc[0];
1147 if (pa > 1)
1148 *buf++ = cc[1];
1149 }
1150 len += pa;
1151 psz += sizeof(wxUint32);
1152 }
b5153fd8
VZ
1153
1154 if (buf && len<n)
1155 *buf=0;
c91830cb
VZ
1156
1157 return len;
1158}
1159
1160
1161// swap 16bit String to 32bit MB
1162size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1163{
1164 size_t len=0;
1165
1166 while (*psz && (!buf || len < n))
1167 {
1168 char cc[4];
1169
b5153fd8
VZ
1170 // cast is ok for WC_UTF16
1171 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
1172 if (pa == (size_t)-1)
1173 return pa;
1174
1175 if (buf)
1176 {
1177 *buf++ = cc[3];
1178 *buf++ = cc[2];
1179 *buf++ = cc[1];
1180 *buf++ = cc[0];
1181 }
1182 len += sizeof(wxUint32);
1183 psz += pa;
1184 }
b5153fd8
VZ
1185
1186 if (buf && len<=n-sizeof(wxUint32))
1187 *(wxUint32*)buf=0;
c91830cb
VZ
1188
1189 return len;
1190}
1191
1192#else // WC_UTF16
1193
1194
1195// copy 32bit MB to 32bit String
1196size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1197{
1198 size_t len=0;
1199
1200 while (*(wxUint32*)psz && (!buf || len < n))
1201 {
1202 if (buf)
1203 *buf++ = *(wxUint32*)psz;
1204 len++;
1205 psz += sizeof(wxUint32);
1206 }
b5153fd8
VZ
1207
1208 if (buf && len<n)
1209 *buf=0;
c91830cb
VZ
1210
1211 return len;
1212}
1213
1214
1215// copy 32bit String to 32bit MB
1216size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1217{
1218 size_t len=0;
1219
1220 while (*psz && (!buf || len < n))
1221 {
1222 if (buf)
1223 {
1224 *(wxUint32*)buf = *psz;
1225 buf += sizeof(wxUint32);
1226 }
1227
1228 len += sizeof(wxUint32);
1229 psz++;
1230 }
1231
b5153fd8
VZ
1232 if (buf && len<=n-sizeof(wxUint32))
1233 *(wxUint32*)buf=0;
c91830cb
VZ
1234
1235 return len;
1236}
1237
1238
1239// swap 32bit MB to 32bit String
1240size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1241{
1242 size_t len=0;
1243
1244 while (*(wxUint32*)psz && (!buf || len < n))
1245 {
1246 if (buf)
1247 {
1248 ((char *)buf)[0] = psz[3];
1249 ((char *)buf)[1] = psz[2];
1250 ((char *)buf)[2] = psz[1];
1251 ((char *)buf)[3] = psz[0];
1252 buf++;
1253 }
1254 len++;
1255 psz += sizeof(wxUint32);
1256 }
b5153fd8
VZ
1257
1258 if (buf && len<n)
1259 *buf=0;
c91830cb
VZ
1260
1261 return len;
1262}
1263
1264
1265// swap 32bit String to 32bit MB
1266size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1267{
1268 size_t len=0;
1269
1270 while (*psz && (!buf || len < n))
1271 {
1272 if (buf)
1273 {
1274 *buf++ = ((char *)psz)[3];
1275 *buf++ = ((char *)psz)[2];
1276 *buf++ = ((char *)psz)[1];
1277 *buf++ = ((char *)psz)[0];
1278 }
1279 len += sizeof(wxUint32);
1280 psz++;
1281 }
b5153fd8
VZ
1282
1283 if (buf && len<=n-sizeof(wxUint32))
1284 *(wxUint32*)buf=0;
c91830cb
VZ
1285
1286 return len;
1287}
1288
1289
1290#endif // WC_UTF16
1291
1292
36acb880
VZ
1293// ============================================================================
1294// The classes doing conversion using the iconv_xxx() functions
1295// ============================================================================
3caec1bb 1296
b040e242 1297#ifdef HAVE_ICONV
3a0d76bc 1298
b1d547eb
VS
1299// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1300// E2BIG if output buffer is _exactly_ as big as needed. Such case is
1301// (unless there's yet another bug in glibc) the only case when iconv()
1302// returns with (size_t)-1 (which means error) and says there are 0 bytes
1303// left in the input buffer -- when _real_ error occurs,
1304// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1305// iconv() failure.
3caec1bb
VS
1306// [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft) tests whether an iconv() call returning cres
// and leaving bufLeft bytes of input unconverted really failed.
//
// The macro arguments are parenthesized so that passing an expression (and
// not just a variable) can't change the meaning of the comparisons.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft) ((cres) == (size_t)-1)
#endif

// casts the address of the input pointer to the type the second parameter of
// iconv() is declared with on this system
#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880
VZ
1315
1316// ----------------------------------------------------------------------------
e95354ec 1317// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1318// ----------------------------------------------------------------------------
1319
e95354ec 1320class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1321{
1322public:
e95354ec
VZ
1323 wxMBConv_iconv(const wxChar *name);
1324 virtual ~wxMBConv_iconv();
36acb880 1325
bde4baac
VZ
1326 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1327 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1328
e95354ec 1329 bool IsOk() const
36acb880
VZ
1330 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1331
1332protected:
1333 // the iconv handlers used to translate from multibyte to wide char and in
1334 // the other direction
1335 iconv_t m2w,
1336 w2m;
b1d547eb
VS
1337#if wxUSE_THREADS
1338 // guards access to m2w and w2m objects
1339 wxMutex m_iconvMutex;
1340#endif
36acb880
VZ
1341
1342private:
e95354ec 1343 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
1344 // available on this machine, it will remain NULL
1345 static const char *ms_wcCharsetName;
1346
1347 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1348 // different endian-ness than the native one
405d8f46 1349 static bool ms_wcNeedsSwap;
36acb880
VZ
1350};
1351
8f115891
MW
1352// make the constructor available for unit testing
1353WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1354{
1355 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1356 if ( !result->IsOk() )
1357 {
1358 delete result;
1359 return 0;
1360 }
1361 return result;
1362}
1363
e95354ec
VZ
1364const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1365bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1366
e95354ec 1367wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 1368{
0331b385
VZ
1369 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1370 // names for the charsets
200a9923 1371 const wxCharBuffer cname(wxString(name).ToAscii());
04c79127 1372
36acb880
VZ
1373 // check for charset that represents wchar_t:
1374 if (ms_wcCharsetName == NULL)
f1339c56 1375 {
e95354ec 1376 ms_wcNeedsSwap = false;
dccce9ea 1377
36acb880
VZ
1378 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1379 ms_wcCharsetName = WC_NAME_BEST;
04c79127 1380 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 1381
36acb880
VZ
1382 if (m2w == (iconv_t)-1)
1383 {
1384 // try charset w/o bytesex info (e.g. "UCS4")
1385 // and check for bytesex ourselves:
1386 ms_wcCharsetName = WC_NAME;
04c79127 1387 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
1388
1389 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
1390 if (m2w == (iconv_t)-1)
1391 {
36acb880 1392 ms_wcCharsetName = "WCHAR_T";
04c79127 1393 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 1394 }
3a0d76bc 1395
36acb880
VZ
1396 if (m2w != (iconv_t)-1)
1397 {
1398 char buf[2], *bufPtr;
1399 wchar_t wbuf[2], *wbufPtr;
1400 size_t insz, outsz;
1401 size_t res;
1402
1403 buf[0] = 'A';
1404 buf[1] = 0;
1405 wbuf[0] = 0;
1406 insz = 2;
1407 outsz = SIZEOF_WCHAR_T * 2;
1408 wbufPtr = wbuf;
1409 bufPtr = buf;
1410
1411 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1412 (char**)&wbufPtr, &outsz);
1413
1414 if (ICONV_FAILED(res, insz))
3a0d76bc 1415 {
36acb880
VZ
1416 ms_wcCharsetName = NULL;
1417 wxLogLastError(wxT("iconv"));
2b5f62a0 1418 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
1419 }
1420 else
1421 {
36acb880 1422 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
1423 }
1424 }
36acb880
VZ
1425 else
1426 {
1427 ms_wcCharsetName = NULL;
373658eb 1428
77ffb593 1429 // VS: we must not output an error here, since wxWidgets will safely
957686c8 1430 // fall back to using wxEncodingConverter.
ce6f8d6f 1431 wxLogTrace(TRACE_STRCONV, wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
36acb880 1432 }
3a0d76bc 1433 }
0944fceb
VZ
1434 wxLogTrace(TRACE_STRCONV,
1435 wxT("wchar_t charset is '%s', needs swap: %i"),
1436 ms_wcCharsetName ? ms_wcCharsetName : "<none>", ms_wcNeedsSwap);
3a0d76bc 1437 }
36acb880 1438 else // we already have ms_wcCharsetName
3caec1bb 1439 {
04c79127 1440 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 1441 }
dccce9ea 1442
36acb880
VZ
1443 // NB: don't ever pass NULL to iconv_open(), it may crash!
1444 if ( ms_wcCharsetName )
f1339c56 1445 {
04c79127 1446 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 1447 }
405d8f46
VZ
1448 else
1449 {
1450 w2m = (iconv_t)-1;
1451 }
36acb880 1452}
3caec1bb 1453
e95354ec 1454wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
1455{
1456 if ( m2w != (iconv_t)-1 )
1457 iconv_close(m2w);
1458 if ( w2m != (iconv_t)-1 )
1459 iconv_close(w2m);
1460}
3a0d76bc 1461
bde4baac 1462size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1463{
b1d547eb
VS
1464#if wxUSE_THREADS
1465 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1466 // Unfortunately there is a couple of global wxCSConv objects such as
1467 // wxConvLocal that are used all over wx code, so we have to make sure
1468 // the handle is used by at most one thread at the time. Otherwise
1469 // only a few wx classes would be safe to use from non-main threads
1470 // as MB<->WC conversion would fail "randomly".
1471 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1472#endif
3698ae71 1473
36acb880
VZ
1474 size_t inbuf = strlen(psz);
1475 size_t outbuf = n * SIZEOF_WCHAR_T;
1476 size_t res, cres;
1477 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1478 wchar_t *bufPtr = buf;
1479 const char *pszPtr = psz;
1480
1481 if (buf)
1482 {
1483 // have destination buffer, convert there
1484 cres = iconv(m2w,
1485 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1486 (char**)&bufPtr, &outbuf);
1487 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1488
36acb880 1489 if (ms_wcNeedsSwap)
3a0d76bc 1490 {
36acb880
VZ
1491 // convert to native endianness
1492 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 1493 }
adb45366 1494
49dd9820
VS
1495 // NB: iconv was given only strlen(psz) characters on input, and so
1496 // it couldn't convert the trailing zero. Let's do it ourselves
1497 // if there's some room left for it in the output buffer.
1498 if (res < n)
1499 buf[res] = 0;
36acb880
VZ
1500 }
1501 else
1502 {
1503 // no destination buffer... convert using temp buffer
1504 // to calculate destination buffer requirement
1505 wchar_t tbuf[8];
1506 res = 0;
1507 do {
1508 bufPtr = tbuf;
1509 outbuf = 8*SIZEOF_WCHAR_T;
1510
1511 cres = iconv(m2w,
1512 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1513 (char**)&bufPtr, &outbuf );
1514
1515 res += 8-(outbuf/SIZEOF_WCHAR_T);
1516 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1517 }
dccce9ea 1518
36acb880 1519 if (ICONV_FAILED(cres, inbuf))
f1339c56 1520 {
36acb880 1521 //VS: it is ok if iconv fails, hence trace only
ce6f8d6f 1522 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
36acb880
VZ
1523 return (size_t)-1;
1524 }
1525
1526 return res;
1527}
1528
bde4baac 1529size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1530{
b1d547eb
VS
1531#if wxUSE_THREADS
1532 // NB: explained in MB2WC
1533 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1534#endif
3698ae71 1535
f8d791e0 1536 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1537 size_t outbuf = n;
1538 size_t res, cres;
3a0d76bc 1539
36acb880 1540 wchar_t *tmpbuf = 0;
3caec1bb 1541
36acb880
VZ
1542 if (ms_wcNeedsSwap)
1543 {
1544 // need to copy to temp buffer to switch endianness
1545 // this absolutely doesn't rock!
1546 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1547 // could be in read-only memory, or be accessed in some other thread)
1548 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1549 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1550 WC_BSWAP(tmpbuf, inbuf)
1551 psz=tmpbuf;
1552 }
3a0d76bc 1553
36acb880
VZ
1554 if (buf)
1555 {
1556 // have destination buffer, convert there
1557 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1558
36acb880 1559 res = n-outbuf;
adb45366 1560
49dd9820
VS
1561 // NB: iconv was given only wcslen(psz) characters on input, and so
1562 // it couldn't convert the trailing zero. Let's do it ourselves
1563 // if there's some room left for it in the output buffer.
1564 if (res < n)
1565 buf[0] = 0;
36acb880
VZ
1566 }
1567 else
1568 {
1569 // no destination buffer... convert using temp buffer
1570 // to calculate destination buffer requirement
1571 char tbuf[16];
1572 res = 0;
1573 do {
1574 buf = tbuf; outbuf = 16;
1575
1576 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1577
36acb880
VZ
1578 res += 16 - outbuf;
1579 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1580 }
dccce9ea 1581
36acb880
VZ
1582 if (ms_wcNeedsSwap)
1583 {
1584 free(tmpbuf);
1585 }
dccce9ea 1586
36acb880
VZ
1587 if (ICONV_FAILED(cres, inbuf))
1588 {
1589 //VS: it is ok if iconv fails, hence trace only
ce6f8d6f 1590 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
36acb880
VZ
1591 return (size_t)-1;
1592 }
1593
1594 return res;
1595}
1596
b040e242 1597#endif // HAVE_ICONV
36acb880 1598
e95354ec 1599
36acb880
VZ
1600// ============================================================================
1601// Win32 conversion classes
1602// ============================================================================
1cd52418 1603
e95354ec 1604#ifdef wxHAVE_WIN32_MB2WC
373658eb 1605
8b04d4c4 1606// from utils.cpp
d775fa82 1607#if wxUSE_FONTMAP
8b04d4c4
VZ
1608extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1609extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1610#endif
373658eb 1611
e95354ec 1612class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1613{
1614public:
bde4baac
VZ
1615 wxMBConv_win32()
1616 {
1617 m_CodePage = CP_ACP;
1618 }
1619
7608a683 1620#if wxUSE_FONTMAP
e95354ec 1621 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1622 {
1623 m_CodePage = wxCharsetToCodepage(name);
1624 }
dccce9ea 1625
e95354ec 1626 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1627 {
1628 m_CodePage = wxEncodingToCodepage(encoding);
1629 }
7608a683 1630#endif
8b04d4c4 1631
bde4baac 1632 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1633 {
02272c9c
VZ
1634 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1635 // the behaviour is not compatible with the Unix version (using iconv)
1636 // and break the library itself, e.g. wxTextInputStream::NextChar()
1637 // wouldn't work if reading an incomplete MB char didn't result in an
1638 // error
667e5b3e
VZ
1639 //
1640 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1641 // an error (tested under Windows Server 2003) and apparently it is
1642 // done on purpose, i.e. the function accepts any input in this case
1643 // and although I'd prefer to return error on ill-formed output, our
1644 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1645 // explicitly ill-formed according to RFC 2152) neither so we don't
1646 // even have any fallback here...
1647 int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1648
2b5f62a0
VZ
1649 const size_t len = ::MultiByteToWideChar
1650 (
1651 m_CodePage, // code page
667e5b3e 1652 flags, // flags: fall on error
2b5f62a0
VZ
1653 psz, // input string
1654 -1, // its length (NUL-terminated)
b4da152e 1655 buf, // output string
2b5f62a0
VZ
1656 buf ? n : 0 // size of output buffer
1657 );
1658
03a991bc
VZ
1659 // note that it returns count of written chars for buf != NULL and size
1660 // of the needed buffer for buf == NULL so in either case the length of
1661 // the string (which never includes the terminating NUL) is one less
1662 return len ? len - 1 : (size_t)-1;
f1339c56 1663 }
dccce9ea 1664
13dd924a 1665 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1666 {
13dd924a
VZ
1667 /*
1668 we have a problem here: by default, WideCharToMultiByte() may
1669 replace characters unrepresentable in the target code page with bad
1670 quality approximations such as turning "1/2" symbol (U+00BD) into
1671 "1" for the code pages which don't have it and we, obviously, want
1672 to avoid this at any price
d775fa82 1673
13dd924a
VZ
1674 the trouble is that this function does it _silently_, i.e. it won't
1675 even tell us whether it did or not... Win98/2000 and higher provide
1676 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1677 we have to resort to a round trip, i.e. check that converting back
1678 results in the same string -- this is, of course, expensive but
1679 otherwise we simply can't be sure to not garble the data.
1680 */
1681
1682 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1683 // it doesn't work with CJK encodings (which we test for rather roughly
1684 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1685 // supporting it
907173e5
WS
1686 BOOL usedDef wxDUMMY_INITIALIZE(false);
1687 BOOL *pUsedDef;
13dd924a
VZ
1688 int flags;
1689 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1690 {
1691 // it's our lucky day
1692 flags = WC_NO_BEST_FIT_CHARS;
1693 pUsedDef = &usedDef;
1694 }
1695 else // old system or unsupported encoding
1696 {
1697 flags = 0;
1698 pUsedDef = NULL;
1699 }
1700
2b5f62a0
VZ
1701 const size_t len = ::WideCharToMultiByte
1702 (
1703 m_CodePage, // code page
13dd924a
VZ
1704 flags, // either none or no best fit
1705 pwz, // input string
2b5f62a0
VZ
1706 -1, // it is (wide) NUL-terminated
1707 buf, // output buffer
1708 buf ? n : 0, // and its size
1709 NULL, // default "replacement" char
13dd924a 1710 pUsedDef // [out] was it used?
2b5f62a0
VZ
1711 );
1712
13dd924a
VZ
1713 if ( !len )
1714 {
1715 // function totally failed
1716 return (size_t)-1;
1717 }
1718
1719 // if we were really converting, check if we succeeded
1720 if ( buf )
1721 {
1722 if ( flags )
1723 {
1724 // check if the conversion failed, i.e. if any replacements
1725 // were done
1726 if ( usedDef )
1727 return (size_t)-1;
1728 }
1729 else // we must resort to double tripping...
1730 {
1731 wxWCharBuffer wcBuf(n);
1732 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1733 wcscmp(wcBuf, pwz) != 0 )
1734 {
1735 // we didn't obtain the same thing we started from, hence
1736 // the conversion was lossy and we consider that it failed
1737 return (size_t)-1;
1738 }
1739 }
1740 }
1741
03a991bc 1742 // see the comment above for the reason of "len - 1"
13dd924a 1743 return len - 1;
f1339c56 1744 }
dccce9ea 1745
13dd924a
VZ
1746 bool IsOk() const { return m_CodePage != -1; }
1747
1748private:
1749 static bool CanUseNoBestFit()
1750 {
1751 static int s_isWin98Or2k = -1;
1752
1753 if ( s_isWin98Or2k == -1 )
1754 {
1755 int verMaj, verMin;
1756 switch ( wxGetOsVersion(&verMaj, &verMin) )
1757 {
1758 case wxWIN95:
1759 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1760 break;
1761
1762 case wxWINDOWS_NT:
1763 s_isWin98Or2k = verMaj >= 5;
1764 break;
1765
1766 default:
1767 // unknown, be conseravtive by default
1768 s_isWin98Or2k = 0;
1769 }
1770
1771 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1772 }
1773
1774 return s_isWin98Or2k == 1;
1775 }
f1339c56 1776
b1d66b54 1777 long m_CodePage;
1cd52418 1778};
e95354ec
VZ
1779
1780#endif // wxHAVE_WIN32_MB2WC
1781
f7e98dee
RN
1782// ============================================================================
1783// Cocoa conversion classes
1784// ============================================================================
1785
1786#if defined(__WXCOCOA__)
1787
ecd9653b 1788// RN: There is no UTF-32 support in either Core Foundation or
f7e98dee
RN
1789// Cocoa. Strangely enough, internally Core Foundation uses
1790// UTF 32 internally quite a bit - its just not public (yet).
1791
1792#include <CoreFoundation/CFString.h>
1793#include <CoreFoundation/CFStringEncodingExt.h>
1794
1795CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 1796{
638357a0 1797 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ecd9653b
WS
1798 if ( encoding == wxFONTENCODING_DEFAULT )
1799 {
638357a0 1800 enc = CFStringGetSystemEncoding();
ecd9653b
WS
1801 }
1802 else switch( encoding)
1803 {
1804 case wxFONTENCODING_ISO8859_1 :
1805 enc = kCFStringEncodingISOLatin1 ;
1806 break ;
1807 case wxFONTENCODING_ISO8859_2 :
1808 enc = kCFStringEncodingISOLatin2;
1809 break ;
1810 case wxFONTENCODING_ISO8859_3 :
1811 enc = kCFStringEncodingISOLatin3 ;
1812 break ;
1813 case wxFONTENCODING_ISO8859_4 :
1814 enc = kCFStringEncodingISOLatin4;
1815 break ;
1816 case wxFONTENCODING_ISO8859_5 :
1817 enc = kCFStringEncodingISOLatinCyrillic;
1818 break ;
1819 case wxFONTENCODING_ISO8859_6 :
1820 enc = kCFStringEncodingISOLatinArabic;
1821 break ;
1822 case wxFONTENCODING_ISO8859_7 :
1823 enc = kCFStringEncodingISOLatinGreek;
1824 break ;
1825 case wxFONTENCODING_ISO8859_8 :
1826 enc = kCFStringEncodingISOLatinHebrew;
1827 break ;
1828 case wxFONTENCODING_ISO8859_9 :
1829 enc = kCFStringEncodingISOLatin5;
1830 break ;
1831 case wxFONTENCODING_ISO8859_10 :
1832 enc = kCFStringEncodingISOLatin6;
1833 break ;
1834 case wxFONTENCODING_ISO8859_11 :
1835 enc = kCFStringEncodingISOLatinThai;
1836 break ;
1837 case wxFONTENCODING_ISO8859_13 :
1838 enc = kCFStringEncodingISOLatin7;
1839 break ;
1840 case wxFONTENCODING_ISO8859_14 :
1841 enc = kCFStringEncodingISOLatin8;
1842 break ;
1843 case wxFONTENCODING_ISO8859_15 :
1844 enc = kCFStringEncodingISOLatin9;
1845 break ;
1846
1847 case wxFONTENCODING_KOI8 :
1848 enc = kCFStringEncodingKOI8_R;
1849 break ;
1850 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1851 enc = kCFStringEncodingDOSRussian;
1852 break ;
1853
1854// case wxFONTENCODING_BULGARIAN :
1855// enc = ;
1856// break ;
1857
1858 case wxFONTENCODING_CP437 :
1859 enc =kCFStringEncodingDOSLatinUS ;
1860 break ;
1861 case wxFONTENCODING_CP850 :
1862 enc = kCFStringEncodingDOSLatin1;
1863 break ;
1864 case wxFONTENCODING_CP852 :
1865 enc = kCFStringEncodingDOSLatin2;
1866 break ;
1867 case wxFONTENCODING_CP855 :
1868 enc = kCFStringEncodingDOSCyrillic;
1869 break ;
1870 case wxFONTENCODING_CP866 :
1871 enc =kCFStringEncodingDOSRussian ;
1872 break ;
1873 case wxFONTENCODING_CP874 :
1874 enc = kCFStringEncodingDOSThai;
1875 break ;
1876 case wxFONTENCODING_CP932 :
1877 enc = kCFStringEncodingDOSJapanese;
1878 break ;
1879 case wxFONTENCODING_CP936 :
1880 enc =kCFStringEncodingDOSChineseSimplif ;
1881 break ;
1882 case wxFONTENCODING_CP949 :
1883 enc = kCFStringEncodingDOSKorean;
1884 break ;
1885 case wxFONTENCODING_CP950 :
1886 enc = kCFStringEncodingDOSChineseTrad;
1887 break ;
ecd9653b
WS
1888 case wxFONTENCODING_CP1250 :
1889 enc = kCFStringEncodingWindowsLatin2;
1890 break ;
1891 case wxFONTENCODING_CP1251 :
1892 enc =kCFStringEncodingWindowsCyrillic ;
1893 break ;
1894 case wxFONTENCODING_CP1252 :
1895 enc =kCFStringEncodingWindowsLatin1 ;
1896 break ;
1897 case wxFONTENCODING_CP1253 :
1898 enc = kCFStringEncodingWindowsGreek;
1899 break ;
1900 case wxFONTENCODING_CP1254 :
1901 enc = kCFStringEncodingWindowsLatin5;
1902 break ;
1903 case wxFONTENCODING_CP1255 :
1904 enc =kCFStringEncodingWindowsHebrew ;
1905 break ;
1906 case wxFONTENCODING_CP1256 :
1907 enc =kCFStringEncodingWindowsArabic ;
1908 break ;
1909 case wxFONTENCODING_CP1257 :
1910 enc = kCFStringEncodingWindowsBalticRim;
1911 break ;
638357a0
RN
1912// This only really encodes to UTF7 (if that) evidently
1913// case wxFONTENCODING_UTF7 :
1914// enc = kCFStringEncodingNonLossyASCII ;
1915// break ;
ecd9653b
WS
1916 case wxFONTENCODING_UTF8 :
1917 enc = kCFStringEncodingUTF8 ;
1918 break ;
1919 case wxFONTENCODING_EUC_JP :
1920 enc = kCFStringEncodingEUC_JP;
1921 break ;
1922 case wxFONTENCODING_UTF16 :
f7e98dee 1923 enc = kCFStringEncodingUnicode ;
ecd9653b 1924 break ;
f7e98dee
RN
1925 case wxFONTENCODING_MACROMAN :
1926 enc = kCFStringEncodingMacRoman ;
1927 break ;
1928 case wxFONTENCODING_MACJAPANESE :
1929 enc = kCFStringEncodingMacJapanese ;
1930 break ;
1931 case wxFONTENCODING_MACCHINESETRAD :
1932 enc = kCFStringEncodingMacChineseTrad ;
1933 break ;
1934 case wxFONTENCODING_MACKOREAN :
1935 enc = kCFStringEncodingMacKorean ;
1936 break ;
1937 case wxFONTENCODING_MACARABIC :
1938 enc = kCFStringEncodingMacArabic ;
1939 break ;
1940 case wxFONTENCODING_MACHEBREW :
1941 enc = kCFStringEncodingMacHebrew ;
1942 break ;
1943 case wxFONTENCODING_MACGREEK :
1944 enc = kCFStringEncodingMacGreek ;
1945 break ;
1946 case wxFONTENCODING_MACCYRILLIC :
1947 enc = kCFStringEncodingMacCyrillic ;
1948 break ;
1949 case wxFONTENCODING_MACDEVANAGARI :
1950 enc = kCFStringEncodingMacDevanagari ;
1951 break ;
1952 case wxFONTENCODING_MACGURMUKHI :
1953 enc = kCFStringEncodingMacGurmukhi ;
1954 break ;
1955 case wxFONTENCODING_MACGUJARATI :
1956 enc = kCFStringEncodingMacGujarati ;
1957 break ;
1958 case wxFONTENCODING_MACORIYA :
1959 enc = kCFStringEncodingMacOriya ;
1960 break ;
1961 case wxFONTENCODING_MACBENGALI :
1962 enc = kCFStringEncodingMacBengali ;
1963 break ;
1964 case wxFONTENCODING_MACTAMIL :
1965 enc = kCFStringEncodingMacTamil ;
1966 break ;
1967 case wxFONTENCODING_MACTELUGU :
1968 enc = kCFStringEncodingMacTelugu ;
1969 break ;
1970 case wxFONTENCODING_MACKANNADA :
1971 enc = kCFStringEncodingMacKannada ;
1972 break ;
1973 case wxFONTENCODING_MACMALAJALAM :
1974 enc = kCFStringEncodingMacMalayalam ;
1975 break ;
1976 case wxFONTENCODING_MACSINHALESE :
1977 enc = kCFStringEncodingMacSinhalese ;
1978 break ;
1979 case wxFONTENCODING_MACBURMESE :
1980 enc = kCFStringEncodingMacBurmese ;
1981 break ;
1982 case wxFONTENCODING_MACKHMER :
1983 enc = kCFStringEncodingMacKhmer ;
1984 break ;
1985 case wxFONTENCODING_MACTHAI :
1986 enc = kCFStringEncodingMacThai ;
1987 break ;
1988 case wxFONTENCODING_MACLAOTIAN :
1989 enc = kCFStringEncodingMacLaotian ;
1990 break ;
1991 case wxFONTENCODING_MACGEORGIAN :
1992 enc = kCFStringEncodingMacGeorgian ;
1993 break ;
1994 case wxFONTENCODING_MACARMENIAN :
1995 enc = kCFStringEncodingMacArmenian ;
1996 break ;
1997 case wxFONTENCODING_MACCHINESESIMP :
1998 enc = kCFStringEncodingMacChineseSimp ;
1999 break ;
2000 case wxFONTENCODING_MACTIBETAN :
2001 enc = kCFStringEncodingMacTibetan ;
2002 break ;
2003 case wxFONTENCODING_MACMONGOLIAN :
2004 enc = kCFStringEncodingMacMongolian ;
2005 break ;
2006 case wxFONTENCODING_MACETHIOPIC :
2007 enc = kCFStringEncodingMacEthiopic ;
2008 break ;
2009 case wxFONTENCODING_MACCENTRALEUR :
2010 enc = kCFStringEncodingMacCentralEurRoman ;
2011 break ;
2012 case wxFONTENCODING_MACVIATNAMESE :
2013 enc = kCFStringEncodingMacVietnamese ;
2014 break ;
2015 case wxFONTENCODING_MACARABICEXT :
2016 enc = kCFStringEncodingMacExtArabic ;
2017 break ;
2018 case wxFONTENCODING_MACSYMBOL :
2019 enc = kCFStringEncodingMacSymbol ;
2020 break ;
2021 case wxFONTENCODING_MACDINGBATS :
2022 enc = kCFStringEncodingMacDingbats ;
2023 break ;
2024 case wxFONTENCODING_MACTURKISH :
2025 enc = kCFStringEncodingMacTurkish ;
2026 break ;
2027 case wxFONTENCODING_MACCROATIAN :
2028 enc = kCFStringEncodingMacCroatian ;
2029 break ;
2030 case wxFONTENCODING_MACICELANDIC :
2031 enc = kCFStringEncodingMacIcelandic ;
2032 break ;
2033 case wxFONTENCODING_MACROMANIAN :
2034 enc = kCFStringEncodingMacRomanian ;
2035 break ;
2036 case wxFONTENCODING_MACCELTIC :
2037 enc = kCFStringEncodingMacCeltic ;
2038 break ;
2039 case wxFONTENCODING_MACGAELIC :
2040 enc = kCFStringEncodingMacGaelic ;
2041 break ;
ecd9653b
WS
2042// case wxFONTENCODING_MACKEYBOARD :
2043// enc = kCFStringEncodingMacKeyboardGlyphs ;
2044// break ;
2045 default :
2046 // because gcc is picky
2047 break ;
2048 } ;
2049 return enc ;
f7e98dee
RN
2050}
2051
f7e98dee
RN
2052class wxMBConv_cocoa : public wxMBConv
2053{
2054public:
2055 wxMBConv_cocoa()
2056 {
2057 Init(CFStringGetSystemEncoding()) ;
2058 }
2059
a6900d10 2060#if wxUSE_FONTMAP
f7e98dee
RN
2061 wxMBConv_cocoa(const wxChar* name)
2062 {
267e11c5 2063 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
f7e98dee 2064 }
a6900d10 2065#endif
f7e98dee
RN
2066
2067 wxMBConv_cocoa(wxFontEncoding encoding)
2068 {
2069 Init( wxCFStringEncFromFontEnc(encoding) );
2070 }
2071
2072 ~wxMBConv_cocoa()
2073 {
2074 }
2075
2076 void Init( CFStringEncoding encoding)
2077 {
638357a0 2078 m_encoding = encoding ;
f7e98dee
RN
2079 }
2080
2081 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2082 {
2083 wxASSERT(szUnConv);
ecd9653b 2084
638357a0
RN
2085 CFStringRef theString = CFStringCreateWithBytes (
2086 NULL, //the allocator
2087 (const UInt8*)szUnConv,
2088 strlen(szUnConv),
2089 m_encoding,
2090 false //no BOM/external representation
f7e98dee
RN
2091 );
2092
2093 wxASSERT(theString);
2094
638357a0
RN
2095 size_t nOutLength = CFStringGetLength(theString);
2096
2097 if (szOut == NULL)
f7e98dee 2098 {
f7e98dee 2099 CFRelease(theString);
638357a0 2100 return nOutLength;
f7e98dee 2101 }
ecd9653b 2102
638357a0 2103 CFRange theRange = { 0, nOutSize };
ecd9653b 2104
638357a0
RN
2105#if SIZEOF_WCHAR_T == 4
2106 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2107#endif
3698ae71 2108
f7e98dee 2109 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
3698ae71 2110
f7e98dee 2111 CFRelease(theString);
ecd9653b 2112
638357a0 2113 szUniCharBuffer[nOutLength] = '\0' ;
f7e98dee
RN
2114
2115#if SIZEOF_WCHAR_T == 4
2116 wxMBConvUTF16 converter ;
638357a0 2117 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
f7e98dee
RN
2118 delete[] szUniCharBuffer;
2119#endif
3698ae71 2120
638357a0 2121 return nOutLength;
f7e98dee
RN
2122 }
2123
2124 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2125 {
638357a0 2126 wxASSERT(szUnConv);
3698ae71 2127
f7e98dee 2128 size_t nRealOutSize;
638357a0 2129 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 2130 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2131
f7e98dee 2132#if SIZEOF_WCHAR_T == 4
d9d488cf 2133 wxMBConvUTF16 converter ;
f7e98dee
RN
2134 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2135 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2136 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2137 nBufSize /= sizeof(UniChar);
f7e98dee
RN
2138#endif
2139
2140 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2141 NULL, //allocator
2142 szUniBuffer,
2143 nBufSize,
638357a0 2144 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 2145 );
ecd9653b 2146
f7e98dee 2147 wxASSERT(theString);
ecd9653b 2148
f7e98dee 2149 //Note that CER puts a BOM when converting to unicode
638357a0
RN
2150 //so we check and use getchars instead in that case
2151 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 2152 {
638357a0
RN
2153 if (szOut != NULL)
2154 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
3698ae71 2155
638357a0
RN
2156 nRealOutSize = CFStringGetLength(theString) + 1;
2157 }
2158 else
2159 {
2160 CFStringGetBytes(
2161 theString,
2162 CFRangeMake(0, CFStringGetLength(theString)),
2163 m_encoding,
2164 0, //what to put in characters that can't be converted -
2165 //0 tells CFString to return NULL if it meets such a character
2166 false, //not an external representation
2167 (UInt8*) szOut,
3698ae71 2168 nOutSize,
638357a0
RN
2169 (CFIndex*) &nRealOutSize
2170 );
f7e98dee 2171 }
ecd9653b 2172
638357a0 2173 CFRelease(theString);
ecd9653b 2174
638357a0
RN
2175#if SIZEOF_WCHAR_T == 4
2176 delete[] szUniBuffer;
2177#endif
ecd9653b 2178
f7e98dee
RN
2179 return nRealOutSize - 1;
2180 }
2181
2182 bool IsOk() const
ecd9653b 2183 {
3698ae71 2184 return m_encoding != kCFStringEncodingInvalidId &&
638357a0 2185 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2186 }
2187
2188private:
638357a0 2189 CFStringEncoding m_encoding ;
f7e98dee
RN
2190};
2191
2192#endif // defined(__WXCOCOA__)
2193
335d31e0
SC
2194// ============================================================================
2195// Mac conversion classes
2196// ============================================================================
2197
2198#if defined(__WXMAC__) && defined(TARGET_CARBON)
2199
2200class wxMBConv_mac : public wxMBConv
2201{
2202public:
2203 wxMBConv_mac()
2204 {
2205 Init(CFStringGetSystemEncoding()) ;
2206 }
2207
2d1659cf 2208#if wxUSE_FONTMAP
335d31e0
SC
2209 wxMBConv_mac(const wxChar* name)
2210 {
267e11c5 2211 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
335d31e0 2212 }
2d1659cf 2213#endif
335d31e0
SC
2214
2215 wxMBConv_mac(wxFontEncoding encoding)
2216 {
d775fa82
WS
2217 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2218 }
2219
2220 ~wxMBConv_mac()
2221 {
2222 OSStatus status = noErr ;
2223 status = TECDisposeConverter(m_MB2WC_converter);
2224 status = TECDisposeConverter(m_WC2MB_converter);
2225 }
2226
2227
2228 void Init( TextEncodingBase encoding)
2229 {
2230 OSStatus status = noErr ;
2231 m_char_encoding = encoding ;
2232 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2233
2234 status = TECCreateConverter(&m_MB2WC_converter,
2235 m_char_encoding,
2236 m_unicode_encoding);
2237 status = TECCreateConverter(&m_WC2MB_converter,
2238 m_unicode_encoding,
2239 m_char_encoding);
2240 }
2241
335d31e0
SC
2242 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2243 {
d775fa82
WS
2244 OSStatus status = noErr ;
2245 ByteCount byteOutLen ;
2246 ByteCount byteInLen = strlen(psz) ;
2247 wchar_t *tbuf = NULL ;
2248 UniChar* ubuf = NULL ;
2249 size_t res = 0 ;
2250
2251 if (buf == NULL)
2252 {
638357a0 2253 //apple specs say at least 32
c543817b 2254 n = wxMax( 32 , byteInLen ) ;
d775fa82
WS
2255 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2256 }
2257 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
f3a355ce 2258#if SIZEOF_WCHAR_T == 4
d775fa82 2259 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2260#else
d775fa82 2261 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2262#endif
d775fa82
WS
2263 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2264 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
f3a355ce 2265#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2266 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2267 // is not properly terminated we get random characters at the end
2268 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d9d488cf 2269 wxMBConvUTF16 converter ;
d775fa82
WS
2270 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2271 free( ubuf ) ;
f3a355ce 2272#else
d775fa82 2273 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2274#endif
d775fa82
WS
2275 if ( buf == NULL )
2276 free(tbuf) ;
335d31e0 2277
335d31e0
SC
2278 if ( buf && res < n)
2279 buf[res] = 0;
2280
d775fa82 2281 return res ;
335d31e0
SC
2282 }
2283
2284 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82
WS
2285 {
2286 OSStatus status = noErr ;
2287 ByteCount byteOutLen ;
2288 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2289
2290 char *tbuf = NULL ;
2291
2292 if (buf == NULL)
2293 {
638357a0 2294 //apple specs say at least 32
c543817b 2295 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2296 tbuf = (char*) malloc( n ) ;
2297 }
2298
2299 ByteCount byteBufferLen = n ;
2300 UniChar* ubuf = NULL ;
f3a355ce 2301#if SIZEOF_WCHAR_T == 4
d9d488cf 2302 wxMBConvUTF16 converter ;
d775fa82
WS
2303 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2304 byteInLen = unicharlen ;
2305 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2306 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce 2307#else
d775fa82 2308 ubuf = (UniChar*) psz ;
f3a355ce 2309#endif
d775fa82
WS
2310 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2311 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
f3a355ce 2312#if SIZEOF_WCHAR_T == 4
d775fa82 2313 free( ubuf ) ;
f3a355ce 2314#endif
d775fa82
WS
2315 if ( buf == NULL )
2316 free(tbuf) ;
335d31e0 2317
d775fa82 2318 size_t res = byteOutLen ;
335d31e0 2319 if ( buf && res < n)
638357a0 2320 {
335d31e0 2321 buf[res] = 0;
3698ae71 2322
638357a0
RN
2323 //we need to double-trip to verify it didn't insert any ? in place
2324 //of bogus characters
2325 wxWCharBuffer wcBuf(n);
2326 size_t pszlen = wxWcslen(psz);
2327 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2328 wxWcslen(wcBuf) != pszlen ||
2329 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2330 {
2331 // we didn't obtain the same thing we started from, hence
2332 // the conversion was lossy and we consider that it failed
2333 return (size_t)-1;
2334 }
2335 }
335d31e0 2336
d775fa82 2337 return res ;
335d31e0
SC
2338 }
2339
2340 bool IsOk() const
2341 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2342
2343private:
d775fa82
WS
2344 TECObjectRef m_MB2WC_converter ;
2345 TECObjectRef m_WC2MB_converter ;
2346
2347 TextEncodingBase m_char_encoding ;
2348 TextEncodingBase m_unicode_encoding ;
335d31e0
SC
2349};
2350
2351#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 2352
36acb880
VZ
2353// ============================================================================
2354// wxEncodingConverter based conversion classes
2355// ============================================================================
2356
1e6feb95 2357#if wxUSE_FONTMAP
1cd52418 2358
e95354ec 2359class wxMBConv_wxwin : public wxMBConv
1cd52418 2360{
8b04d4c4
VZ
2361private:
2362 void Init()
2363 {
2364 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2365 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2366 }
2367
6001e347 2368public:
f1339c56
RR
2369 // temporarily just use wxEncodingConverter stuff,
2370 // so that it works while a better implementation is built
e95354ec 2371 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
2372 {
2373 if (name)
267e11c5 2374 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
2375 else
2376 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 2377
8b04d4c4
VZ
2378 Init();
2379 }
2380
e95354ec 2381 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
2382 {
2383 m_enc = enc;
2384
2385 Init();
f1339c56 2386 }
dccce9ea 2387
bde4baac 2388 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
2389 {
2390 size_t inbuf = strlen(psz);
dccce9ea 2391 if (buf)
c643a977
VS
2392 {
2393 if (!m2w.Convert(psz,buf))
2394 return (size_t)-1;
2395 }
f1339c56
RR
2396 return inbuf;
2397 }
dccce9ea 2398
bde4baac 2399 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 2400 {
f8d791e0 2401 const size_t inbuf = wxWcslen(psz);
f1339c56 2402 if (buf)
c643a977
VS
2403 {
2404 if (!w2m.Convert(psz,buf))
2405 return (size_t)-1;
2406 }
dccce9ea 2407
f1339c56
RR
2408 return inbuf;
2409 }
dccce9ea 2410
e95354ec 2411 bool IsOk() const { return m_ok; }
f1339c56
RR
2412
2413public:
8b04d4c4 2414 wxFontEncoding m_enc;
f1339c56 2415 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
2416
2417 // were we initialized successfully?
2418 bool m_ok;
fc7a2a60 2419
e95354ec 2420 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 2421};
6001e347 2422
8f115891
MW
2423// make the constructors available for unit testing
2424WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2425{
2426 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2427 if ( !result->IsOk() )
2428 {
2429 delete result;
2430 return 0;
2431 }
2432 return result;
2433}
2434
1e6feb95
VZ
2435#endif // wxUSE_FONTMAP
2436
36acb880
VZ
2437// ============================================================================
2438// wxCSConv implementation
2439// ============================================================================
2440
8b04d4c4 2441void wxCSConv::Init()
6001e347 2442{
e95354ec
VZ
2443 m_name = NULL;
2444 m_convReal = NULL;
2445 m_deferred = true;
2446}
2447
8b04d4c4
VZ
2448wxCSConv::wxCSConv(const wxChar *charset)
2449{
2450 Init();
82713003 2451
e95354ec
VZ
2452 if ( charset )
2453 {
e95354ec
VZ
2454 SetName(charset);
2455 }
bda3d86a
VZ
2456
2457 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
2458}
2459
8b04d4c4
VZ
2460wxCSConv::wxCSConv(wxFontEncoding encoding)
2461{
bda3d86a 2462 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
2463 {
2464 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2465
2466 encoding = wxFONTENCODING_SYSTEM;
2467 }
2468
8b04d4c4
VZ
2469 Init();
2470
bda3d86a 2471 m_encoding = encoding;
8b04d4c4
VZ
2472}
2473
6001e347
RR
2474wxCSConv::~wxCSConv()
2475{
65e50848
JS
2476 Clear();
2477}
2478
54380f29 2479wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 2480 : wxMBConv()
54380f29 2481{
8b04d4c4
VZ
2482 Init();
2483
54380f29 2484 SetName(conv.m_name);
8b04d4c4 2485 m_encoding = conv.m_encoding;
54380f29
GD
2486}
2487
2488wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2489{
2490 Clear();
8b04d4c4 2491
54380f29 2492 SetName(conv.m_name);
8b04d4c4
VZ
2493 m_encoding = conv.m_encoding;
2494
54380f29
GD
2495 return *this;
2496}
2497
65e50848
JS
2498void wxCSConv::Clear()
2499{
8b04d4c4 2500 free(m_name);
e95354ec 2501 delete m_convReal;
8b04d4c4 2502
65e50848 2503 m_name = NULL;
e95354ec 2504 m_convReal = NULL;
6001e347
RR
2505}
2506
2507void wxCSConv::SetName(const wxChar *charset)
2508{
f1339c56
RR
2509 if (charset)
2510 {
2511 m_name = wxStrdup(charset);
e95354ec 2512 m_deferred = true;
f1339c56 2513 }
6001e347
RR
2514}
2515
8b3eb85d
VZ
#if wxUSE_FONTMAP
#include "wx/hashmap.h"

// map from a font encoding to a charset name; wxCSConv::DoCreate() stores in
// it the name which iconv accepted for the encoding, or an empty string if
// none of the known names worked
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// the unique instance of the cache
static wxEncodingNameCache gs_nameCache;
#endif // wxUSE_FONTMAP
2524
e95354ec
VZ
2525wxMBConv *wxCSConv::DoCreate() const
2526{
ce6f8d6f
VZ
2527#if wxUSE_FONTMAP
2528 wxLogTrace(TRACE_STRCONV,
2529 wxT("creating conversion for %s"),
2530 (m_name ? m_name
2531 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2532#endif // wxUSE_FONTMAP
2533
c547282d
VZ
2534 // check for the special case of ASCII or ISO8859-1 charset: as we have
2535 // special knowledge of it anyhow, we don't need to create a special
2536 // conversion object
2537 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 2538 {
e95354ec
VZ
2539 // don't convert at all
2540 return NULL;
2541 }
dccce9ea 2542
e95354ec
VZ
2543 // we trust OS to do conversion better than we can so try external
2544 // conversion methods first
2545 //
2546 // the full order is:
2547 // 1. OS conversion (iconv() under Unix or Win32 API)
2548 // 2. hard coded conversions for UTF
2549 // 3. wxEncodingConverter as fall back
2550
2551 // step (1)
2552#ifdef HAVE_ICONV
c547282d 2553#if !wxUSE_FONTMAP
e95354ec 2554 if ( m_name )
c547282d 2555#endif // !wxUSE_FONTMAP
e95354ec 2556 {
c547282d 2557 wxString name(m_name);
8b3eb85d
VZ
2558 wxFontEncoding encoding(m_encoding);
2559
2560 if ( !name.empty() )
2561 {
2562 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2563 if ( conv->IsOk() )
2564 return conv;
2565
2566 delete conv;
c547282d
VZ
2567
2568#if wxUSE_FONTMAP
8b3eb85d
VZ
2569 encoding =
2570 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
c547282d 2571#endif // wxUSE_FONTMAP
8b3eb85d
VZ
2572 }
2573#if wxUSE_FONTMAP
2574 {
2575 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2576 if ( it != gs_nameCache.end() )
2577 {
2578 if ( it->second.empty() )
2579 return NULL;
c547282d 2580
8b3eb85d
VZ
2581 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2582 if ( conv->IsOk() )
2583 return conv;
e95354ec 2584
8b3eb85d
VZ
2585 delete conv;
2586 }
2587
2588 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2589
2590 for ( ; *names; ++names )
2591 {
2592 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2593 if ( conv->IsOk() )
2594 {
2595 gs_nameCache[encoding] = *names;
2596 return conv;
2597 }
2598
2599 delete conv;
2600 }
2601
2602 gs_nameCache[encoding] = ""; // cache the failure
2603 }
2604#endif // wxUSE_FONTMAP
e95354ec
VZ
2605 }
2606#endif // HAVE_ICONV
2607
2608#ifdef wxHAVE_WIN32_MB2WC
2609 {
7608a683 2610#if wxUSE_FONTMAP
e95354ec
VZ
2611 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2612 : new wxMBConv_win32(m_encoding);
2613 if ( conv->IsOk() )
2614 return conv;
2615
2616 delete conv;
7608a683
WS
2617#else
2618 return NULL;
2619#endif
e95354ec
VZ
2620 }
2621#endif // wxHAVE_WIN32_MB2WC
d775fa82
WS
2622#if defined(__WXMAC__)
2623 {
5c3c8676 2624 // leave UTF16 and UTF32 to the built-ins of wx
3698ae71 2625 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
5c3c8676 2626 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
d775fa82
WS
2627 {
2628
2d1659cf 2629#if wxUSE_FONTMAP
d775fa82
WS
2630 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2631 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
2632#else
2633 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2634#endif
d775fa82 2635 if ( conv->IsOk() )
f7e98dee
RN
2636 return conv;
2637
2638 delete conv;
2639 }
2640 }
2641#endif
2642#if defined(__WXCOCOA__)
2643 {
2644 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2645 {
2646
a6900d10 2647#if wxUSE_FONTMAP
f7e98dee
RN
2648 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2649 : new wxMBConv_cocoa(m_encoding);
a6900d10
RN
2650#else
2651 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2652#endif
f7e98dee 2653 if ( conv->IsOk() )
d775fa82
WS
2654 return conv;
2655
2656 delete conv;
2657 }
335d31e0
SC
2658 }
2659#endif
e95354ec
VZ
2660 // step (2)
2661 wxFontEncoding enc = m_encoding;
2662#if wxUSE_FONTMAP
c547282d
VZ
2663 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2664 {
2665 // use "false" to suppress interactive dialogs -- we can be called from
2666 // anywhere and popping up a dialog from here is the last thing we want to
2667 // do
267e11c5 2668 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 2669 }
e95354ec
VZ
2670#endif // wxUSE_FONTMAP
2671
2672 switch ( enc )
2673 {
2674 case wxFONTENCODING_UTF7:
2675 return new wxMBConvUTF7;
2676
2677 case wxFONTENCODING_UTF8:
2678 return new wxMBConvUTF8;
2679
e95354ec
VZ
2680 case wxFONTENCODING_UTF16BE:
2681 return new wxMBConvUTF16BE;
2682
2683 case wxFONTENCODING_UTF16LE:
2684 return new wxMBConvUTF16LE;
2685
e95354ec
VZ
2686 case wxFONTENCODING_UTF32BE:
2687 return new wxMBConvUTF32BE;
2688
2689 case wxFONTENCODING_UTF32LE:
2690 return new wxMBConvUTF32LE;
2691
2692 default:
2693 // nothing to do but put here to suppress gcc warnings
2694 ;
2695 }
2696
2697 // step (3)
2698#if wxUSE_FONTMAP
2699 {
2700 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2701 : new wxMBConv_wxwin(m_encoding);
2702 if ( conv->IsOk() )
2703 return conv;
2704
2705 delete conv;
2706 }
2707#endif // wxUSE_FONTMAP
2708
a58d4f4d
VS
2709 // NB: This is a hack to prevent deadlock. What could otherwise happen
2710 // in Unicode build: wxConvLocal creation ends up being here
2711 // because of some failure and logs the error. But wxLog will try to
2712 // attach timestamp, for which it will need wxConvLocal (to convert
2713 // time to char* and then wchar_t*), but that fails, tries to log
2714 // error, but wxLog has a (already locked) critical section that
2715 // guards static buffer.
2716 static bool alreadyLoggingError = false;
2717 if (!alreadyLoggingError)
2718 {
2719 alreadyLoggingError = true;
2720 wxLogError(_("Cannot convert from the charset '%s'!"),
2721 m_name ? m_name
e95354ec
VZ
2722 :
2723#if wxUSE_FONTMAP
267e11c5 2724 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
e95354ec
VZ
2725#else // !wxUSE_FONTMAP
2726 wxString::Format(_("encoding %s"), m_encoding).c_str()
2727#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2728 );
a58d4f4d
VS
2729 alreadyLoggingError = false;
2730 }
e95354ec
VZ
2731
2732 return NULL;
2733}
2734
2735void wxCSConv::CreateConvIfNeeded() const
2736{
2737 if ( m_deferred )
2738 {
2739 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
2740
2741#if wxUSE_INTL
2742 // if we don't have neither the name nor the encoding, use the default
2743 // encoding for this system
2744 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2745 {
4d312c22 2746 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
2747 }
2748#endif // wxUSE_INTL
2749
e95354ec
VZ
2750 self->m_convReal = DoCreate();
2751 self->m_deferred = false;
6001e347 2752 }
6001e347
RR
2753}
2754
2755size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2756{
e95354ec 2757 CreateConvIfNeeded();
dccce9ea 2758
e95354ec
VZ
2759 if (m_convReal)
2760 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
2761
2762 // latin-1 (direct)
4def3b35 2763 size_t len = strlen(psz);
dccce9ea 2764
f1339c56
RR
2765 if (buf)
2766 {
4def3b35 2767 for (size_t c = 0; c <= len; c++)
f1339c56
RR
2768 buf[c] = (unsigned char)(psz[c]);
2769 }
dccce9ea 2770
f1339c56 2771 return len;
6001e347
RR
2772}
2773
2774size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2775{
e95354ec 2776 CreateConvIfNeeded();
dccce9ea 2777
e95354ec
VZ
2778 if (m_convReal)
2779 return m_convReal->WC2MB(buf, psz, n);
1cd52418 2780
f1339c56 2781 // latin-1 (direct)
f8d791e0 2782 const size_t len = wxWcslen(psz);
f1339c56
RR
2783 if (buf)
2784 {
4def3b35 2785 for (size_t c = 0; c <= len; c++)
24642831
VS
2786 {
2787 if (psz[c] > 0xFF)
2788 return (size_t)-1;
907173e5 2789 buf[c] = (char)psz[c];
24642831
VS
2790 }
2791 }
2792 else
2793 {
2794 for (size_t c = 0; c <= len; c++)
2795 {
2796 if (psz[c] > 0xFF)
2797 return (size_t)-1;
2798 }
f1339c56 2799 }
dccce9ea 2800
f1339c56 2801 return len;
6001e347
RR
2802}
2803
bde4baac
VZ
2804// ----------------------------------------------------------------------------
2805// globals
2806// ----------------------------------------------------------------------------
2807
2808#ifdef __WINDOWS__
2809 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
2810#elif defined(__WXMAC__) && !defined(__MACH__)
2811 static wxMBConv_mac wxConvLibcObj ;
bde4baac 2812#else
dcc8fac0 2813 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
2814#endif
2815
2816static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2817static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2818static wxMBConvUTF7 wxConvUTF7Obj;
2819static wxMBConvUTF8 wxConvUTF8Obj;
c12b7f79 2820
bde4baac
VZ
2821WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2822WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2823WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2824WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2825WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2826WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
f5a1953b
VZ
2827WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2828#ifdef __WXOSX__
ea8ce907 2829 wxConvUTF8Obj;
f5a1953b 2830#else
ea8ce907 2831 wxConvLibcObj;
f5a1953b
VZ
2832#endif
2833
bde4baac
VZ
2834
2835#else // !wxUSE_WCHAR_T
2836
2837// stand-ins in absence of wchar_t
2838WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2839 wxConvISO8859_1,
2840 wxConvLocal,
2841 wxConvUTF8;
2842
2843#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
2844
2845