]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
Added licence/copyright information
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
f6bcfd97
BP
15// ============================================================================
16// declarations
17// ============================================================================
18
19// ----------------------------------------------------------------------------
20// headers
21// ----------------------------------------------------------------------------
22
14f355c2 23#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
24 #pragma implementation "strconv.h"
25#endif
26
27// For compilers that support precompilation, includes "wx.h".
28#include "wx/wxprec.h"
29
30#ifdef __BORLANDC__
31 #pragma hdrstop
32#endif
33
373658eb
VZ
34#ifndef WX_PRECOMP
35 #include "wx/intl.h"
36 #include "wx/log.h"
37#endif // WX_PRECOMP
38
bde4baac
VZ
39#include "wx/strconv.h"
40
41#if wxUSE_WCHAR_T
42
0a1c1e62 43#ifdef __WXMSW__
373658eb 44 #include "wx/msw/private.h"
7608a683
WS
45#endif
46
47#ifdef __WINDOWS__
13dd924a 48 #include "wx/msw/missing.h"
0a1c1e62
GRG
49#endif
50
1c193821 51#ifndef __WXWINCE__
1cd52418 52#include <errno.h>
1c193821
JS
53#endif
54
6001e347
RR
55#include <ctype.h>
56#include <string.h>
57#include <stdlib.h>
58
e95354ec
VZ
59#if defined(__WIN32__) && !defined(__WXMICROWIN__)
60 #define wxHAVE_WIN32_MB2WC
61#endif // __WIN32__ but !__WXMICROWIN__
62
373658eb
VZ
63// ----------------------------------------------------------------------------
64// headers
65// ----------------------------------------------------------------------------
7af284fd 66
6001e347 67#ifdef __SALFORDC__
373658eb 68 #include <clib.h>
6001e347
RR
69#endif
70
b040e242 71#ifdef HAVE_ICONV
373658eb 72 #include <iconv.h>
b1d547eb 73 #include "wx/thread.h"
1cd52418 74#endif
1cd52418 75
373658eb
VZ
76#include "wx/encconv.h"
77#include "wx/fontmap.h"
7608a683 78#include "wx/utils.h"
373658eb 79
335d31e0 80#ifdef __WXMAC__
4227afa4
SC
81#include <ATSUnicode.h>
82#include <TextCommon.h>
83#include <TextEncodingConverter.h>
335d31e0
SC
84
85#include "wx/mac/private.h" // includes mac headers
86#endif
373658eb
VZ
87// ----------------------------------------------------------------------------
88// macros
89// ----------------------------------------------------------------------------
3e61dfb0 90
1cd52418 91#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
3a0d76bc 92#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
1cd52418
OK
93
94#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
95 #define WC_NAME "UCS4"
96 #define WC_BSWAP BSWAP_UCS4
97 #ifdef WORDS_BIGENDIAN
98 #define WC_NAME_BEST "UCS-4BE"
99 #else
100 #define WC_NAME_BEST "UCS-4LE"
101 #endif
1cd52418 102#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
103 #define WC_NAME "UTF16"
104 #define WC_BSWAP BSWAP_UTF16
a3f2769e 105 #define WC_UTF16
3a0d76bc
VS
106 #ifdef WORDS_BIGENDIAN
107 #define WC_NAME_BEST "UTF-16BE"
108 #else
109 #define WC_NAME_BEST "UTF-16LE"
110 #endif
bab1e722 111#else // sizeof(wchar_t) != 2 nor 4
bde4baac
VZ
112 // does this ever happen?
113 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1cd52418
OK
114#endif
115
373658eb
VZ
116// ============================================================================
117// implementation
118// ============================================================================
119
120// ----------------------------------------------------------------------------
c91830cb 121// UTF-16 en/decoding to/from UCS-4
373658eb 122// ----------------------------------------------------------------------------
6001e347 123
b0a6bb75 124
// Encode the UCS-4 code point 'input' as UTF-16.
//
// Returns the number of 16 bit units needed (1 or 2) or (size_t)-1 if the
// value lies beyond U+10FFFF. If 'output' is not NULL the units are stored
// there; the caller must provide room for two of them.
static size_t encode_utf16(wxUint32 input, wxUint16 *output)
{
    // nothing above the last Unicode code point can be represented
    if ( input >= 0x110000 )
        return (size_t)-1;

    if ( input <= 0xffff )
    {
        // a single unit is enough
        if ( output )
            *output = (wxUint16)input;

        return 1;
    }

    // split into a high/low surrogate pair; 0xd7c0 is 0xd800 - (0x10000 >> 10)
    if ( output )
    {
        output[0] = (wxUint16)((input >> 10) + 0xd7c0);
        output[1] = (wxUint16)((input & 0x3ff) + 0xdc00);
    }

    return 2;
}
147
// Decode one code point from the UTF-16 sequence starting at 'input'.
//
// Returns the number of 16 bit units consumed (1 or 2). If the sequence
// starts with an unpaired or misplaced surrogate, (size_t)-1 is returned and
// 'output' is set to that first unit so that the caller may still pass it
// through.
//
// The second unit is only read when the first one is a high surrogate.
static size_t decode_utf16(const wxUint16* input, wxUint32& output)
{
    const wxUint16 first = input[0];

    if ( first < 0xd800 || first > 0xdfff )
    {
        // not a surrogate: the unit is the code point
        output = first;
        return 1;
    }

    // a pair must start with a high surrogate (0xd800..0xdbff) and continue
    // with a low one (0xdc00..0xdfff); accepting a low surrogate in the first
    // position would yield values >= 0x110000
    if ( first >= 0xdc00 || input[1] < 0xdc00 || input[1] > 0xdfff )
    {
        output = first;
        return (size_t)-1;
    }

    output = ((first - 0xd7c0) << 10) + (input[1] - 0xdc00);
    return 2;
}
166
b0a6bb75 167
f6bcfd97 168// ----------------------------------------------------------------------------
6001e347 169// wxMBConv
f6bcfd97 170// ----------------------------------------------------------------------------
2c53a80a
WS
171
172wxMBConv::~wxMBConv()
173{
174 // nothing to do here (necessary for Darwin linking probably)
175}
6001e347 176
6001e347
RR
177const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
178{
2b5f62a0 179 if ( psz )
6001e347 180 {
2b5f62a0
VZ
181 // calculate the length of the buffer needed first
182 size_t nLen = MB2WC(NULL, psz, 0);
183 if ( nLen != (size_t)-1 )
184 {
185 // now do the actual conversion
186 wxWCharBuffer buf(nLen);
635f33ce
VS
187 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
188 if ( nLen != (size_t)-1 )
189 {
190 return buf;
191 }
2b5f62a0 192 }
f6bcfd97 193 }
2b5f62a0
VZ
194
195 wxWCharBuffer buf((wchar_t *)NULL);
196
197 return buf;
6001e347
RR
198}
199
e5cceba0 200const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 201{
2b5f62a0
VZ
202 if ( pwz )
203 {
204 size_t nLen = WC2MB(NULL, pwz, 0);
205 if ( nLen != (size_t)-1 )
206 {
c91830cb 207 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
635f33ce
VS
208 nLen = WC2MB(buf.data(), pwz, nLen + 4);
209 if ( nLen != (size_t)-1 )
210 {
211 return buf;
212 }
2b5f62a0
VZ
213 }
214 }
215
216 wxCharBuffer buf((char *)NULL);
e5cceba0 217
e5cceba0 218 return buf;
6001e347
RR
219}
220
f5fb6871 221const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 222{
f5fb6871
RN
223 wxASSERT(pOutSize != NULL);
224
e4e3bbb4
RN
225 const char* szEnd = szString + nStringLen + 1;
226 const char* szPos = szString;
227 const char* szStart = szPos;
228
229 size_t nActualLength = 0;
f5fb6871
RN
230 size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
231
232 wxWCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
233
234 //Convert the string until the length() is reached, continuing the
235 //loop every time a null character is reached
236 while(szPos != szEnd)
237 {
238 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
239
240 //Get the length of the current (sub)string
241 size_t nLen = MB2WC(NULL, szPos, 0);
242
243 //Invalid conversion?
244 if( nLen == (size_t)-1 )
f5fb6871
RN
245 {
246 *pOutSize = 0;
247 theBuffer.data()[0u] = wxT('\0');
248 return theBuffer;
249 }
250
e4e3bbb4
RN
251
252 //Increase the actual length (+1 for current null character)
253 nActualLength += nLen + 1;
254
f5fb6871
RN
255 //if buffer too big, realloc the buffer
256 if (nActualLength > (nCurrentSize+1))
257 {
258 wxWCharBuffer theNewBuffer(nCurrentSize << 1);
259 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
260 theBuffer = theNewBuffer;
261 nCurrentSize <<= 1;
262 }
263
264 //Convert the current (sub)string
265 if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 266 {
f5fb6871
RN
267 *pOutSize = 0;
268 theBuffer.data()[0u] = wxT('\0');
269 return theBuffer;
e4e3bbb4
RN
270 }
271
272 //Increment to next (sub)string
273 //Note that we have to use strlen here instead of nLen
274 //here because XX2XX gives us the size of the output buffer,
275 //not neccessarly the length of the string
276 szPos += strlen(szPos) + 1;
277 }
278
f5fb6871
RN
279 //success - return actual length and the buffer
280 *pOutSize = nActualLength;
3698ae71 281 return theBuffer;
e4e3bbb4
RN
282}
283
f5fb6871 284const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 285{
f5fb6871
RN
286 wxASSERT(pOutSize != NULL);
287
e4e3bbb4
RN
288 const wchar_t* szEnd = szString + nStringLen + 1;
289 const wchar_t* szPos = szString;
290 const wchar_t* szStart = szPos;
291
292 size_t nActualLength = 0;
f5fb6871
RN
293 size_t nCurrentSize = nStringLen << 2; //try * 4 first
294
295 wxCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
296
297 //Convert the string until the length() is reached, continuing the
298 //loop every time a null character is reached
299 while(szPos != szEnd)
300 {
301 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
302
303 //Get the length of the current (sub)string
304 size_t nLen = WC2MB(NULL, szPos, 0);
305
306 //Invalid conversion?
307 if( nLen == (size_t)-1 )
f5fb6871
RN
308 {
309 *pOutSize = 0;
310 theBuffer.data()[0u] = wxT('\0');
311 return theBuffer;
312 }
e4e3bbb4
RN
313
314 //Increase the actual length (+1 for current null character)
315 nActualLength += nLen + 1;
3698ae71 316
f5fb6871
RN
317 //if buffer too big, realloc the buffer
318 if (nActualLength > (nCurrentSize+1))
319 {
320 wxCharBuffer theNewBuffer(nCurrentSize << 1);
321 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
322 theBuffer = theNewBuffer;
323 nCurrentSize <<= 1;
324 }
325
326 //Convert the current (sub)string
327 if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 328 {
f5fb6871
RN
329 *pOutSize = 0;
330 theBuffer.data()[0u] = wxT('\0');
331 return theBuffer;
e4e3bbb4
RN
332 }
333
334 //Increment to next (sub)string
335 //Note that we have to use wxWcslen here instead of nLen
336 //here because XX2XX gives us the size of the output buffer,
337 //not neccessarly the length of the string
338 szPos += wxWcslen(szPos) + 1;
339 }
340
f5fb6871
RN
341 //success - return actual length and the buffer
342 *pOutSize = nActualLength;
3698ae71 343 return theBuffer;
e4e3bbb4
RN
344}
345
6001e347 346// ----------------------------------------------------------------------------
bde4baac 347// wxMBConvLibc
6001e347
RR
348// ----------------------------------------------------------------------------
349
bde4baac
VZ
350size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
351{
352 return wxMB2WC(buf, psz, n);
353}
354
355size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
356{
357 return wxWC2MB(buf, psz, n);
358}
e1bfe89e 359
66bf0099 360#ifdef __UNIX__
c12b7f79 361
e1bfe89e 362// ----------------------------------------------------------------------------
66bf0099 363// wxConvBrokenFileNames
e1bfe89e
RR
364// ----------------------------------------------------------------------------
365
845905d5 366wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
ea8ce907 367{
845905d5
MW
368 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
369 || wxStricmp(charset, _T("UTF8")) == 0 )
370 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
371 else
372 m_conv = new wxCSConv(charset);
ea8ce907
RR
373}
374
c12b7f79
VZ
375size_t
376wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
377 const char *psz,
378 size_t outputSize) const
e1bfe89e 379{
c12b7f79 380 return m_conv->MB2WC( outputBuf, psz, outputSize );
e1bfe89e
RR
381}
382
c12b7f79
VZ
383size_t
384wxConvBrokenFileNames::WC2MB(char *outputBuf,
385 const wchar_t *psz,
386 size_t outputSize) const
e1bfe89e 387{
c12b7f79 388 return m_conv->WC2MB( outputBuf, psz, outputSize );
e1bfe89e
RR
389}
390
66bf0099 391#endif
c12b7f79 392
bde4baac 393// ----------------------------------------------------------------------------
3698ae71 394// UTF-7
bde4baac 395// ----------------------------------------------------------------------------
6001e347 396
15f2ee32 397// Implementation (C) 2004 Fredrik Roubert
6001e347 398
15f2ee32
RN
//
// BASE64 decoding table: maps a byte to its 6 bit value, 0xff marks bytes
// which are not part of the BASE64 alphabet
//
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0' .. '9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x5f: 'A' .. 'Z'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x7f: 'a' .. 'z'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
437
438size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
439{
15f2ee32
RN
440 size_t len = 0;
441
442 while (*psz && ((!buf) || (len < n)))
443 {
444 unsigned char cc = *psz++;
445 if (cc != '+')
446 {
447 // plain ASCII char
448 if (buf)
449 *buf++ = cc;
450 len++;
451 }
452 else if (*psz == '-')
453 {
454 // encoded plus sign
455 if (buf)
456 *buf++ = cc;
457 len++;
458 psz++;
459 }
460 else
461 {
462 // BASE64 encoded string
463 bool lsb;
464 unsigned char c;
465 unsigned int d, l;
466 for (lsb = false, d = 0, l = 0;
467 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
468 {
469 d <<= 6;
470 d += cc;
471 for (l += 6; l >= 8; lsb = !lsb)
472 {
6356d52a 473 c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
474 if (lsb)
475 {
476 if (buf)
477 *buf++ |= c;
478 len ++;
479 }
480 else
481 if (buf)
6356d52a 482 *buf = (wchar_t)(c << 8);
15f2ee32
RN
483 }
484 }
485 if (*psz == '-')
486 psz++;
487 }
488 }
489 if (buf && (len < n))
490 *buf = 0;
491 return len;
6001e347
RR
492}
493
15f2ee32
RN
//
// BASE64 encoding table: maps a 6 bit value to its character
//
static const unsigned char utf7enb64[] =
{
    // 0 - 25
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 26 - 51
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // 52 - 61
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 62, 63
    '+', '/'
};
508
//
// UTF-7 encoding table, indexed by ASCII code
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    /* 0x00 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    /* 0x10 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x20 */ 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    /* 0x40 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    /* 0x60 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
528
667e5b3e 529size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32
RN
530{
531
532
533 size_t len = 0;
534
535 while (*psz && ((!buf) || (len < n)))
536 {
537 wchar_t cc = *psz++;
538 if (cc < 0x80 && utf7encode[cc] < 1)
539 {
540 // plain ASCII char
541 if (buf)
542 *buf++ = (char)cc;
543 len++;
544 }
545#ifndef WC_UTF16
79c78d42 546 else if (((wxUint32)cc) > 0xffff)
b2c13097 547 {
15f2ee32
RN
548 // no surrogate pair generation (yet?)
549 return (size_t)-1;
550 }
551#endif
552 else
553 {
554 if (buf)
555 *buf++ = '+';
556 len++;
557 if (cc != '+')
558 {
559 // BASE64 encode string
560 unsigned int lsb, d, l;
561 for (d = 0, l = 0;; psz++)
562 {
563 for (lsb = 0; lsb < 2; lsb ++)
564 {
565 d <<= 8;
566 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
567
568 for (l += 8; l >= 6; )
569 {
570 l -= 6;
571 if (buf)
572 *buf++ = utf7enb64[(d >> l) % 64];
573 len++;
574 }
575 }
576 cc = *psz;
577 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
578 break;
579 }
580 if (l != 0)
581 {
582 if (buf)
583 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
584 len++;
585 }
586 }
587 if (buf)
588 *buf++ = '-';
589 len++;
590 }
591 }
592 if (buf && (len < n))
593 *buf = 0;
594 return len;
6001e347
RR
595}
596
f6bcfd97 597// ----------------------------------------------------------------------------
6001e347 598// UTF-8
f6bcfd97 599// ----------------------------------------------------------------------------
6001e347 600
// utf8_max[i] is the largest value which fits into a UTF-8 sequence of i + 1
// bytes; the last entry, 0xffffffff, terminates any search through the table
// for a 32 bit value. The table is only ever read, hence const.
static const wxUint32 utf8_max[]=
    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };

// boundaries of the private use area used to (temporarily) remap the bytes
// which are invalid in a UTF-8 encoded string
const wxUint32 wxUnicodePUA = 0x100000;
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
608
6001e347
RR
609size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
610{
4def3b35
VS
611 size_t len = 0;
612
dccce9ea 613 while (*psz && ((!buf) || (len < n)))
4def3b35 614 {
ea8ce907
RR
615 const char *opsz = psz;
616 bool invalid = false;
4def3b35
VS
617 unsigned char cc = *psz++, fc = cc;
618 unsigned cnt;
dccce9ea 619 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 620 fc <<= 1;
dccce9ea 621 if (!cnt)
4def3b35
VS
622 {
623 // plain ASCII char
dccce9ea 624 if (buf)
4def3b35
VS
625 *buf++ = cc;
626 len++;
561488ef
MW
627
628 // escape the escape character for octal escapes
629 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
630 && cc == '\\' && (!buf || len < n))
631 {
632 if (buf)
633 *buf++ = cc;
634 len++;
635 }
dccce9ea
VZ
636 }
637 else
4def3b35
VS
638 {
639 cnt--;
dccce9ea 640 if (!cnt)
4def3b35
VS
641 {
642 // invalid UTF-8 sequence
ea8ce907 643 invalid = true;
dccce9ea
VZ
644 }
645 else
4def3b35
VS
646 {
647 unsigned ocnt = cnt - 1;
648 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 649 while (cnt--)
4def3b35 650 {
ea8ce907 651 cc = *psz;
dccce9ea 652 if ((cc & 0xC0) != 0x80)
4def3b35
VS
653 {
654 // invalid UTF-8 sequence
ea8ce907
RR
655 invalid = true;
656 break;
4def3b35 657 }
ea8ce907 658 psz++;
4def3b35
VS
659 res = (res << 6) | (cc & 0x3f);
660 }
ea8ce907 661 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
662 {
663 // illegal UTF-8 encoding
ea8ce907 664 invalid = true;
4def3b35 665 }
ea8ce907
RR
666 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
667 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
668 {
669 // if one of our PUA characters turns up externally
670 // it must also be treated as an illegal sequence
671 // (a bit like you have to escape an escape character)
672 invalid = true;
673 }
674 else
675 {
1cd52418 676#ifdef WC_UTF16
ea8ce907
RR
677 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
678 size_t pa = encode_utf16(res, (wxUint16 *)buf);
679 if (pa == (size_t)-1)
680 {
681 invalid = true;
682 }
683 else
684 {
685 if (buf)
686 buf += pa;
687 len += pa;
688 }
373658eb 689#else // !WC_UTF16
ea8ce907
RR
690 if (buf)
691 *buf++ = res;
692 len++;
373658eb 693#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
694 }
695 }
696 if (invalid)
697 {
698 if (m_options & MAP_INVALID_UTF8_TO_PUA)
699 {
700 while (opsz < psz && (!buf || len < n))
701 {
702#ifdef WC_UTF16
703 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
704 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
705 wxASSERT(pa != (size_t)-1);
706 if (buf)
707 buf += pa;
708 opsz++;
709 len += pa;
710#else
711 if (buf)
712 *buf++ = wxUnicodePUA + (unsigned char)*opsz;
713 opsz++;
714 len++;
715#endif
716 }
717 }
3698ae71 718 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
719 {
720 while (opsz < psz && (!buf || len < n))
721 {
3698ae71
VZ
722 if ( buf && len + 3 < n )
723 {
724 unsigned char n = *opsz;
725 *buf++ = L'\\';
b2c13097
WS
726 *buf++ = (wchar_t)( L'0' + n / 0100 );
727 *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
728 *buf++ = (wchar_t)( L'0' + n % 010 );
3698ae71 729 }
ea8ce907
RR
730 opsz++;
731 len += 4;
732 }
733 }
3698ae71 734 else // MAP_INVALID_UTF8_NOT
ea8ce907
RR
735 {
736 return (size_t)-1;
737 }
4def3b35
VS
738 }
739 }
6001e347 740 }
dccce9ea 741 if (buf && (len < n))
4def3b35
VS
742 *buf = 0;
743 return len;
6001e347
RR
744}
745
3698ae71
VZ
// Return true if 'wch' is one of the octal digits '0' to '7'.
static inline bool isoctal(wchar_t wch)
{
    if ( wch < L'0' )
        return false;

    return wch <= L'7';
}
750
6001e347
RR
751size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
752{
4def3b35 753 size_t len = 0;
6001e347 754
dccce9ea 755 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
756 {
757 wxUint32 cc;
1cd52418 758#ifdef WC_UTF16
b5153fd8
VZ
759 // cast is ok for WC_UTF16
760 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 761 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 762#else
4def3b35
VS
763 cc=(*psz++) & 0x7fffffff;
764#endif
3698ae71
VZ
765
766 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
767 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 768 {
dccce9ea 769 if (buf)
ea8ce907 770 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 771 len++;
3698ae71 772 }
561488ef
MW
773 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
774 && cc == L'\\' && psz[0] == L'\\' )
775 {
776 if (buf)
777 *buf++ = (char)cc;
778 psz++;
779 len++;
780 }
3698ae71
VZ
781 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
782 cc == L'\\' &&
783 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 784 {
dccce9ea 785 if (buf)
3698ae71 786 {
b2c13097
WS
787 *buf++ = (char) ((psz[0] - L'0')*0100 +
788 (psz[1] - L'0')*010 +
789 (psz[2] - L'0'));
3698ae71
VZ
790 }
791
792 psz += 3;
ea8ce907
RR
793 len++;
794 }
795 else
796 {
797 unsigned cnt;
798 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
799 if (!cnt)
4def3b35 800 {
ea8ce907
RR
801 // plain ASCII char
802 if (buf)
803 *buf++ = (char) cc;
804 len++;
805 }
806
807 else
808 {
809 len += cnt + 1;
810 if (buf)
811 {
812 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
813 while (cnt--)
814 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
815 }
4def3b35
VS
816 }
817 }
6001e347 818 }
4def3b35 819
3698ae71
VZ
820 if (buf && (len<n))
821 *buf = 0;
adb45366 822
4def3b35 823 return len;
6001e347
RR
824}
825
c91830cb
VZ
826// ----------------------------------------------------------------------------
827// UTF-16
828// ----------------------------------------------------------------------------
829
830#ifdef WORDS_BIGENDIAN
bde4baac
VZ
831 #define wxMBConvUTF16straight wxMBConvUTF16BE
832 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 833#else
bde4baac
VZ
834 #define wxMBConvUTF16swap wxMBConvUTF16BE
835 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
836#endif
837
838
c91830cb
VZ
839#ifdef WC_UTF16
840
c91830cb
VZ
841// copy 16bit MB to 16bit String
842size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
843{
844 size_t len=0;
845
846 while (*(wxUint16*)psz && (!buf || len < n))
847 {
848 if (buf)
849 *buf++ = *(wxUint16*)psz;
850 len++;
851
852 psz += sizeof(wxUint16);
853 }
854 if (buf && len<n) *buf=0;
855
856 return len;
857}
858
859
860// copy 16bit String to 16bit MB
861size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
862{
863 size_t len=0;
864
865 while (*psz && (!buf || len < n))
866 {
867 if (buf)
868 {
869 *(wxUint16*)buf = *psz;
870 buf += sizeof(wxUint16);
871 }
872 len += sizeof(wxUint16);
873 psz++;
874 }
875 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
876
877 return len;
878}
879
880
881// swap 16bit MB to 16bit String
882size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
883{
884 size_t len=0;
885
886 while (*(wxUint16*)psz && (!buf || len < n))
887 {
888 if (buf)
889 {
890 ((char *)buf)[0] = psz[1];
891 ((char *)buf)[1] = psz[0];
892 buf++;
893 }
894 len++;
895 psz += sizeof(wxUint16);
896 }
897 if (buf && len<n) *buf=0;
898
899 return len;
900}
901
902
903// swap 16bit MB to 16bit String
904size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
905{
906 size_t len=0;
907
908 while (*psz && (!buf || len < n))
909 {
910 if (buf)
911 {
912 *buf++ = ((char*)psz)[1];
913 *buf++ = ((char*)psz)[0];
914 }
915 len += sizeof(wxUint16);
916 psz++;
917 }
918 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
919
920 return len;
921}
922
923
924#else // WC_UTF16
925
926
927// copy 16bit MB to 32bit String
928size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
929{
930 size_t len=0;
931
932 while (*(wxUint16*)psz && (!buf || len < n))
933 {
934 wxUint32 cc;
935 size_t pa=decode_utf16((wxUint16*)psz, cc);
936 if (pa == (size_t)-1)
937 return pa;
938
939 if (buf)
940 *buf++ = cc;
941 len++;
942 psz += pa * sizeof(wxUint16);
943 }
944 if (buf && len<n) *buf=0;
945
946 return len;
947}
948
949
950// copy 32bit String to 16bit MB
951size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
952{
953 size_t len=0;
954
955 while (*psz && (!buf || len < n))
956 {
957 wxUint16 cc[2];
958 size_t pa=encode_utf16(*psz, cc);
959
960 if (pa == (size_t)-1)
961 return pa;
962
963 if (buf)
964 {
69b80d28 965 *(wxUint16*)buf = cc[0];
b5153fd8 966 buf += sizeof(wxUint16);
c91830cb 967 if (pa > 1)
69b80d28
VZ
968 {
969 *(wxUint16*)buf = cc[1];
970 buf += sizeof(wxUint16);
971 }
c91830cb
VZ
972 }
973
974 len += pa*sizeof(wxUint16);
975 psz++;
976 }
977 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
978
979 return len;
980}
981
982
983// swap 16bit MB to 32bit String
984size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
985{
986 size_t len=0;
987
988 while (*(wxUint16*)psz && (!buf || len < n))
989 {
990 wxUint32 cc;
991 char tmp[4];
992 tmp[0]=psz[1]; tmp[1]=psz[0];
993 tmp[2]=psz[3]; tmp[3]=psz[2];
994
995 size_t pa=decode_utf16((wxUint16*)tmp, cc);
996 if (pa == (size_t)-1)
997 return pa;
998
999 if (buf)
1000 *buf++ = cc;
1001
1002 len++;
1003 psz += pa * sizeof(wxUint16);
1004 }
1005 if (buf && len<n) *buf=0;
1006
1007 return len;
1008}
1009
1010
1011// swap 32bit String to 16bit MB
1012size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1013{
1014 size_t len=0;
1015
1016 while (*psz && (!buf || len < n))
1017 {
1018 wxUint16 cc[2];
1019 size_t pa=encode_utf16(*psz, cc);
1020
1021 if (pa == (size_t)-1)
1022 return pa;
1023
1024 if (buf)
1025 {
1026 *buf++ = ((char*)cc)[1];
1027 *buf++ = ((char*)cc)[0];
1028 if (pa > 1)
1029 {
1030 *buf++ = ((char*)cc)[3];
1031 *buf++ = ((char*)cc)[2];
1032 }
1033 }
1034
1035 len += pa*sizeof(wxUint16);
1036 psz++;
1037 }
1038 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1039
1040 return len;
1041}
1042
1043#endif // WC_UTF16
1044
1045
1046// ----------------------------------------------------------------------------
1047// UTF-32
1048// ----------------------------------------------------------------------------
1049
1050#ifdef WORDS_BIGENDIAN
1051#define wxMBConvUTF32straight wxMBConvUTF32BE
1052#define wxMBConvUTF32swap wxMBConvUTF32LE
1053#else
1054#define wxMBConvUTF32swap wxMBConvUTF32BE
1055#define wxMBConvUTF32straight wxMBConvUTF32LE
1056#endif
1057
1058
1059WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1060WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1061
1062
1063#ifdef WC_UTF16
1064
1065// copy 32bit MB to 16bit String
1066size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1067{
1068 size_t len=0;
1069
1070 while (*(wxUint32*)psz && (!buf || len < n))
1071 {
1072 wxUint16 cc[2];
1073
1074 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1075 if (pa == (size_t)-1)
1076 return pa;
1077
1078 if (buf)
1079 {
1080 *buf++ = cc[0];
1081 if (pa > 1)
1082 *buf++ = cc[1];
1083 }
1084 len += pa;
1085 psz += sizeof(wxUint32);
1086 }
1087 if (buf && len<n) *buf=0;
1088
1089 return len;
1090}
1091
1092
1093// copy 16bit String to 32bit MB
1094size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1095{
1096 size_t len=0;
1097
1098 while (*psz && (!buf || len < n))
1099 {
1100 wxUint32 cc;
1101
b5153fd8
VZ
1102 // cast is ok for WC_UTF16
1103 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
1104 if (pa == (size_t)-1)
1105 return pa;
1106
1107 if (buf)
1108 {
1109 *(wxUint32*)buf = cc;
1110 buf += sizeof(wxUint32);
1111 }
1112 len += sizeof(wxUint32);
1113 psz += pa;
1114 }
b5153fd8
VZ
1115
1116 if (buf && len<=n-sizeof(wxUint32))
1117 *(wxUint32*)buf=0;
c91830cb
VZ
1118
1119 return len;
1120}
1121
1122
1123
1124// swap 32bit MB to 16bit String
1125size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1126{
1127 size_t len=0;
1128
1129 while (*(wxUint32*)psz && (!buf || len < n))
1130 {
1131 char tmp[4];
1132 tmp[0] = psz[3]; tmp[1] = psz[2];
1133 tmp[2] = psz[1]; tmp[3] = psz[0];
1134
1135
1136 wxUint16 cc[2];
1137
1138 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1139 if (pa == (size_t)-1)
1140 return pa;
1141
1142 if (buf)
1143 {
1144 *buf++ = cc[0];
1145 if (pa > 1)
1146 *buf++ = cc[1];
1147 }
1148 len += pa;
1149 psz += sizeof(wxUint32);
1150 }
b5153fd8
VZ
1151
1152 if (buf && len<n)
1153 *buf=0;
c91830cb
VZ
1154
1155 return len;
1156}
1157
1158
1159// swap 16bit String to 32bit MB
1160size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1161{
1162 size_t len=0;
1163
1164 while (*psz && (!buf || len < n))
1165 {
1166 char cc[4];
1167
b5153fd8
VZ
1168 // cast is ok for WC_UTF16
1169 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
1170 if (pa == (size_t)-1)
1171 return pa;
1172
1173 if (buf)
1174 {
1175 *buf++ = cc[3];
1176 *buf++ = cc[2];
1177 *buf++ = cc[1];
1178 *buf++ = cc[0];
1179 }
1180 len += sizeof(wxUint32);
1181 psz += pa;
1182 }
b5153fd8
VZ
1183
1184 if (buf && len<=n-sizeof(wxUint32))
1185 *(wxUint32*)buf=0;
c91830cb
VZ
1186
1187 return len;
1188}
1189
1190#else // WC_UTF16
1191
1192
1193// copy 32bit MB to 32bit String
1194size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1195{
1196 size_t len=0;
1197
1198 while (*(wxUint32*)psz && (!buf || len < n))
1199 {
1200 if (buf)
1201 *buf++ = *(wxUint32*)psz;
1202 len++;
1203 psz += sizeof(wxUint32);
1204 }
b5153fd8
VZ
1205
1206 if (buf && len<n)
1207 *buf=0;
c91830cb
VZ
1208
1209 return len;
1210}
1211
1212
1213// copy 32bit String to 32bit MB
1214size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1215{
1216 size_t len=0;
1217
1218 while (*psz && (!buf || len < n))
1219 {
1220 if (buf)
1221 {
1222 *(wxUint32*)buf = *psz;
1223 buf += sizeof(wxUint32);
1224 }
1225
1226 len += sizeof(wxUint32);
1227 psz++;
1228 }
1229
b5153fd8
VZ
1230 if (buf && len<=n-sizeof(wxUint32))
1231 *(wxUint32*)buf=0;
c91830cb
VZ
1232
1233 return len;
1234}
1235
1236
1237// swap 32bit MB to 32bit String
1238size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1239{
1240 size_t len=0;
1241
1242 while (*(wxUint32*)psz && (!buf || len < n))
1243 {
1244 if (buf)
1245 {
1246 ((char *)buf)[0] = psz[3];
1247 ((char *)buf)[1] = psz[2];
1248 ((char *)buf)[2] = psz[1];
1249 ((char *)buf)[3] = psz[0];
1250 buf++;
1251 }
1252 len++;
1253 psz += sizeof(wxUint32);
1254 }
b5153fd8
VZ
1255
1256 if (buf && len<n)
1257 *buf=0;
c91830cb
VZ
1258
1259 return len;
1260}
1261
1262
1263// swap 32bit String to 32bit MB
1264size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1265{
1266 size_t len=0;
1267
1268 while (*psz && (!buf || len < n))
1269 {
1270 if (buf)
1271 {
1272 *buf++ = ((char *)psz)[3];
1273 *buf++ = ((char *)psz)[2];
1274 *buf++ = ((char *)psz)[1];
1275 *buf++ = ((char *)psz)[0];
1276 }
1277 len += sizeof(wxUint32);
1278 psz++;
1279 }
b5153fd8
VZ
1280
1281 if (buf && len<=n-sizeof(wxUint32))
1282 *(wxUint32*)buf=0;
c91830cb
VZ
1283
1284 return len;
1285}
1286
1287
1288#endif // WC_UTF16
1289
1290
36acb880
VZ
1291// ============================================================================
1292// The classes doing conversion using the iconv_xxx() functions
1293// ============================================================================
3caec1bb 1294
b040e242 1295#ifdef HAVE_ICONV
3a0d76bc 1296
b1d547eb
VS
1297// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1298// E2BIG if output buffer is _exactly_ as big as needed. Such case is
1299// (unless there's yet another bug in glibc) the only case when iconv()
1300// returns with (size_t)-1 (which means error) and says there are 0 bytes
1301// left in the input buffer -- when _real_ error occurs,
1302// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1303// iconv() failure.
3caec1bb
VS
1304// [This bug does not appear in glibc 2.2.]
1305#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1306#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1307 (errno != E2BIG || bufLeft != 0))
1308#else
1309#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1310#endif
1311
ab217dba 1312#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880
VZ
1313
1314// ----------------------------------------------------------------------------
e95354ec 1315// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1316// ----------------------------------------------------------------------------
1317
e95354ec 1318class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1319{
1320public:
e95354ec
VZ
1321 wxMBConv_iconv(const wxChar *name);
1322 virtual ~wxMBConv_iconv();
36acb880 1323
bde4baac
VZ
1324 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1325 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1326
e95354ec 1327 bool IsOk() const
36acb880
VZ
1328 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1329
1330protected:
1331 // the iconv handlers used to translate from multibyte to wide char and in
1332 // the other direction
1333 iconv_t m2w,
1334 w2m;
b1d547eb
VS
1335#if wxUSE_THREADS
1336 // guards access to m2w and w2m objects
1337 wxMutex m_iconvMutex;
1338#endif
36acb880
VZ
1339
1340private:
e95354ec 1341 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
1342 // available on this machine, it will remain NULL
1343 static const char *ms_wcCharsetName;
1344
1345 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1346 // different endian-ness than the native one
405d8f46 1347 static bool ms_wcNeedsSwap;
36acb880
VZ
1348};
1349
e95354ec
VZ
1350const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1351bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1352
e95354ec 1353wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 1354{
04c79127
RR
1355 // Do it the hard way
1356 char cname[100];
1357 for (size_t i = 0; i < wxStrlen(name)+1; i++)
1358 cname[i] = (char) name[i];
1359
36acb880
VZ
1360 // check for charset that represents wchar_t:
1361 if (ms_wcCharsetName == NULL)
f1339c56 1362 {
e95354ec 1363 ms_wcNeedsSwap = false;
dccce9ea 1364
36acb880
VZ
1365 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1366 ms_wcCharsetName = WC_NAME_BEST;
04c79127 1367 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 1368
36acb880
VZ
1369 if (m2w == (iconv_t)-1)
1370 {
1371 // try charset w/o bytesex info (e.g. "UCS4")
1372 // and check for bytesex ourselves:
1373 ms_wcCharsetName = WC_NAME;
04c79127 1374 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
1375
1376 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
1377 if (m2w == (iconv_t)-1)
1378 {
36acb880 1379 ms_wcCharsetName = "WCHAR_T";
04c79127 1380 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 1381 }
3a0d76bc 1382
36acb880
VZ
1383 if (m2w != (iconv_t)-1)
1384 {
1385 char buf[2], *bufPtr;
1386 wchar_t wbuf[2], *wbufPtr;
1387 size_t insz, outsz;
1388 size_t res;
1389
1390 buf[0] = 'A';
1391 buf[1] = 0;
1392 wbuf[0] = 0;
1393 insz = 2;
1394 outsz = SIZEOF_WCHAR_T * 2;
1395 wbufPtr = wbuf;
1396 bufPtr = buf;
1397
1398 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1399 (char**)&wbufPtr, &outsz);
1400
1401 if (ICONV_FAILED(res, insz))
3a0d76bc 1402 {
36acb880
VZ
1403 ms_wcCharsetName = NULL;
1404 wxLogLastError(wxT("iconv"));
2b5f62a0 1405 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
1406 }
1407 else
1408 {
36acb880 1409 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
1410 }
1411 }
36acb880
VZ
1412 else
1413 {
1414 ms_wcCharsetName = NULL;
373658eb 1415
77ffb593 1416 // VS: we must not output an error here, since wxWidgets will safely
957686c8
VS
1417 // fall back to using wxEncodingConverter.
1418 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1419 //wxLogError(
36acb880 1420 }
3a0d76bc 1421 }
36acb880 1422 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 1423 }
36acb880 1424 else // we already have ms_wcCharsetName
3caec1bb 1425 {
04c79127 1426 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 1427 }
dccce9ea 1428
36acb880
VZ
1429 // NB: don't ever pass NULL to iconv_open(), it may crash!
1430 if ( ms_wcCharsetName )
f1339c56 1431 {
04c79127 1432 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 1433 }
405d8f46
VZ
1434 else
1435 {
1436 w2m = (iconv_t)-1;
1437 }
36acb880 1438}
3caec1bb 1439
e95354ec 1440wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
1441{
1442 if ( m2w != (iconv_t)-1 )
1443 iconv_close(m2w);
1444 if ( w2m != (iconv_t)-1 )
1445 iconv_close(w2m);
1446}
3a0d76bc 1447
bde4baac 1448size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1449{
b1d547eb
VS
1450#if wxUSE_THREADS
1451 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1452 // Unfortunately there is a couple of global wxCSConv objects such as
1453 // wxConvLocal that are used all over wx code, so we have to make sure
1454 // the handle is used by at most one thread at the time. Otherwise
1455 // only a few wx classes would be safe to use from non-main threads
1456 // as MB<->WC conversion would fail "randomly".
1457 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1458#endif
3698ae71 1459
36acb880
VZ
1460 size_t inbuf = strlen(psz);
1461 size_t outbuf = n * SIZEOF_WCHAR_T;
1462 size_t res, cres;
1463 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1464 wchar_t *bufPtr = buf;
1465 const char *pszPtr = psz;
1466
1467 if (buf)
1468 {
1469 // have destination buffer, convert there
1470 cres = iconv(m2w,
1471 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1472 (char**)&bufPtr, &outbuf);
1473 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1474
36acb880 1475 if (ms_wcNeedsSwap)
3a0d76bc 1476 {
36acb880
VZ
1477 // convert to native endianness
1478 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 1479 }
adb45366 1480
49dd9820
VS
1481 // NB: iconv was given only strlen(psz) characters on input, and so
1482 // it couldn't convert the trailing zero. Let's do it ourselves
1483 // if there's some room left for it in the output buffer.
1484 if (res < n)
1485 buf[res] = 0;
36acb880
VZ
1486 }
1487 else
1488 {
1489 // no destination buffer... convert using temp buffer
1490 // to calculate destination buffer requirement
1491 wchar_t tbuf[8];
1492 res = 0;
1493 do {
1494 bufPtr = tbuf;
1495 outbuf = 8*SIZEOF_WCHAR_T;
1496
1497 cres = iconv(m2w,
1498 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1499 (char**)&bufPtr, &outbuf );
1500
1501 res += 8-(outbuf/SIZEOF_WCHAR_T);
1502 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1503 }
dccce9ea 1504
36acb880 1505 if (ICONV_FAILED(cres, inbuf))
f1339c56 1506 {
36acb880
VZ
1507 //VS: it is ok if iconv fails, hence trace only
1508 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1509 return (size_t)-1;
1510 }
1511
1512 return res;
1513}
1514
bde4baac 1515size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1516{
b1d547eb
VS
1517#if wxUSE_THREADS
1518 // NB: explained in MB2WC
1519 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1520#endif
3698ae71 1521
f8d791e0 1522 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1523 size_t outbuf = n;
1524 size_t res, cres;
3a0d76bc 1525
36acb880 1526 wchar_t *tmpbuf = 0;
3caec1bb 1527
36acb880
VZ
1528 if (ms_wcNeedsSwap)
1529 {
1530 // need to copy to temp buffer to switch endianness
1531 // this absolutely doesn't rock!
1532 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1533 // could be in read-only memory, or be accessed in some other thread)
1534 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1535 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1536 WC_BSWAP(tmpbuf, inbuf)
1537 psz=tmpbuf;
1538 }
3a0d76bc 1539
36acb880
VZ
1540 if (buf)
1541 {
1542 // have destination buffer, convert there
1543 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1544
36acb880 1545 res = n-outbuf;
adb45366 1546
49dd9820
VS
1547 // NB: iconv was given only wcslen(psz) characters on input, and so
1548 // it couldn't convert the trailing zero. Let's do it ourselves
1549 // if there's some room left for it in the output buffer.
1550 if (res < n)
1551 buf[0] = 0;
36acb880
VZ
1552 }
1553 else
1554 {
1555 // no destination buffer... convert using temp buffer
1556 // to calculate destination buffer requirement
1557 char tbuf[16];
1558 res = 0;
1559 do {
1560 buf = tbuf; outbuf = 16;
1561
1562 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1563
36acb880
VZ
1564 res += 16 - outbuf;
1565 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1566 }
dccce9ea 1567
36acb880
VZ
1568 if (ms_wcNeedsSwap)
1569 {
1570 free(tmpbuf);
1571 }
dccce9ea 1572
36acb880
VZ
1573 if (ICONV_FAILED(cres, inbuf))
1574 {
1575 //VS: it is ok if iconv fails, hence trace only
1576 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1577 return (size_t)-1;
1578 }
1579
1580 return res;
1581}
1582
b040e242 1583#endif // HAVE_ICONV
36acb880 1584
e95354ec 1585
36acb880
VZ
1586// ============================================================================
1587// Win32 conversion classes
1588// ============================================================================
1cd52418 1589
e95354ec 1590#ifdef wxHAVE_WIN32_MB2WC
373658eb 1591
8b04d4c4 1592// from utils.cpp
d775fa82 1593#if wxUSE_FONTMAP
8b04d4c4
VZ
1594extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1595extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1596#endif
373658eb 1597
e95354ec 1598class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1599{
1600public:
bde4baac
VZ
1601 wxMBConv_win32()
1602 {
1603 m_CodePage = CP_ACP;
1604 }
1605
7608a683 1606#if wxUSE_FONTMAP
e95354ec 1607 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1608 {
1609 m_CodePage = wxCharsetToCodepage(name);
1610 }
dccce9ea 1611
e95354ec 1612 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1613 {
1614 m_CodePage = wxEncodingToCodepage(encoding);
1615 }
7608a683 1616#endif
8b04d4c4 1617
bde4baac 1618 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1619 {
02272c9c
VZ
1620 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1621 // the behaviour is not compatible with the Unix version (using iconv)
1622 // and break the library itself, e.g. wxTextInputStream::NextChar()
1623 // wouldn't work if reading an incomplete MB char didn't result in an
1624 // error
667e5b3e
VZ
1625 //
1626 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1627 // an error (tested under Windows Server 2003) and apparently it is
1628 // done on purpose, i.e. the function accepts any input in this case
1629 // and although I'd prefer to return error on ill-formed output, our
1630 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1631 // explicitly ill-formed according to RFC 2152) neither so we don't
1632 // even have any fallback here...
1633 int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1634
2b5f62a0
VZ
1635 const size_t len = ::MultiByteToWideChar
1636 (
1637 m_CodePage, // code page
667e5b3e 1638 flags, // flags: fall on error
2b5f62a0
VZ
1639 psz, // input string
1640 -1, // its length (NUL-terminated)
b4da152e 1641 buf, // output string
2b5f62a0
VZ
1642 buf ? n : 0 // size of output buffer
1643 );
1644
03a991bc
VZ
1645 // note that it returns count of written chars for buf != NULL and size
1646 // of the needed buffer for buf == NULL so in either case the length of
1647 // the string (which never includes the terminating NUL) is one less
1648 return len ? len - 1 : (size_t)-1;
f1339c56 1649 }
dccce9ea 1650
13dd924a 1651 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1652 {
13dd924a
VZ
1653 /*
1654 we have a problem here: by default, WideCharToMultiByte() may
1655 replace characters unrepresentable in the target code page with bad
1656 quality approximations such as turning "1/2" symbol (U+00BD) into
1657 "1" for the code pages which don't have it and we, obviously, want
1658 to avoid this at any price
d775fa82 1659
13dd924a
VZ
1660 the trouble is that this function does it _silently_, i.e. it won't
1661 even tell us whether it did or not... Win98/2000 and higher provide
1662 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1663 we have to resort to a round trip, i.e. check that converting back
1664 results in the same string -- this is, of course, expensive but
1665 otherwise we simply can't be sure to not garble the data.
1666 */
1667
1668 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1669 // it doesn't work with CJK encodings (which we test for rather roughly
1670 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1671 // supporting it
907173e5
WS
1672 BOOL usedDef wxDUMMY_INITIALIZE(false);
1673 BOOL *pUsedDef;
13dd924a
VZ
1674 int flags;
1675 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1676 {
1677 // it's our lucky day
1678 flags = WC_NO_BEST_FIT_CHARS;
1679 pUsedDef = &usedDef;
1680 }
1681 else // old system or unsupported encoding
1682 {
1683 flags = 0;
1684 pUsedDef = NULL;
1685 }
1686
2b5f62a0
VZ
1687 const size_t len = ::WideCharToMultiByte
1688 (
1689 m_CodePage, // code page
13dd924a
VZ
1690 flags, // either none or no best fit
1691 pwz, // input string
2b5f62a0
VZ
1692 -1, // it is (wide) NUL-terminated
1693 buf, // output buffer
1694 buf ? n : 0, // and its size
1695 NULL, // default "replacement" char
13dd924a 1696 pUsedDef // [out] was it used?
2b5f62a0
VZ
1697 );
1698
13dd924a
VZ
1699 if ( !len )
1700 {
1701 // function totally failed
1702 return (size_t)-1;
1703 }
1704
1705 // if we were really converting, check if we succeeded
1706 if ( buf )
1707 {
1708 if ( flags )
1709 {
1710 // check if the conversion failed, i.e. if any replacements
1711 // were done
1712 if ( usedDef )
1713 return (size_t)-1;
1714 }
1715 else // we must resort to double tripping...
1716 {
1717 wxWCharBuffer wcBuf(n);
1718 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1719 wcscmp(wcBuf, pwz) != 0 )
1720 {
1721 // we didn't obtain the same thing we started from, hence
1722 // the conversion was lossy and we consider that it failed
1723 return (size_t)-1;
1724 }
1725 }
1726 }
1727
03a991bc 1728 // see the comment above for the reason of "len - 1"
13dd924a 1729 return len - 1;
f1339c56 1730 }
dccce9ea 1731
13dd924a
VZ
1732 bool IsOk() const { return m_CodePage != -1; }
1733
1734private:
1735 static bool CanUseNoBestFit()
1736 {
1737 static int s_isWin98Or2k = -1;
1738
1739 if ( s_isWin98Or2k == -1 )
1740 {
1741 int verMaj, verMin;
1742 switch ( wxGetOsVersion(&verMaj, &verMin) )
1743 {
1744 case wxWIN95:
1745 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1746 break;
1747
1748 case wxWINDOWS_NT:
1749 s_isWin98Or2k = verMaj >= 5;
1750 break;
1751
1752 default:
1753 // unknown, be conseravtive by default
1754 s_isWin98Or2k = 0;
1755 }
1756
1757 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1758 }
1759
1760 return s_isWin98Or2k == 1;
1761 }
f1339c56 1762
b1d66b54 1763 long m_CodePage;
1cd52418 1764};
e95354ec
VZ
1765
1766#endif // wxHAVE_WIN32_MB2WC
1767
f7e98dee
RN
1768// ============================================================================
1769// Cocoa conversion classes
1770// ============================================================================
1771
1772#if defined(__WXCOCOA__)
1773
ecd9653b 1774// RN: There is no UTF-32 support in either Core Foundation or
f7e98dee
RN
1775// Cocoa. Strangely enough, internally Core Foundation uses
1776// UTF 32 internally quite a bit - its just not public (yet).
1777
1778#include <CoreFoundation/CFString.h>
1779#include <CoreFoundation/CFStringEncodingExt.h>
1780
1781CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 1782{
638357a0 1783 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ecd9653b
WS
1784 if ( encoding == wxFONTENCODING_DEFAULT )
1785 {
638357a0 1786 enc = CFStringGetSystemEncoding();
ecd9653b
WS
1787 }
1788 else switch( encoding)
1789 {
1790 case wxFONTENCODING_ISO8859_1 :
1791 enc = kCFStringEncodingISOLatin1 ;
1792 break ;
1793 case wxFONTENCODING_ISO8859_2 :
1794 enc = kCFStringEncodingISOLatin2;
1795 break ;
1796 case wxFONTENCODING_ISO8859_3 :
1797 enc = kCFStringEncodingISOLatin3 ;
1798 break ;
1799 case wxFONTENCODING_ISO8859_4 :
1800 enc = kCFStringEncodingISOLatin4;
1801 break ;
1802 case wxFONTENCODING_ISO8859_5 :
1803 enc = kCFStringEncodingISOLatinCyrillic;
1804 break ;
1805 case wxFONTENCODING_ISO8859_6 :
1806 enc = kCFStringEncodingISOLatinArabic;
1807 break ;
1808 case wxFONTENCODING_ISO8859_7 :
1809 enc = kCFStringEncodingISOLatinGreek;
1810 break ;
1811 case wxFONTENCODING_ISO8859_8 :
1812 enc = kCFStringEncodingISOLatinHebrew;
1813 break ;
1814 case wxFONTENCODING_ISO8859_9 :
1815 enc = kCFStringEncodingISOLatin5;
1816 break ;
1817 case wxFONTENCODING_ISO8859_10 :
1818 enc = kCFStringEncodingISOLatin6;
1819 break ;
1820 case wxFONTENCODING_ISO8859_11 :
1821 enc = kCFStringEncodingISOLatinThai;
1822 break ;
1823 case wxFONTENCODING_ISO8859_13 :
1824 enc = kCFStringEncodingISOLatin7;
1825 break ;
1826 case wxFONTENCODING_ISO8859_14 :
1827 enc = kCFStringEncodingISOLatin8;
1828 break ;
1829 case wxFONTENCODING_ISO8859_15 :
1830 enc = kCFStringEncodingISOLatin9;
1831 break ;
1832
1833 case wxFONTENCODING_KOI8 :
1834 enc = kCFStringEncodingKOI8_R;
1835 break ;
1836 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1837 enc = kCFStringEncodingDOSRussian;
1838 break ;
1839
1840// case wxFONTENCODING_BULGARIAN :
1841// enc = ;
1842// break ;
1843
1844 case wxFONTENCODING_CP437 :
1845 enc =kCFStringEncodingDOSLatinUS ;
1846 break ;
1847 case wxFONTENCODING_CP850 :
1848 enc = kCFStringEncodingDOSLatin1;
1849 break ;
1850 case wxFONTENCODING_CP852 :
1851 enc = kCFStringEncodingDOSLatin2;
1852 break ;
1853 case wxFONTENCODING_CP855 :
1854 enc = kCFStringEncodingDOSCyrillic;
1855 break ;
1856 case wxFONTENCODING_CP866 :
1857 enc =kCFStringEncodingDOSRussian ;
1858 break ;
1859 case wxFONTENCODING_CP874 :
1860 enc = kCFStringEncodingDOSThai;
1861 break ;
1862 case wxFONTENCODING_CP932 :
1863 enc = kCFStringEncodingDOSJapanese;
1864 break ;
1865 case wxFONTENCODING_CP936 :
1866 enc =kCFStringEncodingDOSChineseSimplif ;
1867 break ;
1868 case wxFONTENCODING_CP949 :
1869 enc = kCFStringEncodingDOSKorean;
1870 break ;
1871 case wxFONTENCODING_CP950 :
1872 enc = kCFStringEncodingDOSChineseTrad;
1873 break ;
ecd9653b
WS
1874 case wxFONTENCODING_CP1250 :
1875 enc = kCFStringEncodingWindowsLatin2;
1876 break ;
1877 case wxFONTENCODING_CP1251 :
1878 enc =kCFStringEncodingWindowsCyrillic ;
1879 break ;
1880 case wxFONTENCODING_CP1252 :
1881 enc =kCFStringEncodingWindowsLatin1 ;
1882 break ;
1883 case wxFONTENCODING_CP1253 :
1884 enc = kCFStringEncodingWindowsGreek;
1885 break ;
1886 case wxFONTENCODING_CP1254 :
1887 enc = kCFStringEncodingWindowsLatin5;
1888 break ;
1889 case wxFONTENCODING_CP1255 :
1890 enc =kCFStringEncodingWindowsHebrew ;
1891 break ;
1892 case wxFONTENCODING_CP1256 :
1893 enc =kCFStringEncodingWindowsArabic ;
1894 break ;
1895 case wxFONTENCODING_CP1257 :
1896 enc = kCFStringEncodingWindowsBalticRim;
1897 break ;
638357a0
RN
1898// This only really encodes to UTF7 (if that) evidently
1899// case wxFONTENCODING_UTF7 :
1900// enc = kCFStringEncodingNonLossyASCII ;
1901// break ;
ecd9653b
WS
1902 case wxFONTENCODING_UTF8 :
1903 enc = kCFStringEncodingUTF8 ;
1904 break ;
1905 case wxFONTENCODING_EUC_JP :
1906 enc = kCFStringEncodingEUC_JP;
1907 break ;
1908 case wxFONTENCODING_UTF16 :
f7e98dee 1909 enc = kCFStringEncodingUnicode ;
ecd9653b 1910 break ;
f7e98dee
RN
1911 case wxFONTENCODING_MACROMAN :
1912 enc = kCFStringEncodingMacRoman ;
1913 break ;
1914 case wxFONTENCODING_MACJAPANESE :
1915 enc = kCFStringEncodingMacJapanese ;
1916 break ;
1917 case wxFONTENCODING_MACCHINESETRAD :
1918 enc = kCFStringEncodingMacChineseTrad ;
1919 break ;
1920 case wxFONTENCODING_MACKOREAN :
1921 enc = kCFStringEncodingMacKorean ;
1922 break ;
1923 case wxFONTENCODING_MACARABIC :
1924 enc = kCFStringEncodingMacArabic ;
1925 break ;
1926 case wxFONTENCODING_MACHEBREW :
1927 enc = kCFStringEncodingMacHebrew ;
1928 break ;
1929 case wxFONTENCODING_MACGREEK :
1930 enc = kCFStringEncodingMacGreek ;
1931 break ;
1932 case wxFONTENCODING_MACCYRILLIC :
1933 enc = kCFStringEncodingMacCyrillic ;
1934 break ;
1935 case wxFONTENCODING_MACDEVANAGARI :
1936 enc = kCFStringEncodingMacDevanagari ;
1937 break ;
1938 case wxFONTENCODING_MACGURMUKHI :
1939 enc = kCFStringEncodingMacGurmukhi ;
1940 break ;
1941 case wxFONTENCODING_MACGUJARATI :
1942 enc = kCFStringEncodingMacGujarati ;
1943 break ;
1944 case wxFONTENCODING_MACORIYA :
1945 enc = kCFStringEncodingMacOriya ;
1946 break ;
1947 case wxFONTENCODING_MACBENGALI :
1948 enc = kCFStringEncodingMacBengali ;
1949 break ;
1950 case wxFONTENCODING_MACTAMIL :
1951 enc = kCFStringEncodingMacTamil ;
1952 break ;
1953 case wxFONTENCODING_MACTELUGU :
1954 enc = kCFStringEncodingMacTelugu ;
1955 break ;
1956 case wxFONTENCODING_MACKANNADA :
1957 enc = kCFStringEncodingMacKannada ;
1958 break ;
1959 case wxFONTENCODING_MACMALAJALAM :
1960 enc = kCFStringEncodingMacMalayalam ;
1961 break ;
1962 case wxFONTENCODING_MACSINHALESE :
1963 enc = kCFStringEncodingMacSinhalese ;
1964 break ;
1965 case wxFONTENCODING_MACBURMESE :
1966 enc = kCFStringEncodingMacBurmese ;
1967 break ;
1968 case wxFONTENCODING_MACKHMER :
1969 enc = kCFStringEncodingMacKhmer ;
1970 break ;
1971 case wxFONTENCODING_MACTHAI :
1972 enc = kCFStringEncodingMacThai ;
1973 break ;
1974 case wxFONTENCODING_MACLAOTIAN :
1975 enc = kCFStringEncodingMacLaotian ;
1976 break ;
1977 case wxFONTENCODING_MACGEORGIAN :
1978 enc = kCFStringEncodingMacGeorgian ;
1979 break ;
1980 case wxFONTENCODING_MACARMENIAN :
1981 enc = kCFStringEncodingMacArmenian ;
1982 break ;
1983 case wxFONTENCODING_MACCHINESESIMP :
1984 enc = kCFStringEncodingMacChineseSimp ;
1985 break ;
1986 case wxFONTENCODING_MACTIBETAN :
1987 enc = kCFStringEncodingMacTibetan ;
1988 break ;
1989 case wxFONTENCODING_MACMONGOLIAN :
1990 enc = kCFStringEncodingMacMongolian ;
1991 break ;
1992 case wxFONTENCODING_MACETHIOPIC :
1993 enc = kCFStringEncodingMacEthiopic ;
1994 break ;
1995 case wxFONTENCODING_MACCENTRALEUR :
1996 enc = kCFStringEncodingMacCentralEurRoman ;
1997 break ;
1998 case wxFONTENCODING_MACVIATNAMESE :
1999 enc = kCFStringEncodingMacVietnamese ;
2000 break ;
2001 case wxFONTENCODING_MACARABICEXT :
2002 enc = kCFStringEncodingMacExtArabic ;
2003 break ;
2004 case wxFONTENCODING_MACSYMBOL :
2005 enc = kCFStringEncodingMacSymbol ;
2006 break ;
2007 case wxFONTENCODING_MACDINGBATS :
2008 enc = kCFStringEncodingMacDingbats ;
2009 break ;
2010 case wxFONTENCODING_MACTURKISH :
2011 enc = kCFStringEncodingMacTurkish ;
2012 break ;
2013 case wxFONTENCODING_MACCROATIAN :
2014 enc = kCFStringEncodingMacCroatian ;
2015 break ;
2016 case wxFONTENCODING_MACICELANDIC :
2017 enc = kCFStringEncodingMacIcelandic ;
2018 break ;
2019 case wxFONTENCODING_MACROMANIAN :
2020 enc = kCFStringEncodingMacRomanian ;
2021 break ;
2022 case wxFONTENCODING_MACCELTIC :
2023 enc = kCFStringEncodingMacCeltic ;
2024 break ;
2025 case wxFONTENCODING_MACGAELIC :
2026 enc = kCFStringEncodingMacGaelic ;
2027 break ;
ecd9653b
WS
2028// case wxFONTENCODING_MACKEYBOARD :
2029// enc = kCFStringEncodingMacKeyboardGlyphs ;
2030// break ;
2031 default :
2032 // because gcc is picky
2033 break ;
2034 } ;
2035 return enc ;
f7e98dee
RN
2036}
2037
f7e98dee
RN
2038class wxMBConv_cocoa : public wxMBConv
2039{
2040public:
2041 wxMBConv_cocoa()
2042 {
2043 Init(CFStringGetSystemEncoding()) ;
2044 }
2045
a6900d10 2046#if wxUSE_FONTMAP
f7e98dee
RN
2047 wxMBConv_cocoa(const wxChar* name)
2048 {
267e11c5 2049 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
f7e98dee 2050 }
a6900d10 2051#endif
f7e98dee
RN
2052
2053 wxMBConv_cocoa(wxFontEncoding encoding)
2054 {
2055 Init( wxCFStringEncFromFontEnc(encoding) );
2056 }
2057
2058 ~wxMBConv_cocoa()
2059 {
2060 }
2061
2062 void Init( CFStringEncoding encoding)
2063 {
638357a0 2064 m_encoding = encoding ;
f7e98dee
RN
2065 }
2066
2067 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2068 {
2069 wxASSERT(szUnConv);
ecd9653b 2070
638357a0
RN
2071 CFStringRef theString = CFStringCreateWithBytes (
2072 NULL, //the allocator
2073 (const UInt8*)szUnConv,
2074 strlen(szUnConv),
2075 m_encoding,
2076 false //no BOM/external representation
f7e98dee
RN
2077 );
2078
2079 wxASSERT(theString);
2080
638357a0
RN
2081 size_t nOutLength = CFStringGetLength(theString);
2082
2083 if (szOut == NULL)
f7e98dee 2084 {
f7e98dee 2085 CFRelease(theString);
638357a0 2086 return nOutLength;
f7e98dee 2087 }
ecd9653b 2088
638357a0 2089 CFRange theRange = { 0, nOutSize };
ecd9653b 2090
638357a0
RN
2091#if SIZEOF_WCHAR_T == 4
2092 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2093#endif
3698ae71 2094
f7e98dee 2095 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
3698ae71 2096
f7e98dee 2097 CFRelease(theString);
ecd9653b 2098
638357a0 2099 szUniCharBuffer[nOutLength] = '\0' ;
f7e98dee
RN
2100
2101#if SIZEOF_WCHAR_T == 4
2102 wxMBConvUTF16 converter ;
638357a0 2103 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
f7e98dee
RN
2104 delete[] szUniCharBuffer;
2105#endif
3698ae71 2106
638357a0 2107 return nOutLength;
f7e98dee
RN
2108 }
2109
2110 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2111 {
638357a0 2112 wxASSERT(szUnConv);
3698ae71 2113
f7e98dee 2114 size_t nRealOutSize;
638357a0 2115 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 2116 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2117
f7e98dee
RN
2118#if SIZEOF_WCHAR_T == 4
2119 wxMBConvUTF16BE converter ;
2120 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2121 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2122 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2123 nBufSize /= sizeof(UniChar);
f7e98dee
RN
2124#endif
2125
2126 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2127 NULL, //allocator
2128 szUniBuffer,
2129 nBufSize,
638357a0 2130 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 2131 );
ecd9653b 2132
f7e98dee 2133 wxASSERT(theString);
ecd9653b 2134
f7e98dee 2135 //Note that CER puts a BOM when converting to unicode
638357a0
RN
2136 //so we check and use getchars instead in that case
2137 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 2138 {
638357a0
RN
2139 if (szOut != NULL)
2140 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
3698ae71 2141
638357a0
RN
2142 nRealOutSize = CFStringGetLength(theString) + 1;
2143 }
2144 else
2145 {
2146 CFStringGetBytes(
2147 theString,
2148 CFRangeMake(0, CFStringGetLength(theString)),
2149 m_encoding,
2150 0, //what to put in characters that can't be converted -
2151 //0 tells CFString to return NULL if it meets such a character
2152 false, //not an external representation
2153 (UInt8*) szOut,
3698ae71 2154 nOutSize,
638357a0
RN
2155 (CFIndex*) &nRealOutSize
2156 );
f7e98dee 2157 }
ecd9653b 2158
638357a0 2159 CFRelease(theString);
ecd9653b 2160
638357a0
RN
2161#if SIZEOF_WCHAR_T == 4
2162 delete[] szUniBuffer;
2163#endif
ecd9653b 2164
f7e98dee
RN
2165 return nRealOutSize - 1;
2166 }
2167
2168 bool IsOk() const
ecd9653b 2169 {
3698ae71 2170 return m_encoding != kCFStringEncodingInvalidId &&
638357a0 2171 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2172 }
2173
2174private:
638357a0 2175 CFStringEncoding m_encoding ;
f7e98dee
RN
2176};
2177
2178#endif // defined(__WXCOCOA__)
2179
335d31e0
SC
2180// ============================================================================
2181// Mac conversion classes
2182// ============================================================================
2183
2184#if defined(__WXMAC__) && defined(TARGET_CARBON)
2185
2186class wxMBConv_mac : public wxMBConv
2187{
2188public:
2189 wxMBConv_mac()
2190 {
2191 Init(CFStringGetSystemEncoding()) ;
2192 }
2193
2d1659cf 2194#if wxUSE_FONTMAP
335d31e0
SC
2195 wxMBConv_mac(const wxChar* name)
2196 {
267e11c5 2197 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
335d31e0 2198 }
2d1659cf 2199#endif
335d31e0
SC
2200
2201 wxMBConv_mac(wxFontEncoding encoding)
2202 {
d775fa82
WS
2203 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2204 }
2205
2206 ~wxMBConv_mac()
2207 {
2208 OSStatus status = noErr ;
2209 status = TECDisposeConverter(m_MB2WC_converter);
2210 status = TECDisposeConverter(m_WC2MB_converter);
2211 }
2212
2213
2214 void Init( TextEncodingBase encoding)
2215 {
2216 OSStatus status = noErr ;
2217 m_char_encoding = encoding ;
2218 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2219
2220 status = TECCreateConverter(&m_MB2WC_converter,
2221 m_char_encoding,
2222 m_unicode_encoding);
2223 status = TECCreateConverter(&m_WC2MB_converter,
2224 m_unicode_encoding,
2225 m_char_encoding);
2226 }
2227
335d31e0
SC
2228 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2229 {
d775fa82
WS
2230 OSStatus status = noErr ;
2231 ByteCount byteOutLen ;
2232 ByteCount byteInLen = strlen(psz) ;
2233 wchar_t *tbuf = NULL ;
2234 UniChar* ubuf = NULL ;
2235 size_t res = 0 ;
2236
2237 if (buf == NULL)
2238 {
638357a0 2239 //apple specs say at least 32
c543817b 2240 n = wxMax( 32 , byteInLen ) ;
d775fa82
WS
2241 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2242 }
2243 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
f3a355ce 2244#if SIZEOF_WCHAR_T == 4
d775fa82 2245 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2246#else
d775fa82 2247 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2248#endif
d775fa82
WS
2249 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2250 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
f3a355ce 2251#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2252 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2253 // is not properly terminated we get random characters at the end
2254 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d775fa82
WS
2255 wxMBConvUTF16BE converter ;
2256 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2257 free( ubuf ) ;
f3a355ce 2258#else
d775fa82 2259 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2260#endif
d775fa82
WS
2261 if ( buf == NULL )
2262 free(tbuf) ;
335d31e0 2263
335d31e0
SC
2264 if ( buf && res < n)
2265 buf[res] = 0;
2266
d775fa82 2267 return res ;
335d31e0
SC
2268 }
2269
2270 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82
WS
2271 {
2272 OSStatus status = noErr ;
2273 ByteCount byteOutLen ;
2274 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2275
2276 char *tbuf = NULL ;
2277
2278 if (buf == NULL)
2279 {
638357a0 2280 //apple specs say at least 32
c543817b 2281 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2282 tbuf = (char*) malloc( n ) ;
2283 }
2284
2285 ByteCount byteBufferLen = n ;
2286 UniChar* ubuf = NULL ;
f3a355ce 2287#if SIZEOF_WCHAR_T == 4
d775fa82
WS
2288 wxMBConvUTF16BE converter ;
2289 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2290 byteInLen = unicharlen ;
2291 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2292 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce 2293#else
d775fa82 2294 ubuf = (UniChar*) psz ;
f3a355ce 2295#endif
d775fa82
WS
2296 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2297 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
f3a355ce 2298#if SIZEOF_WCHAR_T == 4
d775fa82 2299 free( ubuf ) ;
f3a355ce 2300#endif
d775fa82
WS
2301 if ( buf == NULL )
2302 free(tbuf) ;
335d31e0 2303
d775fa82 2304 size_t res = byteOutLen ;
335d31e0 2305 if ( buf && res < n)
638357a0 2306 {
335d31e0 2307 buf[res] = 0;
3698ae71 2308
638357a0
RN
2309 //we need to double-trip to verify it didn't insert any ? in place
2310 //of bogus characters
2311 wxWCharBuffer wcBuf(n);
2312 size_t pszlen = wxWcslen(psz);
2313 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2314 wxWcslen(wcBuf) != pszlen ||
2315 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2316 {
2317 // we didn't obtain the same thing we started from, hence
2318 // the conversion was lossy and we consider that it failed
2319 return (size_t)-1;
2320 }
2321 }
335d31e0 2322
d775fa82 2323 return res ;
335d31e0
SC
2324 }
2325
2326 bool IsOk() const
2327 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2328
2329private:
d775fa82
WS
2330 TECObjectRef m_MB2WC_converter ;
2331 TECObjectRef m_WC2MB_converter ;
2332
2333 TextEncodingBase m_char_encoding ;
2334 TextEncodingBase m_unicode_encoding ;
335d31e0
SC
2335};
2336
2337#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 2338
36acb880
VZ
2339// ============================================================================
2340// wxEncodingConverter based conversion classes
2341// ============================================================================
2342
1e6feb95 2343#if wxUSE_FONTMAP
1cd52418 2344
e95354ec 2345class wxMBConv_wxwin : public wxMBConv
1cd52418 2346{
8b04d4c4
VZ
2347private:
2348 void Init()
2349 {
2350 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2351 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2352 }
2353
6001e347 2354public:
f1339c56
RR
2355 // temporarily just use wxEncodingConverter stuff,
2356 // so that it works while a better implementation is built
e95354ec 2357 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
2358 {
2359 if (name)
267e11c5 2360 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
2361 else
2362 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 2363
8b04d4c4
VZ
2364 Init();
2365 }
2366
e95354ec 2367 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
2368 {
2369 m_enc = enc;
2370
2371 Init();
f1339c56 2372 }
dccce9ea 2373
bde4baac 2374 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
2375 {
2376 size_t inbuf = strlen(psz);
dccce9ea 2377 if (buf)
c643a977
VS
2378 {
2379 if (!m2w.Convert(psz,buf))
2380 return (size_t)-1;
2381 }
f1339c56
RR
2382 return inbuf;
2383 }
dccce9ea 2384
bde4baac 2385 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 2386 {
f8d791e0 2387 const size_t inbuf = wxWcslen(psz);
f1339c56 2388 if (buf)
c643a977
VS
2389 {
2390 if (!w2m.Convert(psz,buf))
2391 return (size_t)-1;
2392 }
dccce9ea 2393
f1339c56
RR
2394 return inbuf;
2395 }
dccce9ea 2396
e95354ec 2397 bool IsOk() const { return m_ok; }
f1339c56
RR
2398
2399public:
8b04d4c4 2400 wxFontEncoding m_enc;
f1339c56 2401 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
2402
2403 // were we initialized successfully?
2404 bool m_ok;
fc7a2a60 2405
e95354ec 2406 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 2407};
6001e347 2408
1e6feb95
VZ
2409#endif // wxUSE_FONTMAP
2410
36acb880
VZ
2411// ============================================================================
2412// wxCSConv implementation
2413// ============================================================================
2414
8b04d4c4 2415void wxCSConv::Init()
6001e347 2416{
e95354ec
VZ
2417 m_name = NULL;
2418 m_convReal = NULL;
2419 m_deferred = true;
2420}
2421
8b04d4c4
VZ
2422wxCSConv::wxCSConv(const wxChar *charset)
2423{
2424 Init();
82713003 2425
e95354ec
VZ
2426 if ( charset )
2427 {
e95354ec
VZ
2428 SetName(charset);
2429 }
bda3d86a
VZ
2430
2431 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
2432}
2433
8b04d4c4
VZ
2434wxCSConv::wxCSConv(wxFontEncoding encoding)
2435{
bda3d86a 2436 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
2437 {
2438 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2439
2440 encoding = wxFONTENCODING_SYSTEM;
2441 }
2442
8b04d4c4
VZ
2443 Init();
2444
bda3d86a 2445 m_encoding = encoding;
8b04d4c4
VZ
2446}
2447
6001e347
RR
2448wxCSConv::~wxCSConv()
2449{
65e50848
JS
2450 Clear();
2451}
2452
54380f29 2453wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 2454 : wxMBConv()
54380f29 2455{
8b04d4c4
VZ
2456 Init();
2457
54380f29 2458 SetName(conv.m_name);
8b04d4c4 2459 m_encoding = conv.m_encoding;
54380f29
GD
2460}
2461
2462wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2463{
2464 Clear();
8b04d4c4 2465
54380f29 2466 SetName(conv.m_name);
8b04d4c4
VZ
2467 m_encoding = conv.m_encoding;
2468
54380f29
GD
2469 return *this;
2470}
2471
65e50848
JS
2472void wxCSConv::Clear()
2473{
8b04d4c4 2474 free(m_name);
e95354ec 2475 delete m_convReal;
8b04d4c4 2476
65e50848 2477 m_name = NULL;
e95354ec 2478 m_convReal = NULL;
6001e347
RR
2479}
2480
2481void wxCSConv::SetName(const wxChar *charset)
2482{
f1339c56
RR
2483 if (charset)
2484 {
2485 m_name = wxStrdup(charset);
e95354ec 2486 m_deferred = true;
f1339c56 2487 }
6001e347
RR
2488}
2489
e95354ec
VZ
2490wxMBConv *wxCSConv::DoCreate() const
2491{
c547282d
VZ
2492 // check for the special case of ASCII or ISO8859-1 charset: as we have
2493 // special knowledge of it anyhow, we don't need to create a special
2494 // conversion object
2495 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 2496 {
e95354ec
VZ
2497 // don't convert at all
2498 return NULL;
2499 }
dccce9ea 2500
e95354ec
VZ
2501 // we trust OS to do conversion better than we can so try external
2502 // conversion methods first
2503 //
2504 // the full order is:
2505 // 1. OS conversion (iconv() under Unix or Win32 API)
2506 // 2. hard coded conversions for UTF
2507 // 3. wxEncodingConverter as fall back
2508
2509 // step (1)
2510#ifdef HAVE_ICONV
c547282d 2511#if !wxUSE_FONTMAP
e95354ec 2512 if ( m_name )
c547282d 2513#endif // !wxUSE_FONTMAP
e95354ec 2514 {
c547282d
VZ
2515 wxString name(m_name);
2516
2517#if wxUSE_FONTMAP
2518 if ( name.empty() )
267e11c5 2519 name = wxFontMapperBase::Get()->GetEncodingName(m_encoding);
c547282d
VZ
2520#endif // wxUSE_FONTMAP
2521
2522 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
e95354ec
VZ
2523 if ( conv->IsOk() )
2524 return conv;
2525
2526 delete conv;
2527 }
2528#endif // HAVE_ICONV
2529
2530#ifdef wxHAVE_WIN32_MB2WC
2531 {
7608a683 2532#if wxUSE_FONTMAP
e95354ec
VZ
2533 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2534 : new wxMBConv_win32(m_encoding);
2535 if ( conv->IsOk() )
2536 return conv;
2537
2538 delete conv;
7608a683
WS
2539#else
2540 return NULL;
2541#endif
e95354ec
VZ
2542 }
2543#endif // wxHAVE_WIN32_MB2WC
d775fa82
WS
2544#if defined(__WXMAC__)
2545 {
5c3c8676 2546 // leave UTF16 and UTF32 to the built-ins of wx
3698ae71 2547 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
5c3c8676 2548 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
d775fa82
WS
2549 {
2550
2d1659cf 2551#if wxUSE_FONTMAP
d775fa82
WS
2552 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2553 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
2554#else
2555 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2556#endif
d775fa82 2557 if ( conv->IsOk() )
f7e98dee
RN
2558 return conv;
2559
2560 delete conv;
2561 }
2562 }
2563#endif
2564#if defined(__WXCOCOA__)
2565 {
2566 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2567 {
2568
a6900d10 2569#if wxUSE_FONTMAP
f7e98dee
RN
2570 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2571 : new wxMBConv_cocoa(m_encoding);
a6900d10
RN
2572#else
2573 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2574#endif
f7e98dee 2575 if ( conv->IsOk() )
d775fa82
WS
2576 return conv;
2577
2578 delete conv;
2579 }
335d31e0
SC
2580 }
2581#endif
e95354ec
VZ
2582 // step (2)
2583 wxFontEncoding enc = m_encoding;
2584#if wxUSE_FONTMAP
c547282d
VZ
2585 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2586 {
2587 // use "false" to suppress interactive dialogs -- we can be called from
2588 // anywhere and popping up a dialog from here is the last thing we want to
2589 // do
267e11c5 2590 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 2591 }
e95354ec
VZ
2592#endif // wxUSE_FONTMAP
2593
2594 switch ( enc )
2595 {
2596 case wxFONTENCODING_UTF7:
2597 return new wxMBConvUTF7;
2598
2599 case wxFONTENCODING_UTF8:
2600 return new wxMBConvUTF8;
2601
e95354ec
VZ
2602 case wxFONTENCODING_UTF16BE:
2603 return new wxMBConvUTF16BE;
2604
2605 case wxFONTENCODING_UTF16LE:
2606 return new wxMBConvUTF16LE;
2607
e95354ec
VZ
2608 case wxFONTENCODING_UTF32BE:
2609 return new wxMBConvUTF32BE;
2610
2611 case wxFONTENCODING_UTF32LE:
2612 return new wxMBConvUTF32LE;
2613
2614 default:
2615 // nothing to do but put here to suppress gcc warnings
2616 ;
2617 }
2618
2619 // step (3)
2620#if wxUSE_FONTMAP
2621 {
2622 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2623 : new wxMBConv_wxwin(m_encoding);
2624 if ( conv->IsOk() )
2625 return conv;
2626
2627 delete conv;
2628 }
2629#endif // wxUSE_FONTMAP
2630
a58d4f4d
VS
2631 // NB: This is a hack to prevent deadlock. What could otherwise happen
2632 // in Unicode build: wxConvLocal creation ends up being here
2633 // because of some failure and logs the error. But wxLog will try to
2634 // attach timestamp, for which it will need wxConvLocal (to convert
2635 // time to char* and then wchar_t*), but that fails, tries to log
2636 // error, but wxLog has a (already locked) critical section that
2637 // guards static buffer.
2638 static bool alreadyLoggingError = false;
2639 if (!alreadyLoggingError)
2640 {
2641 alreadyLoggingError = true;
2642 wxLogError(_("Cannot convert from the charset '%s'!"),
2643 m_name ? m_name
e95354ec
VZ
2644 :
2645#if wxUSE_FONTMAP
267e11c5 2646 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
e95354ec
VZ
2647#else // !wxUSE_FONTMAP
2648 wxString::Format(_("encoding %s"), m_encoding).c_str()
2649#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2650 );
a58d4f4d
VS
2651 alreadyLoggingError = false;
2652 }
e95354ec
VZ
2653
2654 return NULL;
2655}
2656
2657void wxCSConv::CreateConvIfNeeded() const
2658{
2659 if ( m_deferred )
2660 {
2661 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
2662
2663#if wxUSE_INTL
2664 // if we don't have neither the name nor the encoding, use the default
2665 // encoding for this system
2666 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2667 {
4d312c22 2668 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
2669 }
2670#endif // wxUSE_INTL
2671
e95354ec
VZ
2672 self->m_convReal = DoCreate();
2673 self->m_deferred = false;
6001e347 2674 }
6001e347
RR
2675}
2676
2677size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2678{
e95354ec 2679 CreateConvIfNeeded();
dccce9ea 2680
e95354ec
VZ
2681 if (m_convReal)
2682 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
2683
2684 // latin-1 (direct)
4def3b35 2685 size_t len = strlen(psz);
dccce9ea 2686
f1339c56
RR
2687 if (buf)
2688 {
4def3b35 2689 for (size_t c = 0; c <= len; c++)
f1339c56
RR
2690 buf[c] = (unsigned char)(psz[c]);
2691 }
dccce9ea 2692
f1339c56 2693 return len;
6001e347
RR
2694}
2695
2696size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2697{
e95354ec 2698 CreateConvIfNeeded();
dccce9ea 2699
e95354ec
VZ
2700 if (m_convReal)
2701 return m_convReal->WC2MB(buf, psz, n);
1cd52418 2702
f1339c56 2703 // latin-1 (direct)
f8d791e0 2704 const size_t len = wxWcslen(psz);
f1339c56
RR
2705 if (buf)
2706 {
4def3b35 2707 for (size_t c = 0; c <= len; c++)
24642831
VS
2708 {
2709 if (psz[c] > 0xFF)
2710 return (size_t)-1;
907173e5 2711 buf[c] = (char)psz[c];
24642831
VS
2712 }
2713 }
2714 else
2715 {
2716 for (size_t c = 0; c <= len; c++)
2717 {
2718 if (psz[c] > 0xFF)
2719 return (size_t)-1;
2720 }
f1339c56 2721 }
dccce9ea 2722
f1339c56 2723 return len;
6001e347
RR
2724}
2725
bde4baac
VZ
2726// ----------------------------------------------------------------------------
2727// globals
2728// ----------------------------------------------------------------------------
2729
2730#ifdef __WINDOWS__
2731 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
2732#elif defined(__WXMAC__) && !defined(__MACH__)
2733 static wxMBConv_mac wxConvLibcObj ;
bde4baac 2734#else
dcc8fac0 2735 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
2736#endif
2737
2738static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2739static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2740static wxMBConvUTF7 wxConvUTF7Obj;
2741static wxMBConvUTF8 wxConvUTF8Obj;
c12b7f79 2742
bde4baac
VZ
2743WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2744WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2745WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2746WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2747WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2748WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
f5a1953b
VZ
2749WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2750#ifdef __WXOSX__
ea8ce907 2751 wxConvUTF8Obj;
f5a1953b 2752#else
ea8ce907 2753 wxConvLibcObj;
f5a1953b
VZ
2754#endif
2755
bde4baac
VZ
2756
2757#else // !wxUSE_WCHAR_T
2758
2759// stand-ins in absence of wchar_t
2760WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2761 wxConvISO8859_1,
2762 wxConvLocal,
2763 wxConvUTF8;
2764
2765#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
2766
2767