]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
Update to zlib 1.2.3
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
f6bcfd97
BP
15// ============================================================================
16// declarations
17// ============================================================================
18
19// ----------------------------------------------------------------------------
20// headers
21// ----------------------------------------------------------------------------
22
14f355c2 23#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
24 #pragma implementation "strconv.h"
25#endif
26
27// For compilers that support precompilation, includes "wx.h".
28#include "wx/wxprec.h"
29
30#ifdef __BORLANDC__
31 #pragma hdrstop
32#endif
33
373658eb
VZ
34#ifndef WX_PRECOMP
35 #include "wx/intl.h"
36 #include "wx/log.h"
37#endif // WX_PRECOMP
38
bde4baac
VZ
39#include "wx/strconv.h"
40
41#if wxUSE_WCHAR_T
42
7608a683 43#ifdef __WINDOWS__
532d575b 44 #include "wx/msw/private.h"
13dd924a 45 #include "wx/msw/missing.h"
0a1c1e62
GRG
46#endif
47
1c193821 48#ifndef __WXWINCE__
1cd52418 49#include <errno.h>
1c193821
JS
50#endif
51
6001e347
RR
52#include <ctype.h>
53#include <string.h>
54#include <stdlib.h>
55
e95354ec
VZ
56#if defined(__WIN32__) && !defined(__WXMICROWIN__)
57 #define wxHAVE_WIN32_MB2WC
58#endif // __WIN32__ but !__WXMICROWIN__
59
373658eb
VZ
60// ----------------------------------------------------------------------------
61// headers
62// ----------------------------------------------------------------------------
7af284fd 63
6001e347 64#ifdef __SALFORDC__
373658eb 65 #include <clib.h>
6001e347
RR
66#endif
67
b040e242 68#ifdef HAVE_ICONV
373658eb 69 #include <iconv.h>
b1d547eb 70 #include "wx/thread.h"
1cd52418 71#endif
1cd52418 72
373658eb
VZ
73#include "wx/encconv.h"
74#include "wx/fontmap.h"
7608a683 75#include "wx/utils.h"
373658eb 76
335d31e0 77#ifdef __WXMAC__
40ba2f3b 78#ifndef __DARWIN__
4227afa4
SC
79#include <ATSUnicode.h>
80#include <TextCommon.h>
81#include <TextEncodingConverter.h>
40ba2f3b 82#endif
335d31e0
SC
83
84#include "wx/mac/private.h" // includes mac headers
85#endif
373658eb
VZ
86// ----------------------------------------------------------------------------
87// macros
88// ----------------------------------------------------------------------------
3e61dfb0 89
// Byte-swap, in place, the first len elements of the array str.
//
// The macro arguments are parenthesised so that expressions such as
// "ptr + 1" or "a + b" can be passed safely. The expansion is still a plain
// brace-enclosed block (not do/while) so existing uses keep compiling
// whether or not they are followed by a semicolon.
#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<(len); _c++) (str)[_c]=wxUINT32_SWAP_ALWAYS((str)[_c]); }
#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<(len); _c++) (str)[_c]=wxUINT16_SWAP_ALWAYS((str)[_c]); }
1cd52418
OK
92
93#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
94 #define WC_NAME "UCS4"
95 #define WC_BSWAP BSWAP_UCS4
96 #ifdef WORDS_BIGENDIAN
97 #define WC_NAME_BEST "UCS-4BE"
98 #else
99 #define WC_NAME_BEST "UCS-4LE"
100 #endif
1cd52418 101#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
102 #define WC_NAME "UTF16"
103 #define WC_BSWAP BSWAP_UTF16
a3f2769e 104 #define WC_UTF16
3a0d76bc
VS
105 #ifdef WORDS_BIGENDIAN
106 #define WC_NAME_BEST "UTF-16BE"
107 #else
108 #define WC_NAME_BEST "UTF-16LE"
109 #endif
bab1e722 110#else // sizeof(wchar_t) != 2 nor 4
bde4baac
VZ
111 // does this ever happen?
112 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1cd52418
OK
113#endif
114
373658eb
VZ
115// ============================================================================
116// implementation
117// ============================================================================
118
119// ----------------------------------------------------------------------------
c91830cb 120// UTF-16 en/decoding to/from UCS-4
373658eb 121// ----------------------------------------------------------------------------
6001e347 122
b0a6bb75 123
c91830cb 124static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 125{
dccce9ea 126 if (input<=0xffff)
4def3b35 127 {
999836aa
VZ
128 if (output)
129 *output = (wxUint16) input;
4def3b35 130 return 1;
dccce9ea
VZ
131 }
132 else if (input>=0x110000)
4def3b35
VS
133 {
134 return (size_t)-1;
dccce9ea
VZ
135 }
136 else
4def3b35 137 {
dccce9ea 138 if (output)
4def3b35 139 {
c91830cb 140 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
999836aa 141 *output = (wxUint16) ((input&0x3ff)+0xdc00);
4def3b35
VS
142 }
143 return 2;
1cd52418 144 }
1cd52418
OK
145}
146
c91830cb 147static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 148{
dccce9ea 149 if ((*input<0xd800) || (*input>0xdfff))
4def3b35
VS
150 {
151 output = *input;
152 return 1;
dccce9ea 153 }
cdb14ecb 154 else if ((input[1]<0xdc00) || (input[1]>0xdfff))
4def3b35
VS
155 {
156 output = *input;
157 return (size_t)-1;
dccce9ea
VZ
158 }
159 else
4def3b35
VS
160 {
161 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
162 return 2;
163 }
1cd52418
OK
164}
165
b0a6bb75 166
f6bcfd97 167// ----------------------------------------------------------------------------
6001e347 168// wxMBConv
f6bcfd97 169// ----------------------------------------------------------------------------
2c53a80a
WS
170
171wxMBConv::~wxMBConv()
172{
173 // nothing to do here (necessary for Darwin linking probably)
174}
6001e347 175
6001e347
RR
176const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
177{
2b5f62a0 178 if ( psz )
6001e347 179 {
2b5f62a0
VZ
180 // calculate the length of the buffer needed first
181 size_t nLen = MB2WC(NULL, psz, 0);
182 if ( nLen != (size_t)-1 )
183 {
184 // now do the actual conversion
185 wxWCharBuffer buf(nLen);
635f33ce
VS
186 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
187 if ( nLen != (size_t)-1 )
188 {
189 return buf;
190 }
2b5f62a0 191 }
f6bcfd97 192 }
2b5f62a0
VZ
193
194 wxWCharBuffer buf((wchar_t *)NULL);
195
196 return buf;
6001e347
RR
197}
198
e5cceba0 199const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 200{
2b5f62a0
VZ
201 if ( pwz )
202 {
203 size_t nLen = WC2MB(NULL, pwz, 0);
204 if ( nLen != (size_t)-1 )
205 {
c91830cb 206 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
635f33ce
VS
207 nLen = WC2MB(buf.data(), pwz, nLen + 4);
208 if ( nLen != (size_t)-1 )
209 {
210 return buf;
211 }
2b5f62a0
VZ
212 }
213 }
214
215 wxCharBuffer buf((char *)NULL);
e5cceba0 216
e5cceba0 217 return buf;
6001e347
RR
218}
219
f5fb6871 220const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 221{
f5fb6871
RN
222 wxASSERT(pOutSize != NULL);
223
e4e3bbb4
RN
224 const char* szEnd = szString + nStringLen + 1;
225 const char* szPos = szString;
226 const char* szStart = szPos;
227
228 size_t nActualLength = 0;
f5fb6871
RN
229 size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
230
231 wxWCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
232
233 //Convert the string until the length() is reached, continuing the
234 //loop every time a null character is reached
235 while(szPos != szEnd)
236 {
237 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
238
239 //Get the length of the current (sub)string
240 size_t nLen = MB2WC(NULL, szPos, 0);
241
242 //Invalid conversion?
243 if( nLen == (size_t)-1 )
f5fb6871
RN
244 {
245 *pOutSize = 0;
246 theBuffer.data()[0u] = wxT('\0');
247 return theBuffer;
248 }
249
e4e3bbb4
RN
250
251 //Increase the actual length (+1 for current null character)
252 nActualLength += nLen + 1;
253
f5fb6871
RN
254 //if buffer too big, realloc the buffer
255 if (nActualLength > (nCurrentSize+1))
256 {
257 wxWCharBuffer theNewBuffer(nCurrentSize << 1);
258 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
259 theBuffer = theNewBuffer;
260 nCurrentSize <<= 1;
261 }
262
263 //Convert the current (sub)string
264 if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 265 {
f5fb6871
RN
266 *pOutSize = 0;
267 theBuffer.data()[0u] = wxT('\0');
268 return theBuffer;
e4e3bbb4
RN
269 }
270
271 //Increment to next (sub)string
3103e8a9
JS
272 //Note that we have to use strlen instead of nLen here
273 //because XX2XX gives us the size of the output buffer,
274 //which is not necessarily the length of the string
e4e3bbb4
RN
275 szPos += strlen(szPos) + 1;
276 }
277
f5fb6871
RN
278 //success - return actual length and the buffer
279 *pOutSize = nActualLength;
3698ae71 280 return theBuffer;
e4e3bbb4
RN
281}
282
f5fb6871 283const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 284{
f5fb6871
RN
285 wxASSERT(pOutSize != NULL);
286
e4e3bbb4
RN
287 const wchar_t* szEnd = szString + nStringLen + 1;
288 const wchar_t* szPos = szString;
289 const wchar_t* szStart = szPos;
290
291 size_t nActualLength = 0;
f5fb6871
RN
292 size_t nCurrentSize = nStringLen << 2; //try * 4 first
293
294 wxCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
295
296 //Convert the string until the length() is reached, continuing the
297 //loop every time a null character is reached
298 while(szPos != szEnd)
299 {
300 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
301
302 //Get the length of the current (sub)string
303 size_t nLen = WC2MB(NULL, szPos, 0);
304
305 //Invalid conversion?
306 if( nLen == (size_t)-1 )
f5fb6871
RN
307 {
308 *pOutSize = 0;
309 theBuffer.data()[0u] = wxT('\0');
310 return theBuffer;
311 }
e4e3bbb4
RN
312
313 //Increase the actual length (+1 for current null character)
314 nActualLength += nLen + 1;
3698ae71 315
f5fb6871
RN
316 //if buffer too big, realloc the buffer
317 if (nActualLength > (nCurrentSize+1))
318 {
319 wxCharBuffer theNewBuffer(nCurrentSize << 1);
320 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
321 theBuffer = theNewBuffer;
322 nCurrentSize <<= 1;
323 }
324
325 //Convert the current (sub)string
326 if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 327 {
f5fb6871
RN
328 *pOutSize = 0;
329 theBuffer.data()[0u] = wxT('\0');
330 return theBuffer;
e4e3bbb4
RN
331 }
332
333 //Increment to next (sub)string
3103e8a9
JS
334 //Note that we have to use wxWcslen instead of nLen here
335 //because XX2XX gives us the size of the output buffer,
336 //which is not necessarily the length of the string
e4e3bbb4
RN
337 szPos += wxWcslen(szPos) + 1;
338 }
339
f5fb6871
RN
340 //success - return actual length and the buffer
341 *pOutSize = nActualLength;
3698ae71 342 return theBuffer;
e4e3bbb4
RN
343}
344
6001e347 345// ----------------------------------------------------------------------------
bde4baac 346// wxMBConvLibc
6001e347
RR
347// ----------------------------------------------------------------------------
348
bde4baac
VZ
349size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
350{
351 return wxMB2WC(buf, psz, n);
352}
353
354size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
355{
356 return wxWC2MB(buf, psz, n);
357}
e1bfe89e 358
66bf0099 359#ifdef __UNIX__
c12b7f79 360
e1bfe89e 361// ----------------------------------------------------------------------------
532d575b 362// wxConvBrokenFileNames
e1bfe89e
RR
363// ----------------------------------------------------------------------------
364
845905d5 365wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
ea8ce907 366{
845905d5
MW
367 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
368 || wxStricmp(charset, _T("UTF8")) == 0 )
369 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
370 else
371 m_conv = new wxCSConv(charset);
ea8ce907
RR
372}
373
c12b7f79
VZ
374size_t
375wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
376 const char *psz,
377 size_t outputSize) const
e1bfe89e 378{
c12b7f79 379 return m_conv->MB2WC( outputBuf, psz, outputSize );
e1bfe89e
RR
380}
381
c12b7f79
VZ
382size_t
383wxConvBrokenFileNames::WC2MB(char *outputBuf,
384 const wchar_t *psz,
385 size_t outputSize) const
e1bfe89e 386{
c12b7f79 387 return m_conv->WC2MB( outputBuf, psz, outputSize );
e1bfe89e
RR
388}
389
66bf0099 390#endif
c12b7f79 391
bde4baac 392// ----------------------------------------------------------------------------
3698ae71 393// UTF-7
bde4baac 394// ----------------------------------------------------------------------------
6001e347 395
15f2ee32 396// Implementation (C) 2004 Fredrik Roubert
6001e347 397
15f2ee32
RN
398//
399// BASE64 decoding table
400//
// Maps a byte to the 6 bit value of the corresponding BASE64 digit; 0xff
// marks the bytes which are not BASE64 digits. One row per 16 byte values,
// the value of the first byte of the row is given in the comment.
static const unsigned char utf7unb64[] =
{
    /* 0x00 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x10 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x20 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    /* 0x30 */ 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x40 */ 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    /* 0x50 */ 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x60 */ 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    /* 0x70 */ 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x80 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x90 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xa0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xb0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xc0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xd0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xe0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xf0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
436
437size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
438{
15f2ee32
RN
439 size_t len = 0;
440
441 while (*psz && ((!buf) || (len < n)))
442 {
443 unsigned char cc = *psz++;
444 if (cc != '+')
445 {
446 // plain ASCII char
447 if (buf)
448 *buf++ = cc;
449 len++;
450 }
451 else if (*psz == '-')
452 {
453 // encoded plus sign
454 if (buf)
455 *buf++ = cc;
456 len++;
457 psz++;
458 }
459 else
460 {
461 // BASE64 encoded string
462 bool lsb;
463 unsigned char c;
464 unsigned int d, l;
465 for (lsb = false, d = 0, l = 0;
466 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
467 {
468 d <<= 6;
469 d += cc;
470 for (l += 6; l >= 8; lsb = !lsb)
471 {
6356d52a 472 c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
473 if (lsb)
474 {
475 if (buf)
476 *buf++ |= c;
477 len ++;
478 }
479 else
480 if (buf)
6356d52a 481 *buf = (wchar_t)(c << 8);
15f2ee32
RN
482 }
483 }
484 if (*psz == '-')
485 psz++;
486 }
487 }
488 if (buf && (len < n))
489 *buf = 0;
490 return len;
6001e347
RR
491}
492
15f2ee32
RN
493//
494// BASE64 encoding table
495//
// The 64 BASE64 digits, indexed by their 6 bit value.
static const unsigned char utf7enb64[] =
{
    /*  0 */ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    /* 16 */ 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    /* 32 */ 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    /* 48 */ 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
};
507
508//
509// UTF-7 encoding table
510//
511// 0 - Set D (directly encoded characters)
512// 1 - Set O (optional direct characters)
513// 2 - whitespace characters (optional)
514// 3 - special characters
515//
// Class (0 to 3) of each of the 128 ASCII characters. One row per 16
// characters, the code of the first character of the row is given in the
// comment.
static const unsigned char utf7encode[128] =
{
    /* 0x00 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    /* 0x10 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x20 */ 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    /* 0x40 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    /* 0x60 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
527
667e5b3e 528size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32
RN
529{
530
531
532 size_t len = 0;
533
534 while (*psz && ((!buf) || (len < n)))
535 {
536 wchar_t cc = *psz++;
537 if (cc < 0x80 && utf7encode[cc] < 1)
538 {
539 // plain ASCII char
540 if (buf)
541 *buf++ = (char)cc;
542 len++;
543 }
544#ifndef WC_UTF16
79c78d42 545 else if (((wxUint32)cc) > 0xffff)
b2c13097 546 {
15f2ee32
RN
547 // no surrogate pair generation (yet?)
548 return (size_t)-1;
549 }
550#endif
551 else
552 {
553 if (buf)
554 *buf++ = '+';
555 len++;
556 if (cc != '+')
557 {
558 // BASE64 encode string
559 unsigned int lsb, d, l;
560 for (d = 0, l = 0;; psz++)
561 {
562 for (lsb = 0; lsb < 2; lsb ++)
563 {
564 d <<= 8;
565 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
566
567 for (l += 8; l >= 6; )
568 {
569 l -= 6;
570 if (buf)
571 *buf++ = utf7enb64[(d >> l) % 64];
572 len++;
573 }
574 }
575 cc = *psz;
576 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
577 break;
578 }
579 if (l != 0)
580 {
581 if (buf)
582 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
583 len++;
584 }
585 }
586 if (buf)
587 *buf++ = '-';
588 len++;
589 }
590 }
591 if (buf && (len < n))
592 *buf = 0;
593 return len;
6001e347
RR
594}
595
f6bcfd97 596// ----------------------------------------------------------------------------
6001e347 597// UTF-8
f6bcfd97 598// ----------------------------------------------------------------------------
6001e347 599
dccce9ea 600static wxUint32 utf8_max[]=
4def3b35 601 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347 602
3698ae71
VZ
603// boundaries of the private use area we use to (temporarily) remap invalid
604// characters invalid in a UTF-8 encoded string
ea8ce907
RR
605const wxUint32 wxUnicodePUA = 0x100000;
606const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
607
6001e347
RR
608size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
609{
4def3b35
VS
610 size_t len = 0;
611
dccce9ea 612 while (*psz && ((!buf) || (len < n)))
4def3b35 613 {
ea8ce907
RR
614 const char *opsz = psz;
615 bool invalid = false;
4def3b35
VS
616 unsigned char cc = *psz++, fc = cc;
617 unsigned cnt;
dccce9ea 618 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 619 fc <<= 1;
dccce9ea 620 if (!cnt)
4def3b35
VS
621 {
622 // plain ASCII char
dccce9ea 623 if (buf)
4def3b35
VS
624 *buf++ = cc;
625 len++;
561488ef
MW
626
627 // escape the escape character for octal escapes
628 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
629 && cc == '\\' && (!buf || len < n))
630 {
631 if (buf)
632 *buf++ = cc;
633 len++;
634 }
dccce9ea
VZ
635 }
636 else
4def3b35
VS
637 {
638 cnt--;
dccce9ea 639 if (!cnt)
4def3b35
VS
640 {
641 // invalid UTF-8 sequence
ea8ce907 642 invalid = true;
dccce9ea
VZ
643 }
644 else
4def3b35
VS
645 {
646 unsigned ocnt = cnt - 1;
647 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 648 while (cnt--)
4def3b35 649 {
ea8ce907 650 cc = *psz;
dccce9ea 651 if ((cc & 0xC0) != 0x80)
4def3b35
VS
652 {
653 // invalid UTF-8 sequence
ea8ce907
RR
654 invalid = true;
655 break;
4def3b35 656 }
ea8ce907 657 psz++;
4def3b35
VS
658 res = (res << 6) | (cc & 0x3f);
659 }
ea8ce907 660 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
661 {
662 // illegal UTF-8 encoding
ea8ce907 663 invalid = true;
4def3b35 664 }
ea8ce907
RR
665 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
666 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
667 {
668 // if one of our PUA characters turns up externally
669 // it must also be treated as an illegal sequence
670 // (a bit like you have to escape an escape character)
671 invalid = true;
672 }
673 else
674 {
1cd52418 675#ifdef WC_UTF16
ea8ce907
RR
676 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
677 size_t pa = encode_utf16(res, (wxUint16 *)buf);
678 if (pa == (size_t)-1)
679 {
680 invalid = true;
681 }
682 else
683 {
684 if (buf)
685 buf += pa;
686 len += pa;
687 }
373658eb 688#else // !WC_UTF16
ea8ce907
RR
689 if (buf)
690 *buf++ = res;
691 len++;
373658eb 692#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
693 }
694 }
695 if (invalid)
696 {
697 if (m_options & MAP_INVALID_UTF8_TO_PUA)
698 {
699 while (opsz < psz && (!buf || len < n))
700 {
701#ifdef WC_UTF16
702 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
703 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
704 wxASSERT(pa != (size_t)-1);
705 if (buf)
706 buf += pa;
707 opsz++;
708 len += pa;
709#else
710 if (buf)
711 *buf++ = wxUnicodePUA + (unsigned char)*opsz;
712 opsz++;
713 len++;
714#endif
715 }
716 }
3698ae71 717 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
718 {
719 while (opsz < psz && (!buf || len < n))
720 {
3698ae71
VZ
721 if ( buf && len + 3 < n )
722 {
723 unsigned char n = *opsz;
724 *buf++ = L'\\';
b2c13097
WS
725 *buf++ = (wchar_t)( L'0' + n / 0100 );
726 *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
727 *buf++ = (wchar_t)( L'0' + n % 010 );
3698ae71 728 }
ea8ce907
RR
729 opsz++;
730 len += 4;
731 }
732 }
3698ae71 733 else // MAP_INVALID_UTF8_NOT
ea8ce907
RR
734 {
735 return (size_t)-1;
736 }
4def3b35
VS
737 }
738 }
6001e347 739 }
dccce9ea 740 if (buf && (len < n))
4def3b35
VS
741 *buf = 0;
742 return len;
6001e347
RR
743}
744
3698ae71
VZ
745static inline bool isoctal(wchar_t wch)
746{
747 return L'0' <= wch && wch <= L'7';
748}
749
6001e347
RR
750size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
751{
4def3b35 752 size_t len = 0;
6001e347 753
dccce9ea 754 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
755 {
756 wxUint32 cc;
1cd52418 757#ifdef WC_UTF16
b5153fd8
VZ
758 // cast is ok for WC_UTF16
759 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 760 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 761#else
4def3b35
VS
762 cc=(*psz++) & 0x7fffffff;
763#endif
3698ae71
VZ
764
765 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
766 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 767 {
dccce9ea 768 if (buf)
ea8ce907 769 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 770 len++;
3698ae71 771 }
561488ef
MW
772 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
773 && cc == L'\\' && psz[0] == L'\\' )
774 {
775 if (buf)
776 *buf++ = (char)cc;
777 psz++;
778 len++;
779 }
3698ae71
VZ
780 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
781 cc == L'\\' &&
782 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 783 {
dccce9ea 784 if (buf)
3698ae71 785 {
b2c13097
WS
786 *buf++ = (char) ((psz[0] - L'0')*0100 +
787 (psz[1] - L'0')*010 +
788 (psz[2] - L'0'));
3698ae71
VZ
789 }
790
791 psz += 3;
ea8ce907
RR
792 len++;
793 }
794 else
795 {
796 unsigned cnt;
797 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
798 if (!cnt)
4def3b35 799 {
ea8ce907
RR
800 // plain ASCII char
801 if (buf)
802 *buf++ = (char) cc;
803 len++;
804 }
805
806 else
807 {
808 len += cnt + 1;
809 if (buf)
810 {
811 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
812 while (cnt--)
813 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
814 }
4def3b35
VS
815 }
816 }
6001e347 817 }
4def3b35 818
3698ae71
VZ
819 if (buf && (len<n))
820 *buf = 0;
adb45366 821
4def3b35 822 return len;
6001e347
RR
823}
824
c91830cb
VZ
825// ----------------------------------------------------------------------------
826// UTF-16
827// ----------------------------------------------------------------------------
828
// wxMBConvUTF16straight is the UTF-16 converter whose byte order is the
// same as the native one and wxMBConvUTF16swap the one with the opposite
// byte order
#ifndef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight   wxMBConvUTF16LE
    #define wxMBConvUTF16swap       wxMBConvUTF16BE
#else // WORDS_BIGENDIAN
    #define wxMBConvUTF16straight   wxMBConvUTF16BE
    #define wxMBConvUTF16swap       wxMBConvUTF16LE
#endif
836
837
c91830cb
VZ
838#ifdef WC_UTF16
839
c91830cb
VZ
840// copy 16bit MB to 16bit String
841size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
842{
843 size_t len=0;
844
845 while (*(wxUint16*)psz && (!buf || len < n))
846 {
847 if (buf)
848 *buf++ = *(wxUint16*)psz;
849 len++;
850
851 psz += sizeof(wxUint16);
852 }
853 if (buf && len<n) *buf=0;
854
855 return len;
856}
857
858
859// copy 16bit String to 16bit MB
860size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
861{
862 size_t len=0;
863
864 while (*psz && (!buf || len < n))
865 {
866 if (buf)
867 {
868 *(wxUint16*)buf = *psz;
869 buf += sizeof(wxUint16);
870 }
871 len += sizeof(wxUint16);
872 psz++;
873 }
874 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
875
876 return len;
877}
878
879
880// swap 16bit MB to 16bit String
881size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
882{
883 size_t len=0;
884
885 while (*(wxUint16*)psz && (!buf || len < n))
886 {
887 if (buf)
888 {
889 ((char *)buf)[0] = psz[1];
890 ((char *)buf)[1] = psz[0];
891 buf++;
892 }
893 len++;
894 psz += sizeof(wxUint16);
895 }
896 if (buf && len<n) *buf=0;
897
898 return len;
899}
900
901
902// swap 16bit MB to 16bit String
903size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
904{
905 size_t len=0;
906
907 while (*psz && (!buf || len < n))
908 {
909 if (buf)
910 {
911 *buf++ = ((char*)psz)[1];
912 *buf++ = ((char*)psz)[0];
913 }
914 len += sizeof(wxUint16);
915 psz++;
916 }
917 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
918
919 return len;
920}
921
922
923#else // WC_UTF16
924
925
926// copy 16bit MB to 32bit String
927size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
928{
929 size_t len=0;
930
931 while (*(wxUint16*)psz && (!buf || len < n))
932 {
933 wxUint32 cc;
934 size_t pa=decode_utf16((wxUint16*)psz, cc);
935 if (pa == (size_t)-1)
936 return pa;
937
938 if (buf)
939 *buf++ = cc;
940 len++;
941 psz += pa * sizeof(wxUint16);
942 }
943 if (buf && len<n) *buf=0;
944
945 return len;
946}
947
948
949// copy 32bit String to 16bit MB
950size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
951{
952 size_t len=0;
953
954 while (*psz && (!buf || len < n))
955 {
956 wxUint16 cc[2];
957 size_t pa=encode_utf16(*psz, cc);
958
959 if (pa == (size_t)-1)
960 return pa;
961
962 if (buf)
963 {
69b80d28 964 *(wxUint16*)buf = cc[0];
b5153fd8 965 buf += sizeof(wxUint16);
c91830cb 966 if (pa > 1)
69b80d28
VZ
967 {
968 *(wxUint16*)buf = cc[1];
969 buf += sizeof(wxUint16);
970 }
c91830cb
VZ
971 }
972
973 len += pa*sizeof(wxUint16);
974 psz++;
975 }
976 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
977
978 return len;
979}
980
981
982// swap 16bit MB to 32bit String
983size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
984{
985 size_t len=0;
986
987 while (*(wxUint16*)psz && (!buf || len < n))
988 {
989 wxUint32 cc;
990 char tmp[4];
991 tmp[0]=psz[1]; tmp[1]=psz[0];
992 tmp[2]=psz[3]; tmp[3]=psz[2];
993
994 size_t pa=decode_utf16((wxUint16*)tmp, cc);
995 if (pa == (size_t)-1)
996 return pa;
997
998 if (buf)
999 *buf++ = cc;
1000
1001 len++;
1002 psz += pa * sizeof(wxUint16);
1003 }
1004 if (buf && len<n) *buf=0;
1005
1006 return len;
1007}
1008
1009
1010// swap 32bit String to 16bit MB
1011size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1012{
1013 size_t len=0;
1014
1015 while (*psz && (!buf || len < n))
1016 {
1017 wxUint16 cc[2];
1018 size_t pa=encode_utf16(*psz, cc);
1019
1020 if (pa == (size_t)-1)
1021 return pa;
1022
1023 if (buf)
1024 {
1025 *buf++ = ((char*)cc)[1];
1026 *buf++ = ((char*)cc)[0];
1027 if (pa > 1)
1028 {
1029 *buf++ = ((char*)cc)[3];
1030 *buf++ = ((char*)cc)[2];
1031 }
1032 }
1033
1034 len += pa*sizeof(wxUint16);
1035 psz++;
1036 }
1037 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1038
1039 return len;
1040}
1041
1042#endif // WC_UTF16
1043
1044
1045// ----------------------------------------------------------------------------
1046// UTF-32
1047// ----------------------------------------------------------------------------
1048
1049#ifdef WORDS_BIGENDIAN
1050#define wxMBConvUTF32straight wxMBConvUTF32BE
1051#define wxMBConvUTF32swap wxMBConvUTF32LE
1052#else
1053#define wxMBConvUTF32swap wxMBConvUTF32BE
1054#define wxMBConvUTF32straight wxMBConvUTF32LE
1055#endif
1056
1057
1058WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1059WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1060
1061
1062#ifdef WC_UTF16
1063
1064// copy 32bit MB to 16bit String
1065size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1066{
1067 size_t len=0;
1068
1069 while (*(wxUint32*)psz && (!buf || len < n))
1070 {
1071 wxUint16 cc[2];
1072
1073 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1074 if (pa == (size_t)-1)
1075 return pa;
1076
1077 if (buf)
1078 {
1079 *buf++ = cc[0];
1080 if (pa > 1)
1081 *buf++ = cc[1];
1082 }
1083 len += pa;
1084 psz += sizeof(wxUint32);
1085 }
1086 if (buf && len<n) *buf=0;
1087
1088 return len;
1089}
1090
1091
1092// copy 16bit String to 32bit MB
1093size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1094{
1095 size_t len=0;
1096
1097 while (*psz && (!buf || len < n))
1098 {
1099 wxUint32 cc;
1100
b5153fd8
VZ
1101 // cast is ok for WC_UTF16
1102 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
1103 if (pa == (size_t)-1)
1104 return pa;
1105
1106 if (buf)
1107 {
1108 *(wxUint32*)buf = cc;
1109 buf += sizeof(wxUint32);
1110 }
1111 len += sizeof(wxUint32);
1112 psz += pa;
1113 }
b5153fd8
VZ
1114
1115 if (buf && len<=n-sizeof(wxUint32))
1116 *(wxUint32*)buf=0;
c91830cb
VZ
1117
1118 return len;
1119}
1120
1121
1122
1123// swap 32bit MB to 16bit String
1124size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1125{
1126 size_t len=0;
1127
1128 while (*(wxUint32*)psz && (!buf || len < n))
1129 {
1130 char tmp[4];
1131 tmp[0] = psz[3]; tmp[1] = psz[2];
1132 tmp[2] = psz[1]; tmp[3] = psz[0];
1133
1134
1135 wxUint16 cc[2];
1136
1137 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1138 if (pa == (size_t)-1)
1139 return pa;
1140
1141 if (buf)
1142 {
1143 *buf++ = cc[0];
1144 if (pa > 1)
1145 *buf++ = cc[1];
1146 }
1147 len += pa;
1148 psz += sizeof(wxUint32);
1149 }
b5153fd8
VZ
1150
1151 if (buf && len<n)
1152 *buf=0;
c91830cb
VZ
1153
1154 return len;
1155}
1156
1157
1158// swap 16bit String to 32bit MB
1159size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1160{
1161 size_t len=0;
1162
1163 while (*psz && (!buf || len < n))
1164 {
1165 char cc[4];
1166
b5153fd8
VZ
1167 // cast is ok for WC_UTF16
1168 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
1169 if (pa == (size_t)-1)
1170 return pa;
1171
1172 if (buf)
1173 {
1174 *buf++ = cc[3];
1175 *buf++ = cc[2];
1176 *buf++ = cc[1];
1177 *buf++ = cc[0];
1178 }
1179 len += sizeof(wxUint32);
1180 psz += pa;
1181 }
b5153fd8
VZ
1182
1183 if (buf && len<=n-sizeof(wxUint32))
1184 *(wxUint32*)buf=0;
c91830cb
VZ
1185
1186 return len;
1187}
1188
1189#else // WC_UTF16
1190
1191
1192// copy 32bit MB to 32bit String
1193size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1194{
1195 size_t len=0;
1196
1197 while (*(wxUint32*)psz && (!buf || len < n))
1198 {
1199 if (buf)
1200 *buf++ = *(wxUint32*)psz;
1201 len++;
1202 psz += sizeof(wxUint32);
1203 }
b5153fd8
VZ
1204
1205 if (buf && len<n)
1206 *buf=0;
c91830cb
VZ
1207
1208 return len;
1209}
1210
1211
1212// copy 32bit String to 32bit MB
1213size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1214{
1215 size_t len=0;
1216
1217 while (*psz && (!buf || len < n))
1218 {
1219 if (buf)
1220 {
1221 *(wxUint32*)buf = *psz;
1222 buf += sizeof(wxUint32);
1223 }
1224
1225 len += sizeof(wxUint32);
1226 psz++;
1227 }
1228
b5153fd8
VZ
1229 if (buf && len<=n-sizeof(wxUint32))
1230 *(wxUint32*)buf=0;
c91830cb
VZ
1231
1232 return len;
1233}
1234
1235
1236// swap 32bit MB to 32bit String
1237size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1238{
1239 size_t len=0;
1240
1241 while (*(wxUint32*)psz && (!buf || len < n))
1242 {
1243 if (buf)
1244 {
1245 ((char *)buf)[0] = psz[3];
1246 ((char *)buf)[1] = psz[2];
1247 ((char *)buf)[2] = psz[1];
1248 ((char *)buf)[3] = psz[0];
1249 buf++;
1250 }
1251 len++;
1252 psz += sizeof(wxUint32);
1253 }
b5153fd8
VZ
1254
1255 if (buf && len<n)
1256 *buf=0;
c91830cb
VZ
1257
1258 return len;
1259}
1260
1261
1262// swap 32bit String to 32bit MB
1263size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1264{
1265 size_t len=0;
1266
1267 while (*psz && (!buf || len < n))
1268 {
1269 if (buf)
1270 {
1271 *buf++ = ((char *)psz)[3];
1272 *buf++ = ((char *)psz)[2];
1273 *buf++ = ((char *)psz)[1];
1274 *buf++ = ((char *)psz)[0];
1275 }
1276 len += sizeof(wxUint32);
1277 psz++;
1278 }
b5153fd8
VZ
1279
1280 if (buf && len<=n-sizeof(wxUint32))
1281 *(wxUint32*)buf=0;
c91830cb
VZ
1282
1283 return len;
1284}
1285
1286
1287#endif // WC_UTF16
1288
1289
36acb880
VZ
1290// ============================================================================
1291// The classes doing conversion using the iconv_xxx() functions
1292// ============================================================================
3caec1bb 1293
b040e242 1294#ifdef HAVE_ICONV
3a0d76bc 1295
b1d547eb
VS
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if output buffer is _exactly_ as big as needed. Such case is
//     (unless there's yet another bug in glibc) the only case when iconv()
//     returns with (size_t)-1 (which means error) and says there are 0 bytes
//     left in the input buffer -- when _real_ error occurs,
//     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
//     iconv() failure.
//     [This bug does not appear in glibc 2.2.]
//
// The macro arguments are parenthesised so that any expression (and not only
// a plain variable) can be passed without changing the meaning of the test.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft) ((cres) == (size_t)-1)
#endif

// cast the address of an input pointer to the type expected by iconv()
#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880
VZ
1312
1313// ----------------------------------------------------------------------------
e95354ec 1314// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1315// ----------------------------------------------------------------------------
1316
e95354ec 1317class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1318{
1319public:
e95354ec
VZ
1320 wxMBConv_iconv(const wxChar *name);
1321 virtual ~wxMBConv_iconv();
36acb880 1322
bde4baac
VZ
1323 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1324 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1325
e95354ec 1326 bool IsOk() const
36acb880
VZ
1327 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1328
1329protected:
1330 // the iconv handlers used to translate from multibyte to wide char and in
1331 // the other direction
1332 iconv_t m2w,
1333 w2m;
b1d547eb
VS
1334#if wxUSE_THREADS
1335 // guards access to m2w and w2m objects
1336 wxMutex m_iconvMutex;
1337#endif
36acb880
VZ
1338
1339private:
e95354ec 1340 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
1341 // available on this machine, it will remain NULL
1342 static const char *ms_wcCharsetName;
1343
1344 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1345 // different endian-ness than the native one
405d8f46 1346 static bool ms_wcNeedsSwap;
36acb880
VZ
1347};
1348
8f115891
MW
1349// make the constructor available for unit testing
1350WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1351{
1352 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1353 if ( !result->IsOk() )
1354 {
1355 delete result;
1356 return 0;
1357 }
1358 return result;
1359}
1360
e95354ec
VZ
1361const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1362bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1363
e95354ec 1364wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 1365{
04c79127
RR
1366 // Do it the hard way
1367 char cname[100];
1368 for (size_t i = 0; i < wxStrlen(name)+1; i++)
1369 cname[i] = (char) name[i];
1370
36acb880
VZ
1371 // check for charset that represents wchar_t:
1372 if (ms_wcCharsetName == NULL)
f1339c56 1373 {
e95354ec 1374 ms_wcNeedsSwap = false;
dccce9ea 1375
36acb880
VZ
1376 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1377 ms_wcCharsetName = WC_NAME_BEST;
04c79127 1378 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 1379
36acb880
VZ
1380 if (m2w == (iconv_t)-1)
1381 {
1382 // try charset w/o bytesex info (e.g. "UCS4")
1383 // and check for bytesex ourselves:
1384 ms_wcCharsetName = WC_NAME;
04c79127 1385 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
1386
1387 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
1388 if (m2w == (iconv_t)-1)
1389 {
36acb880 1390 ms_wcCharsetName = "WCHAR_T";
04c79127 1391 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 1392 }
3a0d76bc 1393
36acb880
VZ
1394 if (m2w != (iconv_t)-1)
1395 {
1396 char buf[2], *bufPtr;
1397 wchar_t wbuf[2], *wbufPtr;
1398 size_t insz, outsz;
1399 size_t res;
1400
1401 buf[0] = 'A';
1402 buf[1] = 0;
1403 wbuf[0] = 0;
1404 insz = 2;
1405 outsz = SIZEOF_WCHAR_T * 2;
1406 wbufPtr = wbuf;
1407 bufPtr = buf;
1408
1409 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1410 (char**)&wbufPtr, &outsz);
1411
1412 if (ICONV_FAILED(res, insz))
3a0d76bc 1413 {
36acb880
VZ
1414 ms_wcCharsetName = NULL;
1415 wxLogLastError(wxT("iconv"));
2b5f62a0 1416 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
1417 }
1418 else
1419 {
36acb880 1420 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
1421 }
1422 }
36acb880
VZ
1423 else
1424 {
1425 ms_wcCharsetName = NULL;
373658eb 1426
77ffb593 1427 // VS: we must not output an error here, since wxWidgets will safely
957686c8
VS
1428 // fall back to using wxEncodingConverter.
1429 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1430 //wxLogError(
36acb880 1431 }
3a0d76bc 1432 }
36acb880 1433 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 1434 }
36acb880 1435 else // we already have ms_wcCharsetName
3caec1bb 1436 {
04c79127 1437 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 1438 }
dccce9ea 1439
36acb880
VZ
1440 // NB: don't ever pass NULL to iconv_open(), it may crash!
1441 if ( ms_wcCharsetName )
f1339c56 1442 {
04c79127 1443 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 1444 }
405d8f46
VZ
1445 else
1446 {
1447 w2m = (iconv_t)-1;
1448 }
36acb880 1449}
3caec1bb 1450
e95354ec 1451wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
1452{
1453 if ( m2w != (iconv_t)-1 )
1454 iconv_close(m2w);
1455 if ( w2m != (iconv_t)-1 )
1456 iconv_close(w2m);
1457}
3a0d76bc 1458
bde4baac 1459size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1460{
b1d547eb
VS
1461#if wxUSE_THREADS
1462 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1463 // Unfortunately there is a couple of global wxCSConv objects such as
1464 // wxConvLocal that are used all over wx code, so we have to make sure
1465 // the handle is used by at most one thread at the time. Otherwise
1466 // only a few wx classes would be safe to use from non-main threads
1467 // as MB<->WC conversion would fail "randomly".
1468 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1469#endif
3698ae71 1470
36acb880
VZ
1471 size_t inbuf = strlen(psz);
1472 size_t outbuf = n * SIZEOF_WCHAR_T;
1473 size_t res, cres;
1474 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1475 wchar_t *bufPtr = buf;
1476 const char *pszPtr = psz;
1477
1478 if (buf)
1479 {
1480 // have destination buffer, convert there
1481 cres = iconv(m2w,
1482 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1483 (char**)&bufPtr, &outbuf);
1484 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1485
36acb880 1486 if (ms_wcNeedsSwap)
3a0d76bc 1487 {
36acb880
VZ
1488 // convert to native endianness
1489 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 1490 }
adb45366 1491
49dd9820
VS
1492 // NB: iconv was given only strlen(psz) characters on input, and so
1493 // it couldn't convert the trailing zero. Let's do it ourselves
1494 // if there's some room left for it in the output buffer.
1495 if (res < n)
1496 buf[res] = 0;
36acb880
VZ
1497 }
1498 else
1499 {
1500 // no destination buffer... convert using temp buffer
1501 // to calculate destination buffer requirement
1502 wchar_t tbuf[8];
1503 res = 0;
1504 do {
1505 bufPtr = tbuf;
1506 outbuf = 8*SIZEOF_WCHAR_T;
1507
1508 cres = iconv(m2w,
1509 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1510 (char**)&bufPtr, &outbuf );
1511
1512 res += 8-(outbuf/SIZEOF_WCHAR_T);
1513 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1514 }
dccce9ea 1515
36acb880 1516 if (ICONV_FAILED(cres, inbuf))
f1339c56 1517 {
36acb880
VZ
1518 //VS: it is ok if iconv fails, hence trace only
1519 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1520 return (size_t)-1;
1521 }
1522
1523 return res;
1524}
1525
bde4baac 1526size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1527{
b1d547eb
VS
1528#if wxUSE_THREADS
1529 // NB: explained in MB2WC
1530 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1531#endif
3698ae71 1532
f8d791e0 1533 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1534 size_t outbuf = n;
1535 size_t res, cres;
3a0d76bc 1536
36acb880 1537 wchar_t *tmpbuf = 0;
3caec1bb 1538
36acb880
VZ
1539 if (ms_wcNeedsSwap)
1540 {
1541 // need to copy to temp buffer to switch endianness
1542 // this absolutely doesn't rock!
1543 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1544 // could be in read-only memory, or be accessed in some other thread)
1545 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1546 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1547 WC_BSWAP(tmpbuf, inbuf)
1548 psz=tmpbuf;
1549 }
3a0d76bc 1550
36acb880
VZ
1551 if (buf)
1552 {
1553 // have destination buffer, convert there
1554 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1555
36acb880 1556 res = n-outbuf;
adb45366 1557
49dd9820
VS
1558 // NB: iconv was given only wcslen(psz) characters on input, and so
1559 // it couldn't convert the trailing zero. Let's do it ourselves
1560 // if there's some room left for it in the output buffer.
1561 if (res < n)
1562 buf[0] = 0;
36acb880
VZ
1563 }
1564 else
1565 {
1566 // no destination buffer... convert using temp buffer
1567 // to calculate destination buffer requirement
1568 char tbuf[16];
1569 res = 0;
1570 do {
1571 buf = tbuf; outbuf = 16;
1572
1573 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1574
36acb880
VZ
1575 res += 16 - outbuf;
1576 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1577 }
dccce9ea 1578
36acb880
VZ
1579 if (ms_wcNeedsSwap)
1580 {
1581 free(tmpbuf);
1582 }
dccce9ea 1583
36acb880
VZ
1584 if (ICONV_FAILED(cres, inbuf))
1585 {
1586 //VS: it is ok if iconv fails, hence trace only
1587 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1588 return (size_t)-1;
1589 }
1590
1591 return res;
1592}
1593
b040e242 1594#endif // HAVE_ICONV
36acb880 1595
e95354ec 1596
36acb880
VZ
1597// ============================================================================
1598// Win32 conversion classes
1599// ============================================================================
1cd52418 1600
e95354ec 1601#ifdef wxHAVE_WIN32_MB2WC
373658eb 1602
8b04d4c4 1603// from utils.cpp
d775fa82 1604#if wxUSE_FONTMAP
8b04d4c4
VZ
1605extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1606extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1607#endif
373658eb 1608
e95354ec 1609class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1610{
1611public:
bde4baac
VZ
1612 wxMBConv_win32()
1613 {
1614 m_CodePage = CP_ACP;
1615 }
1616
7608a683 1617#if wxUSE_FONTMAP
e95354ec 1618 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1619 {
1620 m_CodePage = wxCharsetToCodepage(name);
1621 }
dccce9ea 1622
e95354ec 1623 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1624 {
1625 m_CodePage = wxEncodingToCodepage(encoding);
1626 }
7608a683 1627#endif
8b04d4c4 1628
bde4baac 1629 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1630 {
02272c9c
VZ
1631 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1632 // the behaviour is not compatible with the Unix version (using iconv)
1633 // and break the library itself, e.g. wxTextInputStream::NextChar()
1634 // wouldn't work if reading an incomplete MB char didn't result in an
1635 // error
667e5b3e
VZ
1636 //
1637 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1638 // an error (tested under Windows Server 2003) and apparently it is
1639 // done on purpose, i.e. the function accepts any input in this case
1640 // and although I'd prefer to return error on ill-formed output, our
1641 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1642 // explicitly ill-formed according to RFC 2152) neither so we don't
1643 // even have any fallback here...
1644 int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1645
2b5f62a0
VZ
1646 const size_t len = ::MultiByteToWideChar
1647 (
1648 m_CodePage, // code page
667e5b3e 1649 flags, // flags: fall on error
2b5f62a0
VZ
1650 psz, // input string
1651 -1, // its length (NUL-terminated)
b4da152e 1652 buf, // output string
2b5f62a0
VZ
1653 buf ? n : 0 // size of output buffer
1654 );
1655
03a991bc
VZ
1656 // note that it returns count of written chars for buf != NULL and size
1657 // of the needed buffer for buf == NULL so in either case the length of
1658 // the string (which never includes the terminating NUL) is one less
1659 return len ? len - 1 : (size_t)-1;
f1339c56 1660 }
dccce9ea 1661
13dd924a 1662 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1663 {
13dd924a
VZ
1664 /*
1665 we have a problem here: by default, WideCharToMultiByte() may
1666 replace characters unrepresentable in the target code page with bad
1667 quality approximations such as turning "1/2" symbol (U+00BD) into
1668 "1" for the code pages which don't have it and we, obviously, want
1669 to avoid this at any price
d775fa82 1670
13dd924a
VZ
1671 the trouble is that this function does it _silently_, i.e. it won't
1672 even tell us whether it did or not... Win98/2000 and higher provide
1673 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1674 we have to resort to a round trip, i.e. check that converting back
1675 results in the same string -- this is, of course, expensive but
1676 otherwise we simply can't be sure to not garble the data.
1677 */
1678
1679 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1680 // it doesn't work with CJK encodings (which we test for rather roughly
1681 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1682 // supporting it
907173e5
WS
1683 BOOL usedDef wxDUMMY_INITIALIZE(false);
1684 BOOL *pUsedDef;
13dd924a
VZ
1685 int flags;
1686 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1687 {
1688 // it's our lucky day
1689 flags = WC_NO_BEST_FIT_CHARS;
1690 pUsedDef = &usedDef;
1691 }
1692 else // old system or unsupported encoding
1693 {
1694 flags = 0;
1695 pUsedDef = NULL;
1696 }
1697
2b5f62a0
VZ
1698 const size_t len = ::WideCharToMultiByte
1699 (
1700 m_CodePage, // code page
13dd924a
VZ
1701 flags, // either none or no best fit
1702 pwz, // input string
2b5f62a0
VZ
1703 -1, // it is (wide) NUL-terminated
1704 buf, // output buffer
1705 buf ? n : 0, // and its size
1706 NULL, // default "replacement" char
13dd924a 1707 pUsedDef // [out] was it used?
2b5f62a0
VZ
1708 );
1709
13dd924a
VZ
1710 if ( !len )
1711 {
1712 // function totally failed
1713 return (size_t)-1;
1714 }
1715
1716 // if we were really converting, check if we succeeded
1717 if ( buf )
1718 {
1719 if ( flags )
1720 {
1721 // check if the conversion failed, i.e. if any replacements
1722 // were done
1723 if ( usedDef )
1724 return (size_t)-1;
1725 }
1726 else // we must resort to double tripping...
1727 {
1728 wxWCharBuffer wcBuf(n);
1729 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1730 wcscmp(wcBuf, pwz) != 0 )
1731 {
1732 // we didn't obtain the same thing we started from, hence
1733 // the conversion was lossy and we consider that it failed
1734 return (size_t)-1;
1735 }
1736 }
1737 }
1738
03a991bc 1739 // see the comment above for the reason of "len - 1"
13dd924a 1740 return len - 1;
f1339c56 1741 }
dccce9ea 1742
13dd924a
VZ
1743 bool IsOk() const { return m_CodePage != -1; }
1744
1745private:
1746 static bool CanUseNoBestFit()
1747 {
1748 static int s_isWin98Or2k = -1;
1749
1750 if ( s_isWin98Or2k == -1 )
1751 {
1752 int verMaj, verMin;
1753 switch ( wxGetOsVersion(&verMaj, &verMin) )
1754 {
1755 case wxWIN95:
1756 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1757 break;
1758
1759 case wxWINDOWS_NT:
1760 s_isWin98Or2k = verMaj >= 5;
1761 break;
1762
1763 default:
1764 // unknown, be conseravtive by default
1765 s_isWin98Or2k = 0;
1766 }
1767
1768 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1769 }
1770
1771 return s_isWin98Or2k == 1;
1772 }
f1339c56 1773
b1d66b54 1774 long m_CodePage;
1cd52418 1775};
e95354ec
VZ
1776
1777#endif // wxHAVE_WIN32_MB2WC
1778
f7e98dee
RN
1779// ============================================================================
1780// Cocoa conversion classes
1781// ============================================================================
1782
1783#if defined(__WXCOCOA__)
1784
ecd9653b 1785// RN: There is no UTF-32 support in either Core Foundation or
f7e98dee
RN
1786// Cocoa. Strangely enough, internally Core Foundation uses
1787// UTF 32 internally quite a bit - its just not public (yet).
1788
1789#include <CoreFoundation/CFString.h>
1790#include <CoreFoundation/CFStringEncodingExt.h>
1791
1792CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 1793{
638357a0 1794 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ecd9653b
WS
1795 if ( encoding == wxFONTENCODING_DEFAULT )
1796 {
638357a0 1797 enc = CFStringGetSystemEncoding();
ecd9653b
WS
1798 }
1799 else switch( encoding)
1800 {
1801 case wxFONTENCODING_ISO8859_1 :
1802 enc = kCFStringEncodingISOLatin1 ;
1803 break ;
1804 case wxFONTENCODING_ISO8859_2 :
1805 enc = kCFStringEncodingISOLatin2;
1806 break ;
1807 case wxFONTENCODING_ISO8859_3 :
1808 enc = kCFStringEncodingISOLatin3 ;
1809 break ;
1810 case wxFONTENCODING_ISO8859_4 :
1811 enc = kCFStringEncodingISOLatin4;
1812 break ;
1813 case wxFONTENCODING_ISO8859_5 :
1814 enc = kCFStringEncodingISOLatinCyrillic;
1815 break ;
1816 case wxFONTENCODING_ISO8859_6 :
1817 enc = kCFStringEncodingISOLatinArabic;
1818 break ;
1819 case wxFONTENCODING_ISO8859_7 :
1820 enc = kCFStringEncodingISOLatinGreek;
1821 break ;
1822 case wxFONTENCODING_ISO8859_8 :
1823 enc = kCFStringEncodingISOLatinHebrew;
1824 break ;
1825 case wxFONTENCODING_ISO8859_9 :
1826 enc = kCFStringEncodingISOLatin5;
1827 break ;
1828 case wxFONTENCODING_ISO8859_10 :
1829 enc = kCFStringEncodingISOLatin6;
1830 break ;
1831 case wxFONTENCODING_ISO8859_11 :
1832 enc = kCFStringEncodingISOLatinThai;
1833 break ;
1834 case wxFONTENCODING_ISO8859_13 :
1835 enc = kCFStringEncodingISOLatin7;
1836 break ;
1837 case wxFONTENCODING_ISO8859_14 :
1838 enc = kCFStringEncodingISOLatin8;
1839 break ;
1840 case wxFONTENCODING_ISO8859_15 :
1841 enc = kCFStringEncodingISOLatin9;
1842 break ;
1843
1844 case wxFONTENCODING_KOI8 :
1845 enc = kCFStringEncodingKOI8_R;
1846 break ;
1847 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1848 enc = kCFStringEncodingDOSRussian;
1849 break ;
1850
1851// case wxFONTENCODING_BULGARIAN :
1852// enc = ;
1853// break ;
1854
1855 case wxFONTENCODING_CP437 :
1856 enc =kCFStringEncodingDOSLatinUS ;
1857 break ;
1858 case wxFONTENCODING_CP850 :
1859 enc = kCFStringEncodingDOSLatin1;
1860 break ;
1861 case wxFONTENCODING_CP852 :
1862 enc = kCFStringEncodingDOSLatin2;
1863 break ;
1864 case wxFONTENCODING_CP855 :
1865 enc = kCFStringEncodingDOSCyrillic;
1866 break ;
1867 case wxFONTENCODING_CP866 :
1868 enc =kCFStringEncodingDOSRussian ;
1869 break ;
1870 case wxFONTENCODING_CP874 :
1871 enc = kCFStringEncodingDOSThai;
1872 break ;
1873 case wxFONTENCODING_CP932 :
1874 enc = kCFStringEncodingDOSJapanese;
1875 break ;
1876 case wxFONTENCODING_CP936 :
1877 enc =kCFStringEncodingDOSChineseSimplif ;
1878 break ;
1879 case wxFONTENCODING_CP949 :
1880 enc = kCFStringEncodingDOSKorean;
1881 break ;
1882 case wxFONTENCODING_CP950 :
1883 enc = kCFStringEncodingDOSChineseTrad;
1884 break ;
ecd9653b
WS
1885 case wxFONTENCODING_CP1250 :
1886 enc = kCFStringEncodingWindowsLatin2;
1887 break ;
1888 case wxFONTENCODING_CP1251 :
1889 enc =kCFStringEncodingWindowsCyrillic ;
1890 break ;
1891 case wxFONTENCODING_CP1252 :
1892 enc =kCFStringEncodingWindowsLatin1 ;
1893 break ;
1894 case wxFONTENCODING_CP1253 :
1895 enc = kCFStringEncodingWindowsGreek;
1896 break ;
1897 case wxFONTENCODING_CP1254 :
1898 enc = kCFStringEncodingWindowsLatin5;
1899 break ;
1900 case wxFONTENCODING_CP1255 :
1901 enc =kCFStringEncodingWindowsHebrew ;
1902 break ;
1903 case wxFONTENCODING_CP1256 :
1904 enc =kCFStringEncodingWindowsArabic ;
1905 break ;
1906 case wxFONTENCODING_CP1257 :
1907 enc = kCFStringEncodingWindowsBalticRim;
1908 break ;
638357a0
RN
1909// This only really encodes to UTF7 (if that) evidently
1910// case wxFONTENCODING_UTF7 :
1911// enc = kCFStringEncodingNonLossyASCII ;
1912// break ;
ecd9653b
WS
1913 case wxFONTENCODING_UTF8 :
1914 enc = kCFStringEncodingUTF8 ;
1915 break ;
1916 case wxFONTENCODING_EUC_JP :
1917 enc = kCFStringEncodingEUC_JP;
1918 break ;
1919 case wxFONTENCODING_UTF16 :
f7e98dee 1920 enc = kCFStringEncodingUnicode ;
ecd9653b 1921 break ;
f7e98dee
RN
1922 case wxFONTENCODING_MACROMAN :
1923 enc = kCFStringEncodingMacRoman ;
1924 break ;
1925 case wxFONTENCODING_MACJAPANESE :
1926 enc = kCFStringEncodingMacJapanese ;
1927 break ;
1928 case wxFONTENCODING_MACCHINESETRAD :
1929 enc = kCFStringEncodingMacChineseTrad ;
1930 break ;
1931 case wxFONTENCODING_MACKOREAN :
1932 enc = kCFStringEncodingMacKorean ;
1933 break ;
1934 case wxFONTENCODING_MACARABIC :
1935 enc = kCFStringEncodingMacArabic ;
1936 break ;
1937 case wxFONTENCODING_MACHEBREW :
1938 enc = kCFStringEncodingMacHebrew ;
1939 break ;
1940 case wxFONTENCODING_MACGREEK :
1941 enc = kCFStringEncodingMacGreek ;
1942 break ;
1943 case wxFONTENCODING_MACCYRILLIC :
1944 enc = kCFStringEncodingMacCyrillic ;
1945 break ;
1946 case wxFONTENCODING_MACDEVANAGARI :
1947 enc = kCFStringEncodingMacDevanagari ;
1948 break ;
1949 case wxFONTENCODING_MACGURMUKHI :
1950 enc = kCFStringEncodingMacGurmukhi ;
1951 break ;
1952 case wxFONTENCODING_MACGUJARATI :
1953 enc = kCFStringEncodingMacGujarati ;
1954 break ;
1955 case wxFONTENCODING_MACORIYA :
1956 enc = kCFStringEncodingMacOriya ;
1957 break ;
1958 case wxFONTENCODING_MACBENGALI :
1959 enc = kCFStringEncodingMacBengali ;
1960 break ;
1961 case wxFONTENCODING_MACTAMIL :
1962 enc = kCFStringEncodingMacTamil ;
1963 break ;
1964 case wxFONTENCODING_MACTELUGU :
1965 enc = kCFStringEncodingMacTelugu ;
1966 break ;
1967 case wxFONTENCODING_MACKANNADA :
1968 enc = kCFStringEncodingMacKannada ;
1969 break ;
1970 case wxFONTENCODING_MACMALAJALAM :
1971 enc = kCFStringEncodingMacMalayalam ;
1972 break ;
1973 case wxFONTENCODING_MACSINHALESE :
1974 enc = kCFStringEncodingMacSinhalese ;
1975 break ;
1976 case wxFONTENCODING_MACBURMESE :
1977 enc = kCFStringEncodingMacBurmese ;
1978 break ;
1979 case wxFONTENCODING_MACKHMER :
1980 enc = kCFStringEncodingMacKhmer ;
1981 break ;
1982 case wxFONTENCODING_MACTHAI :
1983 enc = kCFStringEncodingMacThai ;
1984 break ;
1985 case wxFONTENCODING_MACLAOTIAN :
1986 enc = kCFStringEncodingMacLaotian ;
1987 break ;
1988 case wxFONTENCODING_MACGEORGIAN :
1989 enc = kCFStringEncodingMacGeorgian ;
1990 break ;
1991 case wxFONTENCODING_MACARMENIAN :
1992 enc = kCFStringEncodingMacArmenian ;
1993 break ;
1994 case wxFONTENCODING_MACCHINESESIMP :
1995 enc = kCFStringEncodingMacChineseSimp ;
1996 break ;
1997 case wxFONTENCODING_MACTIBETAN :
1998 enc = kCFStringEncodingMacTibetan ;
1999 break ;
2000 case wxFONTENCODING_MACMONGOLIAN :
2001 enc = kCFStringEncodingMacMongolian ;
2002 break ;
2003 case wxFONTENCODING_MACETHIOPIC :
2004 enc = kCFStringEncodingMacEthiopic ;
2005 break ;
2006 case wxFONTENCODING_MACCENTRALEUR :
2007 enc = kCFStringEncodingMacCentralEurRoman ;
2008 break ;
2009 case wxFONTENCODING_MACVIATNAMESE :
2010 enc = kCFStringEncodingMacVietnamese ;
2011 break ;
2012 case wxFONTENCODING_MACARABICEXT :
2013 enc = kCFStringEncodingMacExtArabic ;
2014 break ;
2015 case wxFONTENCODING_MACSYMBOL :
2016 enc = kCFStringEncodingMacSymbol ;
2017 break ;
2018 case wxFONTENCODING_MACDINGBATS :
2019 enc = kCFStringEncodingMacDingbats ;
2020 break ;
2021 case wxFONTENCODING_MACTURKISH :
2022 enc = kCFStringEncodingMacTurkish ;
2023 break ;
2024 case wxFONTENCODING_MACCROATIAN :
2025 enc = kCFStringEncodingMacCroatian ;
2026 break ;
2027 case wxFONTENCODING_MACICELANDIC :
2028 enc = kCFStringEncodingMacIcelandic ;
2029 break ;
2030 case wxFONTENCODING_MACROMANIAN :
2031 enc = kCFStringEncodingMacRomanian ;
2032 break ;
2033 case wxFONTENCODING_MACCELTIC :
2034 enc = kCFStringEncodingMacCeltic ;
2035 break ;
2036 case wxFONTENCODING_MACGAELIC :
2037 enc = kCFStringEncodingMacGaelic ;
2038 break ;
ecd9653b
WS
2039// case wxFONTENCODING_MACKEYBOARD :
2040// enc = kCFStringEncodingMacKeyboardGlyphs ;
2041// break ;
2042 default :
2043 // because gcc is picky
2044 break ;
2045 } ;
2046 return enc ;
f7e98dee
RN
2047}
2048
f7e98dee
RN
2049class wxMBConv_cocoa : public wxMBConv
2050{
2051public:
2052 wxMBConv_cocoa()
2053 {
2054 Init(CFStringGetSystemEncoding()) ;
2055 }
2056
a6900d10 2057#if wxUSE_FONTMAP
f7e98dee
RN
2058 wxMBConv_cocoa(const wxChar* name)
2059 {
267e11c5 2060 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
f7e98dee 2061 }
a6900d10 2062#endif
f7e98dee
RN
2063
2064 wxMBConv_cocoa(wxFontEncoding encoding)
2065 {
2066 Init( wxCFStringEncFromFontEnc(encoding) );
2067 }
2068
2069 ~wxMBConv_cocoa()
2070 {
2071 }
2072
2073 void Init( CFStringEncoding encoding)
2074 {
638357a0 2075 m_encoding = encoding ;
f7e98dee
RN
2076 }
2077
2078 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2079 {
2080 wxASSERT(szUnConv);
ecd9653b 2081
638357a0
RN
2082 CFStringRef theString = CFStringCreateWithBytes (
2083 NULL, //the allocator
2084 (const UInt8*)szUnConv,
2085 strlen(szUnConv),
2086 m_encoding,
2087 false //no BOM/external representation
f7e98dee
RN
2088 );
2089
2090 wxASSERT(theString);
2091
638357a0
RN
2092 size_t nOutLength = CFStringGetLength(theString);
2093
2094 if (szOut == NULL)
f7e98dee 2095 {
f7e98dee 2096 CFRelease(theString);
638357a0 2097 return nOutLength;
f7e98dee 2098 }
ecd9653b 2099
638357a0 2100 CFRange theRange = { 0, nOutSize };
ecd9653b 2101
638357a0
RN
2102#if SIZEOF_WCHAR_T == 4
2103 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2104#endif
3698ae71 2105
f7e98dee 2106 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
3698ae71 2107
f7e98dee 2108 CFRelease(theString);
ecd9653b 2109
638357a0 2110 szUniCharBuffer[nOutLength] = '\0' ;
f7e98dee
RN
2111
2112#if SIZEOF_WCHAR_T == 4
2113 wxMBConvUTF16 converter ;
638357a0 2114 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
f7e98dee
RN
2115 delete[] szUniCharBuffer;
2116#endif
3698ae71 2117
638357a0 2118 return nOutLength;
f7e98dee
RN
2119 }
2120
2121 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2122 {
638357a0 2123 wxASSERT(szUnConv);
3698ae71 2124
f7e98dee 2125 size_t nRealOutSize;
638357a0 2126 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 2127 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2128
f7e98dee 2129#if SIZEOF_WCHAR_T == 4
d9d488cf 2130 wxMBConvUTF16 converter ;
f7e98dee
RN
2131 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2132 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2133 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2134 nBufSize /= sizeof(UniChar);
f7e98dee
RN
2135#endif
2136
2137 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2138 NULL, //allocator
2139 szUniBuffer,
2140 nBufSize,
638357a0 2141 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 2142 );
ecd9653b 2143
f7e98dee 2144 wxASSERT(theString);
ecd9653b 2145
f7e98dee 2146 //Note that CER puts a BOM when converting to unicode
638357a0
RN
2147 //so we check and use getchars instead in that case
2148 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 2149 {
638357a0
RN
2150 if (szOut != NULL)
2151 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
3698ae71 2152
638357a0
RN
2153 nRealOutSize = CFStringGetLength(theString) + 1;
2154 }
2155 else
2156 {
2157 CFStringGetBytes(
2158 theString,
2159 CFRangeMake(0, CFStringGetLength(theString)),
2160 m_encoding,
2161 0, //what to put in characters that can't be converted -
2162 //0 tells CFString to return NULL if it meets such a character
2163 false, //not an external representation
2164 (UInt8*) szOut,
3698ae71 2165 nOutSize,
638357a0
RN
2166 (CFIndex*) &nRealOutSize
2167 );
f7e98dee 2168 }
ecd9653b 2169
638357a0 2170 CFRelease(theString);
ecd9653b 2171
638357a0
RN
2172#if SIZEOF_WCHAR_T == 4
2173 delete[] szUniBuffer;
2174#endif
ecd9653b 2175
f7e98dee
RN
2176 return nRealOutSize - 1;
2177 }
2178
2179 bool IsOk() const
ecd9653b 2180 {
3698ae71 2181 return m_encoding != kCFStringEncodingInvalidId &&
638357a0 2182 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2183 }
2184
2185private:
638357a0 2186 CFStringEncoding m_encoding ;
f7e98dee
RN
2187};
2188
2189#endif // defined(__WXCOCOA__)
2190
335d31e0
SC
2191// ============================================================================
2192// Mac conversion classes
2193// ============================================================================
2194
2195#if defined(__WXMAC__) && defined(TARGET_CARBON)
2196
2197class wxMBConv_mac : public wxMBConv
2198{
2199public:
2200 wxMBConv_mac()
2201 {
2202 Init(CFStringGetSystemEncoding()) ;
2203 }
2204
2d1659cf 2205#if wxUSE_FONTMAP
335d31e0
SC
2206 wxMBConv_mac(const wxChar* name)
2207 {
267e11c5 2208 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
335d31e0 2209 }
2d1659cf 2210#endif
335d31e0
SC
2211
2212 wxMBConv_mac(wxFontEncoding encoding)
2213 {
d775fa82
WS
2214 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2215 }
2216
2217 ~wxMBConv_mac()
2218 {
2219 OSStatus status = noErr ;
2220 status = TECDisposeConverter(m_MB2WC_converter);
2221 status = TECDisposeConverter(m_WC2MB_converter);
2222 }
2223
2224
2225 void Init( TextEncodingBase encoding)
2226 {
2227 OSStatus status = noErr ;
2228 m_char_encoding = encoding ;
2229 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2230
2231 status = TECCreateConverter(&m_MB2WC_converter,
2232 m_char_encoding,
2233 m_unicode_encoding);
2234 status = TECCreateConverter(&m_WC2MB_converter,
2235 m_unicode_encoding,
2236 m_char_encoding);
2237 }
2238
335d31e0
SC
2239 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2240 {
d775fa82
WS
2241 OSStatus status = noErr ;
2242 ByteCount byteOutLen ;
2243 ByteCount byteInLen = strlen(psz) ;
2244 wchar_t *tbuf = NULL ;
2245 UniChar* ubuf = NULL ;
2246 size_t res = 0 ;
2247
2248 if (buf == NULL)
2249 {
638357a0 2250 //apple specs say at least 32
c543817b 2251 n = wxMax( 32 , byteInLen ) ;
d775fa82
WS
2252 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2253 }
2254 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
f3a355ce 2255#if SIZEOF_WCHAR_T == 4
d775fa82 2256 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2257#else
d775fa82 2258 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2259#endif
d775fa82
WS
2260 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2261 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
f3a355ce 2262#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2263 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2264 // is not properly terminated we get random characters at the end
2265 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d9d488cf 2266 wxMBConvUTF16 converter ;
d775fa82
WS
2267 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2268 free( ubuf ) ;
f3a355ce 2269#else
d775fa82 2270 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2271#endif
d775fa82
WS
2272 if ( buf == NULL )
2273 free(tbuf) ;
335d31e0 2274
335d31e0
SC
2275 if ( buf && res < n)
2276 buf[res] = 0;
2277
d775fa82 2278 return res ;
335d31e0
SC
2279 }
2280
2281 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82
WS
2282 {
2283 OSStatus status = noErr ;
2284 ByteCount byteOutLen ;
2285 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2286
2287 char *tbuf = NULL ;
2288
2289 if (buf == NULL)
2290 {
638357a0 2291 //apple specs say at least 32
c543817b 2292 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2293 tbuf = (char*) malloc( n ) ;
2294 }
2295
2296 ByteCount byteBufferLen = n ;
2297 UniChar* ubuf = NULL ;
f3a355ce 2298#if SIZEOF_WCHAR_T == 4
d9d488cf 2299 wxMBConvUTF16 converter ;
d775fa82
WS
2300 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2301 byteInLen = unicharlen ;
2302 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2303 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce 2304#else
d775fa82 2305 ubuf = (UniChar*) psz ;
f3a355ce 2306#endif
d775fa82
WS
2307 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2308 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
f3a355ce 2309#if SIZEOF_WCHAR_T == 4
d775fa82 2310 free( ubuf ) ;
f3a355ce 2311#endif
d775fa82
WS
2312 if ( buf == NULL )
2313 free(tbuf) ;
335d31e0 2314
d775fa82 2315 size_t res = byteOutLen ;
335d31e0 2316 if ( buf && res < n)
638357a0 2317 {
335d31e0 2318 buf[res] = 0;
3698ae71 2319
638357a0
RN
2320 //we need to double-trip to verify it didn't insert any ? in place
2321 //of bogus characters
2322 wxWCharBuffer wcBuf(n);
2323 size_t pszlen = wxWcslen(psz);
2324 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2325 wxWcslen(wcBuf) != pszlen ||
2326 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2327 {
2328 // we didn't obtain the same thing we started from, hence
2329 // the conversion was lossy and we consider that it failed
2330 return (size_t)-1;
2331 }
2332 }
335d31e0 2333
d775fa82 2334 return res ;
335d31e0
SC
2335 }
2336
2337 bool IsOk() const
2338 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2339
2340private:
d775fa82
WS
2341 TECObjectRef m_MB2WC_converter ;
2342 TECObjectRef m_WC2MB_converter ;
2343
2344 TextEncodingBase m_char_encoding ;
2345 TextEncodingBase m_unicode_encoding ;
335d31e0
SC
2346};
2347
2348#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 2349
36acb880
VZ
2350// ============================================================================
2351// wxEncodingConverter based conversion classes
2352// ============================================================================
2353
1e6feb95 2354#if wxUSE_FONTMAP
1cd52418 2355
e95354ec 2356class wxMBConv_wxwin : public wxMBConv
1cd52418 2357{
8b04d4c4
VZ
2358private:
2359 void Init()
2360 {
2361 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2362 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2363 }
2364
6001e347 2365public:
f1339c56
RR
2366 // temporarily just use wxEncodingConverter stuff,
2367 // so that it works while a better implementation is built
e95354ec 2368 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
2369 {
2370 if (name)
267e11c5 2371 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
2372 else
2373 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 2374
8b04d4c4
VZ
2375 Init();
2376 }
2377
e95354ec 2378 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
2379 {
2380 m_enc = enc;
2381
2382 Init();
f1339c56 2383 }
dccce9ea 2384
bde4baac 2385 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
2386 {
2387 size_t inbuf = strlen(psz);
dccce9ea 2388 if (buf)
c643a977
VS
2389 {
2390 if (!m2w.Convert(psz,buf))
2391 return (size_t)-1;
2392 }
f1339c56
RR
2393 return inbuf;
2394 }
dccce9ea 2395
bde4baac 2396 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 2397 {
f8d791e0 2398 const size_t inbuf = wxWcslen(psz);
f1339c56 2399 if (buf)
c643a977
VS
2400 {
2401 if (!w2m.Convert(psz,buf))
2402 return (size_t)-1;
2403 }
dccce9ea 2404
f1339c56
RR
2405 return inbuf;
2406 }
dccce9ea 2407
e95354ec 2408 bool IsOk() const { return m_ok; }
f1339c56
RR
2409
2410public:
8b04d4c4 2411 wxFontEncoding m_enc;
f1339c56 2412 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
2413
2414 // were we initialized successfully?
2415 bool m_ok;
fc7a2a60 2416
e95354ec 2417 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 2418};
6001e347 2419
8f115891
MW
2420// make the constructors available for unit testing
2421WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2422{
2423 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2424 if ( !result->IsOk() )
2425 {
2426 delete result;
2427 return 0;
2428 }
2429 return result;
2430}
2431
1e6feb95
VZ
2432#endif // wxUSE_FONTMAP
2433
36acb880
VZ
2434// ============================================================================
2435// wxCSConv implementation
2436// ============================================================================
2437
8b04d4c4 2438void wxCSConv::Init()
6001e347 2439{
e95354ec
VZ
2440 m_name = NULL;
2441 m_convReal = NULL;
2442 m_deferred = true;
2443}
2444
8b04d4c4
VZ
2445wxCSConv::wxCSConv(const wxChar *charset)
2446{
2447 Init();
82713003 2448
e95354ec
VZ
2449 if ( charset )
2450 {
e95354ec
VZ
2451 SetName(charset);
2452 }
bda3d86a
VZ
2453
2454 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
2455}
2456
8b04d4c4
VZ
2457wxCSConv::wxCSConv(wxFontEncoding encoding)
2458{
bda3d86a 2459 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
2460 {
2461 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2462
2463 encoding = wxFONTENCODING_SYSTEM;
2464 }
2465
8b04d4c4
VZ
2466 Init();
2467
bda3d86a 2468 m_encoding = encoding;
8b04d4c4
VZ
2469}
2470
6001e347
RR
2471wxCSConv::~wxCSConv()
2472{
65e50848
JS
2473 Clear();
2474}
2475
54380f29 2476wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 2477 : wxMBConv()
54380f29 2478{
8b04d4c4
VZ
2479 Init();
2480
54380f29 2481 SetName(conv.m_name);
8b04d4c4 2482 m_encoding = conv.m_encoding;
54380f29
GD
2483}
2484
2485wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2486{
2487 Clear();
8b04d4c4 2488
54380f29 2489 SetName(conv.m_name);
8b04d4c4
VZ
2490 m_encoding = conv.m_encoding;
2491
54380f29
GD
2492 return *this;
2493}
2494
65e50848
JS
2495void wxCSConv::Clear()
2496{
8b04d4c4 2497 free(m_name);
e95354ec 2498 delete m_convReal;
8b04d4c4 2499
65e50848 2500 m_name = NULL;
e95354ec 2501 m_convReal = NULL;
6001e347
RR
2502}
2503
2504void wxCSConv::SetName(const wxChar *charset)
2505{
f1339c56
RR
2506 if (charset)
2507 {
2508 m_name = wxStrdup(charset);
e95354ec 2509 m_deferred = true;
f1339c56 2510 }
6001e347
RR
2511}
2512
e95354ec
VZ
2513wxMBConv *wxCSConv::DoCreate() const
2514{
c547282d
VZ
2515 // check for the special case of ASCII or ISO8859-1 charset: as we have
2516 // special knowledge of it anyhow, we don't need to create a special
2517 // conversion object
2518 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 2519 {
e95354ec
VZ
2520 // don't convert at all
2521 return NULL;
2522 }
dccce9ea 2523
e95354ec
VZ
2524 // we trust OS to do conversion better than we can so try external
2525 // conversion methods first
2526 //
2527 // the full order is:
2528 // 1. OS conversion (iconv() under Unix or Win32 API)
2529 // 2. hard coded conversions for UTF
2530 // 3. wxEncodingConverter as fall back
2531
2532 // step (1)
2533#ifdef HAVE_ICONV
c547282d 2534#if !wxUSE_FONTMAP
e95354ec 2535 if ( m_name )
c547282d 2536#endif // !wxUSE_FONTMAP
e95354ec 2537 {
c547282d
VZ
2538 wxString name(m_name);
2539
2540#if wxUSE_FONTMAP
2541 if ( name.empty() )
d0ee33f5 2542 name = wxFontMapperBase::GetEncodingName(m_encoding);
c547282d
VZ
2543#endif // wxUSE_FONTMAP
2544
2545 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
e95354ec
VZ
2546 if ( conv->IsOk() )
2547 return conv;
2548
2549 delete conv;
2550 }
2551#endif // HAVE_ICONV
2552
2553#ifdef wxHAVE_WIN32_MB2WC
2554 {
7608a683 2555#if wxUSE_FONTMAP
e95354ec
VZ
2556 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2557 : new wxMBConv_win32(m_encoding);
2558 if ( conv->IsOk() )
2559 return conv;
2560
2561 delete conv;
7608a683
WS
2562#else
2563 return NULL;
2564#endif
e95354ec
VZ
2565 }
2566#endif // wxHAVE_WIN32_MB2WC
d775fa82
WS
2567#if defined(__WXMAC__)
2568 {
5c3c8676 2569 // leave UTF16 and UTF32 to the built-ins of wx
3698ae71 2570 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
5c3c8676 2571 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
d775fa82
WS
2572 {
2573
2d1659cf 2574#if wxUSE_FONTMAP
d775fa82
WS
2575 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2576 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
2577#else
2578 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2579#endif
d775fa82 2580 if ( conv->IsOk() )
f7e98dee
RN
2581 return conv;
2582
2583 delete conv;
2584 }
2585 }
2586#endif
2587#if defined(__WXCOCOA__)
2588 {
2589 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2590 {
2591
a6900d10 2592#if wxUSE_FONTMAP
f7e98dee
RN
2593 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2594 : new wxMBConv_cocoa(m_encoding);
a6900d10
RN
2595#else
2596 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2597#endif
f7e98dee 2598 if ( conv->IsOk() )
d775fa82
WS
2599 return conv;
2600
2601 delete conv;
2602 }
335d31e0
SC
2603 }
2604#endif
e95354ec
VZ
2605 // step (2)
2606 wxFontEncoding enc = m_encoding;
2607#if wxUSE_FONTMAP
c547282d
VZ
2608 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2609 {
2610 // use "false" to suppress interactive dialogs -- we can be called from
2611 // anywhere and popping up a dialog from here is the last thing we want to
2612 // do
267e11c5 2613 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 2614 }
e95354ec
VZ
2615#endif // wxUSE_FONTMAP
2616
2617 switch ( enc )
2618 {
2619 case wxFONTENCODING_UTF7:
2620 return new wxMBConvUTF7;
2621
2622 case wxFONTENCODING_UTF8:
2623 return new wxMBConvUTF8;
2624
e95354ec
VZ
2625 case wxFONTENCODING_UTF16BE:
2626 return new wxMBConvUTF16BE;
2627
2628 case wxFONTENCODING_UTF16LE:
2629 return new wxMBConvUTF16LE;
2630
e95354ec
VZ
2631 case wxFONTENCODING_UTF32BE:
2632 return new wxMBConvUTF32BE;
2633
2634 case wxFONTENCODING_UTF32LE:
2635 return new wxMBConvUTF32LE;
2636
2637 default:
2638 // nothing to do but put here to suppress gcc warnings
2639 ;
2640 }
2641
2642 // step (3)
2643#if wxUSE_FONTMAP
2644 {
2645 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2646 : new wxMBConv_wxwin(m_encoding);
2647 if ( conv->IsOk() )
2648 return conv;
2649
2650 delete conv;
2651 }
2652#endif // wxUSE_FONTMAP
2653
a58d4f4d
VS
2654 // NB: This is a hack to prevent deadlock. What could otherwise happen
2655 // in Unicode build: wxConvLocal creation ends up being here
2656 // because of some failure and logs the error. But wxLog will try to
2657 // attach timestamp, for which it will need wxConvLocal (to convert
2658 // time to char* and then wchar_t*), but that fails, tries to log
2659 // error, but wxLog has a (already locked) critical section that
2660 // guards static buffer.
2661 static bool alreadyLoggingError = false;
2662 if (!alreadyLoggingError)
2663 {
2664 alreadyLoggingError = true;
2665 wxLogError(_("Cannot convert from the charset '%s'!"),
2666 m_name ? m_name
e95354ec
VZ
2667 :
2668#if wxUSE_FONTMAP
267e11c5 2669 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
e95354ec
VZ
2670#else // !wxUSE_FONTMAP
2671 wxString::Format(_("encoding %s"), m_encoding).c_str()
2672#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2673 );
a58d4f4d
VS
2674 alreadyLoggingError = false;
2675 }
e95354ec
VZ
2676
2677 return NULL;
2678}
2679
2680void wxCSConv::CreateConvIfNeeded() const
2681{
2682 if ( m_deferred )
2683 {
2684 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
2685
2686#if wxUSE_INTL
2687 // if we don't have neither the name nor the encoding, use the default
2688 // encoding for this system
2689 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2690 {
4d312c22 2691 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
2692 }
2693#endif // wxUSE_INTL
2694
e95354ec
VZ
2695 self->m_convReal = DoCreate();
2696 self->m_deferred = false;
6001e347 2697 }
6001e347
RR
2698}
2699
2700size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2701{
e95354ec 2702 CreateConvIfNeeded();
dccce9ea 2703
e95354ec
VZ
2704 if (m_convReal)
2705 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
2706
2707 // latin-1 (direct)
4def3b35 2708 size_t len = strlen(psz);
dccce9ea 2709
f1339c56
RR
2710 if (buf)
2711 {
4def3b35 2712 for (size_t c = 0; c <= len; c++)
f1339c56
RR
2713 buf[c] = (unsigned char)(psz[c]);
2714 }
dccce9ea 2715
f1339c56 2716 return len;
6001e347
RR
2717}
2718
2719size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2720{
e95354ec 2721 CreateConvIfNeeded();
dccce9ea 2722
e95354ec
VZ
2723 if (m_convReal)
2724 return m_convReal->WC2MB(buf, psz, n);
1cd52418 2725
f1339c56 2726 // latin-1 (direct)
f8d791e0 2727 const size_t len = wxWcslen(psz);
f1339c56
RR
2728 if (buf)
2729 {
4def3b35 2730 for (size_t c = 0; c <= len; c++)
24642831
VS
2731 {
2732 if (psz[c] > 0xFF)
2733 return (size_t)-1;
907173e5 2734 buf[c] = (char)psz[c];
24642831
VS
2735 }
2736 }
2737 else
2738 {
2739 for (size_t c = 0; c <= len; c++)
2740 {
2741 if (psz[c] > 0xFF)
2742 return (size_t)-1;
2743 }
f1339c56 2744 }
dccce9ea 2745
f1339c56 2746 return len;
6001e347
RR
2747}
2748
bde4baac
VZ
2749// ----------------------------------------------------------------------------
2750// globals
2751// ----------------------------------------------------------------------------
2752
2753#ifdef __WINDOWS__
2754 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
2755#elif defined(__WXMAC__) && !defined(__MACH__)
2756 static wxMBConv_mac wxConvLibcObj ;
bde4baac 2757#else
dcc8fac0 2758 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
2759#endif
2760
2761static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2762static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2763static wxMBConvUTF7 wxConvUTF7Obj;
2764static wxMBConvUTF8 wxConvUTF8Obj;
c12b7f79 2765
bde4baac
VZ
2766WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2767WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2768WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2769WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2770WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2771WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
f5a1953b
VZ
2772WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2773#ifdef __WXOSX__
ea8ce907 2774 wxConvUTF8Obj;
f5a1953b 2775#else
ea8ce907 2776 wxConvLibcObj;
f5a1953b
VZ
2777#endif
2778
bde4baac
VZ
2779
2780#else // !wxUSE_WCHAR_T
2781
2782// stand-ins in absence of wchar_t
2783WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2784 wxConvISO8859_1,
2785 wxConvLocal,
2786 wxConvUTF8;
2787
2788#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
2789
2790