]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
applying OSAF patch for "toolbar selection not correct on Mac after calling Realize...
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
f6bcfd97
BP
15// ============================================================================
16// declarations
17// ============================================================================
18
19// ----------------------------------------------------------------------------
20// headers
21// ----------------------------------------------------------------------------
22
14f355c2 23#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
24 #pragma implementation "strconv.h"
25#endif
26
27// For compilers that support precompilation, includes "wx.h".
28#include "wx/wxprec.h"
29
30#ifdef __BORLANDC__
31 #pragma hdrstop
32#endif
33
373658eb
VZ
34#ifndef WX_PRECOMP
35 #include "wx/intl.h"
36 #include "wx/log.h"
37#endif // WX_PRECOMP
38
bde4baac
VZ
39#include "wx/strconv.h"
40
41#if wxUSE_WCHAR_T
42
0a1c1e62 43#ifdef __WXMSW__
373658eb 44 #include "wx/msw/private.h"
7608a683
WS
45#endif
46
47#ifdef __WINDOWS__
13dd924a 48 #include "wx/msw/missing.h"
0a1c1e62
GRG
49#endif
50
1c193821 51#ifndef __WXWINCE__
1cd52418 52#include <errno.h>
1c193821
JS
53#endif
54
6001e347
RR
55#include <ctype.h>
56#include <string.h>
57#include <stdlib.h>
ea8ce907
RR
58#ifdef HAVE_LANGINFO_H
59 #include <langinfo.h>
60#endif
6001e347 61
e95354ec
VZ
62#if defined(__WIN32__) && !defined(__WXMICROWIN__)
63 #define wxHAVE_WIN32_MB2WC
64#endif // __WIN32__ but !__WXMICROWIN__
65
373658eb
VZ
66// ----------------------------------------------------------------------------
67// headers
68// ----------------------------------------------------------------------------
7af284fd 69
6001e347 70#ifdef __SALFORDC__
373658eb 71 #include <clib.h>
6001e347
RR
72#endif
73
b040e242 74#ifdef HAVE_ICONV
373658eb 75 #include <iconv.h>
b1d547eb 76 #include "wx/thread.h"
1cd52418 77#endif
1cd52418 78
373658eb
VZ
79#include "wx/encconv.h"
80#include "wx/fontmap.h"
7608a683 81#include "wx/utils.h"
373658eb 82
335d31e0 83#ifdef __WXMAC__
4227afa4
SC
84#include <ATSUnicode.h>
85#include <TextCommon.h>
86#include <TextEncodingConverter.h>
335d31e0
SC
87
88#include "wx/mac/private.h" // includes mac headers
89#endif
373658eb
VZ
90// ----------------------------------------------------------------------------
91// macros
92// ----------------------------------------------------------------------------
3e61dfb0 93
1cd52418 94#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
3a0d76bc 95#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
1cd52418
OK
96
97#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
98 #define WC_NAME "UCS4"
99 #define WC_BSWAP BSWAP_UCS4
100 #ifdef WORDS_BIGENDIAN
101 #define WC_NAME_BEST "UCS-4BE"
102 #else
103 #define WC_NAME_BEST "UCS-4LE"
104 #endif
1cd52418 105#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
106 #define WC_NAME "UTF16"
107 #define WC_BSWAP BSWAP_UTF16
a3f2769e 108 #define WC_UTF16
3a0d76bc
VS
109 #ifdef WORDS_BIGENDIAN
110 #define WC_NAME_BEST "UTF-16BE"
111 #else
112 #define WC_NAME_BEST "UTF-16LE"
113 #endif
bab1e722 114#else // sizeof(wchar_t) != 2 nor 4
bde4baac
VZ
115 // does this ever happen?
116 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1cd52418
OK
117#endif
118
373658eb
VZ
119// ============================================================================
120// implementation
121// ============================================================================
122
123// ----------------------------------------------------------------------------
c91830cb 124// UTF-16 en/decoding to/from UCS-4
373658eb 125// ----------------------------------------------------------------------------
6001e347 126
b0a6bb75 127
c91830cb 128static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 129{
dccce9ea 130 if (input<=0xffff)
4def3b35 131 {
999836aa
VZ
132 if (output)
133 *output = (wxUint16) input;
4def3b35 134 return 1;
dccce9ea
VZ
135 }
136 else if (input>=0x110000)
4def3b35
VS
137 {
138 return (size_t)-1;
dccce9ea
VZ
139 }
140 else
4def3b35 141 {
dccce9ea 142 if (output)
4def3b35 143 {
c91830cb 144 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
999836aa 145 *output = (wxUint16) ((input&0x3ff)+0xdc00);
4def3b35
VS
146 }
147 return 2;
1cd52418 148 }
1cd52418
OK
149}
150
c91830cb 151static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 152{
dccce9ea 153 if ((*input<0xd800) || (*input>0xdfff))
4def3b35
VS
154 {
155 output = *input;
156 return 1;
dccce9ea
VZ
157 }
158 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
4def3b35
VS
159 {
160 output = *input;
161 return (size_t)-1;
dccce9ea
VZ
162 }
163 else
4def3b35
VS
164 {
165 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
166 return 2;
167 }
1cd52418
OK
168}
169
b0a6bb75 170
f6bcfd97 171// ----------------------------------------------------------------------------
6001e347 172// wxMBConv
f6bcfd97 173// ----------------------------------------------------------------------------
2c53a80a
WS
174
175wxMBConv::~wxMBConv()
176{
177 // nothing to do here (necessary for Darwin linking probably)
178}
6001e347 179
6001e347
RR
180const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
181{
2b5f62a0 182 if ( psz )
6001e347 183 {
2b5f62a0
VZ
184 // calculate the length of the buffer needed first
185 size_t nLen = MB2WC(NULL, psz, 0);
186 if ( nLen != (size_t)-1 )
187 {
188 // now do the actual conversion
189 wxWCharBuffer buf(nLen);
635f33ce
VS
190 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
191 if ( nLen != (size_t)-1 )
192 {
193 return buf;
194 }
2b5f62a0 195 }
f6bcfd97 196 }
2b5f62a0
VZ
197
198 wxWCharBuffer buf((wchar_t *)NULL);
199
200 return buf;
6001e347
RR
201}
202
e5cceba0 203const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 204{
2b5f62a0
VZ
205 if ( pwz )
206 {
207 size_t nLen = WC2MB(NULL, pwz, 0);
208 if ( nLen != (size_t)-1 )
209 {
c91830cb 210 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
635f33ce
VS
211 nLen = WC2MB(buf.data(), pwz, nLen + 4);
212 if ( nLen != (size_t)-1 )
213 {
214 return buf;
215 }
2b5f62a0
VZ
216 }
217 }
218
219 wxCharBuffer buf((char *)NULL);
e5cceba0 220
e5cceba0 221 return buf;
6001e347
RR
222}
223
f5fb6871 224const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 225{
f5fb6871
RN
226 wxASSERT(pOutSize != NULL);
227
e4e3bbb4
RN
228 const char* szEnd = szString + nStringLen + 1;
229 const char* szPos = szString;
230 const char* szStart = szPos;
231
232 size_t nActualLength = 0;
f5fb6871
RN
233 size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
234
235 wxWCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
236
237 //Convert the string until the length() is reached, continuing the
238 //loop every time a null character is reached
239 while(szPos != szEnd)
240 {
241 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
242
243 //Get the length of the current (sub)string
244 size_t nLen = MB2WC(NULL, szPos, 0);
245
246 //Invalid conversion?
247 if( nLen == (size_t)-1 )
f5fb6871
RN
248 {
249 *pOutSize = 0;
250 theBuffer.data()[0u] = wxT('\0');
251 return theBuffer;
252 }
253
e4e3bbb4
RN
254
255 //Increase the actual length (+1 for current null character)
256 nActualLength += nLen + 1;
257
f5fb6871
RN
258 //if buffer too big, realloc the buffer
259 if (nActualLength > (nCurrentSize+1))
260 {
261 wxWCharBuffer theNewBuffer(nCurrentSize << 1);
262 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
263 theBuffer = theNewBuffer;
264 nCurrentSize <<= 1;
265 }
266
267 //Convert the current (sub)string
268 if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 269 {
f5fb6871
RN
270 *pOutSize = 0;
271 theBuffer.data()[0u] = wxT('\0');
272 return theBuffer;
e4e3bbb4
RN
273 }
274
275 //Increment to next (sub)string
276 //Note that we have to use strlen here instead of nLen
277 //here because XX2XX gives us the size of the output buffer,
278 //not neccessarly the length of the string
279 szPos += strlen(szPos) + 1;
280 }
281
f5fb6871
RN
282 //success - return actual length and the buffer
283 *pOutSize = nActualLength;
3698ae71 284 return theBuffer;
e4e3bbb4
RN
285}
286
f5fb6871 287const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 288{
f5fb6871
RN
289 wxASSERT(pOutSize != NULL);
290
e4e3bbb4
RN
291 const wchar_t* szEnd = szString + nStringLen + 1;
292 const wchar_t* szPos = szString;
293 const wchar_t* szStart = szPos;
294
295 size_t nActualLength = 0;
f5fb6871
RN
296 size_t nCurrentSize = nStringLen << 2; //try * 4 first
297
298 wxCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
299
300 //Convert the string until the length() is reached, continuing the
301 //loop every time a null character is reached
302 while(szPos != szEnd)
303 {
304 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
305
306 //Get the length of the current (sub)string
307 size_t nLen = WC2MB(NULL, szPos, 0);
308
309 //Invalid conversion?
310 if( nLen == (size_t)-1 )
f5fb6871
RN
311 {
312 *pOutSize = 0;
313 theBuffer.data()[0u] = wxT('\0');
314 return theBuffer;
315 }
e4e3bbb4
RN
316
317 //Increase the actual length (+1 for current null character)
318 nActualLength += nLen + 1;
3698ae71 319
f5fb6871
RN
320 //if buffer too big, realloc the buffer
321 if (nActualLength > (nCurrentSize+1))
322 {
323 wxCharBuffer theNewBuffer(nCurrentSize << 1);
324 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
325 theBuffer = theNewBuffer;
326 nCurrentSize <<= 1;
327 }
328
329 //Convert the current (sub)string
330 if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 331 {
f5fb6871
RN
332 *pOutSize = 0;
333 theBuffer.data()[0u] = wxT('\0');
334 return theBuffer;
e4e3bbb4
RN
335 }
336
337 //Increment to next (sub)string
338 //Note that we have to use wxWcslen here instead of nLen
339 //here because XX2XX gives us the size of the output buffer,
340 //not neccessarly the length of the string
341 szPos += wxWcslen(szPos) + 1;
342 }
343
f5fb6871
RN
344 //success - return actual length and the buffer
345 *pOutSize = nActualLength;
3698ae71 346 return theBuffer;
e4e3bbb4
RN
347}
348
6001e347 349// ----------------------------------------------------------------------------
bde4baac 350// wxMBConvLibc
6001e347
RR
351// ----------------------------------------------------------------------------
352
bde4baac
VZ
353size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
354{
355 return wxMB2WC(buf, psz, n);
356}
357
358size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
359{
360 return wxWC2MB(buf, psz, n);
361}
e1bfe89e 362
66bf0099 363#ifdef __UNIX__
c12b7f79 364
e1bfe89e 365// ----------------------------------------------------------------------------
66bf0099 366// wxConvBrokenFileNames
e1bfe89e
RR
367// ----------------------------------------------------------------------------
368
c12b7f79 369wxConvBrokenFileNames::wxConvBrokenFileNames()
ea8ce907 370{
c12b7f79
VZ
371 // decide which conversion to use for the file names
372
373 // (1) this variable exists for the sole purpose of specifying the encoding
374 // of the filenames for GTK+ programs, so use it if it is set
375 const wxChar *encName = wxGetenv(_T("G_FILENAME_ENCODING"));
376 if ( encName )
377 {
378 m_conv = new wxCSConv(encName);
379 }
380 else // no G_FILENAME_ENCODING
381 {
382 // (2) if a non default locale is set, assume that the user wants his
383 // filenames in this locale too
384 switch ( wxLocale::GetSystemEncoding() )
385 {
386 default:
387 m_conv = new wxMBConvLibc;
388 break;
389
390 // (3) finally use UTF-8 by default
391 case wxFONTENCODING_SYSTEM:
392 case wxFONTENCODING_UTF8:
393 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
394 break;
395 }
396 }
ea8ce907
RR
397}
398
c12b7f79
VZ
399size_t
400wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
401 const char *psz,
402 size_t outputSize) const
e1bfe89e 403{
c12b7f79 404 return m_conv->MB2WC( outputBuf, psz, outputSize );
e1bfe89e
RR
405}
406
c12b7f79
VZ
407size_t
408wxConvBrokenFileNames::WC2MB(char *outputBuf,
409 const wchar_t *psz,
410 size_t outputSize) const
e1bfe89e 411{
c12b7f79 412 return m_conv->WC2MB( outputBuf, psz, outputSize );
e1bfe89e
RR
413}
414
66bf0099 415#endif
c12b7f79 416
bde4baac 417// ----------------------------------------------------------------------------
3698ae71 418// UTF-7
bde4baac 419// ----------------------------------------------------------------------------
6001e347 420
15f2ee32 421// Implementation (C) 2004 Fredrik Roubert
6001e347 422
15f2ee32
RN
423//
424// BASE64 decoding table
425//
// Maps a byte value to its 6-bit BASE64 value; 0xff marks bytes which are not
// part of the BASE64 alphabet. One row per 16 byte values.
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0' - '9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x4f: 'A' - 'O'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50 - 0x5f: 'P' - 'Z'
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x6f: 'a' - 'o'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70 - 0x7f: 'p' - 'z'
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff: never valid
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
461
462size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
463{
15f2ee32
RN
464 size_t len = 0;
465
466 while (*psz && ((!buf) || (len < n)))
467 {
468 unsigned char cc = *psz++;
469 if (cc != '+')
470 {
471 // plain ASCII char
472 if (buf)
473 *buf++ = cc;
474 len++;
475 }
476 else if (*psz == '-')
477 {
478 // encoded plus sign
479 if (buf)
480 *buf++ = cc;
481 len++;
482 psz++;
483 }
484 else
485 {
486 // BASE64 encoded string
487 bool lsb;
488 unsigned char c;
489 unsigned int d, l;
490 for (lsb = false, d = 0, l = 0;
491 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
492 {
493 d <<= 6;
494 d += cc;
495 for (l += 6; l >= 8; lsb = !lsb)
496 {
6356d52a 497 c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
498 if (lsb)
499 {
500 if (buf)
501 *buf++ |= c;
502 len ++;
503 }
504 else
505 if (buf)
6356d52a 506 *buf = (wchar_t)(c << 8);
15f2ee32
RN
507 }
508 }
509 if (*psz == '-')
510 psz++;
511 }
512 }
513 if (buf && (len < n))
514 *buf = 0;
515 return len;
6001e347
RR
516}
517
15f2ee32
RN
518//
519// BASE64 encoding table
520//
// Maps a 6-bit value (0..63) to the corresponding BASE64 character.
static const unsigned char utf7enb64[] =
{
    //  0 - 15
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    // 16 - 31
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    // 32 - 47
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    // 48 - 63
    'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
};
532
533//
534// UTF-7 encoding table
535//
536// 0 - Set D (directly encoded characters)
537// 1 - Set O (optional direct characters)
538// 2 - whitespace characters (optional)
539// 3 - special characters
540//
// Class of each 7-bit character for UTF-7 encoding purposes:
//   0 - Set D (directly encoded characters)
//   1 - Set O (optional direct characters)
//   2 - whitespace characters (optional)
//   3 - special characters
// One row per 8 character codes.
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3,     // 0x00 - 0x07
    3, 2, 2, 3, 3, 2, 3, 3,     // 0x08 - 0x0f
    3, 3, 3, 3, 3, 3, 3, 3,     // 0x10 - 0x17
    3, 3, 3, 3, 3, 3, 3, 3,     // 0x18 - 0x1f
    2, 1, 1, 1, 1, 1, 1, 0,     // 0x20 - 0x27
    0, 0, 1, 3, 0, 0, 0, 3,     // 0x28 - 0x2f
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x30 - 0x37
    0, 0, 0, 1, 1, 1, 1, 0,     // 0x38 - 0x3f
    1, 0, 0, 0, 0, 0, 0, 0,     // 0x40 - 0x47
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x48 - 0x4f
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x50 - 0x57
    0, 0, 0, 1, 3, 1, 1, 1,     // 0x58 - 0x5f
    1, 0, 0, 0, 0, 0, 0, 0,     // 0x60 - 0x67
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x68 - 0x6f
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x70 - 0x77
    0, 0, 0, 1, 1, 1, 3, 3      // 0x78 - 0x7f
};
552
667e5b3e 553size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32
RN
554{
555
556
557 size_t len = 0;
558
559 while (*psz && ((!buf) || (len < n)))
560 {
561 wchar_t cc = *psz++;
562 if (cc < 0x80 && utf7encode[cc] < 1)
563 {
564 // plain ASCII char
565 if (buf)
566 *buf++ = (char)cc;
567 len++;
568 }
569#ifndef WC_UTF16
79c78d42 570 else if (((wxUint32)cc) > 0xffff)
b2c13097 571 {
15f2ee32
RN
572 // no surrogate pair generation (yet?)
573 return (size_t)-1;
574 }
575#endif
576 else
577 {
578 if (buf)
579 *buf++ = '+';
580 len++;
581 if (cc != '+')
582 {
583 // BASE64 encode string
584 unsigned int lsb, d, l;
585 for (d = 0, l = 0;; psz++)
586 {
587 for (lsb = 0; lsb < 2; lsb ++)
588 {
589 d <<= 8;
590 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
591
592 for (l += 8; l >= 6; )
593 {
594 l -= 6;
595 if (buf)
596 *buf++ = utf7enb64[(d >> l) % 64];
597 len++;
598 }
599 }
600 cc = *psz;
601 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
602 break;
603 }
604 if (l != 0)
605 {
606 if (buf)
607 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
608 len++;
609 }
610 }
611 if (buf)
612 *buf++ = '-';
613 len++;
614 }
615 }
616 if (buf && (len < n))
617 *buf = 0;
618 return len;
6001e347
RR
619}
620
f6bcfd97 621// ----------------------------------------------------------------------------
6001e347 622// UTF-8
f6bcfd97 623// ----------------------------------------------------------------------------
6001e347 624
dccce9ea 625static wxUint32 utf8_max[]=
4def3b35 626 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347 627
3698ae71
VZ
// boundaries of the private use area to which we (temporarily) remap the
// bytes that are invalid in a UTF-8 encoded string
ea8ce907
RR
630const wxUint32 wxUnicodePUA = 0x100000;
631const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
632
6001e347
RR
633size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
634{
4def3b35
VS
635 size_t len = 0;
636
dccce9ea 637 while (*psz && ((!buf) || (len < n)))
4def3b35 638 {
ea8ce907
RR
639 const char *opsz = psz;
640 bool invalid = false;
4def3b35
VS
641 unsigned char cc = *psz++, fc = cc;
642 unsigned cnt;
dccce9ea 643 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 644 fc <<= 1;
dccce9ea 645 if (!cnt)
4def3b35
VS
646 {
647 // plain ASCII char
dccce9ea 648 if (buf)
4def3b35
VS
649 *buf++ = cc;
650 len++;
dccce9ea
VZ
651 }
652 else
4def3b35
VS
653 {
654 cnt--;
dccce9ea 655 if (!cnt)
4def3b35
VS
656 {
657 // invalid UTF-8 sequence
ea8ce907 658 invalid = true;
dccce9ea
VZ
659 }
660 else
4def3b35
VS
661 {
662 unsigned ocnt = cnt - 1;
663 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 664 while (cnt--)
4def3b35 665 {
ea8ce907 666 cc = *psz;
dccce9ea 667 if ((cc & 0xC0) != 0x80)
4def3b35
VS
668 {
669 // invalid UTF-8 sequence
ea8ce907
RR
670 invalid = true;
671 break;
4def3b35 672 }
ea8ce907 673 psz++;
4def3b35
VS
674 res = (res << 6) | (cc & 0x3f);
675 }
ea8ce907 676 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
677 {
678 // illegal UTF-8 encoding
ea8ce907 679 invalid = true;
4def3b35 680 }
ea8ce907
RR
681 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
682 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
683 {
684 // if one of our PUA characters turns up externally
685 // it must also be treated as an illegal sequence
686 // (a bit like you have to escape an escape character)
687 invalid = true;
688 }
689 else
690 {
1cd52418 691#ifdef WC_UTF16
ea8ce907
RR
692 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
693 size_t pa = encode_utf16(res, (wxUint16 *)buf);
694 if (pa == (size_t)-1)
695 {
696 invalid = true;
697 }
698 else
699 {
700 if (buf)
701 buf += pa;
702 len += pa;
703 }
373658eb 704#else // !WC_UTF16
ea8ce907
RR
705 if (buf)
706 *buf++ = res;
707 len++;
373658eb 708#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
709 }
710 }
711 if (invalid)
712 {
713 if (m_options & MAP_INVALID_UTF8_TO_PUA)
714 {
715 while (opsz < psz && (!buf || len < n))
716 {
717#ifdef WC_UTF16
718 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
719 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
720 wxASSERT(pa != (size_t)-1);
721 if (buf)
722 buf += pa;
723 opsz++;
724 len += pa;
725#else
726 if (buf)
727 *buf++ = wxUnicodePUA + (unsigned char)*opsz;
728 opsz++;
729 len++;
730#endif
731 }
732 }
3698ae71 733 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
734 {
735 while (opsz < psz && (!buf || len < n))
736 {
3698ae71
VZ
737 if ( buf && len + 3 < n )
738 {
739 unsigned char n = *opsz;
740 *buf++ = L'\\';
b2c13097
WS
741 *buf++ = (wchar_t)( L'0' + n / 0100 );
742 *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
743 *buf++ = (wchar_t)( L'0' + n % 010 );
3698ae71 744 }
ea8ce907
RR
745 opsz++;
746 len += 4;
747 }
748 }
3698ae71 749 else // MAP_INVALID_UTF8_NOT
ea8ce907
RR
750 {
751 return (size_t)-1;
752 }
4def3b35
VS
753 }
754 }
6001e347 755 }
dccce9ea 756 if (buf && (len < n))
4def3b35
VS
757 *buf = 0;
758 return len;
6001e347
RR
759}
760
3698ae71
VZ
// Returns true if wch is one of the octal digits L'0' to L'7'.
static inline bool isoctal(wchar_t wch)
{
    if (wch < L'0')
        return false;

    return wch <= L'7';
}
765
6001e347
RR
766size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
767{
4def3b35 768 size_t len = 0;
6001e347 769
dccce9ea 770 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
771 {
772 wxUint32 cc;
1cd52418 773#ifdef WC_UTF16
b5153fd8
VZ
774 // cast is ok for WC_UTF16
775 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 776 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 777#else
4def3b35
VS
778 cc=(*psz++) & 0x7fffffff;
779#endif
3698ae71
VZ
780
781 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
782 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 783 {
dccce9ea 784 if (buf)
ea8ce907 785 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 786 len++;
3698ae71
VZ
787 }
788 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
789 cc == L'\\' &&
790 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 791 {
dccce9ea 792 if (buf)
3698ae71 793 {
b2c13097
WS
794 *buf++ = (char) ((psz[0] - L'0')*0100 +
795 (psz[1] - L'0')*010 +
796 (psz[2] - L'0'));
3698ae71
VZ
797 }
798
799 psz += 3;
ea8ce907
RR
800 len++;
801 }
802 else
803 {
804 unsigned cnt;
805 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
806 if (!cnt)
4def3b35 807 {
ea8ce907
RR
808 // plain ASCII char
809 if (buf)
810 *buf++ = (char) cc;
811 len++;
812 }
813
814 else
815 {
816 len += cnt + 1;
817 if (buf)
818 {
819 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
820 while (cnt--)
821 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
822 }
4def3b35
VS
823 }
824 }
6001e347 825 }
4def3b35 826
3698ae71
VZ
827 if (buf && (len<n))
828 *buf = 0;
adb45366 829
4def3b35 830 return len;
6001e347
RR
831}
832
c91830cb
VZ
833// ----------------------------------------------------------------------------
834// UTF-16
835// ----------------------------------------------------------------------------
836
837#ifdef WORDS_BIGENDIAN
bde4baac
VZ
838 #define wxMBConvUTF16straight wxMBConvUTF16BE
839 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 840#else
bde4baac
VZ
841 #define wxMBConvUTF16swap wxMBConvUTF16BE
842 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
843#endif
844
845
c91830cb
VZ
846#ifdef WC_UTF16
847
c91830cb
VZ
848// copy 16bit MB to 16bit String
849size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
850{
851 size_t len=0;
852
853 while (*(wxUint16*)psz && (!buf || len < n))
854 {
855 if (buf)
856 *buf++ = *(wxUint16*)psz;
857 len++;
858
859 psz += sizeof(wxUint16);
860 }
861 if (buf && len<n) *buf=0;
862
863 return len;
864}
865
866
867// copy 16bit String to 16bit MB
868size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
869{
870 size_t len=0;
871
872 while (*psz && (!buf || len < n))
873 {
874 if (buf)
875 {
876 *(wxUint16*)buf = *psz;
877 buf += sizeof(wxUint16);
878 }
879 len += sizeof(wxUint16);
880 psz++;
881 }
882 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
883
884 return len;
885}
886
887
888// swap 16bit MB to 16bit String
889size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
890{
891 size_t len=0;
892
893 while (*(wxUint16*)psz && (!buf || len < n))
894 {
895 if (buf)
896 {
897 ((char *)buf)[0] = psz[1];
898 ((char *)buf)[1] = psz[0];
899 buf++;
900 }
901 len++;
902 psz += sizeof(wxUint16);
903 }
904 if (buf && len<n) *buf=0;
905
906 return len;
907}
908
909
910// swap 16bit MB to 16bit String
911size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
912{
913 size_t len=0;
914
915 while (*psz && (!buf || len < n))
916 {
917 if (buf)
918 {
919 *buf++ = ((char*)psz)[1];
920 *buf++ = ((char*)psz)[0];
921 }
922 len += sizeof(wxUint16);
923 psz++;
924 }
925 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
926
927 return len;
928}
929
930
931#else // WC_UTF16
932
933
934// copy 16bit MB to 32bit String
935size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
936{
937 size_t len=0;
938
939 while (*(wxUint16*)psz && (!buf || len < n))
940 {
941 wxUint32 cc;
942 size_t pa=decode_utf16((wxUint16*)psz, cc);
943 if (pa == (size_t)-1)
944 return pa;
945
946 if (buf)
947 *buf++ = cc;
948 len++;
949 psz += pa * sizeof(wxUint16);
950 }
951 if (buf && len<n) *buf=0;
952
953 return len;
954}
955
956
957// copy 32bit String to 16bit MB
958size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
959{
960 size_t len=0;
961
962 while (*psz && (!buf || len < n))
963 {
964 wxUint16 cc[2];
965 size_t pa=encode_utf16(*psz, cc);
966
967 if (pa == (size_t)-1)
968 return pa;
969
970 if (buf)
971 {
69b80d28 972 *(wxUint16*)buf = cc[0];
b5153fd8 973 buf += sizeof(wxUint16);
c91830cb 974 if (pa > 1)
69b80d28
VZ
975 {
976 *(wxUint16*)buf = cc[1];
977 buf += sizeof(wxUint16);
978 }
c91830cb
VZ
979 }
980
981 len += pa*sizeof(wxUint16);
982 psz++;
983 }
984 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
985
986 return len;
987}
988
989
990// swap 16bit MB to 32bit String
991size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
992{
993 size_t len=0;
994
995 while (*(wxUint16*)psz && (!buf || len < n))
996 {
997 wxUint32 cc;
998 char tmp[4];
999 tmp[0]=psz[1]; tmp[1]=psz[0];
1000 tmp[2]=psz[3]; tmp[3]=psz[2];
1001
1002 size_t pa=decode_utf16((wxUint16*)tmp, cc);
1003 if (pa == (size_t)-1)
1004 return pa;
1005
1006 if (buf)
1007 *buf++ = cc;
1008
1009 len++;
1010 psz += pa * sizeof(wxUint16);
1011 }
1012 if (buf && len<n) *buf=0;
1013
1014 return len;
1015}
1016
1017
1018// swap 32bit String to 16bit MB
1019size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1020{
1021 size_t len=0;
1022
1023 while (*psz && (!buf || len < n))
1024 {
1025 wxUint16 cc[2];
1026 size_t pa=encode_utf16(*psz, cc);
1027
1028 if (pa == (size_t)-1)
1029 return pa;
1030
1031 if (buf)
1032 {
1033 *buf++ = ((char*)cc)[1];
1034 *buf++ = ((char*)cc)[0];
1035 if (pa > 1)
1036 {
1037 *buf++ = ((char*)cc)[3];
1038 *buf++ = ((char*)cc)[2];
1039 }
1040 }
1041
1042 len += pa*sizeof(wxUint16);
1043 psz++;
1044 }
1045 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1046
1047 return len;
1048}
1049
1050#endif // WC_UTF16
1051
1052
1053// ----------------------------------------------------------------------------
1054// UTF-32
1055// ----------------------------------------------------------------------------
1056
1057#ifdef WORDS_BIGENDIAN
1058#define wxMBConvUTF32straight wxMBConvUTF32BE
1059#define wxMBConvUTF32swap wxMBConvUTF32LE
1060#else
1061#define wxMBConvUTF32swap wxMBConvUTF32BE
1062#define wxMBConvUTF32straight wxMBConvUTF32LE
1063#endif
1064
1065
1066WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1067WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1068
1069
1070#ifdef WC_UTF16
1071
1072// copy 32bit MB to 16bit String
1073size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1074{
1075 size_t len=0;
1076
1077 while (*(wxUint32*)psz && (!buf || len < n))
1078 {
1079 wxUint16 cc[2];
1080
1081 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1082 if (pa == (size_t)-1)
1083 return pa;
1084
1085 if (buf)
1086 {
1087 *buf++ = cc[0];
1088 if (pa > 1)
1089 *buf++ = cc[1];
1090 }
1091 len += pa;
1092 psz += sizeof(wxUint32);
1093 }
1094 if (buf && len<n) *buf=0;
1095
1096 return len;
1097}
1098
1099
1100// copy 16bit String to 32bit MB
1101size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1102{
1103 size_t len=0;
1104
1105 while (*psz && (!buf || len < n))
1106 {
1107 wxUint32 cc;
1108
b5153fd8
VZ
1109 // cast is ok for WC_UTF16
1110 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
1111 if (pa == (size_t)-1)
1112 return pa;
1113
1114 if (buf)
1115 {
1116 *(wxUint32*)buf = cc;
1117 buf += sizeof(wxUint32);
1118 }
1119 len += sizeof(wxUint32);
1120 psz += pa;
1121 }
b5153fd8
VZ
1122
1123 if (buf && len<=n-sizeof(wxUint32))
1124 *(wxUint32*)buf=0;
c91830cb
VZ
1125
1126 return len;
1127}
1128
1129
1130
1131// swap 32bit MB to 16bit String
1132size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1133{
1134 size_t len=0;
1135
1136 while (*(wxUint32*)psz && (!buf || len < n))
1137 {
1138 char tmp[4];
1139 tmp[0] = psz[3]; tmp[1] = psz[2];
1140 tmp[2] = psz[1]; tmp[3] = psz[0];
1141
1142
1143 wxUint16 cc[2];
1144
1145 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1146 if (pa == (size_t)-1)
1147 return pa;
1148
1149 if (buf)
1150 {
1151 *buf++ = cc[0];
1152 if (pa > 1)
1153 *buf++ = cc[1];
1154 }
1155 len += pa;
1156 psz += sizeof(wxUint32);
1157 }
b5153fd8
VZ
1158
1159 if (buf && len<n)
1160 *buf=0;
c91830cb
VZ
1161
1162 return len;
1163}
1164
1165
1166// swap 16bit String to 32bit MB
1167size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1168{
1169 size_t len=0;
1170
1171 while (*psz && (!buf || len < n))
1172 {
1173 char cc[4];
1174
b5153fd8
VZ
1175 // cast is ok for WC_UTF16
1176 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
1177 if (pa == (size_t)-1)
1178 return pa;
1179
1180 if (buf)
1181 {
1182 *buf++ = cc[3];
1183 *buf++ = cc[2];
1184 *buf++ = cc[1];
1185 *buf++ = cc[0];
1186 }
1187 len += sizeof(wxUint32);
1188 psz += pa;
1189 }
b5153fd8
VZ
1190
1191 if (buf && len<=n-sizeof(wxUint32))
1192 *(wxUint32*)buf=0;
c91830cb
VZ
1193
1194 return len;
1195}
1196
1197#else // WC_UTF16
1198
1199
1200// copy 32bit MB to 32bit String
1201size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1202{
1203 size_t len=0;
1204
1205 while (*(wxUint32*)psz && (!buf || len < n))
1206 {
1207 if (buf)
1208 *buf++ = *(wxUint32*)psz;
1209 len++;
1210 psz += sizeof(wxUint32);
1211 }
b5153fd8
VZ
1212
1213 if (buf && len<n)
1214 *buf=0;
c91830cb
VZ
1215
1216 return len;
1217}
1218
1219
1220// copy 32bit String to 32bit MB
1221size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1222{
1223 size_t len=0;
1224
1225 while (*psz && (!buf || len < n))
1226 {
1227 if (buf)
1228 {
1229 *(wxUint32*)buf = *psz;
1230 buf += sizeof(wxUint32);
1231 }
1232
1233 len += sizeof(wxUint32);
1234 psz++;
1235 }
1236
b5153fd8
VZ
1237 if (buf && len<=n-sizeof(wxUint32))
1238 *(wxUint32*)buf=0;
c91830cb
VZ
1239
1240 return len;
1241}
1242
1243
1244// swap 32bit MB to 32bit String
1245size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1246{
1247 size_t len=0;
1248
1249 while (*(wxUint32*)psz && (!buf || len < n))
1250 {
1251 if (buf)
1252 {
1253 ((char *)buf)[0] = psz[3];
1254 ((char *)buf)[1] = psz[2];
1255 ((char *)buf)[2] = psz[1];
1256 ((char *)buf)[3] = psz[0];
1257 buf++;
1258 }
1259 len++;
1260 psz += sizeof(wxUint32);
1261 }
b5153fd8
VZ
1262
1263 if (buf && len<n)
1264 *buf=0;
c91830cb
VZ
1265
1266 return len;
1267}
1268
1269
1270// swap 32bit String to 32bit MB
1271size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1272{
1273 size_t len=0;
1274
1275 while (*psz && (!buf || len < n))
1276 {
1277 if (buf)
1278 {
1279 *buf++ = ((char *)psz)[3];
1280 *buf++ = ((char *)psz)[2];
1281 *buf++ = ((char *)psz)[1];
1282 *buf++ = ((char *)psz)[0];
1283 }
1284 len += sizeof(wxUint32);
1285 psz++;
1286 }
b5153fd8
VZ
1287
1288 if (buf && len<=n-sizeof(wxUint32))
1289 *(wxUint32*)buf=0;
c91830cb
VZ
1290
1291 return len;
1292}
1293
1294
1295#endif // WC_UTF16
1296
1297
36acb880
VZ
1298// ============================================================================
1299// The classes doing conversion using the iconv_xxx() functions
1300// ============================================================================
3caec1bb 1301
b040e242 1302#ifdef HAVE_ICONV
3a0d76bc 1303
b1d547eb
VS
1304// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1305// E2BIG if output buffer is _exactly_ as big as needed. Such case is
1306// (unless there's yet another bug in glibc) the only case when iconv()
1307// returns with (size_t)-1 (which means error) and says there are 0 bytes
1308// left in the input buffer -- when _real_ error occurs,
1309// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1310// iconv() failure.
3caec1bb
VS
1311// [This bug does not appear in glibc 2.2.]
1312#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1313#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1314 (errno != E2BIG || bufLeft != 0))
1315#else
1316#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1317#endif
1318
ab217dba 1319#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880
VZ
1320
1321// ----------------------------------------------------------------------------
e95354ec 1322// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1323// ----------------------------------------------------------------------------
1324
e95354ec 1325class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1326{
1327public:
e95354ec
VZ
1328 wxMBConv_iconv(const wxChar *name);
1329 virtual ~wxMBConv_iconv();
36acb880 1330
bde4baac
VZ
1331 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1332 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1333
e95354ec 1334 bool IsOk() const
36acb880
VZ
1335 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1336
1337protected:
1338 // the iconv handlers used to translate from multibyte to wide char and in
1339 // the other direction
1340 iconv_t m2w,
1341 w2m;
b1d547eb
VS
1342#if wxUSE_THREADS
1343 // guards access to m2w and w2m objects
1344 wxMutex m_iconvMutex;
1345#endif
36acb880
VZ
1346
1347private:
e95354ec 1348 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
1349 // available on this machine, it will remain NULL
1350 static const char *ms_wcCharsetName;
1351
1352 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1353 // different endian-ness than the native one
405d8f46 1354 static bool ms_wcNeedsSwap;
36acb880
VZ
1355};
1356
e95354ec
VZ
1357const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1358bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1359
e95354ec 1360wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 1361{
04c79127
RR
1362 // Do it the hard way
1363 char cname[100];
1364 for (size_t i = 0; i < wxStrlen(name)+1; i++)
1365 cname[i] = (char) name[i];
1366
36acb880
VZ
1367 // check for charset that represents wchar_t:
1368 if (ms_wcCharsetName == NULL)
f1339c56 1369 {
e95354ec 1370 ms_wcNeedsSwap = false;
dccce9ea 1371
36acb880
VZ
1372 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1373 ms_wcCharsetName = WC_NAME_BEST;
04c79127 1374 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 1375
36acb880
VZ
1376 if (m2w == (iconv_t)-1)
1377 {
1378 // try charset w/o bytesex info (e.g. "UCS4")
1379 // and check for bytesex ourselves:
1380 ms_wcCharsetName = WC_NAME;
04c79127 1381 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
1382
1383 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
1384 if (m2w == (iconv_t)-1)
1385 {
36acb880 1386 ms_wcCharsetName = "WCHAR_T";
04c79127 1387 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 1388 }
3a0d76bc 1389
36acb880
VZ
1390 if (m2w != (iconv_t)-1)
1391 {
1392 char buf[2], *bufPtr;
1393 wchar_t wbuf[2], *wbufPtr;
1394 size_t insz, outsz;
1395 size_t res;
1396
1397 buf[0] = 'A';
1398 buf[1] = 0;
1399 wbuf[0] = 0;
1400 insz = 2;
1401 outsz = SIZEOF_WCHAR_T * 2;
1402 wbufPtr = wbuf;
1403 bufPtr = buf;
1404
1405 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1406 (char**)&wbufPtr, &outsz);
1407
1408 if (ICONV_FAILED(res, insz))
3a0d76bc 1409 {
36acb880
VZ
1410 ms_wcCharsetName = NULL;
1411 wxLogLastError(wxT("iconv"));
2b5f62a0 1412 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
1413 }
1414 else
1415 {
36acb880 1416 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
1417 }
1418 }
36acb880
VZ
1419 else
1420 {
1421 ms_wcCharsetName = NULL;
373658eb 1422
77ffb593 1423 // VS: we must not output an error here, since wxWidgets will safely
957686c8
VS
1424 // fall back to using wxEncodingConverter.
1425 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1426 //wxLogError(
36acb880 1427 }
3a0d76bc 1428 }
36acb880 1429 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 1430 }
36acb880 1431 else // we already have ms_wcCharsetName
3caec1bb 1432 {
04c79127 1433 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 1434 }
dccce9ea 1435
36acb880
VZ
1436 // NB: don't ever pass NULL to iconv_open(), it may crash!
1437 if ( ms_wcCharsetName )
f1339c56 1438 {
04c79127 1439 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 1440 }
405d8f46
VZ
1441 else
1442 {
1443 w2m = (iconv_t)-1;
1444 }
36acb880 1445}
3caec1bb 1446
e95354ec 1447wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
1448{
1449 if ( m2w != (iconv_t)-1 )
1450 iconv_close(m2w);
1451 if ( w2m != (iconv_t)-1 )
1452 iconv_close(w2m);
1453}
3a0d76bc 1454
bde4baac 1455size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1456{
b1d547eb
VS
1457#if wxUSE_THREADS
1458 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1459 // Unfortunately there is a couple of global wxCSConv objects such as
1460 // wxConvLocal that are used all over wx code, so we have to make sure
1461 // the handle is used by at most one thread at the time. Otherwise
1462 // only a few wx classes would be safe to use from non-main threads
1463 // as MB<->WC conversion would fail "randomly".
1464 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1465#endif
3698ae71 1466
36acb880
VZ
1467 size_t inbuf = strlen(psz);
1468 size_t outbuf = n * SIZEOF_WCHAR_T;
1469 size_t res, cres;
1470 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1471 wchar_t *bufPtr = buf;
1472 const char *pszPtr = psz;
1473
1474 if (buf)
1475 {
1476 // have destination buffer, convert there
1477 cres = iconv(m2w,
1478 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1479 (char**)&bufPtr, &outbuf);
1480 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1481
36acb880 1482 if (ms_wcNeedsSwap)
3a0d76bc 1483 {
36acb880
VZ
1484 // convert to native endianness
1485 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 1486 }
adb45366 1487
49dd9820
VS
1488 // NB: iconv was given only strlen(psz) characters on input, and so
1489 // it couldn't convert the trailing zero. Let's do it ourselves
1490 // if there's some room left for it in the output buffer.
1491 if (res < n)
1492 buf[res] = 0;
36acb880
VZ
1493 }
1494 else
1495 {
1496 // no destination buffer... convert using temp buffer
1497 // to calculate destination buffer requirement
1498 wchar_t tbuf[8];
1499 res = 0;
1500 do {
1501 bufPtr = tbuf;
1502 outbuf = 8*SIZEOF_WCHAR_T;
1503
1504 cres = iconv(m2w,
1505 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1506 (char**)&bufPtr, &outbuf );
1507
1508 res += 8-(outbuf/SIZEOF_WCHAR_T);
1509 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1510 }
dccce9ea 1511
36acb880 1512 if (ICONV_FAILED(cres, inbuf))
f1339c56 1513 {
36acb880
VZ
1514 //VS: it is ok if iconv fails, hence trace only
1515 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1516 return (size_t)-1;
1517 }
1518
1519 return res;
1520}
1521
bde4baac 1522size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1523{
b1d547eb
VS
1524#if wxUSE_THREADS
1525 // NB: explained in MB2WC
1526 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1527#endif
3698ae71 1528
f8d791e0 1529 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1530 size_t outbuf = n;
1531 size_t res, cres;
3a0d76bc 1532
36acb880 1533 wchar_t *tmpbuf = 0;
3caec1bb 1534
36acb880
VZ
1535 if (ms_wcNeedsSwap)
1536 {
1537 // need to copy to temp buffer to switch endianness
1538 // this absolutely doesn't rock!
1539 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1540 // could be in read-only memory, or be accessed in some other thread)
1541 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1542 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1543 WC_BSWAP(tmpbuf, inbuf)
1544 psz=tmpbuf;
1545 }
3a0d76bc 1546
36acb880
VZ
1547 if (buf)
1548 {
1549 // have destination buffer, convert there
1550 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1551
36acb880 1552 res = n-outbuf;
adb45366 1553
49dd9820
VS
1554 // NB: iconv was given only wcslen(psz) characters on input, and so
1555 // it couldn't convert the trailing zero. Let's do it ourselves
1556 // if there's some room left for it in the output buffer.
1557 if (res < n)
1558 buf[0] = 0;
36acb880
VZ
1559 }
1560 else
1561 {
1562 // no destination buffer... convert using temp buffer
1563 // to calculate destination buffer requirement
1564 char tbuf[16];
1565 res = 0;
1566 do {
1567 buf = tbuf; outbuf = 16;
1568
1569 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1570
36acb880
VZ
1571 res += 16 - outbuf;
1572 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1573 }
dccce9ea 1574
36acb880
VZ
1575 if (ms_wcNeedsSwap)
1576 {
1577 free(tmpbuf);
1578 }
dccce9ea 1579
36acb880
VZ
1580 if (ICONV_FAILED(cres, inbuf))
1581 {
1582 //VS: it is ok if iconv fails, hence trace only
1583 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1584 return (size_t)-1;
1585 }
1586
1587 return res;
1588}
1589
b040e242 1590#endif // HAVE_ICONV
36acb880 1591
e95354ec 1592
36acb880
VZ
1593// ============================================================================
1594// Win32 conversion classes
1595// ============================================================================
1cd52418 1596
e95354ec 1597#ifdef wxHAVE_WIN32_MB2WC
373658eb 1598
8b04d4c4 1599// from utils.cpp
d775fa82 1600#if wxUSE_FONTMAP
8b04d4c4
VZ
1601extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1602extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1603#endif
373658eb 1604
e95354ec 1605class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1606{
1607public:
bde4baac
VZ
1608 wxMBConv_win32()
1609 {
1610 m_CodePage = CP_ACP;
1611 }
1612
7608a683 1613#if wxUSE_FONTMAP
e95354ec 1614 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1615 {
1616 m_CodePage = wxCharsetToCodepage(name);
1617 }
dccce9ea 1618
e95354ec 1619 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1620 {
1621 m_CodePage = wxEncodingToCodepage(encoding);
1622 }
7608a683 1623#endif
8b04d4c4 1624
bde4baac 1625 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1626 {
02272c9c
VZ
1627 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1628 // the behaviour is not compatible with the Unix version (using iconv)
1629 // and break the library itself, e.g. wxTextInputStream::NextChar()
1630 // wouldn't work if reading an incomplete MB char didn't result in an
1631 // error
667e5b3e
VZ
1632 //
1633 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1634 // an error (tested under Windows Server 2003) and apparently it is
1635 // done on purpose, i.e. the function accepts any input in this case
1636 // and although I'd prefer to return error on ill-formed output, our
1637 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1638 // explicitly ill-formed according to RFC 2152) neither so we don't
1639 // even have any fallback here...
1640 int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1641
2b5f62a0
VZ
1642 const size_t len = ::MultiByteToWideChar
1643 (
1644 m_CodePage, // code page
667e5b3e 1645 flags, // flags: fall on error
2b5f62a0
VZ
1646 psz, // input string
1647 -1, // its length (NUL-terminated)
b4da152e 1648 buf, // output string
2b5f62a0
VZ
1649 buf ? n : 0 // size of output buffer
1650 );
1651
03a991bc
VZ
1652 // note that it returns count of written chars for buf != NULL and size
1653 // of the needed buffer for buf == NULL so in either case the length of
1654 // the string (which never includes the terminating NUL) is one less
1655 return len ? len - 1 : (size_t)-1;
f1339c56 1656 }
dccce9ea 1657
13dd924a 1658 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1659 {
13dd924a
VZ
1660 /*
1661 we have a problem here: by default, WideCharToMultiByte() may
1662 replace characters unrepresentable in the target code page with bad
1663 quality approximations such as turning "1/2" symbol (U+00BD) into
1664 "1" for the code pages which don't have it and we, obviously, want
1665 to avoid this at any price
d775fa82 1666
13dd924a
VZ
1667 the trouble is that this function does it _silently_, i.e. it won't
1668 even tell us whether it did or not... Win98/2000 and higher provide
1669 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1670 we have to resort to a round trip, i.e. check that converting back
1671 results in the same string -- this is, of course, expensive but
1672 otherwise we simply can't be sure to not garble the data.
1673 */
1674
1675 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1676 // it doesn't work with CJK encodings (which we test for rather roughly
1677 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1678 // supporting it
907173e5
WS
1679 BOOL usedDef wxDUMMY_INITIALIZE(false);
1680 BOOL *pUsedDef;
13dd924a
VZ
1681 int flags;
1682 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1683 {
1684 // it's our lucky day
1685 flags = WC_NO_BEST_FIT_CHARS;
1686 pUsedDef = &usedDef;
1687 }
1688 else // old system or unsupported encoding
1689 {
1690 flags = 0;
1691 pUsedDef = NULL;
1692 }
1693
2b5f62a0
VZ
1694 const size_t len = ::WideCharToMultiByte
1695 (
1696 m_CodePage, // code page
13dd924a
VZ
1697 flags, // either none or no best fit
1698 pwz, // input string
2b5f62a0
VZ
1699 -1, // it is (wide) NUL-terminated
1700 buf, // output buffer
1701 buf ? n : 0, // and its size
1702 NULL, // default "replacement" char
13dd924a 1703 pUsedDef // [out] was it used?
2b5f62a0
VZ
1704 );
1705
13dd924a
VZ
1706 if ( !len )
1707 {
1708 // function totally failed
1709 return (size_t)-1;
1710 }
1711
1712 // if we were really converting, check if we succeeded
1713 if ( buf )
1714 {
1715 if ( flags )
1716 {
1717 // check if the conversion failed, i.e. if any replacements
1718 // were done
1719 if ( usedDef )
1720 return (size_t)-1;
1721 }
1722 else // we must resort to double tripping...
1723 {
1724 wxWCharBuffer wcBuf(n);
1725 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1726 wcscmp(wcBuf, pwz) != 0 )
1727 {
1728 // we didn't obtain the same thing we started from, hence
1729 // the conversion was lossy and we consider that it failed
1730 return (size_t)-1;
1731 }
1732 }
1733 }
1734
03a991bc 1735 // see the comment above for the reason of "len - 1"
13dd924a 1736 return len - 1;
f1339c56 1737 }
dccce9ea 1738
13dd924a
VZ
1739 bool IsOk() const { return m_CodePage != -1; }
1740
1741private:
1742 static bool CanUseNoBestFit()
1743 {
1744 static int s_isWin98Or2k = -1;
1745
1746 if ( s_isWin98Or2k == -1 )
1747 {
1748 int verMaj, verMin;
1749 switch ( wxGetOsVersion(&verMaj, &verMin) )
1750 {
1751 case wxWIN95:
1752 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1753 break;
1754
1755 case wxWINDOWS_NT:
1756 s_isWin98Or2k = verMaj >= 5;
1757 break;
1758
1759 default:
1760 // unknown, be conseravtive by default
1761 s_isWin98Or2k = 0;
1762 }
1763
1764 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1765 }
1766
1767 return s_isWin98Or2k == 1;
1768 }
f1339c56 1769
b1d66b54 1770 long m_CodePage;
1cd52418 1771};
e95354ec
VZ
1772
1773#endif // wxHAVE_WIN32_MB2WC
1774
f7e98dee
RN
1775// ============================================================================
1776// Cocoa conversion classes
1777// ============================================================================
1778
1779#if defined(__WXCOCOA__)
1780
ecd9653b 1781// RN: There is no UTF-32 support in either Core Foundation or
f7e98dee
RN
1782// Cocoa. Strangely enough, Core Foundation uses UTF-32
1783// internally quite a bit - it's just not public (yet).
1784
1785#include <CoreFoundation/CFString.h>
1786#include <CoreFoundation/CFStringEncodingExt.h>
1787
1788CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 1789{
638357a0 1790 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ecd9653b
WS
1791 if ( encoding == wxFONTENCODING_DEFAULT )
1792 {
638357a0 1793 enc = CFStringGetSystemEncoding();
ecd9653b
WS
1794 }
1795 else switch( encoding)
1796 {
1797 case wxFONTENCODING_ISO8859_1 :
1798 enc = kCFStringEncodingISOLatin1 ;
1799 break ;
1800 case wxFONTENCODING_ISO8859_2 :
1801 enc = kCFStringEncodingISOLatin2;
1802 break ;
1803 case wxFONTENCODING_ISO8859_3 :
1804 enc = kCFStringEncodingISOLatin3 ;
1805 break ;
1806 case wxFONTENCODING_ISO8859_4 :
1807 enc = kCFStringEncodingISOLatin4;
1808 break ;
1809 case wxFONTENCODING_ISO8859_5 :
1810 enc = kCFStringEncodingISOLatinCyrillic;
1811 break ;
1812 case wxFONTENCODING_ISO8859_6 :
1813 enc = kCFStringEncodingISOLatinArabic;
1814 break ;
1815 case wxFONTENCODING_ISO8859_7 :
1816 enc = kCFStringEncodingISOLatinGreek;
1817 break ;
1818 case wxFONTENCODING_ISO8859_8 :
1819 enc = kCFStringEncodingISOLatinHebrew;
1820 break ;
1821 case wxFONTENCODING_ISO8859_9 :
1822 enc = kCFStringEncodingISOLatin5;
1823 break ;
1824 case wxFONTENCODING_ISO8859_10 :
1825 enc = kCFStringEncodingISOLatin6;
1826 break ;
1827 case wxFONTENCODING_ISO8859_11 :
1828 enc = kCFStringEncodingISOLatinThai;
1829 break ;
1830 case wxFONTENCODING_ISO8859_13 :
1831 enc = kCFStringEncodingISOLatin7;
1832 break ;
1833 case wxFONTENCODING_ISO8859_14 :
1834 enc = kCFStringEncodingISOLatin8;
1835 break ;
1836 case wxFONTENCODING_ISO8859_15 :
1837 enc = kCFStringEncodingISOLatin9;
1838 break ;
1839
1840 case wxFONTENCODING_KOI8 :
1841 enc = kCFStringEncodingKOI8_R;
1842 break ;
1843 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1844 enc = kCFStringEncodingDOSRussian;
1845 break ;
1846
1847// case wxFONTENCODING_BULGARIAN :
1848// enc = ;
1849// break ;
1850
1851 case wxFONTENCODING_CP437 :
1852 enc =kCFStringEncodingDOSLatinUS ;
1853 break ;
1854 case wxFONTENCODING_CP850 :
1855 enc = kCFStringEncodingDOSLatin1;
1856 break ;
1857 case wxFONTENCODING_CP852 :
1858 enc = kCFStringEncodingDOSLatin2;
1859 break ;
1860 case wxFONTENCODING_CP855 :
1861 enc = kCFStringEncodingDOSCyrillic;
1862 break ;
1863 case wxFONTENCODING_CP866 :
1864 enc =kCFStringEncodingDOSRussian ;
1865 break ;
1866 case wxFONTENCODING_CP874 :
1867 enc = kCFStringEncodingDOSThai;
1868 break ;
1869 case wxFONTENCODING_CP932 :
1870 enc = kCFStringEncodingDOSJapanese;
1871 break ;
1872 case wxFONTENCODING_CP936 :
1873 enc =kCFStringEncodingDOSChineseSimplif ;
1874 break ;
1875 case wxFONTENCODING_CP949 :
1876 enc = kCFStringEncodingDOSKorean;
1877 break ;
1878 case wxFONTENCODING_CP950 :
1879 enc = kCFStringEncodingDOSChineseTrad;
1880 break ;
ecd9653b
WS
1881 case wxFONTENCODING_CP1250 :
1882 enc = kCFStringEncodingWindowsLatin2;
1883 break ;
1884 case wxFONTENCODING_CP1251 :
1885 enc =kCFStringEncodingWindowsCyrillic ;
1886 break ;
1887 case wxFONTENCODING_CP1252 :
1888 enc =kCFStringEncodingWindowsLatin1 ;
1889 break ;
1890 case wxFONTENCODING_CP1253 :
1891 enc = kCFStringEncodingWindowsGreek;
1892 break ;
1893 case wxFONTENCODING_CP1254 :
1894 enc = kCFStringEncodingWindowsLatin5;
1895 break ;
1896 case wxFONTENCODING_CP1255 :
1897 enc =kCFStringEncodingWindowsHebrew ;
1898 break ;
1899 case wxFONTENCODING_CP1256 :
1900 enc =kCFStringEncodingWindowsArabic ;
1901 break ;
1902 case wxFONTENCODING_CP1257 :
1903 enc = kCFStringEncodingWindowsBalticRim;
1904 break ;
638357a0
RN
1905// This only really encodes to UTF7 (if that) evidently
1906// case wxFONTENCODING_UTF7 :
1907// enc = kCFStringEncodingNonLossyASCII ;
1908// break ;
ecd9653b
WS
1909 case wxFONTENCODING_UTF8 :
1910 enc = kCFStringEncodingUTF8 ;
1911 break ;
1912 case wxFONTENCODING_EUC_JP :
1913 enc = kCFStringEncodingEUC_JP;
1914 break ;
1915 case wxFONTENCODING_UTF16 :
f7e98dee 1916 enc = kCFStringEncodingUnicode ;
ecd9653b 1917 break ;
f7e98dee
RN
1918 case wxFONTENCODING_MACROMAN :
1919 enc = kCFStringEncodingMacRoman ;
1920 break ;
1921 case wxFONTENCODING_MACJAPANESE :
1922 enc = kCFStringEncodingMacJapanese ;
1923 break ;
1924 case wxFONTENCODING_MACCHINESETRAD :
1925 enc = kCFStringEncodingMacChineseTrad ;
1926 break ;
1927 case wxFONTENCODING_MACKOREAN :
1928 enc = kCFStringEncodingMacKorean ;
1929 break ;
1930 case wxFONTENCODING_MACARABIC :
1931 enc = kCFStringEncodingMacArabic ;
1932 break ;
1933 case wxFONTENCODING_MACHEBREW :
1934 enc = kCFStringEncodingMacHebrew ;
1935 break ;
1936 case wxFONTENCODING_MACGREEK :
1937 enc = kCFStringEncodingMacGreek ;
1938 break ;
1939 case wxFONTENCODING_MACCYRILLIC :
1940 enc = kCFStringEncodingMacCyrillic ;
1941 break ;
1942 case wxFONTENCODING_MACDEVANAGARI :
1943 enc = kCFStringEncodingMacDevanagari ;
1944 break ;
1945 case wxFONTENCODING_MACGURMUKHI :
1946 enc = kCFStringEncodingMacGurmukhi ;
1947 break ;
1948 case wxFONTENCODING_MACGUJARATI :
1949 enc = kCFStringEncodingMacGujarati ;
1950 break ;
1951 case wxFONTENCODING_MACORIYA :
1952 enc = kCFStringEncodingMacOriya ;
1953 break ;
1954 case wxFONTENCODING_MACBENGALI :
1955 enc = kCFStringEncodingMacBengali ;
1956 break ;
1957 case wxFONTENCODING_MACTAMIL :
1958 enc = kCFStringEncodingMacTamil ;
1959 break ;
1960 case wxFONTENCODING_MACTELUGU :
1961 enc = kCFStringEncodingMacTelugu ;
1962 break ;
1963 case wxFONTENCODING_MACKANNADA :
1964 enc = kCFStringEncodingMacKannada ;
1965 break ;
1966 case wxFONTENCODING_MACMALAJALAM :
1967 enc = kCFStringEncodingMacMalayalam ;
1968 break ;
1969 case wxFONTENCODING_MACSINHALESE :
1970 enc = kCFStringEncodingMacSinhalese ;
1971 break ;
1972 case wxFONTENCODING_MACBURMESE :
1973 enc = kCFStringEncodingMacBurmese ;
1974 break ;
1975 case wxFONTENCODING_MACKHMER :
1976 enc = kCFStringEncodingMacKhmer ;
1977 break ;
1978 case wxFONTENCODING_MACTHAI :
1979 enc = kCFStringEncodingMacThai ;
1980 break ;
1981 case wxFONTENCODING_MACLAOTIAN :
1982 enc = kCFStringEncodingMacLaotian ;
1983 break ;
1984 case wxFONTENCODING_MACGEORGIAN :
1985 enc = kCFStringEncodingMacGeorgian ;
1986 break ;
1987 case wxFONTENCODING_MACARMENIAN :
1988 enc = kCFStringEncodingMacArmenian ;
1989 break ;
1990 case wxFONTENCODING_MACCHINESESIMP :
1991 enc = kCFStringEncodingMacChineseSimp ;
1992 break ;
1993 case wxFONTENCODING_MACTIBETAN :
1994 enc = kCFStringEncodingMacTibetan ;
1995 break ;
1996 case wxFONTENCODING_MACMONGOLIAN :
1997 enc = kCFStringEncodingMacMongolian ;
1998 break ;
1999 case wxFONTENCODING_MACETHIOPIC :
2000 enc = kCFStringEncodingMacEthiopic ;
2001 break ;
2002 case wxFONTENCODING_MACCENTRALEUR :
2003 enc = kCFStringEncodingMacCentralEurRoman ;
2004 break ;
2005 case wxFONTENCODING_MACVIATNAMESE :
2006 enc = kCFStringEncodingMacVietnamese ;
2007 break ;
2008 case wxFONTENCODING_MACARABICEXT :
2009 enc = kCFStringEncodingMacExtArabic ;
2010 break ;
2011 case wxFONTENCODING_MACSYMBOL :
2012 enc = kCFStringEncodingMacSymbol ;
2013 break ;
2014 case wxFONTENCODING_MACDINGBATS :
2015 enc = kCFStringEncodingMacDingbats ;
2016 break ;
2017 case wxFONTENCODING_MACTURKISH :
2018 enc = kCFStringEncodingMacTurkish ;
2019 break ;
2020 case wxFONTENCODING_MACCROATIAN :
2021 enc = kCFStringEncodingMacCroatian ;
2022 break ;
2023 case wxFONTENCODING_MACICELANDIC :
2024 enc = kCFStringEncodingMacIcelandic ;
2025 break ;
2026 case wxFONTENCODING_MACROMANIAN :
2027 enc = kCFStringEncodingMacRomanian ;
2028 break ;
2029 case wxFONTENCODING_MACCELTIC :
2030 enc = kCFStringEncodingMacCeltic ;
2031 break ;
2032 case wxFONTENCODING_MACGAELIC :
2033 enc = kCFStringEncodingMacGaelic ;
2034 break ;
ecd9653b
WS
2035// case wxFONTENCODING_MACKEYBOARD :
2036// enc = kCFStringEncodingMacKeyboardGlyphs ;
2037// break ;
2038 default :
2039 // because gcc is picky
2040 break ;
2041 } ;
2042 return enc ;
f7e98dee
RN
2043}
2044
f7e98dee
RN
2045class wxMBConv_cocoa : public wxMBConv
2046{
2047public:
2048 wxMBConv_cocoa()
2049 {
2050 Init(CFStringGetSystemEncoding()) ;
2051 }
2052
a6900d10 2053#if wxUSE_FONTMAP
f7e98dee
RN
2054 wxMBConv_cocoa(const wxChar* name)
2055 {
267e11c5 2056 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
f7e98dee 2057 }
a6900d10 2058#endif
f7e98dee
RN
2059
2060 wxMBConv_cocoa(wxFontEncoding encoding)
2061 {
2062 Init( wxCFStringEncFromFontEnc(encoding) );
2063 }
2064
2065 ~wxMBConv_cocoa()
2066 {
2067 }
2068
2069 void Init( CFStringEncoding encoding)
2070 {
638357a0 2071 m_encoding = encoding ;
f7e98dee
RN
2072 }
2073
2074 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2075 {
2076 wxASSERT(szUnConv);
ecd9653b 2077
638357a0
RN
2078 CFStringRef theString = CFStringCreateWithBytes (
2079 NULL, //the allocator
2080 (const UInt8*)szUnConv,
2081 strlen(szUnConv),
2082 m_encoding,
2083 false //no BOM/external representation
f7e98dee
RN
2084 );
2085
2086 wxASSERT(theString);
2087
638357a0
RN
2088 size_t nOutLength = CFStringGetLength(theString);
2089
2090 if (szOut == NULL)
f7e98dee 2091 {
f7e98dee 2092 CFRelease(theString);
638357a0 2093 return nOutLength;
f7e98dee 2094 }
ecd9653b 2095
638357a0 2096 CFRange theRange = { 0, nOutSize };
ecd9653b 2097
638357a0
RN
2098#if SIZEOF_WCHAR_T == 4
2099 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2100#endif
3698ae71 2101
f7e98dee 2102 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
3698ae71 2103
f7e98dee 2104 CFRelease(theString);
ecd9653b 2105
638357a0 2106 szUniCharBuffer[nOutLength] = '\0' ;
f7e98dee
RN
2107
2108#if SIZEOF_WCHAR_T == 4
2109 wxMBConvUTF16 converter ;
638357a0 2110 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
f7e98dee
RN
2111 delete[] szUniCharBuffer;
2112#endif
3698ae71 2113
638357a0 2114 return nOutLength;
f7e98dee
RN
2115 }
2116
2117 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2118 {
638357a0 2119 wxASSERT(szUnConv);
3698ae71 2120
f7e98dee 2121 size_t nRealOutSize;
638357a0 2122 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 2123 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2124
f7e98dee
RN
2125#if SIZEOF_WCHAR_T == 4
2126 wxMBConvUTF16BE converter ;
2127 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2128 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2129 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2130 nBufSize /= sizeof(UniChar);
f7e98dee
RN
2131#endif
2132
2133 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2134 NULL, //allocator
2135 szUniBuffer,
2136 nBufSize,
638357a0 2137 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 2138 );
ecd9653b 2139
f7e98dee 2140 wxASSERT(theString);
ecd9653b 2141
f7e98dee 2142 //Note that CER puts a BOM when converting to unicode
638357a0
RN
2143 //so we check and use getchars instead in that case
2144 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 2145 {
638357a0
RN
2146 if (szOut != NULL)
2147 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
3698ae71 2148
638357a0
RN
2149 nRealOutSize = CFStringGetLength(theString) + 1;
2150 }
2151 else
2152 {
2153 CFStringGetBytes(
2154 theString,
2155 CFRangeMake(0, CFStringGetLength(theString)),
2156 m_encoding,
2157 0, //what to put in characters that can't be converted -
2158 //0 tells CFString to return NULL if it meets such a character
2159 false, //not an external representation
2160 (UInt8*) szOut,
3698ae71 2161 nOutSize,
638357a0
RN
2162 (CFIndex*) &nRealOutSize
2163 );
f7e98dee 2164 }
ecd9653b 2165
638357a0 2166 CFRelease(theString);
ecd9653b 2167
638357a0
RN
2168#if SIZEOF_WCHAR_T == 4
2169 delete[] szUniBuffer;
2170#endif
ecd9653b 2171
f7e98dee
RN
2172 return nRealOutSize - 1;
2173 }
2174
2175 bool IsOk() const
ecd9653b 2176 {
3698ae71 2177 return m_encoding != kCFStringEncodingInvalidId &&
638357a0 2178 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2179 }
2180
2181private:
638357a0 2182 CFStringEncoding m_encoding ;
f7e98dee
RN
2183};
2184
2185#endif // defined(__WXCOCOA__)
2186
335d31e0
SC
2187// ============================================================================
2188// Mac conversion classes
2189// ============================================================================
2190
2191#if defined(__WXMAC__) && defined(TARGET_CARBON)
2192
2193class wxMBConv_mac : public wxMBConv
2194{
2195public:
2196 wxMBConv_mac()
2197 {
2198 Init(CFStringGetSystemEncoding()) ;
2199 }
2200
2d1659cf 2201#if wxUSE_FONTMAP
335d31e0
SC
2202 wxMBConv_mac(const wxChar* name)
2203 {
267e11c5 2204 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
335d31e0 2205 }
2d1659cf 2206#endif
335d31e0
SC
2207
2208 wxMBConv_mac(wxFontEncoding encoding)
2209 {
d775fa82
WS
2210 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2211 }
2212
2213 ~wxMBConv_mac()
2214 {
2215 OSStatus status = noErr ;
2216 status = TECDisposeConverter(m_MB2WC_converter);
2217 status = TECDisposeConverter(m_WC2MB_converter);
2218 }
2219
2220
2221 void Init( TextEncodingBase encoding)
2222 {
2223 OSStatus status = noErr ;
2224 m_char_encoding = encoding ;
2225 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2226
2227 status = TECCreateConverter(&m_MB2WC_converter,
2228 m_char_encoding,
2229 m_unicode_encoding);
2230 status = TECCreateConverter(&m_WC2MB_converter,
2231 m_unicode_encoding,
2232 m_char_encoding);
2233 }
2234
335d31e0
SC
2235 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2236 {
d775fa82
WS
2237 OSStatus status = noErr ;
2238 ByteCount byteOutLen ;
2239 ByteCount byteInLen = strlen(psz) ;
2240 wchar_t *tbuf = NULL ;
2241 UniChar* ubuf = NULL ;
2242 size_t res = 0 ;
2243
2244 if (buf == NULL)
2245 {
638357a0 2246 //apple specs say at least 32
c543817b 2247 n = wxMax( 32 , byteInLen ) ;
d775fa82
WS
2248 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2249 }
2250 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
f3a355ce 2251#if SIZEOF_WCHAR_T == 4
d775fa82 2252 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2253#else
d775fa82 2254 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2255#endif
d775fa82
WS
2256 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2257 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
f3a355ce 2258#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2259 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2260 // is not properly terminated we get random characters at the end
2261 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d775fa82
WS
2262 wxMBConvUTF16BE converter ;
2263 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2264 free( ubuf ) ;
f3a355ce 2265#else
d775fa82 2266 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2267#endif
d775fa82
WS
2268 if ( buf == NULL )
2269 free(tbuf) ;
335d31e0 2270
335d31e0
SC
2271 if ( buf && res < n)
2272 buf[res] = 0;
2273
d775fa82 2274 return res ;
335d31e0
SC
2275 }
2276
2277 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82
WS
2278 {
2279 OSStatus status = noErr ;
2280 ByteCount byteOutLen ;
2281 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2282
2283 char *tbuf = NULL ;
2284
2285 if (buf == NULL)
2286 {
638357a0 2287 //apple specs say at least 32
c543817b 2288 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2289 tbuf = (char*) malloc( n ) ;
2290 }
2291
2292 ByteCount byteBufferLen = n ;
2293 UniChar* ubuf = NULL ;
f3a355ce 2294#if SIZEOF_WCHAR_T == 4
d775fa82
WS
2295 wxMBConvUTF16BE converter ;
2296 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2297 byteInLen = unicharlen ;
2298 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2299 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce 2300#else
d775fa82 2301 ubuf = (UniChar*) psz ;
f3a355ce 2302#endif
d775fa82
WS
2303 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2304 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
f3a355ce 2305#if SIZEOF_WCHAR_T == 4
d775fa82 2306 free( ubuf ) ;
f3a355ce 2307#endif
d775fa82
WS
2308 if ( buf == NULL )
2309 free(tbuf) ;
335d31e0 2310
d775fa82 2311 size_t res = byteOutLen ;
335d31e0 2312 if ( buf && res < n)
638357a0 2313 {
335d31e0 2314 buf[res] = 0;
3698ae71 2315
638357a0
RN
2316 //we need to double-trip to verify it didn't insert any ? in place
2317 //of bogus characters
2318 wxWCharBuffer wcBuf(n);
2319 size_t pszlen = wxWcslen(psz);
2320 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2321 wxWcslen(wcBuf) != pszlen ||
2322 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2323 {
2324 // we didn't obtain the same thing we started from, hence
2325 // the conversion was lossy and we consider that it failed
2326 return (size_t)-1;
2327 }
2328 }
335d31e0 2329
d775fa82 2330 return res ;
335d31e0
SC
2331 }
2332
2333 bool IsOk() const
2334 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2335
2336private:
d775fa82
WS
2337 TECObjectRef m_MB2WC_converter ;
2338 TECObjectRef m_WC2MB_converter ;
2339
2340 TextEncodingBase m_char_encoding ;
2341 TextEncodingBase m_unicode_encoding ;
335d31e0
SC
2342};
2343
2344#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 2345
36acb880
VZ
2346// ============================================================================
2347// wxEncodingConverter based conversion classes
2348// ============================================================================
2349
1e6feb95 2350#if wxUSE_FONTMAP
1cd52418 2351
e95354ec 2352class wxMBConv_wxwin : public wxMBConv
1cd52418 2353{
8b04d4c4
VZ
2354private:
2355 void Init()
2356 {
2357 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2358 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2359 }
2360
6001e347 2361public:
f1339c56
RR
2362 // temporarily just use wxEncodingConverter stuff,
2363 // so that it works while a better implementation is built
e95354ec 2364 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
2365 {
2366 if (name)
267e11c5 2367 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
2368 else
2369 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 2370
8b04d4c4
VZ
2371 Init();
2372 }
2373
e95354ec 2374 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
2375 {
2376 m_enc = enc;
2377
2378 Init();
f1339c56 2379 }
dccce9ea 2380
bde4baac 2381 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
2382 {
2383 size_t inbuf = strlen(psz);
dccce9ea 2384 if (buf)
c643a977
VS
2385 {
2386 if (!m2w.Convert(psz,buf))
2387 return (size_t)-1;
2388 }
f1339c56
RR
2389 return inbuf;
2390 }
dccce9ea 2391
bde4baac 2392 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 2393 {
f8d791e0 2394 const size_t inbuf = wxWcslen(psz);
f1339c56 2395 if (buf)
c643a977
VS
2396 {
2397 if (!w2m.Convert(psz,buf))
2398 return (size_t)-1;
2399 }
dccce9ea 2400
f1339c56
RR
2401 return inbuf;
2402 }
dccce9ea 2403
e95354ec 2404 bool IsOk() const { return m_ok; }
f1339c56
RR
2405
2406public:
8b04d4c4 2407 wxFontEncoding m_enc;
f1339c56 2408 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
2409
2410 // were we initialized successfully?
2411 bool m_ok;
fc7a2a60 2412
e95354ec 2413 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 2414};
6001e347 2415
1e6feb95
VZ
2416#endif // wxUSE_FONTMAP
2417
36acb880
VZ
2418// ============================================================================
2419// wxCSConv implementation
2420// ============================================================================
2421
8b04d4c4 2422void wxCSConv::Init()
6001e347 2423{
e95354ec
VZ
2424 m_name = NULL;
2425 m_convReal = NULL;
2426 m_deferred = true;
2427}
2428
8b04d4c4
VZ
2429wxCSConv::wxCSConv(const wxChar *charset)
2430{
2431 Init();
82713003 2432
e95354ec
VZ
2433 if ( charset )
2434 {
e95354ec
VZ
2435 SetName(charset);
2436 }
bda3d86a
VZ
2437
2438 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
2439}
2440
8b04d4c4
VZ
2441wxCSConv::wxCSConv(wxFontEncoding encoding)
2442{
bda3d86a 2443 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
2444 {
2445 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2446
2447 encoding = wxFONTENCODING_SYSTEM;
2448 }
2449
8b04d4c4
VZ
2450 Init();
2451
bda3d86a 2452 m_encoding = encoding;
8b04d4c4
VZ
2453}
2454
6001e347
RR
2455wxCSConv::~wxCSConv()
2456{
65e50848
JS
2457 Clear();
2458}
2459
54380f29 2460wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 2461 : wxMBConv()
54380f29 2462{
8b04d4c4
VZ
2463 Init();
2464
54380f29 2465 SetName(conv.m_name);
8b04d4c4 2466 m_encoding = conv.m_encoding;
54380f29
GD
2467}
2468
2469wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2470{
2471 Clear();
8b04d4c4 2472
54380f29 2473 SetName(conv.m_name);
8b04d4c4
VZ
2474 m_encoding = conv.m_encoding;
2475
54380f29
GD
2476 return *this;
2477}
2478
65e50848
JS
2479void wxCSConv::Clear()
2480{
8b04d4c4 2481 free(m_name);
e95354ec 2482 delete m_convReal;
8b04d4c4 2483
65e50848 2484 m_name = NULL;
e95354ec 2485 m_convReal = NULL;
6001e347
RR
2486}
2487
2488void wxCSConv::SetName(const wxChar *charset)
2489{
f1339c56
RR
2490 if (charset)
2491 {
2492 m_name = wxStrdup(charset);
e95354ec 2493 m_deferred = true;
f1339c56 2494 }
6001e347
RR
2495}
2496
e95354ec
VZ
2497wxMBConv *wxCSConv::DoCreate() const
2498{
c547282d
VZ
2499 // check for the special case of ASCII or ISO8859-1 charset: as we have
2500 // special knowledge of it anyhow, we don't need to create a special
2501 // conversion object
2502 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 2503 {
e95354ec
VZ
2504 // don't convert at all
2505 return NULL;
2506 }
dccce9ea 2507
e95354ec
VZ
2508 // we trust OS to do conversion better than we can so try external
2509 // conversion methods first
2510 //
2511 // the full order is:
2512 // 1. OS conversion (iconv() under Unix or Win32 API)
2513 // 2. hard coded conversions for UTF
2514 // 3. wxEncodingConverter as fall back
2515
2516 // step (1)
2517#ifdef HAVE_ICONV
c547282d 2518#if !wxUSE_FONTMAP
e95354ec 2519 if ( m_name )
c547282d 2520#endif // !wxUSE_FONTMAP
e95354ec 2521 {
c547282d
VZ
2522 wxString name(m_name);
2523
2524#if wxUSE_FONTMAP
2525 if ( name.empty() )
267e11c5 2526 name = wxFontMapperBase::Get()->GetEncodingName(m_encoding);
c547282d
VZ
2527#endif // wxUSE_FONTMAP
2528
2529 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
e95354ec
VZ
2530 if ( conv->IsOk() )
2531 return conv;
2532
2533 delete conv;
2534 }
2535#endif // HAVE_ICONV
2536
2537#ifdef wxHAVE_WIN32_MB2WC
2538 {
7608a683 2539#if wxUSE_FONTMAP
e95354ec
VZ
2540 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2541 : new wxMBConv_win32(m_encoding);
2542 if ( conv->IsOk() )
2543 return conv;
2544
2545 delete conv;
7608a683
WS
2546#else
2547 return NULL;
2548#endif
e95354ec
VZ
2549 }
2550#endif // wxHAVE_WIN32_MB2WC
d775fa82
WS
2551#if defined(__WXMAC__)
2552 {
5c3c8676 2553 // leave UTF16 and UTF32 to the built-ins of wx
3698ae71 2554 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
5c3c8676 2555 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
d775fa82
WS
2556 {
2557
2d1659cf 2558#if wxUSE_FONTMAP
d775fa82
WS
2559 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2560 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
2561#else
2562 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2563#endif
d775fa82 2564 if ( conv->IsOk() )
f7e98dee
RN
2565 return conv;
2566
2567 delete conv;
2568 }
2569 }
2570#endif
2571#if defined(__WXCOCOA__)
2572 {
2573 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2574 {
2575
a6900d10 2576#if wxUSE_FONTMAP
f7e98dee
RN
2577 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2578 : new wxMBConv_cocoa(m_encoding);
a6900d10
RN
2579#else
2580 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2581#endif
f7e98dee 2582 if ( conv->IsOk() )
d775fa82
WS
2583 return conv;
2584
2585 delete conv;
2586 }
335d31e0
SC
2587 }
2588#endif
e95354ec
VZ
2589 // step (2)
2590 wxFontEncoding enc = m_encoding;
2591#if wxUSE_FONTMAP
c547282d
VZ
2592 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2593 {
2594 // use "false" to suppress interactive dialogs -- we can be called from
2595 // anywhere and popping up a dialog from here is the last thing we want to
2596 // do
267e11c5 2597 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 2598 }
e95354ec
VZ
2599#endif // wxUSE_FONTMAP
2600
2601 switch ( enc )
2602 {
2603 case wxFONTENCODING_UTF7:
2604 return new wxMBConvUTF7;
2605
2606 case wxFONTENCODING_UTF8:
2607 return new wxMBConvUTF8;
2608
e95354ec
VZ
2609 case wxFONTENCODING_UTF16BE:
2610 return new wxMBConvUTF16BE;
2611
2612 case wxFONTENCODING_UTF16LE:
2613 return new wxMBConvUTF16LE;
2614
e95354ec
VZ
2615 case wxFONTENCODING_UTF32BE:
2616 return new wxMBConvUTF32BE;
2617
2618 case wxFONTENCODING_UTF32LE:
2619 return new wxMBConvUTF32LE;
2620
2621 default:
2622 // nothing to do but put here to suppress gcc warnings
2623 ;
2624 }
2625
2626 // step (3)
2627#if wxUSE_FONTMAP
2628 {
2629 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2630 : new wxMBConv_wxwin(m_encoding);
2631 if ( conv->IsOk() )
2632 return conv;
2633
2634 delete conv;
2635 }
2636#endif // wxUSE_FONTMAP
2637
a58d4f4d
VS
2638 // NB: This is a hack to prevent deadlock. What could otherwise happen
2639 // in Unicode build: wxConvLocal creation ends up being here
2640 // because of some failure and logs the error. But wxLog will try to
2641 // attach timestamp, for which it will need wxConvLocal (to convert
2642 // time to char* and then wchar_t*), but that fails, tries to log
2643 // error, but wxLog has a (already locked) critical section that
2644 // guards static buffer.
2645 static bool alreadyLoggingError = false;
2646 if (!alreadyLoggingError)
2647 {
2648 alreadyLoggingError = true;
2649 wxLogError(_("Cannot convert from the charset '%s'!"),
2650 m_name ? m_name
e95354ec
VZ
2651 :
2652#if wxUSE_FONTMAP
267e11c5 2653 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
e95354ec
VZ
2654#else // !wxUSE_FONTMAP
2655 wxString::Format(_("encoding %s"), m_encoding).c_str()
2656#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2657 );
a58d4f4d
VS
2658 alreadyLoggingError = false;
2659 }
e95354ec
VZ
2660
2661 return NULL;
2662}
2663
2664void wxCSConv::CreateConvIfNeeded() const
2665{
2666 if ( m_deferred )
2667 {
2668 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
2669
2670#if wxUSE_INTL
2671 // if we don't have neither the name nor the encoding, use the default
2672 // encoding for this system
2673 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2674 {
4d312c22 2675 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
2676 }
2677#endif // wxUSE_INTL
2678
e95354ec
VZ
2679 self->m_convReal = DoCreate();
2680 self->m_deferred = false;
6001e347 2681 }
6001e347
RR
2682}
2683
2684size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2685{
e95354ec 2686 CreateConvIfNeeded();
dccce9ea 2687
e95354ec
VZ
2688 if (m_convReal)
2689 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
2690
2691 // latin-1 (direct)
4def3b35 2692 size_t len = strlen(psz);
dccce9ea 2693
f1339c56
RR
2694 if (buf)
2695 {
4def3b35 2696 for (size_t c = 0; c <= len; c++)
f1339c56
RR
2697 buf[c] = (unsigned char)(psz[c]);
2698 }
dccce9ea 2699
f1339c56 2700 return len;
6001e347
RR
2701}
2702
2703size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2704{
e95354ec 2705 CreateConvIfNeeded();
dccce9ea 2706
e95354ec
VZ
2707 if (m_convReal)
2708 return m_convReal->WC2MB(buf, psz, n);
1cd52418 2709
f1339c56 2710 // latin-1 (direct)
f8d791e0 2711 const size_t len = wxWcslen(psz);
f1339c56
RR
2712 if (buf)
2713 {
4def3b35 2714 for (size_t c = 0; c <= len; c++)
24642831
VS
2715 {
2716 if (psz[c] > 0xFF)
2717 return (size_t)-1;
907173e5 2718 buf[c] = (char)psz[c];
24642831
VS
2719 }
2720 }
2721 else
2722 {
2723 for (size_t c = 0; c <= len; c++)
2724 {
2725 if (psz[c] > 0xFF)
2726 return (size_t)-1;
2727 }
f1339c56 2728 }
dccce9ea 2729
f1339c56 2730 return len;
6001e347
RR
2731}
2732
bde4baac
VZ
2733// ----------------------------------------------------------------------------
2734// globals
2735// ----------------------------------------------------------------------------
2736
2737#ifdef __WINDOWS__
2738 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
2739#elif defined(__WXMAC__) && !defined(__MACH__)
2740 static wxMBConv_mac wxConvLibcObj ;
bde4baac 2741#else
dcc8fac0 2742 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
2743#endif
2744
2745static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2746static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2747static wxMBConvUTF7 wxConvUTF7Obj;
2748static wxMBConvUTF8 wxConvUTF8Obj;
c12b7f79 2749
bde4baac
VZ
2750WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2751WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2752WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2753WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2754WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2755WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
f5a1953b
VZ
2756WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2757#ifdef __WXOSX__
ea8ce907 2758 wxConvUTF8Obj;
f5a1953b 2759#else
ea8ce907 2760 wxConvLibcObj;
f5a1953b
VZ
2761#endif
2762
bde4baac
VZ
2763
2764#else // !wxUSE_WCHAR_T
2765
2766// stand-ins in absence of wchar_t
2767WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2768 wxConvISO8859_1,
2769 wxConvLocal,
2770 wxConvUTF8;
2771
2772#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
2773
2774