]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
workback defs for OSX
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
f6bcfd97
BP
15// ============================================================================
16// declarations
17// ============================================================================
18
19// ----------------------------------------------------------------------------
20// headers
21// ----------------------------------------------------------------------------
22
14f355c2 23#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
24 #pragma implementation "strconv.h"
25#endif
26
27// For compilers that support precompilation, includes "wx.h".
28#include "wx/wxprec.h"
29
30#ifdef __BORLANDC__
31 #pragma hdrstop
32#endif
33
373658eb
VZ
34#ifndef WX_PRECOMP
35 #include "wx/intl.h"
36 #include "wx/log.h"
37#endif // WX_PRECOMP
38
bde4baac
VZ
39#include "wx/strconv.h"
40
41#if wxUSE_WCHAR_T
42
0a1c1e62 43#ifdef __WXMSW__
373658eb 44 #include "wx/msw/private.h"
7608a683
WS
45#endif
46
47#ifdef __WINDOWS__
13dd924a 48 #include "wx/msw/missing.h"
0a1c1e62
GRG
49#endif
50
1c193821 51#ifndef __WXWINCE__
1cd52418 52#include <errno.h>
1c193821
JS
53#endif
54
6001e347
RR
55#include <ctype.h>
56#include <string.h>
57#include <stdlib.h>
58
e95354ec
VZ
59#if defined(__WIN32__) && !defined(__WXMICROWIN__)
60 #define wxHAVE_WIN32_MB2WC
61#endif // __WIN32__ but !__WXMICROWIN__
62
373658eb
VZ
63// ----------------------------------------------------------------------------
64// headers
65// ----------------------------------------------------------------------------
7af284fd 66
6001e347 67#ifdef __SALFORDC__
373658eb 68 #include <clib.h>
6001e347
RR
69#endif
70
b040e242 71#ifdef HAVE_ICONV
373658eb 72 #include <iconv.h>
1cd52418 73#endif
1cd52418 74
373658eb
VZ
75#include "wx/encconv.h"
76#include "wx/fontmap.h"
7608a683 77#include "wx/utils.h"
373658eb 78
335d31e0 79#ifdef __WXMAC__
4227afa4
SC
80#include <ATSUnicode.h>
81#include <TextCommon.h>
82#include <TextEncodingConverter.h>
335d31e0
SC
83
84#include "wx/mac/private.h" // includes mac headers
85#endif
373658eb
VZ
86// ----------------------------------------------------------------------------
87// macros
88// ----------------------------------------------------------------------------
3e61dfb0 89
1cd52418 90#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
3a0d76bc 91#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
1cd52418
OK
92
93#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
94 #define WC_NAME "UCS4"
95 #define WC_BSWAP BSWAP_UCS4
96 #ifdef WORDS_BIGENDIAN
97 #define WC_NAME_BEST "UCS-4BE"
98 #else
99 #define WC_NAME_BEST "UCS-4LE"
100 #endif
1cd52418 101#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
102 #define WC_NAME "UTF16"
103 #define WC_BSWAP BSWAP_UTF16
a3f2769e 104 #define WC_UTF16
3a0d76bc
VS
105 #ifdef WORDS_BIGENDIAN
106 #define WC_NAME_BEST "UTF-16BE"
107 #else
108 #define WC_NAME_BEST "UTF-16LE"
109 #endif
bab1e722 110#else // sizeof(wchar_t) != 2 nor 4
bde4baac
VZ
111 // does this ever happen?
112 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1cd52418
OK
113#endif
114
373658eb
VZ
115// ============================================================================
116// implementation
117// ============================================================================
118
119// ----------------------------------------------------------------------------
c91830cb 120// UTF-16 en/decoding to/from UCS-4
373658eb 121// ----------------------------------------------------------------------------
6001e347 122
b0a6bb75 123
// Encode the UCS-4 value `input` as UTF-16.
//
// Returns the number of 16 bit units making up the encoding (1 or 2) or
// (size_t)-1 if `input` is 0x110000 or greater. The units are stored in
// `output` unless it is NULL, in which case only the count is computed.
static size_t encode_utf16(wxUint32 input, wxUint16 *output)
{
    // too big to be represented by a surrogate pair
    if (input >= 0x110000)
        return (size_t)-1;

    // fits into a single unit: copy it as is
    if (input < 0x10000)
    {
        if (output)
            *output = (wxUint16) input;
        return 1;
    }

    // needs a surrogate pair: adding 0xd7c0 to the top bits both subtracts
    // the 0x10000 offset and adds the 0xd800 base of the first surrogate
    if (output)
    {
        output[0] = (wxUint16) ((input >> 10) + 0xd7c0);
        output[1] = (wxUint16) ((input & 0x3ff) + 0xdc00);
    }
    return 2;
}
146
// Decode one character from the UTF-16 sequence starting at `input`.
//
// Returns the number of 16 bit units consumed (1 or 2) and stores the decoded
// UCS-4 value in `output`. If the sequence is malformed (a high surrogate not
// followed by a low one, or a low surrogate without a preceding high one),
// returns (size_t)-1 and stores the offending first unit in `output`.
//
// The second unit is only read when the first one is a high surrogate.
static size_t decode_utf16(const wxUint16* input, wxUint32& output)
{
    const wxUint16 lead = input[0];

    if ((lead < 0xd800) || (lead > 0xdfff))
    {
        // not a surrogate at all: the unit is the character
        output = lead;
        return 1;
    }

    if (lead >= 0xdc00)
    {
        // a low surrogate can't start a pair
        output = lead;
        return (size_t)-1;
    }

    const wxUint16 trail = input[1];
    if ((trail < 0xdc00) || (trail > 0xdfff))
    {
        // high surrogate not followed by a low one (note that 0xdfff itself
        // is a perfectly valid low surrogate)
        output = lead;
        return (size_t)-1;
    }

    // subtracting 0xd7c0 removes the 0xd800 base and adds back the 0x10000
    // offset (0x40 << 10) in one go
    output = ((lead - 0xd7c0) << 10) + (trail - 0xdc00);
    return 2;
}
165
b0a6bb75 166
f6bcfd97 167// ----------------------------------------------------------------------------
6001e347 168// wxMBConv
f6bcfd97 169// ----------------------------------------------------------------------------
2c53a80a
WS
170
171wxMBConv::~wxMBConv()
172{
173 // nothing to do here (necessary for Darwin linking probably)
174}
6001e347 175
6001e347
RR
176const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
177{
2b5f62a0 178 if ( psz )
6001e347 179 {
2b5f62a0
VZ
180 // calculate the length of the buffer needed first
181 size_t nLen = MB2WC(NULL, psz, 0);
182 if ( nLen != (size_t)-1 )
183 {
184 // now do the actual conversion
185 wxWCharBuffer buf(nLen);
635f33ce
VS
186 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
187 if ( nLen != (size_t)-1 )
188 {
189 return buf;
190 }
2b5f62a0 191 }
f6bcfd97 192 }
2b5f62a0
VZ
193
194 wxWCharBuffer buf((wchar_t *)NULL);
195
196 return buf;
6001e347
RR
197}
198
e5cceba0 199const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 200{
2b5f62a0
VZ
201 if ( pwz )
202 {
203 size_t nLen = WC2MB(NULL, pwz, 0);
204 if ( nLen != (size_t)-1 )
205 {
c91830cb 206 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
635f33ce
VS
207 nLen = WC2MB(buf.data(), pwz, nLen + 4);
208 if ( nLen != (size_t)-1 )
209 {
210 return buf;
211 }
2b5f62a0
VZ
212 }
213 }
214
215 wxCharBuffer buf((char *)NULL);
e5cceba0 216
e5cceba0 217 return buf;
6001e347
RR
218}
219
f5fb6871 220const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 221{
f5fb6871
RN
222 wxASSERT(pOutSize != NULL);
223
e4e3bbb4
RN
224 const char* szEnd = szString + nStringLen + 1;
225 const char* szPos = szString;
226 const char* szStart = szPos;
227
228 size_t nActualLength = 0;
f5fb6871
RN
229 size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
230
231 wxWCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
232
233 //Convert the string until the length() is reached, continuing the
234 //loop every time a null character is reached
235 while(szPos != szEnd)
236 {
237 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
238
239 //Get the length of the current (sub)string
240 size_t nLen = MB2WC(NULL, szPos, 0);
241
242 //Invalid conversion?
243 if( nLen == (size_t)-1 )
f5fb6871
RN
244 {
245 *pOutSize = 0;
246 theBuffer.data()[0u] = wxT('\0');
247 return theBuffer;
248 }
249
e4e3bbb4
RN
250
251 //Increase the actual length (+1 for current null character)
252 nActualLength += nLen + 1;
253
f5fb6871
RN
254 //if buffer too big, realloc the buffer
255 if (nActualLength > (nCurrentSize+1))
256 {
257 wxWCharBuffer theNewBuffer(nCurrentSize << 1);
258 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
259 theBuffer = theNewBuffer;
260 nCurrentSize <<= 1;
261 }
262
263 //Convert the current (sub)string
264 if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 265 {
f5fb6871
RN
266 *pOutSize = 0;
267 theBuffer.data()[0u] = wxT('\0');
268 return theBuffer;
e4e3bbb4
RN
269 }
270
271 //Increment to next (sub)string
272 //Note that we have to use strlen here instead of nLen
273 //here because XX2XX gives us the size of the output buffer,
274 //not neccessarly the length of the string
275 szPos += strlen(szPos) + 1;
276 }
277
f5fb6871
RN
278 //success - return actual length and the buffer
279 *pOutSize = nActualLength;
280 return theBuffer;
e4e3bbb4
RN
281}
282
f5fb6871 283const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 284{
f5fb6871
RN
285 wxASSERT(pOutSize != NULL);
286
e4e3bbb4
RN
287 const wchar_t* szEnd = szString + nStringLen + 1;
288 const wchar_t* szPos = szString;
289 const wchar_t* szStart = szPos;
290
291 size_t nActualLength = 0;
f5fb6871
RN
292 size_t nCurrentSize = nStringLen << 2; //try * 4 first
293
294 wxCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
295
296 //Convert the string until the length() is reached, continuing the
297 //loop every time a null character is reached
298 while(szPos != szEnd)
299 {
300 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
301
302 //Get the length of the current (sub)string
303 size_t nLen = WC2MB(NULL, szPos, 0);
304
305 //Invalid conversion?
306 if( nLen == (size_t)-1 )
f5fb6871
RN
307 {
308 *pOutSize = 0;
309 theBuffer.data()[0u] = wxT('\0');
310 return theBuffer;
311 }
e4e3bbb4
RN
312
313 //Increase the actual length (+1 for current null character)
314 nActualLength += nLen + 1;
315
f5fb6871
RN
316 //if buffer too big, realloc the buffer
317 if (nActualLength > (nCurrentSize+1))
318 {
319 wxCharBuffer theNewBuffer(nCurrentSize << 1);
320 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
321 theBuffer = theNewBuffer;
322 nCurrentSize <<= 1;
323 }
324
325 //Convert the current (sub)string
326 if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 327 {
f5fb6871
RN
328 *pOutSize = 0;
329 theBuffer.data()[0u] = wxT('\0');
330 return theBuffer;
e4e3bbb4
RN
331 }
332
333 //Increment to next (sub)string
334 //Note that we have to use wxWcslen here instead of nLen
335 //here because XX2XX gives us the size of the output buffer,
336 //not neccessarly the length of the string
337 szPos += wxWcslen(szPos) + 1;
338 }
339
f5fb6871
RN
340 //success - return actual length and the buffer
341 *pOutSize = nActualLength;
342 return theBuffer;
e4e3bbb4
RN
343}
344
6001e347 345// ----------------------------------------------------------------------------
bde4baac 346// wxMBConvLibc
6001e347
RR
347// ----------------------------------------------------------------------------
348
bde4baac
VZ
349size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
350{
351 return wxMB2WC(buf, psz, n);
352}
353
354size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
355{
356 return wxWC2MB(buf, psz, n);
357}
bde4baac 358// ----------------------------------------------------------------------------
15f2ee32 359// UTF-7
bde4baac 360// ----------------------------------------------------------------------------
6001e347 361
15f2ee32 362// Implementation (C) 2004 Fredrik Roubert
6001e347 363
15f2ee32
RN
364//
365// BASE64 decoding table
366//
// Indexed by the byte value: gives the 6 bit value of the BASE64 digits
// ('A'-'Z', 'a'-'z', '0'-'9', '+' and '/') and 0xff for all the other bytes.
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0' - '9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x4f: 'A' - 'O'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50 - 0x5f: 'P' - 'Z'
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x6f: 'a' - 'o'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70 - 0x7f: 'p' - 'z'
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff: never valid
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
402
403size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
404{
405
406 size_t len = 0;
407
408 while (*psz && ((!buf) || (len < n)))
409 {
410 unsigned char cc = *psz++;
411 if (cc != '+')
412 {
413 // plain ASCII char
414 if (buf)
415 *buf++ = cc;
416 len++;
417 }
418 else if (*psz == '-')
419 {
420 // encoded plus sign
421 if (buf)
422 *buf++ = cc;
423 len++;
424 psz++;
425 }
426 else
427 {
428 // BASE64 encoded string
429 bool lsb;
430 unsigned char c;
431 unsigned int d, l;
432 for (lsb = false, d = 0, l = 0;
433 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
434 {
435 d <<= 6;
436 d += cc;
437 for (l += 6; l >= 8; lsb = !lsb)
438 {
439 c = (d >> (l -= 8)) % 256;
440 if (lsb)
441 {
442 if (buf)
443 *buf++ |= c;
444 len ++;
445 }
446 else
447 if (buf)
448 *buf = c << 8;
449 }
450 }
451 if (*psz == '-')
452 psz++;
453 }
454 }
455 if (buf && (len < n))
456 *buf = 0;
457 return len;
6001e347
RR
458}
459
15f2ee32
RN
460//
461// BASE64 encoding table
462//
// Indexed by a 6 bit value: gives the corresponding BASE64 digit.
static const unsigned char utf7enb64[] =
{
    // 0 - 25
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 26 - 51
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // 52 - 61
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 62 and 63
    '+', '/'
};
474
475//
476// UTF-7 encoding table
477//
478// 0 - Set D (directly encoded characters)
479// 1 - Set O (optional direct characters)
480// 2 - whitespace characters (optional)
481// 3 - special characters
482//
// Class (0 to 3, see the legend above) of each of the 128 ASCII characters,
// indexed by the character code.
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,     // 0x00 - 0x0f
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,     // 0x10 - 0x1f
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,     // 0x20 - 0x2f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,     // 0x30 - 0x3f
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     // 0x40 - 0x4f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,     // 0x50 - 0x5f
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     // 0x60 - 0x6f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3      // 0x70 - 0x7f
};
494
495size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t
496*psz, size_t n) const
497{
498
499
500 size_t len = 0;
501
502 while (*psz && ((!buf) || (len < n)))
503 {
504 wchar_t cc = *psz++;
505 if (cc < 0x80 && utf7encode[cc] < 1)
506 {
507 // plain ASCII char
508 if (buf)
509 *buf++ = (char)cc;
510 len++;
511 }
512#ifndef WC_UTF16
6e394fc6
RN
513 else if (((wxUint16)cc) > 0xffff)
514 {
15f2ee32
RN
515 // no surrogate pair generation (yet?)
516 return (size_t)-1;
517 }
518#endif
519 else
520 {
521 if (buf)
522 *buf++ = '+';
523 len++;
524 if (cc != '+')
525 {
526 // BASE64 encode string
527 unsigned int lsb, d, l;
528 for (d = 0, l = 0;; psz++)
529 {
530 for (lsb = 0; lsb < 2; lsb ++)
531 {
532 d <<= 8;
533 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
534
535 for (l += 8; l >= 6; )
536 {
537 l -= 6;
538 if (buf)
539 *buf++ = utf7enb64[(d >> l) % 64];
540 len++;
541 }
542 }
543 cc = *psz;
544 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
545 break;
546 }
547 if (l != 0)
548 {
549 if (buf)
550 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
551 len++;
552 }
553 }
554 if (buf)
555 *buf++ = '-';
556 len++;
557 }
558 }
559 if (buf && (len < n))
560 *buf = 0;
561 return len;
6001e347
RR
562}
563
f6bcfd97 564// ----------------------------------------------------------------------------
6001e347 565// UTF-8
f6bcfd97 566// ----------------------------------------------------------------------------
6001e347 567
// utf8_max[i] is the greatest value which can be encoded in UTF-8 using i + 1
// bytes (the last element is a sentinel which no value can exceed).
static wxUint32 utf8_max[]=
{
    0x7f,           // 1 byte
    0x7ff,          // 2 bytes
    0xffff,         // 3 bytes
    0x1fffff,       // 4 bytes
    0x3ffffff,      // 5 bytes
    0x7fffffff,     // 6 bytes
    0xffffffff      // sentinel
};
6001e347
RR
570
571size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
572{
4def3b35
VS
573 size_t len = 0;
574
dccce9ea 575 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
576 {
577 unsigned char cc = *psz++, fc = cc;
578 unsigned cnt;
dccce9ea 579 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 580 fc <<= 1;
dccce9ea 581 if (!cnt)
4def3b35
VS
582 {
583 // plain ASCII char
dccce9ea 584 if (buf)
4def3b35
VS
585 *buf++ = cc;
586 len++;
dccce9ea
VZ
587 }
588 else
4def3b35
VS
589 {
590 cnt--;
dccce9ea 591 if (!cnt)
4def3b35
VS
592 {
593 // invalid UTF-8 sequence
594 return (size_t)-1;
dccce9ea
VZ
595 }
596 else
4def3b35
VS
597 {
598 unsigned ocnt = cnt - 1;
599 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 600 while (cnt--)
4def3b35
VS
601 {
602 cc = *psz++;
dccce9ea 603 if ((cc & 0xC0) != 0x80)
4def3b35
VS
604 {
605 // invalid UTF-8 sequence
606 return (size_t)-1;
607 }
608 res = (res << 6) | (cc & 0x3f);
609 }
dccce9ea 610 if (res <= utf8_max[ocnt])
4def3b35
VS
611 {
612 // illegal UTF-8 encoding
613 return (size_t)-1;
614 }
1cd52418 615#ifdef WC_UTF16
b5153fd8
VZ
616 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
617 size_t pa = encode_utf16(res, (wxUint16 *)buf);
4def3b35
VS
618 if (pa == (size_t)-1)
619 return (size_t)-1;
dccce9ea 620 if (buf)
4def3b35
VS
621 buf += pa;
622 len += pa;
373658eb 623#else // !WC_UTF16
dccce9ea 624 if (buf)
4def3b35
VS
625 *buf++ = res;
626 len++;
373658eb 627#endif // WC_UTF16/!WC_UTF16
4def3b35
VS
628 }
629 }
6001e347 630 }
dccce9ea 631 if (buf && (len < n))
4def3b35
VS
632 *buf = 0;
633 return len;
6001e347
RR
634}
635
636size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
637{
4def3b35 638 size_t len = 0;
6001e347 639
dccce9ea 640 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
641 {
642 wxUint32 cc;
1cd52418 643#ifdef WC_UTF16
b5153fd8
VZ
644 // cast is ok for WC_UTF16
645 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 646 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 647#else
4def3b35
VS
648 cc=(*psz++) & 0x7fffffff;
649#endif
650 unsigned cnt;
651 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
dccce9ea 652 if (!cnt)
4def3b35
VS
653 {
654 // plain ASCII char
dccce9ea 655 if (buf)
574c939e 656 *buf++ = (char) cc;
4def3b35 657 len++;
dccce9ea
VZ
658 }
659
660 else
4def3b35
VS
661 {
662 len += cnt + 1;
dccce9ea 663 if (buf)
4def3b35 664 {
574c939e 665 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
4def3b35 666 while (cnt--)
574c939e 667 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
4def3b35
VS
668 }
669 }
6001e347 670 }
4def3b35
VS
671
672 if (buf && (len<n)) *buf = 0;
adb45366 673
4def3b35 674 return len;
6001e347
RR
675}
676
c91830cb
VZ
677
678
679
680// ----------------------------------------------------------------------------
681// UTF-16
682// ----------------------------------------------------------------------------
683
684#ifdef WORDS_BIGENDIAN
bde4baac
VZ
685 #define wxMBConvUTF16straight wxMBConvUTF16BE
686 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 687#else
bde4baac
VZ
688 #define wxMBConvUTF16swap wxMBConvUTF16BE
689 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
690#endif
691
692
c91830cb
VZ
693#ifdef WC_UTF16
694
c91830cb
VZ
695// copy 16bit MB to 16bit String
696size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
697{
698 size_t len=0;
699
700 while (*(wxUint16*)psz && (!buf || len < n))
701 {
702 if (buf)
703 *buf++ = *(wxUint16*)psz;
704 len++;
705
706 psz += sizeof(wxUint16);
707 }
708 if (buf && len<n) *buf=0;
709
710 return len;
711}
712
713
714// copy 16bit String to 16bit MB
715size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
716{
717 size_t len=0;
718
719 while (*psz && (!buf || len < n))
720 {
721 if (buf)
722 {
723 *(wxUint16*)buf = *psz;
724 buf += sizeof(wxUint16);
725 }
726 len += sizeof(wxUint16);
727 psz++;
728 }
729 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
730
731 return len;
732}
733
734
735// swap 16bit MB to 16bit String
736size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
737{
738 size_t len=0;
739
740 while (*(wxUint16*)psz && (!buf || len < n))
741 {
742 if (buf)
743 {
744 ((char *)buf)[0] = psz[1];
745 ((char *)buf)[1] = psz[0];
746 buf++;
747 }
748 len++;
749 psz += sizeof(wxUint16);
750 }
751 if (buf && len<n) *buf=0;
752
753 return len;
754}
755
756
// swap 16bit String to 16bit MB
758size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
759{
760 size_t len=0;
761
762 while (*psz && (!buf || len < n))
763 {
764 if (buf)
765 {
766 *buf++ = ((char*)psz)[1];
767 *buf++ = ((char*)psz)[0];
768 }
769 len += sizeof(wxUint16);
770 psz++;
771 }
772 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
773
774 return len;
775}
776
777
778#else // WC_UTF16
779
780
781// copy 16bit MB to 32bit String
782size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
783{
784 size_t len=0;
785
786 while (*(wxUint16*)psz && (!buf || len < n))
787 {
788 wxUint32 cc;
789 size_t pa=decode_utf16((wxUint16*)psz, cc);
790 if (pa == (size_t)-1)
791 return pa;
792
793 if (buf)
794 *buf++ = cc;
795 len++;
796 psz += pa * sizeof(wxUint16);
797 }
798 if (buf && len<n) *buf=0;
799
800 return len;
801}
802
803
804// copy 32bit String to 16bit MB
805size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
806{
807 size_t len=0;
808
809 while (*psz && (!buf || len < n))
810 {
811 wxUint16 cc[2];
812 size_t pa=encode_utf16(*psz, cc);
813
814 if (pa == (size_t)-1)
815 return pa;
816
817 if (buf)
818 {
69b80d28 819 *(wxUint16*)buf = cc[0];
b5153fd8 820 buf += sizeof(wxUint16);
c91830cb 821 if (pa > 1)
69b80d28
VZ
822 {
823 *(wxUint16*)buf = cc[1];
824 buf += sizeof(wxUint16);
825 }
c91830cb
VZ
826 }
827
828 len += pa*sizeof(wxUint16);
829 psz++;
830 }
831 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
832
833 return len;
834}
835
836
837// swap 16bit MB to 32bit String
838size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
839{
840 size_t len=0;
841
842 while (*(wxUint16*)psz && (!buf || len < n))
843 {
844 wxUint32 cc;
845 char tmp[4];
846 tmp[0]=psz[1]; tmp[1]=psz[0];
847 tmp[2]=psz[3]; tmp[3]=psz[2];
848
849 size_t pa=decode_utf16((wxUint16*)tmp, cc);
850 if (pa == (size_t)-1)
851 return pa;
852
853 if (buf)
854 *buf++ = cc;
855
856 len++;
857 psz += pa * sizeof(wxUint16);
858 }
859 if (buf && len<n) *buf=0;
860
861 return len;
862}
863
864
865// swap 32bit String to 16bit MB
866size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
867{
868 size_t len=0;
869
870 while (*psz && (!buf || len < n))
871 {
872 wxUint16 cc[2];
873 size_t pa=encode_utf16(*psz, cc);
874
875 if (pa == (size_t)-1)
876 return pa;
877
878 if (buf)
879 {
880 *buf++ = ((char*)cc)[1];
881 *buf++ = ((char*)cc)[0];
882 if (pa > 1)
883 {
884 *buf++ = ((char*)cc)[3];
885 *buf++ = ((char*)cc)[2];
886 }
887 }
888
889 len += pa*sizeof(wxUint16);
890 psz++;
891 }
892 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
893
894 return len;
895}
896
897#endif // WC_UTF16
898
899
900// ----------------------------------------------------------------------------
901// UTF-32
902// ----------------------------------------------------------------------------
903
904#ifdef WORDS_BIGENDIAN
905#define wxMBConvUTF32straight wxMBConvUTF32BE
906#define wxMBConvUTF32swap wxMBConvUTF32LE
907#else
908#define wxMBConvUTF32swap wxMBConvUTF32BE
909#define wxMBConvUTF32straight wxMBConvUTF32LE
910#endif
911
912
913WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
914WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
915
916
917#ifdef WC_UTF16
918
919// copy 32bit MB to 16bit String
920size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
921{
922 size_t len=0;
923
924 while (*(wxUint32*)psz && (!buf || len < n))
925 {
926 wxUint16 cc[2];
927
928 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
929 if (pa == (size_t)-1)
930 return pa;
931
932 if (buf)
933 {
934 *buf++ = cc[0];
935 if (pa > 1)
936 *buf++ = cc[1];
937 }
938 len += pa;
939 psz += sizeof(wxUint32);
940 }
941 if (buf && len<n) *buf=0;
942
943 return len;
944}
945
946
947// copy 16bit String to 32bit MB
948size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
949{
950 size_t len=0;
951
952 while (*psz && (!buf || len < n))
953 {
954 wxUint32 cc;
955
b5153fd8
VZ
956 // cast is ok for WC_UTF16
957 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
958 if (pa == (size_t)-1)
959 return pa;
960
961 if (buf)
962 {
963 *(wxUint32*)buf = cc;
964 buf += sizeof(wxUint32);
965 }
966 len += sizeof(wxUint32);
967 psz += pa;
968 }
b5153fd8
VZ
969
970 if (buf && len<=n-sizeof(wxUint32))
971 *(wxUint32*)buf=0;
c91830cb
VZ
972
973 return len;
974}
975
976
977
978// swap 32bit MB to 16bit String
979size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
980{
981 size_t len=0;
982
983 while (*(wxUint32*)psz && (!buf || len < n))
984 {
985 char tmp[4];
986 tmp[0] = psz[3]; tmp[1] = psz[2];
987 tmp[2] = psz[1]; tmp[3] = psz[0];
988
989
990 wxUint16 cc[2];
991
992 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
993 if (pa == (size_t)-1)
994 return pa;
995
996 if (buf)
997 {
998 *buf++ = cc[0];
999 if (pa > 1)
1000 *buf++ = cc[1];
1001 }
1002 len += pa;
1003 psz += sizeof(wxUint32);
1004 }
b5153fd8
VZ
1005
1006 if (buf && len<n)
1007 *buf=0;
c91830cb
VZ
1008
1009 return len;
1010}
1011
1012
1013// swap 16bit String to 32bit MB
1014size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1015{
1016 size_t len=0;
1017
1018 while (*psz && (!buf || len < n))
1019 {
1020 char cc[4];
1021
b5153fd8
VZ
1022 // cast is ok for WC_UTF16
1023 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
1024 if (pa == (size_t)-1)
1025 return pa;
1026
1027 if (buf)
1028 {
1029 *buf++ = cc[3];
1030 *buf++ = cc[2];
1031 *buf++ = cc[1];
1032 *buf++ = cc[0];
1033 }
1034 len += sizeof(wxUint32);
1035 psz += pa;
1036 }
b5153fd8
VZ
1037
1038 if (buf && len<=n-sizeof(wxUint32))
1039 *(wxUint32*)buf=0;
c91830cb
VZ
1040
1041 return len;
1042}
1043
1044#else // WC_UTF16
1045
1046
1047// copy 32bit MB to 32bit String
1048size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1049{
1050 size_t len=0;
1051
1052 while (*(wxUint32*)psz && (!buf || len < n))
1053 {
1054 if (buf)
1055 *buf++ = *(wxUint32*)psz;
1056 len++;
1057 psz += sizeof(wxUint32);
1058 }
b5153fd8
VZ
1059
1060 if (buf && len<n)
1061 *buf=0;
c91830cb
VZ
1062
1063 return len;
1064}
1065
1066
1067// copy 32bit String to 32bit MB
1068size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1069{
1070 size_t len=0;
1071
1072 while (*psz && (!buf || len < n))
1073 {
1074 if (buf)
1075 {
1076 *(wxUint32*)buf = *psz;
1077 buf += sizeof(wxUint32);
1078 }
1079
1080 len += sizeof(wxUint32);
1081 psz++;
1082 }
1083
b5153fd8
VZ
1084 if (buf && len<=n-sizeof(wxUint32))
1085 *(wxUint32*)buf=0;
c91830cb
VZ
1086
1087 return len;
1088}
1089
1090
1091// swap 32bit MB to 32bit String
1092size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1093{
1094 size_t len=0;
1095
1096 while (*(wxUint32*)psz && (!buf || len < n))
1097 {
1098 if (buf)
1099 {
1100 ((char *)buf)[0] = psz[3];
1101 ((char *)buf)[1] = psz[2];
1102 ((char *)buf)[2] = psz[1];
1103 ((char *)buf)[3] = psz[0];
1104 buf++;
1105 }
1106 len++;
1107 psz += sizeof(wxUint32);
1108 }
b5153fd8
VZ
1109
1110 if (buf && len<n)
1111 *buf=0;
c91830cb
VZ
1112
1113 return len;
1114}
1115
1116
1117// swap 32bit String to 32bit MB
1118size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1119{
1120 size_t len=0;
1121
1122 while (*psz && (!buf || len < n))
1123 {
1124 if (buf)
1125 {
1126 *buf++ = ((char *)psz)[3];
1127 *buf++ = ((char *)psz)[2];
1128 *buf++ = ((char *)psz)[1];
1129 *buf++ = ((char *)psz)[0];
1130 }
1131 len += sizeof(wxUint32);
1132 psz++;
1133 }
b5153fd8
VZ
1134
1135 if (buf && len<=n-sizeof(wxUint32))
1136 *(wxUint32*)buf=0;
c91830cb
VZ
1137
1138 return len;
1139}
1140
1141
1142#endif // WC_UTF16
1143
1144
36acb880
VZ
1145// ============================================================================
1146// The classes doing conversion using the iconv_xxx() functions
1147// ============================================================================
3caec1bb 1148
b040e242 1149#ifdef HAVE_ICONV
3a0d76bc 1150
3caec1bb
VS
1151// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
1152// if output buffer is _exactly_ as big as needed. Such case is (unless there's
1153// yet another bug in glibc) the only case when iconv() returns with (size_t)-1
1154// (which means error) and says there are 0 bytes left in the input buffer --
1155// when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
1156// this alternative test for iconv() failure.
1157// [This bug does not appear in glibc 2.2.]
1158#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1159#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1160 (errno != E2BIG || bufLeft != 0))
1161#else
1162#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1163#endif
1164
ab217dba 1165#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880
VZ
1166
1167// ----------------------------------------------------------------------------
e95354ec 1168// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1169// ----------------------------------------------------------------------------
1170
e95354ec 1171class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1172{
1173public:
e95354ec
VZ
1174 wxMBConv_iconv(const wxChar *name);
1175 virtual ~wxMBConv_iconv();
36acb880 1176
bde4baac
VZ
1177 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1178 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1179
e95354ec 1180 bool IsOk() const
36acb880
VZ
1181 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1182
1183protected:
1184 // the iconv handlers used to translate from multibyte to wide char and in
1185 // the other direction
1186 iconv_t m2w,
1187 w2m;
1188
1189private:
e95354ec 1190 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
1191 // available on this machine, it will remain NULL
1192 static const char *ms_wcCharsetName;
1193
1194 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1195 // different endian-ness than the native one
405d8f46 1196 static bool ms_wcNeedsSwap;
36acb880
VZ
1197};
1198
e95354ec
VZ
1199const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1200bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1201
e95354ec 1202wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 1203{
04c79127
RR
1204 // Do it the hard way
1205 char cname[100];
1206 for (size_t i = 0; i < wxStrlen(name)+1; i++)
1207 cname[i] = (char) name[i];
1208
36acb880
VZ
1209 // check for charset that represents wchar_t:
1210 if (ms_wcCharsetName == NULL)
f1339c56 1211 {
e95354ec 1212 ms_wcNeedsSwap = false;
dccce9ea 1213
36acb880
VZ
1214 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1215 ms_wcCharsetName = WC_NAME_BEST;
04c79127 1216 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 1217
36acb880
VZ
1218 if (m2w == (iconv_t)-1)
1219 {
1220 // try charset w/o bytesex info (e.g. "UCS4")
1221 // and check for bytesex ourselves:
1222 ms_wcCharsetName = WC_NAME;
04c79127 1223 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
1224
1225 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
1226 if (m2w == (iconv_t)-1)
1227 {
36acb880 1228 ms_wcCharsetName = "WCHAR_T";
04c79127 1229 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 1230 }
3a0d76bc 1231
36acb880
VZ
1232 if (m2w != (iconv_t)-1)
1233 {
1234 char buf[2], *bufPtr;
1235 wchar_t wbuf[2], *wbufPtr;
1236 size_t insz, outsz;
1237 size_t res;
1238
1239 buf[0] = 'A';
1240 buf[1] = 0;
1241 wbuf[0] = 0;
1242 insz = 2;
1243 outsz = SIZEOF_WCHAR_T * 2;
1244 wbufPtr = wbuf;
1245 bufPtr = buf;
1246
1247 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1248 (char**)&wbufPtr, &outsz);
1249
1250 if (ICONV_FAILED(res, insz))
3a0d76bc 1251 {
36acb880
VZ
1252 ms_wcCharsetName = NULL;
1253 wxLogLastError(wxT("iconv"));
2b5f62a0 1254 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
1255 }
1256 else
1257 {
36acb880 1258 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
1259 }
1260 }
36acb880
VZ
1261 else
1262 {
1263 ms_wcCharsetName = NULL;
373658eb 1264
77ffb593 1265 // VS: we must not output an error here, since wxWidgets will safely
957686c8
VS
1266 // fall back to using wxEncodingConverter.
1267 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1268 //wxLogError(
36acb880 1269 }
3a0d76bc 1270 }
36acb880 1271 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 1272 }
36acb880 1273 else // we already have ms_wcCharsetName
3caec1bb 1274 {
04c79127 1275 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 1276 }
dccce9ea 1277
36acb880
VZ
1278 // NB: don't ever pass NULL to iconv_open(), it may crash!
1279 if ( ms_wcCharsetName )
f1339c56 1280 {
04c79127 1281 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 1282 }
405d8f46
VZ
1283 else
1284 {
1285 w2m = (iconv_t)-1;
1286 }
36acb880 1287}
3caec1bb 1288
e95354ec 1289wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
1290{
1291 if ( m2w != (iconv_t)-1 )
1292 iconv_close(m2w);
1293 if ( w2m != (iconv_t)-1 )
1294 iconv_close(w2m);
1295}
3a0d76bc 1296
bde4baac 1297size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880
VZ
1298{
1299 size_t inbuf = strlen(psz);
1300 size_t outbuf = n * SIZEOF_WCHAR_T;
1301 size_t res, cres;
1302 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1303 wchar_t *bufPtr = buf;
1304 const char *pszPtr = psz;
1305
1306 if (buf)
1307 {
1308 // have destination buffer, convert there
1309 cres = iconv(m2w,
1310 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1311 (char**)&bufPtr, &outbuf);
1312 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1313
36acb880 1314 if (ms_wcNeedsSwap)
3a0d76bc 1315 {
36acb880
VZ
1316 // convert to native endianness
1317 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 1318 }
adb45366 1319
49dd9820
VS
1320 // NB: iconv was given only strlen(psz) characters on input, and so
1321 // it couldn't convert the trailing zero. Let's do it ourselves
1322 // if there's some room left for it in the output buffer.
1323 if (res < n)
1324 buf[res] = 0;
36acb880
VZ
1325 }
1326 else
1327 {
1328 // no destination buffer... convert using temp buffer
1329 // to calculate destination buffer requirement
1330 wchar_t tbuf[8];
1331 res = 0;
1332 do {
1333 bufPtr = tbuf;
1334 outbuf = 8*SIZEOF_WCHAR_T;
1335
1336 cres = iconv(m2w,
1337 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1338 (char**)&bufPtr, &outbuf );
1339
1340 res += 8-(outbuf/SIZEOF_WCHAR_T);
1341 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1342 }
dccce9ea 1343
36acb880 1344 if (ICONV_FAILED(cres, inbuf))
f1339c56 1345 {
36acb880
VZ
1346 //VS: it is ok if iconv fails, hence trace only
1347 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1348 return (size_t)-1;
1349 }
1350
1351 return res;
1352}
1353
bde4baac 1354size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1355{
f8d791e0 1356 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1357 size_t outbuf = n;
1358 size_t res, cres;
3a0d76bc 1359
36acb880 1360 wchar_t *tmpbuf = 0;
3caec1bb 1361
36acb880
VZ
1362 if (ms_wcNeedsSwap)
1363 {
1364 // need to copy to temp buffer to switch endianness
1365 // this absolutely doesn't rock!
1366 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1367 // could be in read-only memory, or be accessed in some other thread)
1368 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1369 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1370 WC_BSWAP(tmpbuf, inbuf)
1371 psz=tmpbuf;
1372 }
3a0d76bc 1373
36acb880
VZ
1374 if (buf)
1375 {
1376 // have destination buffer, convert there
1377 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1378
36acb880 1379 res = n-outbuf;
adb45366 1380
49dd9820
VS
1381 // NB: iconv was given only wcslen(psz) characters on input, and so
1382 // it couldn't convert the trailing zero. Let's do it ourselves
1383 // if there's some room left for it in the output buffer.
1384 if (res < n)
1385 buf[0] = 0;
36acb880
VZ
1386 }
1387 else
1388 {
1389 // no destination buffer... convert using temp buffer
1390 // to calculate destination buffer requirement
1391 char tbuf[16];
1392 res = 0;
1393 do {
1394 buf = tbuf; outbuf = 16;
1395
1396 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1397
36acb880
VZ
1398 res += 16 - outbuf;
1399 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1400 }
dccce9ea 1401
36acb880
VZ
1402 if (ms_wcNeedsSwap)
1403 {
1404 free(tmpbuf);
1405 }
dccce9ea 1406
36acb880
VZ
1407 if (ICONV_FAILED(cres, inbuf))
1408 {
1409 //VS: it is ok if iconv fails, hence trace only
1410 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1411 return (size_t)-1;
1412 }
1413
1414 return res;
1415}
1416
b040e242 1417#endif // HAVE_ICONV
36acb880 1418
e95354ec 1419
36acb880
VZ
1420// ============================================================================
1421// Win32 conversion classes
1422// ============================================================================
1cd52418 1423
e95354ec 1424#ifdef wxHAVE_WIN32_MB2WC
373658eb 1425
8b04d4c4 1426// from utils.cpp
d775fa82 1427#if wxUSE_FONTMAP
8b04d4c4
VZ
1428extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1429extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1430#endif
373658eb 1431
e95354ec 1432class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1433{
1434public:
bde4baac
VZ
1435 wxMBConv_win32()
1436 {
1437 m_CodePage = CP_ACP;
1438 }
1439
7608a683 1440#if wxUSE_FONTMAP
e95354ec 1441 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1442 {
1443 m_CodePage = wxCharsetToCodepage(name);
1444 }
dccce9ea 1445
e95354ec 1446 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1447 {
1448 m_CodePage = wxEncodingToCodepage(encoding);
1449 }
7608a683 1450#endif
8b04d4c4 1451
bde4baac 1452 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1453 {
02272c9c
VZ
1454 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1455 // the behaviour is not compatible with the Unix version (using iconv)
1456 // and break the library itself, e.g. wxTextInputStream::NextChar()
1457 // wouldn't work if reading an incomplete MB char didn't result in an
1458 // error
2b5f62a0
VZ
1459 const size_t len = ::MultiByteToWideChar
1460 (
1461 m_CodePage, // code page
02272c9c 1462 MB_ERR_INVALID_CHARS, // flags: fall on error
2b5f62a0
VZ
1463 psz, // input string
1464 -1, // its length (NUL-terminated)
b4da152e 1465 buf, // output string
2b5f62a0
VZ
1466 buf ? n : 0 // size of output buffer
1467 );
1468
03a991bc
VZ
1469 // note that it returns count of written chars for buf != NULL and size
1470 // of the needed buffer for buf == NULL so in either case the length of
1471 // the string (which never includes the terminating NUL) is one less
1472 return len ? len - 1 : (size_t)-1;
f1339c56 1473 }
dccce9ea 1474
13dd924a 1475 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1476 {
13dd924a
VZ
1477 /*
1478 we have a problem here: by default, WideCharToMultiByte() may
1479 replace characters unrepresentable in the target code page with bad
1480 quality approximations such as turning "1/2" symbol (U+00BD) into
1481 "1" for the code pages which don't have it and we, obviously, want
1482 to avoid this at any price
d775fa82 1483
13dd924a
VZ
1484 the trouble is that this function does it _silently_, i.e. it won't
1485 even tell us whether it did or not... Win98/2000 and higher provide
1486 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1487 we have to resort to a round trip, i.e. check that converting back
1488 results in the same string -- this is, of course, expensive but
1489 otherwise we simply can't be sure to not garble the data.
1490 */
1491
1492 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1493 // it doesn't work with CJK encodings (which we test for rather roughly
1494 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1495 // supporting it
907173e5
WS
1496 BOOL usedDef wxDUMMY_INITIALIZE(false);
1497 BOOL *pUsedDef;
13dd924a
VZ
1498 int flags;
1499 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1500 {
1501 // it's our lucky day
1502 flags = WC_NO_BEST_FIT_CHARS;
1503 pUsedDef = &usedDef;
1504 }
1505 else // old system or unsupported encoding
1506 {
1507 flags = 0;
1508 pUsedDef = NULL;
1509 }
1510
2b5f62a0
VZ
1511 const size_t len = ::WideCharToMultiByte
1512 (
1513 m_CodePage, // code page
13dd924a
VZ
1514 flags, // either none or no best fit
1515 pwz, // input string
2b5f62a0
VZ
1516 -1, // it is (wide) NUL-terminated
1517 buf, // output buffer
1518 buf ? n : 0, // and its size
1519 NULL, // default "replacement" char
13dd924a 1520 pUsedDef // [out] was it used?
2b5f62a0
VZ
1521 );
1522
13dd924a
VZ
1523 if ( !len )
1524 {
1525 // function totally failed
1526 return (size_t)-1;
1527 }
1528
1529 // if we were really converting, check if we succeeded
1530 if ( buf )
1531 {
1532 if ( flags )
1533 {
1534 // check if the conversion failed, i.e. if any replacements
1535 // were done
1536 if ( usedDef )
1537 return (size_t)-1;
1538 }
1539 else // we must resort to double tripping...
1540 {
1541 wxWCharBuffer wcBuf(n);
1542 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1543 wcscmp(wcBuf, pwz) != 0 )
1544 {
1545 // we didn't obtain the same thing we started from, hence
1546 // the conversion was lossy and we consider that it failed
1547 return (size_t)-1;
1548 }
1549 }
1550 }
1551
03a991bc 1552 // see the comment above for the reason of "len - 1"
13dd924a 1553 return len - 1;
f1339c56 1554 }
dccce9ea 1555
13dd924a
VZ
1556 bool IsOk() const { return m_CodePage != -1; }
1557
1558private:
1559 static bool CanUseNoBestFit()
1560 {
1561 static int s_isWin98Or2k = -1;
1562
1563 if ( s_isWin98Or2k == -1 )
1564 {
1565 int verMaj, verMin;
1566 switch ( wxGetOsVersion(&verMaj, &verMin) )
1567 {
1568 case wxWIN95:
1569 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1570 break;
1571
1572 case wxWINDOWS_NT:
1573 s_isWin98Or2k = verMaj >= 5;
1574 break;
1575
1576 default:
1577 // unknown, be conseravtive by default
1578 s_isWin98Or2k = 0;
1579 }
1580
1581 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1582 }
1583
1584 return s_isWin98Or2k == 1;
1585 }
f1339c56 1586
b1d66b54 1587 long m_CodePage;
1cd52418 1588};
e95354ec
VZ
1589
1590#endif // wxHAVE_WIN32_MB2WC
1591
f7e98dee
RN
1592// ============================================================================
1593// Cocoa conversion classes
1594// ============================================================================
1595
1596#if defined(__WXCOCOA__)
1597
ecd9653b 1598// RN: There is no UTF-32 support in either Core Foundation or
f7e98dee
RN
1599// Cocoa. Strangely enough, internally Core Foundation uses
1600// UTF 32 internally quite a bit - its just not public (yet).
1601
1602#include <CoreFoundation/CFString.h>
1603#include <CoreFoundation/CFStringEncodingExt.h>
1604
1605CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 1606{
638357a0 1607 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ecd9653b
WS
1608 if ( encoding == wxFONTENCODING_DEFAULT )
1609 {
638357a0 1610 enc = CFStringGetSystemEncoding();
ecd9653b
WS
1611 }
1612 else switch( encoding)
1613 {
1614 case wxFONTENCODING_ISO8859_1 :
1615 enc = kCFStringEncodingISOLatin1 ;
1616 break ;
1617 case wxFONTENCODING_ISO8859_2 :
1618 enc = kCFStringEncodingISOLatin2;
1619 break ;
1620 case wxFONTENCODING_ISO8859_3 :
1621 enc = kCFStringEncodingISOLatin3 ;
1622 break ;
1623 case wxFONTENCODING_ISO8859_4 :
1624 enc = kCFStringEncodingISOLatin4;
1625 break ;
1626 case wxFONTENCODING_ISO8859_5 :
1627 enc = kCFStringEncodingISOLatinCyrillic;
1628 break ;
1629 case wxFONTENCODING_ISO8859_6 :
1630 enc = kCFStringEncodingISOLatinArabic;
1631 break ;
1632 case wxFONTENCODING_ISO8859_7 :
1633 enc = kCFStringEncodingISOLatinGreek;
1634 break ;
1635 case wxFONTENCODING_ISO8859_8 :
1636 enc = kCFStringEncodingISOLatinHebrew;
1637 break ;
1638 case wxFONTENCODING_ISO8859_9 :
1639 enc = kCFStringEncodingISOLatin5;
1640 break ;
1641 case wxFONTENCODING_ISO8859_10 :
1642 enc = kCFStringEncodingISOLatin6;
1643 break ;
1644 case wxFONTENCODING_ISO8859_11 :
1645 enc = kCFStringEncodingISOLatinThai;
1646 break ;
1647 case wxFONTENCODING_ISO8859_13 :
1648 enc = kCFStringEncodingISOLatin7;
1649 break ;
1650 case wxFONTENCODING_ISO8859_14 :
1651 enc = kCFStringEncodingISOLatin8;
1652 break ;
1653 case wxFONTENCODING_ISO8859_15 :
1654 enc = kCFStringEncodingISOLatin9;
1655 break ;
1656
1657 case wxFONTENCODING_KOI8 :
1658 enc = kCFStringEncodingKOI8_R;
1659 break ;
1660 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1661 enc = kCFStringEncodingDOSRussian;
1662 break ;
1663
1664// case wxFONTENCODING_BULGARIAN :
1665// enc = ;
1666// break ;
1667
1668 case wxFONTENCODING_CP437 :
1669 enc =kCFStringEncodingDOSLatinUS ;
1670 break ;
1671 case wxFONTENCODING_CP850 :
1672 enc = kCFStringEncodingDOSLatin1;
1673 break ;
1674 case wxFONTENCODING_CP852 :
1675 enc = kCFStringEncodingDOSLatin2;
1676 break ;
1677 case wxFONTENCODING_CP855 :
1678 enc = kCFStringEncodingDOSCyrillic;
1679 break ;
1680 case wxFONTENCODING_CP866 :
1681 enc =kCFStringEncodingDOSRussian ;
1682 break ;
1683 case wxFONTENCODING_CP874 :
1684 enc = kCFStringEncodingDOSThai;
1685 break ;
1686 case wxFONTENCODING_CP932 :
1687 enc = kCFStringEncodingDOSJapanese;
1688 break ;
1689 case wxFONTENCODING_CP936 :
1690 enc =kCFStringEncodingDOSChineseSimplif ;
1691 break ;
1692 case wxFONTENCODING_CP949 :
1693 enc = kCFStringEncodingDOSKorean;
1694 break ;
1695 case wxFONTENCODING_CP950 :
1696 enc = kCFStringEncodingDOSChineseTrad;
1697 break ;
ecd9653b
WS
1698 case wxFONTENCODING_CP1250 :
1699 enc = kCFStringEncodingWindowsLatin2;
1700 break ;
1701 case wxFONTENCODING_CP1251 :
1702 enc =kCFStringEncodingWindowsCyrillic ;
1703 break ;
1704 case wxFONTENCODING_CP1252 :
1705 enc =kCFStringEncodingWindowsLatin1 ;
1706 break ;
1707 case wxFONTENCODING_CP1253 :
1708 enc = kCFStringEncodingWindowsGreek;
1709 break ;
1710 case wxFONTENCODING_CP1254 :
1711 enc = kCFStringEncodingWindowsLatin5;
1712 break ;
1713 case wxFONTENCODING_CP1255 :
1714 enc =kCFStringEncodingWindowsHebrew ;
1715 break ;
1716 case wxFONTENCODING_CP1256 :
1717 enc =kCFStringEncodingWindowsArabic ;
1718 break ;
1719 case wxFONTENCODING_CP1257 :
1720 enc = kCFStringEncodingWindowsBalticRim;
1721 break ;
638357a0
RN
1722// This only really encodes to UTF7 (if that) evidently
1723// case wxFONTENCODING_UTF7 :
1724// enc = kCFStringEncodingNonLossyASCII ;
1725// break ;
ecd9653b
WS
1726 case wxFONTENCODING_UTF8 :
1727 enc = kCFStringEncodingUTF8 ;
1728 break ;
1729 case wxFONTENCODING_EUC_JP :
1730 enc = kCFStringEncodingEUC_JP;
1731 break ;
1732 case wxFONTENCODING_UTF16 :
f7e98dee 1733 enc = kCFStringEncodingUnicode ;
ecd9653b 1734 break ;
f7e98dee
RN
1735 case wxFONTENCODING_MACROMAN :
1736 enc = kCFStringEncodingMacRoman ;
1737 break ;
1738 case wxFONTENCODING_MACJAPANESE :
1739 enc = kCFStringEncodingMacJapanese ;
1740 break ;
1741 case wxFONTENCODING_MACCHINESETRAD :
1742 enc = kCFStringEncodingMacChineseTrad ;
1743 break ;
1744 case wxFONTENCODING_MACKOREAN :
1745 enc = kCFStringEncodingMacKorean ;
1746 break ;
1747 case wxFONTENCODING_MACARABIC :
1748 enc = kCFStringEncodingMacArabic ;
1749 break ;
1750 case wxFONTENCODING_MACHEBREW :
1751 enc = kCFStringEncodingMacHebrew ;
1752 break ;
1753 case wxFONTENCODING_MACGREEK :
1754 enc = kCFStringEncodingMacGreek ;
1755 break ;
1756 case wxFONTENCODING_MACCYRILLIC :
1757 enc = kCFStringEncodingMacCyrillic ;
1758 break ;
1759 case wxFONTENCODING_MACDEVANAGARI :
1760 enc = kCFStringEncodingMacDevanagari ;
1761 break ;
1762 case wxFONTENCODING_MACGURMUKHI :
1763 enc = kCFStringEncodingMacGurmukhi ;
1764 break ;
1765 case wxFONTENCODING_MACGUJARATI :
1766 enc = kCFStringEncodingMacGujarati ;
1767 break ;
1768 case wxFONTENCODING_MACORIYA :
1769 enc = kCFStringEncodingMacOriya ;
1770 break ;
1771 case wxFONTENCODING_MACBENGALI :
1772 enc = kCFStringEncodingMacBengali ;
1773 break ;
1774 case wxFONTENCODING_MACTAMIL :
1775 enc = kCFStringEncodingMacTamil ;
1776 break ;
1777 case wxFONTENCODING_MACTELUGU :
1778 enc = kCFStringEncodingMacTelugu ;
1779 break ;
1780 case wxFONTENCODING_MACKANNADA :
1781 enc = kCFStringEncodingMacKannada ;
1782 break ;
1783 case wxFONTENCODING_MACMALAJALAM :
1784 enc = kCFStringEncodingMacMalayalam ;
1785 break ;
1786 case wxFONTENCODING_MACSINHALESE :
1787 enc = kCFStringEncodingMacSinhalese ;
1788 break ;
1789 case wxFONTENCODING_MACBURMESE :
1790 enc = kCFStringEncodingMacBurmese ;
1791 break ;
1792 case wxFONTENCODING_MACKHMER :
1793 enc = kCFStringEncodingMacKhmer ;
1794 break ;
1795 case wxFONTENCODING_MACTHAI :
1796 enc = kCFStringEncodingMacThai ;
1797 break ;
1798 case wxFONTENCODING_MACLAOTIAN :
1799 enc = kCFStringEncodingMacLaotian ;
1800 break ;
1801 case wxFONTENCODING_MACGEORGIAN :
1802 enc = kCFStringEncodingMacGeorgian ;
1803 break ;
1804 case wxFONTENCODING_MACARMENIAN :
1805 enc = kCFStringEncodingMacArmenian ;
1806 break ;
1807 case wxFONTENCODING_MACCHINESESIMP :
1808 enc = kCFStringEncodingMacChineseSimp ;
1809 break ;
1810 case wxFONTENCODING_MACTIBETAN :
1811 enc = kCFStringEncodingMacTibetan ;
1812 break ;
1813 case wxFONTENCODING_MACMONGOLIAN :
1814 enc = kCFStringEncodingMacMongolian ;
1815 break ;
1816 case wxFONTENCODING_MACETHIOPIC :
1817 enc = kCFStringEncodingMacEthiopic ;
1818 break ;
1819 case wxFONTENCODING_MACCENTRALEUR :
1820 enc = kCFStringEncodingMacCentralEurRoman ;
1821 break ;
1822 case wxFONTENCODING_MACVIATNAMESE :
1823 enc = kCFStringEncodingMacVietnamese ;
1824 break ;
1825 case wxFONTENCODING_MACARABICEXT :
1826 enc = kCFStringEncodingMacExtArabic ;
1827 break ;
1828 case wxFONTENCODING_MACSYMBOL :
1829 enc = kCFStringEncodingMacSymbol ;
1830 break ;
1831 case wxFONTENCODING_MACDINGBATS :
1832 enc = kCFStringEncodingMacDingbats ;
1833 break ;
1834 case wxFONTENCODING_MACTURKISH :
1835 enc = kCFStringEncodingMacTurkish ;
1836 break ;
1837 case wxFONTENCODING_MACCROATIAN :
1838 enc = kCFStringEncodingMacCroatian ;
1839 break ;
1840 case wxFONTENCODING_MACICELANDIC :
1841 enc = kCFStringEncodingMacIcelandic ;
1842 break ;
1843 case wxFONTENCODING_MACROMANIAN :
1844 enc = kCFStringEncodingMacRomanian ;
1845 break ;
1846 case wxFONTENCODING_MACCELTIC :
1847 enc = kCFStringEncodingMacCeltic ;
1848 break ;
1849 case wxFONTENCODING_MACGAELIC :
1850 enc = kCFStringEncodingMacGaelic ;
1851 break ;
ecd9653b
WS
1852// case wxFONTENCODING_MACKEYBOARD :
1853// enc = kCFStringEncodingMacKeyboardGlyphs ;
1854// break ;
1855 default :
1856 // because gcc is picky
1857 break ;
1858 } ;
1859 return enc ;
f7e98dee
RN
1860}
1861
f7e98dee
RN
1862class wxMBConv_cocoa : public wxMBConv
1863{
1864public:
1865 wxMBConv_cocoa()
1866 {
1867 Init(CFStringGetSystemEncoding()) ;
1868 }
1869
1870 wxMBConv_cocoa(const wxChar* name)
1871 {
1872 Init( wxCFStringEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
1873 }
1874
1875 wxMBConv_cocoa(wxFontEncoding encoding)
1876 {
1877 Init( wxCFStringEncFromFontEnc(encoding) );
1878 }
1879
1880 ~wxMBConv_cocoa()
1881 {
1882 }
1883
1884 void Init( CFStringEncoding encoding)
1885 {
638357a0 1886 m_encoding = encoding ;
f7e98dee
RN
1887 }
1888
1889 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
1890 {
1891 wxASSERT(szUnConv);
ecd9653b 1892
638357a0
RN
1893 CFStringRef theString = CFStringCreateWithBytes (
1894 NULL, //the allocator
1895 (const UInt8*)szUnConv,
1896 strlen(szUnConv),
1897 m_encoding,
1898 false //no BOM/external representation
f7e98dee
RN
1899 );
1900
1901 wxASSERT(theString);
1902
638357a0
RN
1903 size_t nOutLength = CFStringGetLength(theString);
1904
1905 if (szOut == NULL)
f7e98dee 1906 {
f7e98dee 1907 CFRelease(theString);
638357a0 1908 return nOutLength;
f7e98dee 1909 }
ecd9653b 1910
638357a0 1911 CFRange theRange = { 0, nOutSize };
ecd9653b 1912
638357a0
RN
1913#if SIZEOF_WCHAR_T == 4
1914 UniChar* szUniCharBuffer = new UniChar[nOutSize];
1915#endif
1916
f7e98dee 1917 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
638357a0 1918
f7e98dee 1919 CFRelease(theString);
ecd9653b 1920
638357a0 1921 szUniCharBuffer[nOutLength] = '\0' ;
f7e98dee
RN
1922
1923#if SIZEOF_WCHAR_T == 4
1924 wxMBConvUTF16 converter ;
638357a0 1925 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
f7e98dee
RN
1926 delete[] szUniCharBuffer;
1927#endif
638357a0
RN
1928
1929 return nOutLength;
f7e98dee
RN
1930 }
1931
1932 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
1933 {
638357a0
RN
1934 wxASSERT(szUnConv);
1935
f7e98dee 1936 size_t nRealOutSize;
638357a0 1937 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 1938 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 1939
f7e98dee
RN
1940#if SIZEOF_WCHAR_T == 4
1941 wxMBConvUTF16BE converter ;
1942 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
1943 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
1944 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
1945 nBufSize /= sizeof(UniChar);
f7e98dee
RN
1946#endif
1947
1948 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
1949 NULL, //allocator
1950 szUniBuffer,
1951 nBufSize,
638357a0 1952 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 1953 );
ecd9653b 1954
f7e98dee 1955 wxASSERT(theString);
ecd9653b 1956
f7e98dee 1957 //Note that CER puts a BOM when converting to unicode
638357a0
RN
1958 //so we check and use getchars instead in that case
1959 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 1960 {
638357a0
RN
1961 if (szOut != NULL)
1962 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
1963
1964 nRealOutSize = CFStringGetLength(theString) + 1;
1965 }
1966 else
1967 {
1968 CFStringGetBytes(
1969 theString,
1970 CFRangeMake(0, CFStringGetLength(theString)),
1971 m_encoding,
1972 0, //what to put in characters that can't be converted -
1973 //0 tells CFString to return NULL if it meets such a character
1974 false, //not an external representation
1975 (UInt8*) szOut,
1976 nOutSize,
1977 (CFIndex*) &nRealOutSize
1978 );
f7e98dee 1979 }
ecd9653b 1980
638357a0 1981 CFRelease(theString);
ecd9653b 1982
638357a0
RN
1983#if SIZEOF_WCHAR_T == 4
1984 delete[] szUniBuffer;
1985#endif
ecd9653b 1986
f7e98dee
RN
1987 return nRealOutSize - 1;
1988 }
1989
1990 bool IsOk() const
ecd9653b 1991 {
638357a0
RN
1992 return m_encoding != kCFStringEncodingInvalidId &&
1993 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
1994 }
1995
1996private:
638357a0 1997 CFStringEncoding m_encoding ;
f7e98dee
RN
1998};
1999
2000#endif // defined(__WXCOCOA__)
2001
335d31e0
SC
2002// ============================================================================
2003// Mac conversion classes
2004// ============================================================================
2005
2006#if defined(__WXMAC__) && defined(TARGET_CARBON)
2007
2008class wxMBConv_mac : public wxMBConv
2009{
2010public:
2011 wxMBConv_mac()
2012 {
2013 Init(CFStringGetSystemEncoding()) ;
2014 }
2015
2016 wxMBConv_mac(const wxChar* name)
2017 {
d775fa82 2018 Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
335d31e0
SC
2019 }
2020
2021 wxMBConv_mac(wxFontEncoding encoding)
2022 {
d775fa82
WS
2023 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2024 }
2025
2026 ~wxMBConv_mac()
2027 {
2028 OSStatus status = noErr ;
2029 status = TECDisposeConverter(m_MB2WC_converter);
2030 status = TECDisposeConverter(m_WC2MB_converter);
2031 }
2032
2033
2034 void Init( TextEncodingBase encoding)
2035 {
2036 OSStatus status = noErr ;
2037 m_char_encoding = encoding ;
2038 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2039
2040 status = TECCreateConverter(&m_MB2WC_converter,
2041 m_char_encoding,
2042 m_unicode_encoding);
2043 status = TECCreateConverter(&m_WC2MB_converter,
2044 m_unicode_encoding,
2045 m_char_encoding);
2046 }
2047
335d31e0
SC
2048 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2049 {
d775fa82
WS
2050 OSStatus status = noErr ;
2051 ByteCount byteOutLen ;
2052 ByteCount byteInLen = strlen(psz) ;
2053 wchar_t *tbuf = NULL ;
2054 UniChar* ubuf = NULL ;
2055 size_t res = 0 ;
2056
2057 if (buf == NULL)
2058 {
638357a0 2059 //apple specs say at least 32
c543817b 2060 n = wxMax( 32 , byteInLen ) ;
d775fa82
WS
2061 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2062 }
2063 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
f3a355ce 2064#if SIZEOF_WCHAR_T == 4
d775fa82 2065 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2066#else
d775fa82 2067 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2068#endif
d775fa82
WS
2069 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2070 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
f3a355ce 2071#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2072 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2073 // is not properly terminated we get random characters at the end
2074 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d775fa82
WS
2075 wxMBConvUTF16BE converter ;
2076 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2077 free( ubuf ) ;
f3a355ce 2078#else
d775fa82 2079 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2080#endif
d775fa82
WS
2081 if ( buf == NULL )
2082 free(tbuf) ;
335d31e0 2083
335d31e0
SC
2084 if ( buf && res < n)
2085 buf[res] = 0;
2086
d775fa82 2087 return res ;
335d31e0
SC
2088 }
2089
2090 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82
WS
2091 {
2092 OSStatus status = noErr ;
2093 ByteCount byteOutLen ;
2094 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2095
2096 char *tbuf = NULL ;
2097
2098 if (buf == NULL)
2099 {
638357a0 2100 //apple specs say at least 32
c543817b 2101 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2102 tbuf = (char*) malloc( n ) ;
2103 }
2104
2105 ByteCount byteBufferLen = n ;
2106 UniChar* ubuf = NULL ;
f3a355ce 2107#if SIZEOF_WCHAR_T == 4
d775fa82
WS
2108 wxMBConvUTF16BE converter ;
2109 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2110 byteInLen = unicharlen ;
2111 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2112 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce 2113#else
d775fa82 2114 ubuf = (UniChar*) psz ;
f3a355ce 2115#endif
d775fa82
WS
2116 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2117 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
f3a355ce 2118#if SIZEOF_WCHAR_T == 4
d775fa82 2119 free( ubuf ) ;
f3a355ce 2120#endif
d775fa82
WS
2121 if ( buf == NULL )
2122 free(tbuf) ;
335d31e0 2123
d775fa82 2124 size_t res = byteOutLen ;
335d31e0 2125 if ( buf && res < n)
638357a0 2126 {
335d31e0 2127 buf[res] = 0;
638357a0
RN
2128
2129 //we need to double-trip to verify it didn't insert any ? in place
2130 //of bogus characters
2131 wxWCharBuffer wcBuf(n);
2132 size_t pszlen = wxWcslen(psz);
2133 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2134 wxWcslen(wcBuf) != pszlen ||
2135 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2136 {
2137 // we didn't obtain the same thing we started from, hence
2138 // the conversion was lossy and we consider that it failed
2139 return (size_t)-1;
2140 }
2141 }
335d31e0 2142
d775fa82 2143 return res ;
335d31e0
SC
2144 }
2145
2146 bool IsOk() const
2147 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2148
2149private:
d775fa82
WS
2150 TECObjectRef m_MB2WC_converter ;
2151 TECObjectRef m_WC2MB_converter ;
2152
2153 TextEncodingBase m_char_encoding ;
2154 TextEncodingBase m_unicode_encoding ;
335d31e0
SC
2155};
2156
2157#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 2158
36acb880
VZ
2159// ============================================================================
2160// wxEncodingConverter based conversion classes
2161// ============================================================================
2162
1e6feb95 2163#if wxUSE_FONTMAP
1cd52418 2164
e95354ec 2165class wxMBConv_wxwin : public wxMBConv
1cd52418 2166{
8b04d4c4
VZ
2167private:
2168 void Init()
2169 {
2170 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2171 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2172 }
2173
6001e347 2174public:
f1339c56
RR
2175 // temporarily just use wxEncodingConverter stuff,
2176 // so that it works while a better implementation is built
e95354ec 2177 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
2178 {
2179 if (name)
e95354ec 2180 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
2181 else
2182 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 2183
8b04d4c4
VZ
2184 Init();
2185 }
2186
e95354ec 2187 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
2188 {
2189 m_enc = enc;
2190
2191 Init();
f1339c56 2192 }
dccce9ea 2193
bde4baac 2194 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
2195 {
2196 size_t inbuf = strlen(psz);
dccce9ea 2197 if (buf)
4def3b35 2198 m2w.Convert(psz,buf);
f1339c56
RR
2199 return inbuf;
2200 }
dccce9ea 2201
bde4baac 2202 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 2203 {
f8d791e0 2204 const size_t inbuf = wxWcslen(psz);
f1339c56
RR
2205 if (buf)
2206 w2m.Convert(psz,buf);
dccce9ea 2207
f1339c56
RR
2208 return inbuf;
2209 }
dccce9ea 2210
e95354ec 2211 bool IsOk() const { return m_ok; }
f1339c56
RR
2212
2213public:
8b04d4c4 2214 wxFontEncoding m_enc;
f1339c56 2215 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
2216
2217 // were we initialized successfully?
2218 bool m_ok;
fc7a2a60 2219
e95354ec 2220 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 2221};
6001e347 2222
1e6feb95
VZ
2223#endif // wxUSE_FONTMAP
2224
36acb880
VZ
2225// ============================================================================
2226// wxCSConv implementation
2227// ============================================================================
2228
8b04d4c4 2229void wxCSConv::Init()
6001e347 2230{
e95354ec
VZ
2231 m_name = NULL;
2232 m_convReal = NULL;
2233 m_deferred = true;
2234}
2235
8b04d4c4
VZ
2236wxCSConv::wxCSConv(const wxChar *charset)
2237{
2238 Init();
82713003 2239
e95354ec
VZ
2240 if ( charset )
2241 {
e95354ec
VZ
2242 SetName(charset);
2243 }
bda3d86a
VZ
2244
2245 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
2246}
2247
8b04d4c4
VZ
2248wxCSConv::wxCSConv(wxFontEncoding encoding)
2249{
bda3d86a 2250 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
2251 {
2252 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2253
2254 encoding = wxFONTENCODING_SYSTEM;
2255 }
2256
8b04d4c4
VZ
2257 Init();
2258
bda3d86a 2259 m_encoding = encoding;
8b04d4c4
VZ
2260}
2261
6001e347
RR
2262wxCSConv::~wxCSConv()
2263{
65e50848
JS
2264 Clear();
2265}
2266
54380f29 2267wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 2268 : wxMBConv()
54380f29 2269{
8b04d4c4
VZ
2270 Init();
2271
54380f29 2272 SetName(conv.m_name);
8b04d4c4 2273 m_encoding = conv.m_encoding;
54380f29
GD
2274}
2275
2276wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2277{
2278 Clear();
8b04d4c4 2279
54380f29 2280 SetName(conv.m_name);
8b04d4c4
VZ
2281 m_encoding = conv.m_encoding;
2282
54380f29
GD
2283 return *this;
2284}
2285
65e50848
JS
2286void wxCSConv::Clear()
2287{
8b04d4c4 2288 free(m_name);
e95354ec 2289 delete m_convReal;
8b04d4c4 2290
65e50848 2291 m_name = NULL;
e95354ec 2292 m_convReal = NULL;
6001e347
RR
2293}
2294
2295void wxCSConv::SetName(const wxChar *charset)
2296{
f1339c56
RR
2297 if (charset)
2298 {
2299 m_name = wxStrdup(charset);
e95354ec 2300 m_deferred = true;
f1339c56 2301 }
6001e347
RR
2302}
2303
e95354ec
VZ
2304wxMBConv *wxCSConv::DoCreate() const
2305{
c547282d
VZ
2306 // check for the special case of ASCII or ISO8859-1 charset: as we have
2307 // special knowledge of it anyhow, we don't need to create a special
2308 // conversion object
2309 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 2310 {
e95354ec
VZ
2311 // don't convert at all
2312 return NULL;
2313 }
dccce9ea 2314
e95354ec
VZ
2315 // we trust OS to do conversion better than we can so try external
2316 // conversion methods first
2317 //
2318 // the full order is:
2319 // 1. OS conversion (iconv() under Unix or Win32 API)
2320 // 2. hard coded conversions for UTF
2321 // 3. wxEncodingConverter as fall back
2322
2323 // step (1)
2324#ifdef HAVE_ICONV
c547282d 2325#if !wxUSE_FONTMAP
e95354ec 2326 if ( m_name )
c547282d 2327#endif // !wxUSE_FONTMAP
e95354ec 2328 {
c547282d
VZ
2329 wxString name(m_name);
2330
2331#if wxUSE_FONTMAP
2332 if ( name.empty() )
2333 name = wxFontMapper::Get()->GetEncodingName(m_encoding);
2334#endif // wxUSE_FONTMAP
2335
2336 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
e95354ec
VZ
2337 if ( conv->IsOk() )
2338 return conv;
2339
2340 delete conv;
2341 }
2342#endif // HAVE_ICONV
2343
2344#ifdef wxHAVE_WIN32_MB2WC
2345 {
7608a683 2346#if wxUSE_FONTMAP
e95354ec
VZ
2347 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2348 : new wxMBConv_win32(m_encoding);
2349 if ( conv->IsOk() )
2350 return conv;
2351
2352 delete conv;
7608a683
WS
2353#else
2354 return NULL;
2355#endif
e95354ec
VZ
2356 }
2357#endif // wxHAVE_WIN32_MB2WC
d775fa82
WS
2358#if defined(__WXMAC__)
2359 {
2360 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
2361 {
2362
2363 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2364 : new wxMBConv_mac(m_encoding);
2365 if ( conv->IsOk() )
f7e98dee
RN
2366 return conv;
2367
2368 delete conv;
2369 }
2370 }
2371#endif
2372#if defined(__WXCOCOA__)
2373 {
2374 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2375 {
2376
2377 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2378 : new wxMBConv_cocoa(m_encoding);
2379 if ( conv->IsOk() )
d775fa82
WS
2380 return conv;
2381
2382 delete conv;
2383 }
335d31e0
SC
2384 }
2385#endif
e95354ec
VZ
2386 // step (2)
2387 wxFontEncoding enc = m_encoding;
2388#if wxUSE_FONTMAP
c547282d
VZ
2389 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2390 {
2391 // use "false" to suppress interactive dialogs -- we can be called from
2392 // anywhere and popping up a dialog from here is the last thing we want to
2393 // do
2394 enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
2395 }
e95354ec
VZ
2396#endif // wxUSE_FONTMAP
2397
2398 switch ( enc )
2399 {
2400 case wxFONTENCODING_UTF7:
2401 return new wxMBConvUTF7;
2402
2403 case wxFONTENCODING_UTF8:
2404 return new wxMBConvUTF8;
2405
e95354ec
VZ
2406 case wxFONTENCODING_UTF16BE:
2407 return new wxMBConvUTF16BE;
2408
2409 case wxFONTENCODING_UTF16LE:
2410 return new wxMBConvUTF16LE;
2411
e95354ec
VZ
2412 case wxFONTENCODING_UTF32BE:
2413 return new wxMBConvUTF32BE;
2414
2415 case wxFONTENCODING_UTF32LE:
2416 return new wxMBConvUTF32LE;
2417
2418 default:
2419 // nothing to do but put here to suppress gcc warnings
2420 ;
2421 }
2422
2423 // step (3)
2424#if wxUSE_FONTMAP
2425 {
2426 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2427 : new wxMBConv_wxwin(m_encoding);
2428 if ( conv->IsOk() )
2429 return conv;
2430
2431 delete conv;
2432 }
2433#endif // wxUSE_FONTMAP
2434
a58d4f4d
VS
2435 // NB: This is a hack to prevent deadlock. What could otherwise happen
2436 // in Unicode build: wxConvLocal creation ends up being here
2437 // because of some failure and logs the error. But wxLog will try to
2438 // attach timestamp, for which it will need wxConvLocal (to convert
2439 // time to char* and then wchar_t*), but that fails, tries to log
2440 // error, but wxLog has a (already locked) critical section that
2441 // guards static buffer.
2442 static bool alreadyLoggingError = false;
2443 if (!alreadyLoggingError)
2444 {
2445 alreadyLoggingError = true;
2446 wxLogError(_("Cannot convert from the charset '%s'!"),
2447 m_name ? m_name
e95354ec
VZ
2448 :
2449#if wxUSE_FONTMAP
2450 wxFontMapper::GetEncodingDescription(m_encoding).c_str()
2451#else // !wxUSE_FONTMAP
2452 wxString::Format(_("encoding %s"), m_encoding).c_str()
2453#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2454 );
a58d4f4d
VS
2455 alreadyLoggingError = false;
2456 }
e95354ec
VZ
2457
2458 return NULL;
2459}
2460
2461void wxCSConv::CreateConvIfNeeded() const
2462{
2463 if ( m_deferred )
2464 {
2465 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
2466
2467#if wxUSE_INTL
2468 // if we don't have neither the name nor the encoding, use the default
2469 // encoding for this system
2470 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2471 {
4d312c22 2472 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
2473 }
2474#endif // wxUSE_INTL
2475
e95354ec
VZ
2476 self->m_convReal = DoCreate();
2477 self->m_deferred = false;
6001e347 2478 }
6001e347
RR
2479}
2480
2481size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2482{
e95354ec 2483 CreateConvIfNeeded();
dccce9ea 2484
e95354ec
VZ
2485 if (m_convReal)
2486 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
2487
2488 // latin-1 (direct)
4def3b35 2489 size_t len = strlen(psz);
dccce9ea 2490
f1339c56
RR
2491 if (buf)
2492 {
4def3b35 2493 for (size_t c = 0; c <= len; c++)
f1339c56
RR
2494 buf[c] = (unsigned char)(psz[c]);
2495 }
dccce9ea 2496
f1339c56 2497 return len;
6001e347
RR
2498}
2499
2500size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2501{
e95354ec 2502 CreateConvIfNeeded();
dccce9ea 2503
e95354ec
VZ
2504 if (m_convReal)
2505 return m_convReal->WC2MB(buf, psz, n);
1cd52418 2506
f1339c56 2507 // latin-1 (direct)
f8d791e0 2508 const size_t len = wxWcslen(psz);
f1339c56
RR
2509 if (buf)
2510 {
4def3b35 2511 for (size_t c = 0; c <= len; c++)
24642831
VS
2512 {
2513 if (psz[c] > 0xFF)
2514 return (size_t)-1;
907173e5 2515 buf[c] = (char)psz[c];
24642831
VS
2516 }
2517 }
2518 else
2519 {
2520 for (size_t c = 0; c <= len; c++)
2521 {
2522 if (psz[c] > 0xFF)
2523 return (size_t)-1;
2524 }
f1339c56 2525 }
dccce9ea 2526
f1339c56 2527 return len;
6001e347
RR
2528}
2529
bde4baac
VZ
2530// ----------------------------------------------------------------------------
2531// globals
2532// ----------------------------------------------------------------------------
2533
2534#ifdef __WINDOWS__
2535 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
2536#elif defined(__WXMAC__) && !defined(__MACH__)
2537 static wxMBConv_mac wxConvLibcObj ;
bde4baac 2538#else
dcc8fac0 2539 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
2540#endif
2541
2542static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2543static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2544static wxMBConvUTF7 wxConvUTF7Obj;
2545static wxMBConvUTF8 wxConvUTF8Obj;
2546
2547
2548WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2549WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2550WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2551WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2552WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2553WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2554
2555#else // !wxUSE_WCHAR_T
2556
2557// stand-ins in absence of wchar_t
2558WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2559 wxConvISO8859_1,
2560 wxConvLocal,
2561 wxConvUTF8;
2562
2563#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
2564
2565