]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
properly terminate UTF-32 test string
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347 1/////////////////////////////////////////////////////////////////////////////
38d4b1e4 2// Name: src/common/strconv.cpp
6001e347 3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
f6bcfd97
BP
15// ============================================================================
16// declarations
17// ============================================================================
18
19// ----------------------------------------------------------------------------
20// headers
21// ----------------------------------------------------------------------------
22
6001e347
RR
23// For compilers that support precompilation, includes "wx.h".
24#include "wx/wxprec.h"
25
26#ifdef __BORLANDC__
27 #pragma hdrstop
28#endif
29
373658eb
VZ
30#ifndef WX_PRECOMP
31 #include "wx/intl.h"
32 #include "wx/log.h"
33#endif // WX_PRECOMP
34
bde4baac
VZ
35#include "wx/strconv.h"
36
37#if wxUSE_WCHAR_T
38
7608a683 39#ifdef __WINDOWS__
532d575b 40 #include "wx/msw/private.h"
13dd924a 41 #include "wx/msw/missing.h"
0a1c1e62
GRG
42#endif
43
1c193821 44#ifndef __WXWINCE__
1cd52418 45#include <errno.h>
1c193821
JS
46#endif
47
6001e347
RR
48#include <ctype.h>
49#include <string.h>
50#include <stdlib.h>
51
e95354ec
VZ
52#if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54#endif // __WIN32__ but !__WXMICROWIN__
55
6001e347 56#ifdef __SALFORDC__
373658eb 57 #include <clib.h>
6001e347
RR
58#endif
59
b040e242 60#ifdef HAVE_ICONV
373658eb 61 #include <iconv.h>
b1d547eb 62 #include "wx/thread.h"
1cd52418 63#endif
1cd52418 64
373658eb
VZ
65#include "wx/encconv.h"
66#include "wx/fontmap.h"
7608a683 67#include "wx/utils.h"
373658eb 68
335d31e0 69#ifdef __WXMAC__
40ba2f3b 70#ifndef __DARWIN__
4227afa4
SC
71#include <ATSUnicode.h>
72#include <TextCommon.h>
73#include <TextEncodingConverter.h>
40ba2f3b 74#endif
335d31e0
SC
75
76#include "wx/mac/private.h" // includes mac headers
77#endif
ce6f8d6f
VZ
78
79#define TRACE_STRCONV _T("strconv")
80
4948c2b6 81#if SIZEOF_WCHAR_T == 2
ac11db3a
MW
82 #define WC_UTF16
83#endif
84
373658eb
VZ
85// ============================================================================
86// implementation
87// ============================================================================
88
89// ----------------------------------------------------------------------------
c91830cb 90// UTF-16 en/decoding to/from UCS-4
373658eb 91// ----------------------------------------------------------------------------
6001e347 92
b0a6bb75 93
// Encode the code point "input" as UTF-16.
//
// Returns the number of 16 bit units making up the encoded form (1 or 2) or
// (size_t)-1 if input is 0x110000 or greater. The units are stored in
// "output" unless it is NULL, in which case only the count is computed.
static size_t encode_utf16(wxUint32 input, wxUint16 *output)
{
    if ( input >= 0x110000 )
    {
        // can't be represented in UTF-16 at all
        return (size_t)-1;
    }

    if ( input <= 0xffff )
    {
        // fits into a single unit and is stored unchanged
        if ( output )
            output[0] = (wxUint16)input;

        return 1;
    }

    // needs a surrogate pair; adding 0xd7c0 to the upper bits is the same as
    // subtracting 0x10000 from input first and then adding 0xd800
    if ( output )
    {
        output[0] = (wxUint16)((input >> 10) + 0xd7c0);
        output[1] = (wxUint16)((input & 0x3ff) + 0xdc00);
    }

    return 2;
}
116
// Decode the UTF-16 sequence starting at "input" into the code point "output".
//
// Returns the number of 16 bit units consumed (1 or 2) or (size_t)-1 if the
// sequence is invalid, i.e. if it starts with a low surrogate or with a high
// surrogate not followed by a low one. In the error case "output" is set to
// the first unit so that the caller may still do something with it.
//
// input[1] is only read if input[0] is a high surrogate.
static size_t decode_utf16(const wxUint16* input, wxUint32& output)
{
    const wxUint16 first = input[0];

    if ( first < 0xd800 || first > 0xdfff )
    {
        // not a surrogate: the unit is the code point
        output = first;
        return 1;
    }

    // a pair must consist of a high surrogate (0xd800..0xdbff) followed by a
    // low one (0xdc00..0xdfff); a low surrogate in the lead position used to
    // be accepted here and produced values beyond 0x10ffff
    const bool leadIsHigh = first < 0xdc00;
    if ( !leadIsHigh || input[1] < 0xdc00 || input[1] > 0xdfff )
    {
        output = first;
        return (size_t)-1;
    }

    output = ((first - 0xd7c0) << 10) + (input[1] - 0xdc00);
    return 2;
}
135
b0a6bb75 136
f6bcfd97 137// ----------------------------------------------------------------------------
6001e347 138// wxMBConv
f6bcfd97 139// ----------------------------------------------------------------------------
2c53a80a
WS
140
141wxMBConv::~wxMBConv()
142{
143 // nothing to do here (necessary for Darwin linking probably)
144}
6001e347 145
6001e347
RR
146const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
147{
2b5f62a0 148 if ( psz )
6001e347 149 {
2b5f62a0
VZ
150 // calculate the length of the buffer needed first
151 size_t nLen = MB2WC(NULL, psz, 0);
152 if ( nLen != (size_t)-1 )
153 {
154 // now do the actual conversion
155 wxWCharBuffer buf(nLen);
635f33ce
VS
156 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
157 if ( nLen != (size_t)-1 )
158 {
159 return buf;
160 }
2b5f62a0 161 }
f6bcfd97 162 }
2b5f62a0
VZ
163
164 wxWCharBuffer buf((wchar_t *)NULL);
165
166 return buf;
6001e347
RR
167}
168
e5cceba0 169const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 170{
2b5f62a0
VZ
171 if ( pwz )
172 {
173 size_t nLen = WC2MB(NULL, pwz, 0);
174 if ( nLen != (size_t)-1 )
175 {
c91830cb 176 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
635f33ce
VS
177 nLen = WC2MB(buf.data(), pwz, nLen + 4);
178 if ( nLen != (size_t)-1 )
179 {
180 return buf;
181 }
2b5f62a0
VZ
182 }
183 }
184
185 wxCharBuffer buf((char *)NULL);
e5cceba0 186
e5cceba0 187 return buf;
6001e347
RR
188}
189
c1464d9d
VZ
// helper of cMB2WC(): returns true if at least one of the n bytes starting at
// p is not NUL and false if all of them are (in particular, if n is 0)
static bool NotAllNULs(const char *p, size_t n)
{
    for ( size_t i = 0; i < n; i++ )
    {
        if ( p[i] != '\0' )
            return true;
    }

    return false;
}
198
eec47cc6
VZ
199const wxWCharBuffer
200wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
e4e3bbb4 201{
eec47cc6
VZ
202 // the currently accumulated wide characters
203 wxWCharBuffer wbuf;
204
205 // the current length of wbuf
206 size_t lenBuf = 0;
207
c1464d9d
VZ
208 // the number of NULs terminating this string
209 size_t nulLen wxDUMMY_INITIALIZE(0);
eec47cc6
VZ
210
211 // make a copy of the input string unless it is already properly
212 // NUL-terminated
213 wxCharBuffer bufTmp;
214
c1464d9d
VZ
215 // if we were not given the input size we just have to assume that the
216 // string is properly terminated as we have no way of knowing how long it
217 // is anyhow, but if we do have the size check whether there are enough
218 // NULs at the end
219 if ( inLen != (size_t)-1 )
eec47cc6 220 {
c1464d9d
VZ
221 // we need to know how to find the end of this string
222 nulLen = GetMinMBCharWidth();
223 if ( nulLen == (size_t)-1 )
224 return wbuf;
e4e3bbb4 225
c1464d9d
VZ
226 // if there are enough NULs we can avoid the copy
227 if ( inLen < nulLen || NotAllNULs(in + inLen - nulLen, nulLen) )
eec47cc6
VZ
228 {
229 // make a copy in order to properly NUL-terminate the string
230 bufTmp = wxCharBuffer(inLen + nulLen - 1 /* 1 will be added */);
c1464d9d
VZ
231 char * const p = bufTmp.data();
232 memcpy(p, in, inLen);
233 for ( char *s = p + inLen; s < p + inLen + nulLen; s++ )
234 *s = '\0';
eec47cc6
VZ
235 }
236 }
e4e3bbb4 237
eec47cc6
VZ
238 if ( bufTmp )
239 in = bufTmp;
e4e3bbb4 240
c1464d9d 241 size_t lenChunk;
eec47cc6
VZ
242 for ( const char * const inEnd = in + inLen;; )
243 {
c1464d9d
VZ
244 // try to convert the current chunk
245 lenChunk = MB2WC(NULL, in, 0);
eec47cc6 246 if ( lenChunk == 0 )
f5fb6871 247 {
eec47cc6 248 // nothing left in the input string, conversion succeeded
c1464d9d 249 break;
f5fb6871
RN
250 }
251
eec47cc6
VZ
252 if ( lenChunk == (size_t)-1 )
253 break;
e4e3bbb4 254
c1464d9d
VZ
255 // if we already have a previous chunk, leave the NUL separating it
256 // from this one
257 if ( lenBuf )
258 lenBuf++;
259
eec47cc6
VZ
260 const size_t lenBufNew = lenBuf + lenChunk;
261 if ( !wbuf.extend(lenBufNew) )
c1464d9d
VZ
262 {
263 lenChunk = (size_t)-1;
eec47cc6 264 break;
c1464d9d 265 }
e4e3bbb4 266
eec47cc6
VZ
267 lenChunk = MB2WC(wbuf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
268 if ( lenChunk == (size_t)-1 )
269 break;
f5fb6871 270
c1464d9d
VZ
271 lenBuf = lenBufNew;
272
273 if ( inLen == (size_t)-1 )
274 {
275 // convert only one chunk in this case, as we suppose that the
276 // string is NUL-terminated and so inEnd is not used at all
277 break;
278 }
eec47cc6
VZ
279
280 // advance the input pointer past the end of this chunk
c1464d9d
VZ
281 while ( NotAllNULs(in, nulLen) )
282 {
283 // notice that we must skip over multiple bytes here as we suppose
284 // that if NUL takes 2 or 4 bytes, then all the other characters do
285 // too and so if advanced by a single byte we might erroneously
286 // detect sequences of NUL bytes in the middle of the input
287 in += nulLen;
288 }
e4e3bbb4 289
eec47cc6 290 in += nulLen; // skipping over its terminator as well
c1464d9d
VZ
291
292 // note that ">=" (and not just "==") is needed here as the terminator
293 // we skipped just above could be inside or just after the buffer
294 // delimited by inEnd
295 if ( in >= inEnd )
296 break;
297 }
298
299 if ( lenChunk == (size_t)-1 )
300 {
301 // conversion failed
302 lenBuf = 0;
303 wbuf.reset();
e4e3bbb4
RN
304 }
305
eec47cc6 306 if ( outLen )
c1464d9d 307 *outLen = lenBuf;
eec47cc6 308
c1464d9d 309 return wbuf;
e4e3bbb4
RN
310}
311
eec47cc6
VZ
312const wxCharBuffer
313wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
e4e3bbb4 314{
eec47cc6
VZ
315 // the currently accumulated multibyte characters
316 wxCharBuffer buf;
f5fb6871 317
eec47cc6
VZ
318 // the current length of buf
319 size_t lenBuf = 0;
e4e3bbb4 320
eec47cc6
VZ
321 // make a copy of the input string unless it is already properly
322 // NUL-terminated
323 //
324 // if we don't know its length we have no choice but to assume that it is,
325 // indeed, properly terminated
326 wxWCharBuffer bufTmp;
327 if ( inLen == (size_t)-1 )
e4e3bbb4 328 {
eec47cc6
VZ
329 inLen = wxWcslen(in) + 1;
330 }
331 else if ( inLen != 0 && in[inLen - 1] != L'\0' )
332 {
333 // make a copy in order to properly NUL-terminate the string
334 bufTmp = wxWCharBuffer(inLen);
335 memcpy(bufTmp.data(), in, inLen*sizeof(wchar_t));
336 }
e4e3bbb4 337
eec47cc6
VZ
338 if ( bufTmp )
339 in = bufTmp;
e4e3bbb4 340
eec47cc6
VZ
341 for ( const wchar_t * const inEnd = in + inLen;; )
342 {
343 // try to convert the current chunk, if anything left
344 size_t lenChunk = in < inEnd ? WC2MB(NULL, in, 0) : 0;
345 if ( lenChunk == 0 )
f5fb6871 346 {
eec47cc6
VZ
347 // nothing left in the input string, conversion succeeded
348 if ( outLen )
349 *outLen = lenBuf ? lenBuf - 1 : lenBuf;
350
351 return buf;
f5fb6871 352 }
e4e3bbb4 353
eec47cc6
VZ
354 if ( lenChunk == (size_t)-1 )
355 break;
3698ae71 356
eec47cc6
VZ
357 const size_t lenBufNew = lenBuf + lenChunk;
358 if ( !buf.extend(lenBufNew) )
359 break;
f5fb6871 360
eec47cc6
VZ
361 lenChunk = WC2MB(buf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
362 if ( lenChunk == (size_t)-1 )
363 break;
e4e3bbb4 364
eec47cc6
VZ
365 // chunk successfully converted, go to the next one
366 in += wxWcslen(in) + 1 /* skip NUL too */;
367 lenBuf = lenBufNew + 1;
e4e3bbb4
RN
368 }
369
eec47cc6
VZ
370 // conversion failed
371 if ( outLen )
372 *outLen = 0;
373
374 return wxCharBuffer();
e4e3bbb4
RN
375}
376
6001e347 377// ----------------------------------------------------------------------------
bde4baac 378// wxMBConvLibc
6001e347
RR
379// ----------------------------------------------------------------------------
380
bde4baac
VZ
381size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
382{
383 return wxMB2WC(buf, psz, n);
384}
385
386size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
387{
388 return wxWC2MB(buf, psz, n);
389}
e1bfe89e
RR
390
391// ----------------------------------------------------------------------------
532d575b 392// wxConvBrokenFileNames
e1bfe89e
RR
393// ----------------------------------------------------------------------------
394
eec47cc6
VZ
395#ifdef __UNIX__
396
845905d5 397wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
ea8ce907 398{
845905d5
MW
399 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
400 || wxStricmp(charset, _T("UTF8")) == 0 )
401 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
402 else
403 m_conv = new wxCSConv(charset);
ea8ce907
RR
404}
405
eec47cc6 406#endif // __UNIX__
c12b7f79 407
bde4baac 408// ----------------------------------------------------------------------------
3698ae71 409// UTF-7
bde4baac 410// ----------------------------------------------------------------------------
6001e347 411
15f2ee32 412// Implementation (C) 2004 Fredrik Roubert
6001e347 413
15f2ee32
RN
//
// BASE64 decoding table: maps each byte value to the 6 bit number it stands
// for, or to 0xff if the byte is not a BASE64 character
//
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0' to '9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x4f: 'A' to 'O'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50 - 0x5f: 'P' to 'Z'
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x6f: 'a' to 'o'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70 - 0x7f: 'p' to 'z'
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff: nothing valid here
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
452
453size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
454{
15f2ee32
RN
455 size_t len = 0;
456
04a37834 457 while ( *psz && (!buf || (len < n)) )
15f2ee32
RN
458 {
459 unsigned char cc = *psz++;
460 if (cc != '+')
461 {
462 // plain ASCII char
463 if (buf)
464 *buf++ = cc;
465 len++;
466 }
467 else if (*psz == '-')
468 {
469 // encoded plus sign
470 if (buf)
471 *buf++ = cc;
472 len++;
473 psz++;
474 }
04a37834 475 else // start of BASE64 encoded string
15f2ee32 476 {
04a37834 477 bool lsb, ok;
15f2ee32 478 unsigned int d, l;
04a37834
VZ
479 for ( ok = lsb = false, d = 0, l = 0;
480 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
481 psz++ )
15f2ee32
RN
482 {
483 d <<= 6;
484 d += cc;
485 for (l += 6; l >= 8; lsb = !lsb)
486 {
04a37834 487 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
488 if (lsb)
489 {
490 if (buf)
491 *buf++ |= c;
492 len ++;
493 }
494 else
04a37834 495 {
15f2ee32 496 if (buf)
6356d52a 497 *buf = (wchar_t)(c << 8);
04a37834
VZ
498 }
499
500 ok = true;
15f2ee32
RN
501 }
502 }
04a37834
VZ
503
504 if ( !ok )
505 {
506 // in valid UTF7 we should have valid characters after '+'
507 return (size_t)-1;
508 }
509
15f2ee32
RN
510 if (*psz == '-')
511 psz++;
512 }
513 }
04a37834
VZ
514
515 if ( buf && (len < n) )
516 *buf = '\0';
517
15f2ee32 518 return len;
6001e347
RR
519}
520
15f2ee32
RN
//
// BASE64 encoding table: the character used for each of the 64 possible
// values of a group of 6 bits
//
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
};
535
//
// UTF-7 encoding table giving the class of each of the 128 ASCII characters:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    // 0x00 - 0x0f: control characters, including TAB, LF and CR
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    // 0x10 - 0x1f: control characters
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    // 0x20 - 0x2f: space and punctuation
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    // 0x30 - 0x3f: digits and punctuation
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    // 0x40 - 0x4f: '@' and upper case letters
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x50 - 0x5f: upper case letters and punctuation
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    // 0x60 - 0x6f: '`' and lower case letters
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x70 - 0x7f: lower case letters, punctuation and DEL
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
555
667e5b3e 556size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32 557{
15f2ee32
RN
558 size_t len = 0;
559
560 while (*psz && ((!buf) || (len < n)))
561 {
562 wchar_t cc = *psz++;
563 if (cc < 0x80 && utf7encode[cc] < 1)
564 {
565 // plain ASCII char
566 if (buf)
567 *buf++ = (char)cc;
568 len++;
569 }
570#ifndef WC_UTF16
79c78d42 571 else if (((wxUint32)cc) > 0xffff)
b2c13097 572 {
15f2ee32
RN
573 // no surrogate pair generation (yet?)
574 return (size_t)-1;
575 }
576#endif
577 else
578 {
579 if (buf)
580 *buf++ = '+';
581 len++;
582 if (cc != '+')
583 {
584 // BASE64 encode string
585 unsigned int lsb, d, l;
73c902d6 586 for (d = 0, l = 0; /*nothing*/; psz++)
15f2ee32
RN
587 {
588 for (lsb = 0; lsb < 2; lsb ++)
589 {
590 d <<= 8;
591 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
592
593 for (l += 8; l >= 6; )
594 {
595 l -= 6;
596 if (buf)
597 *buf++ = utf7enb64[(d >> l) % 64];
598 len++;
599 }
600 }
601 cc = *psz;
602 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
603 break;
604 }
605 if (l != 0)
606 {
607 if (buf)
608 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
609 len++;
610 }
611 }
612 if (buf)
613 *buf++ = '-';
614 len++;
615 }
616 }
617 if (buf && (len < n))
618 *buf = 0;
619 return len;
6001e347
RR
620}
621
f6bcfd97 622// ----------------------------------------------------------------------------
6001e347 623// UTF-8
f6bcfd97 624// ----------------------------------------------------------------------------
6001e347 625
// utf8_max[i] is the greatest value which can be encoded in a UTF-8 sequence
// of i + 1 bytes; the last element only serves as a sentinel
static wxUint32 utf8_max[]=
{
    0x7f,           // 1 byte
    0x7ff,          // 2 bytes
    0xffff,         // 3 bytes
    0x1fffff,       // 4 bytes
    0x3ffffff,      // 5 bytes
    0x7fffffff,     // 6 bytes
    0xffffffff
};

// boundaries of the range of 256 private use characters to which the bytes
// of invalid UTF-8 sequences are (temporarily) remapped
const wxUint32 wxUnicodePUA = 0x100000;
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
633
6001e347
RR
634size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
635{
4def3b35
VS
636 size_t len = 0;
637
dccce9ea 638 while (*psz && ((!buf) || (len < n)))
4def3b35 639 {
ea8ce907
RR
640 const char *opsz = psz;
641 bool invalid = false;
4def3b35
VS
642 unsigned char cc = *psz++, fc = cc;
643 unsigned cnt;
dccce9ea 644 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 645 fc <<= 1;
dccce9ea 646 if (!cnt)
4def3b35
VS
647 {
648 // plain ASCII char
dccce9ea 649 if (buf)
4def3b35
VS
650 *buf++ = cc;
651 len++;
561488ef
MW
652
653 // escape the escape character for octal escapes
654 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
655 && cc == '\\' && (!buf || len < n))
656 {
657 if (buf)
658 *buf++ = cc;
659 len++;
660 }
dccce9ea
VZ
661 }
662 else
4def3b35
VS
663 {
664 cnt--;
dccce9ea 665 if (!cnt)
4def3b35
VS
666 {
667 // invalid UTF-8 sequence
ea8ce907 668 invalid = true;
dccce9ea
VZ
669 }
670 else
4def3b35
VS
671 {
672 unsigned ocnt = cnt - 1;
673 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 674 while (cnt--)
4def3b35 675 {
ea8ce907 676 cc = *psz;
dccce9ea 677 if ((cc & 0xC0) != 0x80)
4def3b35
VS
678 {
679 // invalid UTF-8 sequence
ea8ce907
RR
680 invalid = true;
681 break;
4def3b35 682 }
ea8ce907 683 psz++;
4def3b35
VS
684 res = (res << 6) | (cc & 0x3f);
685 }
ea8ce907 686 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
687 {
688 // illegal UTF-8 encoding
ea8ce907 689 invalid = true;
4def3b35 690 }
ea8ce907
RR
691 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
692 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
693 {
694 // if one of our PUA characters turns up externally
695 // it must also be treated as an illegal sequence
696 // (a bit like you have to escape an escape character)
697 invalid = true;
698 }
699 else
700 {
1cd52418 701#ifdef WC_UTF16
ea8ce907
RR
702 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
703 size_t pa = encode_utf16(res, (wxUint16 *)buf);
704 if (pa == (size_t)-1)
705 {
706 invalid = true;
707 }
708 else
709 {
710 if (buf)
711 buf += pa;
712 len += pa;
713 }
373658eb 714#else // !WC_UTF16
ea8ce907 715 if (buf)
38d4b1e4 716 *buf++ = (wchar_t)res;
ea8ce907 717 len++;
373658eb 718#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
719 }
720 }
721 if (invalid)
722 {
723 if (m_options & MAP_INVALID_UTF8_TO_PUA)
724 {
725 while (opsz < psz && (!buf || len < n))
726 {
727#ifdef WC_UTF16
728 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
729 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
730 wxASSERT(pa != (size_t)-1);
731 if (buf)
732 buf += pa;
733 opsz++;
734 len += pa;
735#else
736 if (buf)
38d4b1e4 737 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
ea8ce907
RR
738 opsz++;
739 len++;
740#endif
741 }
742 }
3698ae71 743 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
744 {
745 while (opsz < psz && (!buf || len < n))
746 {
3698ae71
VZ
747 if ( buf && len + 3 < n )
748 {
17a1ebd1 749 unsigned char on = *opsz;
3698ae71 750 *buf++ = L'\\';
17a1ebd1
VZ
751 *buf++ = (wchar_t)( L'0' + on / 0100 );
752 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
753 *buf++ = (wchar_t)( L'0' + on % 010 );
3698ae71 754 }
ea8ce907
RR
755 opsz++;
756 len += 4;
757 }
758 }
3698ae71 759 else // MAP_INVALID_UTF8_NOT
ea8ce907
RR
760 {
761 return (size_t)-1;
762 }
4def3b35
VS
763 }
764 }
6001e347 765 }
dccce9ea 766 if (buf && (len < n))
4def3b35
VS
767 *buf = 0;
768 return len;
6001e347
RR
769}
770
3698ae71
VZ
// returns true if wch is one of the octal digits, i.e. '0' to '7'
static inline bool isoctal(wchar_t wch)
{
    return !(wch < L'0' || wch > L'7');
}
775
6001e347
RR
776size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
777{
4def3b35 778 size_t len = 0;
6001e347 779
dccce9ea 780 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
781 {
782 wxUint32 cc;
1cd52418 783#ifdef WC_UTF16
b5153fd8
VZ
784 // cast is ok for WC_UTF16
785 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 786 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 787#else
4def3b35
VS
788 cc=(*psz++) & 0x7fffffff;
789#endif
3698ae71
VZ
790
791 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
792 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 793 {
dccce9ea 794 if (buf)
ea8ce907 795 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 796 len++;
3698ae71 797 }
561488ef
MW
798 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
799 && cc == L'\\' && psz[0] == L'\\' )
800 {
801 if (buf)
802 *buf++ = (char)cc;
803 psz++;
804 len++;
805 }
3698ae71
VZ
806 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
807 cc == L'\\' &&
808 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 809 {
dccce9ea 810 if (buf)
3698ae71 811 {
b2c13097
WS
812 *buf++ = (char) ((psz[0] - L'0')*0100 +
813 (psz[1] - L'0')*010 +
814 (psz[2] - L'0'));
3698ae71
VZ
815 }
816
817 psz += 3;
ea8ce907
RR
818 len++;
819 }
820 else
821 {
822 unsigned cnt;
823 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
824 if (!cnt)
4def3b35 825 {
ea8ce907
RR
826 // plain ASCII char
827 if (buf)
828 *buf++ = (char) cc;
829 len++;
830 }
831
832 else
833 {
834 len += cnt + 1;
835 if (buf)
836 {
837 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
838 while (cnt--)
839 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
840 }
4def3b35
VS
841 }
842 }
6001e347 843 }
4def3b35 844
3698ae71
VZ
845 if (buf && (len<n))
846 *buf = 0;
adb45366 847
4def3b35 848 return len;
6001e347
RR
849}
850
c91830cb
VZ
851// ----------------------------------------------------------------------------
852// UTF-16
853// ----------------------------------------------------------------------------
854
855#ifdef WORDS_BIGENDIAN
bde4baac
VZ
856 #define wxMBConvUTF16straight wxMBConvUTF16BE
857 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 858#else
bde4baac
VZ
859 #define wxMBConvUTF16swap wxMBConvUTF16BE
860 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
861#endif
862
863
c91830cb
VZ
864#ifdef WC_UTF16
865
c91830cb
VZ
866// copy 16bit MB to 16bit String
867size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
868{
869 size_t len=0;
870
871 while (*(wxUint16*)psz && (!buf || len < n))
872 {
873 if (buf)
874 *buf++ = *(wxUint16*)psz;
875 len++;
876
877 psz += sizeof(wxUint16);
878 }
879 if (buf && len<n) *buf=0;
880
881 return len;
882}
883
884
885// copy 16bit String to 16bit MB
886size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
887{
888 size_t len=0;
889
890 while (*psz && (!buf || len < n))
891 {
892 if (buf)
893 {
894 *(wxUint16*)buf = *psz;
895 buf += sizeof(wxUint16);
896 }
897 len += sizeof(wxUint16);
898 psz++;
899 }
900 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
901
902 return len;
903}
904
905
906// swap 16bit MB to 16bit String
907size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
908{
bfab25d4 909 size_t len = 0;
c91830cb 910
da12017a
VZ
911 // UTF16 string must be terminated by 2 NULs as single NULs may occur
912 // inside the string
913 while ( (psz[0] || psz[1]) && (!buf || len < n) )
c91830cb 914 {
bfab25d4 915 if ( buf )
c91830cb
VZ
916 {
917 ((char *)buf)[0] = psz[1];
918 ((char *)buf)[1] = psz[0];
919 buf++;
920 }
921 len++;
bfab25d4 922 psz += 2;
c91830cb 923 }
bfab25d4
VZ
924
925 if ( buf && len < n )
926 *buf = L'\0';
c91830cb
VZ
927
928 return len;
929}
930
931
932// swap 16bit MB to 16bit String
933size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
934{
eec47cc6 935 size_t len = 0;
c91830cb 936
eec47cc6 937 while ( *psz && (!buf || len < n) )
c91830cb 938 {
eec47cc6 939 if ( buf )
c91830cb
VZ
940 {
941 *buf++ = ((char*)psz)[1];
942 *buf++ = ((char*)psz)[0];
943 }
eec47cc6 944 len += 2;
c91830cb
VZ
945 psz++;
946 }
eec47cc6
VZ
947
948 if ( buf && len < n )
949 *buf = '\0';
c91830cb
VZ
950
951 return len;
952}
953
954
955#else // WC_UTF16
956
957
958// copy 16bit MB to 32bit String
959size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
960{
961 size_t len=0;
962
963 while (*(wxUint16*)psz && (!buf || len < n))
964 {
965 wxUint32 cc;
966 size_t pa=decode_utf16((wxUint16*)psz, cc);
967 if (pa == (size_t)-1)
968 return pa;
969
970 if (buf)
38d4b1e4 971 *buf++ = (wchar_t)cc;
c91830cb
VZ
972 len++;
973 psz += pa * sizeof(wxUint16);
974 }
975 if (buf && len<n) *buf=0;
976
977 return len;
978}
979
980
981// copy 32bit String to 16bit MB
982size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
983{
984 size_t len=0;
985
986 while (*psz && (!buf || len < n))
987 {
988 wxUint16 cc[2];
989 size_t pa=encode_utf16(*psz, cc);
990
991 if (pa == (size_t)-1)
992 return pa;
993
994 if (buf)
995 {
69b80d28 996 *(wxUint16*)buf = cc[0];
b5153fd8 997 buf += sizeof(wxUint16);
c91830cb 998 if (pa > 1)
69b80d28
VZ
999 {
1000 *(wxUint16*)buf = cc[1];
1001 buf += sizeof(wxUint16);
1002 }
c91830cb
VZ
1003 }
1004
1005 len += pa*sizeof(wxUint16);
1006 psz++;
1007 }
1008 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1009
1010 return len;
1011}
1012
1013
1014// swap 16bit MB to 32bit String
1015size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1016{
1017 size_t len=0;
1018
1019 while (*(wxUint16*)psz && (!buf || len < n))
1020 {
1021 wxUint32 cc;
1022 char tmp[4];
1023 tmp[0]=psz[1]; tmp[1]=psz[0];
1024 tmp[2]=psz[3]; tmp[3]=psz[2];
1025
1026 size_t pa=decode_utf16((wxUint16*)tmp, cc);
1027 if (pa == (size_t)-1)
1028 return pa;
1029
1030 if (buf)
38d4b1e4 1031 *buf++ = (wchar_t)cc;
c91830cb
VZ
1032
1033 len++;
1034 psz += pa * sizeof(wxUint16);
1035 }
1036 if (buf && len<n) *buf=0;
1037
1038 return len;
1039}
1040
1041
1042// swap 32bit String to 16bit MB
1043size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1044{
1045 size_t len=0;
1046
1047 while (*psz && (!buf || len < n))
1048 {
1049 wxUint16 cc[2];
1050 size_t pa=encode_utf16(*psz, cc);
1051
1052 if (pa == (size_t)-1)
1053 return pa;
1054
1055 if (buf)
1056 {
1057 *buf++ = ((char*)cc)[1];
1058 *buf++ = ((char*)cc)[0];
1059 if (pa > 1)
1060 {
1061 *buf++ = ((char*)cc)[3];
1062 *buf++ = ((char*)cc)[2];
1063 }
1064 }
1065
1066 len += pa*sizeof(wxUint16);
1067 psz++;
1068 }
1069 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1070
1071 return len;
1072}
1073
1074#endif // WC_UTF16
1075
1076
1077// ----------------------------------------------------------------------------
1078// UTF-32
1079// ----------------------------------------------------------------------------
1080
1081#ifdef WORDS_BIGENDIAN
1082#define wxMBConvUTF32straight wxMBConvUTF32BE
1083#define wxMBConvUTF32swap wxMBConvUTF32LE
1084#else
1085#define wxMBConvUTF32swap wxMBConvUTF32BE
1086#define wxMBConvUTF32straight wxMBConvUTF32LE
1087#endif
1088
1089
1090WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1091WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1092
1093
1094#ifdef WC_UTF16
1095
1096// copy 32bit MB to 16bit String
1097size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1098{
1099 size_t len=0;
1100
1101 while (*(wxUint32*)psz && (!buf || len < n))
1102 {
1103 wxUint16 cc[2];
1104
1105 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1106 if (pa == (size_t)-1)
1107 return pa;
1108
1109 if (buf)
1110 {
1111 *buf++ = cc[0];
1112 if (pa > 1)
1113 *buf++ = cc[1];
1114 }
1115 len += pa;
1116 psz += sizeof(wxUint32);
1117 }
1118 if (buf && len<n) *buf=0;
1119
1120 return len;
1121}
1122
1123
1124// copy 16bit String to 32bit MB
1125size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1126{
1127 size_t len=0;
1128
1129 while (*psz && (!buf || len < n))
1130 {
1131 wxUint32 cc;
1132
b5153fd8
VZ
1133 // cast is ok for WC_UTF16
1134 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
1135 if (pa == (size_t)-1)
1136 return pa;
1137
1138 if (buf)
1139 {
1140 *(wxUint32*)buf = cc;
1141 buf += sizeof(wxUint32);
1142 }
1143 len += sizeof(wxUint32);
1144 psz += pa;
1145 }
b5153fd8
VZ
1146
1147 if (buf && len<=n-sizeof(wxUint32))
1148 *(wxUint32*)buf=0;
c91830cb
VZ
1149
1150 return len;
1151}
1152
1153
1154
1155// swap 32bit MB to 16bit String
1156size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1157{
1158 size_t len=0;
1159
1160 while (*(wxUint32*)psz && (!buf || len < n))
1161 {
1162 char tmp[4];
1163 tmp[0] = psz[3]; tmp[1] = psz[2];
1164 tmp[2] = psz[1]; tmp[3] = psz[0];
1165
1166
1167 wxUint16 cc[2];
1168
1169 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1170 if (pa == (size_t)-1)
1171 return pa;
1172
1173 if (buf)
1174 {
1175 *buf++ = cc[0];
1176 if (pa > 1)
1177 *buf++ = cc[1];
1178 }
1179 len += pa;
1180 psz += sizeof(wxUint32);
1181 }
b5153fd8
VZ
1182
1183 if (buf && len<n)
1184 *buf=0;
c91830cb
VZ
1185
1186 return len;
1187}
1188
1189
1190// swap 16bit String to 32bit MB
1191size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1192{
1193 size_t len=0;
1194
1195 while (*psz && (!buf || len < n))
1196 {
1197 char cc[4];
1198
b5153fd8
VZ
1199 // cast is ok for WC_UTF16
1200 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
1201 if (pa == (size_t)-1)
1202 return pa;
1203
1204 if (buf)
1205 {
1206 *buf++ = cc[3];
1207 *buf++ = cc[2];
1208 *buf++ = cc[1];
1209 *buf++ = cc[0];
1210 }
1211 len += sizeof(wxUint32);
1212 psz += pa;
1213 }
b5153fd8
VZ
1214
1215 if (buf && len<=n-sizeof(wxUint32))
1216 *(wxUint32*)buf=0;
c91830cb
VZ
1217
1218 return len;
1219}
1220
1221#else // WC_UTF16
1222
1223
1224// copy 32bit MB to 32bit String
1225size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1226{
1227 size_t len=0;
1228
1229 while (*(wxUint32*)psz && (!buf || len < n))
1230 {
1231 if (buf)
38d4b1e4 1232 *buf++ = (wchar_t)(*(wxUint32*)psz);
c91830cb
VZ
1233 len++;
1234 psz += sizeof(wxUint32);
1235 }
b5153fd8
VZ
1236
1237 if (buf && len<n)
1238 *buf=0;
c91830cb
VZ
1239
1240 return len;
1241}
1242
1243
1244// copy 32bit String to 32bit MB
1245size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1246{
1247 size_t len=0;
1248
1249 while (*psz && (!buf || len < n))
1250 {
1251 if (buf)
1252 {
1253 *(wxUint32*)buf = *psz;
1254 buf += sizeof(wxUint32);
1255 }
1256
1257 len += sizeof(wxUint32);
1258 psz++;
1259 }
1260
b5153fd8
VZ
1261 if (buf && len<=n-sizeof(wxUint32))
1262 *(wxUint32*)buf=0;
c91830cb
VZ
1263
1264 return len;
1265}
1266
1267
1268// swap 32bit MB to 32bit String
1269size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1270{
1271 size_t len=0;
1272
1273 while (*(wxUint32*)psz && (!buf || len < n))
1274 {
1275 if (buf)
1276 {
1277 ((char *)buf)[0] = psz[3];
1278 ((char *)buf)[1] = psz[2];
1279 ((char *)buf)[2] = psz[1];
1280 ((char *)buf)[3] = psz[0];
1281 buf++;
1282 }
1283 len++;
1284 psz += sizeof(wxUint32);
1285 }
b5153fd8
VZ
1286
1287 if (buf && len<n)
1288 *buf=0;
c91830cb
VZ
1289
1290 return len;
1291}
1292
1293
1294// swap 32bit String to 32bit MB
1295size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1296{
1297 size_t len=0;
1298
1299 while (*psz && (!buf || len < n))
1300 {
1301 if (buf)
1302 {
1303 *buf++ = ((char *)psz)[3];
1304 *buf++ = ((char *)psz)[2];
1305 *buf++ = ((char *)psz)[1];
1306 *buf++ = ((char *)psz)[0];
1307 }
1308 len += sizeof(wxUint32);
1309 psz++;
1310 }
b5153fd8
VZ
1311
1312 if (buf && len<=n-sizeof(wxUint32))
1313 *(wxUint32*)buf=0;
c91830cb
VZ
1314
1315 return len;
1316}
1317
1318
1319#endif // WC_UTF16
1320
1321
36acb880
VZ
1322// ============================================================================
1323// The classes doing conversion using the iconv_xxx() functions
1324// ============================================================================
3caec1bb 1325
b040e242 1326#ifdef HAVE_ICONV
3a0d76bc 1327
b1d547eb
VS
1328// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1329// E2BIG if output buffer is _exactly_ as big as needed. Such case is
1330// (unless there's yet another bug in glibc) the only case when iconv()
1331// returns with (size_t)-1 (which means error) and says there are 0 bytes
1332// left in the input buffer -- when _real_ error occurs,
1333// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1334// iconv() failure.
3caec1bb
VS
1335// [This bug does not appear in glibc 2.2.]
1336#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1337#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1338 (errno != E2BIG || bufLeft != 0))
1339#else
1340#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1341#endif
1342
ab217dba 1343#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880 1344
74a7eb0b
VZ
1345#define ICONV_T_INVALID ((iconv_t)-1)
1346
1347#if SIZEOF_WCHAR_T == 4
1348 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1349 #define WC_ENC wxFONTENCODING_UTF32
1350#elif SIZEOF_WCHAR_T == 2
1351 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1352 #define WC_ENC wxFONTENCODING_UTF16
1353#else // sizeof(wchar_t) != 2 nor 4
1354 // does this ever happen?
1355 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1356#endif
1357
36acb880 1358// ----------------------------------------------------------------------------
e95354ec 1359// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1360// ----------------------------------------------------------------------------
1361
e95354ec 1362class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1363{
1364public:
e95354ec
VZ
1365 wxMBConv_iconv(const wxChar *name);
1366 virtual ~wxMBConv_iconv();
36acb880 1367
bde4baac
VZ
1368 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1369 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1370
e95354ec 1371 bool IsOk() const
74a7eb0b 1372 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
36acb880
VZ
1373
1374protected:
1375 // the iconv handlers used to translate from multibyte to wide char and in
1376 // the other direction
1377 iconv_t m2w,
1378 w2m;
b1d547eb
VS
1379#if wxUSE_THREADS
1380 // guards access to m2w and w2m objects
1381 wxMutex m_iconvMutex;
1382#endif
36acb880
VZ
1383
1384private:
c1464d9d
VZ
1385 // classify this encoding as explained in wxMBConv::GetMinMBCharWidth()
1386 // comment
1387 virtual size_t GetMinMBCharWidth() const;
eec47cc6 1388
e95354ec 1389 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880 1390 // available on this machine, it will remain NULL
74a7eb0b 1391 static wxString ms_wcCharsetName;
36acb880
VZ
1392
1393 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1394 // different endian-ness than the native one
405d8f46 1395 static bool ms_wcNeedsSwap;
eec47cc6 1396
c1464d9d
VZ
1397 // cached result of GetMinMBCharWidth(); set to 0 meaning "unknown"
1398 // initially
1399 size_t m_minMBCharWidth;
36acb880
VZ
1400};
1401
8f115891
MW
1402// make the constructor available for unit testing
1403WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1404{
1405 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1406 if ( !result->IsOk() )
1407 {
1408 delete result;
1409 return 0;
1410 }
1411 return result;
1412}
1413
422e411e 1414wxString wxMBConv_iconv::ms_wcCharsetName;
e95354ec 1415bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1416
e95354ec 1417wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 1418{
c1464d9d 1419 m_minMBCharWidth = 0;
eec47cc6 1420
0331b385
VZ
1421 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1422 // names for the charsets
200a9923 1423 const wxCharBuffer cname(wxString(name).ToAscii());
04c79127 1424
36acb880 1425 // check for charset that represents wchar_t:
74a7eb0b 1426 if ( ms_wcCharsetName.empty() )
f1339c56 1427 {
c2b83fdd
VZ
1428 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1429
74a7eb0b
VZ
1430#if wxUSE_FONTMAP
1431 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1432#else // !wxUSE_FONTMAP
1433 static const wxChar *names[] =
36acb880 1434 {
74a7eb0b
VZ
1435#if SIZEOF_WCHAR_T == 4
1436 _T("UCS-4"),
1437#elif SIZEOF_WCHAR_T = 2
1438 _T("UCS-2"),
1439#endif
1440 NULL
1441 };
1442#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
36acb880 1443
d1f024a8 1444 for ( ; *names && ms_wcCharsetName.empty(); ++names )
74a7eb0b 1445 {
17a1ebd1 1446 const wxString nameCS(*names);
74a7eb0b
VZ
1447
1448 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
17a1ebd1 1449 wxString nameXE(nameCS);
74a7eb0b
VZ
1450 #ifdef WORDS_BIGENDIAN
1451 nameXE += _T("BE");
1452 #else // little endian
1453 nameXE += _T("LE");
1454 #endif
1455
c2b83fdd
VZ
1456 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1457 nameXE.c_str());
1458
74a7eb0b
VZ
1459 m2w = iconv_open(nameXE.ToAscii(), cname);
1460 if ( m2w == ICONV_T_INVALID )
3a0d76bc 1461 {
74a7eb0b 1462 // try charset w/o bytesex info (e.g. "UCS4")
c2b83fdd
VZ
1463 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1464 nameCS.c_str());
17a1ebd1 1465 m2w = iconv_open(nameCS.ToAscii(), cname);
3a0d76bc 1466
74a7eb0b
VZ
1467 // and check for bytesex ourselves:
1468 if ( m2w != ICONV_T_INVALID )
3a0d76bc 1469 {
74a7eb0b
VZ
1470 char buf[2], *bufPtr;
1471 wchar_t wbuf[2], *wbufPtr;
1472 size_t insz, outsz;
1473 size_t res;
1474
1475 buf[0] = 'A';
1476 buf[1] = 0;
1477 wbuf[0] = 0;
1478 insz = 2;
1479 outsz = SIZEOF_WCHAR_T * 2;
1480 wbufPtr = wbuf;
1481 bufPtr = buf;
1482
1483 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1484 (char**)&wbufPtr, &outsz);
1485
1486 if (ICONV_FAILED(res, insz))
1487 {
1488 wxLogLastError(wxT("iconv"));
422e411e 1489 wxLogError(_("Conversion to charset '%s' doesn't work."),
17a1ebd1 1490 nameCS.c_str());
74a7eb0b
VZ
1491 }
1492 else // ok, can convert to this encoding, remember it
1493 {
17a1ebd1 1494 ms_wcCharsetName = nameCS;
74a7eb0b
VZ
1495 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1496 }
3a0d76bc
VS
1497 }
1498 }
74a7eb0b 1499 else // use charset not requiring byte swapping
36acb880 1500 {
74a7eb0b 1501 ms_wcCharsetName = nameXE;
36acb880 1502 }
3a0d76bc 1503 }
74a7eb0b 1504
0944fceb 1505 wxLogTrace(TRACE_STRCONV,
74a7eb0b 1506 wxT("iconv wchar_t charset is \"%s\"%s"),
cae8f1bf 1507 ms_wcCharsetName.empty() ? _T("<none>")
74a7eb0b
VZ
1508 : ms_wcCharsetName.c_str(),
1509 ms_wcNeedsSwap ? _T(" (needs swap)")
1510 : _T(""));
3a0d76bc 1511 }
36acb880 1512 else // we already have ms_wcCharsetName
3caec1bb 1513 {
74a7eb0b 1514 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
f1339c56 1515 }
dccce9ea 1516
74a7eb0b 1517 if ( ms_wcCharsetName.empty() )
f1339c56 1518 {
74a7eb0b 1519 w2m = ICONV_T_INVALID;
36acb880 1520 }
405d8f46
VZ
1521 else
1522 {
74a7eb0b
VZ
1523 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1524 if ( w2m == ICONV_T_INVALID )
1525 {
1526 wxLogTrace(TRACE_STRCONV,
1527 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
422e411e 1528 ms_wcCharsetName.c_str(), cname.data());
74a7eb0b 1529 }
405d8f46 1530 }
36acb880 1531}
3caec1bb 1532
e95354ec 1533wxMBConv_iconv::~wxMBConv_iconv()
36acb880 1534{
74a7eb0b 1535 if ( m2w != ICONV_T_INVALID )
36acb880 1536 iconv_close(m2w);
74a7eb0b 1537 if ( w2m != ICONV_T_INVALID )
36acb880
VZ
1538 iconv_close(w2m);
1539}
3a0d76bc 1540
bde4baac 1541size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1542{
b1d547eb
VS
1543#if wxUSE_THREADS
1544 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1545 // Unfortunately there is a couple of global wxCSConv objects such as
1546 // wxConvLocal that are used all over wx code, so we have to make sure
1547 // the handle is used by at most one thread at the time. Otherwise
1548 // only a few wx classes would be safe to use from non-main threads
1549 // as MB<->WC conversion would fail "randomly".
1550 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1551#endif
3698ae71 1552
36acb880
VZ
1553 size_t inbuf = strlen(psz);
1554 size_t outbuf = n * SIZEOF_WCHAR_T;
1555 size_t res, cres;
1556 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1557 wchar_t *bufPtr = buf;
1558 const char *pszPtr = psz;
1559
1560 if (buf)
1561 {
1562 // have destination buffer, convert there
1563 cres = iconv(m2w,
1564 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1565 (char**)&bufPtr, &outbuf);
1566 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1567
36acb880 1568 if (ms_wcNeedsSwap)
3a0d76bc 1569 {
36acb880 1570 // convert to native endianness
17a1ebd1
VZ
1571 for ( unsigned i = 0; i < res; i++ )
1572 buf[n] = WC_BSWAP(buf[i]);
3a0d76bc 1573 }
adb45366 1574
49dd9820
VS
1575 // NB: iconv was given only strlen(psz) characters on input, and so
1576 // it couldn't convert the trailing zero. Let's do it ourselves
1577 // if there's some room left for it in the output buffer.
1578 if (res < n)
1579 buf[res] = 0;
36acb880
VZ
1580 }
1581 else
1582 {
1583 // no destination buffer... convert using temp buffer
1584 // to calculate destination buffer requirement
1585 wchar_t tbuf[8];
1586 res = 0;
1587 do {
1588 bufPtr = tbuf;
1589 outbuf = 8*SIZEOF_WCHAR_T;
1590
1591 cres = iconv(m2w,
1592 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1593 (char**)&bufPtr, &outbuf );
1594
1595 res += 8-(outbuf/SIZEOF_WCHAR_T);
1596 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1597 }
dccce9ea 1598
36acb880 1599 if (ICONV_FAILED(cres, inbuf))
f1339c56 1600 {
36acb880 1601 //VS: it is ok if iconv fails, hence trace only
ce6f8d6f 1602 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
36acb880
VZ
1603 return (size_t)-1;
1604 }
1605
1606 return res;
1607}
1608
bde4baac 1609size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1610{
b1d547eb
VS
1611#if wxUSE_THREADS
1612 // NB: explained in MB2WC
1613 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1614#endif
3698ae71 1615
156162ec
MW
1616 size_t inlen = wxWcslen(psz);
1617 size_t inbuf = inlen * SIZEOF_WCHAR_T;
36acb880
VZ
1618 size_t outbuf = n;
1619 size_t res, cres;
3a0d76bc 1620
36acb880 1621 wchar_t *tmpbuf = 0;
3caec1bb 1622
36acb880
VZ
1623 if (ms_wcNeedsSwap)
1624 {
1625 // need to copy to temp buffer to switch endianness
74a7eb0b 1626 // (doing WC_BSWAP twice on the original buffer won't help, as it
36acb880 1627 // could be in read-only memory, or be accessed in some other thread)
74a7eb0b 1628 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
17a1ebd1
VZ
1629 for ( size_t i = 0; i < inlen; i++ )
1630 tmpbuf[n] = WC_BSWAP(psz[i]);
156162ec 1631 tmpbuf[inlen] = L'\0';
74a7eb0b 1632 psz = tmpbuf;
36acb880 1633 }
3a0d76bc 1634
36acb880
VZ
1635 if (buf)
1636 {
1637 // have destination buffer, convert there
1638 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1639
36acb880 1640 res = n-outbuf;
adb45366 1641
49dd9820
VS
1642 // NB: iconv was given only wcslen(psz) characters on input, and so
1643 // it couldn't convert the trailing zero. Let's do it ourselves
1644 // if there's some room left for it in the output buffer.
1645 if (res < n)
1646 buf[0] = 0;
36acb880
VZ
1647 }
1648 else
1649 {
1650 // no destination buffer... convert using temp buffer
1651 // to calculate destination buffer requirement
1652 char tbuf[16];
1653 res = 0;
1654 do {
1655 buf = tbuf; outbuf = 16;
1656
1657 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1658
36acb880
VZ
1659 res += 16 - outbuf;
1660 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1661 }
dccce9ea 1662
36acb880
VZ
1663 if (ms_wcNeedsSwap)
1664 {
1665 free(tmpbuf);
1666 }
dccce9ea 1667
36acb880
VZ
1668 if (ICONV_FAILED(cres, inbuf))
1669 {
ce6f8d6f 1670 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
36acb880
VZ
1671 return (size_t)-1;
1672 }
1673
1674 return res;
1675}
1676
c1464d9d 1677size_t wxMBConv_iconv::GetMinMBCharWidth() const
eec47cc6 1678{
c1464d9d 1679 if ( m_minMBCharWidth == 0 )
eec47cc6
VZ
1680 {
1681 wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1682
1683#if wxUSE_THREADS
1684 // NB: explained in MB2WC
1685 wxMutexLocker lock(self->m_iconvMutex);
1686#endif
1687
356410fc 1688 wchar_t *wnul = L"";
c1464d9d 1689 char buf[8]; // should be enough for NUL in any encoding
356410fc 1690 size_t inLen = sizeof(wchar_t),
c1464d9d 1691 outLen = WXSIZEOF(buf);
39406a5d 1692 char *in = (char *)wnul;
c1464d9d 1693 char *out = buf;
39406a5d 1694 if ( iconv(w2m, ICONV_CHAR_CAST(&in), &inLen, &out, &outLen) == (size_t)-1 )
356410fc 1695 {
c1464d9d 1696 self->m_minMBCharWidth = (size_t)-1;
356410fc
VZ
1697 }
1698 else // ok
1699 {
c1464d9d 1700 self->m_minMBCharWidth = out - buf;
356410fc 1701 }
eec47cc6
VZ
1702 }
1703
c1464d9d 1704 return m_minMBCharWidth;
eec47cc6
VZ
1705}
1706
b040e242 1707#endif // HAVE_ICONV
36acb880 1708
e95354ec 1709
36acb880
VZ
1710// ============================================================================
1711// Win32 conversion classes
1712// ============================================================================
1cd52418 1713
e95354ec 1714#ifdef wxHAVE_WIN32_MB2WC
373658eb 1715
8b04d4c4 1716// from utils.cpp
d775fa82 1717#if wxUSE_FONTMAP
8b04d4c4
VZ
1718extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1719extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1720#endif
373658eb 1721
e95354ec 1722class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1723{
1724public:
bde4baac
VZ
1725 wxMBConv_win32()
1726 {
1727 m_CodePage = CP_ACP;
c1464d9d 1728 m_minMBCharWidth = 0;
bde4baac
VZ
1729 }
1730
7608a683 1731#if wxUSE_FONTMAP
e95354ec 1732 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1733 {
1734 m_CodePage = wxCharsetToCodepage(name);
c1464d9d 1735 m_minMBCharWidth = 0;
bde4baac 1736 }
dccce9ea 1737
e95354ec 1738 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1739 {
1740 m_CodePage = wxEncodingToCodepage(encoding);
c1464d9d 1741 m_minMBCharWidth = 0;
bde4baac 1742 }
eec47cc6 1743#endif // wxUSE_FONTMAP
8b04d4c4 1744
bde4baac 1745 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1746 {
02272c9c
VZ
1747 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1748 // the behaviour is not compatible with the Unix version (using iconv)
1749 // and break the library itself, e.g. wxTextInputStream::NextChar()
1750 // wouldn't work if reading an incomplete MB char didn't result in an
1751 // error
667e5b3e
VZ
1752 //
1753 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1754 // an error (tested under Windows Server 2003) and apparently it is
1755 // done on purpose, i.e. the function accepts any input in this case
1756 // and although I'd prefer to return error on ill-formed output, our
1757 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1758 // explicitly ill-formed according to RFC 2152) neither so we don't
1759 // even have any fallback here...
89028980
VS
1760 //
1761 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1762 // Win XP or newer and if it is specified on older versions, conversion
1763 // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1764 // fails. So we can only use the flag on newer Windows versions.
1765 // Additionally, the flag is not supported by UTF7, symbol and CJK
1766 // encodings. See here:
1767 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1768 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1769 int flags = 0;
1770 if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
1771 m_CodePage < 50000 &&
1772 IsAtLeastWin2kSP4() )
1773 {
1774 flags = MB_ERR_INVALID_CHARS;
1775 }
1776 else if ( m_CodePage == CP_UTF8 )
1777 {
1778 // Avoid round-trip in the special case of UTF-8 by using our
1779 // own UTF-8 conversion code:
1780 return wxMBConvUTF8().MB2WC(buf, psz, n);
1781 }
667e5b3e 1782
2b5f62a0
VZ
1783 const size_t len = ::MultiByteToWideChar
1784 (
1785 m_CodePage, // code page
667e5b3e 1786 flags, // flags: fall on error
2b5f62a0
VZ
1787 psz, // input string
1788 -1, // its length (NUL-terminated)
b4da152e 1789 buf, // output string
2b5f62a0
VZ
1790 buf ? n : 0 // size of output buffer
1791 );
89028980
VS
1792 if ( !len )
1793 {
1794 // function totally failed
1795 return (size_t)-1;
1796 }
1797
1798 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1799 // check if we succeeded, by doing a double trip:
1800 if ( !flags && buf )
1801 {
53c174fc
VZ
1802 const size_t mbLen = strlen(psz);
1803 wxCharBuffer mbBuf(mbLen);
89028980
VS
1804 if ( ::WideCharToMultiByte
1805 (
1806 m_CodePage,
1807 0,
1808 buf,
1809 -1,
1810 mbBuf.data(),
53c174fc 1811 mbLen + 1, // size in bytes, not length
89028980
VS
1812 NULL,
1813 NULL
1814 ) == 0 ||
1815 strcmp(mbBuf, psz) != 0 )
1816 {
1817 // we didn't obtain the same thing we started from, hence
1818 // the conversion was lossy and we consider that it failed
1819 return (size_t)-1;
1820 }
1821 }
2b5f62a0 1822
03a991bc
VZ
1823 // note that it returns count of written chars for buf != NULL and size
1824 // of the needed buffer for buf == NULL so in either case the length of
1825 // the string (which never includes the terminating NUL) is one less
89028980 1826 return len - 1;
f1339c56 1827 }
dccce9ea 1828
13dd924a 1829 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1830 {
13dd924a
VZ
1831 /*
1832 we have a problem here: by default, WideCharToMultiByte() may
1833 replace characters unrepresentable in the target code page with bad
1834 quality approximations such as turning "1/2" symbol (U+00BD) into
1835 "1" for the code pages which don't have it and we, obviously, want
1836 to avoid this at any price
d775fa82 1837
13dd924a
VZ
1838 the trouble is that this function does it _silently_, i.e. it won't
1839 even tell us whether it did or not... Win98/2000 and higher provide
1840 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1841 we have to resort to a round trip, i.e. check that converting back
1842 results in the same string -- this is, of course, expensive but
1843 otherwise we simply can't be sure to not garble the data.
1844 */
1845
1846 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1847 // it doesn't work with CJK encodings (which we test for rather roughly
1848 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1849 // supporting it
907173e5
WS
1850 BOOL usedDef wxDUMMY_INITIALIZE(false);
1851 BOOL *pUsedDef;
13dd924a
VZ
1852 int flags;
1853 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1854 {
1855 // it's our lucky day
1856 flags = WC_NO_BEST_FIT_CHARS;
1857 pUsedDef = &usedDef;
1858 }
1859 else // old system or unsupported encoding
1860 {
1861 flags = 0;
1862 pUsedDef = NULL;
1863 }
1864
2b5f62a0
VZ
1865 const size_t len = ::WideCharToMultiByte
1866 (
1867 m_CodePage, // code page
13dd924a
VZ
1868 flags, // either none or no best fit
1869 pwz, // input string
2b5f62a0
VZ
1870 -1, // it is (wide) NUL-terminated
1871 buf, // output buffer
1872 buf ? n : 0, // and its size
1873 NULL, // default "replacement" char
13dd924a 1874 pUsedDef // [out] was it used?
2b5f62a0
VZ
1875 );
1876
13dd924a
VZ
1877 if ( !len )
1878 {
1879 // function totally failed
1880 return (size_t)-1;
1881 }
1882
1883 // if we were really converting, check if we succeeded
1884 if ( buf )
1885 {
1886 if ( flags )
1887 {
1888 // check if the conversion failed, i.e. if any replacements
1889 // were done
1890 if ( usedDef )
1891 return (size_t)-1;
1892 }
1893 else // we must resort to double tripping...
1894 {
1895 wxWCharBuffer wcBuf(n);
1896 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1897 wcscmp(wcBuf, pwz) != 0 )
1898 {
1899 // we didn't obtain the same thing we started from, hence
1900 // the conversion was lossy and we consider that it failed
1901 return (size_t)-1;
1902 }
1903 }
1904 }
1905
03a991bc 1906 // see the comment above for the reason of "len - 1"
13dd924a 1907 return len - 1;
f1339c56 1908 }
dccce9ea 1909
13dd924a
VZ
1910 bool IsOk() const { return m_CodePage != -1; }
1911
1912private:
1913 static bool CanUseNoBestFit()
1914 {
1915 static int s_isWin98Or2k = -1;
1916
1917 if ( s_isWin98Or2k == -1 )
1918 {
1919 int verMaj, verMin;
1920 switch ( wxGetOsVersion(&verMaj, &verMin) )
1921 {
1922 case wxWIN95:
1923 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1924 break;
1925
1926 case wxWINDOWS_NT:
1927 s_isWin98Or2k = verMaj >= 5;
1928 break;
1929
1930 default:
1931 // unknown, be conseravtive by default
1932 s_isWin98Or2k = 0;
1933 }
1934
1935 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1936 }
1937
1938 return s_isWin98Or2k == 1;
1939 }
f1339c56 1940
89028980
VS
1941 static bool IsAtLeastWin2kSP4()
1942 {
8942f83a
WS
1943#ifdef __WXWINCE__
1944 return false;
1945#else
89028980
VS
1946 static int s_isAtLeastWin2kSP4 = -1;
1947
1948 if ( s_isAtLeastWin2kSP4 == -1 )
1949 {
1950 OSVERSIONINFOEX ver;
1951
1952 memset(&ver, 0, sizeof(ver));
1953 ver.dwOSVersionInfoSize = sizeof(ver);
1954 GetVersionEx((OSVERSIONINFO*)&ver);
1955
1956 s_isAtLeastWin2kSP4 =
1957 ((ver.dwMajorVersion > 5) || // Vista+
1958 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
1959 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
1960 ver.wServicePackMajor >= 4)) // 2000 SP4+
1961 ? 1 : 0;
1962 }
1963
1964 return s_isAtLeastWin2kSP4 == 1;
8942f83a 1965#endif
89028980
VS
1966 }
1967
c1464d9d 1968 virtual size_t GetMinMBCharWidth() const
eec47cc6 1969 {
c1464d9d 1970 if ( m_minMBCharWidth == 0 )
eec47cc6 1971 {
c1464d9d
VZ
1972 int len = ::WideCharToMultiByte
1973 (
1974 m_CodePage, // code page
1975 0, // no flags
1976 L"", // input string
1977 1, // translate just the NUL
1978 NULL, // output buffer
1979 0, // and its size
1980 NULL, // no replacement char
1981 NULL // [out] don't care if it was used
1982 );
1983
eec47cc6 1984 wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
c1464d9d
VZ
1985 switch ( len )
1986 {
1987 default:
1988 wxLogDebug(_T("Unexpected NUL length %d"), len);
1989 // fall through
eec47cc6 1990
c1464d9d
VZ
1991 case 0:
1992 self->m_minMBCharWidth = (size_t)-1;
1993 break;
eec47cc6 1994
c1464d9d
VZ
1995 case 1:
1996 case 2:
1997 case 4:
1998 self->m_minMBCharWidth = len;
1999 break;
2000 }
eec47cc6
VZ
2001 }
2002
c1464d9d 2003 return m_minMBCharWidth;
eec47cc6
VZ
2004 }
2005
c1464d9d 2006 // the code page we're working with
b1d66b54 2007 long m_CodePage;
c1464d9d
VZ
2008
2009 // cached result of GetMinMBCharWidth(), set to 0 initially meaning
2010 // "unknown"
2011 size_t m_minMBCharWidth;
1cd52418 2012};
e95354ec
VZ
2013
2014#endif // wxHAVE_WIN32_MB2WC
2015
f7e98dee
RN
2016// ============================================================================
2017// Cocoa conversion classes
2018// ============================================================================
2019
2020#if defined(__WXCOCOA__)
2021
ecd9653b 2022// RN: There is no UTF-32 support in either Core Foundation or
f7e98dee
RN
2023// Cocoa. Strangely enough, Core Foundation uses UTF-32
2024// internally quite a bit - it's just not public (yet).
2025
2026#include <CoreFoundation/CFString.h>
2027#include <CoreFoundation/CFStringEncodingExt.h>
2028
2029CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 2030{
638357a0 2031 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ecd9653b
WS
2032 if ( encoding == wxFONTENCODING_DEFAULT )
2033 {
638357a0 2034 enc = CFStringGetSystemEncoding();
ecd9653b
WS
2035 }
2036 else switch( encoding)
2037 {
2038 case wxFONTENCODING_ISO8859_1 :
2039 enc = kCFStringEncodingISOLatin1 ;
2040 break ;
2041 case wxFONTENCODING_ISO8859_2 :
2042 enc = kCFStringEncodingISOLatin2;
2043 break ;
2044 case wxFONTENCODING_ISO8859_3 :
2045 enc = kCFStringEncodingISOLatin3 ;
2046 break ;
2047 case wxFONTENCODING_ISO8859_4 :
2048 enc = kCFStringEncodingISOLatin4;
2049 break ;
2050 case wxFONTENCODING_ISO8859_5 :
2051 enc = kCFStringEncodingISOLatinCyrillic;
2052 break ;
2053 case wxFONTENCODING_ISO8859_6 :
2054 enc = kCFStringEncodingISOLatinArabic;
2055 break ;
2056 case wxFONTENCODING_ISO8859_7 :
2057 enc = kCFStringEncodingISOLatinGreek;
2058 break ;
2059 case wxFONTENCODING_ISO8859_8 :
2060 enc = kCFStringEncodingISOLatinHebrew;
2061 break ;
2062 case wxFONTENCODING_ISO8859_9 :
2063 enc = kCFStringEncodingISOLatin5;
2064 break ;
2065 case wxFONTENCODING_ISO8859_10 :
2066 enc = kCFStringEncodingISOLatin6;
2067 break ;
2068 case wxFONTENCODING_ISO8859_11 :
2069 enc = kCFStringEncodingISOLatinThai;
2070 break ;
2071 case wxFONTENCODING_ISO8859_13 :
2072 enc = kCFStringEncodingISOLatin7;
2073 break ;
2074 case wxFONTENCODING_ISO8859_14 :
2075 enc = kCFStringEncodingISOLatin8;
2076 break ;
2077 case wxFONTENCODING_ISO8859_15 :
2078 enc = kCFStringEncodingISOLatin9;
2079 break ;
2080
2081 case wxFONTENCODING_KOI8 :
2082 enc = kCFStringEncodingKOI8_R;
2083 break ;
2084 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2085 enc = kCFStringEncodingDOSRussian;
2086 break ;
2087
2088// case wxFONTENCODING_BULGARIAN :
2089// enc = ;
2090// break ;
2091
2092 case wxFONTENCODING_CP437 :
2093 enc =kCFStringEncodingDOSLatinUS ;
2094 break ;
2095 case wxFONTENCODING_CP850 :
2096 enc = kCFStringEncodingDOSLatin1;
2097 break ;
2098 case wxFONTENCODING_CP852 :
2099 enc = kCFStringEncodingDOSLatin2;
2100 break ;
2101 case wxFONTENCODING_CP855 :
2102 enc = kCFStringEncodingDOSCyrillic;
2103 break ;
2104 case wxFONTENCODING_CP866 :
2105 enc =kCFStringEncodingDOSRussian ;
2106 break ;
2107 case wxFONTENCODING_CP874 :
2108 enc = kCFStringEncodingDOSThai;
2109 break ;
2110 case wxFONTENCODING_CP932 :
2111 enc = kCFStringEncodingDOSJapanese;
2112 break ;
2113 case wxFONTENCODING_CP936 :
2114 enc =kCFStringEncodingDOSChineseSimplif ;
2115 break ;
2116 case wxFONTENCODING_CP949 :
2117 enc = kCFStringEncodingDOSKorean;
2118 break ;
2119 case wxFONTENCODING_CP950 :
2120 enc = kCFStringEncodingDOSChineseTrad;
2121 break ;
ecd9653b
WS
2122 case wxFONTENCODING_CP1250 :
2123 enc = kCFStringEncodingWindowsLatin2;
2124 break ;
2125 case wxFONTENCODING_CP1251 :
2126 enc =kCFStringEncodingWindowsCyrillic ;
2127 break ;
2128 case wxFONTENCODING_CP1252 :
2129 enc =kCFStringEncodingWindowsLatin1 ;
2130 break ;
2131 case wxFONTENCODING_CP1253 :
2132 enc = kCFStringEncodingWindowsGreek;
2133 break ;
2134 case wxFONTENCODING_CP1254 :
2135 enc = kCFStringEncodingWindowsLatin5;
2136 break ;
2137 case wxFONTENCODING_CP1255 :
2138 enc =kCFStringEncodingWindowsHebrew ;
2139 break ;
2140 case wxFONTENCODING_CP1256 :
2141 enc =kCFStringEncodingWindowsArabic ;
2142 break ;
2143 case wxFONTENCODING_CP1257 :
2144 enc = kCFStringEncodingWindowsBalticRim;
2145 break ;
638357a0
RN
2146// This only really encodes to UTF7 (if that) evidently
2147// case wxFONTENCODING_UTF7 :
2148// enc = kCFStringEncodingNonLossyASCII ;
2149// break ;
ecd9653b
WS
2150 case wxFONTENCODING_UTF8 :
2151 enc = kCFStringEncodingUTF8 ;
2152 break ;
2153 case wxFONTENCODING_EUC_JP :
2154 enc = kCFStringEncodingEUC_JP;
2155 break ;
2156 case wxFONTENCODING_UTF16 :
f7e98dee 2157 enc = kCFStringEncodingUnicode ;
ecd9653b 2158 break ;
f7e98dee
RN
2159 case wxFONTENCODING_MACROMAN :
2160 enc = kCFStringEncodingMacRoman ;
2161 break ;
2162 case wxFONTENCODING_MACJAPANESE :
2163 enc = kCFStringEncodingMacJapanese ;
2164 break ;
2165 case wxFONTENCODING_MACCHINESETRAD :
2166 enc = kCFStringEncodingMacChineseTrad ;
2167 break ;
2168 case wxFONTENCODING_MACKOREAN :
2169 enc = kCFStringEncodingMacKorean ;
2170 break ;
2171 case wxFONTENCODING_MACARABIC :
2172 enc = kCFStringEncodingMacArabic ;
2173 break ;
2174 case wxFONTENCODING_MACHEBREW :
2175 enc = kCFStringEncodingMacHebrew ;
2176 break ;
2177 case wxFONTENCODING_MACGREEK :
2178 enc = kCFStringEncodingMacGreek ;
2179 break ;
2180 case wxFONTENCODING_MACCYRILLIC :
2181 enc = kCFStringEncodingMacCyrillic ;
2182 break ;
2183 case wxFONTENCODING_MACDEVANAGARI :
2184 enc = kCFStringEncodingMacDevanagari ;
2185 break ;
2186 case wxFONTENCODING_MACGURMUKHI :
2187 enc = kCFStringEncodingMacGurmukhi ;
2188 break ;
2189 case wxFONTENCODING_MACGUJARATI :
2190 enc = kCFStringEncodingMacGujarati ;
2191 break ;
2192 case wxFONTENCODING_MACORIYA :
2193 enc = kCFStringEncodingMacOriya ;
2194 break ;
2195 case wxFONTENCODING_MACBENGALI :
2196 enc = kCFStringEncodingMacBengali ;
2197 break ;
2198 case wxFONTENCODING_MACTAMIL :
2199 enc = kCFStringEncodingMacTamil ;
2200 break ;
2201 case wxFONTENCODING_MACTELUGU :
2202 enc = kCFStringEncodingMacTelugu ;
2203 break ;
2204 case wxFONTENCODING_MACKANNADA :
2205 enc = kCFStringEncodingMacKannada ;
2206 break ;
2207 case wxFONTENCODING_MACMALAJALAM :
2208 enc = kCFStringEncodingMacMalayalam ;
2209 break ;
2210 case wxFONTENCODING_MACSINHALESE :
2211 enc = kCFStringEncodingMacSinhalese ;
2212 break ;
2213 case wxFONTENCODING_MACBURMESE :
2214 enc = kCFStringEncodingMacBurmese ;
2215 break ;
2216 case wxFONTENCODING_MACKHMER :
2217 enc = kCFStringEncodingMacKhmer ;
2218 break ;
2219 case wxFONTENCODING_MACTHAI :
2220 enc = kCFStringEncodingMacThai ;
2221 break ;
2222 case wxFONTENCODING_MACLAOTIAN :
2223 enc = kCFStringEncodingMacLaotian ;
2224 break ;
2225 case wxFONTENCODING_MACGEORGIAN :
2226 enc = kCFStringEncodingMacGeorgian ;
2227 break ;
2228 case wxFONTENCODING_MACARMENIAN :
2229 enc = kCFStringEncodingMacArmenian ;
2230 break ;
2231 case wxFONTENCODING_MACCHINESESIMP :
2232 enc = kCFStringEncodingMacChineseSimp ;
2233 break ;
2234 case wxFONTENCODING_MACTIBETAN :
2235 enc = kCFStringEncodingMacTibetan ;
2236 break ;
2237 case wxFONTENCODING_MACMONGOLIAN :
2238 enc = kCFStringEncodingMacMongolian ;
2239 break ;
2240 case wxFONTENCODING_MACETHIOPIC :
2241 enc = kCFStringEncodingMacEthiopic ;
2242 break ;
2243 case wxFONTENCODING_MACCENTRALEUR :
2244 enc = kCFStringEncodingMacCentralEurRoman ;
2245 break ;
2246 case wxFONTENCODING_MACVIATNAMESE :
2247 enc = kCFStringEncodingMacVietnamese ;
2248 break ;
2249 case wxFONTENCODING_MACARABICEXT :
2250 enc = kCFStringEncodingMacExtArabic ;
2251 break ;
2252 case wxFONTENCODING_MACSYMBOL :
2253 enc = kCFStringEncodingMacSymbol ;
2254 break ;
2255 case wxFONTENCODING_MACDINGBATS :
2256 enc = kCFStringEncodingMacDingbats ;
2257 break ;
2258 case wxFONTENCODING_MACTURKISH :
2259 enc = kCFStringEncodingMacTurkish ;
2260 break ;
2261 case wxFONTENCODING_MACCROATIAN :
2262 enc = kCFStringEncodingMacCroatian ;
2263 break ;
2264 case wxFONTENCODING_MACICELANDIC :
2265 enc = kCFStringEncodingMacIcelandic ;
2266 break ;
2267 case wxFONTENCODING_MACROMANIAN :
2268 enc = kCFStringEncodingMacRomanian ;
2269 break ;
2270 case wxFONTENCODING_MACCELTIC :
2271 enc = kCFStringEncodingMacCeltic ;
2272 break ;
2273 case wxFONTENCODING_MACGAELIC :
2274 enc = kCFStringEncodingMacGaelic ;
2275 break ;
ecd9653b
WS
2276// case wxFONTENCODING_MACKEYBOARD :
2277// enc = kCFStringEncodingMacKeyboardGlyphs ;
2278// break ;
2279 default :
2280 // because gcc is picky
2281 break ;
2282 } ;
2283 return enc ;
f7e98dee
RN
2284}
2285
f7e98dee
RN
2286class wxMBConv_cocoa : public wxMBConv
2287{
2288public:
2289 wxMBConv_cocoa()
2290 {
2291 Init(CFStringGetSystemEncoding()) ;
2292 }
2293
a6900d10 2294#if wxUSE_FONTMAP
f7e98dee
RN
2295 wxMBConv_cocoa(const wxChar* name)
2296 {
267e11c5 2297 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
f7e98dee 2298 }
a6900d10 2299#endif
f7e98dee
RN
2300
2301 wxMBConv_cocoa(wxFontEncoding encoding)
2302 {
2303 Init( wxCFStringEncFromFontEnc(encoding) );
2304 }
2305
2306 ~wxMBConv_cocoa()
2307 {
2308 }
2309
2310 void Init( CFStringEncoding encoding)
2311 {
638357a0 2312 m_encoding = encoding ;
f7e98dee
RN
2313 }
2314
2315 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2316 {
2317 wxASSERT(szUnConv);
ecd9653b 2318
638357a0
RN
2319 CFStringRef theString = CFStringCreateWithBytes (
2320 NULL, //the allocator
2321 (const UInt8*)szUnConv,
2322 strlen(szUnConv),
2323 m_encoding,
2324 false //no BOM/external representation
f7e98dee
RN
2325 );
2326
2327 wxASSERT(theString);
2328
638357a0
RN
2329 size_t nOutLength = CFStringGetLength(theString);
2330
2331 if (szOut == NULL)
f7e98dee 2332 {
f7e98dee 2333 CFRelease(theString);
638357a0 2334 return nOutLength;
f7e98dee 2335 }
ecd9653b 2336
638357a0 2337 CFRange theRange = { 0, nOutSize };
ecd9653b 2338
638357a0
RN
2339#if SIZEOF_WCHAR_T == 4
2340 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2341#endif
3698ae71 2342
f7e98dee 2343 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
3698ae71 2344
f7e98dee 2345 CFRelease(theString);
ecd9653b 2346
638357a0 2347 szUniCharBuffer[nOutLength] = '\0' ;
f7e98dee
RN
2348
2349#if SIZEOF_WCHAR_T == 4
2350 wxMBConvUTF16 converter ;
638357a0 2351 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
f7e98dee
RN
2352 delete[] szUniCharBuffer;
2353#endif
3698ae71 2354
638357a0 2355 return nOutLength;
f7e98dee
RN
2356 }
2357
2358 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2359 {
638357a0 2360 wxASSERT(szUnConv);
3698ae71 2361
f7e98dee 2362 size_t nRealOutSize;
638357a0 2363 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 2364 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2365
f7e98dee 2366#if SIZEOF_WCHAR_T == 4
d9d488cf 2367 wxMBConvUTF16 converter ;
f7e98dee
RN
2368 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2369 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2370 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2371 nBufSize /= sizeof(UniChar);
f7e98dee
RN
2372#endif
2373
2374 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2375 NULL, //allocator
2376 szUniBuffer,
2377 nBufSize,
638357a0 2378 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 2379 );
ecd9653b 2380
f7e98dee 2381 wxASSERT(theString);
ecd9653b 2382
f7e98dee 2383 //Note that CER puts a BOM when converting to unicode
638357a0
RN
2384 //so we check and use getchars instead in that case
2385 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 2386 {
638357a0
RN
2387 if (szOut != NULL)
2388 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
3698ae71 2389
638357a0
RN
2390 nRealOutSize = CFStringGetLength(theString) + 1;
2391 }
2392 else
2393 {
2394 CFStringGetBytes(
2395 theString,
2396 CFRangeMake(0, CFStringGetLength(theString)),
2397 m_encoding,
2398 0, //what to put in characters that can't be converted -
2399 //0 tells CFString to return NULL if it meets such a character
2400 false, //not an external representation
2401 (UInt8*) szOut,
3698ae71 2402 nOutSize,
638357a0
RN
2403 (CFIndex*) &nRealOutSize
2404 );
f7e98dee 2405 }
ecd9653b 2406
638357a0 2407 CFRelease(theString);
ecd9653b 2408
638357a0
RN
2409#if SIZEOF_WCHAR_T == 4
2410 delete[] szUniBuffer;
2411#endif
ecd9653b 2412
f7e98dee
RN
2413 return nRealOutSize - 1;
2414 }
2415
2416 bool IsOk() const
ecd9653b 2417 {
3698ae71 2418 return m_encoding != kCFStringEncodingInvalidId &&
638357a0 2419 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2420 }
2421
2422private:
638357a0 2423 CFStringEncoding m_encoding ;
f7e98dee
RN
2424};
2425
2426#endif // defined(__WXCOCOA__)
2427
335d31e0
SC
2428// ============================================================================
2429// Mac conversion classes
2430// ============================================================================
2431
2432#if defined(__WXMAC__) && defined(TARGET_CARBON)
2433
2434class wxMBConv_mac : public wxMBConv
2435{
2436public:
2437 wxMBConv_mac()
2438 {
2439 Init(CFStringGetSystemEncoding()) ;
2440 }
2441
2d1659cf 2442#if wxUSE_FONTMAP
335d31e0
SC
2443 wxMBConv_mac(const wxChar* name)
2444 {
267e11c5 2445 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
335d31e0 2446 }
2d1659cf 2447#endif
335d31e0
SC
2448
2449 wxMBConv_mac(wxFontEncoding encoding)
2450 {
d775fa82
WS
2451 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2452 }
2453
2454 ~wxMBConv_mac()
2455 {
2456 OSStatus status = noErr ;
2457 status = TECDisposeConverter(m_MB2WC_converter);
2458 status = TECDisposeConverter(m_WC2MB_converter);
2459 }
2460
2461
2462 void Init( TextEncodingBase encoding)
2463 {
2464 OSStatus status = noErr ;
2465 m_char_encoding = encoding ;
2466 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2467
2468 status = TECCreateConverter(&m_MB2WC_converter,
2469 m_char_encoding,
2470 m_unicode_encoding);
2471 status = TECCreateConverter(&m_WC2MB_converter,
2472 m_unicode_encoding,
2473 m_char_encoding);
2474 }
2475
335d31e0
SC
2476 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2477 {
d775fa82
WS
2478 OSStatus status = noErr ;
2479 ByteCount byteOutLen ;
2480 ByteCount byteInLen = strlen(psz) ;
2481 wchar_t *tbuf = NULL ;
2482 UniChar* ubuf = NULL ;
2483 size_t res = 0 ;
2484
2485 if (buf == NULL)
2486 {
638357a0 2487 //apple specs say at least 32
c543817b 2488 n = wxMax( 32 , byteInLen ) ;
d775fa82
WS
2489 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2490 }
2491 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
f3a355ce 2492#if SIZEOF_WCHAR_T == 4
d775fa82 2493 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2494#else
d775fa82 2495 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2496#endif
d775fa82
WS
2497 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2498 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
f3a355ce 2499#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2500 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2501 // is not properly terminated we get random characters at the end
2502 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d9d488cf 2503 wxMBConvUTF16 converter ;
d775fa82
WS
2504 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2505 free( ubuf ) ;
f3a355ce 2506#else
d775fa82 2507 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2508#endif
d775fa82
WS
2509 if ( buf == NULL )
2510 free(tbuf) ;
335d31e0 2511
335d31e0
SC
2512 if ( buf && res < n)
2513 buf[res] = 0;
2514
d775fa82 2515 return res ;
335d31e0
SC
2516 }
2517
2518 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82
WS
2519 {
2520 OSStatus status = noErr ;
2521 ByteCount byteOutLen ;
2522 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2523
2524 char *tbuf = NULL ;
2525
2526 if (buf == NULL)
2527 {
638357a0 2528 //apple specs say at least 32
c543817b 2529 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2530 tbuf = (char*) malloc( n ) ;
2531 }
2532
2533 ByteCount byteBufferLen = n ;
2534 UniChar* ubuf = NULL ;
f3a355ce 2535#if SIZEOF_WCHAR_T == 4
d9d488cf 2536 wxMBConvUTF16 converter ;
d775fa82
WS
2537 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2538 byteInLen = unicharlen ;
2539 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2540 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce 2541#else
d775fa82 2542 ubuf = (UniChar*) psz ;
f3a355ce 2543#endif
d775fa82
WS
2544 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2545 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
f3a355ce 2546#if SIZEOF_WCHAR_T == 4
d775fa82 2547 free( ubuf ) ;
f3a355ce 2548#endif
d775fa82
WS
2549 if ( buf == NULL )
2550 free(tbuf) ;
335d31e0 2551
d775fa82 2552 size_t res = byteOutLen ;
335d31e0 2553 if ( buf && res < n)
638357a0 2554 {
335d31e0 2555 buf[res] = 0;
3698ae71 2556
638357a0
RN
2557 //we need to double-trip to verify it didn't insert any ? in place
2558 //of bogus characters
2559 wxWCharBuffer wcBuf(n);
2560 size_t pszlen = wxWcslen(psz);
2561 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2562 wxWcslen(wcBuf) != pszlen ||
2563 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2564 {
2565 // we didn't obtain the same thing we started from, hence
2566 // the conversion was lossy and we consider that it failed
2567 return (size_t)-1;
2568 }
2569 }
335d31e0 2570
d775fa82 2571 return res ;
335d31e0
SC
2572 }
2573
2574 bool IsOk() const
2575 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2576
2577private:
d775fa82
WS
2578 TECObjectRef m_MB2WC_converter ;
2579 TECObjectRef m_WC2MB_converter ;
2580
2581 TextEncodingBase m_char_encoding ;
2582 TextEncodingBase m_unicode_encoding ;
335d31e0
SC
2583};
2584
2585#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 2586
36acb880
VZ
2587// ============================================================================
2588// wxEncodingConverter based conversion classes
2589// ============================================================================
2590
1e6feb95 2591#if wxUSE_FONTMAP
1cd52418 2592
e95354ec 2593class wxMBConv_wxwin : public wxMBConv
1cd52418 2594{
8b04d4c4
VZ
2595private:
2596 void Init()
2597 {
2598 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2599 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2600 }
2601
6001e347 2602public:
f1339c56
RR
2603 // temporarily just use wxEncodingConverter stuff,
2604 // so that it works while a better implementation is built
e95354ec 2605 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
2606 {
2607 if (name)
267e11c5 2608 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
2609 else
2610 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 2611
8b04d4c4
VZ
2612 Init();
2613 }
2614
e95354ec 2615 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
2616 {
2617 m_enc = enc;
2618
2619 Init();
f1339c56 2620 }
dccce9ea 2621
bde4baac 2622 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
2623 {
2624 size_t inbuf = strlen(psz);
dccce9ea 2625 if (buf)
c643a977
VS
2626 {
2627 if (!m2w.Convert(psz,buf))
2628 return (size_t)-1;
2629 }
f1339c56
RR
2630 return inbuf;
2631 }
dccce9ea 2632
bde4baac 2633 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 2634 {
f8d791e0 2635 const size_t inbuf = wxWcslen(psz);
f1339c56 2636 if (buf)
c643a977
VS
2637 {
2638 if (!w2m.Convert(psz,buf))
2639 return (size_t)-1;
2640 }
dccce9ea 2641
f1339c56
RR
2642 return inbuf;
2643 }
dccce9ea 2644
e95354ec 2645 bool IsOk() const { return m_ok; }
f1339c56
RR
2646
2647public:
8b04d4c4 2648 wxFontEncoding m_enc;
f1339c56 2649 wxEncodingConverter m2w, w2m;
cafbf6fb 2650
eec47cc6 2651private:
c1464d9d 2652 virtual size_t GetMinMBCharWidth() const
eec47cc6
VZ
2653 {
2654 switch ( m_enc )
2655 {
2656 case wxFONTENCODING_UTF16BE:
2657 case wxFONTENCODING_UTF16LE:
c1464d9d 2658 return 2;
eec47cc6
VZ
2659
2660 case wxFONTENCODING_UTF32BE:
2661 case wxFONTENCODING_UTF32LE:
c1464d9d 2662 return 4;
eec47cc6
VZ
2663
2664 default:
c1464d9d 2665 return 1;
eec47cc6
VZ
2666 }
2667 }
2668
cafbf6fb
VZ
2669 // were we initialized successfully?
2670 bool m_ok;
fc7a2a60 2671
e95354ec 2672 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 2673};
6001e347 2674
8f115891
MW
2675// make the constructors available for unit testing
2676WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2677{
2678 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2679 if ( !result->IsOk() )
2680 {
2681 delete result;
2682 return 0;
2683 }
2684 return result;
2685}
2686
1e6feb95
VZ
2687#endif // wxUSE_FONTMAP
2688
36acb880
VZ
2689// ============================================================================
2690// wxCSConv implementation
2691// ============================================================================
2692
8b04d4c4 2693void wxCSConv::Init()
6001e347 2694{
e95354ec
VZ
2695 m_name = NULL;
2696 m_convReal = NULL;
2697 m_deferred = true;
2698}
2699
8b04d4c4
VZ
2700wxCSConv::wxCSConv(const wxChar *charset)
2701{
2702 Init();
82713003 2703
e95354ec
VZ
2704 if ( charset )
2705 {
e95354ec
VZ
2706 SetName(charset);
2707 }
bda3d86a 2708
e4277538
VZ
2709#if wxUSE_FONTMAP
2710 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2711#else
bda3d86a 2712 m_encoding = wxFONTENCODING_SYSTEM;
e4277538 2713#endif
6001e347
RR
2714}
2715
8b04d4c4
VZ
2716wxCSConv::wxCSConv(wxFontEncoding encoding)
2717{
bda3d86a 2718 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
2719 {
2720 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2721
2722 encoding = wxFONTENCODING_SYSTEM;
2723 }
2724
8b04d4c4
VZ
2725 Init();
2726
bda3d86a 2727 m_encoding = encoding;
8b04d4c4
VZ
2728}
2729
6001e347
RR
2730wxCSConv::~wxCSConv()
2731{
65e50848
JS
2732 Clear();
2733}
2734
54380f29 2735wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 2736 : wxMBConv()
54380f29 2737{
8b04d4c4
VZ
2738 Init();
2739
54380f29 2740 SetName(conv.m_name);
8b04d4c4 2741 m_encoding = conv.m_encoding;
54380f29
GD
2742}
2743
2744wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2745{
2746 Clear();
8b04d4c4 2747
54380f29 2748 SetName(conv.m_name);
8b04d4c4
VZ
2749 m_encoding = conv.m_encoding;
2750
54380f29
GD
2751 return *this;
2752}
2753
65e50848
JS
2754void wxCSConv::Clear()
2755{
8b04d4c4 2756 free(m_name);
e95354ec 2757 delete m_convReal;
8b04d4c4 2758
65e50848 2759 m_name = NULL;
e95354ec 2760 m_convReal = NULL;
6001e347
RR
2761}
2762
2763void wxCSConv::SetName(const wxChar *charset)
2764{
f1339c56
RR
2765 if (charset)
2766 {
2767 m_name = wxStrdup(charset);
e95354ec 2768 m_deferred = true;
f1339c56 2769 }
6001e347
RR
2770}
2771
8b3eb85d
VZ
#if wxUSE_FONTMAP
#include "wx/hashmap.h"

// map from the font encoding to a charset name stored as wxString
WX_DECLARE_HASH_MAP( wxFontEncoding,
                     wxString,
                     wxIntegerHash,
                     wxIntegerEqual,
                     wxEncodingNameCache );

// file-local cache of the names found for the encodings
static wxEncodingNameCache gs_nameCache;
#endif // wxUSE_FONTMAP
2780
e95354ec
VZ
2781wxMBConv *wxCSConv::DoCreate() const
2782{
ce6f8d6f
VZ
2783#if wxUSE_FONTMAP
2784 wxLogTrace(TRACE_STRCONV,
2785 wxT("creating conversion for %s"),
2786 (m_name ? m_name
2787 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2788#endif // wxUSE_FONTMAP
2789
c547282d
VZ
2790 // check for the special case of ASCII or ISO8859-1 charset: as we have
2791 // special knowledge of it anyhow, we don't need to create a special
2792 // conversion object
e4277538
VZ
2793 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
2794 m_encoding == wxFONTENCODING_DEFAULT )
f1339c56 2795 {
e95354ec
VZ
2796 // don't convert at all
2797 return NULL;
2798 }
dccce9ea 2799
e95354ec
VZ
2800 // we trust OS to do conversion better than we can so try external
2801 // conversion methods first
2802 //
2803 // the full order is:
2804 // 1. OS conversion (iconv() under Unix or Win32 API)
2805 // 2. hard coded conversions for UTF
2806 // 3. wxEncodingConverter as fall back
2807
2808 // step (1)
2809#ifdef HAVE_ICONV
c547282d 2810#if !wxUSE_FONTMAP
e95354ec 2811 if ( m_name )
c547282d 2812#endif // !wxUSE_FONTMAP
e95354ec 2813 {
c547282d 2814 wxString name(m_name);
8b3eb85d
VZ
2815 wxFontEncoding encoding(m_encoding);
2816
2817 if ( !name.empty() )
2818 {
2819 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2820 if ( conv->IsOk() )
2821 return conv;
2822
2823 delete conv;
c547282d
VZ
2824
2825#if wxUSE_FONTMAP
8b3eb85d
VZ
2826 encoding =
2827 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
c547282d 2828#endif // wxUSE_FONTMAP
8b3eb85d
VZ
2829 }
2830#if wxUSE_FONTMAP
2831 {
2832 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2833 if ( it != gs_nameCache.end() )
2834 {
2835 if ( it->second.empty() )
2836 return NULL;
c547282d 2837
8b3eb85d
VZ
2838 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2839 if ( conv->IsOk() )
2840 return conv;
e95354ec 2841
8b3eb85d
VZ
2842 delete conv;
2843 }
2844
2845 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2846
2847 for ( ; *names; ++names )
2848 {
2849 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2850 if ( conv->IsOk() )
2851 {
2852 gs_nameCache[encoding] = *names;
2853 return conv;
2854 }
2855
2856 delete conv;
2857 }
2858
40711af8 2859 gs_nameCache[encoding] = _T(""); // cache the failure
8b3eb85d
VZ
2860 }
2861#endif // wxUSE_FONTMAP
e95354ec
VZ
2862 }
2863#endif // HAVE_ICONV
2864
2865#ifdef wxHAVE_WIN32_MB2WC
2866 {
7608a683 2867#if wxUSE_FONTMAP
e95354ec
VZ
2868 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2869 : new wxMBConv_win32(m_encoding);
2870 if ( conv->IsOk() )
2871 return conv;
2872
2873 delete conv;
7608a683
WS
2874#else
2875 return NULL;
2876#endif
e95354ec
VZ
2877 }
2878#endif // wxHAVE_WIN32_MB2WC
d775fa82
WS
2879#if defined(__WXMAC__)
2880 {
5c3c8676 2881 // leave UTF16 and UTF32 to the built-ins of wx
3698ae71 2882 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
5c3c8676 2883 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
d775fa82
WS
2884 {
2885
2d1659cf 2886#if wxUSE_FONTMAP
d775fa82
WS
2887 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2888 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
2889#else
2890 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2891#endif
d775fa82 2892 if ( conv->IsOk() )
f7e98dee
RN
2893 return conv;
2894
2895 delete conv;
2896 }
2897 }
2898#endif
2899#if defined(__WXCOCOA__)
2900 {
2901 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2902 {
2903
a6900d10 2904#if wxUSE_FONTMAP
f7e98dee
RN
2905 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2906 : new wxMBConv_cocoa(m_encoding);
a6900d10
RN
2907#else
2908 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2909#endif
f7e98dee 2910 if ( conv->IsOk() )
d775fa82
WS
2911 return conv;
2912
2913 delete conv;
2914 }
335d31e0
SC
2915 }
2916#endif
e95354ec
VZ
2917 // step (2)
2918 wxFontEncoding enc = m_encoding;
2919#if wxUSE_FONTMAP
c547282d
VZ
2920 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2921 {
2922 // use "false" to suppress interactive dialogs -- we can be called from
2923 // anywhere and popping up a dialog from here is the last thing we want to
2924 // do
267e11c5 2925 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 2926 }
e95354ec
VZ
2927#endif // wxUSE_FONTMAP
2928
2929 switch ( enc )
2930 {
2931 case wxFONTENCODING_UTF7:
2932 return new wxMBConvUTF7;
2933
2934 case wxFONTENCODING_UTF8:
2935 return new wxMBConvUTF8;
2936
e95354ec
VZ
2937 case wxFONTENCODING_UTF16BE:
2938 return new wxMBConvUTF16BE;
2939
2940 case wxFONTENCODING_UTF16LE:
2941 return new wxMBConvUTF16LE;
2942
e95354ec
VZ
2943 case wxFONTENCODING_UTF32BE:
2944 return new wxMBConvUTF32BE;
2945
2946 case wxFONTENCODING_UTF32LE:
2947 return new wxMBConvUTF32LE;
2948
2949 default:
2950 // nothing to do but put here to suppress gcc warnings
2951 ;
2952 }
2953
2954 // step (3)
2955#if wxUSE_FONTMAP
2956 {
2957 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2958 : new wxMBConv_wxwin(m_encoding);
2959 if ( conv->IsOk() )
2960 return conv;
2961
2962 delete conv;
2963 }
2964#endif // wxUSE_FONTMAP
2965
a58d4f4d
VS
2966 // NB: This is a hack to prevent deadlock. What could otherwise happen
2967 // in Unicode build: wxConvLocal creation ends up being here
2968 // because of some failure and logs the error. But wxLog will try to
2969 // attach timestamp, for which it will need wxConvLocal (to convert
2970 // time to char* and then wchar_t*), but that fails, tries to log
2971 // error, but wxLog has a (already locked) critical section that
2972 // guards static buffer.
2973 static bool alreadyLoggingError = false;
2974 if (!alreadyLoggingError)
2975 {
2976 alreadyLoggingError = true;
2977 wxLogError(_("Cannot convert from the charset '%s'!"),
2978 m_name ? m_name
e95354ec
VZ
2979 :
2980#if wxUSE_FONTMAP
267e11c5 2981 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
e95354ec
VZ
2982#else // !wxUSE_FONTMAP
2983 wxString::Format(_("encoding %s"), m_encoding).c_str()
2984#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2985 );
a58d4f4d
VS
2986 alreadyLoggingError = false;
2987 }
e95354ec
VZ
2988
2989 return NULL;
2990}
2991
2992void wxCSConv::CreateConvIfNeeded() const
2993{
2994 if ( m_deferred )
2995 {
2996 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
2997
2998#if wxUSE_INTL
2999 // if we don't have neither the name nor the encoding, use the default
3000 // encoding for this system
3001 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3002 {
4d312c22 3003 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
3004 }
3005#endif // wxUSE_INTL
3006
e95354ec
VZ
3007 self->m_convReal = DoCreate();
3008 self->m_deferred = false;
6001e347 3009 }
6001e347
RR
3010}
3011
3012size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
3013{
e95354ec 3014 CreateConvIfNeeded();
dccce9ea 3015
e95354ec
VZ
3016 if (m_convReal)
3017 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
3018
3019 // latin-1 (direct)
4def3b35 3020 size_t len = strlen(psz);
dccce9ea 3021
f1339c56
RR
3022 if (buf)
3023 {
4def3b35 3024 for (size_t c = 0; c <= len; c++)
f1339c56
RR
3025 buf[c] = (unsigned char)(psz[c]);
3026 }
dccce9ea 3027
f1339c56 3028 return len;
6001e347
RR
3029}
3030
3031size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
3032{
e95354ec 3033 CreateConvIfNeeded();
dccce9ea 3034
e95354ec
VZ
3035 if (m_convReal)
3036 return m_convReal->WC2MB(buf, psz, n);
1cd52418 3037
f1339c56 3038 // latin-1 (direct)
f8d791e0 3039 const size_t len = wxWcslen(psz);
f1339c56
RR
3040 if (buf)
3041 {
4def3b35 3042 for (size_t c = 0; c <= len; c++)
24642831
VS
3043 {
3044 if (psz[c] > 0xFF)
3045 return (size_t)-1;
907173e5 3046 buf[c] = (char)psz[c];
24642831
VS
3047 }
3048 }
3049 else
3050 {
3051 for (size_t c = 0; c <= len; c++)
3052 {
3053 if (psz[c] > 0xFF)
3054 return (size_t)-1;
3055 }
f1339c56 3056 }
dccce9ea 3057
f1339c56 3058 return len;
6001e347
RR
3059}
3060
c1464d9d 3061size_t wxCSConv::GetMinMBCharWidth() const
eec47cc6
VZ
3062{
3063 CreateConvIfNeeded();
3064
3065 if ( m_convReal )
3066 {
3067 // cast needed just to call private function of m_convReal
c1464d9d 3068 return ((wxCSConv *)m_convReal)->GetMinMBCharWidth();
eec47cc6
VZ
3069 }
3070
c1464d9d 3071 return 1;
eec47cc6
VZ
3072}
3073
bde4baac
VZ
3074// ----------------------------------------------------------------------------
3075// globals
3076// ----------------------------------------------------------------------------
3077
3078#ifdef __WINDOWS__
3079 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
3080#elif defined(__WXMAC__) && !defined(__MACH__)
3081 static wxMBConv_mac wxConvLibcObj ;
bde4baac 3082#else
dcc8fac0 3083 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
3084#endif
3085
3086static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
3087static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
3088static wxMBConvUTF7 wxConvUTF7Obj;
3089static wxMBConvUTF8 wxConvUTF8Obj;
c12b7f79 3090
bde4baac
VZ
3091WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
3092WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
3093WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
3094WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
3095WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
3096WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
f5a1953b
VZ
3097WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
3098#ifdef __WXOSX__
ea8ce907 3099 wxConvUTF8Obj;
f5a1953b 3100#else
ea8ce907 3101 wxConvLibcObj;
f5a1953b
VZ
3102#endif
3103
bde4baac
VZ
3104
3105#else // !wxUSE_WCHAR_T
3106
3107// stand-ins in absence of wchar_t
3108WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
3109 wxConvISO8859_1,
3110 wxConvLocal,
3111 wxConvUTF8;
3112
3113#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T