]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
ignore bakefile-generated test.dsw too
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347 1/////////////////////////////////////////////////////////////////////////////
38d4b1e4 2// Name: src/common/strconv.cpp
6001e347 3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
6001e347
RR
15// For compilers that support precompilation, includes "wx.h".
16#include "wx/wxprec.h"
17
480f42ec
VS
18#ifdef __BORLANDC__
19 #pragma hdrstop
20#endif //__BORLANDC__
21
373658eb
VZ
22#ifndef WX_PRECOMP
23 #include "wx/intl.h"
24 #include "wx/log.h"
de6185e2 25 #include "wx/utils.h"
df69528b 26 #include "wx/hashmap.h"
ef199164 27#endif
373658eb 28
bde4baac
VZ
29#include "wx/strconv.h"
30
31#if wxUSE_WCHAR_T
32
1c193821 33#ifndef __WXWINCE__
1cd52418 34#include <errno.h>
1c193821
JS
35#endif
36
6001e347
RR
37#include <ctype.h>
38#include <string.h>
39#include <stdlib.h>
40
e95354ec 41#if defined(__WIN32__) && !defined(__WXMICROWIN__)
a6c2e2c7
VZ
42 #include "wx/msw/private.h"
43 #include "wx/msw/missing.h"
e95354ec 44 #define wxHAVE_WIN32_MB2WC
ef199164 45#endif
e95354ec 46
6001e347 47#ifdef __SALFORDC__
373658eb 48 #include <clib.h>
6001e347
RR
49#endif
50
b040e242 51#ifdef HAVE_ICONV
373658eb 52 #include <iconv.h>
b1d547eb 53 #include "wx/thread.h"
1cd52418 54#endif
1cd52418 55
373658eb
VZ
56#include "wx/encconv.h"
57#include "wx/fontmap.h"
58
5c4ed98d 59#ifdef __DARWIN__
e4dd1e19 60#include "wx/mac/corefoundation/private/strconv_cf.h"
5c4ed98d
DE
61#endif //def __DARWIN__
62
ef199164 63
ce6f8d6f
VZ
64#define TRACE_STRCONV _T("strconv")
65
467e0479
VZ
66// WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
67// be 4 bytes
4948c2b6 68#if SIZEOF_WCHAR_T == 2
ac11db3a
MW
69 #define WC_UTF16
70#endif
71
ef199164 72
373658eb
VZ
73// ============================================================================
74// implementation
75// ============================================================================
76
69373110
VZ
// helper function of cMB2WC(): returns true if at least one of the n bytes
// starting at p is not NUL and false if all of them are (or if n is 0)
static bool NotAllNULs(const char *p, size_t n)
{
    for ( const char * const end = p + n; p != end; ++p )
    {
        if ( *p != '\0' )
            return true;
    }

    return false;
}
85
373658eb 86// ----------------------------------------------------------------------------
467e0479 87// UTF-16 en/decoding to/from UCS-4 with surrogates handling
373658eb 88// ----------------------------------------------------------------------------
6001e347 89
c91830cb 90static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 91{
ef199164 92 if (input <= 0xffff)
4def3b35 93 {
999836aa
VZ
94 if (output)
95 *output = (wxUint16) input;
ef199164 96
4def3b35 97 return 1;
dccce9ea 98 }
ef199164 99 else if (input >= 0x110000)
4def3b35 100 {
467e0479 101 return wxCONV_FAILED;
dccce9ea
VZ
102 }
103 else
4def3b35 104 {
dccce9ea 105 if (output)
4def3b35 106 {
ef199164
DS
107 *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
108 *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
4def3b35 109 }
ef199164 110
4def3b35 111 return 2;
1cd52418 112 }
1cd52418
OK
113}
114
c91830cb 115static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 116{
ef199164 117 if ((*input < 0xd800) || (*input > 0xdfff))
4def3b35
VS
118 {
119 output = *input;
120 return 1;
dccce9ea 121 }
ef199164 122 else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
4def3b35
VS
123 {
124 output = *input;
467e0479 125 return wxCONV_FAILED;
dccce9ea
VZ
126 }
127 else
4def3b35
VS
128 {
129 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
130 return 2;
131 }
1cd52418
OK
132}
133
467e0479 134#ifdef WC_UTF16
35d11700
VZ
135 typedef wchar_t wxDecodeSurrogate_t;
136#else // !WC_UTF16
137 typedef wxUint16 wxDecodeSurrogate_t;
138#endif // WC_UTF16/!WC_UTF16
467e0479
VZ
139
140// returns the next UTF-32 character from the wchar_t buffer and advances the
141// pointer to the character after this one
142//
143// if an invalid character is found, *pSrc is set to NULL, the caller must
144// check for this
35d11700 145static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
467e0479
VZ
146{
147 wxUint32 out;
8d3dd069
VZ
148 const size_t
149 n = decode_utf16(wx_reinterpret_cast(const wxUint16 *, *pSrc), out);
467e0479
VZ
150 if ( n == wxCONV_FAILED )
151 *pSrc = NULL;
152 else
153 *pSrc += n;
154
155 return out;
156}
157
f6bcfd97 158// ----------------------------------------------------------------------------
6001e347 159// wxMBConv
f6bcfd97 160// ----------------------------------------------------------------------------
2c53a80a 161
483b0434
VZ
162size_t
163wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
164 const char *src, size_t srcLen) const
6001e347 165{
483b0434
VZ
166 // although new conversion classes are supposed to implement this function
167 // directly, the existins ones only implement the old MB2WC() and so, to
168 // avoid to have to rewrite all conversion classes at once, we provide a
169 // default (but not efficient) implementation of this one in terms of the
170 // old function by copying the input to ensure that it's NUL-terminated and
171 // then using MB2WC() to convert it
6001e347 172
483b0434
VZ
173 // the number of chars [which would be] written to dst [if it were not NULL]
174 size_t dstWritten = 0;
eec47cc6 175
c1464d9d 176 // the number of NULs terminating this string
a78c43f1 177 size_t nulLen = 0; // not really needed, but just to avoid warnings
eec47cc6 178
c1464d9d
VZ
179 // if we were not given the input size we just have to assume that the
180 // string is properly terminated as we have no way of knowing how long it
181 // is anyhow, but if we do have the size check whether there are enough
182 // NULs at the end
483b0434
VZ
183 wxCharBuffer bufTmp;
184 const char *srcEnd;
467e0479 185 if ( srcLen != wxNO_LEN )
eec47cc6 186 {
c1464d9d 187 // we need to know how to find the end of this string
7ef3ab50 188 nulLen = GetMBNulLen();
483b0434
VZ
189 if ( nulLen == wxCONV_FAILED )
190 return wxCONV_FAILED;
e4e3bbb4 191
c1464d9d 192 // if there are enough NULs we can avoid the copy
483b0434 193 if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
eec47cc6
VZ
194 {
195 // make a copy in order to properly NUL-terminate the string
483b0434 196 bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
c1464d9d 197 char * const p = bufTmp.data();
483b0434
VZ
198 memcpy(p, src, srcLen);
199 for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
c1464d9d 200 *s = '\0';
483b0434
VZ
201
202 src = bufTmp;
eec47cc6 203 }
e4e3bbb4 204
483b0434
VZ
205 srcEnd = src + srcLen;
206 }
207 else // quit after the first loop iteration
208 {
209 srcEnd = NULL;
210 }
e4e3bbb4 211
483b0434 212 for ( ;; )
eec47cc6 213 {
c1464d9d 214 // try to convert the current chunk
483b0434 215 size_t lenChunk = MB2WC(NULL, src, 0);
483b0434
VZ
216 if ( lenChunk == wxCONV_FAILED )
217 return wxCONV_FAILED;
e4e3bbb4 218
467e0479 219 lenChunk++; // for the L'\0' at the end of this chunk
e4e3bbb4 220
483b0434 221 dstWritten += lenChunk;
f5fb6871 222
467e0479
VZ
223 if ( lenChunk == 1 )
224 {
225 // nothing left in the input string, conversion succeeded
226 break;
227 }
228
483b0434
VZ
229 if ( dst )
230 {
231 if ( dstWritten > dstLen )
232 return wxCONV_FAILED;
233
830f8f11 234 if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
483b0434
VZ
235 return wxCONV_FAILED;
236
237 dst += lenChunk;
238 }
c1464d9d 239
483b0434 240 if ( !srcEnd )
c1464d9d 241 {
467e0479
VZ
242 // we convert just one chunk in this case as this is the entire
243 // string anyhow
c1464d9d
VZ
244 break;
245 }
eec47cc6
VZ
246
247 // advance the input pointer past the end of this chunk
483b0434 248 while ( NotAllNULs(src, nulLen) )
c1464d9d
VZ
249 {
250 // notice that we must skip over multiple bytes here as we suppose
251 // that if NUL takes 2 or 4 bytes, then all the other characters do
252 // too and so if advanced by a single byte we might erroneously
253 // detect sequences of NUL bytes in the middle of the input
483b0434 254 src += nulLen;
c1464d9d 255 }
e4e3bbb4 256
483b0434 257 src += nulLen; // skipping over its terminator as well
c1464d9d
VZ
258
259 // note that ">=" (and not just "==") is needed here as the terminator
260 // we skipped just above could be inside or just after the buffer
261 // delimited by inEnd
483b0434 262 if ( src >= srcEnd )
c1464d9d
VZ
263 break;
264 }
265
483b0434 266 return dstWritten;
e4e3bbb4
RN
267}
268
483b0434
VZ
269size_t
270wxMBConv::FromWChar(char *dst, size_t dstLen,
271 const wchar_t *src, size_t srcLen) const
e4e3bbb4 272{
483b0434
VZ
273 // the number of chars [which would be] written to dst [if it were not NULL]
274 size_t dstWritten = 0;
e4e3bbb4 275
eec47cc6
VZ
276 // make a copy of the input string unless it is already properly
277 // NUL-terminated
278 //
279 // if we don't know its length we have no choice but to assume that it is,
280 // indeed, properly terminated
281 wxWCharBuffer bufTmp;
467e0479 282 if ( srcLen == wxNO_LEN )
e4e3bbb4 283 {
483b0434 284 srcLen = wxWcslen(src) + 1;
eec47cc6 285 }
483b0434 286 else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
eec47cc6
VZ
287 {
288 // make a copy in order to properly NUL-terminate the string
483b0434 289 bufTmp = wxWCharBuffer(srcLen);
ef199164 290 memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
483b0434
VZ
291 src = bufTmp;
292 }
293
294 const size_t lenNul = GetMBNulLen();
295 for ( const wchar_t * const srcEnd = src + srcLen;
296 src < srcEnd;
297 src += wxWcslen(src) + 1 /* skip L'\0' too */ )
298 {
299 // try to convert the current chunk
300 size_t lenChunk = WC2MB(NULL, src, 0);
301
302 if ( lenChunk == wxCONV_FAILED )
303 return wxCONV_FAILED;
304
305 lenChunk += lenNul;
306 dstWritten += lenChunk;
307
308 if ( dst )
309 {
310 if ( dstWritten > dstLen )
311 return wxCONV_FAILED;
312
313 if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
314 return wxCONV_FAILED;
315
316 dst += lenChunk;
317 }
eec47cc6 318 }
e4e3bbb4 319
483b0434
VZ
320 return dstWritten;
321}
322
ef199164 323size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
509da451 324{
ef199164 325 size_t rc = ToWChar(outBuff, outLen, inBuff);
467e0479 326 if ( rc != wxCONV_FAILED )
509da451
VZ
327 {
328 // ToWChar() returns the buffer length, i.e. including the trailing
329 // NUL, while this method doesn't take it into account
330 rc--;
331 }
332
333 return rc;
334}
335
ef199164 336size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
509da451 337{
ef199164 338 size_t rc = FromWChar(outBuff, outLen, inBuff);
467e0479 339 if ( rc != wxCONV_FAILED )
509da451
VZ
340 {
341 rc -= GetMBNulLen();
342 }
343
344 return rc;
345}
346
483b0434
VZ
347wxMBConv::~wxMBConv()
348{
349 // nothing to do here (necessary for Darwin linking probably)
350}
e4e3bbb4 351
483b0434
VZ
352const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
353{
354 if ( psz )
eec47cc6 355 {
483b0434
VZ
356 // calculate the length of the buffer needed first
357 const size_t nLen = MB2WC(NULL, psz, 0);
467e0479 358 if ( nLen != wxCONV_FAILED )
f5fb6871 359 {
483b0434
VZ
360 // now do the actual conversion
361 wxWCharBuffer buf(nLen /* +1 added implicitly */);
eec47cc6 362
483b0434
VZ
363 // +1 for the trailing NULL
364 if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
365 return buf;
f5fb6871 366 }
483b0434 367 }
e4e3bbb4 368
483b0434
VZ
369 return wxWCharBuffer();
370}
3698ae71 371
483b0434
VZ
372const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
373{
374 if ( pwz )
375 {
376 const size_t nLen = WC2MB(NULL, pwz, 0);
467e0479 377 if ( nLen != wxCONV_FAILED )
483b0434
VZ
378 {
379 // extra space for trailing NUL(s)
380 static const size_t extraLen = GetMaxMBNulLen();
f5fb6871 381
483b0434
VZ
382 wxCharBuffer buf(nLen + extraLen - 1);
383 if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
384 return buf;
385 }
386 }
387
388 return wxCharBuffer();
389}
e4e3bbb4 390
483b0434 391const wxWCharBuffer
ef199164 392wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
483b0434 393{
ef199164 394 const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
467e0479 395 if ( dstLen != wxCONV_FAILED )
483b0434 396 {
830f8f11 397 wxWCharBuffer wbuf(dstLen - 1);
ef199164 398 if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
483b0434
VZ
399 {
400 if ( outLen )
467e0479
VZ
401 {
402 *outLen = dstLen;
403 if ( wbuf[dstLen - 1] == L'\0' )
404 (*outLen)--;
405 }
406
483b0434
VZ
407 return wbuf;
408 }
409 }
410
411 if ( outLen )
412 *outLen = 0;
413
414 return wxWCharBuffer();
415}
416
417const wxCharBuffer
ef199164 418wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
483b0434 419{
13d92ad6 420 size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
467e0479 421 if ( dstLen != wxCONV_FAILED )
483b0434 422 {
168a76fe
VZ
423 // special case of empty input: can't allocate 0 size buffer below as
424 // wxCharBuffer insists on NUL-terminating it
425 wxCharBuffer buf(dstLen ? dstLen - 1 : 1);
ef199164 426 if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
483b0434
VZ
427 {
428 if ( outLen )
467e0479
VZ
429 {
430 *outLen = dstLen;
431
432 const size_t nulLen = GetMBNulLen();
13d92ad6
VZ
433 if ( dstLen >= nulLen &&
434 !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
467e0479
VZ
435 {
436 // in this case the output is NUL-terminated and we're not
437 // supposed to count NUL
13d92ad6 438 *outLen -= nulLen;
467e0479
VZ
439 }
440 }
d32a507d 441
483b0434
VZ
442 return buf;
443 }
e4e3bbb4
RN
444 }
445
eec47cc6
VZ
446 if ( outLen )
447 *outLen = 0;
448
449 return wxCharBuffer();
e4e3bbb4
RN
450}
451
6001e347 452// ----------------------------------------------------------------------------
bde4baac 453// wxMBConvLibc
6001e347
RR
454// ----------------------------------------------------------------------------
455
bde4baac
VZ
456size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
457{
458 return wxMB2WC(buf, psz, n);
459}
460
461size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
462{
463 return wxWC2MB(buf, psz, n);
464}
e1bfe89e
RR
465
466// ----------------------------------------------------------------------------
532d575b 467// wxConvBrokenFileNames
e1bfe89e
RR
468// ----------------------------------------------------------------------------
469
eec47cc6
VZ
470#ifdef __UNIX__
471
86501081 472wxConvBrokenFileNames::wxConvBrokenFileNames(const wxString& charset)
ea8ce907 473{
86501081
VS
474 if ( wxStricmp(charset, _T("UTF-8")) == 0 ||
475 wxStricmp(charset, _T("UTF8")) == 0 )
5deedd6e 476 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
845905d5
MW
477 else
478 m_conv = new wxCSConv(charset);
ea8ce907
RR
479}
480
eec47cc6 481#endif // __UNIX__
c12b7f79 482
bde4baac 483// ----------------------------------------------------------------------------
3698ae71 484// UTF-7
bde4baac 485// ----------------------------------------------------------------------------
6001e347 486
15f2ee32 487// Implementation (C) 2004 Fredrik Roubert
6001e347 488
15f2ee32
RN
//
// BASE64 decoding table: maps a byte to the 6 bit value of the corresponding
// BASE64 character or to 0xff if the byte is not a BASE64 character
//
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0'..'9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x4f: 'A'..'O'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50 - 0x5f: 'P'..'Z'
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x6f: 'a'..'o'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70 - 0x7f: 'p'..'z'
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff: no BASE64 characters here
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
527
528size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
529{
15f2ee32
RN
530 size_t len = 0;
531
04a37834 532 while ( *psz && (!buf || (len < n)) )
15f2ee32
RN
533 {
534 unsigned char cc = *psz++;
535 if (cc != '+')
536 {
537 // plain ASCII char
538 if (buf)
539 *buf++ = cc;
540 len++;
541 }
542 else if (*psz == '-')
543 {
544 // encoded plus sign
545 if (buf)
546 *buf++ = cc;
547 len++;
548 psz++;
549 }
04a37834 550 else // start of BASE64 encoded string
15f2ee32 551 {
04a37834 552 bool lsb, ok;
15f2ee32 553 unsigned int d, l;
04a37834
VZ
554 for ( ok = lsb = false, d = 0, l = 0;
555 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
556 psz++ )
15f2ee32
RN
557 {
558 d <<= 6;
559 d += cc;
560 for (l += 6; l >= 8; lsb = !lsb)
561 {
04a37834 562 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
563 if (lsb)
564 {
565 if (buf)
566 *buf++ |= c;
567 len ++;
568 }
569 else
04a37834 570 {
15f2ee32 571 if (buf)
6356d52a 572 *buf = (wchar_t)(c << 8);
04a37834
VZ
573 }
574
575 ok = true;
15f2ee32
RN
576 }
577 }
04a37834
VZ
578
579 if ( !ok )
580 {
581 // in valid UTF7 we should have valid characters after '+'
467e0479 582 return wxCONV_FAILED;
04a37834
VZ
583 }
584
15f2ee32
RN
585 if (*psz == '-')
586 psz++;
587 }
588 }
04a37834
VZ
589
590 if ( buf && (len < n) )
591 *buf = '\0';
592
15f2ee32 593 return len;
6001e347
RR
594}
595
15f2ee32
RN
//
// BASE64 encoding table: maps a 6 bit value to the corresponding BASE64
// character
//
static const unsigned char utf7enb64[] =
{
    // 0 - 25: upper case letters
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 26 - 51: lower case letters
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // 52 - 61: digits
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 62 and 63
    '+', '/'
};
610
//
// UTF-7 encoding table, indexed by the ASCII character code, the values
// being:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    // 0x00 - 0x0f
    3, 3, 3, 3, 3, 3, 3, 3,
    3, 2, 2, 3, 3, 2, 3, 3,
    // 0x10 - 0x1f
    3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3,
    // 0x20 - 0x2f
    2, 1, 1, 1, 1, 1, 1, 0,
    0, 0, 1, 3, 0, 0, 0, 3,
    // 0x30 - 0x3f
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 1, 1, 1, 0,
    // 0x40 - 0x4f
    1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    // 0x50 - 0x5f
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 3, 1, 1, 1,
    // 0x60 - 0x6f
    1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    // 0x70 - 0x7f
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 1, 1, 3, 3
};
630
667e5b3e 631size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32 632{
15f2ee32
RN
633 size_t len = 0;
634
635 while (*psz && ((!buf) || (len < n)))
636 {
637 wchar_t cc = *psz++;
638 if (cc < 0x80 && utf7encode[cc] < 1)
639 {
640 // plain ASCII char
641 if (buf)
642 *buf++ = (char)cc;
ef199164 643
15f2ee32
RN
644 len++;
645 }
646#ifndef WC_UTF16
79c78d42 647 else if (((wxUint32)cc) > 0xffff)
b2c13097 648 {
15f2ee32 649 // no surrogate pair generation (yet?)
467e0479 650 return wxCONV_FAILED;
15f2ee32
RN
651 }
652#endif
653 else
654 {
655 if (buf)
656 *buf++ = '+';
ef199164 657
15f2ee32
RN
658 len++;
659 if (cc != '+')
660 {
661 // BASE64 encode string
662 unsigned int lsb, d, l;
73c902d6 663 for (d = 0, l = 0; /*nothing*/; psz++)
15f2ee32
RN
664 {
665 for (lsb = 0; lsb < 2; lsb ++)
666 {
667 d <<= 8;
668 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
669
670 for (l += 8; l >= 6; )
671 {
672 l -= 6;
673 if (buf)
674 *buf++ = utf7enb64[(d >> l) % 64];
675 len++;
676 }
677 }
ef199164 678
15f2ee32
RN
679 cc = *psz;
680 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
681 break;
682 }
ef199164 683
15f2ee32
RN
684 if (l != 0)
685 {
686 if (buf)
687 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
ef199164 688
15f2ee32
RN
689 len++;
690 }
691 }
ef199164 692
15f2ee32
RN
693 if (buf)
694 *buf++ = '-';
695 len++;
696 }
697 }
ef199164 698
15f2ee32
RN
699 if (buf && (len < n))
700 *buf = 0;
ef199164 701
15f2ee32 702 return len;
6001e347
RR
703}
704
f6bcfd97 705// ----------------------------------------------------------------------------
6001e347 706// UTF-8
f6bcfd97 707// ----------------------------------------------------------------------------
6001e347 708
dccce9ea 709static wxUint32 utf8_max[]=
4def3b35 710 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347 711
3698ae71
VZ
712// boundaries of the private use area we use to (temporarily) remap invalid
713// characters invalid in a UTF-8 encoded string
ea8ce907
RR
714const wxUint32 wxUnicodePUA = 0x100000;
715const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
716
6001e347
RR
717size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
718{
4def3b35
VS
719 size_t len = 0;
720
dccce9ea 721 while (*psz && ((!buf) || (len < n)))
4def3b35 722 {
ea8ce907
RR
723 const char *opsz = psz;
724 bool invalid = false;
4def3b35
VS
725 unsigned char cc = *psz++, fc = cc;
726 unsigned cnt;
dccce9ea 727 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 728 fc <<= 1;
ef199164 729
dccce9ea 730 if (!cnt)
4def3b35
VS
731 {
732 // plain ASCII char
dccce9ea 733 if (buf)
4def3b35
VS
734 *buf++ = cc;
735 len++;
561488ef
MW
736
737 // escape the escape character for octal escapes
738 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
739 && cc == '\\' && (!buf || len < n))
740 {
741 if (buf)
742 *buf++ = cc;
743 len++;
744 }
dccce9ea
VZ
745 }
746 else
4def3b35
VS
747 {
748 cnt--;
dccce9ea 749 if (!cnt)
4def3b35
VS
750 {
751 // invalid UTF-8 sequence
ea8ce907 752 invalid = true;
dccce9ea
VZ
753 }
754 else
4def3b35
VS
755 {
756 unsigned ocnt = cnt - 1;
757 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 758 while (cnt--)
4def3b35 759 {
ea8ce907 760 cc = *psz;
dccce9ea 761 if ((cc & 0xC0) != 0x80)
4def3b35
VS
762 {
763 // invalid UTF-8 sequence
ea8ce907
RR
764 invalid = true;
765 break;
4def3b35 766 }
ef199164 767
ea8ce907 768 psz++;
4def3b35
VS
769 res = (res << 6) | (cc & 0x3f);
770 }
ef199164 771
ea8ce907 772 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
773 {
774 // illegal UTF-8 encoding
ea8ce907 775 invalid = true;
4def3b35 776 }
ea8ce907
RR
777 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
778 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
779 {
780 // if one of our PUA characters turns up externally
781 // it must also be treated as an illegal sequence
782 // (a bit like you have to escape an escape character)
783 invalid = true;
784 }
785 else
786 {
1cd52418 787#ifdef WC_UTF16
ea8ce907
RR
788 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
789 size_t pa = encode_utf16(res, (wxUint16 *)buf);
467e0479 790 if (pa == wxCONV_FAILED)
ea8ce907
RR
791 {
792 invalid = true;
793 }
794 else
795 {
796 if (buf)
797 buf += pa;
798 len += pa;
799 }
373658eb 800#else // !WC_UTF16
ea8ce907 801 if (buf)
38d4b1e4 802 *buf++ = (wchar_t)res;
ea8ce907 803 len++;
373658eb 804#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
805 }
806 }
ef199164 807
ea8ce907
RR
808 if (invalid)
809 {
810 if (m_options & MAP_INVALID_UTF8_TO_PUA)
811 {
812 while (opsz < psz && (!buf || len < n))
813 {
814#ifdef WC_UTF16
815 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
816 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
467e0479 817 wxASSERT(pa != wxCONV_FAILED);
ea8ce907
RR
818 if (buf)
819 buf += pa;
820 opsz++;
821 len += pa;
822#else
823 if (buf)
38d4b1e4 824 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
ea8ce907
RR
825 opsz++;
826 len++;
827#endif
828 }
829 }
3698ae71 830 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
831 {
832 while (opsz < psz && (!buf || len < n))
833 {
3698ae71
VZ
834 if ( buf && len + 3 < n )
835 {
17a1ebd1 836 unsigned char on = *opsz;
3698ae71 837 *buf++ = L'\\';
17a1ebd1
VZ
838 *buf++ = (wchar_t)( L'0' + on / 0100 );
839 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
840 *buf++ = (wchar_t)( L'0' + on % 010 );
3698ae71 841 }
ef199164 842
ea8ce907
RR
843 opsz++;
844 len += 4;
845 }
846 }
3698ae71 847 else // MAP_INVALID_UTF8_NOT
ea8ce907 848 {
467e0479 849 return wxCONV_FAILED;
ea8ce907 850 }
4def3b35
VS
851 }
852 }
6001e347 853 }
ef199164 854
dccce9ea 855 if (buf && (len < n))
4def3b35 856 *buf = 0;
ef199164 857
4def3b35 858 return len;
6001e347
RR
859}
860
3698ae71
VZ
// returns true if the given character is an octal digit, i.e. '0'..'7'
static inline bool isoctal(wchar_t wch)
{
    return !(wch < L'0' || wch > L'7');
}
865
6001e347
RR
866size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
867{
4def3b35 868 size_t len = 0;
6001e347 869
dccce9ea 870 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
871 {
872 wxUint32 cc;
ef199164 873
1cd52418 874#ifdef WC_UTF16
b5153fd8
VZ
875 // cast is ok for WC_UTF16
876 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
467e0479 877 psz += (pa == wxCONV_FAILED) ? 1 : pa;
1cd52418 878#else
ef199164 879 cc = (*psz++) & 0x7fffffff;
4def3b35 880#endif
3698ae71
VZ
881
882 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
883 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 884 {
dccce9ea 885 if (buf)
ea8ce907 886 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 887 len++;
3698ae71 888 }
561488ef
MW
889 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
890 && cc == L'\\' && psz[0] == L'\\' )
891 {
892 if (buf)
893 *buf++ = (char)cc;
894 psz++;
895 len++;
896 }
3698ae71
VZ
897 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
898 cc == L'\\' &&
899 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 900 {
dccce9ea 901 if (buf)
3698ae71 902 {
ef199164
DS
903 *buf++ = (char) ((psz[0] - L'0') * 0100 +
904 (psz[1] - L'0') * 010 +
b2c13097 905 (psz[2] - L'0'));
3698ae71
VZ
906 }
907
908 psz += 3;
ea8ce907
RR
909 len++;
910 }
911 else
912 {
913 unsigned cnt;
ef199164
DS
914 for (cnt = 0; cc > utf8_max[cnt]; cnt++)
915 {
916 }
917
ea8ce907 918 if (!cnt)
4def3b35 919 {
ea8ce907
RR
920 // plain ASCII char
921 if (buf)
922 *buf++ = (char) cc;
923 len++;
924 }
ea8ce907
RR
925 else
926 {
927 len += cnt + 1;
928 if (buf)
929 {
930 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
931 while (cnt--)
932 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
933 }
4def3b35
VS
934 }
935 }
6001e347 936 }
4def3b35 937
ef199164 938 if (buf && (len < n))
3698ae71 939 *buf = 0;
adb45366 940
4def3b35 941 return len;
6001e347
RR
942}
943
467e0479 944// ============================================================================
c91830cb 945// UTF-16
467e0479 946// ============================================================================
c91830cb
VZ
947
948#ifdef WORDS_BIGENDIAN
bde4baac
VZ
949 #define wxMBConvUTF16straight wxMBConvUTF16BE
950 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 951#else
bde4baac
VZ
952 #define wxMBConvUTF16swap wxMBConvUTF16BE
953 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
954#endif
955
467e0479
VZ
956/* static */
957size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
958{
959 if ( srcLen == wxNO_LEN )
960 {
961 // count the number of bytes in input, including the trailing NULs
ef199164
DS
962 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
963 for ( srcLen = 1; *inBuff++; srcLen++ )
467e0479 964 ;
c91830cb 965
467e0479
VZ
966 srcLen *= BYTES_PER_CHAR;
967 }
968 else // we already have the length
969 {
970 // we can only convert an entire number of UTF-16 characters
971 if ( srcLen % BYTES_PER_CHAR )
972 return wxCONV_FAILED;
973 }
974
975 return srcLen;
976}
977
978// case when in-memory representation is UTF-16 too
c91830cb
VZ
979#ifdef WC_UTF16
980
467e0479
VZ
981// ----------------------------------------------------------------------------
982// conversions without endianness change
983// ----------------------------------------------------------------------------
984
985size_t
986wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
987 const char *src, size_t srcLen) const
c91830cb 988{
467e0479
VZ
989 // set up the scene for using memcpy() (which is presumably more efficient
990 // than copying the bytes one by one)
991 srcLen = GetLength(src, srcLen);
992 if ( srcLen == wxNO_LEN )
993 return wxCONV_FAILED;
c91830cb 994
ef199164 995 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479 996 if ( dst )
c91830cb 997 {
467e0479
VZ
998 if ( dstLen < inLen )
999 return wxCONV_FAILED;
c91830cb 1000
467e0479 1001 memcpy(dst, src, srcLen);
c91830cb 1002 }
d32a507d 1003
467e0479 1004 return inLen;
c91830cb
VZ
1005}
1006
467e0479
VZ
1007size_t
1008wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1009 const wchar_t *src, size_t srcLen) const
c91830cb 1010{
467e0479
VZ
1011 if ( srcLen == wxNO_LEN )
1012 srcLen = wxWcslen(src) + 1;
c91830cb 1013
467e0479
VZ
1014 srcLen *= BYTES_PER_CHAR;
1015
1016 if ( dst )
c91830cb 1017 {
467e0479
VZ
1018 if ( dstLen < srcLen )
1019 return wxCONV_FAILED;
d32a507d 1020
467e0479 1021 memcpy(dst, src, srcLen);
c91830cb 1022 }
d32a507d 1023
467e0479 1024 return srcLen;
c91830cb
VZ
1025}
1026
467e0479
VZ
1027// ----------------------------------------------------------------------------
1028// endian-reversing conversions
1029// ----------------------------------------------------------------------------
c91830cb 1030
467e0479
VZ
1031size_t
1032wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1033 const char *src, size_t srcLen) const
c91830cb 1034{
467e0479
VZ
1035 srcLen = GetLength(src, srcLen);
1036 if ( srcLen == wxNO_LEN )
1037 return wxCONV_FAILED;
c91830cb 1038
467e0479
VZ
1039 srcLen /= BYTES_PER_CHAR;
1040
1041 if ( dst )
c91830cb 1042 {
467e0479
VZ
1043 if ( dstLen < srcLen )
1044 return wxCONV_FAILED;
1045
ef199164
DS
1046 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1047 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
c91830cb 1048 {
ef199164 1049 *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff);
c91830cb 1050 }
c91830cb 1051 }
bfab25d4 1052
467e0479 1053 return srcLen;
c91830cb
VZ
1054}
1055
467e0479
VZ
1056size_t
1057wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1058 const wchar_t *src, size_t srcLen) const
c91830cb 1059{
467e0479
VZ
1060 if ( srcLen == wxNO_LEN )
1061 srcLen = wxWcslen(src) + 1;
c91830cb 1062
467e0479
VZ
1063 srcLen *= BYTES_PER_CHAR;
1064
1065 if ( dst )
c91830cb 1066 {
467e0479
VZ
1067 if ( dstLen < srcLen )
1068 return wxCONV_FAILED;
1069
ef199164 1070 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
467e0479 1071 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
c91830cb 1072 {
ef199164 1073 *outBuff++ = wxUINT16_SWAP_ALWAYS(*src);
c91830cb 1074 }
c91830cb 1075 }
eec47cc6 1076
467e0479 1077 return srcLen;
c91830cb
VZ
1078}
1079
467e0479 1080#else // !WC_UTF16: wchar_t is UTF-32
c91830cb 1081
467e0479
VZ
1082// ----------------------------------------------------------------------------
1083// conversions without endianness change
1084// ----------------------------------------------------------------------------
c91830cb 1085
35d11700
VZ
1086size_t
1087wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1088 const char *src, size_t srcLen) const
c91830cb 1089{
35d11700
VZ
1090 srcLen = GetLength(src, srcLen);
1091 if ( srcLen == wxNO_LEN )
1092 return wxCONV_FAILED;
c91830cb 1093
ef199164 1094 const size_t inLen = srcLen / BYTES_PER_CHAR;
35d11700 1095 if ( !dst )
c91830cb 1096 {
35d11700
VZ
1097 // optimization: return maximal space which could be needed for this
1098 // string even if the real size could be smaller if the buffer contains
1099 // any surrogates
1100 return inLen;
c91830cb 1101 }
c91830cb 1102
35d11700 1103 size_t outLen = 0;
ef199164
DS
1104 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1105 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
35d11700 1106 {
ef199164
DS
1107 const wxUint32 ch = wxDecodeSurrogate(&inBuff);
1108 if ( !inBuff )
35d11700
VZ
1109 return wxCONV_FAILED;
1110
1111 if ( ++outLen > dstLen )
1112 return wxCONV_FAILED;
c91830cb 1113
35d11700
VZ
1114 *dst++ = ch;
1115 }
1116
1117
1118 return outLen;
1119}
c91830cb 1120
35d11700
VZ
1121size_t
1122wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1123 const wchar_t *src, size_t srcLen) const
c91830cb 1124{
35d11700
VZ
1125 if ( srcLen == wxNO_LEN )
1126 srcLen = wxWcslen(src) + 1;
c91830cb 1127
35d11700 1128 size_t outLen = 0;
ef199164 1129 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
35d11700 1130 for ( size_t n = 0; n < srcLen; n++ )
c91830cb
VZ
1131 {
1132 wxUint16 cc[2];
35d11700
VZ
1133 const size_t numChars = encode_utf16(*src++, cc);
1134 if ( numChars == wxCONV_FAILED )
1135 return wxCONV_FAILED;
c91830cb 1136
ef199164
DS
1137 outLen += numChars * BYTES_PER_CHAR;
1138 if ( outBuff )
c91830cb 1139 {
35d11700
VZ
1140 if ( outLen > dstLen )
1141 return wxCONV_FAILED;
1142
ef199164 1143 *outBuff++ = cc[0];
35d11700 1144 if ( numChars == 2 )
69b80d28 1145 {
35d11700 1146 // second character of a surrogate
ef199164 1147 *outBuff++ = cc[1];
69b80d28 1148 }
c91830cb 1149 }
c91830cb 1150 }
c91830cb 1151
35d11700 1152 return outLen;
c91830cb
VZ
1153}
1154
467e0479
VZ
1155// ----------------------------------------------------------------------------
1156// endian-reversing conversions
1157// ----------------------------------------------------------------------------
c91830cb 1158
35d11700
VZ
1159size_t
1160wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1161 const char *src, size_t srcLen) const
c91830cb 1162{
35d11700
VZ
1163 srcLen = GetLength(src, srcLen);
1164 if ( srcLen == wxNO_LEN )
1165 return wxCONV_FAILED;
1166
ef199164 1167 const size_t inLen = srcLen / BYTES_PER_CHAR;
35d11700
VZ
1168 if ( !dst )
1169 {
1170 // optimization: return maximal space which could be needed for this
1171 // string even if the real size could be smaller if the buffer contains
1172 // any surrogates
1173 return inLen;
1174 }
c91830cb 1175
35d11700 1176 size_t outLen = 0;
ef199164
DS
1177 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1178 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
c91830cb 1179 {
35d11700
VZ
1180 wxUint32 ch;
1181 wxUint16 tmp[2];
ef199164
DS
1182
1183 tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
1184 inBuff++;
1185 tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
c91830cb 1186
35d11700
VZ
1187 const size_t numChars = decode_utf16(tmp, ch);
1188 if ( numChars == wxCONV_FAILED )
1189 return wxCONV_FAILED;
c91830cb 1190
35d11700 1191 if ( numChars == 2 )
ef199164 1192 inBuff++;
35d11700
VZ
1193
1194 if ( ++outLen > dstLen )
1195 return wxCONV_FAILED;
c91830cb 1196
35d11700 1197 *dst++ = ch;
c91830cb 1198 }
c91830cb 1199
c91830cb 1200
35d11700
VZ
1201 return outLen;
1202}
c91830cb 1203
35d11700
VZ
1204size_t
1205wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1206 const wchar_t *src, size_t srcLen) const
c91830cb 1207{
35d11700
VZ
1208 if ( srcLen == wxNO_LEN )
1209 srcLen = wxWcslen(src) + 1;
c91830cb 1210
35d11700 1211 size_t outLen = 0;
ef199164 1212 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
35d11700 1213 for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
c91830cb
VZ
1214 {
1215 wxUint16 cc[2];
35d11700
VZ
1216 const size_t numChars = encode_utf16(*src, cc);
1217 if ( numChars == wxCONV_FAILED )
1218 return wxCONV_FAILED;
c91830cb 1219
ef199164
DS
1220 outLen += numChars * BYTES_PER_CHAR;
1221 if ( outBuff )
c91830cb 1222 {
35d11700
VZ
1223 if ( outLen > dstLen )
1224 return wxCONV_FAILED;
1225
ef199164 1226 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
35d11700 1227 if ( numChars == 2 )
c91830cb 1228 {
35d11700 1229 // second character of a surrogate
ef199164 1230 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
c91830cb
VZ
1231 }
1232 }
c91830cb 1233 }
c91830cb 1234
35d11700 1235 return outLen;
c91830cb
VZ
1236}
1237
467e0479 1238#endif // WC_UTF16/!WC_UTF16
c91830cb
VZ
1239
1240
35d11700 1241// ============================================================================
c91830cb 1242// UTF-32
35d11700 1243// ============================================================================
c91830cb
VZ
1244
1245#ifdef WORDS_BIGENDIAN
467e0479
VZ
1246 #define wxMBConvUTF32straight wxMBConvUTF32BE
1247 #define wxMBConvUTF32swap wxMBConvUTF32LE
c91830cb 1248#else
467e0479
VZ
1249 #define wxMBConvUTF32swap wxMBConvUTF32BE
1250 #define wxMBConvUTF32straight wxMBConvUTF32LE
c91830cb
VZ
1251#endif
1252
1253
1254WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1255WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1256
467e0479
VZ
1257/* static */
1258size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
1259{
1260 if ( srcLen == wxNO_LEN )
1261 {
1262 // count the number of bytes in input, including the trailing NULs
ef199164
DS
1263 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1264 for ( srcLen = 1; *inBuff++; srcLen++ )
467e0479 1265 ;
c91830cb 1266
467e0479
VZ
1267 srcLen *= BYTES_PER_CHAR;
1268 }
1269 else // we already have the length
1270 {
1271 // we can only convert an entire number of UTF-32 characters
1272 if ( srcLen % BYTES_PER_CHAR )
1273 return wxCONV_FAILED;
1274 }
1275
1276 return srcLen;
1277}
1278
1279// case when in-memory representation is UTF-16
c91830cb
VZ
1280#ifdef WC_UTF16
1281
467e0479
VZ
1282// ----------------------------------------------------------------------------
1283// conversions without endianness change
1284// ----------------------------------------------------------------------------
1285
1286size_t
1287wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1288 const char *src, size_t srcLen) const
c91830cb 1289{
467e0479
VZ
1290 srcLen = GetLength(src, srcLen);
1291 if ( srcLen == wxNO_LEN )
1292 return wxCONV_FAILED;
c91830cb 1293
ef199164
DS
1294 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1295 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479
VZ
1296 size_t outLen = 0;
1297 for ( size_t n = 0; n < inLen; n++ )
c91830cb
VZ
1298 {
1299 wxUint16 cc[2];
ef199164 1300 const size_t numChars = encode_utf16(*inBuff++, cc);
467e0479
VZ
1301 if ( numChars == wxCONV_FAILED )
1302 return wxCONV_FAILED;
c91830cb 1303
467e0479
VZ
1304 outLen += numChars;
1305 if ( dst )
c91830cb 1306 {
467e0479
VZ
1307 if ( outLen > dstLen )
1308 return wxCONV_FAILED;
d32a507d 1309
467e0479
VZ
1310 *dst++ = cc[0];
1311 if ( numChars == 2 )
1312 {
1313 // second character of a surrogate
1314 *dst++ = cc[1];
1315 }
1316 }
c91830cb 1317 }
d32a507d 1318
467e0479 1319 return outLen;
c91830cb
VZ
1320}
1321
467e0479
VZ
1322size_t
1323wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1324 const wchar_t *src, size_t srcLen) const
c91830cb 1325{
467e0479
VZ
1326 if ( srcLen == wxNO_LEN )
1327 srcLen = wxWcslen(src) + 1;
c91830cb 1328
467e0479 1329 if ( !dst )
c91830cb 1330 {
467e0479
VZ
1331 // optimization: return maximal space which could be needed for this
1332 // string instead of the exact amount which could be less if there are
1333 // any surrogates in the input
1334 //
1335 // we consider that surrogates are rare enough to make it worthwhile to
1336 // avoid running the loop below at the cost of slightly extra memory
1337 // consumption
ef199164 1338 return srcLen * BYTES_PER_CHAR;
467e0479 1339 }
c91830cb 1340
ef199164 1341 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
467e0479
VZ
1342 size_t outLen = 0;
1343 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1344 {
1345 const wxUint32 ch = wxDecodeSurrogate(&src);
1346 if ( !src )
1347 return wxCONV_FAILED;
c91830cb 1348
467e0479 1349 outLen += BYTES_PER_CHAR;
d32a507d 1350
467e0479
VZ
1351 if ( outLen > dstLen )
1352 return wxCONV_FAILED;
b5153fd8 1353
ef199164 1354 *outBuff++ = ch;
467e0479 1355 }
c91830cb 1356
467e0479 1357 return outLen;
c91830cb
VZ
1358}
1359
467e0479
VZ
1360// ----------------------------------------------------------------------------
1361// endian-reversing conversions
1362// ----------------------------------------------------------------------------
c91830cb 1363
467e0479
VZ
1364size_t
1365wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1366 const char *src, size_t srcLen) const
c91830cb 1367{
467e0479
VZ
1368 srcLen = GetLength(src, srcLen);
1369 if ( srcLen == wxNO_LEN )
1370 return wxCONV_FAILED;
c91830cb 1371
ef199164
DS
1372 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1373 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479 1374 size_t outLen = 0;
ef199164 1375 for ( size_t n = 0; n < inLen; n++, inBuff++ )
c91830cb 1376 {
c91830cb 1377 wxUint16 cc[2];
ef199164 1378 const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
467e0479
VZ
1379 if ( numChars == wxCONV_FAILED )
1380 return wxCONV_FAILED;
c91830cb 1381
467e0479
VZ
1382 outLen += numChars;
1383 if ( dst )
c91830cb 1384 {
467e0479
VZ
1385 if ( outLen > dstLen )
1386 return wxCONV_FAILED;
d32a507d 1387
467e0479
VZ
1388 *dst++ = cc[0];
1389 if ( numChars == 2 )
1390 {
1391 // second character of a surrogate
1392 *dst++ = cc[1];
1393 }
1394 }
c91830cb 1395 }
b5153fd8 1396
467e0479 1397 return outLen;
c91830cb
VZ
1398}
1399
467e0479
VZ
1400size_t
1401wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1402 const wchar_t *src, size_t srcLen) const
c91830cb 1403{
467e0479
VZ
1404 if ( srcLen == wxNO_LEN )
1405 srcLen = wxWcslen(src) + 1;
c91830cb 1406
467e0479 1407 if ( !dst )
c91830cb 1408 {
467e0479
VZ
1409 // optimization: return maximal space which could be needed for this
1410 // string instead of the exact amount which could be less if there are
1411 // any surrogates in the input
1412 //
1413 // we consider that surrogates are rare enough to make it worthwhile to
1414 // avoid running the loop below at the cost of slightly extra memory
1415 // consumption
1416 return srcLen*BYTES_PER_CHAR;
1417 }
c91830cb 1418
ef199164 1419 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
467e0479
VZ
1420 size_t outLen = 0;
1421 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1422 {
1423 const wxUint32 ch = wxDecodeSurrogate(&src);
1424 if ( !src )
1425 return wxCONV_FAILED;
c91830cb 1426
467e0479 1427 outLen += BYTES_PER_CHAR;
d32a507d 1428
467e0479
VZ
1429 if ( outLen > dstLen )
1430 return wxCONV_FAILED;
b5153fd8 1431
ef199164 1432 *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
467e0479 1433 }
c91830cb 1434
467e0479 1435 return outLen;
c91830cb
VZ
1436}
1437
467e0479 1438#else // !WC_UTF16: wchar_t is UTF-32
c91830cb 1439
35d11700
VZ
1440// ----------------------------------------------------------------------------
1441// conversions without endianness change
1442// ----------------------------------------------------------------------------
1443
1444size_t
1445wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1446 const char *src, size_t srcLen) const
c91830cb 1447{
35d11700
VZ
1448 // use memcpy() as it should be much faster than hand-written loop
1449 srcLen = GetLength(src, srcLen);
1450 if ( srcLen == wxNO_LEN )
1451 return wxCONV_FAILED;
c91830cb 1452
35d11700
VZ
1453 const size_t inLen = srcLen/BYTES_PER_CHAR;
1454 if ( dst )
c91830cb 1455 {
35d11700
VZ
1456 if ( dstLen < inLen )
1457 return wxCONV_FAILED;
b5153fd8 1458
35d11700
VZ
1459 memcpy(dst, src, srcLen);
1460 }
c91830cb 1461
35d11700 1462 return inLen;
c91830cb
VZ
1463}
1464
35d11700
VZ
1465size_t
1466wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1467 const wchar_t *src, size_t srcLen) const
c91830cb 1468{
35d11700
VZ
1469 if ( srcLen == wxNO_LEN )
1470 srcLen = wxWcslen(src) + 1;
1471
1472 srcLen *= BYTES_PER_CHAR;
c91830cb 1473
35d11700 1474 if ( dst )
c91830cb 1475 {
35d11700
VZ
1476 if ( dstLen < srcLen )
1477 return wxCONV_FAILED;
c91830cb 1478
35d11700 1479 memcpy(dst, src, srcLen);
c91830cb
VZ
1480 }
1481
35d11700 1482 return srcLen;
c91830cb
VZ
1483}
1484
35d11700
VZ
1485// ----------------------------------------------------------------------------
1486// endian-reversing conversions
1487// ----------------------------------------------------------------------------
c91830cb 1488
35d11700
VZ
1489size_t
1490wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1491 const char *src, size_t srcLen) const
c91830cb 1492{
35d11700
VZ
1493 srcLen = GetLength(src, srcLen);
1494 if ( srcLen == wxNO_LEN )
1495 return wxCONV_FAILED;
1496
1497 srcLen /= BYTES_PER_CHAR;
c91830cb 1498
35d11700 1499 if ( dst )
c91830cb 1500 {
35d11700
VZ
1501 if ( dstLen < srcLen )
1502 return wxCONV_FAILED;
1503
ef199164
DS
1504 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1505 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
c91830cb 1506 {
ef199164 1507 *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff);
c91830cb 1508 }
c91830cb 1509 }
b5153fd8 1510
35d11700 1511 return srcLen;
c91830cb
VZ
1512}
1513
35d11700
VZ
1514size_t
1515wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1516 const wchar_t *src, size_t srcLen) const
c91830cb 1517{
35d11700
VZ
1518 if ( srcLen == wxNO_LEN )
1519 srcLen = wxWcslen(src) + 1;
1520
1521 srcLen *= BYTES_PER_CHAR;
c91830cb 1522
35d11700 1523 if ( dst )
c91830cb 1524 {
35d11700
VZ
1525 if ( dstLen < srcLen )
1526 return wxCONV_FAILED;
1527
ef199164 1528 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
35d11700 1529 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
c91830cb 1530 {
ef199164 1531 *outBuff++ = wxUINT32_SWAP_ALWAYS(*src);
c91830cb 1532 }
c91830cb 1533 }
b5153fd8 1534
35d11700 1535 return srcLen;
c91830cb
VZ
1536}
1537
467e0479 1538#endif // WC_UTF16/!WC_UTF16
c91830cb
VZ
1539
1540
36acb880
VZ
1541// ============================================================================
1542// The classes doing conversion using the iconv_xxx() functions
1543// ============================================================================
3caec1bb 1544
b040e242 1545#ifdef HAVE_ICONV
3a0d76bc 1546
b1d547eb
VS
1547// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1548// E2BIG if output buffer is _exactly_ as big as needed. Such case is
1549// (unless there's yet another bug in glibc) the only case when iconv()
1550// returns with (size_t)-1 (which means error) and says there are 0 bytes
1551// left in the input buffer -- when _real_ error occurs,
1552// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1553// iconv() failure.
3caec1bb
VS
1554// [This bug does not appear in glibc 2.2.]
1555#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1556#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1557 (errno != E2BIG || bufLeft != 0))
1558#else
1559#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1560#endif
1561
ab217dba 1562#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880 1563
74a7eb0b
VZ
1564#define ICONV_T_INVALID ((iconv_t)-1)
1565
1566#if SIZEOF_WCHAR_T == 4
1567 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1568 #define WC_ENC wxFONTENCODING_UTF32
1569#elif SIZEOF_WCHAR_T == 2
1570 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1571 #define WC_ENC wxFONTENCODING_UTF16
1572#else // sizeof(wchar_t) != 2 nor 4
1573 // does this ever happen?
1574 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1575#endif
1576
36acb880 1577// ----------------------------------------------------------------------------
e95354ec 1578// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1579// ----------------------------------------------------------------------------
1580
// wxMBConv implementation converting between the multibyte charset given by
// its name and wchar_t using a pair of iconv conversion descriptors, one for
// each direction.
class wxMBConv_iconv : public wxMBConv
{
public:
    // name is the charset name passed to iconv_open(); use IsOk() to check
    // whether both conversion descriptors could be opened
    wxMBConv_iconv(const char *name);
    virtual ~wxMBConv_iconv();

    virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
    virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;

    // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
    virtual size_t GetMBNulLen() const;

#if wxUSE_UNICODE_UTF8
    // true if the charset name is "UTF-8" or "UTF8" (case-insensitively)
    virtual bool IsUTF8() const;
#endif

    // create a new converter for the same charset, also copying the cached
    // GetMBNulLen() result to it
    virtual wxMBConv *Clone() const
    {
        wxMBConv_iconv *p = new wxMBConv_iconv(m_name.ToAscii());
        p->m_minMBCharWidth = m_minMBCharWidth;
        return p;
    }

    // true only if the conversion descriptors for both directions are valid
    bool IsOk() const
        { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }

protected:
    // the iconv handlers used to translate from multibyte
    // to wide char and in the other direction
    iconv_t m2w,
            w2m;

#if wxUSE_THREADS
    // guards access to m2w and w2m objects
    wxMutex m_iconvMutex;
#endif

private:
    // the name (for iconv_open()) of a wide char charset -- if none is
    // available on this machine, it will remain empty
    static wxString ms_wcCharsetName;

    // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
    // different endian-ness than the native one
    static bool ms_wcNeedsSwap;


    // name of the encoding handled by this conversion
    wxString m_name;

    // cached result of GetMBNulLen(); set to 0 meaning "unknown"
    // initially
    size_t m_minMBCharWidth;
};
1635
8f115891 1636// make the constructor available for unit testing
86501081 1637WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name )
8f115891
MW
1638{
1639 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1640 if ( !result->IsOk() )
1641 {
1642 delete result;
1643 return 0;
1644 }
ef199164 1645
8f115891
MW
1646 return result;
1647}
1648
422e411e 1649wxString wxMBConv_iconv::ms_wcCharsetName;
e95354ec 1650bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1651
86501081 1652wxMBConv_iconv::wxMBConv_iconv(const char *name)
d36c9347 1653 : m_name(name)
36acb880 1654{
c1464d9d 1655 m_minMBCharWidth = 0;
eec47cc6 1656
36acb880 1657 // check for charset that represents wchar_t:
74a7eb0b 1658 if ( ms_wcCharsetName.empty() )
f1339c56 1659 {
c2b83fdd
VZ
1660 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1661
74a7eb0b
VZ
1662#if wxUSE_FONTMAP
1663 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1664#else // !wxUSE_FONTMAP
91cb7f52 1665 static const wxChar *names_static[] =
36acb880 1666 {
74a7eb0b
VZ
1667#if SIZEOF_WCHAR_T == 4
1668 _T("UCS-4"),
1669#elif SIZEOF_WCHAR_T = 2
1670 _T("UCS-2"),
1671#endif
1672 NULL
1673 };
91cb7f52 1674 const wxChar **names = names_static;
74a7eb0b 1675#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
36acb880 1676
d1f024a8 1677 for ( ; *names && ms_wcCharsetName.empty(); ++names )
74a7eb0b 1678 {
17a1ebd1 1679 const wxString nameCS(*names);
74a7eb0b
VZ
1680
1681 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
17a1ebd1 1682 wxString nameXE(nameCS);
ef199164
DS
1683
1684#ifdef WORDS_BIGENDIAN
74a7eb0b 1685 nameXE += _T("BE");
ef199164 1686#else // little endian
74a7eb0b 1687 nameXE += _T("LE");
ef199164 1688#endif
74a7eb0b 1689
c2b83fdd
VZ
1690 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1691 nameXE.c_str());
1692
86501081 1693 m2w = iconv_open(nameXE.ToAscii(), name);
74a7eb0b 1694 if ( m2w == ICONV_T_INVALID )
3a0d76bc 1695 {
74a7eb0b 1696 // try charset w/o bytesex info (e.g. "UCS4")
c2b83fdd
VZ
1697 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1698 nameCS.c_str());
86501081 1699 m2w = iconv_open(nameCS.ToAscii(), name);
3a0d76bc 1700
74a7eb0b
VZ
1701 // and check for bytesex ourselves:
1702 if ( m2w != ICONV_T_INVALID )
3a0d76bc 1703 {
74a7eb0b
VZ
1704 char buf[2], *bufPtr;
1705 wchar_t wbuf[2], *wbufPtr;
1706 size_t insz, outsz;
1707 size_t res;
1708
1709 buf[0] = 'A';
1710 buf[1] = 0;
1711 wbuf[0] = 0;
1712 insz = 2;
1713 outsz = SIZEOF_WCHAR_T * 2;
1714 wbufPtr = wbuf;
1715 bufPtr = buf;
1716
ef199164
DS
1717 res = iconv(
1718 m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1719 (char**)&wbufPtr, &outsz);
74a7eb0b
VZ
1720
1721 if (ICONV_FAILED(res, insz))
1722 {
1723 wxLogLastError(wxT("iconv"));
422e411e 1724 wxLogError(_("Conversion to charset '%s' doesn't work."),
17a1ebd1 1725 nameCS.c_str());
74a7eb0b
VZ
1726 }
1727 else // ok, can convert to this encoding, remember it
1728 {
17a1ebd1 1729 ms_wcCharsetName = nameCS;
74a7eb0b
VZ
1730 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1731 }
3a0d76bc
VS
1732 }
1733 }
74a7eb0b 1734 else // use charset not requiring byte swapping
36acb880 1735 {
74a7eb0b 1736 ms_wcCharsetName = nameXE;
36acb880 1737 }
3a0d76bc 1738 }
74a7eb0b 1739
0944fceb 1740 wxLogTrace(TRACE_STRCONV,
74a7eb0b 1741 wxT("iconv wchar_t charset is \"%s\"%s"),
999020e1
VZ
1742 ms_wcCharsetName.empty() ? wxString("<none>")
1743 : ms_wcCharsetName,
74a7eb0b
VZ
1744 ms_wcNeedsSwap ? _T(" (needs swap)")
1745 : _T(""));
3a0d76bc 1746 }
36acb880 1747 else // we already have ms_wcCharsetName
3caec1bb 1748 {
86501081 1749 m2w = iconv_open(ms_wcCharsetName.ToAscii(), name);
f1339c56 1750 }
dccce9ea 1751
74a7eb0b 1752 if ( ms_wcCharsetName.empty() )
f1339c56 1753 {
74a7eb0b 1754 w2m = ICONV_T_INVALID;
36acb880 1755 }
405d8f46
VZ
1756 else
1757 {
86501081 1758 w2m = iconv_open(name, ms_wcCharsetName.ToAscii());
74a7eb0b
VZ
1759 if ( w2m == ICONV_T_INVALID )
1760 {
1761 wxLogTrace(TRACE_STRCONV,
1762 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
86501081 1763 ms_wcCharsetName.c_str(), name);
74a7eb0b 1764 }
405d8f46 1765 }
36acb880 1766}
3caec1bb 1767
e95354ec 1768wxMBConv_iconv::~wxMBConv_iconv()
36acb880 1769{
74a7eb0b 1770 if ( m2w != ICONV_T_INVALID )
36acb880 1771 iconv_close(m2w);
74a7eb0b 1772 if ( w2m != ICONV_T_INVALID )
36acb880
VZ
1773 iconv_close(w2m);
1774}
3a0d76bc 1775
bde4baac 1776size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1777{
69373110
VZ
1778 // find the string length: notice that must be done differently for
1779 // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1780 size_t inbuf;
7ef3ab50 1781 const size_t nulLen = GetMBNulLen();
69373110
VZ
1782 switch ( nulLen )
1783 {
1784 default:
467e0479 1785 return wxCONV_FAILED;
69373110
VZ
1786
1787 case 1:
1788 inbuf = strlen(psz); // arguably more optimized than our version
1789 break;
1790
1791 case 2:
1792 case 4:
1793 // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1794 // they also have to start at character boundary and not span two
1795 // adjacent characters
1796 const char *p;
1797 for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
1798 ;
1799 inbuf = p - psz;
1800 break;
1801 }
1802
b1d547eb 1803#if wxUSE_THREADS
6a17b868
SN
1804 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1805 // Unfortunately there are a couple of global wxCSConv objects such as
b1d547eb
VS
1806 // wxConvLocal that are used all over wx code, so we have to make sure
1807 // the handle is used by at most one thread at the time. Otherwise
1808 // only a few wx classes would be safe to use from non-main threads
1809 // as MB<->WC conversion would fail "randomly".
1810 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
69373110
VZ
1811#endif // wxUSE_THREADS
1812
36acb880
VZ
1813 size_t outbuf = n * SIZEOF_WCHAR_T;
1814 size_t res, cres;
1815 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1816 wchar_t *bufPtr = buf;
1817 const char *pszPtr = psz;
1818
1819 if (buf)
1820 {
1821 // have destination buffer, convert there
1822 cres = iconv(m2w,
1823 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1824 (char**)&bufPtr, &outbuf);
1825 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1826
36acb880 1827 if (ms_wcNeedsSwap)
3a0d76bc 1828 {
36acb880 1829 // convert to native endianness
17a1ebd1
VZ
1830 for ( unsigned i = 0; i < res; i++ )
1831 buf[n] = WC_BSWAP(buf[i]);
3a0d76bc 1832 }
adb45366 1833
69373110 1834 // NUL-terminate the string if there is any space left
49dd9820
VS
1835 if (res < n)
1836 buf[res] = 0;
36acb880
VZ
1837 }
1838 else
1839 {
1840 // no destination buffer... convert using temp buffer
1841 // to calculate destination buffer requirement
1842 wchar_t tbuf[8];
1843 res = 0;
ef199164
DS
1844
1845 do
1846 {
36acb880 1847 bufPtr = tbuf;
ef199164 1848 outbuf = 8 * SIZEOF_WCHAR_T;
36acb880
VZ
1849
1850 cres = iconv(m2w,
1851 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1852 (char**)&bufPtr, &outbuf );
1853
ef199164
DS
1854 res += 8 - (outbuf / SIZEOF_WCHAR_T);
1855 }
1856 while ((cres == (size_t)-1) && (errno == E2BIG));
f1339c56 1857 }
dccce9ea 1858
36acb880 1859 if (ICONV_FAILED(cres, inbuf))
f1339c56 1860 {
36acb880 1861 //VS: it is ok if iconv fails, hence trace only
ce6f8d6f 1862 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
467e0479 1863 return wxCONV_FAILED;
36acb880
VZ
1864 }
1865
1866 return res;
1867}
1868
bde4baac 1869size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1870{
b1d547eb
VS
1871#if wxUSE_THREADS
1872 // NB: explained in MB2WC
1873 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1874#endif
3698ae71 1875
156162ec
MW
1876 size_t inlen = wxWcslen(psz);
1877 size_t inbuf = inlen * SIZEOF_WCHAR_T;
36acb880
VZ
1878 size_t outbuf = n;
1879 size_t res, cres;
3a0d76bc 1880
36acb880 1881 wchar_t *tmpbuf = 0;
3caec1bb 1882
36acb880
VZ
1883 if (ms_wcNeedsSwap)
1884 {
1885 // need to copy to temp buffer to switch endianness
74a7eb0b 1886 // (doing WC_BSWAP twice on the original buffer won't help, as it
36acb880 1887 // could be in read-only memory, or be accessed in some other thread)
74a7eb0b 1888 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
17a1ebd1
VZ
1889 for ( size_t i = 0; i < inlen; i++ )
1890 tmpbuf[n] = WC_BSWAP(psz[i]);
ef199164 1891
156162ec 1892 tmpbuf[inlen] = L'\0';
74a7eb0b 1893 psz = tmpbuf;
36acb880 1894 }
3a0d76bc 1895
36acb880
VZ
1896 if (buf)
1897 {
1898 // have destination buffer, convert there
1899 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1900
ef199164 1901 res = n - outbuf;
adb45366 1902
49dd9820
VS
1903 // NB: iconv was given only wcslen(psz) characters on input, and so
1904 // it couldn't convert the trailing zero. Let's do it ourselves
1905 // if there's some room left for it in the output buffer.
1906 if (res < n)
1907 buf[0] = 0;
36acb880
VZ
1908 }
1909 else
1910 {
ef199164 1911 // no destination buffer: convert using temp buffer
36acb880
VZ
1912 // to calculate destination buffer requirement
1913 char tbuf[16];
1914 res = 0;
ef199164
DS
1915 do
1916 {
1917 buf = tbuf;
1918 outbuf = 16;
36acb880
VZ
1919
1920 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1921
36acb880 1922 res += 16 - outbuf;
ef199164
DS
1923 }
1924 while ((cres == (size_t)-1) && (errno == E2BIG));
f1339c56 1925 }
dccce9ea 1926
36acb880
VZ
1927 if (ms_wcNeedsSwap)
1928 {
1929 free(tmpbuf);
1930 }
dccce9ea 1931
36acb880
VZ
1932 if (ICONV_FAILED(cres, inbuf))
1933 {
ce6f8d6f 1934 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
467e0479 1935 return wxCONV_FAILED;
36acb880
VZ
1936 }
1937
1938 return res;
1939}
1940
7ef3ab50 1941size_t wxMBConv_iconv::GetMBNulLen() const
eec47cc6 1942{
c1464d9d 1943 if ( m_minMBCharWidth == 0 )
eec47cc6
VZ
1944 {
1945 wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1946
1947#if wxUSE_THREADS
1948 // NB: explained in MB2WC
1949 wxMutexLocker lock(self->m_iconvMutex);
1950#endif
1951
999020e1 1952 const wchar_t *wnul = L"";
c1464d9d 1953 char buf[8]; // should be enough for NUL in any encoding
356410fc 1954 size_t inLen = sizeof(wchar_t),
c1464d9d 1955 outLen = WXSIZEOF(buf);
ef199164
DS
1956 char *inBuff = (char *)wnul;
1957 char *outBuff = buf;
1958 if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
356410fc 1959 {
c1464d9d 1960 self->m_minMBCharWidth = (size_t)-1;
356410fc
VZ
1961 }
1962 else // ok
1963 {
ef199164 1964 self->m_minMBCharWidth = outBuff - buf;
356410fc 1965 }
eec47cc6
VZ
1966 }
1967
c1464d9d 1968 return m_minMBCharWidth;
eec47cc6
VZ
1969}
1970
ba98e032
VS
1971#if wxUSE_UNICODE_UTF8
1972bool wxMBConv_iconv::IsUTF8() const
1973{
86501081
VS
1974 return wxStricmp(m_name, "UTF-8") == 0 ||
1975 wxStricmp(m_name, "UTF8") == 0;
ba98e032
VS
1976}
1977#endif
1978
b040e242 1979#endif // HAVE_ICONV
36acb880 1980
e95354ec 1981
36acb880
VZ
1982// ============================================================================
1983// Win32 conversion classes
1984// ============================================================================
1cd52418 1985
e95354ec 1986#ifdef wxHAVE_WIN32_MB2WC
373658eb 1987
8b04d4c4 1988// from utils.cpp
d775fa82 1989#if wxUSE_FONTMAP
86501081 1990extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const char *charset);
8b04d4c4 1991extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1992#endif
373658eb 1993
e95354ec 1994class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1995{
1996public:
bde4baac
VZ
1997 wxMBConv_win32()
1998 {
1999 m_CodePage = CP_ACP;
c1464d9d 2000 m_minMBCharWidth = 0;
bde4baac
VZ
2001 }
2002
d36c9347 2003 wxMBConv_win32(const wxMBConv_win32& conv)
1e1c5d62 2004 : wxMBConv()
d36c9347
VZ
2005 {
2006 m_CodePage = conv.m_CodePage;
2007 m_minMBCharWidth = conv.m_minMBCharWidth;
2008 }
2009
7608a683 2010#if wxUSE_FONTMAP
86501081 2011 wxMBConv_win32(const char* name)
bde4baac
VZ
2012 {
2013 m_CodePage = wxCharsetToCodepage(name);
c1464d9d 2014 m_minMBCharWidth = 0;
bde4baac 2015 }
dccce9ea 2016
e95354ec 2017 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
2018 {
2019 m_CodePage = wxEncodingToCodepage(encoding);
c1464d9d 2020 m_minMBCharWidth = 0;
bde4baac 2021 }
eec47cc6 2022#endif // wxUSE_FONTMAP
8b04d4c4 2023
d36c9347 2024 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 2025 {
02272c9c
VZ
2026 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
2027 // the behaviour is not compatible with the Unix version (using iconv)
2028 // and break the library itself, e.g. wxTextInputStream::NextChar()
2029 // wouldn't work if reading an incomplete MB char didn't result in an
2030 // error
667e5b3e 2031 //
89028980 2032 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
830f8f11
VZ
2033 // Win XP or newer and it is not supported for UTF-[78] so we always
2034 // use our own conversions in this case. See
89028980
VS
2035 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2036 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
830f8f11 2037 if ( m_CodePage == CP_UTF8 )
89028980 2038 {
5487ff0f 2039 return wxMBConvUTF8().MB2WC(buf, psz, n);
89028980 2040 }
830f8f11
VZ
2041
2042 if ( m_CodePage == CP_UTF7 )
2043 {
5487ff0f 2044 return wxMBConvUTF7().MB2WC(buf, psz, n);
830f8f11
VZ
2045 }
2046
2047 int flags = 0;
2048 if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
2049 IsAtLeastWin2kSP4() )
89028980 2050 {
830f8f11 2051 flags = MB_ERR_INVALID_CHARS;
89028980 2052 }
667e5b3e 2053
2b5f62a0
VZ
2054 const size_t len = ::MultiByteToWideChar
2055 (
2056 m_CodePage, // code page
667e5b3e 2057 flags, // flags: fall on error
2b5f62a0
VZ
2058 psz, // input string
2059 -1, // its length (NUL-terminated)
b4da152e 2060 buf, // output string
2b5f62a0
VZ
2061 buf ? n : 0 // size of output buffer
2062 );
89028980
VS
2063 if ( !len )
2064 {
2065 // function totally failed
467e0479 2066 return wxCONV_FAILED;
89028980
VS
2067 }
2068
2069 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2070 // check if we succeeded, by doing a double trip:
2071 if ( !flags && buf )
2072 {
53c174fc
VZ
2073 const size_t mbLen = strlen(psz);
2074 wxCharBuffer mbBuf(mbLen);
89028980
VS
2075 if ( ::WideCharToMultiByte
2076 (
2077 m_CodePage,
2078 0,
2079 buf,
2080 -1,
2081 mbBuf.data(),
53c174fc 2082 mbLen + 1, // size in bytes, not length
89028980
VS
2083 NULL,
2084 NULL
2085 ) == 0 ||
2086 strcmp(mbBuf, psz) != 0 )
2087 {
2088 // we didn't obtain the same thing we started from, hence
2089 // the conversion was lossy and we consider that it failed
467e0479 2090 return wxCONV_FAILED;
89028980
VS
2091 }
2092 }
2b5f62a0 2093
03a991bc
VZ
2094 // note that it returns count of written chars for buf != NULL and size
2095 // of the needed buffer for buf == NULL so in either case the length of
2096 // the string (which never includes the terminating NUL) is one less
89028980 2097 return len - 1;
f1339c56 2098 }
dccce9ea 2099
d36c9347 2100 virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 2101 {
13dd924a
VZ
2102 /*
2103 we have a problem here: by default, WideCharToMultiByte() may
2104 replace characters unrepresentable in the target code page with bad
2105 quality approximations such as turning "1/2" symbol (U+00BD) into
2106 "1" for the code pages which don't have it and we, obviously, want
2107 to avoid this at any price
d775fa82 2108
13dd924a
VZ
2109 the trouble is that this function does it _silently_, i.e. it won't
2110 even tell us whether it did or not... Win98/2000 and higher provide
2111 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2112 we have to resort to a round trip, i.e. check that converting back
2113 results in the same string -- this is, of course, expensive but
2114 otherwise we simply can't be sure to not garble the data.
2115 */
2116
2117 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2118 // it doesn't work with CJK encodings (which we test for rather roughly
2119 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2120 // supporting it
907173e5
WS
2121 BOOL usedDef wxDUMMY_INITIALIZE(false);
2122 BOOL *pUsedDef;
13dd924a
VZ
2123 int flags;
2124 if ( CanUseNoBestFit() && m_CodePage < 50000 )
2125 {
2126 // it's our lucky day
2127 flags = WC_NO_BEST_FIT_CHARS;
2128 pUsedDef = &usedDef;
2129 }
2130 else // old system or unsupported encoding
2131 {
2132 flags = 0;
2133 pUsedDef = NULL;
2134 }
2135
2b5f62a0
VZ
2136 const size_t len = ::WideCharToMultiByte
2137 (
2138 m_CodePage, // code page
13dd924a
VZ
2139 flags, // either none or no best fit
2140 pwz, // input string
2b5f62a0
VZ
2141 -1, // it is (wide) NUL-terminated
2142 buf, // output buffer
2143 buf ? n : 0, // and its size
2144 NULL, // default "replacement" char
13dd924a 2145 pUsedDef // [out] was it used?
2b5f62a0
VZ
2146 );
2147
13dd924a
VZ
2148 if ( !len )
2149 {
2150 // function totally failed
467e0479 2151 return wxCONV_FAILED;
13dd924a
VZ
2152 }
2153
2154 // if we were really converting, check if we succeeded
2155 if ( buf )
2156 {
2157 if ( flags )
2158 {
2159 // check if the conversion failed, i.e. if any replacements
2160 // were done
2161 if ( usedDef )
467e0479 2162 return wxCONV_FAILED;
13dd924a
VZ
2163 }
2164 else // we must resort to double tripping...
2165 {
2166 wxWCharBuffer wcBuf(n);
467e0479 2167 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
13dd924a
VZ
2168 wcscmp(wcBuf, pwz) != 0 )
2169 {
2170 // we didn't obtain the same thing we started from, hence
2171 // the conversion was lossy and we consider that it failed
467e0479 2172 return wxCONV_FAILED;
13dd924a
VZ
2173 }
2174 }
2175 }
2176
03a991bc 2177 // see the comment above for the reason of "len - 1"
13dd924a 2178 return len - 1;
f1339c56 2179 }
dccce9ea 2180
7ef3ab50
VZ
2181 virtual size_t GetMBNulLen() const
2182 {
2183 if ( m_minMBCharWidth == 0 )
2184 {
2185 int len = ::WideCharToMultiByte
2186 (
2187 m_CodePage, // code page
2188 0, // no flags
2189 L"", // input string
2190 1, // translate just the NUL
2191 NULL, // output buffer
2192 0, // and its size
2193 NULL, // no replacement char
2194 NULL // [out] don't care if it was used
2195 );
2196
2197 wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2198 switch ( len )
2199 {
2200 default:
2201 wxLogDebug(_T("Unexpected NUL length %d"), len);
ef199164
DS
2202 self->m_minMBCharWidth = (size_t)-1;
2203 break;
7ef3ab50
VZ
2204
2205 case 0:
2206 self->m_minMBCharWidth = (size_t)-1;
2207 break;
2208
2209 case 1:
2210 case 2:
2211 case 4:
2212 self->m_minMBCharWidth = len;
2213 break;
2214 }
2215 }
2216
2217 return m_minMBCharWidth;
2218 }
2219
d36c9347
VZ
2220 virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2221
13dd924a
VZ
2222 bool IsOk() const { return m_CodePage != -1; }
2223
2224private:
2225 static bool CanUseNoBestFit()
2226 {
2227 static int s_isWin98Or2k = -1;
2228
2229 if ( s_isWin98Or2k == -1 )
2230 {
2231 int verMaj, verMin;
2232 switch ( wxGetOsVersion(&verMaj, &verMin) )
2233 {
406d283a 2234 case wxOS_WINDOWS_9X:
13dd924a
VZ
2235 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2236 break;
2237
406d283a 2238 case wxOS_WINDOWS_NT:
13dd924a
VZ
2239 s_isWin98Or2k = verMaj >= 5;
2240 break;
2241
2242 default:
ef199164 2243 // unknown: be conservative by default
13dd924a 2244 s_isWin98Or2k = 0;
ef199164 2245 break;
13dd924a
VZ
2246 }
2247
2248 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
2249 }
2250
2251 return s_isWin98Or2k == 1;
2252 }
f1339c56 2253
89028980
VS
2254 static bool IsAtLeastWin2kSP4()
2255 {
8942f83a
WS
2256#ifdef __WXWINCE__
2257 return false;
2258#else
89028980
VS
2259 static int s_isAtLeastWin2kSP4 = -1;
2260
2261 if ( s_isAtLeastWin2kSP4 == -1 )
2262 {
2263 OSVERSIONINFOEX ver;
2264
2265 memset(&ver, 0, sizeof(ver));
2266 ver.dwOSVersionInfoSize = sizeof(ver);
2267 GetVersionEx((OSVERSIONINFO*)&ver);
2268
2269 s_isAtLeastWin2kSP4 =
2270 ((ver.dwMajorVersion > 5) || // Vista+
2271 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2272 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2273 ver.wServicePackMajor >= 4)) // 2000 SP4+
2274 ? 1 : 0;
2275 }
2276
2277 return s_isAtLeastWin2kSP4 == 1;
8942f83a 2278#endif
89028980
VS
2279 }
2280
eec47cc6 2281
c1464d9d 2282 // the code page we're working with
b1d66b54 2283 long m_CodePage;
c1464d9d 2284
7ef3ab50 2285 // cached result of GetMBNulLen(), set to 0 initially meaning
c1464d9d
VZ
2286 // "unknown"
2287 size_t m_minMBCharWidth;
1cd52418 2288};
e95354ec
VZ
2289
2290#endif // wxHAVE_WIN32_MB2WC
2291
f7e98dee 2292
36acb880
VZ
2293// ============================================================================
2294// wxEncodingConverter based conversion classes
2295// ============================================================================
2296
1e6feb95 2297#if wxUSE_FONTMAP
1cd52418 2298
e95354ec 2299class wxMBConv_wxwin : public wxMBConv
1cd52418 2300{
8b04d4c4
VZ
2301private:
2302 void Init()
2303 {
6ac84a78
DE
2304 // Refuse to use broken wxEncodingConverter code for Mac-specific encodings.
2305 // The wxMBConv_cf class does a better job.
2306 m_ok = (m_enc < wxFONTENCODING_MACMIN || m_enc > wxFONTENCODING_MACMAX) &&
2307 m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
8b04d4c4
VZ
2308 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2309 }
2310
6001e347 2311public:
f1339c56
RR
2312 // temporarily just use wxEncodingConverter stuff,
2313 // so that it works while a better implementation is built
86501081 2314 wxMBConv_wxwin(const char* name)
f1339c56
RR
2315 {
2316 if (name)
267e11c5 2317 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
2318 else
2319 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 2320
8b04d4c4
VZ
2321 Init();
2322 }
2323
e95354ec 2324 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
2325 {
2326 m_enc = enc;
2327
2328 Init();
f1339c56 2329 }
dccce9ea 2330
bde4baac 2331 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
2332 {
2333 size_t inbuf = strlen(psz);
dccce9ea 2334 if (buf)
c643a977 2335 {
ef199164 2336 if (!m2w.Convert(psz, buf))
467e0479 2337 return wxCONV_FAILED;
c643a977 2338 }
f1339c56
RR
2339 return inbuf;
2340 }
dccce9ea 2341
bde4baac 2342 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 2343 {
f8d791e0 2344 const size_t inbuf = wxWcslen(psz);
f1339c56 2345 if (buf)
c643a977 2346 {
ef199164 2347 if (!w2m.Convert(psz, buf))
467e0479 2348 return wxCONV_FAILED;
c643a977 2349 }
dccce9ea 2350
f1339c56
RR
2351 return inbuf;
2352 }
dccce9ea 2353
7ef3ab50 2354 virtual size_t GetMBNulLen() const
eec47cc6
VZ
2355 {
2356 switch ( m_enc )
2357 {
2358 case wxFONTENCODING_UTF16BE:
2359 case wxFONTENCODING_UTF16LE:
c1464d9d 2360 return 2;
eec47cc6
VZ
2361
2362 case wxFONTENCODING_UTF32BE:
2363 case wxFONTENCODING_UTF32LE:
c1464d9d 2364 return 4;
eec47cc6
VZ
2365
2366 default:
c1464d9d 2367 return 1;
eec47cc6
VZ
2368 }
2369 }
2370
d36c9347
VZ
2371 virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
2372
7ef3ab50
VZ
2373 bool IsOk() const { return m_ok; }
2374
2375public:
2376 wxFontEncoding m_enc;
2377 wxEncodingConverter m2w, w2m;
2378
2379private:
cafbf6fb
VZ
2380 // were we initialized successfully?
2381 bool m_ok;
fc7a2a60 2382
e95354ec 2383 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 2384};
6001e347 2385
8f115891 2386// make the constructors available for unit testing
86501081 2387WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const char* name )
8f115891
MW
2388{
2389 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2390 if ( !result->IsOk() )
2391 {
2392 delete result;
2393 return 0;
2394 }
ef199164 2395
8f115891
MW
2396 return result;
2397}
2398
1e6feb95
VZ
2399#endif // wxUSE_FONTMAP
2400
36acb880
VZ
2401// ============================================================================
2402// wxCSConv implementation
2403// ============================================================================
2404
8b04d4c4 2405void wxCSConv::Init()
6001e347 2406{
e95354ec
VZ
2407 m_name = NULL;
2408 m_convReal = NULL;
2409 m_deferred = true;
2410}
2411
86501081 2412wxCSConv::wxCSConv(const wxString& charset)
8b04d4c4
VZ
2413{
2414 Init();
82713003 2415
86501081 2416 if ( !charset.empty() )
e95354ec 2417 {
86501081 2418 SetName(charset.ToAscii());
e95354ec 2419 }
bda3d86a 2420
e4277538
VZ
2421#if wxUSE_FONTMAP
2422 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2423#else
bda3d86a 2424 m_encoding = wxFONTENCODING_SYSTEM;
e4277538 2425#endif
6001e347
RR
2426}
2427
8b04d4c4
VZ
2428wxCSConv::wxCSConv(wxFontEncoding encoding)
2429{
bda3d86a 2430 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
2431 {
2432 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2433
2434 encoding = wxFONTENCODING_SYSTEM;
2435 }
2436
8b04d4c4
VZ
2437 Init();
2438
bda3d86a 2439 m_encoding = encoding;
8b04d4c4
VZ
2440}
2441
6001e347
RR
2442wxCSConv::~wxCSConv()
2443{
65e50848
JS
2444 Clear();
2445}
2446
54380f29 2447wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 2448 : wxMBConv()
54380f29 2449{
8b04d4c4
VZ
2450 Init();
2451
54380f29 2452 SetName(conv.m_name);
8b04d4c4 2453 m_encoding = conv.m_encoding;
54380f29
GD
2454}
2455
2456wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2457{
2458 Clear();
8b04d4c4 2459
54380f29 2460 SetName(conv.m_name);
8b04d4c4
VZ
2461 m_encoding = conv.m_encoding;
2462
54380f29
GD
2463 return *this;
2464}
2465
65e50848
JS
2466void wxCSConv::Clear()
2467{
8b04d4c4 2468 free(m_name);
e95354ec 2469 delete m_convReal;
8b04d4c4 2470
65e50848 2471 m_name = NULL;
e95354ec 2472 m_convReal = NULL;
6001e347
RR
2473}
2474
86501081 2475void wxCSConv::SetName(const char *charset)
6001e347 2476{
f1339c56
RR
2477 if (charset)
2478 {
86501081 2479 m_name = strdup(charset);
e95354ec 2480 m_deferred = true;
f1339c56 2481 }
6001e347
RR
2482}
2483
#if wxUSE_FONTMAP

// map from a font encoding to a charset name, used by wxCSConv::DoCreate()
// to remember the name for which an iconv-based converter could be created;
// an empty string is stored when none of the names of the encoding worked
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString,
                     wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

static wxEncodingNameCache gs_nameCache;

#endif // wxUSE_FONTMAP
2491
e95354ec
VZ
2492wxMBConv *wxCSConv::DoCreate() const
2493{
ce6f8d6f
VZ
2494#if wxUSE_FONTMAP
2495 wxLogTrace(TRACE_STRCONV,
2496 wxT("creating conversion for %s"),
2497 (m_name ? m_name
86501081 2498 : (const char*)wxFontMapperBase::GetEncodingName(m_encoding).mb_str()));
ce6f8d6f
VZ
2499#endif // wxUSE_FONTMAP
2500
c547282d
VZ
2501 // check for the special case of ASCII or ISO8859-1 charset: as we have
2502 // special knowledge of it anyhow, we don't need to create a special
2503 // conversion object
e4277538
VZ
2504 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
2505 m_encoding == wxFONTENCODING_DEFAULT )
f1339c56 2506 {
e95354ec
VZ
2507 // don't convert at all
2508 return NULL;
2509 }
dccce9ea 2510
e95354ec
VZ
2511 // we trust OS to do conversion better than we can so try external
2512 // conversion methods first
2513 //
2514 // the full order is:
2515 // 1. OS conversion (iconv() under Unix or Win32 API)
2516 // 2. hard coded conversions for UTF
2517 // 3. wxEncodingConverter as fall back
2518
2519 // step (1)
2520#ifdef HAVE_ICONV
c547282d 2521#if !wxUSE_FONTMAP
e95354ec 2522 if ( m_name )
c547282d 2523#endif // !wxUSE_FONTMAP
e95354ec 2524 {
3ef10cfc 2525#if wxUSE_FONTMAP
8b3eb85d 2526 wxFontEncoding encoding(m_encoding);
3ef10cfc 2527#endif
8b3eb85d 2528
86501081 2529 if ( m_name )
8b3eb85d 2530 {
86501081 2531 wxMBConv_iconv *conv = new wxMBConv_iconv(m_name);
8b3eb85d
VZ
2532 if ( conv->IsOk() )
2533 return conv;
2534
2535 delete conv;
c547282d
VZ
2536
2537#if wxUSE_FONTMAP
8b3eb85d 2538 encoding =
86501081 2539 wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 2540#endif // wxUSE_FONTMAP
8b3eb85d
VZ
2541 }
2542#if wxUSE_FONTMAP
2543 {
2544 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2545 if ( it != gs_nameCache.end() )
2546 {
2547 if ( it->second.empty() )
2548 return NULL;
c547282d 2549
86501081 2550 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second.ToAscii());
8b3eb85d
VZ
2551 if ( conv->IsOk() )
2552 return conv;
e95354ec 2553
8b3eb85d
VZ
2554 delete conv;
2555 }
2556
2557 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
86501081
VS
2558 // CS : in case this does not return valid names (eg for MacRoman)
2559 // encoding got a 'failure' entry in the cache all the same,
2560 // although it just has to be created using a different method, so
2561 // only store failed iconv creation attempts (or perhaps we
2562 // shoulnd't do this at all ?)
3c67ec06 2563 if ( names[0] != NULL )
8b3eb85d 2564 {
3c67ec06 2565 for ( ; *names; ++names )
8b3eb85d 2566 {
86501081
VS
2567 // FIXME-UTF8: wxFontMapperBase::GetAllEncodingNames()
2568 // will need changes that will obsolete this
2569 wxString name(*names);
2570 wxMBConv_iconv *conv = new wxMBConv_iconv(name.ToAscii());
3c67ec06
SC
2571 if ( conv->IsOk() )
2572 {
2573 gs_nameCache[encoding] = *names;
2574 return conv;
2575 }
2576
2577 delete conv;
8b3eb85d
VZ
2578 }
2579
3c67ec06 2580 gs_nameCache[encoding] = _T(""); // cache the failure
8b3eb85d 2581 }
8b3eb85d
VZ
2582 }
2583#endif // wxUSE_FONTMAP
e95354ec
VZ
2584 }
2585#endif // HAVE_ICONV
2586
2587#ifdef wxHAVE_WIN32_MB2WC
2588 {
7608a683 2589#if wxUSE_FONTMAP
e95354ec
VZ
2590 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2591 : new wxMBConv_win32(m_encoding);
2592 if ( conv->IsOk() )
2593 return conv;
2594
2595 delete conv;
7608a683
WS
2596#else
2597 return NULL;
2598#endif
e95354ec
VZ
2599 }
2600#endif // wxHAVE_WIN32_MB2WC
ef199164 2601
5c4ed98d 2602#ifdef __DARWIN__
f7e98dee 2603 {
6ff49cbc
DE
2604 // leave UTF16 and UTF32 to the built-ins of wx
2605 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2606 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
f7e98dee 2607 {
a6900d10 2608#if wxUSE_FONTMAP
5c4ed98d
DE
2609 wxMBConv_cf *conv = m_name ? new wxMBConv_cf(m_name)
2610 : new wxMBConv_cf(m_encoding);
a6900d10 2611#else
5c4ed98d 2612 wxMBConv_cf *conv = new wxMBConv_cf(m_encoding);
a6900d10 2613#endif
ef199164 2614
f7e98dee 2615 if ( conv->IsOk() )
d775fa82
WS
2616 return conv;
2617
2618 delete conv;
2619 }
335d31e0 2620 }
5c4ed98d
DE
2621#endif // __DARWIN__
2622
e95354ec
VZ
2623 // step (2)
2624 wxFontEncoding enc = m_encoding;
2625#if wxUSE_FONTMAP
c547282d
VZ
2626 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2627 {
2628 // use "false" to suppress interactive dialogs -- we can be called from
2629 // anywhere and popping up a dialog from here is the last thing we want to
2630 // do
267e11c5 2631 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 2632 }
e95354ec
VZ
2633#endif // wxUSE_FONTMAP
2634
2635 switch ( enc )
2636 {
2637 case wxFONTENCODING_UTF7:
2638 return new wxMBConvUTF7;
2639
2640 case wxFONTENCODING_UTF8:
2641 return new wxMBConvUTF8;
2642
e95354ec
VZ
2643 case wxFONTENCODING_UTF16BE:
2644 return new wxMBConvUTF16BE;
2645
2646 case wxFONTENCODING_UTF16LE:
2647 return new wxMBConvUTF16LE;
2648
e95354ec
VZ
2649 case wxFONTENCODING_UTF32BE:
2650 return new wxMBConvUTF32BE;
2651
2652 case wxFONTENCODING_UTF32LE:
2653 return new wxMBConvUTF32LE;
2654
2655 default:
2656 // nothing to do but put here to suppress gcc warnings
ef199164 2657 break;
e95354ec
VZ
2658 }
2659
2660 // step (3)
2661#if wxUSE_FONTMAP
2662 {
2663 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2664 : new wxMBConv_wxwin(m_encoding);
2665 if ( conv->IsOk() )
2666 return conv;
2667
2668 delete conv;
2669 }
2670#endif // wxUSE_FONTMAP
2671
a58d4f4d
VS
2672 // NB: This is a hack to prevent deadlock. What could otherwise happen
2673 // in Unicode build: wxConvLocal creation ends up being here
2674 // because of some failure and logs the error. But wxLog will try to
6a17b868
SN
2675 // attach a timestamp, for which it will need wxConvLocal (to convert
2676 // time to char* and then wchar_t*), but that fails, tries to log the
2677 // error, but wxLog has an (already locked) critical section that
2678 // guards the static buffer.
a58d4f4d
VS
2679 static bool alreadyLoggingError = false;
2680 if (!alreadyLoggingError)
2681 {
2682 alreadyLoggingError = true;
2683 wxLogError(_("Cannot convert from the charset '%s'!"),
2684 m_name ? m_name
e95354ec
VZ
2685 :
2686#if wxUSE_FONTMAP
86501081 2687 (const char*)wxFontMapperBase::GetEncodingDescription(m_encoding).ToAscii()
e95354ec 2688#else // !wxUSE_FONTMAP
86501081 2689 (const char*)wxString::Format(_("encoding %i"), m_encoding).ToAscii()
e95354ec
VZ
2690#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2691 );
ef199164 2692
a58d4f4d
VS
2693 alreadyLoggingError = false;
2694 }
e95354ec
VZ
2695
2696 return NULL;
2697}
2698
2699void wxCSConv::CreateConvIfNeeded() const
2700{
2701 if ( m_deferred )
2702 {
2703 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a 2704
bda3d86a
VZ
2705 // if we don't have neither the name nor the encoding, use the default
2706 // encoding for this system
2707 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2708 {
4c75209f 2709#if wxUSE_INTL
02c7347b 2710 self->m_encoding = wxLocale::GetSystemEncoding();
4c75209f
VS
2711#else
2712 // fallback to some reasonable default:
2713 self->m_encoding = wxFONTENCODING_ISO8859_1;
bda3d86a 2714#endif // wxUSE_INTL
4c75209f 2715 }
bda3d86a 2716
e95354ec
VZ
2717 self->m_convReal = DoCreate();
2718 self->m_deferred = false;
6001e347 2719 }
6001e347
RR
2720}
2721
0f0298b1
VZ
2722bool wxCSConv::IsOk() const
2723{
2724 CreateConvIfNeeded();
2725
2726 // special case: no convReal created for wxFONTENCODING_ISO8859_1
2727 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2728 return true; // always ok as we do it ourselves
2729
2730 // m_convReal->IsOk() is called at its own creation, so we know it must
2731 // be ok if m_convReal is non-NULL
2732 return m_convReal != NULL;
2733}
2734
1c714a5d
VZ
2735size_t wxCSConv::ToWChar(wchar_t *dst, size_t dstLen,
2736 const char *src, size_t srcLen) const
2737{
2738 CreateConvIfNeeded();
2739
2c74c558
VS
2740 if (m_convReal)
2741 return m_convReal->ToWChar(dst, dstLen, src, srcLen);
2742
2743 // latin-1 (direct)
2744 return wxMBConv::ToWChar(dst, dstLen, src, srcLen);
1c714a5d
VZ
2745}
2746
2747size_t wxCSConv::FromWChar(char *dst, size_t dstLen,
2748 const wchar_t *src, size_t srcLen) const
2749{
2750 CreateConvIfNeeded();
2751
2c74c558
VS
2752 if (m_convReal)
2753 return m_convReal->FromWChar(dst, dstLen, src, srcLen);
2754
2755 // latin-1 (direct)
2756 return wxMBConv::FromWChar(dst, dstLen, src, srcLen);
1c714a5d
VZ
2757}
2758
6001e347
RR
2759size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2760{
e95354ec 2761 CreateConvIfNeeded();
dccce9ea 2762
e95354ec
VZ
2763 if (m_convReal)
2764 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
2765
2766 // latin-1 (direct)
4def3b35 2767 size_t len = strlen(psz);
dccce9ea 2768
f1339c56
RR
2769 if (buf)
2770 {
4def3b35 2771 for (size_t c = 0; c <= len; c++)
f1339c56
RR
2772 buf[c] = (unsigned char)(psz[c]);
2773 }
dccce9ea 2774
f1339c56 2775 return len;
6001e347
RR
2776}
2777
2778size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2779{
e95354ec 2780 CreateConvIfNeeded();
dccce9ea 2781
e95354ec
VZ
2782 if (m_convReal)
2783 return m_convReal->WC2MB(buf, psz, n);
1cd52418 2784
f1339c56 2785 // latin-1 (direct)
f8d791e0 2786 const size_t len = wxWcslen(psz);
f1339c56
RR
2787 if (buf)
2788 {
4def3b35 2789 for (size_t c = 0; c <= len; c++)
24642831
VS
2790 {
2791 if (psz[c] > 0xFF)
467e0479 2792 return wxCONV_FAILED;
ef199164 2793
907173e5 2794 buf[c] = (char)psz[c];
24642831
VS
2795 }
2796 }
2797 else
2798 {
2799 for (size_t c = 0; c <= len; c++)
2800 {
2801 if (psz[c] > 0xFF)
467e0479 2802 return wxCONV_FAILED;
24642831 2803 }
f1339c56 2804 }
dccce9ea 2805
f1339c56 2806 return len;
6001e347
RR
2807}
2808
7ef3ab50 2809size_t wxCSConv::GetMBNulLen() const
eec47cc6
VZ
2810{
2811 CreateConvIfNeeded();
2812
2813 if ( m_convReal )
2814 {
7ef3ab50 2815 return m_convReal->GetMBNulLen();
eec47cc6
VZ
2816 }
2817
ba98e032 2818 // otherwise, we are ISO-8859-1
c1464d9d 2819 return 1;
eec47cc6
VZ
2820}
2821
ba98e032
VS
#if wxUSE_UNICODE_UTF8
// Ask the real converter whether it is UTF-8.
bool wxCSConv::IsUTF8() const
{
    CreateConvIfNeeded();

    // without a real converter we are ISO-8859-1, i.e. not UTF-8
    return m_convReal && m_convReal->IsUTF8();
}
#endif // wxUSE_UNICODE_UTF8
2836
69c928ef
VZ
2837
2838#if wxUSE_UNICODE
2839
2840wxWCharBuffer wxSafeConvertMB2WX(const char *s)
2841{
2842 if ( !s )
2843 return wxWCharBuffer();
2844
2845 wxWCharBuffer wbuf(wxConvLibc.cMB2WX(s));
2846 if ( !wbuf )
5487ff0f 2847 wbuf = wxMBConvUTF8().cMB2WX(s);
69c928ef
VZ
2848 if ( !wbuf )
2849 wbuf = wxConvISO8859_1.cMB2WX(s);
2850
2851 return wbuf;
2852}
2853
2854wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
2855{
2856 if ( !ws )
2857 return wxCharBuffer();
2858
2859 wxCharBuffer buf(wxConvLibc.cWX2MB(ws));
2860 if ( !buf )
2861 buf = wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL).cWX2MB(ws);
2862
2863 return buf;
2864}
2865
2866#endif // wxUSE_UNICODE
f5a1953b 2867
1e50d914
VS
2868// ----------------------------------------------------------------------------
2869// globals
2870// ----------------------------------------------------------------------------
2871
// NB: The reason why we create converter objects in this convoluted way,
//     using a factory function instead of a global variable, is that they
//     may be used at static initialization time (some of them are used by
//     wxString ctors and there may be a global wxString object). In other
//     words, possibly _before_ the converter global object would be
//     initialized.
2878
2879#undef wxConvLibc
2880#undef wxConvUTF8
2881#undef wxConvUTF7
2882#undef wxConvLocal
2883#undef wxConvISO8859_1
2884
2885#define WX_DEFINE_GLOBAL_CONV2(klass, impl_klass, name, ctor_args) \
2886 WXDLLIMPEXP_DATA_BASE(klass*) name##Ptr = NULL; \
092ee46f 2887 WXDLLIMPEXP_BASE klass* wxGet_##name##Ptr() \
1e50d914
VS
2888 { \
2889 static impl_klass name##Obj ctor_args; \
2890 return &name##Obj; \
2891 } \
2892 /* this ensures that all global converter objects are created */ \
2893 /* by the time static initialization is done, i.e. before any */ \
2894 /* thread is launched: */ \
2895 static klass* gs_##name##instance = wxGet_##name##Ptr()
2896
2897#define WX_DEFINE_GLOBAL_CONV(klass, name, ctor_args) \
2898 WX_DEFINE_GLOBAL_CONV2(klass, klass, name, ctor_args)
2899
2900#ifdef __WINDOWS__
2901 WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_win32, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
1e50d914
VS
2902#else
2903 WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConvLibc, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
2904#endif
2905
2906WX_DEFINE_GLOBAL_CONV(wxMBConvUTF8, wxConvUTF8, wxEMPTY_PARAMETER_VALUE);
2907WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, wxEMPTY_PARAMETER_VALUE);
2908
2909WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM));
2910WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1));
2911
2912WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = wxGet_wxConvLibcPtr();
2913WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = wxGet_wxConvLocalPtr();
2914
6ac84a78
DE
2915#ifdef __DARWIN__
2916// The xnu kernel always communicates file paths in decomposed UTF-8.
2917// WARNING: Are we sure that CFString's conversion will cause decomposition?
2918static wxMBConv_cf wxConvMacUTF8DObj(wxFONTENCODING_UTF8);
1e50d914 2919#endif
6ac84a78 2920
1e50d914 2921WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName =
6ac84a78 2922#ifdef __DARWIN__
1e50d914 2923 &wxConvMacUTF8DObj;
6ac84a78 2924#else // !__DARWIN__
1e50d914 2925 wxGet_wxConvLibcPtr();
6ac84a78 2926#endif // __DARWIN__/!__DARWIN__
1e50d914 2927
bde4baac
VZ
2928#else // !wxUSE_WCHAR_T
2929
1e50d914 2930// FIXME-UTF8: remove this, wxUSE_WCHAR_T is required now
bde4baac
VZ
2931// stand-ins in absence of wchar_t
2932WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2933 wxConvISO8859_1,
2934 wxConvLocal,
2935 wxConvUTF8;
2936
2937#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T