]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
compilation fixes for --with-regex=sys
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347 1/////////////////////////////////////////////////////////////////////////////
38d4b1e4 2// Name: src/common/strconv.cpp
6001e347 3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
6001e347
RR
15// For compilers that support precompilation, includes "wx.h".
16#include "wx/wxprec.h"
17
480f42ec
VS
18#ifdef __BORLANDC__
19 #pragma hdrstop
20#endif //__BORLANDC__
21
373658eb
VZ
22#ifndef WX_PRECOMP
23 #include "wx/intl.h"
24 #include "wx/log.h"
de6185e2 25 #include "wx/utils.h"
df69528b 26 #include "wx/hashmap.h"
ef199164 27#endif
373658eb 28
bde4baac
VZ
29#include "wx/strconv.h"
30
31#if wxUSE_WCHAR_T
32
1c193821 33#ifndef __WXWINCE__
1cd52418 34#include <errno.h>
1c193821
JS
35#endif
36
6001e347
RR
37#include <ctype.h>
38#include <string.h>
39#include <stdlib.h>
40
e95354ec 41#if defined(__WIN32__) && !defined(__WXMICROWIN__)
a6c2e2c7
VZ
42 #include "wx/msw/private.h"
43 #include "wx/msw/missing.h"
e95354ec 44 #define wxHAVE_WIN32_MB2WC
ef199164 45#endif
e95354ec 46
6001e347 47#ifdef __SALFORDC__
373658eb 48 #include <clib.h>
6001e347
RR
49#endif
50
b040e242 51#ifdef HAVE_ICONV
373658eb 52 #include <iconv.h>
b1d547eb 53 #include "wx/thread.h"
1cd52418 54#endif
1cd52418 55
373658eb
VZ
56#include "wx/encconv.h"
57#include "wx/fontmap.h"
58
335d31e0 59#ifdef __WXMAC__
40ba2f3b 60#ifndef __DARWIN__
4227afa4
SC
61#include <ATSUnicode.h>
62#include <TextCommon.h>
63#include <TextEncodingConverter.h>
40ba2f3b 64#endif
335d31e0 65
ef199164
DS
66// includes Mac headers
67#include "wx/mac/private.h"
335d31e0 68#endif
ce6f8d6f 69
ef199164 70
ce6f8d6f
VZ
71#define TRACE_STRCONV _T("strconv")
72
467e0479
VZ
73// WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
74// be 4 bytes
4948c2b6 75#if SIZEOF_WCHAR_T == 2
ac11db3a
MW
76 #define WC_UTF16
77#endif
78
ef199164 79
373658eb
VZ
80// ============================================================================
81// implementation
82// ============================================================================
83
69373110
VZ
// Helper used by the NUL-termination checks below: returns true if at least
// one of the n bytes starting at p is not NUL and false if all of them are
// NUL (in particular when n is 0).
static bool NotAllNULs(const char *p, size_t n)
{
    for ( ; n != 0; --n )
    {
        if ( *p++ != '\0' )
            return true;
    }

    return false;
}
92
373658eb 93// ----------------------------------------------------------------------------
467e0479 94// UTF-16 en/decoding to/from UCS-4 with surrogates handling
373658eb 95// ----------------------------------------------------------------------------
6001e347 96
c91830cb 97static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 98{
ef199164 99 if (input <= 0xffff)
4def3b35 100 {
999836aa
VZ
101 if (output)
102 *output = (wxUint16) input;
ef199164 103
4def3b35 104 return 1;
dccce9ea 105 }
ef199164 106 else if (input >= 0x110000)
4def3b35 107 {
467e0479 108 return wxCONV_FAILED;
dccce9ea
VZ
109 }
110 else
4def3b35 111 {
dccce9ea 112 if (output)
4def3b35 113 {
ef199164
DS
114 *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
115 *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
4def3b35 116 }
ef199164 117
4def3b35 118 return 2;
1cd52418 119 }
1cd52418
OK
120}
121
c91830cb 122static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 123{
ef199164 124 if ((*input < 0xd800) || (*input > 0xdfff))
4def3b35
VS
125 {
126 output = *input;
127 return 1;
dccce9ea 128 }
ef199164 129 else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
4def3b35
VS
130 {
131 output = *input;
467e0479 132 return wxCONV_FAILED;
dccce9ea
VZ
133 }
134 else
4def3b35
VS
135 {
136 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
137 return 2;
138 }
1cd52418
OK
139}
140
467e0479 141#ifdef WC_UTF16
35d11700
VZ
142 typedef wchar_t wxDecodeSurrogate_t;
143#else // !WC_UTF16
144 typedef wxUint16 wxDecodeSurrogate_t;
145#endif // WC_UTF16/!WC_UTF16
467e0479
VZ
146
147// returns the next UTF-32 character from the wchar_t buffer and advances the
148// pointer to the character after this one
149//
150// if an invalid character is found, *pSrc is set to NULL, the caller must
151// check for this
35d11700 152static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
467e0479
VZ
153{
154 wxUint32 out;
8d3dd069
VZ
155 const size_t
156 n = decode_utf16(wx_reinterpret_cast(const wxUint16 *, *pSrc), out);
467e0479
VZ
157 if ( n == wxCONV_FAILED )
158 *pSrc = NULL;
159 else
160 *pSrc += n;
161
162 return out;
163}
164
f6bcfd97 165// ----------------------------------------------------------------------------
6001e347 166// wxMBConv
f6bcfd97 167// ----------------------------------------------------------------------------
2c53a80a 168
483b0434
VZ
169size_t
170wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
171 const char *src, size_t srcLen) const
6001e347 172{
483b0434
VZ
173 // although new conversion classes are supposed to implement this function
174 // directly, the existins ones only implement the old MB2WC() and so, to
175 // avoid to have to rewrite all conversion classes at once, we provide a
176 // default (but not efficient) implementation of this one in terms of the
177 // old function by copying the input to ensure that it's NUL-terminated and
178 // then using MB2WC() to convert it
6001e347 179
483b0434
VZ
180 // the number of chars [which would be] written to dst [if it were not NULL]
181 size_t dstWritten = 0;
eec47cc6 182
c1464d9d 183 // the number of NULs terminating this string
a78c43f1 184 size_t nulLen = 0; // not really needed, but just to avoid warnings
eec47cc6 185
c1464d9d
VZ
186 // if we were not given the input size we just have to assume that the
187 // string is properly terminated as we have no way of knowing how long it
188 // is anyhow, but if we do have the size check whether there are enough
189 // NULs at the end
483b0434
VZ
190 wxCharBuffer bufTmp;
191 const char *srcEnd;
467e0479 192 if ( srcLen != wxNO_LEN )
eec47cc6 193 {
c1464d9d 194 // we need to know how to find the end of this string
7ef3ab50 195 nulLen = GetMBNulLen();
483b0434
VZ
196 if ( nulLen == wxCONV_FAILED )
197 return wxCONV_FAILED;
e4e3bbb4 198
c1464d9d 199 // if there are enough NULs we can avoid the copy
483b0434 200 if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
eec47cc6
VZ
201 {
202 // make a copy in order to properly NUL-terminate the string
483b0434 203 bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
c1464d9d 204 char * const p = bufTmp.data();
483b0434
VZ
205 memcpy(p, src, srcLen);
206 for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
c1464d9d 207 *s = '\0';
483b0434
VZ
208
209 src = bufTmp;
eec47cc6 210 }
e4e3bbb4 211
483b0434
VZ
212 srcEnd = src + srcLen;
213 }
214 else // quit after the first loop iteration
215 {
216 srcEnd = NULL;
217 }
e4e3bbb4 218
483b0434 219 for ( ;; )
eec47cc6 220 {
c1464d9d 221 // try to convert the current chunk
483b0434 222 size_t lenChunk = MB2WC(NULL, src, 0);
483b0434
VZ
223 if ( lenChunk == wxCONV_FAILED )
224 return wxCONV_FAILED;
e4e3bbb4 225
467e0479 226 lenChunk++; // for the L'\0' at the end of this chunk
e4e3bbb4 227
483b0434 228 dstWritten += lenChunk;
f5fb6871 229
467e0479
VZ
230 if ( lenChunk == 1 )
231 {
232 // nothing left in the input string, conversion succeeded
233 break;
234 }
235
483b0434
VZ
236 if ( dst )
237 {
238 if ( dstWritten > dstLen )
239 return wxCONV_FAILED;
240
830f8f11 241 if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
483b0434
VZ
242 return wxCONV_FAILED;
243
244 dst += lenChunk;
245 }
c1464d9d 246
483b0434 247 if ( !srcEnd )
c1464d9d 248 {
467e0479
VZ
249 // we convert just one chunk in this case as this is the entire
250 // string anyhow
c1464d9d
VZ
251 break;
252 }
eec47cc6
VZ
253
254 // advance the input pointer past the end of this chunk
483b0434 255 while ( NotAllNULs(src, nulLen) )
c1464d9d
VZ
256 {
257 // notice that we must skip over multiple bytes here as we suppose
258 // that if NUL takes 2 or 4 bytes, then all the other characters do
259 // too and so if advanced by a single byte we might erroneously
260 // detect sequences of NUL bytes in the middle of the input
483b0434 261 src += nulLen;
c1464d9d 262 }
e4e3bbb4 263
483b0434 264 src += nulLen; // skipping over its terminator as well
c1464d9d
VZ
265
266 // note that ">=" (and not just "==") is needed here as the terminator
267 // we skipped just above could be inside or just after the buffer
268 // delimited by inEnd
483b0434 269 if ( src >= srcEnd )
c1464d9d
VZ
270 break;
271 }
272
483b0434 273 return dstWritten;
e4e3bbb4
RN
274}
275
483b0434
VZ
276size_t
277wxMBConv::FromWChar(char *dst, size_t dstLen,
278 const wchar_t *src, size_t srcLen) const
e4e3bbb4 279{
483b0434
VZ
280 // the number of chars [which would be] written to dst [if it were not NULL]
281 size_t dstWritten = 0;
e4e3bbb4 282
eec47cc6
VZ
283 // make a copy of the input string unless it is already properly
284 // NUL-terminated
285 //
286 // if we don't know its length we have no choice but to assume that it is,
287 // indeed, properly terminated
288 wxWCharBuffer bufTmp;
467e0479 289 if ( srcLen == wxNO_LEN )
e4e3bbb4 290 {
483b0434 291 srcLen = wxWcslen(src) + 1;
eec47cc6 292 }
483b0434 293 else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
eec47cc6
VZ
294 {
295 // make a copy in order to properly NUL-terminate the string
483b0434 296 bufTmp = wxWCharBuffer(srcLen);
ef199164 297 memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
483b0434
VZ
298 src = bufTmp;
299 }
300
301 const size_t lenNul = GetMBNulLen();
302 for ( const wchar_t * const srcEnd = src + srcLen;
303 src < srcEnd;
304 src += wxWcslen(src) + 1 /* skip L'\0' too */ )
305 {
306 // try to convert the current chunk
307 size_t lenChunk = WC2MB(NULL, src, 0);
308
309 if ( lenChunk == wxCONV_FAILED )
310 return wxCONV_FAILED;
311
312 lenChunk += lenNul;
313 dstWritten += lenChunk;
314
315 if ( dst )
316 {
317 if ( dstWritten > dstLen )
318 return wxCONV_FAILED;
319
320 if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
321 return wxCONV_FAILED;
322
323 dst += lenChunk;
324 }
eec47cc6 325 }
e4e3bbb4 326
483b0434
VZ
327 return dstWritten;
328}
329
ef199164 330size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
509da451 331{
ef199164 332 size_t rc = ToWChar(outBuff, outLen, inBuff);
467e0479 333 if ( rc != wxCONV_FAILED )
509da451
VZ
334 {
335 // ToWChar() returns the buffer length, i.e. including the trailing
336 // NUL, while this method doesn't take it into account
337 rc--;
338 }
339
340 return rc;
341}
342
ef199164 343size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
509da451 344{
ef199164 345 size_t rc = FromWChar(outBuff, outLen, inBuff);
467e0479 346 if ( rc != wxCONV_FAILED )
509da451
VZ
347 {
348 rc -= GetMBNulLen();
349 }
350
351 return rc;
352}
353
483b0434
VZ
354wxMBConv::~wxMBConv()
355{
356 // nothing to do here (necessary for Darwin linking probably)
357}
e4e3bbb4 358
483b0434
VZ
359const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
360{
361 if ( psz )
eec47cc6 362 {
483b0434
VZ
363 // calculate the length of the buffer needed first
364 const size_t nLen = MB2WC(NULL, psz, 0);
467e0479 365 if ( nLen != wxCONV_FAILED )
f5fb6871 366 {
483b0434
VZ
367 // now do the actual conversion
368 wxWCharBuffer buf(nLen /* +1 added implicitly */);
eec47cc6 369
483b0434
VZ
370 // +1 for the trailing NULL
371 if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
372 return buf;
f5fb6871 373 }
483b0434 374 }
e4e3bbb4 375
483b0434
VZ
376 return wxWCharBuffer();
377}
3698ae71 378
483b0434
VZ
379const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
380{
381 if ( pwz )
382 {
383 const size_t nLen = WC2MB(NULL, pwz, 0);
467e0479 384 if ( nLen != wxCONV_FAILED )
483b0434
VZ
385 {
386 // extra space for trailing NUL(s)
387 static const size_t extraLen = GetMaxMBNulLen();
f5fb6871 388
483b0434
VZ
389 wxCharBuffer buf(nLen + extraLen - 1);
390 if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
391 return buf;
392 }
393 }
394
395 return wxCharBuffer();
396}
e4e3bbb4 397
483b0434 398const wxWCharBuffer
ef199164 399wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
483b0434 400{
ef199164 401 const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
467e0479 402 if ( dstLen != wxCONV_FAILED )
483b0434 403 {
830f8f11 404 wxWCharBuffer wbuf(dstLen - 1);
ef199164 405 if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
483b0434
VZ
406 {
407 if ( outLen )
467e0479
VZ
408 {
409 *outLen = dstLen;
410 if ( wbuf[dstLen - 1] == L'\0' )
411 (*outLen)--;
412 }
413
483b0434
VZ
414 return wbuf;
415 }
416 }
417
418 if ( outLen )
419 *outLen = 0;
420
421 return wxWCharBuffer();
422}
423
424const wxCharBuffer
ef199164 425wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
483b0434 426{
13d92ad6 427 size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
467e0479 428 if ( dstLen != wxCONV_FAILED )
483b0434 429 {
168a76fe
VZ
430 // special case of empty input: can't allocate 0 size buffer below as
431 // wxCharBuffer insists on NUL-terminating it
432 wxCharBuffer buf(dstLen ? dstLen - 1 : 1);
ef199164 433 if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
483b0434
VZ
434 {
435 if ( outLen )
467e0479
VZ
436 {
437 *outLen = dstLen;
438
439 const size_t nulLen = GetMBNulLen();
13d92ad6
VZ
440 if ( dstLen >= nulLen &&
441 !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
467e0479
VZ
442 {
443 // in this case the output is NUL-terminated and we're not
444 // supposed to count NUL
13d92ad6 445 *outLen -= nulLen;
467e0479
VZ
446 }
447 }
d32a507d 448
483b0434
VZ
449 return buf;
450 }
e4e3bbb4
RN
451 }
452
eec47cc6
VZ
453 if ( outLen )
454 *outLen = 0;
455
456 return wxCharBuffer();
e4e3bbb4
RN
457}
458
6001e347 459// ----------------------------------------------------------------------------
bde4baac 460// wxMBConvLibc
6001e347
RR
461// ----------------------------------------------------------------------------
462
bde4baac
VZ
463size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
464{
465 return wxMB2WC(buf, psz, n);
466}
467
468size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
469{
470 return wxWC2MB(buf, psz, n);
471}
e1bfe89e
RR
472
473// ----------------------------------------------------------------------------
532d575b 474// wxConvBrokenFileNames
e1bfe89e
RR
475// ----------------------------------------------------------------------------
476
eec47cc6
VZ
477#ifdef __UNIX__
478
86501081 479wxConvBrokenFileNames::wxConvBrokenFileNames(const wxString& charset)
ea8ce907 480{
86501081
VS
481 if ( wxStricmp(charset, _T("UTF-8")) == 0 ||
482 wxStricmp(charset, _T("UTF8")) == 0 )
5deedd6e 483 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
845905d5
MW
484 else
485 m_conv = new wxCSConv(charset);
ea8ce907
RR
486}
487
eec47cc6 488#endif // __UNIX__
c12b7f79 489
bde4baac 490// ----------------------------------------------------------------------------
3698ae71 491// UTF-7
bde4baac 492// ----------------------------------------------------------------------------
6001e347 493
15f2ee32 494// Implementation (C) 2004 Fredrik Roubert
6001e347 495
15f2ee32
RN
496//
497// BASE64 decoding table
498//
// Maps each byte value to the 6 bit value of the corresponding BASE64 digit
// ('A'-'Z' -> 0-25, 'a'-'z' -> 26-51, '0'-'9' -> 52-61, '+' -> 62,
// '/' -> 63) or to 0xff if the byte is not a BASE64 digit.
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: digits
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x5f: upper case letters
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x7f: lower case letters
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
534
535size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
536{
15f2ee32
RN
537 size_t len = 0;
538
04a37834 539 while ( *psz && (!buf || (len < n)) )
15f2ee32
RN
540 {
541 unsigned char cc = *psz++;
542 if (cc != '+')
543 {
544 // plain ASCII char
545 if (buf)
546 *buf++ = cc;
547 len++;
548 }
549 else if (*psz == '-')
550 {
551 // encoded plus sign
552 if (buf)
553 *buf++ = cc;
554 len++;
555 psz++;
556 }
04a37834 557 else // start of BASE64 encoded string
15f2ee32 558 {
04a37834 559 bool lsb, ok;
15f2ee32 560 unsigned int d, l;
04a37834
VZ
561 for ( ok = lsb = false, d = 0, l = 0;
562 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
563 psz++ )
15f2ee32
RN
564 {
565 d <<= 6;
566 d += cc;
567 for (l += 6; l >= 8; lsb = !lsb)
568 {
04a37834 569 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
570 if (lsb)
571 {
572 if (buf)
573 *buf++ |= c;
574 len ++;
575 }
576 else
04a37834 577 {
15f2ee32 578 if (buf)
6356d52a 579 *buf = (wchar_t)(c << 8);
04a37834
VZ
580 }
581
582 ok = true;
15f2ee32
RN
583 }
584 }
04a37834
VZ
585
586 if ( !ok )
587 {
588 // in valid UTF7 we should have valid characters after '+'
467e0479 589 return wxCONV_FAILED;
04a37834
VZ
590 }
591
15f2ee32
RN
592 if (*psz == '-')
593 psz++;
594 }
595 }
04a37834
VZ
596
597 if ( buf && (len < n) )
598 *buf = '\0';
599
15f2ee32 600 return len;
6001e347
RR
601}
602
15f2ee32
RN
603//
604// BASE64 encoding table
605//
// The 64 BASE64 digits indexed by their 6 bit value.
static const unsigned char utf7enb64[] =
{
    // 0 - 25: upper case letters
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 26 - 51: lower case letters
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // 52 - 61: digits
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 62 and 63
    '+', '/'
};
617
618//
619// UTF-7 encoding table
620//
621// 0 - Set D (directly encoded characters)
622// 1 - Set O (optional direct characters)
623// 2 - whitespace characters (optional)
624// 3 - special characters
625//
626static const unsigned char utf7encode[128] =
6001e347 627{
15f2ee32
RN
628 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
629 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
630 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
631 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
632 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
633 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
634 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
635 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
636};
637
667e5b3e 638size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32 639{
15f2ee32
RN
640 size_t len = 0;
641
642 while (*psz && ((!buf) || (len < n)))
643 {
644 wchar_t cc = *psz++;
645 if (cc < 0x80 && utf7encode[cc] < 1)
646 {
647 // plain ASCII char
648 if (buf)
649 *buf++ = (char)cc;
ef199164 650
15f2ee32
RN
651 len++;
652 }
653#ifndef WC_UTF16
79c78d42 654 else if (((wxUint32)cc) > 0xffff)
b2c13097 655 {
15f2ee32 656 // no surrogate pair generation (yet?)
467e0479 657 return wxCONV_FAILED;
15f2ee32
RN
658 }
659#endif
660 else
661 {
662 if (buf)
663 *buf++ = '+';
ef199164 664
15f2ee32
RN
665 len++;
666 if (cc != '+')
667 {
668 // BASE64 encode string
669 unsigned int lsb, d, l;
73c902d6 670 for (d = 0, l = 0; /*nothing*/; psz++)
15f2ee32
RN
671 {
672 for (lsb = 0; lsb < 2; lsb ++)
673 {
674 d <<= 8;
675 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
676
677 for (l += 8; l >= 6; )
678 {
679 l -= 6;
680 if (buf)
681 *buf++ = utf7enb64[(d >> l) % 64];
682 len++;
683 }
684 }
ef199164 685
15f2ee32
RN
686 cc = *psz;
687 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
688 break;
689 }
ef199164 690
15f2ee32
RN
691 if (l != 0)
692 {
693 if (buf)
694 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
ef199164 695
15f2ee32
RN
696 len++;
697 }
698 }
ef199164 699
15f2ee32
RN
700 if (buf)
701 *buf++ = '-';
702 len++;
703 }
704 }
ef199164 705
15f2ee32
RN
706 if (buf && (len < n))
707 *buf = 0;
ef199164 708
15f2ee32 709 return len;
6001e347
RR
710}
711
f6bcfd97 712// ----------------------------------------------------------------------------
6001e347 713// UTF-8
f6bcfd97 714// ----------------------------------------------------------------------------
6001e347 715
dccce9ea 716static wxUint32 utf8_max[]=
4def3b35 717 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347 718
3698ae71
VZ
719// boundaries of the private use area we use to (temporarily) remap invalid
720// characters invalid in a UTF-8 encoded string
ea8ce907
RR
721const wxUint32 wxUnicodePUA = 0x100000;
722const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
723
6001e347
RR
724size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
725{
4def3b35
VS
726 size_t len = 0;
727
dccce9ea 728 while (*psz && ((!buf) || (len < n)))
4def3b35 729 {
ea8ce907
RR
730 const char *opsz = psz;
731 bool invalid = false;
4def3b35
VS
732 unsigned char cc = *psz++, fc = cc;
733 unsigned cnt;
dccce9ea 734 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 735 fc <<= 1;
ef199164 736
dccce9ea 737 if (!cnt)
4def3b35
VS
738 {
739 // plain ASCII char
dccce9ea 740 if (buf)
4def3b35
VS
741 *buf++ = cc;
742 len++;
561488ef
MW
743
744 // escape the escape character for octal escapes
745 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
746 && cc == '\\' && (!buf || len < n))
747 {
748 if (buf)
749 *buf++ = cc;
750 len++;
751 }
dccce9ea
VZ
752 }
753 else
4def3b35
VS
754 {
755 cnt--;
dccce9ea 756 if (!cnt)
4def3b35
VS
757 {
758 // invalid UTF-8 sequence
ea8ce907 759 invalid = true;
dccce9ea
VZ
760 }
761 else
4def3b35
VS
762 {
763 unsigned ocnt = cnt - 1;
764 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 765 while (cnt--)
4def3b35 766 {
ea8ce907 767 cc = *psz;
dccce9ea 768 if ((cc & 0xC0) != 0x80)
4def3b35
VS
769 {
770 // invalid UTF-8 sequence
ea8ce907
RR
771 invalid = true;
772 break;
4def3b35 773 }
ef199164 774
ea8ce907 775 psz++;
4def3b35
VS
776 res = (res << 6) | (cc & 0x3f);
777 }
ef199164 778
ea8ce907 779 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
780 {
781 // illegal UTF-8 encoding
ea8ce907 782 invalid = true;
4def3b35 783 }
ea8ce907
RR
784 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
785 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
786 {
787 // if one of our PUA characters turns up externally
788 // it must also be treated as an illegal sequence
789 // (a bit like you have to escape an escape character)
790 invalid = true;
791 }
792 else
793 {
1cd52418 794#ifdef WC_UTF16
ea8ce907
RR
795 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
796 size_t pa = encode_utf16(res, (wxUint16 *)buf);
467e0479 797 if (pa == wxCONV_FAILED)
ea8ce907
RR
798 {
799 invalid = true;
800 }
801 else
802 {
803 if (buf)
804 buf += pa;
805 len += pa;
806 }
373658eb 807#else // !WC_UTF16
ea8ce907 808 if (buf)
38d4b1e4 809 *buf++ = (wchar_t)res;
ea8ce907 810 len++;
373658eb 811#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
812 }
813 }
ef199164 814
ea8ce907
RR
815 if (invalid)
816 {
817 if (m_options & MAP_INVALID_UTF8_TO_PUA)
818 {
819 while (opsz < psz && (!buf || len < n))
820 {
821#ifdef WC_UTF16
822 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
823 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
467e0479 824 wxASSERT(pa != wxCONV_FAILED);
ea8ce907
RR
825 if (buf)
826 buf += pa;
827 opsz++;
828 len += pa;
829#else
830 if (buf)
38d4b1e4 831 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
ea8ce907
RR
832 opsz++;
833 len++;
834#endif
835 }
836 }
3698ae71 837 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
838 {
839 while (opsz < psz && (!buf || len < n))
840 {
3698ae71
VZ
841 if ( buf && len + 3 < n )
842 {
17a1ebd1 843 unsigned char on = *opsz;
3698ae71 844 *buf++ = L'\\';
17a1ebd1
VZ
845 *buf++ = (wchar_t)( L'0' + on / 0100 );
846 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
847 *buf++ = (wchar_t)( L'0' + on % 010 );
3698ae71 848 }
ef199164 849
ea8ce907
RR
850 opsz++;
851 len += 4;
852 }
853 }
3698ae71 854 else // MAP_INVALID_UTF8_NOT
ea8ce907 855 {
467e0479 856 return wxCONV_FAILED;
ea8ce907 857 }
4def3b35
VS
858 }
859 }
6001e347 860 }
ef199164 861
dccce9ea 862 if (buf && (len < n))
4def3b35 863 *buf = 0;
ef199164 864
4def3b35 865 return len;
6001e347
RR
866}
867
3698ae71
VZ
// Returns true if wch is one of the octal digits L'0' to L'7'.
static inline bool isoctal(wchar_t wch)
{
    return wch >= L'0' && wch <= L'7';
}
872
6001e347
RR
873size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
874{
4def3b35 875 size_t len = 0;
6001e347 876
dccce9ea 877 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
878 {
879 wxUint32 cc;
ef199164 880
1cd52418 881#ifdef WC_UTF16
b5153fd8
VZ
882 // cast is ok for WC_UTF16
883 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
467e0479 884 psz += (pa == wxCONV_FAILED) ? 1 : pa;
1cd52418 885#else
ef199164 886 cc = (*psz++) & 0x7fffffff;
4def3b35 887#endif
3698ae71
VZ
888
889 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
890 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 891 {
dccce9ea 892 if (buf)
ea8ce907 893 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 894 len++;
3698ae71 895 }
561488ef
MW
896 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
897 && cc == L'\\' && psz[0] == L'\\' )
898 {
899 if (buf)
900 *buf++ = (char)cc;
901 psz++;
902 len++;
903 }
3698ae71
VZ
904 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
905 cc == L'\\' &&
906 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 907 {
dccce9ea 908 if (buf)
3698ae71 909 {
ef199164
DS
910 *buf++ = (char) ((psz[0] - L'0') * 0100 +
911 (psz[1] - L'0') * 010 +
b2c13097 912 (psz[2] - L'0'));
3698ae71
VZ
913 }
914
915 psz += 3;
ea8ce907
RR
916 len++;
917 }
918 else
919 {
920 unsigned cnt;
ef199164
DS
921 for (cnt = 0; cc > utf8_max[cnt]; cnt++)
922 {
923 }
924
ea8ce907 925 if (!cnt)
4def3b35 926 {
ea8ce907
RR
927 // plain ASCII char
928 if (buf)
929 *buf++ = (char) cc;
930 len++;
931 }
ea8ce907
RR
932 else
933 {
934 len += cnt + 1;
935 if (buf)
936 {
937 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
938 while (cnt--)
939 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
940 }
4def3b35
VS
941 }
942 }
6001e347 943 }
4def3b35 944
ef199164 945 if (buf && (len < n))
3698ae71 946 *buf = 0;
adb45366 947
4def3b35 948 return len;
6001e347
RR
949}
950
467e0479 951// ============================================================================
c91830cb 952// UTF-16
467e0479 953// ============================================================================
c91830cb
VZ
954
955#ifdef WORDS_BIGENDIAN
bde4baac
VZ
956 #define wxMBConvUTF16straight wxMBConvUTF16BE
957 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 958#else
bde4baac
VZ
959 #define wxMBConvUTF16swap wxMBConvUTF16BE
960 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
961#endif
962
467e0479
VZ
963/* static */
964size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
965{
966 if ( srcLen == wxNO_LEN )
967 {
968 // count the number of bytes in input, including the trailing NULs
ef199164
DS
969 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
970 for ( srcLen = 1; *inBuff++; srcLen++ )
467e0479 971 ;
c91830cb 972
467e0479
VZ
973 srcLen *= BYTES_PER_CHAR;
974 }
975 else // we already have the length
976 {
977 // we can only convert an entire number of UTF-16 characters
978 if ( srcLen % BYTES_PER_CHAR )
979 return wxCONV_FAILED;
980 }
981
982 return srcLen;
983}
984
985// case when in-memory representation is UTF-16 too
c91830cb
VZ
986#ifdef WC_UTF16
987
467e0479
VZ
988// ----------------------------------------------------------------------------
989// conversions without endianness change
990// ----------------------------------------------------------------------------
991
992size_t
993wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
994 const char *src, size_t srcLen) const
c91830cb 995{
467e0479
VZ
996 // set up the scene for using memcpy() (which is presumably more efficient
997 // than copying the bytes one by one)
998 srcLen = GetLength(src, srcLen);
999 if ( srcLen == wxNO_LEN )
1000 return wxCONV_FAILED;
c91830cb 1001
ef199164 1002 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479 1003 if ( dst )
c91830cb 1004 {
467e0479
VZ
1005 if ( dstLen < inLen )
1006 return wxCONV_FAILED;
c91830cb 1007
467e0479 1008 memcpy(dst, src, srcLen);
c91830cb 1009 }
d32a507d 1010
467e0479 1011 return inLen;
c91830cb
VZ
1012}
1013
467e0479
VZ
1014size_t
1015wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1016 const wchar_t *src, size_t srcLen) const
c91830cb 1017{
467e0479
VZ
1018 if ( srcLen == wxNO_LEN )
1019 srcLen = wxWcslen(src) + 1;
c91830cb 1020
467e0479
VZ
1021 srcLen *= BYTES_PER_CHAR;
1022
1023 if ( dst )
c91830cb 1024 {
467e0479
VZ
1025 if ( dstLen < srcLen )
1026 return wxCONV_FAILED;
d32a507d 1027
467e0479 1028 memcpy(dst, src, srcLen);
c91830cb 1029 }
d32a507d 1030
467e0479 1031 return srcLen;
c91830cb
VZ
1032}
1033
467e0479
VZ
1034// ----------------------------------------------------------------------------
1035// endian-reversing conversions
1036// ----------------------------------------------------------------------------
c91830cb 1037
467e0479
VZ
1038size_t
1039wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1040 const char *src, size_t srcLen) const
c91830cb 1041{
467e0479
VZ
1042 srcLen = GetLength(src, srcLen);
1043 if ( srcLen == wxNO_LEN )
1044 return wxCONV_FAILED;
c91830cb 1045
467e0479
VZ
1046 srcLen /= BYTES_PER_CHAR;
1047
1048 if ( dst )
c91830cb 1049 {
467e0479
VZ
1050 if ( dstLen < srcLen )
1051 return wxCONV_FAILED;
1052
ef199164
DS
1053 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1054 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
c91830cb 1055 {
ef199164 1056 *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff);
c91830cb 1057 }
c91830cb 1058 }
bfab25d4 1059
467e0479 1060 return srcLen;
c91830cb
VZ
1061}
1062
467e0479
VZ
1063size_t
1064wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1065 const wchar_t *src, size_t srcLen) const
c91830cb 1066{
467e0479
VZ
1067 if ( srcLen == wxNO_LEN )
1068 srcLen = wxWcslen(src) + 1;
c91830cb 1069
467e0479
VZ
1070 srcLen *= BYTES_PER_CHAR;
1071
1072 if ( dst )
c91830cb 1073 {
467e0479
VZ
1074 if ( dstLen < srcLen )
1075 return wxCONV_FAILED;
1076
ef199164 1077 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
467e0479 1078 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
c91830cb 1079 {
ef199164 1080 *outBuff++ = wxUINT16_SWAP_ALWAYS(*src);
c91830cb 1081 }
c91830cb 1082 }
eec47cc6 1083
467e0479 1084 return srcLen;
c91830cb
VZ
1085}
1086
467e0479 1087#else // !WC_UTF16: wchar_t is UTF-32
c91830cb 1088
467e0479
VZ
1089// ----------------------------------------------------------------------------
1090// conversions without endianness change
1091// ----------------------------------------------------------------------------
c91830cb 1092
35d11700
VZ
1093size_t
1094wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1095 const char *src, size_t srcLen) const
c91830cb 1096{
35d11700
VZ
1097 srcLen = GetLength(src, srcLen);
1098 if ( srcLen == wxNO_LEN )
1099 return wxCONV_FAILED;
c91830cb 1100
ef199164 1101 const size_t inLen = srcLen / BYTES_PER_CHAR;
35d11700 1102 if ( !dst )
c91830cb 1103 {
35d11700
VZ
1104 // optimization: return maximal space which could be needed for this
1105 // string even if the real size could be smaller if the buffer contains
1106 // any surrogates
1107 return inLen;
c91830cb 1108 }
c91830cb 1109
35d11700 1110 size_t outLen = 0;
ef199164
DS
1111 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1112 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
35d11700 1113 {
ef199164
DS
1114 const wxUint32 ch = wxDecodeSurrogate(&inBuff);
1115 if ( !inBuff )
35d11700
VZ
1116 return wxCONV_FAILED;
1117
1118 if ( ++outLen > dstLen )
1119 return wxCONV_FAILED;
c91830cb 1120
35d11700
VZ
1121 *dst++ = ch;
1122 }
1123
1124
1125 return outLen;
1126}
c91830cb 1127
35d11700
VZ
1128size_t
1129wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1130 const wchar_t *src, size_t srcLen) const
c91830cb 1131{
35d11700
VZ
1132 if ( srcLen == wxNO_LEN )
1133 srcLen = wxWcslen(src) + 1;
c91830cb 1134
35d11700 1135 size_t outLen = 0;
ef199164 1136 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
35d11700 1137 for ( size_t n = 0; n < srcLen; n++ )
c91830cb
VZ
1138 {
1139 wxUint16 cc[2];
35d11700
VZ
1140 const size_t numChars = encode_utf16(*src++, cc);
1141 if ( numChars == wxCONV_FAILED )
1142 return wxCONV_FAILED;
c91830cb 1143
ef199164
DS
1144 outLen += numChars * BYTES_PER_CHAR;
1145 if ( outBuff )
c91830cb 1146 {
35d11700
VZ
1147 if ( outLen > dstLen )
1148 return wxCONV_FAILED;
1149
ef199164 1150 *outBuff++ = cc[0];
35d11700 1151 if ( numChars == 2 )
69b80d28 1152 {
35d11700 1153 // second character of a surrogate
ef199164 1154 *outBuff++ = cc[1];
69b80d28 1155 }
c91830cb 1156 }
c91830cb 1157 }
c91830cb 1158
35d11700 1159 return outLen;
c91830cb
VZ
1160}
1161
467e0479
VZ
1162// ----------------------------------------------------------------------------
1163// endian-reversing conversions
1164// ----------------------------------------------------------------------------
c91830cb 1165
35d11700
VZ
1166size_t
1167wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1168 const char *src, size_t srcLen) const
c91830cb 1169{
35d11700
VZ
1170 srcLen = GetLength(src, srcLen);
1171 if ( srcLen == wxNO_LEN )
1172 return wxCONV_FAILED;
1173
ef199164 1174 const size_t inLen = srcLen / BYTES_PER_CHAR;
35d11700
VZ
1175 if ( !dst )
1176 {
1177 // optimization: return maximal space which could be needed for this
1178 // string even if the real size could be smaller if the buffer contains
1179 // any surrogates
1180 return inLen;
1181 }
c91830cb 1182
35d11700 1183 size_t outLen = 0;
ef199164
DS
1184 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1185 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
c91830cb 1186 {
35d11700
VZ
1187 wxUint32 ch;
1188 wxUint16 tmp[2];
ef199164
DS
1189
1190 tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
1191 inBuff++;
1192 tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
c91830cb 1193
35d11700
VZ
1194 const size_t numChars = decode_utf16(tmp, ch);
1195 if ( numChars == wxCONV_FAILED )
1196 return wxCONV_FAILED;
c91830cb 1197
35d11700 1198 if ( numChars == 2 )
ef199164 1199 inBuff++;
35d11700
VZ
1200
1201 if ( ++outLen > dstLen )
1202 return wxCONV_FAILED;
c91830cb 1203
35d11700 1204 *dst++ = ch;
c91830cb 1205 }
c91830cb 1206
c91830cb 1207
35d11700
VZ
1208 return outLen;
1209}
c91830cb 1210
35d11700
VZ
1211size_t
1212wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1213 const wchar_t *src, size_t srcLen) const
c91830cb 1214{
35d11700
VZ
1215 if ( srcLen == wxNO_LEN )
1216 srcLen = wxWcslen(src) + 1;
c91830cb 1217
35d11700 1218 size_t outLen = 0;
ef199164 1219 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
35d11700 1220 for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
c91830cb
VZ
1221 {
1222 wxUint16 cc[2];
35d11700
VZ
1223 const size_t numChars = encode_utf16(*src, cc);
1224 if ( numChars == wxCONV_FAILED )
1225 return wxCONV_FAILED;
c91830cb 1226
ef199164
DS
1227 outLen += numChars * BYTES_PER_CHAR;
1228 if ( outBuff )
c91830cb 1229 {
35d11700
VZ
1230 if ( outLen > dstLen )
1231 return wxCONV_FAILED;
1232
ef199164 1233 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
35d11700 1234 if ( numChars == 2 )
c91830cb 1235 {
35d11700 1236 // second character of a surrogate
ef199164 1237 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
c91830cb
VZ
1238 }
1239 }
c91830cb 1240 }
c91830cb 1241
35d11700 1242 return outLen;
c91830cb
VZ
1243}
1244
467e0479 1245#endif // WC_UTF16/!WC_UTF16
c91830cb
VZ
1246
1247
35d11700 1248// ============================================================================
c91830cb 1249// UTF-32
35d11700 1250// ============================================================================
c91830cb
VZ
1251
1252#ifdef WORDS_BIGENDIAN
467e0479
VZ
1253 #define wxMBConvUTF32straight wxMBConvUTF32BE
1254 #define wxMBConvUTF32swap wxMBConvUTF32LE
c91830cb 1255#else
467e0479
VZ
1256 #define wxMBConvUTF32swap wxMBConvUTF32BE
1257 #define wxMBConvUTF32straight wxMBConvUTF32LE
c91830cb
VZ
1258#endif
1259
1260
1261WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1262WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1263
467e0479
VZ
1264/* static */
1265size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
1266{
1267 if ( srcLen == wxNO_LEN )
1268 {
1269 // count the number of bytes in input, including the trailing NULs
ef199164
DS
1270 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1271 for ( srcLen = 1; *inBuff++; srcLen++ )
467e0479 1272 ;
c91830cb 1273
467e0479
VZ
1274 srcLen *= BYTES_PER_CHAR;
1275 }
1276 else // we already have the length
1277 {
1278 // we can only convert an entire number of UTF-32 characters
1279 if ( srcLen % BYTES_PER_CHAR )
1280 return wxCONV_FAILED;
1281 }
1282
1283 return srcLen;
1284}
1285
1286// case when in-memory representation is UTF-16
c91830cb
VZ
1287#ifdef WC_UTF16
1288
467e0479
VZ
1289// ----------------------------------------------------------------------------
1290// conversions without endianness change
1291// ----------------------------------------------------------------------------
1292
1293size_t
1294wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1295 const char *src, size_t srcLen) const
c91830cb 1296{
467e0479
VZ
1297 srcLen = GetLength(src, srcLen);
1298 if ( srcLen == wxNO_LEN )
1299 return wxCONV_FAILED;
c91830cb 1300
ef199164
DS
1301 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1302 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479
VZ
1303 size_t outLen = 0;
1304 for ( size_t n = 0; n < inLen; n++ )
c91830cb
VZ
1305 {
1306 wxUint16 cc[2];
ef199164 1307 const size_t numChars = encode_utf16(*inBuff++, cc);
467e0479
VZ
1308 if ( numChars == wxCONV_FAILED )
1309 return wxCONV_FAILED;
c91830cb 1310
467e0479
VZ
1311 outLen += numChars;
1312 if ( dst )
c91830cb 1313 {
467e0479
VZ
1314 if ( outLen > dstLen )
1315 return wxCONV_FAILED;
d32a507d 1316
467e0479
VZ
1317 *dst++ = cc[0];
1318 if ( numChars == 2 )
1319 {
1320 // second character of a surrogate
1321 *dst++ = cc[1];
1322 }
1323 }
c91830cb 1324 }
d32a507d 1325
467e0479 1326 return outLen;
c91830cb
VZ
1327}
1328
467e0479
VZ
1329size_t
1330wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1331 const wchar_t *src, size_t srcLen) const
c91830cb 1332{
467e0479
VZ
1333 if ( srcLen == wxNO_LEN )
1334 srcLen = wxWcslen(src) + 1;
c91830cb 1335
467e0479 1336 if ( !dst )
c91830cb 1337 {
467e0479
VZ
1338 // optimization: return maximal space which could be needed for this
1339 // string instead of the exact amount which could be less if there are
1340 // any surrogates in the input
1341 //
1342 // we consider that surrogates are rare enough to make it worthwhile to
1343 // avoid running the loop below at the cost of slightly extra memory
1344 // consumption
ef199164 1345 return srcLen * BYTES_PER_CHAR;
467e0479 1346 }
c91830cb 1347
ef199164 1348 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
467e0479
VZ
1349 size_t outLen = 0;
1350 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1351 {
1352 const wxUint32 ch = wxDecodeSurrogate(&src);
1353 if ( !src )
1354 return wxCONV_FAILED;
c91830cb 1355
467e0479 1356 outLen += BYTES_PER_CHAR;
d32a507d 1357
467e0479
VZ
1358 if ( outLen > dstLen )
1359 return wxCONV_FAILED;
b5153fd8 1360
ef199164 1361 *outBuff++ = ch;
467e0479 1362 }
c91830cb 1363
467e0479 1364 return outLen;
c91830cb
VZ
1365}
1366
467e0479
VZ
1367// ----------------------------------------------------------------------------
1368// endian-reversing conversions
1369// ----------------------------------------------------------------------------
c91830cb 1370
467e0479
VZ
1371size_t
1372wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1373 const char *src, size_t srcLen) const
c91830cb 1374{
467e0479
VZ
1375 srcLen = GetLength(src, srcLen);
1376 if ( srcLen == wxNO_LEN )
1377 return wxCONV_FAILED;
c91830cb 1378
ef199164
DS
1379 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1380 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479 1381 size_t outLen = 0;
ef199164 1382 for ( size_t n = 0; n < inLen; n++, inBuff++ )
c91830cb 1383 {
c91830cb 1384 wxUint16 cc[2];
ef199164 1385 const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
467e0479
VZ
1386 if ( numChars == wxCONV_FAILED )
1387 return wxCONV_FAILED;
c91830cb 1388
467e0479
VZ
1389 outLen += numChars;
1390 if ( dst )
c91830cb 1391 {
467e0479
VZ
1392 if ( outLen > dstLen )
1393 return wxCONV_FAILED;
d32a507d 1394
467e0479
VZ
1395 *dst++ = cc[0];
1396 if ( numChars == 2 )
1397 {
1398 // second character of a surrogate
1399 *dst++ = cc[1];
1400 }
1401 }
c91830cb 1402 }
b5153fd8 1403
467e0479 1404 return outLen;
c91830cb
VZ
1405}
1406
467e0479
VZ
1407size_t
1408wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1409 const wchar_t *src, size_t srcLen) const
c91830cb 1410{
467e0479
VZ
1411 if ( srcLen == wxNO_LEN )
1412 srcLen = wxWcslen(src) + 1;
c91830cb 1413
467e0479 1414 if ( !dst )
c91830cb 1415 {
467e0479
VZ
1416 // optimization: return maximal space which could be needed for this
1417 // string instead of the exact amount which could be less if there are
1418 // any surrogates in the input
1419 //
1420 // we consider that surrogates are rare enough to make it worthwhile to
1421 // avoid running the loop below at the cost of slightly extra memory
1422 // consumption
1423 return srcLen*BYTES_PER_CHAR;
1424 }
c91830cb 1425
ef199164 1426 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
467e0479
VZ
1427 size_t outLen = 0;
1428 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1429 {
1430 const wxUint32 ch = wxDecodeSurrogate(&src);
1431 if ( !src )
1432 return wxCONV_FAILED;
c91830cb 1433
467e0479 1434 outLen += BYTES_PER_CHAR;
d32a507d 1435
467e0479
VZ
1436 if ( outLen > dstLen )
1437 return wxCONV_FAILED;
b5153fd8 1438
ef199164 1439 *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
467e0479 1440 }
c91830cb 1441
467e0479 1442 return outLen;
c91830cb
VZ
1443}
1444
467e0479 1445#else // !WC_UTF16: wchar_t is UTF-32
c91830cb 1446
35d11700
VZ
1447// ----------------------------------------------------------------------------
1448// conversions without endianness change
1449// ----------------------------------------------------------------------------
1450
1451size_t
1452wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1453 const char *src, size_t srcLen) const
c91830cb 1454{
35d11700
VZ
1455 // use memcpy() as it should be much faster than hand-written loop
1456 srcLen = GetLength(src, srcLen);
1457 if ( srcLen == wxNO_LEN )
1458 return wxCONV_FAILED;
c91830cb 1459
35d11700
VZ
1460 const size_t inLen = srcLen/BYTES_PER_CHAR;
1461 if ( dst )
c91830cb 1462 {
35d11700
VZ
1463 if ( dstLen < inLen )
1464 return wxCONV_FAILED;
b5153fd8 1465
35d11700
VZ
1466 memcpy(dst, src, srcLen);
1467 }
c91830cb 1468
35d11700 1469 return inLen;
c91830cb
VZ
1470}
1471
35d11700
VZ
1472size_t
1473wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1474 const wchar_t *src, size_t srcLen) const
c91830cb 1475{
35d11700
VZ
1476 if ( srcLen == wxNO_LEN )
1477 srcLen = wxWcslen(src) + 1;
1478
1479 srcLen *= BYTES_PER_CHAR;
c91830cb 1480
35d11700 1481 if ( dst )
c91830cb 1482 {
35d11700
VZ
1483 if ( dstLen < srcLen )
1484 return wxCONV_FAILED;
c91830cb 1485
35d11700 1486 memcpy(dst, src, srcLen);
c91830cb
VZ
1487 }
1488
35d11700 1489 return srcLen;
c91830cb
VZ
1490}
1491
35d11700
VZ
1492// ----------------------------------------------------------------------------
1493// endian-reversing conversions
1494// ----------------------------------------------------------------------------
c91830cb 1495
35d11700
VZ
1496size_t
1497wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1498 const char *src, size_t srcLen) const
c91830cb 1499{
35d11700
VZ
1500 srcLen = GetLength(src, srcLen);
1501 if ( srcLen == wxNO_LEN )
1502 return wxCONV_FAILED;
1503
1504 srcLen /= BYTES_PER_CHAR;
c91830cb 1505
35d11700 1506 if ( dst )
c91830cb 1507 {
35d11700
VZ
1508 if ( dstLen < srcLen )
1509 return wxCONV_FAILED;
1510
ef199164
DS
1511 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1512 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
c91830cb 1513 {
ef199164 1514 *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff);
c91830cb 1515 }
c91830cb 1516 }
b5153fd8 1517
35d11700 1518 return srcLen;
c91830cb
VZ
1519}
1520
35d11700
VZ
1521size_t
1522wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1523 const wchar_t *src, size_t srcLen) const
c91830cb 1524{
35d11700
VZ
1525 if ( srcLen == wxNO_LEN )
1526 srcLen = wxWcslen(src) + 1;
1527
1528 srcLen *= BYTES_PER_CHAR;
c91830cb 1529
35d11700 1530 if ( dst )
c91830cb 1531 {
35d11700
VZ
1532 if ( dstLen < srcLen )
1533 return wxCONV_FAILED;
1534
ef199164 1535 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
35d11700 1536 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
c91830cb 1537 {
ef199164 1538 *outBuff++ = wxUINT32_SWAP_ALWAYS(*src);
c91830cb 1539 }
c91830cb 1540 }
b5153fd8 1541
35d11700 1542 return srcLen;
c91830cb
VZ
1543}
1544
467e0479 1545#endif // WC_UTF16/!WC_UTF16
c91830cb
VZ
1546
1547
36acb880
VZ
1548// ============================================================================
1549// The classes doing conversion using the iconv_xxx() functions
1550// ============================================================================
3caec1bb 1551
b040e242 1552#ifdef HAVE_ICONV
3a0d76bc 1553
b1d547eb
VS
1554// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1555// E2BIG if output buffer is _exactly_ as big as needed. Such case is
1556// (unless there's yet another bug in glibc) the only case when iconv()
1557// returns with (size_t)-1 (which means error) and says there are 0 bytes
1558// left in the input buffer -- when _real_ error occurs,
1559// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1560// iconv() failure.
3caec1bb
VS
1561// [This bug does not appear in glibc 2.2.]
1562#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1563#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1564 (errno != E2BIG || bufLeft != 0))
1565#else
1566#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1567#endif
1568
ab217dba 1569#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880 1570
74a7eb0b
VZ
1571#define ICONV_T_INVALID ((iconv_t)-1)
1572
1573#if SIZEOF_WCHAR_T == 4
1574 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1575 #define WC_ENC wxFONTENCODING_UTF32
1576#elif SIZEOF_WCHAR_T == 2
1577 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1578 #define WC_ENC wxFONTENCODING_UTF16
1579#else // sizeof(wchar_t) != 2 nor 4
1580 // does this ever happen?
1581 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1582#endif
1583
36acb880 1584// ----------------------------------------------------------------------------
e95354ec 1585// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1586// ----------------------------------------------------------------------------
1587
e95354ec 1588class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1589{
1590public:
86501081 1591 wxMBConv_iconv(const char *name);
e95354ec 1592 virtual ~wxMBConv_iconv();
36acb880 1593
bde4baac
VZ
1594 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1595 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1596
d36c9347 1597 // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
7ef3ab50
VZ
1598 virtual size_t GetMBNulLen() const;
1599
ba98e032
VS
1600#if wxUSE_UNICODE_UTF8
1601 virtual bool IsUTF8() const;
1602#endif
1603
d36c9347
VZ
1604 virtual wxMBConv *Clone() const
1605 {
86501081 1606 wxMBConv_iconv *p = new wxMBConv_iconv(m_name.ToAscii());
d36c9347
VZ
1607 p->m_minMBCharWidth = m_minMBCharWidth;
1608 return p;
1609 }
1610
e95354ec 1611 bool IsOk() const
74a7eb0b 1612 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
36acb880
VZ
1613
1614protected:
ef199164
DS
1615 // the iconv handlers used to translate from multibyte
1616 // to wide char and in the other direction
36acb880
VZ
1617 iconv_t m2w,
1618 w2m;
ef199164 1619
b1d547eb
VS
1620#if wxUSE_THREADS
1621 // guards access to m2w and w2m objects
1622 wxMutex m_iconvMutex;
1623#endif
36acb880
VZ
1624
1625private:
e95354ec 1626 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880 1627 // available on this machine, it will remain NULL
74a7eb0b 1628 static wxString ms_wcCharsetName;
36acb880
VZ
1629
1630 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1631 // different endian-ness than the native one
405d8f46 1632 static bool ms_wcNeedsSwap;
eec47cc6 1633
d36c9347
VZ
1634
1635 // name of the encoding handled by this conversion
1636 wxString m_name;
1637
7ef3ab50 1638 // cached result of GetMBNulLen(); set to 0 meaning "unknown"
c1464d9d
VZ
1639 // initially
1640 size_t m_minMBCharWidth;
36acb880
VZ
1641};
1642
8f115891 1643// make the constructor available for unit testing
86501081 1644WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name )
8f115891
MW
1645{
1646 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1647 if ( !result->IsOk() )
1648 {
1649 delete result;
1650 return 0;
1651 }
ef199164 1652
8f115891
MW
1653 return result;
1654}
1655
422e411e 1656wxString wxMBConv_iconv::ms_wcCharsetName;
e95354ec 1657bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1658
86501081 1659wxMBConv_iconv::wxMBConv_iconv(const char *name)
d36c9347 1660 : m_name(name)
36acb880 1661{
c1464d9d 1662 m_minMBCharWidth = 0;
eec47cc6 1663
36acb880 1664 // check for charset that represents wchar_t:
74a7eb0b 1665 if ( ms_wcCharsetName.empty() )
f1339c56 1666 {
c2b83fdd
VZ
1667 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1668
74a7eb0b
VZ
1669#if wxUSE_FONTMAP
1670 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1671#else // !wxUSE_FONTMAP
91cb7f52 1672 static const wxChar *names_static[] =
36acb880 1673 {
74a7eb0b
VZ
1674#if SIZEOF_WCHAR_T == 4
1675 _T("UCS-4"),
1676#elif SIZEOF_WCHAR_T = 2
1677 _T("UCS-2"),
1678#endif
1679 NULL
1680 };
91cb7f52 1681 const wxChar **names = names_static;
74a7eb0b 1682#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
36acb880 1683
d1f024a8 1684 for ( ; *names && ms_wcCharsetName.empty(); ++names )
74a7eb0b 1685 {
17a1ebd1 1686 const wxString nameCS(*names);
74a7eb0b
VZ
1687
1688 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
17a1ebd1 1689 wxString nameXE(nameCS);
ef199164
DS
1690
1691#ifdef WORDS_BIGENDIAN
74a7eb0b 1692 nameXE += _T("BE");
ef199164 1693#else // little endian
74a7eb0b 1694 nameXE += _T("LE");
ef199164 1695#endif
74a7eb0b 1696
c2b83fdd
VZ
1697 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1698 nameXE.c_str());
1699
86501081 1700 m2w = iconv_open(nameXE.ToAscii(), name);
74a7eb0b 1701 if ( m2w == ICONV_T_INVALID )
3a0d76bc 1702 {
74a7eb0b 1703 // try charset w/o bytesex info (e.g. "UCS4")
c2b83fdd
VZ
1704 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1705 nameCS.c_str());
86501081 1706 m2w = iconv_open(nameCS.ToAscii(), name);
3a0d76bc 1707
74a7eb0b
VZ
1708 // and check for bytesex ourselves:
1709 if ( m2w != ICONV_T_INVALID )
3a0d76bc 1710 {
74a7eb0b
VZ
1711 char buf[2], *bufPtr;
1712 wchar_t wbuf[2], *wbufPtr;
1713 size_t insz, outsz;
1714 size_t res;
1715
1716 buf[0] = 'A';
1717 buf[1] = 0;
1718 wbuf[0] = 0;
1719 insz = 2;
1720 outsz = SIZEOF_WCHAR_T * 2;
1721 wbufPtr = wbuf;
1722 bufPtr = buf;
1723
ef199164
DS
1724 res = iconv(
1725 m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1726 (char**)&wbufPtr, &outsz);
74a7eb0b
VZ
1727
1728 if (ICONV_FAILED(res, insz))
1729 {
1730 wxLogLastError(wxT("iconv"));
422e411e 1731 wxLogError(_("Conversion to charset '%s' doesn't work."),
17a1ebd1 1732 nameCS.c_str());
74a7eb0b
VZ
1733 }
1734 else // ok, can convert to this encoding, remember it
1735 {
17a1ebd1 1736 ms_wcCharsetName = nameCS;
74a7eb0b
VZ
1737 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1738 }
3a0d76bc
VS
1739 }
1740 }
74a7eb0b 1741 else // use charset not requiring byte swapping
36acb880 1742 {
74a7eb0b 1743 ms_wcCharsetName = nameXE;
36acb880 1744 }
3a0d76bc 1745 }
74a7eb0b 1746
0944fceb 1747 wxLogTrace(TRACE_STRCONV,
74a7eb0b 1748 wxT("iconv wchar_t charset is \"%s\"%s"),
999020e1
VZ
1749 ms_wcCharsetName.empty() ? wxString("<none>")
1750 : ms_wcCharsetName,
74a7eb0b
VZ
1751 ms_wcNeedsSwap ? _T(" (needs swap)")
1752 : _T(""));
3a0d76bc 1753 }
36acb880 1754 else // we already have ms_wcCharsetName
3caec1bb 1755 {
86501081 1756 m2w = iconv_open(ms_wcCharsetName.ToAscii(), name);
f1339c56 1757 }
dccce9ea 1758
74a7eb0b 1759 if ( ms_wcCharsetName.empty() )
f1339c56 1760 {
74a7eb0b 1761 w2m = ICONV_T_INVALID;
36acb880 1762 }
405d8f46
VZ
1763 else
1764 {
86501081 1765 w2m = iconv_open(name, ms_wcCharsetName.ToAscii());
74a7eb0b
VZ
1766 if ( w2m == ICONV_T_INVALID )
1767 {
1768 wxLogTrace(TRACE_STRCONV,
1769 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
86501081 1770 ms_wcCharsetName.c_str(), name);
74a7eb0b 1771 }
405d8f46 1772 }
36acb880 1773}
3caec1bb 1774
e95354ec 1775wxMBConv_iconv::~wxMBConv_iconv()
36acb880 1776{
74a7eb0b 1777 if ( m2w != ICONV_T_INVALID )
36acb880 1778 iconv_close(m2w);
74a7eb0b 1779 if ( w2m != ICONV_T_INVALID )
36acb880
VZ
1780 iconv_close(w2m);
1781}
3a0d76bc 1782
bde4baac 1783size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1784{
69373110
VZ
1785 // find the string length: notice that must be done differently for
1786 // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1787 size_t inbuf;
7ef3ab50 1788 const size_t nulLen = GetMBNulLen();
69373110
VZ
1789 switch ( nulLen )
1790 {
1791 default:
467e0479 1792 return wxCONV_FAILED;
69373110
VZ
1793
1794 case 1:
1795 inbuf = strlen(psz); // arguably more optimized than our version
1796 break;
1797
1798 case 2:
1799 case 4:
1800 // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1801 // they also have to start at character boundary and not span two
1802 // adjacent characters
1803 const char *p;
1804 for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
1805 ;
1806 inbuf = p - psz;
1807 break;
1808 }
1809
b1d547eb 1810#if wxUSE_THREADS
6a17b868
SN
1811 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1812 // Unfortunately there are a couple of global wxCSConv objects such as
b1d547eb
VS
1813 // wxConvLocal that are used all over wx code, so we have to make sure
1814 // the handle is used by at most one thread at the time. Otherwise
1815 // only a few wx classes would be safe to use from non-main threads
1816 // as MB<->WC conversion would fail "randomly".
1817 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
69373110
VZ
1818#endif // wxUSE_THREADS
1819
36acb880
VZ
1820 size_t outbuf = n * SIZEOF_WCHAR_T;
1821 size_t res, cres;
1822 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1823 wchar_t *bufPtr = buf;
1824 const char *pszPtr = psz;
1825
1826 if (buf)
1827 {
1828 // have destination buffer, convert there
1829 cres = iconv(m2w,
1830 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1831 (char**)&bufPtr, &outbuf);
1832 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1833
36acb880 1834 if (ms_wcNeedsSwap)
3a0d76bc 1835 {
36acb880 1836 // convert to native endianness
17a1ebd1
VZ
1837 for ( unsigned i = 0; i < res; i++ )
1838 buf[n] = WC_BSWAP(buf[i]);
3a0d76bc 1839 }
adb45366 1840
69373110 1841 // NUL-terminate the string if there is any space left
49dd9820
VS
1842 if (res < n)
1843 buf[res] = 0;
36acb880
VZ
1844 }
1845 else
1846 {
1847 // no destination buffer... convert using temp buffer
1848 // to calculate destination buffer requirement
1849 wchar_t tbuf[8];
1850 res = 0;
ef199164
DS
1851
1852 do
1853 {
36acb880 1854 bufPtr = tbuf;
ef199164 1855 outbuf = 8 * SIZEOF_WCHAR_T;
36acb880
VZ
1856
1857 cres = iconv(m2w,
1858 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1859 (char**)&bufPtr, &outbuf );
1860
ef199164
DS
1861 res += 8 - (outbuf / SIZEOF_WCHAR_T);
1862 }
1863 while ((cres == (size_t)-1) && (errno == E2BIG));
f1339c56 1864 }
dccce9ea 1865
36acb880 1866 if (ICONV_FAILED(cres, inbuf))
f1339c56 1867 {
36acb880 1868 //VS: it is ok if iconv fails, hence trace only
ce6f8d6f 1869 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
467e0479 1870 return wxCONV_FAILED;
36acb880
VZ
1871 }
1872
1873 return res;
1874}
1875
bde4baac 1876size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1877{
b1d547eb
VS
1878#if wxUSE_THREADS
1879 // NB: explained in MB2WC
1880 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1881#endif
3698ae71 1882
156162ec
MW
1883 size_t inlen = wxWcslen(psz);
1884 size_t inbuf = inlen * SIZEOF_WCHAR_T;
36acb880
VZ
1885 size_t outbuf = n;
1886 size_t res, cres;
3a0d76bc 1887
36acb880 1888 wchar_t *tmpbuf = 0;
3caec1bb 1889
36acb880
VZ
1890 if (ms_wcNeedsSwap)
1891 {
1892 // need to copy to temp buffer to switch endianness
74a7eb0b 1893 // (doing WC_BSWAP twice on the original buffer won't help, as it
36acb880 1894 // could be in read-only memory, or be accessed in some other thread)
74a7eb0b 1895 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
17a1ebd1
VZ
1896 for ( size_t i = 0; i < inlen; i++ )
1897 tmpbuf[n] = WC_BSWAP(psz[i]);
ef199164 1898
156162ec 1899 tmpbuf[inlen] = L'\0';
74a7eb0b 1900 psz = tmpbuf;
36acb880 1901 }
3a0d76bc 1902
36acb880
VZ
1903 if (buf)
1904 {
1905 // have destination buffer, convert there
1906 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1907
ef199164 1908 res = n - outbuf;
adb45366 1909
49dd9820
VS
1910 // NB: iconv was given only wcslen(psz) characters on input, and so
1911 // it couldn't convert the trailing zero. Let's do it ourselves
1912 // if there's some room left for it in the output buffer.
1913 if (res < n)
1914 buf[0] = 0;
36acb880
VZ
1915 }
1916 else
1917 {
ef199164 1918 // no destination buffer: convert using temp buffer
36acb880
VZ
1919 // to calculate destination buffer requirement
1920 char tbuf[16];
1921 res = 0;
ef199164
DS
1922 do
1923 {
1924 buf = tbuf;
1925 outbuf = 16;
36acb880
VZ
1926
1927 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1928
36acb880 1929 res += 16 - outbuf;
ef199164
DS
1930 }
1931 while ((cres == (size_t)-1) && (errno == E2BIG));
f1339c56 1932 }
dccce9ea 1933
36acb880
VZ
1934 if (ms_wcNeedsSwap)
1935 {
1936 free(tmpbuf);
1937 }
dccce9ea 1938
36acb880
VZ
1939 if (ICONV_FAILED(cres, inbuf))
1940 {
ce6f8d6f 1941 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
467e0479 1942 return wxCONV_FAILED;
36acb880
VZ
1943 }
1944
1945 return res;
1946}
1947
7ef3ab50 1948size_t wxMBConv_iconv::GetMBNulLen() const
eec47cc6 1949{
c1464d9d 1950 if ( m_minMBCharWidth == 0 )
eec47cc6
VZ
1951 {
1952 wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1953
1954#if wxUSE_THREADS
1955 // NB: explained in MB2WC
1956 wxMutexLocker lock(self->m_iconvMutex);
1957#endif
1958
999020e1 1959 const wchar_t *wnul = L"";
c1464d9d 1960 char buf[8]; // should be enough for NUL in any encoding
356410fc 1961 size_t inLen = sizeof(wchar_t),
c1464d9d 1962 outLen = WXSIZEOF(buf);
ef199164
DS
1963 char *inBuff = (char *)wnul;
1964 char *outBuff = buf;
1965 if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
356410fc 1966 {
c1464d9d 1967 self->m_minMBCharWidth = (size_t)-1;
356410fc
VZ
1968 }
1969 else // ok
1970 {
ef199164 1971 self->m_minMBCharWidth = outBuff - buf;
356410fc 1972 }
eec47cc6
VZ
1973 }
1974
c1464d9d 1975 return m_minMBCharWidth;
eec47cc6
VZ
1976}
1977
ba98e032
VS
1978#if wxUSE_UNICODE_UTF8
1979bool wxMBConv_iconv::IsUTF8() const
1980{
86501081
VS
1981 return wxStricmp(m_name, "UTF-8") == 0 ||
1982 wxStricmp(m_name, "UTF8") == 0;
ba98e032
VS
1983}
1984#endif
1985
b040e242 1986#endif // HAVE_ICONV
36acb880 1987
e95354ec 1988
36acb880
VZ
1989// ============================================================================
1990// Win32 conversion classes
1991// ============================================================================
1cd52418 1992
e95354ec 1993#ifdef wxHAVE_WIN32_MB2WC
373658eb 1994
8b04d4c4 1995// from utils.cpp
d775fa82 1996#if wxUSE_FONTMAP
86501081 1997extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const char *charset);
8b04d4c4 1998extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1999#endif
373658eb 2000
e95354ec 2001class wxMBConv_win32 : public wxMBConv
1cd52418
OK
2002{
2003public:
bde4baac
VZ
2004 wxMBConv_win32()
2005 {
2006 m_CodePage = CP_ACP;
c1464d9d 2007 m_minMBCharWidth = 0;
bde4baac
VZ
2008 }
2009
d36c9347 2010 wxMBConv_win32(const wxMBConv_win32& conv)
1e1c5d62 2011 : wxMBConv()
d36c9347
VZ
2012 {
2013 m_CodePage = conv.m_CodePage;
2014 m_minMBCharWidth = conv.m_minMBCharWidth;
2015 }
2016
7608a683 2017#if wxUSE_FONTMAP
86501081 2018 wxMBConv_win32(const char* name)
bde4baac
VZ
2019 {
2020 m_CodePage = wxCharsetToCodepage(name);
c1464d9d 2021 m_minMBCharWidth = 0;
bde4baac 2022 }
dccce9ea 2023
e95354ec 2024 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
2025 {
2026 m_CodePage = wxEncodingToCodepage(encoding);
c1464d9d 2027 m_minMBCharWidth = 0;
bde4baac 2028 }
eec47cc6 2029#endif // wxUSE_FONTMAP
8b04d4c4 2030
d36c9347 2031 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 2032 {
02272c9c
VZ
2033 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
2034 // the behaviour is not compatible with the Unix version (using iconv)
2035 // and break the library itself, e.g. wxTextInputStream::NextChar()
2036 // wouldn't work if reading an incomplete MB char didn't result in an
2037 // error
667e5b3e 2038 //
89028980 2039 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
830f8f11
VZ
2040 // Win XP or newer and it is not supported for UTF-[78] so we always
2041 // use our own conversions in this case. See
89028980
VS
2042 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2043 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
830f8f11 2044 if ( m_CodePage == CP_UTF8 )
89028980 2045 {
5487ff0f 2046 return wxMBConvUTF8().MB2WC(buf, psz, n);
89028980 2047 }
830f8f11
VZ
2048
2049 if ( m_CodePage == CP_UTF7 )
2050 {
5487ff0f 2051 return wxMBConvUTF7().MB2WC(buf, psz, n);
830f8f11
VZ
2052 }
2053
2054 int flags = 0;
2055 if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
2056 IsAtLeastWin2kSP4() )
89028980 2057 {
830f8f11 2058 flags = MB_ERR_INVALID_CHARS;
89028980 2059 }
667e5b3e 2060
2b5f62a0
VZ
2061 const size_t len = ::MultiByteToWideChar
2062 (
2063 m_CodePage, // code page
667e5b3e 2064 flags, // flags: fall on error
2b5f62a0
VZ
2065 psz, // input string
2066 -1, // its length (NUL-terminated)
b4da152e 2067 buf, // output string
2b5f62a0
VZ
2068 buf ? n : 0 // size of output buffer
2069 );
89028980
VS
2070 if ( !len )
2071 {
2072 // function totally failed
467e0479 2073 return wxCONV_FAILED;
89028980
VS
2074 }
2075
2076 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2077 // check if we succeeded, by doing a double trip:
2078 if ( !flags && buf )
2079 {
53c174fc
VZ
2080 const size_t mbLen = strlen(psz);
2081 wxCharBuffer mbBuf(mbLen);
89028980
VS
2082 if ( ::WideCharToMultiByte
2083 (
2084 m_CodePage,
2085 0,
2086 buf,
2087 -1,
2088 mbBuf.data(),
53c174fc 2089 mbLen + 1, // size in bytes, not length
89028980
VS
2090 NULL,
2091 NULL
2092 ) == 0 ||
2093 strcmp(mbBuf, psz) != 0 )
2094 {
2095 // we didn't obtain the same thing we started from, hence
2096 // the conversion was lossy and we consider that it failed
467e0479 2097 return wxCONV_FAILED;
89028980
VS
2098 }
2099 }
2b5f62a0 2100
03a991bc
VZ
2101 // note that it returns count of written chars for buf != NULL and size
2102 // of the needed buffer for buf == NULL so in either case the length of
2103 // the string (which never includes the terminating NUL) is one less
89028980 2104 return len - 1;
f1339c56 2105 }
dccce9ea 2106
d36c9347 2107 virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 2108 {
13dd924a
VZ
2109 /*
2110 we have a problem here: by default, WideCharToMultiByte() may
2111 replace characters unrepresentable in the target code page with bad
2112 quality approximations such as turning "1/2" symbol (U+00BD) into
2113 "1" for the code pages which don't have it and we, obviously, want
2114 to avoid this at any price
d775fa82 2115
13dd924a
VZ
2116 the trouble is that this function does it _silently_, i.e. it won't
2117 even tell us whether it did or not... Win98/2000 and higher provide
2118 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2119 we have to resort to a round trip, i.e. check that converting back
2120 results in the same string -- this is, of course, expensive but
2121 otherwise we simply can't be sure to not garble the data.
2122 */
2123
2124 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2125 // it doesn't work with CJK encodings (which we test for rather roughly
2126 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2127 // supporting it
907173e5
WS
2128 BOOL usedDef wxDUMMY_INITIALIZE(false);
2129 BOOL *pUsedDef;
13dd924a
VZ
2130 int flags;
2131 if ( CanUseNoBestFit() && m_CodePage < 50000 )
2132 {
2133 // it's our lucky day
2134 flags = WC_NO_BEST_FIT_CHARS;
2135 pUsedDef = &usedDef;
2136 }
2137 else // old system or unsupported encoding
2138 {
2139 flags = 0;
2140 pUsedDef = NULL;
2141 }
2142
2b5f62a0
VZ
2143 const size_t len = ::WideCharToMultiByte
2144 (
2145 m_CodePage, // code page
13dd924a
VZ
2146 flags, // either none or no best fit
2147 pwz, // input string
2b5f62a0
VZ
2148 -1, // it is (wide) NUL-terminated
2149 buf, // output buffer
2150 buf ? n : 0, // and its size
2151 NULL, // default "replacement" char
13dd924a 2152 pUsedDef // [out] was it used?
2b5f62a0
VZ
2153 );
2154
13dd924a
VZ
2155 if ( !len )
2156 {
2157 // function totally failed
467e0479 2158 return wxCONV_FAILED;
13dd924a
VZ
2159 }
2160
2161 // if we were really converting, check if we succeeded
2162 if ( buf )
2163 {
2164 if ( flags )
2165 {
2166 // check if the conversion failed, i.e. if any replacements
2167 // were done
2168 if ( usedDef )
467e0479 2169 return wxCONV_FAILED;
13dd924a
VZ
2170 }
2171 else // we must resort to double tripping...
2172 {
2173 wxWCharBuffer wcBuf(n);
467e0479 2174 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
13dd924a
VZ
2175 wcscmp(wcBuf, pwz) != 0 )
2176 {
2177 // we didn't obtain the same thing we started from, hence
2178 // the conversion was lossy and we consider that it failed
467e0479 2179 return wxCONV_FAILED;
13dd924a
VZ
2180 }
2181 }
2182 }
2183
03a991bc 2184 // see the comment above for the reason of "len - 1"
13dd924a 2185 return len - 1;
f1339c56 2186 }
dccce9ea 2187
7ef3ab50
VZ
2188 virtual size_t GetMBNulLen() const
2189 {
2190 if ( m_minMBCharWidth == 0 )
2191 {
2192 int len = ::WideCharToMultiByte
2193 (
2194 m_CodePage, // code page
2195 0, // no flags
2196 L"", // input string
2197 1, // translate just the NUL
2198 NULL, // output buffer
2199 0, // and its size
2200 NULL, // no replacement char
2201 NULL // [out] don't care if it was used
2202 );
2203
2204 wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2205 switch ( len )
2206 {
2207 default:
2208 wxLogDebug(_T("Unexpected NUL length %d"), len);
ef199164
DS
2209 self->m_minMBCharWidth = (size_t)-1;
2210 break;
7ef3ab50
VZ
2211
2212 case 0:
2213 self->m_minMBCharWidth = (size_t)-1;
2214 break;
2215
2216 case 1:
2217 case 2:
2218 case 4:
2219 self->m_minMBCharWidth = len;
2220 break;
2221 }
2222 }
2223
2224 return m_minMBCharWidth;
2225 }
2226
d36c9347
VZ
2227 virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2228
13dd924a
VZ
2229 bool IsOk() const { return m_CodePage != -1; }
2230
2231private:
2232 static bool CanUseNoBestFit()
2233 {
2234 static int s_isWin98Or2k = -1;
2235
2236 if ( s_isWin98Or2k == -1 )
2237 {
2238 int verMaj, verMin;
2239 switch ( wxGetOsVersion(&verMaj, &verMin) )
2240 {
406d283a 2241 case wxOS_WINDOWS_9X:
13dd924a
VZ
2242 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2243 break;
2244
406d283a 2245 case wxOS_WINDOWS_NT:
13dd924a
VZ
2246 s_isWin98Or2k = verMaj >= 5;
2247 break;
2248
2249 default:
ef199164 2250 // unknown: be conservative by default
13dd924a 2251 s_isWin98Or2k = 0;
ef199164 2252 break;
13dd924a
VZ
2253 }
2254
2255 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
2256 }
2257
2258 return s_isWin98Or2k == 1;
2259 }
f1339c56 2260
89028980
VS
2261 static bool IsAtLeastWin2kSP4()
2262 {
8942f83a
WS
2263#ifdef __WXWINCE__
2264 return false;
2265#else
89028980
VS
2266 static int s_isAtLeastWin2kSP4 = -1;
2267
2268 if ( s_isAtLeastWin2kSP4 == -1 )
2269 {
2270 OSVERSIONINFOEX ver;
2271
2272 memset(&ver, 0, sizeof(ver));
2273 ver.dwOSVersionInfoSize = sizeof(ver);
2274 GetVersionEx((OSVERSIONINFO*)&ver);
2275
2276 s_isAtLeastWin2kSP4 =
2277 ((ver.dwMajorVersion > 5) || // Vista+
2278 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2279 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2280 ver.wServicePackMajor >= 4)) // 2000 SP4+
2281 ? 1 : 0;
2282 }
2283
2284 return s_isAtLeastWin2kSP4 == 1;
8942f83a 2285#endif
89028980
VS
2286 }
2287
eec47cc6 2288
c1464d9d 2289 // the code page we're working with
b1d66b54 2290 long m_CodePage;
c1464d9d 2291
7ef3ab50 2292 // cached result of GetMBNulLen(), set to 0 initially meaning
c1464d9d
VZ
2293 // "unknown"
2294 size_t m_minMBCharWidth;
1cd52418 2295};
e95354ec
VZ
2296
2297#endif // wxHAVE_WIN32_MB2WC
2298
f7e98dee
RN
2299// ============================================================================
2300// Cocoa conversion classes
2301// ============================================================================
2302
2303#if defined(__WXCOCOA__)
2304
ef199164
DS
2305// RN: There is no UTF-32 support in either Core Foundation or Cocoa.
2306// Strangely enough, Core Foundation uses UTF-32 internally quite a
2307// bit - it's just not public (yet).
f7e98dee
RN
2308
2309#include <CoreFoundation/CFString.h>
2310#include <CoreFoundation/CFStringEncodingExt.h>
2311
2312CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 2313{
638357a0 2314 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ef199164
DS
2315
2316 switch (encoding)
ecd9653b 2317 {
ef199164
DS
2318 case wxFONTENCODING_DEFAULT :
2319 enc = CFStringGetSystemEncoding();
2320 break ;
2321
ecd9653b
WS
2322 case wxFONTENCODING_ISO8859_1 :
2323 enc = kCFStringEncodingISOLatin1 ;
2324 break ;
2325 case wxFONTENCODING_ISO8859_2 :
2326 enc = kCFStringEncodingISOLatin2;
2327 break ;
2328 case wxFONTENCODING_ISO8859_3 :
2329 enc = kCFStringEncodingISOLatin3 ;
2330 break ;
2331 case wxFONTENCODING_ISO8859_4 :
2332 enc = kCFStringEncodingISOLatin4;
2333 break ;
2334 case wxFONTENCODING_ISO8859_5 :
2335 enc = kCFStringEncodingISOLatinCyrillic;
2336 break ;
2337 case wxFONTENCODING_ISO8859_6 :
2338 enc = kCFStringEncodingISOLatinArabic;
2339 break ;
2340 case wxFONTENCODING_ISO8859_7 :
2341 enc = kCFStringEncodingISOLatinGreek;
2342 break ;
2343 case wxFONTENCODING_ISO8859_8 :
2344 enc = kCFStringEncodingISOLatinHebrew;
2345 break ;
2346 case wxFONTENCODING_ISO8859_9 :
2347 enc = kCFStringEncodingISOLatin5;
2348 break ;
2349 case wxFONTENCODING_ISO8859_10 :
2350 enc = kCFStringEncodingISOLatin6;
2351 break ;
2352 case wxFONTENCODING_ISO8859_11 :
2353 enc = kCFStringEncodingISOLatinThai;
2354 break ;
2355 case wxFONTENCODING_ISO8859_13 :
2356 enc = kCFStringEncodingISOLatin7;
2357 break ;
2358 case wxFONTENCODING_ISO8859_14 :
2359 enc = kCFStringEncodingISOLatin8;
2360 break ;
2361 case wxFONTENCODING_ISO8859_15 :
2362 enc = kCFStringEncodingISOLatin9;
2363 break ;
2364
2365 case wxFONTENCODING_KOI8 :
2366 enc = kCFStringEncodingKOI8_R;
2367 break ;
2368 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2369 enc = kCFStringEncodingDOSRussian;
2370 break ;
2371
2372// case wxFONTENCODING_BULGARIAN :
2373// enc = ;
2374// break ;
2375
2376 case wxFONTENCODING_CP437 :
ef199164 2377 enc = kCFStringEncodingDOSLatinUS ;
ecd9653b
WS
2378 break ;
2379 case wxFONTENCODING_CP850 :
2380 enc = kCFStringEncodingDOSLatin1;
2381 break ;
2382 case wxFONTENCODING_CP852 :
2383 enc = kCFStringEncodingDOSLatin2;
2384 break ;
2385 case wxFONTENCODING_CP855 :
2386 enc = kCFStringEncodingDOSCyrillic;
2387 break ;
2388 case wxFONTENCODING_CP866 :
ef199164 2389 enc = kCFStringEncodingDOSRussian ;
ecd9653b
WS
2390 break ;
2391 case wxFONTENCODING_CP874 :
2392 enc = kCFStringEncodingDOSThai;
2393 break ;
2394 case wxFONTENCODING_CP932 :
2395 enc = kCFStringEncodingDOSJapanese;
2396 break ;
2397 case wxFONTENCODING_CP936 :
ef199164 2398 enc = kCFStringEncodingDOSChineseSimplif ;
ecd9653b
WS
2399 break ;
2400 case wxFONTENCODING_CP949 :
2401 enc = kCFStringEncodingDOSKorean;
2402 break ;
2403 case wxFONTENCODING_CP950 :
2404 enc = kCFStringEncodingDOSChineseTrad;
2405 break ;
ecd9653b
WS
2406 case wxFONTENCODING_CP1250 :
2407 enc = kCFStringEncodingWindowsLatin2;
2408 break ;
2409 case wxFONTENCODING_CP1251 :
ef199164 2410 enc = kCFStringEncodingWindowsCyrillic ;
ecd9653b
WS
2411 break ;
2412 case wxFONTENCODING_CP1252 :
ef199164 2413 enc = kCFStringEncodingWindowsLatin1 ;
ecd9653b
WS
2414 break ;
2415 case wxFONTENCODING_CP1253 :
2416 enc = kCFStringEncodingWindowsGreek;
2417 break ;
2418 case wxFONTENCODING_CP1254 :
2419 enc = kCFStringEncodingWindowsLatin5;
2420 break ;
2421 case wxFONTENCODING_CP1255 :
ef199164 2422 enc = kCFStringEncodingWindowsHebrew ;
ecd9653b
WS
2423 break ;
2424 case wxFONTENCODING_CP1256 :
ef199164 2425 enc = kCFStringEncodingWindowsArabic ;
ecd9653b
WS
2426 break ;
2427 case wxFONTENCODING_CP1257 :
2428 enc = kCFStringEncodingWindowsBalticRim;
2429 break ;
638357a0
RN
2430// This only really encodes to UTF7 (if that) evidently
2431// case wxFONTENCODING_UTF7 :
2432// enc = kCFStringEncodingNonLossyASCII ;
2433// break ;
ecd9653b
WS
2434 case wxFONTENCODING_UTF8 :
2435 enc = kCFStringEncodingUTF8 ;
2436 break ;
2437 case wxFONTENCODING_EUC_JP :
2438 enc = kCFStringEncodingEUC_JP;
2439 break ;
2440 case wxFONTENCODING_UTF16 :
f7e98dee 2441 enc = kCFStringEncodingUnicode ;
ecd9653b 2442 break ;
f7e98dee
RN
2443 case wxFONTENCODING_MACROMAN :
2444 enc = kCFStringEncodingMacRoman ;
2445 break ;
2446 case wxFONTENCODING_MACJAPANESE :
2447 enc = kCFStringEncodingMacJapanese ;
2448 break ;
2449 case wxFONTENCODING_MACCHINESETRAD :
2450 enc = kCFStringEncodingMacChineseTrad ;
2451 break ;
2452 case wxFONTENCODING_MACKOREAN :
2453 enc = kCFStringEncodingMacKorean ;
2454 break ;
2455 case wxFONTENCODING_MACARABIC :
2456 enc = kCFStringEncodingMacArabic ;
2457 break ;
2458 case wxFONTENCODING_MACHEBREW :
2459 enc = kCFStringEncodingMacHebrew ;
2460 break ;
2461 case wxFONTENCODING_MACGREEK :
2462 enc = kCFStringEncodingMacGreek ;
2463 break ;
2464 case wxFONTENCODING_MACCYRILLIC :
2465 enc = kCFStringEncodingMacCyrillic ;
2466 break ;
2467 case wxFONTENCODING_MACDEVANAGARI :
2468 enc = kCFStringEncodingMacDevanagari ;
2469 break ;
2470 case wxFONTENCODING_MACGURMUKHI :
2471 enc = kCFStringEncodingMacGurmukhi ;
2472 break ;
2473 case wxFONTENCODING_MACGUJARATI :
2474 enc = kCFStringEncodingMacGujarati ;
2475 break ;
2476 case wxFONTENCODING_MACORIYA :
2477 enc = kCFStringEncodingMacOriya ;
2478 break ;
2479 case wxFONTENCODING_MACBENGALI :
2480 enc = kCFStringEncodingMacBengali ;
2481 break ;
2482 case wxFONTENCODING_MACTAMIL :
2483 enc = kCFStringEncodingMacTamil ;
2484 break ;
2485 case wxFONTENCODING_MACTELUGU :
2486 enc = kCFStringEncodingMacTelugu ;
2487 break ;
2488 case wxFONTENCODING_MACKANNADA :
2489 enc = kCFStringEncodingMacKannada ;
2490 break ;
2491 case wxFONTENCODING_MACMALAJALAM :
2492 enc = kCFStringEncodingMacMalayalam ;
2493 break ;
2494 case wxFONTENCODING_MACSINHALESE :
2495 enc = kCFStringEncodingMacSinhalese ;
2496 break ;
2497 case wxFONTENCODING_MACBURMESE :
2498 enc = kCFStringEncodingMacBurmese ;
2499 break ;
2500 case wxFONTENCODING_MACKHMER :
2501 enc = kCFStringEncodingMacKhmer ;
2502 break ;
2503 case wxFONTENCODING_MACTHAI :
2504 enc = kCFStringEncodingMacThai ;
2505 break ;
2506 case wxFONTENCODING_MACLAOTIAN :
2507 enc = kCFStringEncodingMacLaotian ;
2508 break ;
2509 case wxFONTENCODING_MACGEORGIAN :
2510 enc = kCFStringEncodingMacGeorgian ;
2511 break ;
2512 case wxFONTENCODING_MACARMENIAN :
2513 enc = kCFStringEncodingMacArmenian ;
2514 break ;
2515 case wxFONTENCODING_MACCHINESESIMP :
2516 enc = kCFStringEncodingMacChineseSimp ;
2517 break ;
2518 case wxFONTENCODING_MACTIBETAN :
2519 enc = kCFStringEncodingMacTibetan ;
2520 break ;
2521 case wxFONTENCODING_MACMONGOLIAN :
2522 enc = kCFStringEncodingMacMongolian ;
2523 break ;
2524 case wxFONTENCODING_MACETHIOPIC :
2525 enc = kCFStringEncodingMacEthiopic ;
2526 break ;
2527 case wxFONTENCODING_MACCENTRALEUR :
2528 enc = kCFStringEncodingMacCentralEurRoman ;
2529 break ;
2530 case wxFONTENCODING_MACVIATNAMESE :
2531 enc = kCFStringEncodingMacVietnamese ;
2532 break ;
2533 case wxFONTENCODING_MACARABICEXT :
2534 enc = kCFStringEncodingMacExtArabic ;
2535 break ;
2536 case wxFONTENCODING_MACSYMBOL :
2537 enc = kCFStringEncodingMacSymbol ;
2538 break ;
2539 case wxFONTENCODING_MACDINGBATS :
2540 enc = kCFStringEncodingMacDingbats ;
2541 break ;
2542 case wxFONTENCODING_MACTURKISH :
2543 enc = kCFStringEncodingMacTurkish ;
2544 break ;
2545 case wxFONTENCODING_MACCROATIAN :
2546 enc = kCFStringEncodingMacCroatian ;
2547 break ;
2548 case wxFONTENCODING_MACICELANDIC :
2549 enc = kCFStringEncodingMacIcelandic ;
2550 break ;
2551 case wxFONTENCODING_MACROMANIAN :
2552 enc = kCFStringEncodingMacRomanian ;
2553 break ;
2554 case wxFONTENCODING_MACCELTIC :
2555 enc = kCFStringEncodingMacCeltic ;
2556 break ;
2557 case wxFONTENCODING_MACGAELIC :
2558 enc = kCFStringEncodingMacGaelic ;
2559 break ;
ecd9653b
WS
2560// case wxFONTENCODING_MACKEYBOARD :
2561// enc = kCFStringEncodingMacKeyboardGlyphs ;
2562// break ;
ef199164 2563
ecd9653b
WS
2564 default :
2565 // because gcc is picky
2566 break ;
ef199164
DS
2567 }
2568
ecd9653b 2569 return enc ;
f7e98dee
RN
2570}
2571
f7e98dee
RN
2572class wxMBConv_cocoa : public wxMBConv
2573{
2574public:
2575 wxMBConv_cocoa()
2576 {
2577 Init(CFStringGetSystemEncoding()) ;
2578 }
2579
d36c9347
VZ
2580 wxMBConv_cocoa(const wxMBConv_cocoa& conv)
2581 {
2582 m_encoding = conv.m_encoding;
2583 }
2584
a6900d10 2585#if wxUSE_FONTMAP
f7e98dee
RN
2586 wxMBConv_cocoa(const wxChar* name)
2587 {
267e11c5 2588 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
f7e98dee 2589 }
a6900d10 2590#endif
f7e98dee
RN
2591
2592 wxMBConv_cocoa(wxFontEncoding encoding)
2593 {
2594 Init( wxCFStringEncFromFontEnc(encoding) );
2595 }
2596
d3c7fc99 2597 virtual ~wxMBConv_cocoa()
f7e98dee
RN
2598 {
2599 }
2600
2601 void Init( CFStringEncoding encoding)
2602 {
638357a0 2603 m_encoding = encoding ;
f7e98dee
RN
2604 }
2605
2606 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2607 {
2608 wxASSERT(szUnConv);
ecd9653b 2609
638357a0
RN
2610 CFStringRef theString = CFStringCreateWithBytes (
2611 NULL, //the allocator
2612 (const UInt8*)szUnConv,
2613 strlen(szUnConv),
2614 m_encoding,
2615 false //no BOM/external representation
f7e98dee
RN
2616 );
2617
2618 wxASSERT(theString);
2619
638357a0
RN
2620 size_t nOutLength = CFStringGetLength(theString);
2621
2622 if (szOut == NULL)
f7e98dee 2623 {
f7e98dee 2624 CFRelease(theString);
638357a0 2625 return nOutLength;
f7e98dee 2626 }
ecd9653b 2627
638357a0 2628 CFRange theRange = { 0, nOutSize };
ecd9653b 2629
638357a0
RN
2630#if SIZEOF_WCHAR_T == 4
2631 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2632#endif
3698ae71 2633
f7e98dee 2634 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
3698ae71 2635
f7e98dee 2636 CFRelease(theString);
ecd9653b 2637
ef199164 2638 szUniCharBuffer[nOutLength] = '\0';
f7e98dee
RN
2639
2640#if SIZEOF_WCHAR_T == 4
ef199164
DS
2641 wxMBConvUTF16 converter;
2642 converter.MB2WC( szOut, (const char*)szUniCharBuffer, nOutSize );
2643 delete [] szUniCharBuffer;
f7e98dee 2644#endif
3698ae71 2645
638357a0 2646 return nOutLength;
f7e98dee
RN
2647 }
2648
2649 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2650 {
638357a0 2651 wxASSERT(szUnConv);
3698ae71 2652
f7e98dee 2653 size_t nRealOutSize;
638357a0 2654 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 2655 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2656
f7e98dee 2657#if SIZEOF_WCHAR_T == 4
d9d488cf 2658 wxMBConvUTF16 converter ;
ef199164
DS
2659 nBufSize = converter.WC2MB( NULL, szUnConv, 0 );
2660 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1];
2661 converter.WC2MB( (char*) szUniBuffer, szUnConv, nBufSize + sizeof(UniChar));
f7e98dee 2662 nBufSize /= sizeof(UniChar);
f7e98dee
RN
2663#endif
2664
2665 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2666 NULL, //allocator
2667 szUniBuffer,
2668 nBufSize,
638357a0 2669 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 2670 );
ecd9653b 2671
f7e98dee 2672 wxASSERT(theString);
ecd9653b 2673
f7e98dee 2674 //Note that CER puts a BOM when converting to unicode
638357a0
RN
2675 //so we check and use getchars instead in that case
2676 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 2677 {
638357a0
RN
2678 if (szOut != NULL)
2679 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
3698ae71 2680
638357a0
RN
2681 nRealOutSize = CFStringGetLength(theString) + 1;
2682 }
2683 else
2684 {
2685 CFStringGetBytes(
2686 theString,
2687 CFRangeMake(0, CFStringGetLength(theString)),
2688 m_encoding,
2689 0, //what to put in characters that can't be converted -
2690 //0 tells CFString to return NULL if it meets such a character
2691 false, //not an external representation
2692 (UInt8*) szOut,
3698ae71 2693 nOutSize,
638357a0
RN
2694 (CFIndex*) &nRealOutSize
2695 );
f7e98dee 2696 }
ecd9653b 2697
638357a0 2698 CFRelease(theString);
ecd9653b 2699
638357a0
RN
2700#if SIZEOF_WCHAR_T == 4
2701 delete[] szUniBuffer;
2702#endif
ecd9653b 2703
f7e98dee
RN
2704 return nRealOutSize - 1;
2705 }
2706
d36c9347
VZ
2707 virtual wxMBConv *Clone() const { return new wxMBConv_cocoa(*this); }
2708
f7e98dee 2709 bool IsOk() const
ecd9653b 2710 {
3698ae71 2711 return m_encoding != kCFStringEncodingInvalidId &&
638357a0 2712 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2713 }
2714
2715private:
638357a0 2716 CFStringEncoding m_encoding ;
f7e98dee
RN
2717};
2718
2719#endif // defined(__WXCOCOA__)
2720
335d31e0
SC
2721// ============================================================================
2722// Mac conversion classes
2723// ============================================================================
2724
2725#if defined(__WXMAC__) && defined(TARGET_CARBON)
2726
2727class wxMBConv_mac : public wxMBConv
2728{
2729public:
2730 wxMBConv_mac()
2731 {
2732 Init(CFStringGetSystemEncoding()) ;
2733 }
2734
d36c9347
VZ
2735 wxMBConv_mac(const wxMBConv_mac& conv)
2736 {
2737 Init(conv.m_char_encoding);
2738 }
2739
2d1659cf 2740#if wxUSE_FONTMAP
faa60a4f 2741 wxMBConv_mac(const char* name)
335d31e0 2742 {
ef199164 2743 Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) );
335d31e0 2744 }
2d1659cf 2745#endif
335d31e0
SC
2746
2747 wxMBConv_mac(wxFontEncoding encoding)
2748 {
d775fa82
WS
2749 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2750 }
2751
d3c7fc99 2752 virtual ~wxMBConv_mac()
d775fa82
WS
2753 {
2754 OSStatus status = noErr ;
739cb14a
SC
2755 if (m_MB2WC_converter)
2756 status = TECDisposeConverter(m_MB2WC_converter);
2757 if (m_WC2MB_converter)
2758 status = TECDisposeConverter(m_WC2MB_converter);
d775fa82
WS
2759 }
2760
739cb14a
SC
2761 void Init( TextEncodingBase encoding,TextEncodingVariant encodingVariant = kTextEncodingDefaultVariant ,
2762 TextEncodingFormat encodingFormat = kTextEncodingDefaultFormat)
d775fa82 2763 {
739cb14a
SC
2764 m_MB2WC_converter = NULL ;
2765 m_WC2MB_converter = NULL ;
2766 m_char_encoding = CreateTextEncoding(encoding, encodingVariant, encodingFormat) ;
ef199164 2767 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 0, kUnicode16BitFormat) ;
739cb14a 2768 }
d775fa82 2769
739cb14a
SC
2770 virtual void CreateIfNeeded() const
2771 {
2772 if ( m_MB2WC_converter == NULL && m_WC2MB_converter == NULL )
2773 {
2774 OSStatus status = noErr ;
2775 status = TECCreateConverter(&m_MB2WC_converter,
d775fa82
WS
2776 m_char_encoding,
2777 m_unicode_encoding);
739cb14a
SC
2778 wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2779 status = TECCreateConverter(&m_WC2MB_converter,
d775fa82
WS
2780 m_unicode_encoding,
2781 m_char_encoding);
739cb14a
SC
2782 wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2783 }
d775fa82 2784 }
57bd4c60 2785
335d31e0
SC
2786 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2787 {
739cb14a 2788 CreateIfNeeded() ;
d775fa82
WS
2789 OSStatus status = noErr ;
2790 ByteCount byteOutLen ;
9088c87b 2791 ByteCount byteInLen = strlen(psz) + 1;
d775fa82
WS
2792 wchar_t *tbuf = NULL ;
2793 UniChar* ubuf = NULL ;
2794 size_t res = 0 ;
2795
2796 if (buf == NULL)
2797 {
ef199164
DS
2798 // Apple specs say at least 32
2799 n = wxMax( 32, byteInLen ) ;
2800 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
d775fa82 2801 }
ef199164 2802
d775fa82 2803 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
ef199164 2804
f3a355ce 2805#if SIZEOF_WCHAR_T == 4
d775fa82 2806 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2807#else
d775fa82 2808 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2809#endif
ef199164
DS
2810
2811 status = TECConvertText(
2812 m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
2813 (TextPtr) ubuf, byteBufferLen, &byteOutLen);
2814
f3a355ce 2815#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2816 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2817 // is not properly terminated we get random characters at the end
2818 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d9d488cf 2819 wxMBConvUTF16 converter ;
ef199164 2820 res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
d775fa82 2821 free( ubuf ) ;
f3a355ce 2822#else
d775fa82 2823 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2824#endif
ef199164 2825
d775fa82
WS
2826 if ( buf == NULL )
2827 free(tbuf) ;
335d31e0 2828
335d31e0
SC
2829 if ( buf && res < n)
2830 buf[res] = 0;
2831
d775fa82 2832 return res ;
335d31e0
SC
2833 }
2834
2835 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82 2836 {
739cb14a 2837 CreateIfNeeded() ;
d775fa82
WS
2838 OSStatus status = noErr ;
2839 ByteCount byteOutLen ;
2840 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2841
2842 char *tbuf = NULL ;
2843
2844 if (buf == NULL)
2845 {
ef199164
DS
2846 // Apple specs say at least 32
2847 n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2848 tbuf = (char*) malloc( n ) ;
2849 }
2850
2851 ByteCount byteBufferLen = n ;
2852 UniChar* ubuf = NULL ;
ef199164 2853
f3a355ce 2854#if SIZEOF_WCHAR_T == 4
d9d488cf 2855 wxMBConvUTF16 converter ;
ef199164 2856 size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
d775fa82
WS
2857 byteInLen = unicharlen ;
2858 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
ef199164 2859 converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
f3a355ce 2860#else
d775fa82 2861 ubuf = (UniChar*) psz ;
f3a355ce 2862#endif
ef199164
DS
2863
2864 status = TECConvertText(
2865 m_WC2MB_converter, (ConstTextPtr) ubuf, byteInLen, &byteInLen,
2866 (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2867
f3a355ce 2868#if SIZEOF_WCHAR_T == 4
d775fa82 2869 free( ubuf ) ;
f3a355ce 2870#endif
ef199164 2871
d775fa82
WS
2872 if ( buf == NULL )
2873 free(tbuf) ;
335d31e0 2874
d775fa82 2875 size_t res = byteOutLen ;
335d31e0 2876 if ( buf && res < n)
638357a0 2877 {
335d31e0 2878 buf[res] = 0;
3698ae71 2879
638357a0
RN
2880 //we need to double-trip to verify it didn't insert any ? in place
2881 //of bogus characters
2882 wxWCharBuffer wcBuf(n);
2883 size_t pszlen = wxWcslen(psz);
467e0479 2884 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
638357a0
RN
2885 wxWcslen(wcBuf) != pszlen ||
2886 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2887 {
2888 // we didn't obtain the same thing we started from, hence
2889 // the conversion was lossy and we consider that it failed
467e0479 2890 return wxCONV_FAILED;
638357a0
RN
2891 }
2892 }
335d31e0 2893
d775fa82 2894 return res ;
335d31e0
SC
2895 }
2896
d3478e2c 2897 virtual wxMBConv *Clone() const { return new wxMBConv_mac(*this); }
d36c9347 2898
335d31e0 2899 bool IsOk() const
57bd4c60 2900 {
739cb14a 2901 CreateIfNeeded() ;
57bd4c60 2902 return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL;
739cb14a 2903 }
335d31e0 2904
739cb14a
SC
2905protected :
2906 mutable TECObjectRef m_MB2WC_converter;
2907 mutable TECObjectRef m_WC2MB_converter;
d775fa82 2908
ef199164
DS
2909 TextEncodingBase m_char_encoding;
2910 TextEncodingBase m_unicode_encoding;
335d31e0
SC
2911};
2912
739cb14a
SC
2913// MB is decomposed (D) normalized UTF8
2914
2915class wxMBConv_macUTF8D : public wxMBConv_mac
2916{
2917public :
57bd4c60 2918 wxMBConv_macUTF8D()
739cb14a
SC
2919 {
2920 Init( kTextEncodingUnicodeDefault , kUnicodeNoSubset , kUnicodeUTF8Format ) ;
2921 m_uni = NULL;
fbb0b8af 2922 m_uniBack = NULL ;
739cb14a 2923 }
57bd4c60 2924
d3c7fc99 2925 virtual ~wxMBConv_macUTF8D()
739cb14a 2926 {
fbb0b8af
SC
2927 if (m_uni!=NULL)
2928 DisposeUnicodeToTextInfo(&m_uni);
2929 if (m_uniBack!=NULL)
2930 DisposeUnicodeToTextInfo(&m_uniBack);
739cb14a 2931 }
57bd4c60 2932
739cb14a
SC
2933 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2934 {
2935 CreateIfNeeded() ;
2936 OSStatus status = noErr ;
2937 ByteCount byteOutLen ;
2938 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2939
2940 char *tbuf = NULL ;
2941
2942 if (buf == NULL)
2943 {
2944 // Apple specs say at least 32
2945 n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2946 tbuf = (char*) malloc( n ) ;
2947 }
2948
2949 ByteCount byteBufferLen = n ;
2950 UniChar* ubuf = NULL ;
2951
2952#if SIZEOF_WCHAR_T == 4
2953 wxMBConvUTF16 converter ;
2954 size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
2955 byteInLen = unicharlen ;
2956 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2957 converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
2958#else
2959 ubuf = (UniChar*) psz ;
2960#endif
2961
57bd4c60
WS
2962 // ubuf is a non-decomposed UniChar buffer
2963
739cb14a
SC
2964 ByteCount dcubuflen = byteInLen * 2 + 2 ;
2965 ByteCount dcubufread , dcubufwritten ;
57bd4c60
WS
2966 UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
2967
2968 ConvertFromUnicodeToText( m_uni , byteInLen , ubuf ,
739cb14a 2969 kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , dcubuf ) ;
57bd4c60 2970
739cb14a
SC
2971 // we now convert that decomposed buffer into UTF8
2972
2973 status = TECConvertText(
2974 m_WC2MB_converter, (ConstTextPtr) dcubuf, dcubufwritten, &dcubufread,
2975 (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2976
2977 free( dcubuf );
2978
2979#if SIZEOF_WCHAR_T == 4
2980 free( ubuf ) ;
2981#endif
2982
2983 if ( buf == NULL )
2984 free(tbuf) ;
2985
2986 size_t res = byteOutLen ;
2987 if ( buf && res < n)
2988 {
2989 buf[res] = 0;
2990 // don't test for round-trip fidelity yet, we cannot guarantee it yet
2991 }
2992
2993 return res ;
2994 }
57bd4c60 2995
fbb0b8af
SC
2996 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2997 {
2998 CreateIfNeeded() ;
2999 OSStatus status = noErr ;
3000 ByteCount byteOutLen ;
3001 ByteCount byteInLen = strlen(psz) + 1;
3002 wchar_t *tbuf = NULL ;
3003 UniChar* ubuf = NULL ;
3004 size_t res = 0 ;
57bd4c60 3005
fbb0b8af
SC
3006 if (buf == NULL)
3007 {
3008 // Apple specs say at least 32
3009 n = wxMax( 32, byteInLen ) ;
3010 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
3011 }
57bd4c60 3012
fbb0b8af 3013 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
57bd4c60 3014
fbb0b8af
SC
3015#if SIZEOF_WCHAR_T == 4
3016 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
3017#else
3018 ubuf = (UniChar*) (buf ? buf : tbuf) ;
3019#endif
57bd4c60 3020
fbb0b8af
SC
3021 ByteCount dcubuflen = byteBufferLen * 2 + 2 ;
3022 ByteCount dcubufread , dcubufwritten ;
57bd4c60 3023 UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
fbb0b8af
SC
3024
3025 status = TECConvertText(
3026 m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
3027 (TextPtr) dcubuf, dcubuflen, &byteOutLen);
3028 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
3029 // is not properly terminated we get random characters at the end
3030 dcubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
57bd4c60 3031
fbb0b8af 3032 // now from the decomposed UniChar to properly composed uniChar
57bd4c60 3033 ConvertFromUnicodeToText( m_uniBack , byteOutLen , dcubuf ,
fbb0b8af
SC
3034 kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , ubuf ) ;
3035
3036 free( dcubuf );
3037 byteOutLen = dcubufwritten ;
3038 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
57bd4c60
WS
3039
3040
fbb0b8af
SC
3041#if SIZEOF_WCHAR_T == 4
3042 wxMBConvUTF16 converter ;
3043 res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
3044 free( ubuf ) ;
3045#else
3046 res = byteOutLen / sizeof( UniChar ) ;
3047#endif
57bd4c60 3048
fbb0b8af
SC
3049 if ( buf == NULL )
3050 free(tbuf) ;
57bd4c60 3051
fbb0b8af
SC
3052 if ( buf && res < n)
3053 buf[res] = 0;
57bd4c60 3054
fbb0b8af
SC
3055 return res ;
3056 }
3057
739cb14a
SC
3058 virtual void CreateIfNeeded() const
3059 {
3060 wxMBConv_mac::CreateIfNeeded() ;
3061 if ( m_uni == NULL )
3062 {
3063 m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3064 kUnicodeNoSubset, kTextEncodingDefaultFormat);
3065 m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3066 kUnicodeCanonicalDecompVariant, kTextEncodingDefaultFormat);
3067 m_map.mappingVersion = kUnicodeUseLatestMapping;
57bd4c60
WS
3068
3069 OSStatus err = CreateUnicodeToTextInfo(&m_map, &m_uni);
739cb14a 3070 wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
57bd4c60 3071
fbb0b8af
SC
3072 m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3073 kUnicodeNoSubset, kTextEncodingDefaultFormat);
3074 m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3075 kUnicodeCanonicalCompVariant, kTextEncodingDefaultFormat);
3076 m_map.mappingVersion = kUnicodeUseLatestMapping;
57bd4c60 3077 err = CreateUnicodeToTextInfo(&m_map, &m_uniBack);
fbb0b8af 3078 wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
739cb14a
SC
3079 }
3080 }
3081protected :
3082 mutable UnicodeToTextInfo m_uni;
fbb0b8af 3083 mutable UnicodeToTextInfo m_uniBack;
739cb14a 3084 mutable UnicodeMapping m_map;
57bd4c60 3085};
335d31e0 3086#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 3087
36acb880
VZ
3088// ============================================================================
3089// wxEncodingConverter based conversion classes
3090// ============================================================================
3091
1e6feb95 3092#if wxUSE_FONTMAP
1cd52418 3093
e95354ec 3094class wxMBConv_wxwin : public wxMBConv
1cd52418 3095{
8b04d4c4
VZ
3096private:
3097 void Init()
3098 {
3099 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
3100 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
3101 }
3102
6001e347 3103public:
f1339c56
RR
3104 // temporarily just use wxEncodingConverter stuff,
3105 // so that it works while a better implementation is built
86501081 3106 wxMBConv_wxwin(const char* name)
f1339c56
RR
3107 {
3108 if (name)
267e11c5 3109 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
3110 else
3111 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 3112
8b04d4c4
VZ
3113 Init();
3114 }
3115
e95354ec 3116 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
3117 {
3118 m_enc = enc;
3119
3120 Init();
f1339c56 3121 }
dccce9ea 3122
bde4baac 3123 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
3124 {
3125 size_t inbuf = strlen(psz);
dccce9ea 3126 if (buf)
c643a977 3127 {
ef199164 3128 if (!m2w.Convert(psz, buf))
467e0479 3129 return wxCONV_FAILED;
c643a977 3130 }
f1339c56
RR
3131 return inbuf;
3132 }
dccce9ea 3133
bde4baac 3134 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 3135 {
f8d791e0 3136 const size_t inbuf = wxWcslen(psz);
f1339c56 3137 if (buf)
c643a977 3138 {
ef199164 3139 if (!w2m.Convert(psz, buf))
467e0479 3140 return wxCONV_FAILED;
c643a977 3141 }
dccce9ea 3142
f1339c56
RR
3143 return inbuf;
3144 }
dccce9ea 3145
7ef3ab50 3146 virtual size_t GetMBNulLen() const
eec47cc6
VZ
3147 {
3148 switch ( m_enc )
3149 {
3150 case wxFONTENCODING_UTF16BE:
3151 case wxFONTENCODING_UTF16LE:
c1464d9d 3152 return 2;
eec47cc6
VZ
3153
3154 case wxFONTENCODING_UTF32BE:
3155 case wxFONTENCODING_UTF32LE:
c1464d9d 3156 return 4;
eec47cc6
VZ
3157
3158 default:
c1464d9d 3159 return 1;
eec47cc6
VZ
3160 }
3161 }
3162
d36c9347
VZ
3163 virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
3164
7ef3ab50
VZ
3165 bool IsOk() const { return m_ok; }
3166
3167public:
3168 wxFontEncoding m_enc;
3169 wxEncodingConverter m2w, w2m;
3170
3171private:
cafbf6fb
VZ
3172 // were we initialized successfully?
3173 bool m_ok;
fc7a2a60 3174
e95354ec 3175 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 3176};
6001e347 3177
8f115891 3178// make the constructors available for unit testing
86501081 3179WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const char* name )
8f115891
MW
3180{
3181 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
3182 if ( !result->IsOk() )
3183 {
3184 delete result;
3185 return 0;
3186 }
ef199164 3187
8f115891
MW
3188 return result;
3189}
3190
1e6feb95
VZ
3191#endif // wxUSE_FONTMAP
3192
36acb880
VZ
3193// ============================================================================
3194// wxCSConv implementation
3195// ============================================================================
3196
8b04d4c4 3197void wxCSConv::Init()
6001e347 3198{
e95354ec
VZ
3199 m_name = NULL;
3200 m_convReal = NULL;
3201 m_deferred = true;
3202}
3203
86501081 3204wxCSConv::wxCSConv(const wxString& charset)
8b04d4c4
VZ
3205{
3206 Init();
82713003 3207
86501081 3208 if ( !charset.empty() )
e95354ec 3209 {
86501081 3210 SetName(charset.ToAscii());
e95354ec 3211 }
bda3d86a 3212
e4277538
VZ
3213#if wxUSE_FONTMAP
3214 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
3215#else
bda3d86a 3216 m_encoding = wxFONTENCODING_SYSTEM;
e4277538 3217#endif
6001e347
RR
3218}
3219
8b04d4c4
VZ
3220wxCSConv::wxCSConv(wxFontEncoding encoding)
3221{
bda3d86a 3222 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
3223 {
3224 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
3225
3226 encoding = wxFONTENCODING_SYSTEM;
3227 }
3228
8b04d4c4
VZ
3229 Init();
3230
bda3d86a 3231 m_encoding = encoding;
8b04d4c4
VZ
3232}
3233
6001e347
RR
3234wxCSConv::~wxCSConv()
3235{
65e50848
JS
3236 Clear();
3237}
3238
54380f29 3239wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 3240 : wxMBConv()
54380f29 3241{
8b04d4c4
VZ
3242 Init();
3243
54380f29 3244 SetName(conv.m_name);
8b04d4c4 3245 m_encoding = conv.m_encoding;
54380f29
GD
3246}
3247
3248wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
3249{
3250 Clear();
8b04d4c4 3251
54380f29 3252 SetName(conv.m_name);
8b04d4c4
VZ
3253 m_encoding = conv.m_encoding;
3254
54380f29
GD
3255 return *this;
3256}
3257
65e50848
JS
3258void wxCSConv::Clear()
3259{
8b04d4c4 3260 free(m_name);
e95354ec 3261 delete m_convReal;
8b04d4c4 3262
65e50848 3263 m_name = NULL;
e95354ec 3264 m_convReal = NULL;
6001e347
RR
3265}
3266
86501081 3267void wxCSConv::SetName(const char *charset)
6001e347 3268{
f1339c56
RR
3269 if (charset)
3270 {
86501081 3271 m_name = strdup(charset);
e95354ec 3272 m_deferred = true;
f1339c56 3273 }
6001e347
RR
3274}
3275
#if wxUSE_FONTMAP

// map from an encoding to a charset name, used by wxCSConv::DoCreate() to
// remember which name worked with iconv (an empty string records a failure)
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

static wxEncodingNameCache gs_nameCache;
#endif
3283
e95354ec
VZ
3284wxMBConv *wxCSConv::DoCreate() const
3285{
ce6f8d6f
VZ
3286#if wxUSE_FONTMAP
3287 wxLogTrace(TRACE_STRCONV,
3288 wxT("creating conversion for %s"),
3289 (m_name ? m_name
86501081 3290 : (const char*)wxFontMapperBase::GetEncodingName(m_encoding).mb_str()));
ce6f8d6f
VZ
3291#endif // wxUSE_FONTMAP
3292
c547282d
VZ
3293 // check for the special case of ASCII or ISO8859-1 charset: as we have
3294 // special knowledge of it anyhow, we don't need to create a special
3295 // conversion object
e4277538
VZ
3296 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
3297 m_encoding == wxFONTENCODING_DEFAULT )
f1339c56 3298 {
e95354ec
VZ
3299 // don't convert at all
3300 return NULL;
3301 }
dccce9ea 3302
e95354ec
VZ
3303 // we trust OS to do conversion better than we can so try external
3304 // conversion methods first
3305 //
3306 // the full order is:
3307 // 1. OS conversion (iconv() under Unix or Win32 API)
3308 // 2. hard coded conversions for UTF
3309 // 3. wxEncodingConverter as fall back
3310
3311 // step (1)
3312#ifdef HAVE_ICONV
c547282d 3313#if !wxUSE_FONTMAP
e95354ec 3314 if ( m_name )
c547282d 3315#endif // !wxUSE_FONTMAP
e95354ec 3316 {
3ef10cfc 3317#if wxUSE_FONTMAP
8b3eb85d 3318 wxFontEncoding encoding(m_encoding);
3ef10cfc 3319#endif
8b3eb85d 3320
86501081 3321 if ( m_name )
8b3eb85d 3322 {
86501081 3323 wxMBConv_iconv *conv = new wxMBConv_iconv(m_name);
8b3eb85d
VZ
3324 if ( conv->IsOk() )
3325 return conv;
3326
3327 delete conv;
c547282d
VZ
3328
3329#if wxUSE_FONTMAP
8b3eb85d 3330 encoding =
86501081 3331 wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 3332#endif // wxUSE_FONTMAP
8b3eb85d
VZ
3333 }
3334#if wxUSE_FONTMAP
3335 {
3336 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
3337 if ( it != gs_nameCache.end() )
3338 {
3339 if ( it->second.empty() )
3340 return NULL;
c547282d 3341
86501081 3342 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second.ToAscii());
8b3eb85d
VZ
3343 if ( conv->IsOk() )
3344 return conv;
e95354ec 3345
8b3eb85d
VZ
3346 delete conv;
3347 }
3348
3349 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
86501081
VS
3350 // CS : in case this does not return valid names (eg for MacRoman)
3351 // encoding got a 'failure' entry in the cache all the same,
3352 // although it just has to be created using a different method, so
3353 // only store failed iconv creation attempts (or perhaps we
3354 // shoulnd't do this at all ?)
3c67ec06 3355 if ( names[0] != NULL )
8b3eb85d 3356 {
3c67ec06 3357 for ( ; *names; ++names )
8b3eb85d 3358 {
86501081
VS
3359 // FIXME-UTF8: wxFontMapperBase::GetAllEncodingNames()
3360 // will need changes that will obsolete this
3361 wxString name(*names);
3362 wxMBConv_iconv *conv = new wxMBConv_iconv(name.ToAscii());
3c67ec06
SC
3363 if ( conv->IsOk() )
3364 {
3365 gs_nameCache[encoding] = *names;
3366 return conv;
3367 }
3368
3369 delete conv;
8b3eb85d
VZ
3370 }
3371
3c67ec06 3372 gs_nameCache[encoding] = _T(""); // cache the failure
8b3eb85d 3373 }
8b3eb85d
VZ
3374 }
3375#endif // wxUSE_FONTMAP
e95354ec
VZ
3376 }
3377#endif // HAVE_ICONV
3378
3379#ifdef wxHAVE_WIN32_MB2WC
3380 {
7608a683 3381#if wxUSE_FONTMAP
e95354ec
VZ
3382 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
3383 : new wxMBConv_win32(m_encoding);
3384 if ( conv->IsOk() )
3385 return conv;
3386
3387 delete conv;
7608a683
WS
3388#else
3389 return NULL;
3390#endif
e95354ec
VZ
3391 }
3392#endif // wxHAVE_WIN32_MB2WC
ef199164 3393
d775fa82
WS
3394#if defined(__WXMAC__)
3395 {
5c3c8676 3396 // leave UTF16 and UTF32 to the built-ins of wx
3698ae71 3397 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
5c3c8676 3398 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
d775fa82 3399 {
2d1659cf 3400#if wxUSE_FONTMAP
d775fa82
WS
3401 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
3402 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
3403#else
3404 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
3405#endif
d775fa82 3406 if ( conv->IsOk() )
f7e98dee
RN
3407 return conv;
3408
3409 delete conv;
3410 }
3411 }
3412#endif
ef199164 3413
f7e98dee
RN
3414#if defined(__WXCOCOA__)
3415 {
3416 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
3417 {
a6900d10 3418#if wxUSE_FONTMAP
f7e98dee
RN
3419 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
3420 : new wxMBConv_cocoa(m_encoding);
a6900d10
RN
3421#else
3422 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
3423#endif
ef199164 3424
f7e98dee 3425 if ( conv->IsOk() )
d775fa82
WS
3426 return conv;
3427
3428 delete conv;
3429 }
335d31e0
SC
3430 }
3431#endif
e95354ec
VZ
3432 // step (2)
3433 wxFontEncoding enc = m_encoding;
3434#if wxUSE_FONTMAP
c547282d
VZ
3435 if ( enc == wxFONTENCODING_SYSTEM && m_name )
3436 {
3437 // use "false" to suppress interactive dialogs -- we can be called from
3438 // anywhere and popping up a dialog from here is the last thing we want to
3439 // do
267e11c5 3440 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 3441 }
e95354ec
VZ
3442#endif // wxUSE_FONTMAP
3443
3444 switch ( enc )
3445 {
3446 case wxFONTENCODING_UTF7:
3447 return new wxMBConvUTF7;
3448
3449 case wxFONTENCODING_UTF8:
3450 return new wxMBConvUTF8;
3451
e95354ec
VZ
3452 case wxFONTENCODING_UTF16BE:
3453 return new wxMBConvUTF16BE;
3454
3455 case wxFONTENCODING_UTF16LE:
3456 return new wxMBConvUTF16LE;
3457
e95354ec
VZ
3458 case wxFONTENCODING_UTF32BE:
3459 return new wxMBConvUTF32BE;
3460
3461 case wxFONTENCODING_UTF32LE:
3462 return new wxMBConvUTF32LE;
3463
3464 default:
3465 // nothing to do but put here to suppress gcc warnings
ef199164 3466 break;
e95354ec
VZ
3467 }
3468
3469 // step (3)
3470#if wxUSE_FONTMAP
3471 {
3472 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
3473 : new wxMBConv_wxwin(m_encoding);
3474 if ( conv->IsOk() )
3475 return conv;
3476
3477 delete conv;
3478 }
3479#endif // wxUSE_FONTMAP
3480
a58d4f4d
VS
3481 // NB: This is a hack to prevent deadlock. What could otherwise happen
3482 // in Unicode build: wxConvLocal creation ends up being here
3483 // because of some failure and logs the error. But wxLog will try to
6a17b868
SN
3484 // attach a timestamp, for which it will need wxConvLocal (to convert
3485 // time to char* and then wchar_t*), but that fails, tries to log the
3486 // error, but wxLog has an (already locked) critical section that
3487 // guards the static buffer.
a58d4f4d
VS
3488 static bool alreadyLoggingError = false;
3489 if (!alreadyLoggingError)
3490 {
3491 alreadyLoggingError = true;
3492 wxLogError(_("Cannot convert from the charset '%s'!"),
3493 m_name ? m_name
e95354ec
VZ
3494 :
3495#if wxUSE_FONTMAP
86501081 3496 (const char*)wxFontMapperBase::GetEncodingDescription(m_encoding).ToAscii()
e95354ec 3497#else // !wxUSE_FONTMAP
86501081 3498 (const char*)wxString::Format(_("encoding %i"), m_encoding).ToAscii()
e95354ec
VZ
3499#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
3500 );
ef199164 3501
a58d4f4d
VS
3502 alreadyLoggingError = false;
3503 }
e95354ec
VZ
3504
3505 return NULL;
3506}
3507
3508void wxCSConv::CreateConvIfNeeded() const
3509{
3510 if ( m_deferred )
3511 {
3512 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a 3513
bda3d86a
VZ
3514 // if we don't have neither the name nor the encoding, use the default
3515 // encoding for this system
3516 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3517 {
4c75209f 3518#if wxUSE_INTL
02c7347b 3519 self->m_encoding = wxLocale::GetSystemEncoding();
4c75209f
VS
3520#else
3521 // fallback to some reasonable default:
3522 self->m_encoding = wxFONTENCODING_ISO8859_1;
bda3d86a 3523#endif // wxUSE_INTL
4c75209f 3524 }
bda3d86a 3525
e95354ec
VZ
3526 self->m_convReal = DoCreate();
3527 self->m_deferred = false;
6001e347 3528 }
6001e347
RR
3529}
3530
0f0298b1
VZ
3531bool wxCSConv::IsOk() const
3532{
3533 CreateConvIfNeeded();
3534
3535 // special case: no convReal created for wxFONTENCODING_ISO8859_1
3536 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
3537 return true; // always ok as we do it ourselves
3538
3539 // m_convReal->IsOk() is called at its own creation, so we know it must
3540 // be ok if m_convReal is non-NULL
3541 return m_convReal != NULL;
3542}
3543
1c714a5d
VZ
3544size_t wxCSConv::ToWChar(wchar_t *dst, size_t dstLen,
3545 const char *src, size_t srcLen) const
3546{
3547 CreateConvIfNeeded();
3548
2c74c558
VS
3549 if (m_convReal)
3550 return m_convReal->ToWChar(dst, dstLen, src, srcLen);
3551
3552 // latin-1 (direct)
3553 return wxMBConv::ToWChar(dst, dstLen, src, srcLen);
1c714a5d
VZ
3554}
3555
3556size_t wxCSConv::FromWChar(char *dst, size_t dstLen,
3557 const wchar_t *src, size_t srcLen) const
3558{
3559 CreateConvIfNeeded();
3560
2c74c558
VS
3561 if (m_convReal)
3562 return m_convReal->FromWChar(dst, dstLen, src, srcLen);
3563
3564 // latin-1 (direct)
3565 return wxMBConv::FromWChar(dst, dstLen, src, srcLen);
1c714a5d
VZ
3566}
3567
6001e347
RR
3568size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
3569{
e95354ec 3570 CreateConvIfNeeded();
dccce9ea 3571
e95354ec
VZ
3572 if (m_convReal)
3573 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
3574
3575 // latin-1 (direct)
4def3b35 3576 size_t len = strlen(psz);
dccce9ea 3577
f1339c56
RR
3578 if (buf)
3579 {
4def3b35 3580 for (size_t c = 0; c <= len; c++)
f1339c56
RR
3581 buf[c] = (unsigned char)(psz[c]);
3582 }
dccce9ea 3583
f1339c56 3584 return len;
6001e347
RR
3585}
3586
3587size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
3588{
e95354ec 3589 CreateConvIfNeeded();
dccce9ea 3590
e95354ec
VZ
3591 if (m_convReal)
3592 return m_convReal->WC2MB(buf, psz, n);
1cd52418 3593
f1339c56 3594 // latin-1 (direct)
f8d791e0 3595 const size_t len = wxWcslen(psz);
f1339c56
RR
3596 if (buf)
3597 {
4def3b35 3598 for (size_t c = 0; c <= len; c++)
24642831
VS
3599 {
3600 if (psz[c] > 0xFF)
467e0479 3601 return wxCONV_FAILED;
ef199164 3602
907173e5 3603 buf[c] = (char)psz[c];
24642831
VS
3604 }
3605 }
3606 else
3607 {
3608 for (size_t c = 0; c <= len; c++)
3609 {
3610 if (psz[c] > 0xFF)
467e0479 3611 return wxCONV_FAILED;
24642831 3612 }
f1339c56 3613 }
dccce9ea 3614
f1339c56 3615 return len;
6001e347
RR
3616}
3617
7ef3ab50 3618size_t wxCSConv::GetMBNulLen() const
eec47cc6
VZ
3619{
3620 CreateConvIfNeeded();
3621
3622 if ( m_convReal )
3623 {
7ef3ab50 3624 return m_convReal->GetMBNulLen();
eec47cc6
VZ
3625 }
3626
ba98e032 3627 // otherwise, we are ISO-8859-1
c1464d9d 3628 return 1;
eec47cc6
VZ
3629}
3630
ba98e032
VS
#if wxUSE_UNICODE_UTF8
// Whether this converter is UTF-8: whatever the real converter reports, or
// false when there is none because we are ISO-8859-1.
bool wxCSConv::IsUTF8() const
{
    CreateConvIfNeeded();

    return m_convReal ? m_convReal->IsUTF8() : false;
}
#endif
3645
69c928ef
VZ
3646
3647#if wxUSE_UNICODE
3648
3649wxWCharBuffer wxSafeConvertMB2WX(const char *s)
3650{
3651 if ( !s )
3652 return wxWCharBuffer();
3653
3654 wxWCharBuffer wbuf(wxConvLibc.cMB2WX(s));
3655 if ( !wbuf )
5487ff0f 3656 wbuf = wxMBConvUTF8().cMB2WX(s);
69c928ef
VZ
3657 if ( !wbuf )
3658 wbuf = wxConvISO8859_1.cMB2WX(s);
3659
3660 return wbuf;
3661}
3662
3663wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
3664{
3665 if ( !ws )
3666 return wxCharBuffer();
3667
3668 wxCharBuffer buf(wxConvLibc.cWX2MB(ws));
3669 if ( !buf )
3670 buf = wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL).cWX2MB(ws);
3671
3672 return buf;
3673}
3674
3675#endif // wxUSE_UNICODE
f5a1953b 3676
1e50d914
VS
3677// ----------------------------------------------------------------------------
3678// globals
3679// ----------------------------------------------------------------------------
3680
3681// NB: The reason why we create converter objects in this convoluted way,
3682// using a factory function instead of global variable, is that they
3683// may be used at static initialization time (some of them are used by
3684// wxString ctors and there may be a global wxString object). In other
3685// words, possibly _before_ the converter global object would be
3686// initialized.
3687
3688#undef wxConvLibc
3689#undef wxConvUTF8
3690#undef wxConvUTF7
3691#undef wxConvLocal
3692#undef wxConvISO8859_1
3693
// WX_DEFINE_GLOBAL_CONV2(klass, impl_klass, name, ctor_args) defines, for the
// global converter called name:
//  - the pointer name##Ptr of type klass*, initially NULL
//  - the accessor wxGet_##name##Ptr() returning the address of a function
//    static object of type impl_klass constructed with ctor_args
//  - a file static pointer initialised by calling the accessor
#define WX_DEFINE_GLOBAL_CONV2(klass, impl_klass, name, ctor_args)      \
    WXDLLIMPEXP_DATA_BASE(klass*) name##Ptr = NULL;                     \
    WXDLLIMPEXP_BASE klass* wxGet_##name##Ptr()                         \
    {                                                                   \
        static impl_klass name##Obj ctor_args;                          \
        return &name##Obj;                                              \
    }                                                                   \
    /* this ensures that all global converter objects are created */    \
    /* by the time static initialization is done, i.e. before any */    \
    /* thread is launched: */                                           \
    static klass* gs_##name##instance = wxGet_##name##Ptr()

// same as WX_DEFINE_GLOBAL_CONV2() when the implementation class is the same
// as the class of the pointer
#define WX_DEFINE_GLOBAL_CONV(klass, name, ctor_args) \
    WX_DEFINE_GLOBAL_CONV2(klass, klass, name, ctor_args)
3708
3709#ifdef __WINDOWS__
3710 WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_win32, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
3711#elif defined(__WXMAC__) && !defined(__MACH__)
3712 WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_mac, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
3713#else
3714 WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConvLibc, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
3715#endif
3716
3717WX_DEFINE_GLOBAL_CONV(wxMBConvUTF8, wxConvUTF8, wxEMPTY_PARAMETER_VALUE);
3718WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, wxEMPTY_PARAMETER_VALUE);
3719
3720WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM));
3721WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1));
3722
3723WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = wxGet_wxConvLibcPtr();
3724WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = wxGet_wxConvLocalPtr();
3725
3726#if defined(__WXMAC__) && defined(TARGET_CARBON)
3727static wxMBConv_macUTF8D wxConvMacUTF8DObj;
3728#endif
3729WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName =
3730#ifdef __WXOSX__
3731#if defined(__WXMAC__) && defined(TARGET_CARBON)
3732 &wxConvMacUTF8DObj;
3733#else
3734 wxGet_wxConvUTF8Ptr();
3735#endif
3736#else // !__WXOSX__
3737 wxGet_wxConvLibcPtr();
3738#endif // __WXOSX__/!__WXOSX__
3739
bde4baac
VZ
3740#else // !wxUSE_WCHAR_T
3741
1e50d914 3742// FIXME-UTF8: remove this, wxUSE_WCHAR_T is required now
bde4baac
VZ
3743// stand-ins in absence of wchar_t
3744WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
3745 wxConvISO8859_1,
3746 wxConvLocal,
3747 wxConvUTF8;
3748
3749#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T