]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
added alpha channel support to wxDFB's wxBitmap
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347 1/////////////////////////////////////////////////////////////////////////////
38d4b1e4 2// Name: src/common/strconv.cpp
6001e347 3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
6001e347
RR
15// For compilers that support precompilation, includes "wx.h".
16#include "wx/wxprec.h"
17
480f42ec
VS
18#ifdef __BORLANDC__
19 #pragma hdrstop
20#endif //__BORLANDC__
21
373658eb
VZ
22#ifndef WX_PRECOMP
23 #include "wx/intl.h"
24 #include "wx/log.h"
de6185e2 25 #include "wx/utils.h"
df69528b 26 #include "wx/hashmap.h"
ef199164 27#endif
373658eb 28
bde4baac
VZ
29#include "wx/strconv.h"
30
31#if wxUSE_WCHAR_T
32
1c193821 33#ifndef __WXWINCE__
1cd52418 34#include <errno.h>
1c193821
JS
35#endif
36
6001e347
RR
37#include <ctype.h>
38#include <string.h>
39#include <stdlib.h>
40
e95354ec 41#if defined(__WIN32__) && !defined(__WXMICROWIN__)
a6c2e2c7
VZ
42 #include "wx/msw/private.h"
43 #include "wx/msw/missing.h"
e95354ec 44 #define wxHAVE_WIN32_MB2WC
ef199164 45#endif
e95354ec 46
6001e347 47#ifdef __SALFORDC__
373658eb 48 #include <clib.h>
6001e347
RR
49#endif
50
b040e242 51#ifdef HAVE_ICONV
373658eb 52 #include <iconv.h>
b1d547eb 53 #include "wx/thread.h"
1cd52418 54#endif
1cd52418 55
373658eb
VZ
56#include "wx/encconv.h"
57#include "wx/fontmap.h"
58
5c4ed98d
DE
59#ifdef __DARWIN__
60#include <CoreFoundation/CFString.h>
61#include <CoreFoundation/CFStringEncodingExt.h>
62#endif //def __DARWIN__
63
335d31e0 64#ifdef __WXMAC__
40ba2f3b 65#ifndef __DARWIN__
4227afa4
SC
66#include <ATSUnicode.h>
67#include <TextCommon.h>
68#include <TextEncodingConverter.h>
40ba2f3b 69#endif
335d31e0 70
ef199164
DS
71// includes Mac headers
72#include "wx/mac/private.h"
335d31e0 73#endif
ce6f8d6f 74
ef199164 75
ce6f8d6f
VZ
76#define TRACE_STRCONV _T("strconv")
77
467e0479
VZ
78// WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
79// be 4 bytes
4948c2b6 80#if SIZEOF_WCHAR_T == 2
ac11db3a
MW
81 #define WC_UTF16
82#endif
83
ef199164 84
373658eb
VZ
85// ============================================================================
86// implementation
87// ============================================================================
88
69373110
VZ
// Helper of the conversion functions below: returns true if at least one of
// the n bytes starting at p is not NUL and false if all of them are NUL (this
// includes the case of n == 0).
static bool NotAllNULs(const char *p, size_t n)
{
    for ( const char * const end = p + n; p != end; ++p )
    {
        if ( *p != '\0' )
            return true;
    }

    return false;
}
97
373658eb 98// ----------------------------------------------------------------------------
467e0479 99// UTF-16 en/decoding to/from UCS-4 with surrogates handling
373658eb 100// ----------------------------------------------------------------------------
6001e347 101
c91830cb 102static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 103{
ef199164 104 if (input <= 0xffff)
4def3b35 105 {
999836aa
VZ
106 if (output)
107 *output = (wxUint16) input;
ef199164 108
4def3b35 109 return 1;
dccce9ea 110 }
ef199164 111 else if (input >= 0x110000)
4def3b35 112 {
467e0479 113 return wxCONV_FAILED;
dccce9ea
VZ
114 }
115 else
4def3b35 116 {
dccce9ea 117 if (output)
4def3b35 118 {
ef199164
DS
119 *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
120 *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
4def3b35 121 }
ef199164 122
4def3b35 123 return 2;
1cd52418 124 }
1cd52418
OK
125}
126
c91830cb 127static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 128{
ef199164 129 if ((*input < 0xd800) || (*input > 0xdfff))
4def3b35
VS
130 {
131 output = *input;
132 return 1;
dccce9ea 133 }
ef199164 134 else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
4def3b35
VS
135 {
136 output = *input;
467e0479 137 return wxCONV_FAILED;
dccce9ea
VZ
138 }
139 else
4def3b35
VS
140 {
141 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
142 return 2;
143 }
1cd52418
OK
144}
145
467e0479 146#ifdef WC_UTF16
35d11700
VZ
147 typedef wchar_t wxDecodeSurrogate_t;
148#else // !WC_UTF16
149 typedef wxUint16 wxDecodeSurrogate_t;
150#endif // WC_UTF16/!WC_UTF16
467e0479
VZ
151
152// returns the next UTF-32 character from the wchar_t buffer and advances the
153// pointer to the character after this one
154//
155// if an invalid character is found, *pSrc is set to NULL, the caller must
156// check for this
35d11700 157static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
467e0479
VZ
158{
159 wxUint32 out;
8d3dd069
VZ
160 const size_t
161 n = decode_utf16(wx_reinterpret_cast(const wxUint16 *, *pSrc), out);
467e0479
VZ
162 if ( n == wxCONV_FAILED )
163 *pSrc = NULL;
164 else
165 *pSrc += n;
166
167 return out;
168}
169
f6bcfd97 170// ----------------------------------------------------------------------------
6001e347 171// wxMBConv
f6bcfd97 172// ----------------------------------------------------------------------------
2c53a80a 173
483b0434
VZ
174size_t
175wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
176 const char *src, size_t srcLen) const
6001e347 177{
483b0434
VZ
178 // although new conversion classes are supposed to implement this function
179 // directly, the existins ones only implement the old MB2WC() and so, to
180 // avoid to have to rewrite all conversion classes at once, we provide a
181 // default (but not efficient) implementation of this one in terms of the
182 // old function by copying the input to ensure that it's NUL-terminated and
183 // then using MB2WC() to convert it
6001e347 184
483b0434
VZ
185 // the number of chars [which would be] written to dst [if it were not NULL]
186 size_t dstWritten = 0;
eec47cc6 187
c1464d9d 188 // the number of NULs terminating this string
a78c43f1 189 size_t nulLen = 0; // not really needed, but just to avoid warnings
eec47cc6 190
c1464d9d
VZ
191 // if we were not given the input size we just have to assume that the
192 // string is properly terminated as we have no way of knowing how long it
193 // is anyhow, but if we do have the size check whether there are enough
194 // NULs at the end
483b0434
VZ
195 wxCharBuffer bufTmp;
196 const char *srcEnd;
467e0479 197 if ( srcLen != wxNO_LEN )
eec47cc6 198 {
c1464d9d 199 // we need to know how to find the end of this string
7ef3ab50 200 nulLen = GetMBNulLen();
483b0434
VZ
201 if ( nulLen == wxCONV_FAILED )
202 return wxCONV_FAILED;
e4e3bbb4 203
c1464d9d 204 // if there are enough NULs we can avoid the copy
483b0434 205 if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
eec47cc6
VZ
206 {
207 // make a copy in order to properly NUL-terminate the string
483b0434 208 bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
c1464d9d 209 char * const p = bufTmp.data();
483b0434
VZ
210 memcpy(p, src, srcLen);
211 for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
c1464d9d 212 *s = '\0';
483b0434
VZ
213
214 src = bufTmp;
eec47cc6 215 }
e4e3bbb4 216
483b0434
VZ
217 srcEnd = src + srcLen;
218 }
219 else // quit after the first loop iteration
220 {
221 srcEnd = NULL;
222 }
e4e3bbb4 223
483b0434 224 for ( ;; )
eec47cc6 225 {
c1464d9d 226 // try to convert the current chunk
483b0434 227 size_t lenChunk = MB2WC(NULL, src, 0);
483b0434
VZ
228 if ( lenChunk == wxCONV_FAILED )
229 return wxCONV_FAILED;
e4e3bbb4 230
467e0479 231 lenChunk++; // for the L'\0' at the end of this chunk
e4e3bbb4 232
483b0434 233 dstWritten += lenChunk;
f5fb6871 234
467e0479
VZ
235 if ( lenChunk == 1 )
236 {
237 // nothing left in the input string, conversion succeeded
238 break;
239 }
240
483b0434
VZ
241 if ( dst )
242 {
243 if ( dstWritten > dstLen )
244 return wxCONV_FAILED;
245
830f8f11 246 if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
483b0434
VZ
247 return wxCONV_FAILED;
248
249 dst += lenChunk;
250 }
c1464d9d 251
483b0434 252 if ( !srcEnd )
c1464d9d 253 {
467e0479
VZ
254 // we convert just one chunk in this case as this is the entire
255 // string anyhow
c1464d9d
VZ
256 break;
257 }
eec47cc6
VZ
258
259 // advance the input pointer past the end of this chunk
483b0434 260 while ( NotAllNULs(src, nulLen) )
c1464d9d
VZ
261 {
262 // notice that we must skip over multiple bytes here as we suppose
263 // that if NUL takes 2 or 4 bytes, then all the other characters do
264 // too and so if advanced by a single byte we might erroneously
265 // detect sequences of NUL bytes in the middle of the input
483b0434 266 src += nulLen;
c1464d9d 267 }
e4e3bbb4 268
483b0434 269 src += nulLen; // skipping over its terminator as well
c1464d9d
VZ
270
271 // note that ">=" (and not just "==") is needed here as the terminator
272 // we skipped just above could be inside or just after the buffer
273 // delimited by inEnd
483b0434 274 if ( src >= srcEnd )
c1464d9d
VZ
275 break;
276 }
277
483b0434 278 return dstWritten;
e4e3bbb4
RN
279}
280
483b0434
VZ
281size_t
282wxMBConv::FromWChar(char *dst, size_t dstLen,
283 const wchar_t *src, size_t srcLen) const
e4e3bbb4 284{
483b0434
VZ
285 // the number of chars [which would be] written to dst [if it were not NULL]
286 size_t dstWritten = 0;
e4e3bbb4 287
eec47cc6
VZ
288 // make a copy of the input string unless it is already properly
289 // NUL-terminated
290 //
291 // if we don't know its length we have no choice but to assume that it is,
292 // indeed, properly terminated
293 wxWCharBuffer bufTmp;
467e0479 294 if ( srcLen == wxNO_LEN )
e4e3bbb4 295 {
483b0434 296 srcLen = wxWcslen(src) + 1;
eec47cc6 297 }
483b0434 298 else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
eec47cc6
VZ
299 {
300 // make a copy in order to properly NUL-terminate the string
483b0434 301 bufTmp = wxWCharBuffer(srcLen);
ef199164 302 memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
483b0434
VZ
303 src = bufTmp;
304 }
305
306 const size_t lenNul = GetMBNulLen();
307 for ( const wchar_t * const srcEnd = src + srcLen;
308 src < srcEnd;
309 src += wxWcslen(src) + 1 /* skip L'\0' too */ )
310 {
311 // try to convert the current chunk
312 size_t lenChunk = WC2MB(NULL, src, 0);
313
314 if ( lenChunk == wxCONV_FAILED )
315 return wxCONV_FAILED;
316
317 lenChunk += lenNul;
318 dstWritten += lenChunk;
319
320 if ( dst )
321 {
322 if ( dstWritten > dstLen )
323 return wxCONV_FAILED;
324
325 if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
326 return wxCONV_FAILED;
327
328 dst += lenChunk;
329 }
eec47cc6 330 }
e4e3bbb4 331
483b0434
VZ
332 return dstWritten;
333}
334
ef199164 335size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
509da451 336{
ef199164 337 size_t rc = ToWChar(outBuff, outLen, inBuff);
467e0479 338 if ( rc != wxCONV_FAILED )
509da451
VZ
339 {
340 // ToWChar() returns the buffer length, i.e. including the trailing
341 // NUL, while this method doesn't take it into account
342 rc--;
343 }
344
345 return rc;
346}
347
ef199164 348size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
509da451 349{
ef199164 350 size_t rc = FromWChar(outBuff, outLen, inBuff);
467e0479 351 if ( rc != wxCONV_FAILED )
509da451
VZ
352 {
353 rc -= GetMBNulLen();
354 }
355
356 return rc;
357}
358
483b0434
VZ
359wxMBConv::~wxMBConv()
360{
361 // nothing to do here (necessary for Darwin linking probably)
362}
e4e3bbb4 363
483b0434
VZ
364const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
365{
366 if ( psz )
eec47cc6 367 {
483b0434
VZ
368 // calculate the length of the buffer needed first
369 const size_t nLen = MB2WC(NULL, psz, 0);
467e0479 370 if ( nLen != wxCONV_FAILED )
f5fb6871 371 {
483b0434
VZ
372 // now do the actual conversion
373 wxWCharBuffer buf(nLen /* +1 added implicitly */);
eec47cc6 374
483b0434
VZ
375 // +1 for the trailing NULL
376 if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
377 return buf;
f5fb6871 378 }
483b0434 379 }
e4e3bbb4 380
483b0434
VZ
381 return wxWCharBuffer();
382}
3698ae71 383
483b0434
VZ
384const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
385{
386 if ( pwz )
387 {
388 const size_t nLen = WC2MB(NULL, pwz, 0);
467e0479 389 if ( nLen != wxCONV_FAILED )
483b0434
VZ
390 {
391 // extra space for trailing NUL(s)
392 static const size_t extraLen = GetMaxMBNulLen();
f5fb6871 393
483b0434
VZ
394 wxCharBuffer buf(nLen + extraLen - 1);
395 if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
396 return buf;
397 }
398 }
399
400 return wxCharBuffer();
401}
e4e3bbb4 402
483b0434 403const wxWCharBuffer
ef199164 404wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
483b0434 405{
ef199164 406 const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
467e0479 407 if ( dstLen != wxCONV_FAILED )
483b0434 408 {
830f8f11 409 wxWCharBuffer wbuf(dstLen - 1);
ef199164 410 if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
483b0434
VZ
411 {
412 if ( outLen )
467e0479
VZ
413 {
414 *outLen = dstLen;
415 if ( wbuf[dstLen - 1] == L'\0' )
416 (*outLen)--;
417 }
418
483b0434
VZ
419 return wbuf;
420 }
421 }
422
423 if ( outLen )
424 *outLen = 0;
425
426 return wxWCharBuffer();
427}
428
429const wxCharBuffer
ef199164 430wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
483b0434 431{
13d92ad6 432 size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
467e0479 433 if ( dstLen != wxCONV_FAILED )
483b0434 434 {
168a76fe
VZ
435 // special case of empty input: can't allocate 0 size buffer below as
436 // wxCharBuffer insists on NUL-terminating it
437 wxCharBuffer buf(dstLen ? dstLen - 1 : 1);
ef199164 438 if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
483b0434
VZ
439 {
440 if ( outLen )
467e0479
VZ
441 {
442 *outLen = dstLen;
443
444 const size_t nulLen = GetMBNulLen();
13d92ad6
VZ
445 if ( dstLen >= nulLen &&
446 !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
467e0479
VZ
447 {
448 // in this case the output is NUL-terminated and we're not
449 // supposed to count NUL
13d92ad6 450 *outLen -= nulLen;
467e0479
VZ
451 }
452 }
d32a507d 453
483b0434
VZ
454 return buf;
455 }
e4e3bbb4
RN
456 }
457
eec47cc6
VZ
458 if ( outLen )
459 *outLen = 0;
460
461 return wxCharBuffer();
e4e3bbb4
RN
462}
463
6001e347 464// ----------------------------------------------------------------------------
bde4baac 465// wxMBConvLibc
6001e347
RR
466// ----------------------------------------------------------------------------
467
bde4baac
VZ
468size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
469{
470 return wxMB2WC(buf, psz, n);
471}
472
473size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
474{
475 return wxWC2MB(buf, psz, n);
476}
e1bfe89e
RR
477
478// ----------------------------------------------------------------------------
532d575b 479// wxConvBrokenFileNames
e1bfe89e
RR
480// ----------------------------------------------------------------------------
481
eec47cc6
VZ
482#ifdef __UNIX__
483
86501081 484wxConvBrokenFileNames::wxConvBrokenFileNames(const wxString& charset)
ea8ce907 485{
86501081
VS
486 if ( wxStricmp(charset, _T("UTF-8")) == 0 ||
487 wxStricmp(charset, _T("UTF8")) == 0 )
5deedd6e 488 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
845905d5
MW
489 else
490 m_conv = new wxCSConv(charset);
ea8ce907
RR
491}
492
eec47cc6 493#endif // __UNIX__
c12b7f79 494
bde4baac 495// ----------------------------------------------------------------------------
3698ae71 496// UTF-7
bde4baac 497// ----------------------------------------------------------------------------
6001e347 498
15f2ee32 499// Implementation (C) 2004 Fredrik Roubert
6001e347 500
15f2ee32
RN
//
// BASE64 decoding table: maps a byte to the 6 bit value it stands for or to
// 0xff if the byte is not one of the BASE64 characters
//
static const unsigned char utf7unb64[] =
{
    /* 0x00 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x08 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x10 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x18 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x20 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x28 */ 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f, // '+', '/'
    /* 0x30 */ 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, // '0'..'7'
    /* 0x38 */ 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // '8', '9'
    /* 0x40 */ 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, // 'A'..'G'
    /* 0x48 */ 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    /* 0x50 */ 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    /* 0x58 */ 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff, // 'X'..'Z'
    /* 0x60 */ 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, // 'a'..'g'
    /* 0x68 */ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    /* 0x70 */ 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    /* 0x78 */ 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff, // 'x'..'z'
    /* 0x80 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x88 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x90 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0x98 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xa0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xa8 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xb0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xb8 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xc0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xc8 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xd0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xd8 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xe0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xe8 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xf0 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    /* 0xf8 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
539
540size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
541{
15f2ee32
RN
542 size_t len = 0;
543
04a37834 544 while ( *psz && (!buf || (len < n)) )
15f2ee32
RN
545 {
546 unsigned char cc = *psz++;
547 if (cc != '+')
548 {
549 // plain ASCII char
550 if (buf)
551 *buf++ = cc;
552 len++;
553 }
554 else if (*psz == '-')
555 {
556 // encoded plus sign
557 if (buf)
558 *buf++ = cc;
559 len++;
560 psz++;
561 }
04a37834 562 else // start of BASE64 encoded string
15f2ee32 563 {
04a37834 564 bool lsb, ok;
15f2ee32 565 unsigned int d, l;
04a37834
VZ
566 for ( ok = lsb = false, d = 0, l = 0;
567 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
568 psz++ )
15f2ee32
RN
569 {
570 d <<= 6;
571 d += cc;
572 for (l += 6; l >= 8; lsb = !lsb)
573 {
04a37834 574 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
575 if (lsb)
576 {
577 if (buf)
578 *buf++ |= c;
579 len ++;
580 }
581 else
04a37834 582 {
15f2ee32 583 if (buf)
6356d52a 584 *buf = (wchar_t)(c << 8);
04a37834
VZ
585 }
586
587 ok = true;
15f2ee32
RN
588 }
589 }
04a37834
VZ
590
591 if ( !ok )
592 {
593 // in valid UTF7 we should have valid characters after '+'
467e0479 594 return wxCONV_FAILED;
04a37834
VZ
595 }
596
15f2ee32
RN
597 if (*psz == '-')
598 psz++;
599 }
600 }
04a37834
VZ
601
602 if ( buf && (len < n) )
603 *buf = '\0';
604
15f2ee32 605 return len;
6001e347
RR
606}
607
15f2ee32
RN
//
// BASE64 encoding table: maps a 6 bit value to the character representing it
//
static const unsigned char utf7enb64[] =
{
    /*  0 */ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    /* 13 */ 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    /* 26 */ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    /* 39 */ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    /* 52 */ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    /* 62 */ '+', '/'
};
622
//
// UTF-7 encoding table giving the class of each ASCII character:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    /* 0x00 */ 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x08 */ 3, 2, 2, 3, 3, 2, 3, 3,
    /* 0x10 */ 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x18 */ 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x20 */ 2, 1, 1, 1, 1, 1, 1, 0,
    /* 0x28 */ 0, 0, 1, 3, 0, 0, 0, 3,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x38 */ 0, 0, 0, 1, 1, 1, 1, 0,
    /* 0x40 */ 1, 0, 0, 0, 0, 0, 0, 0,
    /* 0x48 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x58 */ 0, 0, 0, 1, 3, 1, 1, 1,
    /* 0x60 */ 1, 0, 0, 0, 0, 0, 0, 0,
    /* 0x68 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x78 */ 0, 0, 0, 1, 1, 1, 3, 3
};
642
667e5b3e 643size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32 644{
15f2ee32
RN
645 size_t len = 0;
646
647 while (*psz && ((!buf) || (len < n)))
648 {
649 wchar_t cc = *psz++;
650 if (cc < 0x80 && utf7encode[cc] < 1)
651 {
652 // plain ASCII char
653 if (buf)
654 *buf++ = (char)cc;
ef199164 655
15f2ee32
RN
656 len++;
657 }
658#ifndef WC_UTF16
79c78d42 659 else if (((wxUint32)cc) > 0xffff)
b2c13097 660 {
15f2ee32 661 // no surrogate pair generation (yet?)
467e0479 662 return wxCONV_FAILED;
15f2ee32
RN
663 }
664#endif
665 else
666 {
667 if (buf)
668 *buf++ = '+';
ef199164 669
15f2ee32
RN
670 len++;
671 if (cc != '+')
672 {
673 // BASE64 encode string
674 unsigned int lsb, d, l;
73c902d6 675 for (d = 0, l = 0; /*nothing*/; psz++)
15f2ee32
RN
676 {
677 for (lsb = 0; lsb < 2; lsb ++)
678 {
679 d <<= 8;
680 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
681
682 for (l += 8; l >= 6; )
683 {
684 l -= 6;
685 if (buf)
686 *buf++ = utf7enb64[(d >> l) % 64];
687 len++;
688 }
689 }
ef199164 690
15f2ee32
RN
691 cc = *psz;
692 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
693 break;
694 }
ef199164 695
15f2ee32
RN
696 if (l != 0)
697 {
698 if (buf)
699 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
ef199164 700
15f2ee32
RN
701 len++;
702 }
703 }
ef199164 704
15f2ee32
RN
705 if (buf)
706 *buf++ = '-';
707 len++;
708 }
709 }
ef199164 710
15f2ee32
RN
711 if (buf && (len < n))
712 *buf = 0;
ef199164 713
15f2ee32 714 return len;
6001e347
RR
715}
716
f6bcfd97 717// ----------------------------------------------------------------------------
6001e347 718// UTF-8
f6bcfd97 719// ----------------------------------------------------------------------------
6001e347 720
dccce9ea 721static wxUint32 utf8_max[]=
4def3b35 722 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347 723
3698ae71
VZ
724// boundaries of the private use area we use to (temporarily) remap invalid
725// characters invalid in a UTF-8 encoded string
ea8ce907
RR
726const wxUint32 wxUnicodePUA = 0x100000;
727const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
728
6001e347
RR
729size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
730{
4def3b35
VS
731 size_t len = 0;
732
dccce9ea 733 while (*psz && ((!buf) || (len < n)))
4def3b35 734 {
ea8ce907
RR
735 const char *opsz = psz;
736 bool invalid = false;
4def3b35
VS
737 unsigned char cc = *psz++, fc = cc;
738 unsigned cnt;
dccce9ea 739 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 740 fc <<= 1;
ef199164 741
dccce9ea 742 if (!cnt)
4def3b35
VS
743 {
744 // plain ASCII char
dccce9ea 745 if (buf)
4def3b35
VS
746 *buf++ = cc;
747 len++;
561488ef
MW
748
749 // escape the escape character for octal escapes
750 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
751 && cc == '\\' && (!buf || len < n))
752 {
753 if (buf)
754 *buf++ = cc;
755 len++;
756 }
dccce9ea
VZ
757 }
758 else
4def3b35
VS
759 {
760 cnt--;
dccce9ea 761 if (!cnt)
4def3b35
VS
762 {
763 // invalid UTF-8 sequence
ea8ce907 764 invalid = true;
dccce9ea
VZ
765 }
766 else
4def3b35
VS
767 {
768 unsigned ocnt = cnt - 1;
769 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 770 while (cnt--)
4def3b35 771 {
ea8ce907 772 cc = *psz;
dccce9ea 773 if ((cc & 0xC0) != 0x80)
4def3b35
VS
774 {
775 // invalid UTF-8 sequence
ea8ce907
RR
776 invalid = true;
777 break;
4def3b35 778 }
ef199164 779
ea8ce907 780 psz++;
4def3b35
VS
781 res = (res << 6) | (cc & 0x3f);
782 }
ef199164 783
ea8ce907 784 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
785 {
786 // illegal UTF-8 encoding
ea8ce907 787 invalid = true;
4def3b35 788 }
ea8ce907
RR
789 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
790 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
791 {
792 // if one of our PUA characters turns up externally
793 // it must also be treated as an illegal sequence
794 // (a bit like you have to escape an escape character)
795 invalid = true;
796 }
797 else
798 {
1cd52418 799#ifdef WC_UTF16
ea8ce907
RR
800 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
801 size_t pa = encode_utf16(res, (wxUint16 *)buf);
467e0479 802 if (pa == wxCONV_FAILED)
ea8ce907
RR
803 {
804 invalid = true;
805 }
806 else
807 {
808 if (buf)
809 buf += pa;
810 len += pa;
811 }
373658eb 812#else // !WC_UTF16
ea8ce907 813 if (buf)
38d4b1e4 814 *buf++ = (wchar_t)res;
ea8ce907 815 len++;
373658eb 816#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
817 }
818 }
ef199164 819
ea8ce907
RR
820 if (invalid)
821 {
822 if (m_options & MAP_INVALID_UTF8_TO_PUA)
823 {
824 while (opsz < psz && (!buf || len < n))
825 {
826#ifdef WC_UTF16
827 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
828 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
467e0479 829 wxASSERT(pa != wxCONV_FAILED);
ea8ce907
RR
830 if (buf)
831 buf += pa;
832 opsz++;
833 len += pa;
834#else
835 if (buf)
38d4b1e4 836 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
ea8ce907
RR
837 opsz++;
838 len++;
839#endif
840 }
841 }
3698ae71 842 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
843 {
844 while (opsz < psz && (!buf || len < n))
845 {
3698ae71
VZ
846 if ( buf && len + 3 < n )
847 {
17a1ebd1 848 unsigned char on = *opsz;
3698ae71 849 *buf++ = L'\\';
17a1ebd1
VZ
850 *buf++ = (wchar_t)( L'0' + on / 0100 );
851 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
852 *buf++ = (wchar_t)( L'0' + on % 010 );
3698ae71 853 }
ef199164 854
ea8ce907
RR
855 opsz++;
856 len += 4;
857 }
858 }
3698ae71 859 else // MAP_INVALID_UTF8_NOT
ea8ce907 860 {
467e0479 861 return wxCONV_FAILED;
ea8ce907 862 }
4def3b35
VS
863 }
864 }
6001e347 865 }
ef199164 866
dccce9ea 867 if (buf && (len < n))
4def3b35 868 *buf = 0;
ef199164 869
4def3b35 870 return len;
6001e347
RR
871}
872
3698ae71
VZ
// returns true if wch is one of the octal digits L'0'..L'7'
static inline bool isoctal(wchar_t wch)
{
    return !(wch < L'0' || wch > L'7');
}
877
6001e347
RR
878size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
879{
4def3b35 880 size_t len = 0;
6001e347 881
dccce9ea 882 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
883 {
884 wxUint32 cc;
ef199164 885
1cd52418 886#ifdef WC_UTF16
b5153fd8
VZ
887 // cast is ok for WC_UTF16
888 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
467e0479 889 psz += (pa == wxCONV_FAILED) ? 1 : pa;
1cd52418 890#else
ef199164 891 cc = (*psz++) & 0x7fffffff;
4def3b35 892#endif
3698ae71
VZ
893
894 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
895 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 896 {
dccce9ea 897 if (buf)
ea8ce907 898 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 899 len++;
3698ae71 900 }
561488ef
MW
901 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
902 && cc == L'\\' && psz[0] == L'\\' )
903 {
904 if (buf)
905 *buf++ = (char)cc;
906 psz++;
907 len++;
908 }
3698ae71
VZ
909 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
910 cc == L'\\' &&
911 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 912 {
dccce9ea 913 if (buf)
3698ae71 914 {
ef199164
DS
915 *buf++ = (char) ((psz[0] - L'0') * 0100 +
916 (psz[1] - L'0') * 010 +
b2c13097 917 (psz[2] - L'0'));
3698ae71
VZ
918 }
919
920 psz += 3;
ea8ce907
RR
921 len++;
922 }
923 else
924 {
925 unsigned cnt;
ef199164
DS
926 for (cnt = 0; cc > utf8_max[cnt]; cnt++)
927 {
928 }
929
ea8ce907 930 if (!cnt)
4def3b35 931 {
ea8ce907
RR
932 // plain ASCII char
933 if (buf)
934 *buf++ = (char) cc;
935 len++;
936 }
ea8ce907
RR
937 else
938 {
939 len += cnt + 1;
940 if (buf)
941 {
942 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
943 while (cnt--)
944 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
945 }
4def3b35
VS
946 }
947 }
6001e347 948 }
4def3b35 949
ef199164 950 if (buf && (len < n))
3698ae71 951 *buf = 0;
adb45366 952
4def3b35 953 return len;
6001e347
RR
954}
955
467e0479 956// ============================================================================
c91830cb 957// UTF-16
467e0479 958// ============================================================================
c91830cb
VZ
959
// wxMBConvUTF16straight and wxMBConvUTF16swap are the names under which the
// methods of wxMBConvUTF16LE and wxMBConvUTF16BE are defined below, which one
// is which depends on WORDS_BIGENDIAN
#ifndef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16LE
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
#else // WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#endif // !WORDS_BIGENDIAN/WORDS_BIGENDIAN
967
467e0479
VZ
968/* static */
969size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
970{
971 if ( srcLen == wxNO_LEN )
972 {
973 // count the number of bytes in input, including the trailing NULs
ef199164
DS
974 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
975 for ( srcLen = 1; *inBuff++; srcLen++ )
467e0479 976 ;
c91830cb 977
467e0479
VZ
978 srcLen *= BYTES_PER_CHAR;
979 }
980 else // we already have the length
981 {
982 // we can only convert an entire number of UTF-16 characters
983 if ( srcLen % BYTES_PER_CHAR )
984 return wxCONV_FAILED;
985 }
986
987 return srcLen;
988}
989
990// case when in-memory representation is UTF-16 too
c91830cb
VZ
991#ifdef WC_UTF16
992
467e0479
VZ
993// ----------------------------------------------------------------------------
994// conversions without endianness change
995// ----------------------------------------------------------------------------
996
997size_t
998wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
999 const char *src, size_t srcLen) const
c91830cb 1000{
467e0479
VZ
1001 // set up the scene for using memcpy() (which is presumably more efficient
1002 // than copying the bytes one by one)
1003 srcLen = GetLength(src, srcLen);
1004 if ( srcLen == wxNO_LEN )
1005 return wxCONV_FAILED;
c91830cb 1006
ef199164 1007 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479 1008 if ( dst )
c91830cb 1009 {
467e0479
VZ
1010 if ( dstLen < inLen )
1011 return wxCONV_FAILED;
c91830cb 1012
467e0479 1013 memcpy(dst, src, srcLen);
c91830cb 1014 }
d32a507d 1015
467e0479 1016 return inLen;
c91830cb
VZ
1017}
1018
467e0479
VZ
1019size_t
1020wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1021 const wchar_t *src, size_t srcLen) const
c91830cb 1022{
467e0479
VZ
1023 if ( srcLen == wxNO_LEN )
1024 srcLen = wxWcslen(src) + 1;
c91830cb 1025
467e0479
VZ
1026 srcLen *= BYTES_PER_CHAR;
1027
1028 if ( dst )
c91830cb 1029 {
467e0479
VZ
1030 if ( dstLen < srcLen )
1031 return wxCONV_FAILED;
d32a507d 1032
467e0479 1033 memcpy(dst, src, srcLen);
c91830cb 1034 }
d32a507d 1035
467e0479 1036 return srcLen;
c91830cb
VZ
1037}
1038
467e0479
VZ
1039// ----------------------------------------------------------------------------
1040// endian-reversing conversions
1041// ----------------------------------------------------------------------------
c91830cb 1042
467e0479
VZ
1043size_t
1044wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1045 const char *src, size_t srcLen) const
c91830cb 1046{
467e0479
VZ
1047 srcLen = GetLength(src, srcLen);
1048 if ( srcLen == wxNO_LEN )
1049 return wxCONV_FAILED;
c91830cb 1050
467e0479
VZ
1051 srcLen /= BYTES_PER_CHAR;
1052
1053 if ( dst )
c91830cb 1054 {
467e0479
VZ
1055 if ( dstLen < srcLen )
1056 return wxCONV_FAILED;
1057
ef199164
DS
1058 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1059 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
c91830cb 1060 {
ef199164 1061 *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff);
c91830cb 1062 }
c91830cb 1063 }
bfab25d4 1064
467e0479 1065 return srcLen;
c91830cb
VZ
1066}
1067
467e0479
VZ
1068size_t
1069wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1070 const wchar_t *src, size_t srcLen) const
c91830cb 1071{
467e0479
VZ
1072 if ( srcLen == wxNO_LEN )
1073 srcLen = wxWcslen(src) + 1;
c91830cb 1074
467e0479
VZ
1075 srcLen *= BYTES_PER_CHAR;
1076
1077 if ( dst )
c91830cb 1078 {
467e0479
VZ
1079 if ( dstLen < srcLen )
1080 return wxCONV_FAILED;
1081
ef199164 1082 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
467e0479 1083 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
c91830cb 1084 {
ef199164 1085 *outBuff++ = wxUINT16_SWAP_ALWAYS(*src);
c91830cb 1086 }
c91830cb 1087 }
eec47cc6 1088
467e0479 1089 return srcLen;
c91830cb
VZ
1090}
1091
467e0479 1092#else // !WC_UTF16: wchar_t is UTF-32
c91830cb 1093
467e0479
VZ
1094// ----------------------------------------------------------------------------
1095// conversions without endianness change
1096// ----------------------------------------------------------------------------
c91830cb 1097
35d11700
VZ
1098size_t
1099wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1100 const char *src, size_t srcLen) const
c91830cb 1101{
35d11700
VZ
1102 srcLen = GetLength(src, srcLen);
1103 if ( srcLen == wxNO_LEN )
1104 return wxCONV_FAILED;
c91830cb 1105
ef199164 1106 const size_t inLen = srcLen / BYTES_PER_CHAR;
35d11700 1107 if ( !dst )
c91830cb 1108 {
35d11700
VZ
1109 // optimization: return maximal space which could be needed for this
1110 // string even if the real size could be smaller if the buffer contains
1111 // any surrogates
1112 return inLen;
c91830cb 1113 }
c91830cb 1114
35d11700 1115 size_t outLen = 0;
ef199164
DS
1116 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1117 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
35d11700 1118 {
ef199164
DS
1119 const wxUint32 ch = wxDecodeSurrogate(&inBuff);
1120 if ( !inBuff )
35d11700
VZ
1121 return wxCONV_FAILED;
1122
1123 if ( ++outLen > dstLen )
1124 return wxCONV_FAILED;
c91830cb 1125
35d11700
VZ
1126 *dst++ = ch;
1127 }
1128
1129
1130 return outLen;
1131}
c91830cb 1132
35d11700
VZ
1133size_t
1134wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1135 const wchar_t *src, size_t srcLen) const
c91830cb 1136{
35d11700
VZ
1137 if ( srcLen == wxNO_LEN )
1138 srcLen = wxWcslen(src) + 1;
c91830cb 1139
35d11700 1140 size_t outLen = 0;
ef199164 1141 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
35d11700 1142 for ( size_t n = 0; n < srcLen; n++ )
c91830cb
VZ
1143 {
1144 wxUint16 cc[2];
35d11700
VZ
1145 const size_t numChars = encode_utf16(*src++, cc);
1146 if ( numChars == wxCONV_FAILED )
1147 return wxCONV_FAILED;
c91830cb 1148
ef199164
DS
1149 outLen += numChars * BYTES_PER_CHAR;
1150 if ( outBuff )
c91830cb 1151 {
35d11700
VZ
1152 if ( outLen > dstLen )
1153 return wxCONV_FAILED;
1154
ef199164 1155 *outBuff++ = cc[0];
35d11700 1156 if ( numChars == 2 )
69b80d28 1157 {
35d11700 1158 // second character of a surrogate
ef199164 1159 *outBuff++ = cc[1];
69b80d28 1160 }
c91830cb 1161 }
c91830cb 1162 }
c91830cb 1163
35d11700 1164 return outLen;
c91830cb
VZ
1165}
1166
467e0479
VZ
1167// ----------------------------------------------------------------------------
1168// endian-reversing conversions
1169// ----------------------------------------------------------------------------
c91830cb 1170
35d11700
VZ
1171size_t
1172wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1173 const char *src, size_t srcLen) const
c91830cb 1174{
35d11700
VZ
1175 srcLen = GetLength(src, srcLen);
1176 if ( srcLen == wxNO_LEN )
1177 return wxCONV_FAILED;
1178
ef199164 1179 const size_t inLen = srcLen / BYTES_PER_CHAR;
35d11700
VZ
1180 if ( !dst )
1181 {
1182 // optimization: return maximal space which could be needed for this
1183 // string even if the real size could be smaller if the buffer contains
1184 // any surrogates
1185 return inLen;
1186 }
c91830cb 1187
35d11700 1188 size_t outLen = 0;
ef199164
DS
1189 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1190 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
c91830cb 1191 {
35d11700
VZ
1192 wxUint32 ch;
1193 wxUint16 tmp[2];
ef199164
DS
1194
1195 tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
1196 inBuff++;
1197 tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
c91830cb 1198
35d11700
VZ
1199 const size_t numChars = decode_utf16(tmp, ch);
1200 if ( numChars == wxCONV_FAILED )
1201 return wxCONV_FAILED;
c91830cb 1202
35d11700 1203 if ( numChars == 2 )
ef199164 1204 inBuff++;
35d11700
VZ
1205
1206 if ( ++outLen > dstLen )
1207 return wxCONV_FAILED;
c91830cb 1208
35d11700 1209 *dst++ = ch;
c91830cb 1210 }
c91830cb 1211
c91830cb 1212
35d11700
VZ
1213 return outLen;
1214}
c91830cb 1215
35d11700
VZ
1216size_t
1217wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1218 const wchar_t *src, size_t srcLen) const
c91830cb 1219{
35d11700
VZ
1220 if ( srcLen == wxNO_LEN )
1221 srcLen = wxWcslen(src) + 1;
c91830cb 1222
35d11700 1223 size_t outLen = 0;
ef199164 1224 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
35d11700 1225 for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
c91830cb
VZ
1226 {
1227 wxUint16 cc[2];
35d11700
VZ
1228 const size_t numChars = encode_utf16(*src, cc);
1229 if ( numChars == wxCONV_FAILED )
1230 return wxCONV_FAILED;
c91830cb 1231
ef199164
DS
1232 outLen += numChars * BYTES_PER_CHAR;
1233 if ( outBuff )
c91830cb 1234 {
35d11700
VZ
1235 if ( outLen > dstLen )
1236 return wxCONV_FAILED;
1237
ef199164 1238 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
35d11700 1239 if ( numChars == 2 )
c91830cb 1240 {
35d11700 1241 // second character of a surrogate
ef199164 1242 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
c91830cb
VZ
1243 }
1244 }
c91830cb 1245 }
c91830cb 1246
35d11700 1247 return outLen;
c91830cb
VZ
1248}
1249
467e0479 1250#endif // WC_UTF16/!WC_UTF16
c91830cb
VZ
1251
1252
35d11700 1253// ============================================================================
c91830cb 1254// UTF-32
35d11700 1255// ============================================================================
c91830cb
VZ
1256
1257#ifdef WORDS_BIGENDIAN
467e0479
VZ
1258 #define wxMBConvUTF32straight wxMBConvUTF32BE
1259 #define wxMBConvUTF32swap wxMBConvUTF32LE
c91830cb 1260#else
467e0479
VZ
1261 #define wxMBConvUTF32swap wxMBConvUTF32BE
1262 #define wxMBConvUTF32straight wxMBConvUTF32LE
c91830cb
VZ
1263#endif
1264
1265
1266WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1267WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1268
467e0479
VZ
1269/* static */
1270size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
1271{
1272 if ( srcLen == wxNO_LEN )
1273 {
1274 // count the number of bytes in input, including the trailing NULs
ef199164
DS
1275 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1276 for ( srcLen = 1; *inBuff++; srcLen++ )
467e0479 1277 ;
c91830cb 1278
467e0479
VZ
1279 srcLen *= BYTES_PER_CHAR;
1280 }
1281 else // we already have the length
1282 {
1283 // we can only convert an entire number of UTF-32 characters
1284 if ( srcLen % BYTES_PER_CHAR )
1285 return wxCONV_FAILED;
1286 }
1287
1288 return srcLen;
1289}
1290
1291// case when in-memory representation is UTF-16
c91830cb
VZ
1292#ifdef WC_UTF16
1293
467e0479
VZ
1294// ----------------------------------------------------------------------------
1295// conversions without endianness change
1296// ----------------------------------------------------------------------------
1297
1298size_t
1299wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1300 const char *src, size_t srcLen) const
c91830cb 1301{
467e0479
VZ
1302 srcLen = GetLength(src, srcLen);
1303 if ( srcLen == wxNO_LEN )
1304 return wxCONV_FAILED;
c91830cb 1305
ef199164
DS
1306 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1307 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479
VZ
1308 size_t outLen = 0;
1309 for ( size_t n = 0; n < inLen; n++ )
c91830cb
VZ
1310 {
1311 wxUint16 cc[2];
ef199164 1312 const size_t numChars = encode_utf16(*inBuff++, cc);
467e0479
VZ
1313 if ( numChars == wxCONV_FAILED )
1314 return wxCONV_FAILED;
c91830cb 1315
467e0479
VZ
1316 outLen += numChars;
1317 if ( dst )
c91830cb 1318 {
467e0479
VZ
1319 if ( outLen > dstLen )
1320 return wxCONV_FAILED;
d32a507d 1321
467e0479
VZ
1322 *dst++ = cc[0];
1323 if ( numChars == 2 )
1324 {
1325 // second character of a surrogate
1326 *dst++ = cc[1];
1327 }
1328 }
c91830cb 1329 }
d32a507d 1330
467e0479 1331 return outLen;
c91830cb
VZ
1332}
1333
467e0479
VZ
1334size_t
1335wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1336 const wchar_t *src, size_t srcLen) const
c91830cb 1337{
467e0479
VZ
1338 if ( srcLen == wxNO_LEN )
1339 srcLen = wxWcslen(src) + 1;
c91830cb 1340
467e0479 1341 if ( !dst )
c91830cb 1342 {
467e0479
VZ
1343 // optimization: return maximal space which could be needed for this
1344 // string instead of the exact amount which could be less if there are
1345 // any surrogates in the input
1346 //
1347 // we consider that surrogates are rare enough to make it worthwhile to
1348 // avoid running the loop below at the cost of slightly extra memory
1349 // consumption
ef199164 1350 return srcLen * BYTES_PER_CHAR;
467e0479 1351 }
c91830cb 1352
ef199164 1353 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
467e0479
VZ
1354 size_t outLen = 0;
1355 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1356 {
1357 const wxUint32 ch = wxDecodeSurrogate(&src);
1358 if ( !src )
1359 return wxCONV_FAILED;
c91830cb 1360
467e0479 1361 outLen += BYTES_PER_CHAR;
d32a507d 1362
467e0479
VZ
1363 if ( outLen > dstLen )
1364 return wxCONV_FAILED;
b5153fd8 1365
ef199164 1366 *outBuff++ = ch;
467e0479 1367 }
c91830cb 1368
467e0479 1369 return outLen;
c91830cb
VZ
1370}
1371
467e0479
VZ
1372// ----------------------------------------------------------------------------
1373// endian-reversing conversions
1374// ----------------------------------------------------------------------------
c91830cb 1375
467e0479
VZ
1376size_t
1377wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1378 const char *src, size_t srcLen) const
c91830cb 1379{
467e0479
VZ
1380 srcLen = GetLength(src, srcLen);
1381 if ( srcLen == wxNO_LEN )
1382 return wxCONV_FAILED;
c91830cb 1383
ef199164
DS
1384 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1385 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479 1386 size_t outLen = 0;
ef199164 1387 for ( size_t n = 0; n < inLen; n++, inBuff++ )
c91830cb 1388 {
c91830cb 1389 wxUint16 cc[2];
ef199164 1390 const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
467e0479
VZ
1391 if ( numChars == wxCONV_FAILED )
1392 return wxCONV_FAILED;
c91830cb 1393
467e0479
VZ
1394 outLen += numChars;
1395 if ( dst )
c91830cb 1396 {
467e0479
VZ
1397 if ( outLen > dstLen )
1398 return wxCONV_FAILED;
d32a507d 1399
467e0479
VZ
1400 *dst++ = cc[0];
1401 if ( numChars == 2 )
1402 {
1403 // second character of a surrogate
1404 *dst++ = cc[1];
1405 }
1406 }
c91830cb 1407 }
b5153fd8 1408
467e0479 1409 return outLen;
c91830cb
VZ
1410}
1411
467e0479
VZ
1412size_t
1413wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1414 const wchar_t *src, size_t srcLen) const
c91830cb 1415{
467e0479
VZ
1416 if ( srcLen == wxNO_LEN )
1417 srcLen = wxWcslen(src) + 1;
c91830cb 1418
467e0479 1419 if ( !dst )
c91830cb 1420 {
467e0479
VZ
1421 // optimization: return maximal space which could be needed for this
1422 // string instead of the exact amount which could be less if there are
1423 // any surrogates in the input
1424 //
1425 // we consider that surrogates are rare enough to make it worthwhile to
1426 // avoid running the loop below at the cost of slightly extra memory
1427 // consumption
1428 return srcLen*BYTES_PER_CHAR;
1429 }
c91830cb 1430
ef199164 1431 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
467e0479
VZ
1432 size_t outLen = 0;
1433 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1434 {
1435 const wxUint32 ch = wxDecodeSurrogate(&src);
1436 if ( !src )
1437 return wxCONV_FAILED;
c91830cb 1438
467e0479 1439 outLen += BYTES_PER_CHAR;
d32a507d 1440
467e0479
VZ
1441 if ( outLen > dstLen )
1442 return wxCONV_FAILED;
b5153fd8 1443
ef199164 1444 *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
467e0479 1445 }
c91830cb 1446
467e0479 1447 return outLen;
c91830cb
VZ
1448}
1449
467e0479 1450#else // !WC_UTF16: wchar_t is UTF-32
c91830cb 1451
35d11700
VZ
1452// ----------------------------------------------------------------------------
1453// conversions without endianness change
1454// ----------------------------------------------------------------------------
1455
1456size_t
1457wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1458 const char *src, size_t srcLen) const
c91830cb 1459{
35d11700
VZ
1460 // use memcpy() as it should be much faster than hand-written loop
1461 srcLen = GetLength(src, srcLen);
1462 if ( srcLen == wxNO_LEN )
1463 return wxCONV_FAILED;
c91830cb 1464
35d11700
VZ
1465 const size_t inLen = srcLen/BYTES_PER_CHAR;
1466 if ( dst )
c91830cb 1467 {
35d11700
VZ
1468 if ( dstLen < inLen )
1469 return wxCONV_FAILED;
b5153fd8 1470
35d11700
VZ
1471 memcpy(dst, src, srcLen);
1472 }
c91830cb 1473
35d11700 1474 return inLen;
c91830cb
VZ
1475}
1476
35d11700
VZ
1477size_t
1478wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1479 const wchar_t *src, size_t srcLen) const
c91830cb 1480{
35d11700
VZ
1481 if ( srcLen == wxNO_LEN )
1482 srcLen = wxWcslen(src) + 1;
1483
1484 srcLen *= BYTES_PER_CHAR;
c91830cb 1485
35d11700 1486 if ( dst )
c91830cb 1487 {
35d11700
VZ
1488 if ( dstLen < srcLen )
1489 return wxCONV_FAILED;
c91830cb 1490
35d11700 1491 memcpy(dst, src, srcLen);
c91830cb
VZ
1492 }
1493
35d11700 1494 return srcLen;
c91830cb
VZ
1495}
1496
35d11700
VZ
1497// ----------------------------------------------------------------------------
1498// endian-reversing conversions
1499// ----------------------------------------------------------------------------
c91830cb 1500
35d11700
VZ
1501size_t
1502wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1503 const char *src, size_t srcLen) const
c91830cb 1504{
35d11700
VZ
1505 srcLen = GetLength(src, srcLen);
1506 if ( srcLen == wxNO_LEN )
1507 return wxCONV_FAILED;
1508
1509 srcLen /= BYTES_PER_CHAR;
c91830cb 1510
35d11700 1511 if ( dst )
c91830cb 1512 {
35d11700
VZ
1513 if ( dstLen < srcLen )
1514 return wxCONV_FAILED;
1515
ef199164
DS
1516 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1517 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
c91830cb 1518 {
ef199164 1519 *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff);
c91830cb 1520 }
c91830cb 1521 }
b5153fd8 1522
35d11700 1523 return srcLen;
c91830cb
VZ
1524}
1525
35d11700
VZ
1526size_t
1527wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1528 const wchar_t *src, size_t srcLen) const
c91830cb 1529{
35d11700
VZ
1530 if ( srcLen == wxNO_LEN )
1531 srcLen = wxWcslen(src) + 1;
1532
1533 srcLen *= BYTES_PER_CHAR;
c91830cb 1534
35d11700 1535 if ( dst )
c91830cb 1536 {
35d11700
VZ
1537 if ( dstLen < srcLen )
1538 return wxCONV_FAILED;
1539
ef199164 1540 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
35d11700 1541 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
c91830cb 1542 {
ef199164 1543 *outBuff++ = wxUINT32_SWAP_ALWAYS(*src);
c91830cb 1544 }
c91830cb 1545 }
b5153fd8 1546
35d11700 1547 return srcLen;
c91830cb
VZ
1548}
1549
467e0479 1550#endif // WC_UTF16/!WC_UTF16
c91830cb
VZ
1551
1552
36acb880
VZ
1553// ============================================================================
1554// The classes doing conversion using the iconv_xxx() functions
1555// ============================================================================
3caec1bb 1556
b040e242 1557#ifdef HAVE_ICONV
3a0d76bc 1558
b1d547eb
VS
1559// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1560// E2BIG if output buffer is _exactly_ as big as needed. Such case is
1561// (unless there's yet another bug in glibc) the only case when iconv()
1562// returns with (size_t)-1 (which means error) and says there are 0 bytes
1563// left in the input buffer -- when _real_ error occurs,
1564// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1565// iconv() failure.
3caec1bb
VS
1566// [This bug does not appear in glibc 2.2.]
1567#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1568#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1569 (errno != E2BIG || bufLeft != 0))
1570#else
1571#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1572#endif
1573
ab217dba 1574#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880 1575
74a7eb0b
VZ
1576#define ICONV_T_INVALID ((iconv_t)-1)
1577
1578#if SIZEOF_WCHAR_T == 4
1579 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1580 #define WC_ENC wxFONTENCODING_UTF32
1581#elif SIZEOF_WCHAR_T == 2
1582 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1583 #define WC_ENC wxFONTENCODING_UTF16
1584#else // sizeof(wchar_t) != 2 nor 4
1585 // does this ever happen?
1586 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1587#endif
1588
36acb880 1589// ----------------------------------------------------------------------------
e95354ec 1590// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1591// ----------------------------------------------------------------------------
1592
e95354ec 1593class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1594{
1595public:
86501081 1596 wxMBConv_iconv(const char *name);
e95354ec 1597 virtual ~wxMBConv_iconv();
36acb880 1598
bde4baac
VZ
1599 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1600 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1601
d36c9347 1602 // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
7ef3ab50
VZ
1603 virtual size_t GetMBNulLen() const;
1604
ba98e032
VS
1605#if wxUSE_UNICODE_UTF8
1606 virtual bool IsUTF8() const;
1607#endif
1608
d36c9347
VZ
1609 virtual wxMBConv *Clone() const
1610 {
86501081 1611 wxMBConv_iconv *p = new wxMBConv_iconv(m_name.ToAscii());
d36c9347
VZ
1612 p->m_minMBCharWidth = m_minMBCharWidth;
1613 return p;
1614 }
1615
e95354ec 1616 bool IsOk() const
74a7eb0b 1617 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
36acb880
VZ
1618
1619protected:
ef199164
DS
1620 // the iconv handlers used to translate from multibyte
1621 // to wide char and in the other direction
36acb880
VZ
1622 iconv_t m2w,
1623 w2m;
ef199164 1624
b1d547eb
VS
1625#if wxUSE_THREADS
1626 // guards access to m2w and w2m objects
1627 wxMutex m_iconvMutex;
1628#endif
36acb880
VZ
1629
1630private:
e95354ec 1631 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880 1632 // available on this machine, it will remain NULL
74a7eb0b 1633 static wxString ms_wcCharsetName;
36acb880
VZ
1634
1635 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1636 // different endian-ness than the native one
405d8f46 1637 static bool ms_wcNeedsSwap;
eec47cc6 1638
d36c9347
VZ
1639
1640 // name of the encoding handled by this conversion
1641 wxString m_name;
1642
7ef3ab50 1643 // cached result of GetMBNulLen(); set to 0 meaning "unknown"
c1464d9d
VZ
1644 // initially
1645 size_t m_minMBCharWidth;
36acb880
VZ
1646};
1647
8f115891 1648// make the constructor available for unit testing
86501081 1649WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name )
8f115891
MW
1650{
1651 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1652 if ( !result->IsOk() )
1653 {
1654 delete result;
1655 return 0;
1656 }
ef199164 1657
8f115891
MW
1658 return result;
1659}
1660
422e411e 1661wxString wxMBConv_iconv::ms_wcCharsetName;
e95354ec 1662bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1663
86501081 1664wxMBConv_iconv::wxMBConv_iconv(const char *name)
d36c9347 1665 : m_name(name)
36acb880 1666{
c1464d9d 1667 m_minMBCharWidth = 0;
eec47cc6 1668
36acb880 1669 // check for charset that represents wchar_t:
74a7eb0b 1670 if ( ms_wcCharsetName.empty() )
f1339c56 1671 {
c2b83fdd
VZ
1672 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1673
74a7eb0b
VZ
1674#if wxUSE_FONTMAP
1675 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1676#else // !wxUSE_FONTMAP
91cb7f52 1677 static const wxChar *names_static[] =
36acb880 1678 {
74a7eb0b
VZ
1679#if SIZEOF_WCHAR_T == 4
1680 _T("UCS-4"),
1681#elif SIZEOF_WCHAR_T = 2
1682 _T("UCS-2"),
1683#endif
1684 NULL
1685 };
91cb7f52 1686 const wxChar **names = names_static;
74a7eb0b 1687#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
36acb880 1688
d1f024a8 1689 for ( ; *names && ms_wcCharsetName.empty(); ++names )
74a7eb0b 1690 {
17a1ebd1 1691 const wxString nameCS(*names);
74a7eb0b
VZ
1692
1693 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
17a1ebd1 1694 wxString nameXE(nameCS);
ef199164
DS
1695
1696#ifdef WORDS_BIGENDIAN
74a7eb0b 1697 nameXE += _T("BE");
ef199164 1698#else // little endian
74a7eb0b 1699 nameXE += _T("LE");
ef199164 1700#endif
74a7eb0b 1701
c2b83fdd
VZ
1702 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1703 nameXE.c_str());
1704
86501081 1705 m2w = iconv_open(nameXE.ToAscii(), name);
74a7eb0b 1706 if ( m2w == ICONV_T_INVALID )
3a0d76bc 1707 {
74a7eb0b 1708 // try charset w/o bytesex info (e.g. "UCS4")
c2b83fdd
VZ
1709 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1710 nameCS.c_str());
86501081 1711 m2w = iconv_open(nameCS.ToAscii(), name);
3a0d76bc 1712
74a7eb0b
VZ
1713 // and check for bytesex ourselves:
1714 if ( m2w != ICONV_T_INVALID )
3a0d76bc 1715 {
74a7eb0b
VZ
1716 char buf[2], *bufPtr;
1717 wchar_t wbuf[2], *wbufPtr;
1718 size_t insz, outsz;
1719 size_t res;
1720
1721 buf[0] = 'A';
1722 buf[1] = 0;
1723 wbuf[0] = 0;
1724 insz = 2;
1725 outsz = SIZEOF_WCHAR_T * 2;
1726 wbufPtr = wbuf;
1727 bufPtr = buf;
1728
ef199164
DS
1729 res = iconv(
1730 m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1731 (char**)&wbufPtr, &outsz);
74a7eb0b
VZ
1732
1733 if (ICONV_FAILED(res, insz))
1734 {
1735 wxLogLastError(wxT("iconv"));
422e411e 1736 wxLogError(_("Conversion to charset '%s' doesn't work."),
17a1ebd1 1737 nameCS.c_str());
74a7eb0b
VZ
1738 }
1739 else // ok, can convert to this encoding, remember it
1740 {
17a1ebd1 1741 ms_wcCharsetName = nameCS;
74a7eb0b
VZ
1742 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1743 }
3a0d76bc
VS
1744 }
1745 }
74a7eb0b 1746 else // use charset not requiring byte swapping
36acb880 1747 {
74a7eb0b 1748 ms_wcCharsetName = nameXE;
36acb880 1749 }
3a0d76bc 1750 }
74a7eb0b 1751
0944fceb 1752 wxLogTrace(TRACE_STRCONV,
74a7eb0b 1753 wxT("iconv wchar_t charset is \"%s\"%s"),
999020e1
VZ
1754 ms_wcCharsetName.empty() ? wxString("<none>")
1755 : ms_wcCharsetName,
74a7eb0b
VZ
1756 ms_wcNeedsSwap ? _T(" (needs swap)")
1757 : _T(""));
3a0d76bc 1758 }
36acb880 1759 else // we already have ms_wcCharsetName
3caec1bb 1760 {
86501081 1761 m2w = iconv_open(ms_wcCharsetName.ToAscii(), name);
f1339c56 1762 }
dccce9ea 1763
74a7eb0b 1764 if ( ms_wcCharsetName.empty() )
f1339c56 1765 {
74a7eb0b 1766 w2m = ICONV_T_INVALID;
36acb880 1767 }
405d8f46
VZ
1768 else
1769 {
86501081 1770 w2m = iconv_open(name, ms_wcCharsetName.ToAscii());
74a7eb0b
VZ
1771 if ( w2m == ICONV_T_INVALID )
1772 {
1773 wxLogTrace(TRACE_STRCONV,
1774 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
86501081 1775 ms_wcCharsetName.c_str(), name);
74a7eb0b 1776 }
405d8f46 1777 }
36acb880 1778}
3caec1bb 1779
e95354ec 1780wxMBConv_iconv::~wxMBConv_iconv()
36acb880 1781{
74a7eb0b 1782 if ( m2w != ICONV_T_INVALID )
36acb880 1783 iconv_close(m2w);
74a7eb0b 1784 if ( w2m != ICONV_T_INVALID )
36acb880
VZ
1785 iconv_close(w2m);
1786}
3a0d76bc 1787
bde4baac 1788size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1789{
69373110
VZ
1790 // find the string length: notice that must be done differently for
1791 // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1792 size_t inbuf;
7ef3ab50 1793 const size_t nulLen = GetMBNulLen();
69373110
VZ
1794 switch ( nulLen )
1795 {
1796 default:
467e0479 1797 return wxCONV_FAILED;
69373110
VZ
1798
1799 case 1:
1800 inbuf = strlen(psz); // arguably more optimized than our version
1801 break;
1802
1803 case 2:
1804 case 4:
1805 // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1806 // they also have to start at character boundary and not span two
1807 // adjacent characters
1808 const char *p;
1809 for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
1810 ;
1811 inbuf = p - psz;
1812 break;
1813 }
1814
b1d547eb 1815#if wxUSE_THREADS
6a17b868
SN
1816 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1817 // Unfortunately there are a couple of global wxCSConv objects such as
b1d547eb
VS
1818 // wxConvLocal that are used all over wx code, so we have to make sure
1819 // the handle is used by at most one thread at the time. Otherwise
1820 // only a few wx classes would be safe to use from non-main threads
1821 // as MB<->WC conversion would fail "randomly".
1822 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
69373110
VZ
1823#endif // wxUSE_THREADS
1824
36acb880
VZ
1825 size_t outbuf = n * SIZEOF_WCHAR_T;
1826 size_t res, cres;
1827 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1828 wchar_t *bufPtr = buf;
1829 const char *pszPtr = psz;
1830
1831 if (buf)
1832 {
1833 // have destination buffer, convert there
1834 cres = iconv(m2w,
1835 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1836 (char**)&bufPtr, &outbuf);
1837 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1838
36acb880 1839 if (ms_wcNeedsSwap)
3a0d76bc 1840 {
36acb880 1841 // convert to native endianness
17a1ebd1
VZ
1842 for ( unsigned i = 0; i < res; i++ )
1843 buf[n] = WC_BSWAP(buf[i]);
3a0d76bc 1844 }
adb45366 1845
69373110 1846 // NUL-terminate the string if there is any space left
49dd9820
VS
1847 if (res < n)
1848 buf[res] = 0;
36acb880
VZ
1849 }
1850 else
1851 {
1852 // no destination buffer... convert using temp buffer
1853 // to calculate destination buffer requirement
1854 wchar_t tbuf[8];
1855 res = 0;
ef199164
DS
1856
1857 do
1858 {
36acb880 1859 bufPtr = tbuf;
ef199164 1860 outbuf = 8 * SIZEOF_WCHAR_T;
36acb880
VZ
1861
1862 cres = iconv(m2w,
1863 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1864 (char**)&bufPtr, &outbuf );
1865
ef199164
DS
1866 res += 8 - (outbuf / SIZEOF_WCHAR_T);
1867 }
1868 while ((cres == (size_t)-1) && (errno == E2BIG));
f1339c56 1869 }
dccce9ea 1870
36acb880 1871 if (ICONV_FAILED(cres, inbuf))
f1339c56 1872 {
36acb880 1873 //VS: it is ok if iconv fails, hence trace only
ce6f8d6f 1874 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
467e0479 1875 return wxCONV_FAILED;
36acb880
VZ
1876 }
1877
1878 return res;
1879}
1880
bde4baac 1881size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1882{
b1d547eb
VS
1883#if wxUSE_THREADS
1884 // NB: explained in MB2WC
1885 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1886#endif
3698ae71 1887
156162ec
MW
1888 size_t inlen = wxWcslen(psz);
1889 size_t inbuf = inlen * SIZEOF_WCHAR_T;
36acb880
VZ
1890 size_t outbuf = n;
1891 size_t res, cres;
3a0d76bc 1892
36acb880 1893 wchar_t *tmpbuf = 0;
3caec1bb 1894
36acb880
VZ
1895 if (ms_wcNeedsSwap)
1896 {
1897 // need to copy to temp buffer to switch endianness
74a7eb0b 1898 // (doing WC_BSWAP twice on the original buffer won't help, as it
36acb880 1899 // could be in read-only memory, or be accessed in some other thread)
74a7eb0b 1900 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
17a1ebd1
VZ
1901 for ( size_t i = 0; i < inlen; i++ )
1902 tmpbuf[n] = WC_BSWAP(psz[i]);
ef199164 1903
156162ec 1904 tmpbuf[inlen] = L'\0';
74a7eb0b 1905 psz = tmpbuf;
36acb880 1906 }
3a0d76bc 1907
36acb880
VZ
1908 if (buf)
1909 {
1910 // have destination buffer, convert there
1911 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1912
ef199164 1913 res = n - outbuf;
adb45366 1914
49dd9820
VS
1915 // NB: iconv was given only wcslen(psz) characters on input, and so
1916 // it couldn't convert the trailing zero. Let's do it ourselves
1917 // if there's some room left for it in the output buffer.
1918 if (res < n)
1919 buf[0] = 0;
36acb880
VZ
1920 }
1921 else
1922 {
ef199164 1923 // no destination buffer: convert using temp buffer
36acb880
VZ
1924 // to calculate destination buffer requirement
1925 char tbuf[16];
1926 res = 0;
ef199164
DS
1927 do
1928 {
1929 buf = tbuf;
1930 outbuf = 16;
36acb880
VZ
1931
1932 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1933
36acb880 1934 res += 16 - outbuf;
ef199164
DS
1935 }
1936 while ((cres == (size_t)-1) && (errno == E2BIG));
f1339c56 1937 }
dccce9ea 1938
36acb880
VZ
1939 if (ms_wcNeedsSwap)
1940 {
1941 free(tmpbuf);
1942 }
dccce9ea 1943
36acb880
VZ
1944 if (ICONV_FAILED(cres, inbuf))
1945 {
ce6f8d6f 1946 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
467e0479 1947 return wxCONV_FAILED;
36acb880
VZ
1948 }
1949
1950 return res;
1951}
1952
7ef3ab50 1953size_t wxMBConv_iconv::GetMBNulLen() const
eec47cc6 1954{
c1464d9d 1955 if ( m_minMBCharWidth == 0 )
eec47cc6
VZ
1956 {
1957 wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1958
1959#if wxUSE_THREADS
1960 // NB: explained in MB2WC
1961 wxMutexLocker lock(self->m_iconvMutex);
1962#endif
1963
999020e1 1964 const wchar_t *wnul = L"";
c1464d9d 1965 char buf[8]; // should be enough for NUL in any encoding
356410fc 1966 size_t inLen = sizeof(wchar_t),
c1464d9d 1967 outLen = WXSIZEOF(buf);
ef199164
DS
1968 char *inBuff = (char *)wnul;
1969 char *outBuff = buf;
1970 if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
356410fc 1971 {
c1464d9d 1972 self->m_minMBCharWidth = (size_t)-1;
356410fc
VZ
1973 }
1974 else // ok
1975 {
ef199164 1976 self->m_minMBCharWidth = outBuff - buf;
356410fc 1977 }
eec47cc6
VZ
1978 }
1979
c1464d9d 1980 return m_minMBCharWidth;
eec47cc6
VZ
1981}
1982
ba98e032
VS
1983#if wxUSE_UNICODE_UTF8
1984bool wxMBConv_iconv::IsUTF8() const
1985{
86501081
VS
1986 return wxStricmp(m_name, "UTF-8") == 0 ||
1987 wxStricmp(m_name, "UTF8") == 0;
ba98e032
VS
1988}
1989#endif
1990
b040e242 1991#endif // HAVE_ICONV
36acb880 1992
e95354ec 1993
36acb880
VZ
1994// ============================================================================
1995// Win32 conversion classes
1996// ============================================================================
1cd52418 1997
e95354ec 1998#ifdef wxHAVE_WIN32_MB2WC
373658eb 1999
8b04d4c4 2000// from utils.cpp
d775fa82 2001#if wxUSE_FONTMAP
86501081 2002extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const char *charset);
8b04d4c4 2003extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 2004#endif
373658eb 2005
e95354ec 2006class wxMBConv_win32 : public wxMBConv
1cd52418
OK
2007{
2008public:
bde4baac
VZ
2009 wxMBConv_win32()
2010 {
2011 m_CodePage = CP_ACP;
c1464d9d 2012 m_minMBCharWidth = 0;
bde4baac
VZ
2013 }
2014
d36c9347 2015 wxMBConv_win32(const wxMBConv_win32& conv)
1e1c5d62 2016 : wxMBConv()
d36c9347
VZ
2017 {
2018 m_CodePage = conv.m_CodePage;
2019 m_minMBCharWidth = conv.m_minMBCharWidth;
2020 }
2021
7608a683 2022#if wxUSE_FONTMAP
86501081 2023 wxMBConv_win32(const char* name)
bde4baac
VZ
2024 {
2025 m_CodePage = wxCharsetToCodepage(name);
c1464d9d 2026 m_minMBCharWidth = 0;
bde4baac 2027 }
dccce9ea 2028
e95354ec 2029 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
2030 {
2031 m_CodePage = wxEncodingToCodepage(encoding);
c1464d9d 2032 m_minMBCharWidth = 0;
bde4baac 2033 }
eec47cc6 2034#endif // wxUSE_FONTMAP
8b04d4c4 2035
d36c9347 2036 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 2037 {
02272c9c
VZ
2038 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
2039 // the behaviour is not compatible with the Unix version (using iconv)
2040 // and break the library itself, e.g. wxTextInputStream::NextChar()
2041 // wouldn't work if reading an incomplete MB char didn't result in an
2042 // error
667e5b3e 2043 //
89028980 2044 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
830f8f11
VZ
2045 // Win XP or newer and it is not supported for UTF-[78] so we always
2046 // use our own conversions in this case. See
89028980
VS
2047 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2048 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
830f8f11 2049 if ( m_CodePage == CP_UTF8 )
89028980 2050 {
5487ff0f 2051 return wxMBConvUTF8().MB2WC(buf, psz, n);
89028980 2052 }
830f8f11
VZ
2053
2054 if ( m_CodePage == CP_UTF7 )
2055 {
5487ff0f 2056 return wxMBConvUTF7().MB2WC(buf, psz, n);
830f8f11
VZ
2057 }
2058
2059 int flags = 0;
2060 if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
2061 IsAtLeastWin2kSP4() )
89028980 2062 {
830f8f11 2063 flags = MB_ERR_INVALID_CHARS;
89028980 2064 }
667e5b3e 2065
2b5f62a0
VZ
2066 const size_t len = ::MultiByteToWideChar
2067 (
2068 m_CodePage, // code page
667e5b3e 2069 flags, // flags: fall on error
2b5f62a0
VZ
2070 psz, // input string
2071 -1, // its length (NUL-terminated)
b4da152e 2072 buf, // output string
2b5f62a0
VZ
2073 buf ? n : 0 // size of output buffer
2074 );
89028980
VS
2075 if ( !len )
2076 {
2077 // function totally failed
467e0479 2078 return wxCONV_FAILED;
89028980
VS
2079 }
2080
2081 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2082 // check if we succeeded, by doing a double trip:
2083 if ( !flags && buf )
2084 {
53c174fc
VZ
2085 const size_t mbLen = strlen(psz);
2086 wxCharBuffer mbBuf(mbLen);
89028980
VS
2087 if ( ::WideCharToMultiByte
2088 (
2089 m_CodePage,
2090 0,
2091 buf,
2092 -1,
2093 mbBuf.data(),
53c174fc 2094 mbLen + 1, // size in bytes, not length
89028980
VS
2095 NULL,
2096 NULL
2097 ) == 0 ||
2098 strcmp(mbBuf, psz) != 0 )
2099 {
2100 // we didn't obtain the same thing we started from, hence
2101 // the conversion was lossy and we consider that it failed
467e0479 2102 return wxCONV_FAILED;
89028980
VS
2103 }
2104 }
2b5f62a0 2105
03a991bc
VZ
2106 // note that it returns count of written chars for buf != NULL and size
2107 // of the needed buffer for buf == NULL so in either case the length of
2108 // the string (which never includes the terminating NUL) is one less
89028980 2109 return len - 1;
f1339c56 2110 }
dccce9ea 2111
d36c9347 2112 virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 2113 {
13dd924a
VZ
2114 /*
2115 we have a problem here: by default, WideCharToMultiByte() may
2116 replace characters unrepresentable in the target code page with bad
2117 quality approximations such as turning "1/2" symbol (U+00BD) into
2118 "1" for the code pages which don't have it and we, obviously, want
2119 to avoid this at any price
d775fa82 2120
13dd924a
VZ
2121 the trouble is that this function does it _silently_, i.e. it won't
2122 even tell us whether it did or not... Win98/2000 and higher provide
2123 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2124 we have to resort to a round trip, i.e. check that converting back
2125 results in the same string -- this is, of course, expensive but
2126 otherwise we simply can't be sure to not garble the data.
2127 */
2128
2129 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2130 // it doesn't work with CJK encodings (which we test for rather roughly
2131 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2132 // supporting it
907173e5
WS
2133 BOOL usedDef wxDUMMY_INITIALIZE(false);
2134 BOOL *pUsedDef;
13dd924a
VZ
2135 int flags;
2136 if ( CanUseNoBestFit() && m_CodePage < 50000 )
2137 {
2138 // it's our lucky day
2139 flags = WC_NO_BEST_FIT_CHARS;
2140 pUsedDef = &usedDef;
2141 }
2142 else // old system or unsupported encoding
2143 {
2144 flags = 0;
2145 pUsedDef = NULL;
2146 }
2147
2b5f62a0
VZ
2148 const size_t len = ::WideCharToMultiByte
2149 (
2150 m_CodePage, // code page
13dd924a
VZ
2151 flags, // either none or no best fit
2152 pwz, // input string
2b5f62a0
VZ
2153 -1, // it is (wide) NUL-terminated
2154 buf, // output buffer
2155 buf ? n : 0, // and its size
2156 NULL, // default "replacement" char
13dd924a 2157 pUsedDef // [out] was it used?
2b5f62a0
VZ
2158 );
2159
13dd924a
VZ
2160 if ( !len )
2161 {
2162 // function totally failed
467e0479 2163 return wxCONV_FAILED;
13dd924a
VZ
2164 }
2165
2166 // if we were really converting, check if we succeeded
2167 if ( buf )
2168 {
2169 if ( flags )
2170 {
2171 // check if the conversion failed, i.e. if any replacements
2172 // were done
2173 if ( usedDef )
467e0479 2174 return wxCONV_FAILED;
13dd924a
VZ
2175 }
2176 else // we must resort to double tripping...
2177 {
2178 wxWCharBuffer wcBuf(n);
467e0479 2179 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
13dd924a
VZ
2180 wcscmp(wcBuf, pwz) != 0 )
2181 {
2182 // we didn't obtain the same thing we started from, hence
2183 // the conversion was lossy and we consider that it failed
467e0479 2184 return wxCONV_FAILED;
13dd924a
VZ
2185 }
2186 }
2187 }
2188
03a991bc 2189 // see the comment above for the reason of "len - 1"
13dd924a 2190 return len - 1;
f1339c56 2191 }
dccce9ea 2192
7ef3ab50
VZ
2193 virtual size_t GetMBNulLen() const
2194 {
2195 if ( m_minMBCharWidth == 0 )
2196 {
2197 int len = ::WideCharToMultiByte
2198 (
2199 m_CodePage, // code page
2200 0, // no flags
2201 L"", // input string
2202 1, // translate just the NUL
2203 NULL, // output buffer
2204 0, // and its size
2205 NULL, // no replacement char
2206 NULL // [out] don't care if it was used
2207 );
2208
2209 wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2210 switch ( len )
2211 {
2212 default:
2213 wxLogDebug(_T("Unexpected NUL length %d"), len);
ef199164
DS
2214 self->m_minMBCharWidth = (size_t)-1;
2215 break;
7ef3ab50
VZ
2216
2217 case 0:
2218 self->m_minMBCharWidth = (size_t)-1;
2219 break;
2220
2221 case 1:
2222 case 2:
2223 case 4:
2224 self->m_minMBCharWidth = len;
2225 break;
2226 }
2227 }
2228
2229 return m_minMBCharWidth;
2230 }
2231
d36c9347
VZ
2232 virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2233
13dd924a
VZ
2234 bool IsOk() const { return m_CodePage != -1; }
2235
2236private:
2237 static bool CanUseNoBestFit()
2238 {
2239 static int s_isWin98Or2k = -1;
2240
2241 if ( s_isWin98Or2k == -1 )
2242 {
2243 int verMaj, verMin;
2244 switch ( wxGetOsVersion(&verMaj, &verMin) )
2245 {
406d283a 2246 case wxOS_WINDOWS_9X:
13dd924a
VZ
2247 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2248 break;
2249
406d283a 2250 case wxOS_WINDOWS_NT:
13dd924a
VZ
2251 s_isWin98Or2k = verMaj >= 5;
2252 break;
2253
2254 default:
ef199164 2255 // unknown: be conservative by default
13dd924a 2256 s_isWin98Or2k = 0;
ef199164 2257 break;
13dd924a
VZ
2258 }
2259
2260 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
2261 }
2262
2263 return s_isWin98Or2k == 1;
2264 }
f1339c56 2265
89028980
VS
2266 static bool IsAtLeastWin2kSP4()
2267 {
8942f83a
WS
2268#ifdef __WXWINCE__
2269 return false;
2270#else
89028980
VS
2271 static int s_isAtLeastWin2kSP4 = -1;
2272
2273 if ( s_isAtLeastWin2kSP4 == -1 )
2274 {
2275 OSVERSIONINFOEX ver;
2276
2277 memset(&ver, 0, sizeof(ver));
2278 ver.dwOSVersionInfoSize = sizeof(ver);
2279 GetVersionEx((OSVERSIONINFO*)&ver);
2280
2281 s_isAtLeastWin2kSP4 =
2282 ((ver.dwMajorVersion > 5) || // Vista+
2283 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2284 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2285 ver.wServicePackMajor >= 4)) // 2000 SP4+
2286 ? 1 : 0;
2287 }
2288
2289 return s_isAtLeastWin2kSP4 == 1;
8942f83a 2290#endif
89028980
VS
2291 }
2292
eec47cc6 2293
c1464d9d 2294 // the code page we're working with
b1d66b54 2295 long m_CodePage;
c1464d9d 2296
7ef3ab50 2297 // cached result of GetMBNulLen(), set to 0 initially meaning
c1464d9d
VZ
2298 // "unknown"
2299 size_t m_minMBCharWidth;
1cd52418 2300};
e95354ec
VZ
2301
2302#endif // wxHAVE_WIN32_MB2WC
2303
f7e98dee 2304// ============================================================================
5c4ed98d 2305// CoreFoundation conversion classes
f7e98dee
RN
2306// ============================================================================
2307
5c4ed98d 2308#ifdef __DARWIN__
f7e98dee 2309
ef199164
DS
// RN: There is no UTF-32 support in either Core Foundation or Cocoa.
// Strangely enough, Core Foundation uses UTF-32 internally quite a bit -
// it's just not public (yet).
f7e98dee 2313
f7e98dee 2314CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 2315{
638357a0 2316 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ef199164
DS
2317
2318 switch (encoding)
ecd9653b 2319 {
ef199164
DS
2320 case wxFONTENCODING_DEFAULT :
2321 enc = CFStringGetSystemEncoding();
2322 break ;
2323
ecd9653b
WS
2324 case wxFONTENCODING_ISO8859_1 :
2325 enc = kCFStringEncodingISOLatin1 ;
2326 break ;
2327 case wxFONTENCODING_ISO8859_2 :
2328 enc = kCFStringEncodingISOLatin2;
2329 break ;
2330 case wxFONTENCODING_ISO8859_3 :
2331 enc = kCFStringEncodingISOLatin3 ;
2332 break ;
2333 case wxFONTENCODING_ISO8859_4 :
2334 enc = kCFStringEncodingISOLatin4;
2335 break ;
2336 case wxFONTENCODING_ISO8859_5 :
2337 enc = kCFStringEncodingISOLatinCyrillic;
2338 break ;
2339 case wxFONTENCODING_ISO8859_6 :
2340 enc = kCFStringEncodingISOLatinArabic;
2341 break ;
2342 case wxFONTENCODING_ISO8859_7 :
2343 enc = kCFStringEncodingISOLatinGreek;
2344 break ;
2345 case wxFONTENCODING_ISO8859_8 :
2346 enc = kCFStringEncodingISOLatinHebrew;
2347 break ;
2348 case wxFONTENCODING_ISO8859_9 :
2349 enc = kCFStringEncodingISOLatin5;
2350 break ;
2351 case wxFONTENCODING_ISO8859_10 :
2352 enc = kCFStringEncodingISOLatin6;
2353 break ;
2354 case wxFONTENCODING_ISO8859_11 :
2355 enc = kCFStringEncodingISOLatinThai;
2356 break ;
2357 case wxFONTENCODING_ISO8859_13 :
2358 enc = kCFStringEncodingISOLatin7;
2359 break ;
2360 case wxFONTENCODING_ISO8859_14 :
2361 enc = kCFStringEncodingISOLatin8;
2362 break ;
2363 case wxFONTENCODING_ISO8859_15 :
2364 enc = kCFStringEncodingISOLatin9;
2365 break ;
2366
2367 case wxFONTENCODING_KOI8 :
2368 enc = kCFStringEncodingKOI8_R;
2369 break ;
2370 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2371 enc = kCFStringEncodingDOSRussian;
2372 break ;
2373
2374// case wxFONTENCODING_BULGARIAN :
2375// enc = ;
2376// break ;
2377
2378 case wxFONTENCODING_CP437 :
ef199164 2379 enc = kCFStringEncodingDOSLatinUS ;
ecd9653b
WS
2380 break ;
2381 case wxFONTENCODING_CP850 :
2382 enc = kCFStringEncodingDOSLatin1;
2383 break ;
2384 case wxFONTENCODING_CP852 :
2385 enc = kCFStringEncodingDOSLatin2;
2386 break ;
2387 case wxFONTENCODING_CP855 :
2388 enc = kCFStringEncodingDOSCyrillic;
2389 break ;
2390 case wxFONTENCODING_CP866 :
ef199164 2391 enc = kCFStringEncodingDOSRussian ;
ecd9653b
WS
2392 break ;
2393 case wxFONTENCODING_CP874 :
2394 enc = kCFStringEncodingDOSThai;
2395 break ;
2396 case wxFONTENCODING_CP932 :
2397 enc = kCFStringEncodingDOSJapanese;
2398 break ;
2399 case wxFONTENCODING_CP936 :
ef199164 2400 enc = kCFStringEncodingDOSChineseSimplif ;
ecd9653b
WS
2401 break ;
2402 case wxFONTENCODING_CP949 :
2403 enc = kCFStringEncodingDOSKorean;
2404 break ;
2405 case wxFONTENCODING_CP950 :
2406 enc = kCFStringEncodingDOSChineseTrad;
2407 break ;
ecd9653b
WS
2408 case wxFONTENCODING_CP1250 :
2409 enc = kCFStringEncodingWindowsLatin2;
2410 break ;
2411 case wxFONTENCODING_CP1251 :
ef199164 2412 enc = kCFStringEncodingWindowsCyrillic ;
ecd9653b
WS
2413 break ;
2414 case wxFONTENCODING_CP1252 :
ef199164 2415 enc = kCFStringEncodingWindowsLatin1 ;
ecd9653b
WS
2416 break ;
2417 case wxFONTENCODING_CP1253 :
2418 enc = kCFStringEncodingWindowsGreek;
2419 break ;
2420 case wxFONTENCODING_CP1254 :
2421 enc = kCFStringEncodingWindowsLatin5;
2422 break ;
2423 case wxFONTENCODING_CP1255 :
ef199164 2424 enc = kCFStringEncodingWindowsHebrew ;
ecd9653b
WS
2425 break ;
2426 case wxFONTENCODING_CP1256 :
ef199164 2427 enc = kCFStringEncodingWindowsArabic ;
ecd9653b
WS
2428 break ;
2429 case wxFONTENCODING_CP1257 :
2430 enc = kCFStringEncodingWindowsBalticRim;
2431 break ;
638357a0
RN
2432// This only really encodes to UTF7 (if that) evidently
2433// case wxFONTENCODING_UTF7 :
2434// enc = kCFStringEncodingNonLossyASCII ;
2435// break ;
ecd9653b
WS
2436 case wxFONTENCODING_UTF8 :
2437 enc = kCFStringEncodingUTF8 ;
2438 break ;
2439 case wxFONTENCODING_EUC_JP :
2440 enc = kCFStringEncodingEUC_JP;
2441 break ;
2442 case wxFONTENCODING_UTF16 :
f7e98dee 2443 enc = kCFStringEncodingUnicode ;
ecd9653b 2444 break ;
f7e98dee
RN
2445 case wxFONTENCODING_MACROMAN :
2446 enc = kCFStringEncodingMacRoman ;
2447 break ;
2448 case wxFONTENCODING_MACJAPANESE :
2449 enc = kCFStringEncodingMacJapanese ;
2450 break ;
2451 case wxFONTENCODING_MACCHINESETRAD :
2452 enc = kCFStringEncodingMacChineseTrad ;
2453 break ;
2454 case wxFONTENCODING_MACKOREAN :
2455 enc = kCFStringEncodingMacKorean ;
2456 break ;
2457 case wxFONTENCODING_MACARABIC :
2458 enc = kCFStringEncodingMacArabic ;
2459 break ;
2460 case wxFONTENCODING_MACHEBREW :
2461 enc = kCFStringEncodingMacHebrew ;
2462 break ;
2463 case wxFONTENCODING_MACGREEK :
2464 enc = kCFStringEncodingMacGreek ;
2465 break ;
2466 case wxFONTENCODING_MACCYRILLIC :
2467 enc = kCFStringEncodingMacCyrillic ;
2468 break ;
2469 case wxFONTENCODING_MACDEVANAGARI :
2470 enc = kCFStringEncodingMacDevanagari ;
2471 break ;
2472 case wxFONTENCODING_MACGURMUKHI :
2473 enc = kCFStringEncodingMacGurmukhi ;
2474 break ;
2475 case wxFONTENCODING_MACGUJARATI :
2476 enc = kCFStringEncodingMacGujarati ;
2477 break ;
2478 case wxFONTENCODING_MACORIYA :
2479 enc = kCFStringEncodingMacOriya ;
2480 break ;
2481 case wxFONTENCODING_MACBENGALI :
2482 enc = kCFStringEncodingMacBengali ;
2483 break ;
2484 case wxFONTENCODING_MACTAMIL :
2485 enc = kCFStringEncodingMacTamil ;
2486 break ;
2487 case wxFONTENCODING_MACTELUGU :
2488 enc = kCFStringEncodingMacTelugu ;
2489 break ;
2490 case wxFONTENCODING_MACKANNADA :
2491 enc = kCFStringEncodingMacKannada ;
2492 break ;
2493 case wxFONTENCODING_MACMALAJALAM :
2494 enc = kCFStringEncodingMacMalayalam ;
2495 break ;
2496 case wxFONTENCODING_MACSINHALESE :
2497 enc = kCFStringEncodingMacSinhalese ;
2498 break ;
2499 case wxFONTENCODING_MACBURMESE :
2500 enc = kCFStringEncodingMacBurmese ;
2501 break ;
2502 case wxFONTENCODING_MACKHMER :
2503 enc = kCFStringEncodingMacKhmer ;
2504 break ;
2505 case wxFONTENCODING_MACTHAI :
2506 enc = kCFStringEncodingMacThai ;
2507 break ;
2508 case wxFONTENCODING_MACLAOTIAN :
2509 enc = kCFStringEncodingMacLaotian ;
2510 break ;
2511 case wxFONTENCODING_MACGEORGIAN :
2512 enc = kCFStringEncodingMacGeorgian ;
2513 break ;
2514 case wxFONTENCODING_MACARMENIAN :
2515 enc = kCFStringEncodingMacArmenian ;
2516 break ;
2517 case wxFONTENCODING_MACCHINESESIMP :
2518 enc = kCFStringEncodingMacChineseSimp ;
2519 break ;
2520 case wxFONTENCODING_MACTIBETAN :
2521 enc = kCFStringEncodingMacTibetan ;
2522 break ;
2523 case wxFONTENCODING_MACMONGOLIAN :
2524 enc = kCFStringEncodingMacMongolian ;
2525 break ;
2526 case wxFONTENCODING_MACETHIOPIC :
2527 enc = kCFStringEncodingMacEthiopic ;
2528 break ;
2529 case wxFONTENCODING_MACCENTRALEUR :
2530 enc = kCFStringEncodingMacCentralEurRoman ;
2531 break ;
2532 case wxFONTENCODING_MACVIATNAMESE :
2533 enc = kCFStringEncodingMacVietnamese ;
2534 break ;
2535 case wxFONTENCODING_MACARABICEXT :
2536 enc = kCFStringEncodingMacExtArabic ;
2537 break ;
2538 case wxFONTENCODING_MACSYMBOL :
2539 enc = kCFStringEncodingMacSymbol ;
2540 break ;
2541 case wxFONTENCODING_MACDINGBATS :
2542 enc = kCFStringEncodingMacDingbats ;
2543 break ;
2544 case wxFONTENCODING_MACTURKISH :
2545 enc = kCFStringEncodingMacTurkish ;
2546 break ;
2547 case wxFONTENCODING_MACCROATIAN :
2548 enc = kCFStringEncodingMacCroatian ;
2549 break ;
2550 case wxFONTENCODING_MACICELANDIC :
2551 enc = kCFStringEncodingMacIcelandic ;
2552 break ;
2553 case wxFONTENCODING_MACROMANIAN :
2554 enc = kCFStringEncodingMacRomanian ;
2555 break ;
2556 case wxFONTENCODING_MACCELTIC :
2557 enc = kCFStringEncodingMacCeltic ;
2558 break ;
2559 case wxFONTENCODING_MACGAELIC :
2560 enc = kCFStringEncodingMacGaelic ;
2561 break ;
ecd9653b
WS
2562// case wxFONTENCODING_MACKEYBOARD :
2563// enc = kCFStringEncodingMacKeyboardGlyphs ;
2564// break ;
ef199164 2565
ecd9653b
WS
2566 default :
2567 // because gcc is picky
2568 break ;
ef199164
DS
2569 }
2570
ecd9653b 2571 return enc ;
f7e98dee
RN
2572}
2573
5c4ed98d 2574class wxMBConv_cf : public wxMBConv
f7e98dee
RN
2575{
2576public:
5c4ed98d 2577 wxMBConv_cf()
f7e98dee
RN
2578 {
2579 Init(CFStringGetSystemEncoding()) ;
2580 }
2581
5c4ed98d 2582 wxMBConv_cf(const wxMBConv_cf& conv)
d36c9347
VZ
2583 {
2584 m_encoding = conv.m_encoding;
2585 }
2586
a6900d10 2587#if wxUSE_FONTMAP
5c4ed98d 2588 wxMBConv_cf(const char* name)
f7e98dee 2589 {
267e11c5 2590 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
f7e98dee 2591 }
a6900d10 2592#endif
f7e98dee 2593
5c4ed98d 2594 wxMBConv_cf(wxFontEncoding encoding)
f7e98dee
RN
2595 {
2596 Init( wxCFStringEncFromFontEnc(encoding) );
2597 }
2598
5c4ed98d 2599 virtual ~wxMBConv_cf()
f7e98dee
RN
2600 {
2601 }
2602
2603 void Init( CFStringEncoding encoding)
2604 {
638357a0 2605 m_encoding = encoding ;
f7e98dee
RN
2606 }
2607
2608 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2609 {
2610 wxASSERT(szUnConv);
ecd9653b 2611
638357a0
RN
2612 CFStringRef theString = CFStringCreateWithBytes (
2613 NULL, //the allocator
2614 (const UInt8*)szUnConv,
2615 strlen(szUnConv),
2616 m_encoding,
2617 false //no BOM/external representation
f7e98dee
RN
2618 );
2619
2620 wxASSERT(theString);
2621
638357a0
RN
2622 size_t nOutLength = CFStringGetLength(theString);
2623
2624 if (szOut == NULL)
f7e98dee 2625 {
f7e98dee 2626 CFRelease(theString);
638357a0 2627 return nOutLength;
f7e98dee 2628 }
ecd9653b 2629
638357a0 2630 CFRange theRange = { 0, nOutSize };
ecd9653b 2631
638357a0
RN
2632#if SIZEOF_WCHAR_T == 4
2633 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2634#endif
3698ae71 2635
f7e98dee 2636 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
3698ae71 2637
f7e98dee 2638 CFRelease(theString);
ecd9653b 2639
ef199164 2640 szUniCharBuffer[nOutLength] = '\0';
f7e98dee
RN
2641
2642#if SIZEOF_WCHAR_T == 4
ef199164
DS
2643 wxMBConvUTF16 converter;
2644 converter.MB2WC( szOut, (const char*)szUniCharBuffer, nOutSize );
2645 delete [] szUniCharBuffer;
f7e98dee 2646#endif
3698ae71 2647
638357a0 2648 return nOutLength;
f7e98dee
RN
2649 }
2650
2651 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2652 {
638357a0 2653 wxASSERT(szUnConv);
3698ae71 2654
f7e98dee 2655 size_t nRealOutSize;
638357a0 2656 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 2657 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2658
f7e98dee 2659#if SIZEOF_WCHAR_T == 4
d9d488cf 2660 wxMBConvUTF16 converter ;
ef199164
DS
2661 nBufSize = converter.WC2MB( NULL, szUnConv, 0 );
2662 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1];
2663 converter.WC2MB( (char*) szUniBuffer, szUnConv, nBufSize + sizeof(UniChar));
f7e98dee 2664 nBufSize /= sizeof(UniChar);
f7e98dee
RN
2665#endif
2666
2667 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2668 NULL, //allocator
2669 szUniBuffer,
2670 nBufSize,
638357a0 2671 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 2672 );
ecd9653b 2673
f7e98dee 2674 wxASSERT(theString);
ecd9653b 2675
f7e98dee 2676 //Note that CER puts a BOM when converting to unicode
638357a0
RN
2677 //so we check and use getchars instead in that case
2678 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 2679 {
638357a0
RN
2680 if (szOut != NULL)
2681 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
3698ae71 2682
638357a0
RN
2683 nRealOutSize = CFStringGetLength(theString) + 1;
2684 }
2685 else
2686 {
2687 CFStringGetBytes(
2688 theString,
2689 CFRangeMake(0, CFStringGetLength(theString)),
2690 m_encoding,
2691 0, //what to put in characters that can't be converted -
2692 //0 tells CFString to return NULL if it meets such a character
2693 false, //not an external representation
2694 (UInt8*) szOut,
3698ae71 2695 nOutSize,
638357a0
RN
2696 (CFIndex*) &nRealOutSize
2697 );
f7e98dee 2698 }
ecd9653b 2699
638357a0 2700 CFRelease(theString);
ecd9653b 2701
638357a0
RN
2702#if SIZEOF_WCHAR_T == 4
2703 delete[] szUniBuffer;
2704#endif
ecd9653b 2705
f7e98dee
RN
2706 return nRealOutSize - 1;
2707 }
2708
5c4ed98d 2709 virtual wxMBConv *Clone() const { return new wxMBConv_cf(*this); }
d36c9347 2710
f7e98dee 2711 bool IsOk() const
ecd9653b 2712 {
3698ae71 2713 return m_encoding != kCFStringEncodingInvalidId &&
638357a0 2714 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2715 }
2716
2717private:
638357a0 2718 CFStringEncoding m_encoding ;
f7e98dee
RN
2719};
2720
5c4ed98d 2721#endif // __DARWIN__
f7e98dee 2722
335d31e0
SC
2723// ============================================================================
2724// Mac conversion classes
2725// ============================================================================
2726
5c4ed98d
DE
2727/* Although we are in the base library we currently have this wxMac
2728 * conditional. This is not generally good but fortunately does not affect
2729 * the ABI of the base library, only what encodings might work.
2730 * It does mean that a wxBase built as part of wxMac has slightly more support
2731 * than one built for wxCocoa or even wxGtk.
2732 */
335d31e0
SC
2733#if defined(__WXMAC__) && defined(TARGET_CARBON)
2734
2735class wxMBConv_mac : public wxMBConv
2736{
2737public:
2738 wxMBConv_mac()
2739 {
2740 Init(CFStringGetSystemEncoding()) ;
2741 }
2742
d36c9347
VZ
2743 wxMBConv_mac(const wxMBConv_mac& conv)
2744 {
2745 Init(conv.m_char_encoding);
2746 }
2747
2d1659cf 2748#if wxUSE_FONTMAP
faa60a4f 2749 wxMBConv_mac(const char* name)
335d31e0 2750 {
ef199164 2751 Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) );
335d31e0 2752 }
2d1659cf 2753#endif
335d31e0
SC
2754
2755 wxMBConv_mac(wxFontEncoding encoding)
2756 {
d775fa82
WS
2757 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2758 }
2759
d3c7fc99 2760 virtual ~wxMBConv_mac()
d775fa82
WS
2761 {
2762 OSStatus status = noErr ;
739cb14a
SC
2763 if (m_MB2WC_converter)
2764 status = TECDisposeConverter(m_MB2WC_converter);
2765 if (m_WC2MB_converter)
2766 status = TECDisposeConverter(m_WC2MB_converter);
d775fa82
WS
2767 }
2768
739cb14a
SC
2769 void Init( TextEncodingBase encoding,TextEncodingVariant encodingVariant = kTextEncodingDefaultVariant ,
2770 TextEncodingFormat encodingFormat = kTextEncodingDefaultFormat)
d775fa82 2771 {
739cb14a
SC
2772 m_MB2WC_converter = NULL ;
2773 m_WC2MB_converter = NULL ;
2774 m_char_encoding = CreateTextEncoding(encoding, encodingVariant, encodingFormat) ;
ef199164 2775 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 0, kUnicode16BitFormat) ;
739cb14a 2776 }
d775fa82 2777
739cb14a
SC
2778 virtual void CreateIfNeeded() const
2779 {
2780 if ( m_MB2WC_converter == NULL && m_WC2MB_converter == NULL )
2781 {
2782 OSStatus status = noErr ;
2783 status = TECCreateConverter(&m_MB2WC_converter,
d775fa82
WS
2784 m_char_encoding,
2785 m_unicode_encoding);
739cb14a
SC
2786 wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2787 status = TECCreateConverter(&m_WC2MB_converter,
d775fa82
WS
2788 m_unicode_encoding,
2789 m_char_encoding);
739cb14a
SC
2790 wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2791 }
d775fa82 2792 }
57bd4c60 2793
335d31e0
SC
2794 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2795 {
739cb14a 2796 CreateIfNeeded() ;
d775fa82
WS
2797 OSStatus status = noErr ;
2798 ByteCount byteOutLen ;
9088c87b 2799 ByteCount byteInLen = strlen(psz) + 1;
d775fa82
WS
2800 wchar_t *tbuf = NULL ;
2801 UniChar* ubuf = NULL ;
2802 size_t res = 0 ;
2803
2804 if (buf == NULL)
2805 {
ef199164
DS
2806 // Apple specs say at least 32
2807 n = wxMax( 32, byteInLen ) ;
2808 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
d775fa82 2809 }
ef199164 2810
d775fa82 2811 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
ef199164 2812
f3a355ce 2813#if SIZEOF_WCHAR_T == 4
d775fa82 2814 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2815#else
d775fa82 2816 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2817#endif
ef199164
DS
2818
2819 status = TECConvertText(
2820 m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
2821 (TextPtr) ubuf, byteBufferLen, &byteOutLen);
2822
f3a355ce 2823#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2824 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2825 // is not properly terminated we get random characters at the end
2826 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d9d488cf 2827 wxMBConvUTF16 converter ;
ef199164 2828 res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
d775fa82 2829 free( ubuf ) ;
f3a355ce 2830#else
d775fa82 2831 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2832#endif
ef199164 2833
d775fa82
WS
2834 if ( buf == NULL )
2835 free(tbuf) ;
335d31e0 2836
335d31e0
SC
2837 if ( buf && res < n)
2838 buf[res] = 0;
2839
d775fa82 2840 return res ;
335d31e0
SC
2841 }
2842
2843 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82 2844 {
739cb14a 2845 CreateIfNeeded() ;
d775fa82
WS
2846 OSStatus status = noErr ;
2847 ByteCount byteOutLen ;
2848 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2849
2850 char *tbuf = NULL ;
2851
2852 if (buf == NULL)
2853 {
ef199164
DS
2854 // Apple specs say at least 32
2855 n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2856 tbuf = (char*) malloc( n ) ;
2857 }
2858
2859 ByteCount byteBufferLen = n ;
2860 UniChar* ubuf = NULL ;
ef199164 2861
f3a355ce 2862#if SIZEOF_WCHAR_T == 4
d9d488cf 2863 wxMBConvUTF16 converter ;
ef199164 2864 size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
d775fa82
WS
2865 byteInLen = unicharlen ;
2866 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
ef199164 2867 converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
f3a355ce 2868#else
d775fa82 2869 ubuf = (UniChar*) psz ;
f3a355ce 2870#endif
ef199164
DS
2871
2872 status = TECConvertText(
2873 m_WC2MB_converter, (ConstTextPtr) ubuf, byteInLen, &byteInLen,
2874 (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2875
f3a355ce 2876#if SIZEOF_WCHAR_T == 4
d775fa82 2877 free( ubuf ) ;
f3a355ce 2878#endif
ef199164 2879
d775fa82
WS
2880 if ( buf == NULL )
2881 free(tbuf) ;
335d31e0 2882
d775fa82 2883 size_t res = byteOutLen ;
335d31e0 2884 if ( buf && res < n)
638357a0 2885 {
335d31e0 2886 buf[res] = 0;
3698ae71 2887
638357a0
RN
2888 //we need to double-trip to verify it didn't insert any ? in place
2889 //of bogus characters
2890 wxWCharBuffer wcBuf(n);
2891 size_t pszlen = wxWcslen(psz);
467e0479 2892 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
638357a0
RN
2893 wxWcslen(wcBuf) != pszlen ||
2894 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2895 {
2896 // we didn't obtain the same thing we started from, hence
2897 // the conversion was lossy and we consider that it failed
467e0479 2898 return wxCONV_FAILED;
638357a0
RN
2899 }
2900 }
335d31e0 2901
d775fa82 2902 return res ;
335d31e0
SC
2903 }
2904
d3478e2c 2905 virtual wxMBConv *Clone() const { return new wxMBConv_mac(*this); }
d36c9347 2906
335d31e0 2907 bool IsOk() const
57bd4c60 2908 {
739cb14a 2909 CreateIfNeeded() ;
57bd4c60 2910 return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL;
739cb14a 2911 }
335d31e0 2912
739cb14a
SC
2913protected :
2914 mutable TECObjectRef m_MB2WC_converter;
2915 mutable TECObjectRef m_WC2MB_converter;
d775fa82 2916
ef199164
DS
2917 TextEncodingBase m_char_encoding;
2918 TextEncodingBase m_unicode_encoding;
335d31e0
SC
2919};
2920
739cb14a
SC
2921// MB is decomposed (D) normalized UTF8
2922
2923class wxMBConv_macUTF8D : public wxMBConv_mac
2924{
2925public :
57bd4c60 2926 wxMBConv_macUTF8D()
739cb14a
SC
2927 {
2928 Init( kTextEncodingUnicodeDefault , kUnicodeNoSubset , kUnicodeUTF8Format ) ;
2929 m_uni = NULL;
fbb0b8af 2930 m_uniBack = NULL ;
739cb14a 2931 }
57bd4c60 2932
d3c7fc99 2933 virtual ~wxMBConv_macUTF8D()
739cb14a 2934 {
fbb0b8af
SC
2935 if (m_uni!=NULL)
2936 DisposeUnicodeToTextInfo(&m_uni);
2937 if (m_uniBack!=NULL)
2938 DisposeUnicodeToTextInfo(&m_uniBack);
739cb14a 2939 }
57bd4c60 2940
739cb14a
SC
2941 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2942 {
2943 CreateIfNeeded() ;
2944 OSStatus status = noErr ;
2945 ByteCount byteOutLen ;
2946 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2947
2948 char *tbuf = NULL ;
2949
2950 if (buf == NULL)
2951 {
2952 // Apple specs say at least 32
2953 n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2954 tbuf = (char*) malloc( n ) ;
2955 }
2956
2957 ByteCount byteBufferLen = n ;
2958 UniChar* ubuf = NULL ;
2959
2960#if SIZEOF_WCHAR_T == 4
2961 wxMBConvUTF16 converter ;
2962 size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
2963 byteInLen = unicharlen ;
2964 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2965 converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
2966#else
2967 ubuf = (UniChar*) psz ;
2968#endif
2969
57bd4c60
WS
2970 // ubuf is a non-decomposed UniChar buffer
2971
739cb14a
SC
2972 ByteCount dcubuflen = byteInLen * 2 + 2 ;
2973 ByteCount dcubufread , dcubufwritten ;
57bd4c60
WS
2974 UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
2975
2976 ConvertFromUnicodeToText( m_uni , byteInLen , ubuf ,
739cb14a 2977 kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , dcubuf ) ;
57bd4c60 2978
739cb14a
SC
2979 // we now convert that decomposed buffer into UTF8
2980
2981 status = TECConvertText(
2982 m_WC2MB_converter, (ConstTextPtr) dcubuf, dcubufwritten, &dcubufread,
2983 (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2984
2985 free( dcubuf );
2986
2987#if SIZEOF_WCHAR_T == 4
2988 free( ubuf ) ;
2989#endif
2990
2991 if ( buf == NULL )
2992 free(tbuf) ;
2993
2994 size_t res = byteOutLen ;
2995 if ( buf && res < n)
2996 {
2997 buf[res] = 0;
2998 // don't test for round-trip fidelity yet, we cannot guarantee it yet
2999 }
3000
3001 return res ;
3002 }
57bd4c60 3003
fbb0b8af
SC
3004 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
3005 {
3006 CreateIfNeeded() ;
3007 OSStatus status = noErr ;
3008 ByteCount byteOutLen ;
3009 ByteCount byteInLen = strlen(psz) + 1;
3010 wchar_t *tbuf = NULL ;
3011 UniChar* ubuf = NULL ;
3012 size_t res = 0 ;
57bd4c60 3013
fbb0b8af
SC
3014 if (buf == NULL)
3015 {
3016 // Apple specs say at least 32
3017 n = wxMax( 32, byteInLen ) ;
3018 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
3019 }
57bd4c60 3020
fbb0b8af 3021 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
57bd4c60 3022
fbb0b8af
SC
3023#if SIZEOF_WCHAR_T == 4
3024 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
3025#else
3026 ubuf = (UniChar*) (buf ? buf : tbuf) ;
3027#endif
57bd4c60 3028
fbb0b8af
SC
3029 ByteCount dcubuflen = byteBufferLen * 2 + 2 ;
3030 ByteCount dcubufread , dcubufwritten ;
57bd4c60 3031 UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
fbb0b8af
SC
3032
3033 status = TECConvertText(
3034 m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
3035 (TextPtr) dcubuf, dcubuflen, &byteOutLen);
3036 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
3037 // is not properly terminated we get random characters at the end
3038 dcubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
57bd4c60 3039
fbb0b8af 3040 // now from the decomposed UniChar to properly composed uniChar
57bd4c60 3041 ConvertFromUnicodeToText( m_uniBack , byteOutLen , dcubuf ,
fbb0b8af
SC
3042 kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , ubuf ) ;
3043
3044 free( dcubuf );
3045 byteOutLen = dcubufwritten ;
3046 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
57bd4c60
WS
3047
3048
fbb0b8af
SC
3049#if SIZEOF_WCHAR_T == 4
3050 wxMBConvUTF16 converter ;
3051 res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
3052 free( ubuf ) ;
3053#else
3054 res = byteOutLen / sizeof( UniChar ) ;
3055#endif
57bd4c60 3056
fbb0b8af
SC
3057 if ( buf == NULL )
3058 free(tbuf) ;
57bd4c60 3059
fbb0b8af
SC
3060 if ( buf && res < n)
3061 buf[res] = 0;
57bd4c60 3062
fbb0b8af
SC
3063 return res ;
3064 }
3065
739cb14a
SC
3066 virtual void CreateIfNeeded() const
3067 {
3068 wxMBConv_mac::CreateIfNeeded() ;
3069 if ( m_uni == NULL )
3070 {
3071 m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3072 kUnicodeNoSubset, kTextEncodingDefaultFormat);
3073 m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3074 kUnicodeCanonicalDecompVariant, kTextEncodingDefaultFormat);
3075 m_map.mappingVersion = kUnicodeUseLatestMapping;
57bd4c60
WS
3076
3077 OSStatus err = CreateUnicodeToTextInfo(&m_map, &m_uni);
739cb14a 3078 wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
57bd4c60 3079
fbb0b8af
SC
3080 m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3081 kUnicodeNoSubset, kTextEncodingDefaultFormat);
3082 m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3083 kUnicodeCanonicalCompVariant, kTextEncodingDefaultFormat);
3084 m_map.mappingVersion = kUnicodeUseLatestMapping;
57bd4c60 3085 err = CreateUnicodeToTextInfo(&m_map, &m_uniBack);
fbb0b8af 3086 wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
739cb14a
SC
3087 }
3088 }
3089protected :
3090 mutable UnicodeToTextInfo m_uni;
fbb0b8af 3091 mutable UnicodeToTextInfo m_uniBack;
739cb14a 3092 mutable UnicodeMapping m_map;
57bd4c60 3093};
335d31e0 3094#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 3095
36acb880
VZ
3096// ============================================================================
3097// wxEncodingConverter based conversion classes
3098// ============================================================================
3099
1e6feb95 3100#if wxUSE_FONTMAP
1cd52418 3101
e95354ec 3102class wxMBConv_wxwin : public wxMBConv
1cd52418 3103{
8b04d4c4
VZ
3104private:
3105 void Init()
3106 {
3107 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
3108 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
3109 }
3110
6001e347 3111public:
f1339c56
RR
3112 // temporarily just use wxEncodingConverter stuff,
3113 // so that it works while a better implementation is built
86501081 3114 wxMBConv_wxwin(const char* name)
f1339c56
RR
3115 {
3116 if (name)
267e11c5 3117 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
3118 else
3119 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 3120
8b04d4c4
VZ
3121 Init();
3122 }
3123
e95354ec 3124 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
3125 {
3126 m_enc = enc;
3127
3128 Init();
f1339c56 3129 }
dccce9ea 3130
bde4baac 3131 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
3132 {
3133 size_t inbuf = strlen(psz);
dccce9ea 3134 if (buf)
c643a977 3135 {
ef199164 3136 if (!m2w.Convert(psz, buf))
467e0479 3137 return wxCONV_FAILED;
c643a977 3138 }
f1339c56
RR
3139 return inbuf;
3140 }
dccce9ea 3141
bde4baac 3142 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 3143 {
f8d791e0 3144 const size_t inbuf = wxWcslen(psz);
f1339c56 3145 if (buf)
c643a977 3146 {
ef199164 3147 if (!w2m.Convert(psz, buf))
467e0479 3148 return wxCONV_FAILED;
c643a977 3149 }
dccce9ea 3150
f1339c56
RR
3151 return inbuf;
3152 }
dccce9ea 3153
7ef3ab50 3154 virtual size_t GetMBNulLen() const
eec47cc6
VZ
3155 {
3156 switch ( m_enc )
3157 {
3158 case wxFONTENCODING_UTF16BE:
3159 case wxFONTENCODING_UTF16LE:
c1464d9d 3160 return 2;
eec47cc6
VZ
3161
3162 case wxFONTENCODING_UTF32BE:
3163 case wxFONTENCODING_UTF32LE:
c1464d9d 3164 return 4;
eec47cc6
VZ
3165
3166 default:
c1464d9d 3167 return 1;
eec47cc6
VZ
3168 }
3169 }
3170
d36c9347
VZ
3171 virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
3172
7ef3ab50
VZ
3173 bool IsOk() const { return m_ok; }
3174
3175public:
3176 wxFontEncoding m_enc;
3177 wxEncodingConverter m2w, w2m;
3178
3179private:
cafbf6fb
VZ
3180 // were we initialized successfully?
3181 bool m_ok;
fc7a2a60 3182
e95354ec 3183 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 3184};
6001e347 3185
8f115891 3186// make the constructors available for unit testing
86501081 3187WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const char* name )
8f115891
MW
3188{
3189 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
3190 if ( !result->IsOk() )
3191 {
3192 delete result;
3193 return 0;
3194 }
ef199164 3195
8f115891
MW
3196 return result;
3197}
3198
1e6feb95
VZ
3199#endif // wxUSE_FONTMAP
3200
36acb880
VZ
3201// ============================================================================
3202// wxCSConv implementation
3203// ============================================================================
3204
8b04d4c4 3205void wxCSConv::Init()
6001e347 3206{
e95354ec
VZ
3207 m_name = NULL;
3208 m_convReal = NULL;
3209 m_deferred = true;
3210}
3211
86501081 3212wxCSConv::wxCSConv(const wxString& charset)
8b04d4c4
VZ
3213{
3214 Init();
82713003 3215
86501081 3216 if ( !charset.empty() )
e95354ec 3217 {
86501081 3218 SetName(charset.ToAscii());
e95354ec 3219 }
bda3d86a 3220
e4277538
VZ
3221#if wxUSE_FONTMAP
3222 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
3223#else
bda3d86a 3224 m_encoding = wxFONTENCODING_SYSTEM;
e4277538 3225#endif
6001e347
RR
3226}
3227
8b04d4c4
VZ
3228wxCSConv::wxCSConv(wxFontEncoding encoding)
3229{
bda3d86a 3230 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
3231 {
3232 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
3233
3234 encoding = wxFONTENCODING_SYSTEM;
3235 }
3236
8b04d4c4
VZ
3237 Init();
3238
bda3d86a 3239 m_encoding = encoding;
8b04d4c4
VZ
3240}
3241
6001e347
RR
3242wxCSConv::~wxCSConv()
3243{
65e50848
JS
3244 Clear();
3245}
3246
54380f29 3247wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 3248 : wxMBConv()
54380f29 3249{
8b04d4c4
VZ
3250 Init();
3251
54380f29 3252 SetName(conv.m_name);
8b04d4c4 3253 m_encoding = conv.m_encoding;
54380f29
GD
3254}
3255
3256wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
3257{
3258 Clear();
8b04d4c4 3259
54380f29 3260 SetName(conv.m_name);
8b04d4c4
VZ
3261 m_encoding = conv.m_encoding;
3262
54380f29
GD
3263 return *this;
3264}
3265
65e50848
JS
3266void wxCSConv::Clear()
3267{
8b04d4c4 3268 free(m_name);
e95354ec 3269 delete m_convReal;
8b04d4c4 3270
65e50848 3271 m_name = NULL;
e95354ec 3272 m_convReal = NULL;
6001e347
RR
3273}
3274
86501081 3275void wxCSConv::SetName(const char *charset)
6001e347 3276{
f1339c56
RR
3277 if (charset)
3278 {
86501081 3279 m_name = strdup(charset);
e95354ec 3280 m_deferred = true;
f1339c56 3281 }
6001e347
RR
3282}
3283
8b3eb85d 3284#if wxUSE_FONTMAP
8b3eb85d
VZ
3285
// Cache used by wxCSConv::DoCreate(): maps an encoding to the charset name
// for which a wxMBConv_iconv could be created, or to an empty string to
// remember that no name worked for this encoding.
3286WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
3f5c62f9 3287 wxEncodingNameCache );
8b3eb85d
VZ
3288
3289static wxEncodingNameCache gs_nameCache;
3290#endif
3291
e95354ec
VZ
// Create the real converter for this object's charset name (m_name) and/or
// encoding (m_encoding).
//
// Returns a new heap-allocated converter, or NULL either when no conversion
// object is needed (wxFONTENCODING_ISO8859_1 and wxFONTENCODING_DEFAULT) or
// when no suitable converter could be created (an error is logged in the
// latter case when the end of the function is reached).
3292wxMBConv *wxCSConv::DoCreate() const
3293{
ce6f8d6f
VZ
3294#if wxUSE_FONTMAP
3295 wxLogTrace(TRACE_STRCONV,
3296 wxT("creating conversion for %s"),
3297 (m_name ? m_name
86501081 3298 : (const char*)wxFontMapperBase::GetEncodingName(m_encoding).mb_str()));
ce6f8d6f
VZ
3299#endif // wxUSE_FONTMAP
3300
c547282d
VZ
3301 // check for the special case of ASCII or ISO8859-1 charset: as we have
3302 // special knowledge of it anyhow, we don't need to create a special
3303 // conversion object
e4277538
VZ
3304 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
3305 m_encoding == wxFONTENCODING_DEFAULT )
f1339c56 3306 {
e95354ec
VZ
3307 // don't convert at all
3308 return NULL;
3309 }
dccce9ea 3310
e95354ec
VZ
3311 // we trust OS to do conversion better than we can so try external
3312 // conversion methods first
3313 //
3314 // the full order is:
3315 // 1. OS conversion (iconv() under Unix or Win32 API)
3316 // 2. hard coded conversions for UTF
3317 // 3. wxEncodingConverter as fall back
3318
3319 // step (1)
3320#ifdef HAVE_ICONV
c547282d 3321#if !wxUSE_FONTMAP
e95354ec 3322 if ( m_name )
c547282d 3323#endif // !wxUSE_FONTMAP
e95354ec 3324 {
3ef10cfc 3325#if wxUSE_FONTMAP
8b3eb85d 3326 wxFontEncoding encoding(m_encoding);
3ef10cfc 3327#endif
8b3eb85d 3328
// first try the name exactly as given, if there is one
86501081 3329 if ( m_name )
8b3eb85d 3330 {
86501081 3331 wxMBConv_iconv *conv = new wxMBConv_iconv(m_name);
8b3eb85d
VZ
3332 if ( conv->IsOk() )
3333 return conv;
3334
3335 delete conv;
c547282d
VZ
3336
3337#if wxUSE_FONTMAP
8b3eb85d 3338 encoding =
86501081 3339 wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 3340#endif // wxUSE_FONTMAP
8b3eb85d
VZ
3341 }
3342#if wxUSE_FONTMAP
3343 {
// then look up the name which worked for this encoding before
3344 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
3345 if ( it != gs_nameCache.end() )
3346 {
// NOTE(review): an empty cached name means that all names failed before;
// returning here also skips steps (2) and (3) below -- confirm this is
// intended
3347 if ( it->second.empty() )
3348 return NULL;
c547282d 3349
86501081 3350 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second.ToAscii());
8b3eb85d
VZ
3351 if ( conv->IsOk() )
3352 return conv;
e95354ec 3353
8b3eb85d
VZ
3354 delete conv;
3355 }
3356
3357 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
86501081
VS
3358 // CS : in case this does not return valid names (eg for MacRoman)
3359 // encoding got a 'failure' entry in the cache all the same,
3360 // although it just has to be created using a different method, so
3361 // only store failed iconv creation attempts (or perhaps we
3362 // shouldn't do this at all ?)
3c67ec06 3363 if ( names[0] != NULL )
8b3eb85d 3364 {
3c67ec06 3365 for ( ; *names; ++names )
8b3eb85d 3366 {
86501081
VS
3367 // FIXME-UTF8: wxFontMapperBase::GetAllEncodingNames()
3368 // will need changes that will obsolete this
3369 wxString name(*names);
3370 wxMBConv_iconv *conv = new wxMBConv_iconv(name.ToAscii());
3c67ec06
SC
3371 if ( conv->IsOk() )
3372 {
3373 gs_nameCache[encoding] = *names;
3374 return conv;
3375 }
3376
3377 delete conv;
8b3eb85d
VZ
3378 }
3379
3c67ec06 3380 gs_nameCache[encoding] = _T(""); // cache the failure
8b3eb85d 3381 }
8b3eb85d
VZ
3382 }
3383#endif // wxUSE_FONTMAP
e95354ec
VZ
3384 }
3385#endif // HAVE_ICONV
3386
3387#ifdef wxHAVE_WIN32_MB2WC
3388 {
7608a683 3389#if wxUSE_FONTMAP
e95354ec
VZ
3390 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
3391 : new wxMBConv_win32(m_encoding);
3392 if ( conv->IsOk() )
3393 return conv;
3394
3395 delete conv;
7608a683
WS
3396#else
// NOTE(review): without wxUSE_FONTMAP the function returns here, before the
// hard coded UTF conversions of step (2) are tried -- confirm this is intended
3397 return NULL;
3398#endif
e95354ec
VZ
3399 }
3400#endif // wxHAVE_WIN32_MB2WC
ef199164 3401
d775fa82
WS
3402#if defined(__WXMAC__)
3403 {
5c3c8676 3404 // leave UTF16 and UTF32 to the built-ins of wx
3698ae71 3405 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
5c3c8676 3406 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
d775fa82 3407 {
2d1659cf 3408#if wxUSE_FONTMAP
d775fa82
WS
3409 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
3410 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
3411#else
3412 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
3413#endif
d775fa82 3414 if ( conv->IsOk() )
f7e98dee
RN
3415 return conv;
3416
3417 delete conv;
3418 }
3419 }
3420#endif
ef199164 3421
5c4ed98d 3422#ifdef __DARWIN__
f7e98dee
RN
3423 {
3424 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
3425 {
a6900d10 3426#if wxUSE_FONTMAP
5c4ed98d
DE
3427 wxMBConv_cf *conv = m_name ? new wxMBConv_cf(m_name)
3428 : new wxMBConv_cf(m_encoding);
a6900d10 3429#else
5c4ed98d 3430 wxMBConv_cf *conv = new wxMBConv_cf(m_encoding);
a6900d10 3431#endif
ef199164 3432
f7e98dee 3433 if ( conv->IsOk() )
d775fa82
WS
3434 return conv;
3435
3436 delete conv;
3437 }
335d31e0 3438 }
5c4ed98d
DE
3439#endif // __DARWIN__
3440
e95354ec
VZ
3441 // step (2)
3442 wxFontEncoding enc = m_encoding;
3443#if wxUSE_FONTMAP
c547282d
VZ
3444 if ( enc == wxFONTENCODING_SYSTEM && m_name )
3445 {
3446 // use "false" to suppress interactive dialogs -- we can be called from
3447 // anywhere and popping up a dialog from here is the last thing we want to
3448 // do
267e11c5 3449 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 3450 }
e95354ec
VZ
3451#endif // wxUSE_FONTMAP
3452
3453 switch ( enc )
3454 {
3455 case wxFONTENCODING_UTF7:
3456 return new wxMBConvUTF7;
3457
3458 case wxFONTENCODING_UTF8:
3459 return new wxMBConvUTF8;
3460
e95354ec
VZ
3461 case wxFONTENCODING_UTF16BE:
3462 return new wxMBConvUTF16BE;
3463
3464 case wxFONTENCODING_UTF16LE:
3465 return new wxMBConvUTF16LE;
3466
e95354ec
VZ
3467 case wxFONTENCODING_UTF32BE:
3468 return new wxMBConvUTF32BE;
3469
3470 case wxFONTENCODING_UTF32LE:
3471 return new wxMBConvUTF32LE;
3472
3473 default:
3474 // nothing to do but put here to suppress gcc warnings
ef199164 3475 break;
e95354ec
VZ
3476 }
3477
3478 // step (3)
3479#if wxUSE_FONTMAP
3480 {
3481 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
3482 : new wxMBConv_wxwin(m_encoding);
3483 if ( conv->IsOk() )
3484 return conv;
3485
3486 delete conv;
3487 }
3488#endif // wxUSE_FONTMAP
3489
a58d4f4d
VS
3490 // NB: This is a hack to prevent deadlock. What could otherwise happen
3491 // in Unicode build: wxConvLocal creation ends up being here
3492 // because of some failure and logs the error. But wxLog will try to
6a17b868
SN
3493 // attach a timestamp, for which it will need wxConvLocal (to convert
3494 // time to char* and then wchar_t*), but that fails, tries to log the
3495 // error, but wxLog has an (already locked) critical section that
3496 // guards the static buffer.
a58d4f4d
VS
3497 static bool alreadyLoggingError = false;
3498 if (!alreadyLoggingError)
3499 {
3500 alreadyLoggingError = true;
3501 wxLogError(_("Cannot convert from the charset '%s'!"),
3502 m_name ? m_name
e95354ec
VZ
3503 :
3504#if wxUSE_FONTMAP
86501081 3505 (const char*)wxFontMapperBase::GetEncodingDescription(m_encoding).ToAscii()
e95354ec 3506#else // !wxUSE_FONTMAP
86501081 3507 (const char*)wxString::Format(_("encoding %i"), m_encoding).ToAscii()
e95354ec
VZ
3508#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
3509 );
ef199164 3510
a58d4f4d
VS
3511 alreadyLoggingError = false;
3512 }
e95354ec
VZ
3513
3514 return NULL;
3515}
3516
3517void wxCSConv::CreateConvIfNeeded() const
3518{
3519 if ( m_deferred )
3520 {
3521 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a 3522
bda3d86a
VZ
3523 // if we don't have neither the name nor the encoding, use the default
3524 // encoding for this system
3525 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3526 {
4c75209f 3527#if wxUSE_INTL
02c7347b 3528 self->m_encoding = wxLocale::GetSystemEncoding();
4c75209f
VS
3529#else
3530 // fallback to some reasonable default:
3531 self->m_encoding = wxFONTENCODING_ISO8859_1;
bda3d86a 3532#endif // wxUSE_INTL
4c75209f 3533 }
bda3d86a 3534
e95354ec
VZ
3535 self->m_convReal = DoCreate();
3536 self->m_deferred = false;
6001e347 3537 }
6001e347
RR
3538}
3539
0f0298b1
VZ
3540bool wxCSConv::IsOk() const
3541{
3542 CreateConvIfNeeded();
3543
3544 // special case: no convReal created for wxFONTENCODING_ISO8859_1
3545 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
3546 return true; // always ok as we do it ourselves
3547
3548 // m_convReal->IsOk() is called at its own creation, so we know it must
3549 // be ok if m_convReal is non-NULL
3550 return m_convReal != NULL;
3551}
3552
1c714a5d
VZ
3553size_t wxCSConv::ToWChar(wchar_t *dst, size_t dstLen,
3554 const char *src, size_t srcLen) const
3555{
3556 CreateConvIfNeeded();
3557
2c74c558
VS
3558 if (m_convReal)
3559 return m_convReal->ToWChar(dst, dstLen, src, srcLen);
3560
3561 // latin-1 (direct)
3562 return wxMBConv::ToWChar(dst, dstLen, src, srcLen);
1c714a5d
VZ
3563}
3564
3565size_t wxCSConv::FromWChar(char *dst, size_t dstLen,
3566 const wchar_t *src, size_t srcLen) const
3567{
3568 CreateConvIfNeeded();
3569
2c74c558
VS
3570 if (m_convReal)
3571 return m_convReal->FromWChar(dst, dstLen, src, srcLen);
3572
3573 // latin-1 (direct)
3574 return wxMBConv::FromWChar(dst, dstLen, src, srcLen);
1c714a5d
VZ
3575}
3576
6001e347
RR
3577size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
3578{
e95354ec 3579 CreateConvIfNeeded();
dccce9ea 3580
e95354ec
VZ
3581 if (m_convReal)
3582 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
3583
3584 // latin-1 (direct)
4def3b35 3585 size_t len = strlen(psz);
dccce9ea 3586
f1339c56
RR
3587 if (buf)
3588 {
4def3b35 3589 for (size_t c = 0; c <= len; c++)
f1339c56
RR
3590 buf[c] = (unsigned char)(psz[c]);
3591 }
dccce9ea 3592
f1339c56 3593 return len;
6001e347
RR
3594}
3595
3596size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
3597{
e95354ec 3598 CreateConvIfNeeded();
dccce9ea 3599
e95354ec
VZ
3600 if (m_convReal)
3601 return m_convReal->WC2MB(buf, psz, n);
1cd52418 3602
f1339c56 3603 // latin-1 (direct)
f8d791e0 3604 const size_t len = wxWcslen(psz);
f1339c56
RR
3605 if (buf)
3606 {
4def3b35 3607 for (size_t c = 0; c <= len; c++)
24642831
VS
3608 {
3609 if (psz[c] > 0xFF)
467e0479 3610 return wxCONV_FAILED;
ef199164 3611
907173e5 3612 buf[c] = (char)psz[c];
24642831
VS
3613 }
3614 }
3615 else
3616 {
3617 for (size_t c = 0; c <= len; c++)
3618 {
3619 if (psz[c] > 0xFF)
467e0479 3620 return wxCONV_FAILED;
24642831 3621 }
f1339c56 3622 }
dccce9ea 3623
f1339c56 3624 return len;
6001e347
RR
3625}
3626
7ef3ab50 3627size_t wxCSConv::GetMBNulLen() const
eec47cc6
VZ
3628{
3629 CreateConvIfNeeded();
3630
3631 if ( m_convReal )
3632 {
7ef3ab50 3633 return m_convReal->GetMBNulLen();
eec47cc6
VZ
3634 }
3635
ba98e032 3636 // otherwise, we are ISO-8859-1
c1464d9d 3637 return 1;
eec47cc6
VZ
3638}
3639
ba98e032
VS
3640#if wxUSE_UNICODE_UTF8
3641bool wxCSConv::IsUTF8() const
3642{
3643 CreateConvIfNeeded();
3644
3645 if ( m_convReal )
3646 {
3647 return m_convReal->IsUTF8();
3648 }
3649
3650 // otherwise, we are ISO-8859-1
3651 return false;
3652}
3653#endif
3654
69c928ef
VZ
3655
3656#if wxUSE_UNICODE
3657
3658wxWCharBuffer wxSafeConvertMB2WX(const char *s)
3659{
3660 if ( !s )
3661 return wxWCharBuffer();
3662
3663 wxWCharBuffer wbuf(wxConvLibc.cMB2WX(s));
3664 if ( !wbuf )
5487ff0f 3665 wbuf = wxMBConvUTF8().cMB2WX(s);
69c928ef
VZ
3666 if ( !wbuf )
3667 wbuf = wxConvISO8859_1.cMB2WX(s);
3668
3669 return wbuf;
3670}
3671
3672wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
3673{
3674 if ( !ws )
3675 return wxCharBuffer();
3676
3677 wxCharBuffer buf(wxConvLibc.cWX2MB(ws));
3678 if ( !buf )
3679 buf = wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL).cWX2MB(ws);
3680
3681 return buf;
3682}
3683
3684#endif // wxUSE_UNICODE
f5a1953b 3685
1e50d914
VS
3686// ----------------------------------------------------------------------------
3687// globals
3688// ----------------------------------------------------------------------------
3689
3690// NB: The reason why we create converter objects in this convoluted way,
3691// using a factory function instead of global variable, is that they
3692// may be used at static initialization time (some of them are used by
3693// wxString ctors and there may be a global wxString object). In other
3694// words, possibly _before_ the converter global object would be
3695// initialized.
3696
3697#undef wxConvLibc
3698#undef wxConvUTF8
3699#undef wxConvUTF7
3700#undef wxConvLocal
3701#undef wxConvISO8859_1
3702
// WX_DEFINE_GLOBAL_CONV2(klass, impl_klass, name, ctor_args) defines:
//  - the global pointer name##Ptr of type klass*, initially NULL
//  - the accessor wxGet_##name##Ptr() returning the address of a function
//    static object of type impl_klass constructed with ctor_args
//  - a file static variable initialized by calling the accessor, which
//    forces the creation of the object during static initialization
//
// WX_DEFINE_GLOBAL_CONV() is the same with impl_klass being klass itself.
3703#define WX_DEFINE_GLOBAL_CONV2(klass, impl_klass, name, ctor_args) \
3704 WXDLLIMPEXP_DATA_BASE(klass*) name##Ptr = NULL; \
092ee46f 3705 WXDLLIMPEXP_BASE klass* wxGet_##name##Ptr() \
1e50d914
VS
3706 { \
3707 static impl_klass name##Obj ctor_args; \
3708 return &name##Obj; \
3709 } \
3710 /* this ensures that all global converter objects are created */ \
3711 /* by the time static initialization is done, i.e. before any */ \
3712 /* thread is launched: */ \
3713 static klass* gs_##name##instance = wxGet_##name##Ptr()
3714
3715#define WX_DEFINE_GLOBAL_CONV(klass, name, ctor_args) \
3716 WX_DEFINE_GLOBAL_CONV2(klass, klass, name, ctor_args)
3717
// wxConvLibc: the implementation class depends on the platform
3718#ifdef __WINDOWS__
3719 WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_win32, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
3720#elif defined(__WXMAC__) && !defined(__MACH__)
3721 WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_mac, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
3722#else
3723 WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConvLibc, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
3724#endif
3725
// the UTF converters
3726WX_DEFINE_GLOBAL_CONV(wxMBConvUTF8, wxConvUTF8, wxEMPTY_PARAMETER_VALUE);
3727WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, wxEMPTY_PARAMETER_VALUE);
3728
// wxCSConv-based converters for the system encoding and for ISO-8859-1
3729WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM));
3730WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1));
3731
// these pointers initially refer to the libc and the local converter
3732WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = wxGet_wxConvLibcPtr();
3733WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = wxGet_wxConvLocalPtr();
3734
// wxConvFileName: the wxMBConv_macUTF8D object under Carbon, the UTF-8
// converter under other __WXOSX__ builds and the libc converter elsewhere
3735#if defined(__WXMAC__) && defined(TARGET_CARBON)
3736static wxMBConv_macUTF8D wxConvMacUTF8DObj;
3737#endif
3738WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName =
3739#ifdef __WXOSX__
3740#if defined(__WXMAC__) && defined(TARGET_CARBON)
3741 &wxConvMacUTF8DObj;
3742#else
3743 wxGet_wxConvUTF8Ptr();
3744#endif
3745#else // !__WXOSX__
3746 wxGet_wxConvLibcPtr();
3747#endif // __WXOSX__/!__WXOSX__
3748
bde4baac
VZ
3749#else // !wxUSE_WCHAR_T
3750
1e50d914 3751// FIXME-UTF8: remove this, wxUSE_WCHAR_T is required now
bde4baac
VZ
3752// stand-ins in absence of wchar_t
3753WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
3754 wxConvISO8859_1,
3755 wxConvLocal,
3756 wxConvUTF8;
3757
3758#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T