]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
don't hardcode HTML font sizes, this makes wxHtmlWindow look really ugly when default...
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347 1/////////////////////////////////////////////////////////////////////////////
38d4b1e4 2// Name: src/common/strconv.cpp
6001e347 3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
6001e347
RR
15// For compilers that support precompilation, includes "wx.h".
16#include "wx/wxprec.h"
17
373658eb
VZ
18#ifndef WX_PRECOMP
19 #include "wx/intl.h"
20 #include "wx/log.h"
de6185e2 21 #include "wx/utils.h"
df69528b 22 #include "wx/hashmap.h"
ef199164 23#endif
373658eb 24
bde4baac
VZ
25#include "wx/strconv.h"
26
27#if wxUSE_WCHAR_T
28
7608a683 29#ifdef __WINDOWS__
532d575b 30 #include "wx/msw/private.h"
13dd924a 31 #include "wx/msw/missing.h"
0a1c1e62
GRG
32#endif
33
1c193821 34#ifndef __WXWINCE__
1cd52418 35#include <errno.h>
1c193821
JS
36#endif
37
6001e347
RR
38#include <ctype.h>
39#include <string.h>
40#include <stdlib.h>
41
e95354ec
VZ
42#if defined(__WIN32__) && !defined(__WXMICROWIN__)
43 #define wxHAVE_WIN32_MB2WC
ef199164 44#endif
e95354ec 45
6001e347 46#ifdef __SALFORDC__
373658eb 47 #include <clib.h>
6001e347
RR
48#endif
49
b040e242 50#ifdef HAVE_ICONV
373658eb 51 #include <iconv.h>
b1d547eb 52 #include "wx/thread.h"
1cd52418 53#endif
1cd52418 54
373658eb
VZ
55#include "wx/encconv.h"
56#include "wx/fontmap.h"
57
335d31e0 58#ifdef __WXMAC__
40ba2f3b 59#ifndef __DARWIN__
4227afa4
SC
60#include <ATSUnicode.h>
61#include <TextCommon.h>
62#include <TextEncodingConverter.h>
40ba2f3b 63#endif
335d31e0 64
ef199164
DS
65// includes Mac headers
66#include "wx/mac/private.h"
335d31e0 67#endif
ce6f8d6f 68
ef199164 69
ce6f8d6f
VZ
70#define TRACE_STRCONV _T("strconv")
71
467e0479
VZ
72// WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
73// be 4 bytes
4948c2b6 74#if SIZEOF_WCHAR_T == 2
ac11db3a
MW
75 #define WC_UTF16
76#endif
77
ef199164 78
373658eb
VZ
79// ============================================================================
80// implementation
81// ============================================================================
82
69373110
VZ
// Helper used when looking for string terminators: returns true if at least
// one of the n bytes starting at p is not NUL and false if all of them are
// NUL (in particular, false is returned when n is 0).
static bool NotAllNULs(const char *p, size_t n)
{
    for ( const char * const end = p + n; p != end; ++p )
    {
        if ( *p != '\0' )
            return true;
    }

    return false;
}
91
373658eb 92// ----------------------------------------------------------------------------
467e0479 93// UTF-16 en/decoding to/from UCS-4 with surrogates handling
373658eb 94// ----------------------------------------------------------------------------
6001e347 95
c91830cb 96static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 97{
ef199164 98 if (input <= 0xffff)
4def3b35 99 {
999836aa
VZ
100 if (output)
101 *output = (wxUint16) input;
ef199164 102
4def3b35 103 return 1;
dccce9ea 104 }
ef199164 105 else if (input >= 0x110000)
4def3b35 106 {
467e0479 107 return wxCONV_FAILED;
dccce9ea
VZ
108 }
109 else
4def3b35 110 {
dccce9ea 111 if (output)
4def3b35 112 {
ef199164
DS
113 *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
114 *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
4def3b35 115 }
ef199164 116
4def3b35 117 return 2;
1cd52418 118 }
1cd52418
OK
119}
120
c91830cb 121static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 122{
ef199164 123 if ((*input < 0xd800) || (*input > 0xdfff))
4def3b35
VS
124 {
125 output = *input;
126 return 1;
dccce9ea 127 }
ef199164 128 else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
4def3b35
VS
129 {
130 output = *input;
467e0479 131 return wxCONV_FAILED;
dccce9ea
VZ
132 }
133 else
4def3b35
VS
134 {
135 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
136 return 2;
137 }
1cd52418
OK
138}
139
467e0479 140#ifdef WC_UTF16
35d11700
VZ
141 typedef wchar_t wxDecodeSurrogate_t;
142#else // !WC_UTF16
143 typedef wxUint16 wxDecodeSurrogate_t;
144#endif // WC_UTF16/!WC_UTF16
467e0479
VZ
145
146// returns the next UTF-32 character from the wchar_t buffer and advances the
147// pointer to the character after this one
148//
149// if an invalid character is found, *pSrc is set to NULL, the caller must
150// check for this
35d11700 151static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
467e0479
VZ
152{
153 wxUint32 out;
8d3dd069
VZ
154 const size_t
155 n = decode_utf16(wx_reinterpret_cast(const wxUint16 *, *pSrc), out);
467e0479
VZ
156 if ( n == wxCONV_FAILED )
157 *pSrc = NULL;
158 else
159 *pSrc += n;
160
161 return out;
162}
163
f6bcfd97 164// ----------------------------------------------------------------------------
6001e347 165// wxMBConv
f6bcfd97 166// ----------------------------------------------------------------------------
2c53a80a 167
483b0434
VZ
168size_t
169wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
170 const char *src, size_t srcLen) const
6001e347 171{
483b0434
VZ
172 // although new conversion classes are supposed to implement this function
173 // directly, the existins ones only implement the old MB2WC() and so, to
174 // avoid to have to rewrite all conversion classes at once, we provide a
175 // default (but not efficient) implementation of this one in terms of the
176 // old function by copying the input to ensure that it's NUL-terminated and
177 // then using MB2WC() to convert it
6001e347 178
483b0434
VZ
179 // the number of chars [which would be] written to dst [if it were not NULL]
180 size_t dstWritten = 0;
eec47cc6 181
c1464d9d 182 // the number of NULs terminating this string
a78c43f1 183 size_t nulLen = 0; // not really needed, but just to avoid warnings
eec47cc6 184
c1464d9d
VZ
185 // if we were not given the input size we just have to assume that the
186 // string is properly terminated as we have no way of knowing how long it
187 // is anyhow, but if we do have the size check whether there are enough
188 // NULs at the end
483b0434
VZ
189 wxCharBuffer bufTmp;
190 const char *srcEnd;
467e0479 191 if ( srcLen != wxNO_LEN )
eec47cc6 192 {
c1464d9d 193 // we need to know how to find the end of this string
7ef3ab50 194 nulLen = GetMBNulLen();
483b0434
VZ
195 if ( nulLen == wxCONV_FAILED )
196 return wxCONV_FAILED;
e4e3bbb4 197
c1464d9d 198 // if there are enough NULs we can avoid the copy
483b0434 199 if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
eec47cc6
VZ
200 {
201 // make a copy in order to properly NUL-terminate the string
483b0434 202 bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
c1464d9d 203 char * const p = bufTmp.data();
483b0434
VZ
204 memcpy(p, src, srcLen);
205 for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
c1464d9d 206 *s = '\0';
483b0434
VZ
207
208 src = bufTmp;
eec47cc6 209 }
e4e3bbb4 210
483b0434
VZ
211 srcEnd = src + srcLen;
212 }
213 else // quit after the first loop iteration
214 {
215 srcEnd = NULL;
216 }
e4e3bbb4 217
483b0434 218 for ( ;; )
eec47cc6 219 {
c1464d9d 220 // try to convert the current chunk
483b0434 221 size_t lenChunk = MB2WC(NULL, src, 0);
483b0434
VZ
222 if ( lenChunk == wxCONV_FAILED )
223 return wxCONV_FAILED;
e4e3bbb4 224
467e0479 225 lenChunk++; // for the L'\0' at the end of this chunk
e4e3bbb4 226
483b0434 227 dstWritten += lenChunk;
f5fb6871 228
467e0479
VZ
229 if ( lenChunk == 1 )
230 {
231 // nothing left in the input string, conversion succeeded
232 break;
233 }
234
483b0434
VZ
235 if ( dst )
236 {
237 if ( dstWritten > dstLen )
238 return wxCONV_FAILED;
239
830f8f11 240 if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
483b0434
VZ
241 return wxCONV_FAILED;
242
243 dst += lenChunk;
244 }
c1464d9d 245
483b0434 246 if ( !srcEnd )
c1464d9d 247 {
467e0479
VZ
248 // we convert just one chunk in this case as this is the entire
249 // string anyhow
c1464d9d
VZ
250 break;
251 }
eec47cc6
VZ
252
253 // advance the input pointer past the end of this chunk
483b0434 254 while ( NotAllNULs(src, nulLen) )
c1464d9d
VZ
255 {
256 // notice that we must skip over multiple bytes here as we suppose
257 // that if NUL takes 2 or 4 bytes, then all the other characters do
258 // too and so if advanced by a single byte we might erroneously
259 // detect sequences of NUL bytes in the middle of the input
483b0434 260 src += nulLen;
c1464d9d 261 }
e4e3bbb4 262
483b0434 263 src += nulLen; // skipping over its terminator as well
c1464d9d
VZ
264
265 // note that ">=" (and not just "==") is needed here as the terminator
266 // we skipped just above could be inside or just after the buffer
267 // delimited by inEnd
483b0434 268 if ( src >= srcEnd )
c1464d9d
VZ
269 break;
270 }
271
483b0434 272 return dstWritten;
e4e3bbb4
RN
273}
274
483b0434
VZ
275size_t
276wxMBConv::FromWChar(char *dst, size_t dstLen,
277 const wchar_t *src, size_t srcLen) const
e4e3bbb4 278{
483b0434
VZ
279 // the number of chars [which would be] written to dst [if it were not NULL]
280 size_t dstWritten = 0;
e4e3bbb4 281
eec47cc6
VZ
282 // make a copy of the input string unless it is already properly
283 // NUL-terminated
284 //
285 // if we don't know its length we have no choice but to assume that it is,
286 // indeed, properly terminated
287 wxWCharBuffer bufTmp;
467e0479 288 if ( srcLen == wxNO_LEN )
e4e3bbb4 289 {
483b0434 290 srcLen = wxWcslen(src) + 1;
eec47cc6 291 }
483b0434 292 else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
eec47cc6
VZ
293 {
294 // make a copy in order to properly NUL-terminate the string
483b0434 295 bufTmp = wxWCharBuffer(srcLen);
ef199164 296 memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
483b0434
VZ
297 src = bufTmp;
298 }
299
300 const size_t lenNul = GetMBNulLen();
301 for ( const wchar_t * const srcEnd = src + srcLen;
302 src < srcEnd;
303 src += wxWcslen(src) + 1 /* skip L'\0' too */ )
304 {
305 // try to convert the current chunk
306 size_t lenChunk = WC2MB(NULL, src, 0);
307
308 if ( lenChunk == wxCONV_FAILED )
309 return wxCONV_FAILED;
310
311 lenChunk += lenNul;
312 dstWritten += lenChunk;
313
314 if ( dst )
315 {
316 if ( dstWritten > dstLen )
317 return wxCONV_FAILED;
318
319 if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
320 return wxCONV_FAILED;
321
322 dst += lenChunk;
323 }
eec47cc6 324 }
e4e3bbb4 325
483b0434
VZ
326 return dstWritten;
327}
328
ef199164 329size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
509da451 330{
ef199164 331 size_t rc = ToWChar(outBuff, outLen, inBuff);
467e0479 332 if ( rc != wxCONV_FAILED )
509da451
VZ
333 {
334 // ToWChar() returns the buffer length, i.e. including the trailing
335 // NUL, while this method doesn't take it into account
336 rc--;
337 }
338
339 return rc;
340}
341
ef199164 342size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
509da451 343{
ef199164 344 size_t rc = FromWChar(outBuff, outLen, inBuff);
467e0479 345 if ( rc != wxCONV_FAILED )
509da451
VZ
346 {
347 rc -= GetMBNulLen();
348 }
349
350 return rc;
351}
352
483b0434
VZ
353wxMBConv::~wxMBConv()
354{
355 // nothing to do here (necessary for Darwin linking probably)
356}
e4e3bbb4 357
483b0434
VZ
358const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
359{
360 if ( psz )
eec47cc6 361 {
483b0434
VZ
362 // calculate the length of the buffer needed first
363 const size_t nLen = MB2WC(NULL, psz, 0);
467e0479 364 if ( nLen != wxCONV_FAILED )
f5fb6871 365 {
483b0434
VZ
366 // now do the actual conversion
367 wxWCharBuffer buf(nLen /* +1 added implicitly */);
eec47cc6 368
483b0434
VZ
369 // +1 for the trailing NULL
370 if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
371 return buf;
f5fb6871 372 }
483b0434 373 }
e4e3bbb4 374
483b0434
VZ
375 return wxWCharBuffer();
376}
3698ae71 377
483b0434
VZ
378const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
379{
380 if ( pwz )
381 {
382 const size_t nLen = WC2MB(NULL, pwz, 0);
467e0479 383 if ( nLen != wxCONV_FAILED )
483b0434
VZ
384 {
385 // extra space for trailing NUL(s)
386 static const size_t extraLen = GetMaxMBNulLen();
f5fb6871 387
483b0434
VZ
388 wxCharBuffer buf(nLen + extraLen - 1);
389 if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
390 return buf;
391 }
392 }
393
394 return wxCharBuffer();
395}
e4e3bbb4 396
483b0434 397const wxWCharBuffer
ef199164 398wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
483b0434 399{
ef199164 400 const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
467e0479 401 if ( dstLen != wxCONV_FAILED )
483b0434 402 {
830f8f11 403 wxWCharBuffer wbuf(dstLen - 1);
ef199164 404 if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
483b0434
VZ
405 {
406 if ( outLen )
467e0479
VZ
407 {
408 *outLen = dstLen;
409 if ( wbuf[dstLen - 1] == L'\0' )
410 (*outLen)--;
411 }
412
483b0434
VZ
413 return wbuf;
414 }
415 }
416
417 if ( outLen )
418 *outLen = 0;
419
420 return wxWCharBuffer();
421}
422
423const wxCharBuffer
ef199164 424wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
483b0434 425{
13d92ad6 426 size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
467e0479 427 if ( dstLen != wxCONV_FAILED )
483b0434 428 {
168a76fe
VZ
429 // special case of empty input: can't allocate 0 size buffer below as
430 // wxCharBuffer insists on NUL-terminating it
431 wxCharBuffer buf(dstLen ? dstLen - 1 : 1);
ef199164 432 if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
483b0434
VZ
433 {
434 if ( outLen )
467e0479
VZ
435 {
436 *outLen = dstLen;
437
438 const size_t nulLen = GetMBNulLen();
13d92ad6
VZ
439 if ( dstLen >= nulLen &&
440 !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
467e0479
VZ
441 {
442 // in this case the output is NUL-terminated and we're not
443 // supposed to count NUL
13d92ad6 444 *outLen -= nulLen;
467e0479
VZ
445 }
446 }
d32a507d 447
483b0434
VZ
448 return buf;
449 }
e4e3bbb4
RN
450 }
451
eec47cc6
VZ
452 if ( outLen )
453 *outLen = 0;
454
455 return wxCharBuffer();
e4e3bbb4
RN
456}
457
6001e347 458// ----------------------------------------------------------------------------
bde4baac 459// wxMBConvLibc
6001e347
RR
460// ----------------------------------------------------------------------------
461
bde4baac
VZ
462size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
463{
464 return wxMB2WC(buf, psz, n);
465}
466
467size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
468{
469 return wxWC2MB(buf, psz, n);
470}
e1bfe89e
RR
471
472// ----------------------------------------------------------------------------
532d575b 473// wxConvBrokenFileNames
e1bfe89e
RR
474// ----------------------------------------------------------------------------
475
eec47cc6
VZ
#ifdef __UNIX__

// Creates the underlying conversion: if no charset is given or if it is
// UTF-8 (spelt with or without the dash, case-insensitively), a UTF-8
// converter mapping invalid sequences to octal escapes is used, otherwise a
// wxCSConv for the specified charset.
wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
{
    const bool useUTF8 = !charset ||
                         wxStricmp(charset, _T("UTF-8")) == 0 ||
                         wxStricmp(charset, _T("UTF8")) == 0;

    if ( useUTF8 )
        m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
    else
        m_conv = new wxCSConv(charset);
}

#endif // __UNIX__
c12b7f79 488
bde4baac 489// ----------------------------------------------------------------------------
3698ae71 490// UTF-7
bde4baac 491// ----------------------------------------------------------------------------
6001e347 492
15f2ee32 493// Implementation (C) 2004 Fredrik Roubert
6001e347 494
15f2ee32
RN
//
// BASE64 decoding table: maps a byte to the 6 bit value it encodes or to
// 0xff if the byte is not a valid BASE64 character
//
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x1f: control characters
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: only '+' and '/' are valid
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: digits
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x5f: upper case letters
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x7f: lower case letters
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff: nothing valid here
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
533
534size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
535{
15f2ee32
RN
536 size_t len = 0;
537
04a37834 538 while ( *psz && (!buf || (len < n)) )
15f2ee32
RN
539 {
540 unsigned char cc = *psz++;
541 if (cc != '+')
542 {
543 // plain ASCII char
544 if (buf)
545 *buf++ = cc;
546 len++;
547 }
548 else if (*psz == '-')
549 {
550 // encoded plus sign
551 if (buf)
552 *buf++ = cc;
553 len++;
554 psz++;
555 }
04a37834 556 else // start of BASE64 encoded string
15f2ee32 557 {
04a37834 558 bool lsb, ok;
15f2ee32 559 unsigned int d, l;
04a37834
VZ
560 for ( ok = lsb = false, d = 0, l = 0;
561 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
562 psz++ )
15f2ee32
RN
563 {
564 d <<= 6;
565 d += cc;
566 for (l += 6; l >= 8; lsb = !lsb)
567 {
04a37834 568 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
569 if (lsb)
570 {
571 if (buf)
572 *buf++ |= c;
573 len ++;
574 }
575 else
04a37834 576 {
15f2ee32 577 if (buf)
6356d52a 578 *buf = (wchar_t)(c << 8);
04a37834
VZ
579 }
580
581 ok = true;
15f2ee32
RN
582 }
583 }
04a37834
VZ
584
585 if ( !ok )
586 {
587 // in valid UTF7 we should have valid characters after '+'
467e0479 588 return wxCONV_FAILED;
04a37834
VZ
589 }
590
15f2ee32
RN
591 if (*psz == '-')
592 psz++;
593 }
594 }
04a37834
VZ
595
596 if ( buf && (len < n) )
597 *buf = '\0';
598
15f2ee32 599 return len;
6001e347
RR
600}
601
15f2ee32
RN
//
// BASE64 encoding table: maps a 6 bit value to the character encoding it
//
static const unsigned char utf7enb64[] =
{
    // 0 - 25
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 26 - 51
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // 52 - 61
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 62 - 63
    '+', '/'
};
616
//
// UTF-7 encoding table, indexed by the ASCII character code:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,     // 0x00 - 0x0f
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,     // 0x10 - 0x1f
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,     // 0x20 - 0x2f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,     // 0x30 - 0x3f
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     // 0x40 - 0x4f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,     // 0x50 - 0x5f
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     // 0x60 - 0x6f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3      // 0x70 - 0x7f
};
636
667e5b3e 637size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32 638{
15f2ee32
RN
639 size_t len = 0;
640
641 while (*psz && ((!buf) || (len < n)))
642 {
643 wchar_t cc = *psz++;
644 if (cc < 0x80 && utf7encode[cc] < 1)
645 {
646 // plain ASCII char
647 if (buf)
648 *buf++ = (char)cc;
ef199164 649
15f2ee32
RN
650 len++;
651 }
652#ifndef WC_UTF16
79c78d42 653 else if (((wxUint32)cc) > 0xffff)
b2c13097 654 {
15f2ee32 655 // no surrogate pair generation (yet?)
467e0479 656 return wxCONV_FAILED;
15f2ee32
RN
657 }
658#endif
659 else
660 {
661 if (buf)
662 *buf++ = '+';
ef199164 663
15f2ee32
RN
664 len++;
665 if (cc != '+')
666 {
667 // BASE64 encode string
668 unsigned int lsb, d, l;
73c902d6 669 for (d = 0, l = 0; /*nothing*/; psz++)
15f2ee32
RN
670 {
671 for (lsb = 0; lsb < 2; lsb ++)
672 {
673 d <<= 8;
674 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
675
676 for (l += 8; l >= 6; )
677 {
678 l -= 6;
679 if (buf)
680 *buf++ = utf7enb64[(d >> l) % 64];
681 len++;
682 }
683 }
ef199164 684
15f2ee32
RN
685 cc = *psz;
686 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
687 break;
688 }
ef199164 689
15f2ee32
RN
690 if (l != 0)
691 {
692 if (buf)
693 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
ef199164 694
15f2ee32
RN
695 len++;
696 }
697 }
ef199164 698
15f2ee32
RN
699 if (buf)
700 *buf++ = '-';
701 len++;
702 }
703 }
ef199164 704
15f2ee32
RN
705 if (buf && (len < n))
706 *buf = 0;
ef199164 707
15f2ee32 708 return len;
6001e347
RR
709}
710
f6bcfd97 711// ----------------------------------------------------------------------------
6001e347 712// UTF-8
f6bcfd97 713// ----------------------------------------------------------------------------
6001e347 714
dccce9ea 715static wxUint32 utf8_max[]=
4def3b35 716 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347 717
3698ae71
VZ
718// boundaries of the private use area we use to (temporarily) remap invalid
719// characters invalid in a UTF-8 encoded string
ea8ce907
RR
720const wxUint32 wxUnicodePUA = 0x100000;
721const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
722
6001e347
RR
723size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
724{
4def3b35
VS
725 size_t len = 0;
726
dccce9ea 727 while (*psz && ((!buf) || (len < n)))
4def3b35 728 {
ea8ce907
RR
729 const char *opsz = psz;
730 bool invalid = false;
4def3b35
VS
731 unsigned char cc = *psz++, fc = cc;
732 unsigned cnt;
dccce9ea 733 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 734 fc <<= 1;
ef199164 735
dccce9ea 736 if (!cnt)
4def3b35
VS
737 {
738 // plain ASCII char
dccce9ea 739 if (buf)
4def3b35
VS
740 *buf++ = cc;
741 len++;
561488ef
MW
742
743 // escape the escape character for octal escapes
744 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
745 && cc == '\\' && (!buf || len < n))
746 {
747 if (buf)
748 *buf++ = cc;
749 len++;
750 }
dccce9ea
VZ
751 }
752 else
4def3b35
VS
753 {
754 cnt--;
dccce9ea 755 if (!cnt)
4def3b35
VS
756 {
757 // invalid UTF-8 sequence
ea8ce907 758 invalid = true;
dccce9ea
VZ
759 }
760 else
4def3b35
VS
761 {
762 unsigned ocnt = cnt - 1;
763 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 764 while (cnt--)
4def3b35 765 {
ea8ce907 766 cc = *psz;
dccce9ea 767 if ((cc & 0xC0) != 0x80)
4def3b35
VS
768 {
769 // invalid UTF-8 sequence
ea8ce907
RR
770 invalid = true;
771 break;
4def3b35 772 }
ef199164 773
ea8ce907 774 psz++;
4def3b35
VS
775 res = (res << 6) | (cc & 0x3f);
776 }
ef199164 777
ea8ce907 778 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
779 {
780 // illegal UTF-8 encoding
ea8ce907 781 invalid = true;
4def3b35 782 }
ea8ce907
RR
783 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
784 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
785 {
786 // if one of our PUA characters turns up externally
787 // it must also be treated as an illegal sequence
788 // (a bit like you have to escape an escape character)
789 invalid = true;
790 }
791 else
792 {
1cd52418 793#ifdef WC_UTF16
ea8ce907
RR
794 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
795 size_t pa = encode_utf16(res, (wxUint16 *)buf);
467e0479 796 if (pa == wxCONV_FAILED)
ea8ce907
RR
797 {
798 invalid = true;
799 }
800 else
801 {
802 if (buf)
803 buf += pa;
804 len += pa;
805 }
373658eb 806#else // !WC_UTF16
ea8ce907 807 if (buf)
38d4b1e4 808 *buf++ = (wchar_t)res;
ea8ce907 809 len++;
373658eb 810#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
811 }
812 }
ef199164 813
ea8ce907
RR
814 if (invalid)
815 {
816 if (m_options & MAP_INVALID_UTF8_TO_PUA)
817 {
818 while (opsz < psz && (!buf || len < n))
819 {
820#ifdef WC_UTF16
821 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
822 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
467e0479 823 wxASSERT(pa != wxCONV_FAILED);
ea8ce907
RR
824 if (buf)
825 buf += pa;
826 opsz++;
827 len += pa;
828#else
829 if (buf)
38d4b1e4 830 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
ea8ce907
RR
831 opsz++;
832 len++;
833#endif
834 }
835 }
3698ae71 836 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
837 {
838 while (opsz < psz && (!buf || len < n))
839 {
3698ae71
VZ
840 if ( buf && len + 3 < n )
841 {
17a1ebd1 842 unsigned char on = *opsz;
3698ae71 843 *buf++ = L'\\';
17a1ebd1
VZ
844 *buf++ = (wchar_t)( L'0' + on / 0100 );
845 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
846 *buf++ = (wchar_t)( L'0' + on % 010 );
3698ae71 847 }
ef199164 848
ea8ce907
RR
849 opsz++;
850 len += 4;
851 }
852 }
3698ae71 853 else // MAP_INVALID_UTF8_NOT
ea8ce907 854 {
467e0479 855 return wxCONV_FAILED;
ea8ce907 856 }
4def3b35
VS
857 }
858 }
6001e347 859 }
ef199164 860
dccce9ea 861 if (buf && (len < n))
4def3b35 862 *buf = 0;
ef199164 863
4def3b35 864 return len;
6001e347
RR
865}
866
3698ae71
VZ
// returns true if wch is one of the octal digits L'0'..L'7'
static inline bool isoctal(wchar_t wch)
{
    return wch >= L'0' && wch < L'8';
}
871
6001e347
RR
872size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
873{
4def3b35 874 size_t len = 0;
6001e347 875
dccce9ea 876 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
877 {
878 wxUint32 cc;
ef199164 879
1cd52418 880#ifdef WC_UTF16
b5153fd8
VZ
881 // cast is ok for WC_UTF16
882 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
467e0479 883 psz += (pa == wxCONV_FAILED) ? 1 : pa;
1cd52418 884#else
ef199164 885 cc = (*psz++) & 0x7fffffff;
4def3b35 886#endif
3698ae71
VZ
887
888 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
889 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 890 {
dccce9ea 891 if (buf)
ea8ce907 892 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 893 len++;
3698ae71 894 }
561488ef
MW
895 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
896 && cc == L'\\' && psz[0] == L'\\' )
897 {
898 if (buf)
899 *buf++ = (char)cc;
900 psz++;
901 len++;
902 }
3698ae71
VZ
903 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
904 cc == L'\\' &&
905 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 906 {
dccce9ea 907 if (buf)
3698ae71 908 {
ef199164
DS
909 *buf++ = (char) ((psz[0] - L'0') * 0100 +
910 (psz[1] - L'0') * 010 +
b2c13097 911 (psz[2] - L'0'));
3698ae71
VZ
912 }
913
914 psz += 3;
ea8ce907
RR
915 len++;
916 }
917 else
918 {
919 unsigned cnt;
ef199164
DS
920 for (cnt = 0; cc > utf8_max[cnt]; cnt++)
921 {
922 }
923
ea8ce907 924 if (!cnt)
4def3b35 925 {
ea8ce907
RR
926 // plain ASCII char
927 if (buf)
928 *buf++ = (char) cc;
929 len++;
930 }
ea8ce907
RR
931 else
932 {
933 len += cnt + 1;
934 if (buf)
935 {
936 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
937 while (cnt--)
938 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
939 }
4def3b35
VS
940 }
941 }
6001e347 942 }
4def3b35 943
ef199164 944 if (buf && (len < n))
3698ae71 945 *buf = 0;
adb45366 946
4def3b35 947 return len;
6001e347
RR
948}
949
467e0479 950// ============================================================================
c91830cb 951// UTF-16
467e0479 952// ============================================================================
c91830cb
VZ
953
954#ifdef WORDS_BIGENDIAN
bde4baac
VZ
955 #define wxMBConvUTF16straight wxMBConvUTF16BE
956 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 957#else
bde4baac
VZ
958 #define wxMBConvUTF16swap wxMBConvUTF16BE
959 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
960#endif
961
467e0479
VZ
962/* static */
963size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
964{
965 if ( srcLen == wxNO_LEN )
966 {
967 // count the number of bytes in input, including the trailing NULs
ef199164
DS
968 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
969 for ( srcLen = 1; *inBuff++; srcLen++ )
467e0479 970 ;
c91830cb 971
467e0479
VZ
972 srcLen *= BYTES_PER_CHAR;
973 }
974 else // we already have the length
975 {
976 // we can only convert an entire number of UTF-16 characters
977 if ( srcLen % BYTES_PER_CHAR )
978 return wxCONV_FAILED;
979 }
980
981 return srcLen;
982}
983
984// case when in-memory representation is UTF-16 too
c91830cb
VZ
985#ifdef WC_UTF16
986
467e0479
VZ
987// ----------------------------------------------------------------------------
988// conversions without endianness change
989// ----------------------------------------------------------------------------
990
991size_t
992wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
993 const char *src, size_t srcLen) const
c91830cb 994{
467e0479
VZ
995 // set up the scene for using memcpy() (which is presumably more efficient
996 // than copying the bytes one by one)
997 srcLen = GetLength(src, srcLen);
998 if ( srcLen == wxNO_LEN )
999 return wxCONV_FAILED;
c91830cb 1000
ef199164 1001 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479 1002 if ( dst )
c91830cb 1003 {
467e0479
VZ
1004 if ( dstLen < inLen )
1005 return wxCONV_FAILED;
c91830cb 1006
467e0479 1007 memcpy(dst, src, srcLen);
c91830cb 1008 }
d32a507d 1009
467e0479 1010 return inLen;
c91830cb
VZ
1011}
1012
467e0479
VZ
1013size_t
1014wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1015 const wchar_t *src, size_t srcLen) const
c91830cb 1016{
467e0479
VZ
1017 if ( srcLen == wxNO_LEN )
1018 srcLen = wxWcslen(src) + 1;
c91830cb 1019
467e0479
VZ
1020 srcLen *= BYTES_PER_CHAR;
1021
1022 if ( dst )
c91830cb 1023 {
467e0479
VZ
1024 if ( dstLen < srcLen )
1025 return wxCONV_FAILED;
d32a507d 1026
467e0479 1027 memcpy(dst, src, srcLen);
c91830cb 1028 }
d32a507d 1029
467e0479 1030 return srcLen;
c91830cb
VZ
1031}
1032
467e0479
VZ
1033// ----------------------------------------------------------------------------
1034// endian-reversing conversions
1035// ----------------------------------------------------------------------------
c91830cb 1036
467e0479
VZ
1037size_t
1038wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1039 const char *src, size_t srcLen) const
c91830cb 1040{
467e0479
VZ
1041 srcLen = GetLength(src, srcLen);
1042 if ( srcLen == wxNO_LEN )
1043 return wxCONV_FAILED;
c91830cb 1044
467e0479
VZ
1045 srcLen /= BYTES_PER_CHAR;
1046
1047 if ( dst )
c91830cb 1048 {
467e0479
VZ
1049 if ( dstLen < srcLen )
1050 return wxCONV_FAILED;
1051
ef199164
DS
1052 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1053 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
c91830cb 1054 {
ef199164 1055 *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff);
c91830cb 1056 }
c91830cb 1057 }
bfab25d4 1058
467e0479 1059 return srcLen;
c91830cb
VZ
1060}
1061
467e0479
VZ
1062size_t
1063wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1064 const wchar_t *src, size_t srcLen) const
c91830cb 1065{
467e0479
VZ
1066 if ( srcLen == wxNO_LEN )
1067 srcLen = wxWcslen(src) + 1;
c91830cb 1068
467e0479
VZ
1069 srcLen *= BYTES_PER_CHAR;
1070
1071 if ( dst )
c91830cb 1072 {
467e0479
VZ
1073 if ( dstLen < srcLen )
1074 return wxCONV_FAILED;
1075
ef199164 1076 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
467e0479 1077 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
c91830cb 1078 {
ef199164 1079 *outBuff++ = wxUINT16_SWAP_ALWAYS(*src);
c91830cb 1080 }
c91830cb 1081 }
eec47cc6 1082
467e0479 1083 return srcLen;
c91830cb
VZ
1084}
1085
467e0479 1086#else // !WC_UTF16: wchar_t is UTF-32
c91830cb 1087
467e0479
VZ
1088// ----------------------------------------------------------------------------
1089// conversions without endianness change
1090// ----------------------------------------------------------------------------
c91830cb 1091
35d11700
VZ
1092size_t
1093wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1094 const char *src, size_t srcLen) const
c91830cb 1095{
35d11700
VZ
1096 srcLen = GetLength(src, srcLen);
1097 if ( srcLen == wxNO_LEN )
1098 return wxCONV_FAILED;
c91830cb 1099
ef199164 1100 const size_t inLen = srcLen / BYTES_PER_CHAR;
35d11700 1101 if ( !dst )
c91830cb 1102 {
35d11700
VZ
1103 // optimization: return maximal space which could be needed for this
1104 // string even if the real size could be smaller if the buffer contains
1105 // any surrogates
1106 return inLen;
c91830cb 1107 }
c91830cb 1108
35d11700 1109 size_t outLen = 0;
ef199164
DS
1110 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1111 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
35d11700 1112 {
ef199164
DS
1113 const wxUint32 ch = wxDecodeSurrogate(&inBuff);
1114 if ( !inBuff )
35d11700
VZ
1115 return wxCONV_FAILED;
1116
1117 if ( ++outLen > dstLen )
1118 return wxCONV_FAILED;
c91830cb 1119
35d11700
VZ
1120 *dst++ = ch;
1121 }
1122
1123
1124 return outLen;
1125}
c91830cb 1126
35d11700
VZ
1127size_t
1128wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1129 const wchar_t *src, size_t srcLen) const
c91830cb 1130{
35d11700
VZ
1131 if ( srcLen == wxNO_LEN )
1132 srcLen = wxWcslen(src) + 1;
c91830cb 1133
35d11700 1134 size_t outLen = 0;
ef199164 1135 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
35d11700 1136 for ( size_t n = 0; n < srcLen; n++ )
c91830cb
VZ
1137 {
1138 wxUint16 cc[2];
35d11700
VZ
1139 const size_t numChars = encode_utf16(*src++, cc);
1140 if ( numChars == wxCONV_FAILED )
1141 return wxCONV_FAILED;
c91830cb 1142
ef199164
DS
1143 outLen += numChars * BYTES_PER_CHAR;
1144 if ( outBuff )
c91830cb 1145 {
35d11700
VZ
1146 if ( outLen > dstLen )
1147 return wxCONV_FAILED;
1148
ef199164 1149 *outBuff++ = cc[0];
35d11700 1150 if ( numChars == 2 )
69b80d28 1151 {
35d11700 1152 // second character of a surrogate
ef199164 1153 *outBuff++ = cc[1];
69b80d28 1154 }
c91830cb 1155 }
c91830cb 1156 }
c91830cb 1157
35d11700 1158 return outLen;
c91830cb
VZ
1159}
1160
467e0479
VZ
1161// ----------------------------------------------------------------------------
1162// endian-reversing conversions
1163// ----------------------------------------------------------------------------
c91830cb 1164
35d11700
VZ
1165size_t
1166wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1167 const char *src, size_t srcLen) const
c91830cb 1168{
35d11700
VZ
1169 srcLen = GetLength(src, srcLen);
1170 if ( srcLen == wxNO_LEN )
1171 return wxCONV_FAILED;
1172
ef199164 1173 const size_t inLen = srcLen / BYTES_PER_CHAR;
35d11700
VZ
1174 if ( !dst )
1175 {
1176 // optimization: return maximal space which could be needed for this
1177 // string even if the real size could be smaller if the buffer contains
1178 // any surrogates
1179 return inLen;
1180 }
c91830cb 1181
35d11700 1182 size_t outLen = 0;
ef199164
DS
1183 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1184 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
c91830cb 1185 {
35d11700
VZ
1186 wxUint32 ch;
1187 wxUint16 tmp[2];
ef199164
DS
1188
1189 tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
1190 inBuff++;
1191 tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
c91830cb 1192
35d11700
VZ
1193 const size_t numChars = decode_utf16(tmp, ch);
1194 if ( numChars == wxCONV_FAILED )
1195 return wxCONV_FAILED;
c91830cb 1196
35d11700 1197 if ( numChars == 2 )
ef199164 1198 inBuff++;
35d11700
VZ
1199
1200 if ( ++outLen > dstLen )
1201 return wxCONV_FAILED;
c91830cb 1202
35d11700 1203 *dst++ = ch;
c91830cb 1204 }
c91830cb 1205
c91830cb 1206
35d11700
VZ
1207 return outLen;
1208}
c91830cb 1209
35d11700
VZ
1210size_t
1211wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1212 const wchar_t *src, size_t srcLen) const
c91830cb 1213{
35d11700
VZ
1214 if ( srcLen == wxNO_LEN )
1215 srcLen = wxWcslen(src) + 1;
c91830cb 1216
35d11700 1217 size_t outLen = 0;
ef199164 1218 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
35d11700 1219 for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
c91830cb
VZ
1220 {
1221 wxUint16 cc[2];
35d11700
VZ
1222 const size_t numChars = encode_utf16(*src, cc);
1223 if ( numChars == wxCONV_FAILED )
1224 return wxCONV_FAILED;
c91830cb 1225
ef199164
DS
1226 outLen += numChars * BYTES_PER_CHAR;
1227 if ( outBuff )
c91830cb 1228 {
35d11700
VZ
1229 if ( outLen > dstLen )
1230 return wxCONV_FAILED;
1231
ef199164 1232 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
35d11700 1233 if ( numChars == 2 )
c91830cb 1234 {
35d11700 1235 // second character of a surrogate
ef199164 1236 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
c91830cb
VZ
1237 }
1238 }
c91830cb 1239 }
c91830cb 1240
35d11700 1241 return outLen;
c91830cb
VZ
1242}
1243
467e0479 1244#endif // WC_UTF16/!WC_UTF16
c91830cb
VZ
1245
1246
35d11700 1247// ============================================================================
c91830cb 1248// UTF-32
35d11700 1249// ============================================================================
c91830cb
VZ
1250
1251#ifdef WORDS_BIGENDIAN
467e0479
VZ
1252 #define wxMBConvUTF32straight wxMBConvUTF32BE
1253 #define wxMBConvUTF32swap wxMBConvUTF32LE
c91830cb 1254#else
467e0479
VZ
1255 #define wxMBConvUTF32swap wxMBConvUTF32BE
1256 #define wxMBConvUTF32straight wxMBConvUTF32LE
c91830cb
VZ
1257#endif
1258
1259
1260WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1261WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1262
467e0479
VZ
1263/* static */
1264size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
1265{
1266 if ( srcLen == wxNO_LEN )
1267 {
1268 // count the number of bytes in input, including the trailing NULs
ef199164
DS
1269 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1270 for ( srcLen = 1; *inBuff++; srcLen++ )
467e0479 1271 ;
c91830cb 1272
467e0479
VZ
1273 srcLen *= BYTES_PER_CHAR;
1274 }
1275 else // we already have the length
1276 {
1277 // we can only convert an entire number of UTF-32 characters
1278 if ( srcLen % BYTES_PER_CHAR )
1279 return wxCONV_FAILED;
1280 }
1281
1282 return srcLen;
1283}
1284
1285// case when in-memory representation is UTF-16
c91830cb
VZ
1286#ifdef WC_UTF16
1287
467e0479
VZ
1288// ----------------------------------------------------------------------------
1289// conversions without endianness change
1290// ----------------------------------------------------------------------------
1291
1292size_t
1293wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1294 const char *src, size_t srcLen) const
c91830cb 1295{
467e0479
VZ
1296 srcLen = GetLength(src, srcLen);
1297 if ( srcLen == wxNO_LEN )
1298 return wxCONV_FAILED;
c91830cb 1299
ef199164
DS
1300 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1301 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479
VZ
1302 size_t outLen = 0;
1303 for ( size_t n = 0; n < inLen; n++ )
c91830cb
VZ
1304 {
1305 wxUint16 cc[2];
ef199164 1306 const size_t numChars = encode_utf16(*inBuff++, cc);
467e0479
VZ
1307 if ( numChars == wxCONV_FAILED )
1308 return wxCONV_FAILED;
c91830cb 1309
467e0479
VZ
1310 outLen += numChars;
1311 if ( dst )
c91830cb 1312 {
467e0479
VZ
1313 if ( outLen > dstLen )
1314 return wxCONV_FAILED;
d32a507d 1315
467e0479
VZ
1316 *dst++ = cc[0];
1317 if ( numChars == 2 )
1318 {
1319 // second character of a surrogate
1320 *dst++ = cc[1];
1321 }
1322 }
c91830cb 1323 }
d32a507d 1324
467e0479 1325 return outLen;
c91830cb
VZ
1326}
1327
467e0479
VZ
1328size_t
1329wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1330 const wchar_t *src, size_t srcLen) const
c91830cb 1331{
467e0479
VZ
1332 if ( srcLen == wxNO_LEN )
1333 srcLen = wxWcslen(src) + 1;
c91830cb 1334
467e0479 1335 if ( !dst )
c91830cb 1336 {
467e0479
VZ
1337 // optimization: return maximal space which could be needed for this
1338 // string instead of the exact amount which could be less if there are
1339 // any surrogates in the input
1340 //
1341 // we consider that surrogates are rare enough to make it worthwhile to
1342 // avoid running the loop below at the cost of slightly extra memory
1343 // consumption
ef199164 1344 return srcLen * BYTES_PER_CHAR;
467e0479 1345 }
c91830cb 1346
ef199164 1347 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
467e0479
VZ
1348 size_t outLen = 0;
1349 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1350 {
1351 const wxUint32 ch = wxDecodeSurrogate(&src);
1352 if ( !src )
1353 return wxCONV_FAILED;
c91830cb 1354
467e0479 1355 outLen += BYTES_PER_CHAR;
d32a507d 1356
467e0479
VZ
1357 if ( outLen > dstLen )
1358 return wxCONV_FAILED;
b5153fd8 1359
ef199164 1360 *outBuff++ = ch;
467e0479 1361 }
c91830cb 1362
467e0479 1363 return outLen;
c91830cb
VZ
1364}
1365
467e0479
VZ
1366// ----------------------------------------------------------------------------
1367// endian-reversing conversions
1368// ----------------------------------------------------------------------------
c91830cb 1369
467e0479
VZ
1370size_t
1371wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1372 const char *src, size_t srcLen) const
c91830cb 1373{
467e0479
VZ
1374 srcLen = GetLength(src, srcLen);
1375 if ( srcLen == wxNO_LEN )
1376 return wxCONV_FAILED;
c91830cb 1377
ef199164
DS
1378 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1379 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479 1380 size_t outLen = 0;
ef199164 1381 for ( size_t n = 0; n < inLen; n++, inBuff++ )
c91830cb 1382 {
c91830cb 1383 wxUint16 cc[2];
ef199164 1384 const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
467e0479
VZ
1385 if ( numChars == wxCONV_FAILED )
1386 return wxCONV_FAILED;
c91830cb 1387
467e0479
VZ
1388 outLen += numChars;
1389 if ( dst )
c91830cb 1390 {
467e0479
VZ
1391 if ( outLen > dstLen )
1392 return wxCONV_FAILED;
d32a507d 1393
467e0479
VZ
1394 *dst++ = cc[0];
1395 if ( numChars == 2 )
1396 {
1397 // second character of a surrogate
1398 *dst++ = cc[1];
1399 }
1400 }
c91830cb 1401 }
b5153fd8 1402
467e0479 1403 return outLen;
c91830cb
VZ
1404}
1405
467e0479
VZ
1406size_t
1407wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1408 const wchar_t *src, size_t srcLen) const
c91830cb 1409{
467e0479
VZ
1410 if ( srcLen == wxNO_LEN )
1411 srcLen = wxWcslen(src) + 1;
c91830cb 1412
467e0479 1413 if ( !dst )
c91830cb 1414 {
467e0479
VZ
1415 // optimization: return maximal space which could be needed for this
1416 // string instead of the exact amount which could be less if there are
1417 // any surrogates in the input
1418 //
1419 // we consider that surrogates are rare enough to make it worthwhile to
1420 // avoid running the loop below at the cost of slightly extra memory
1421 // consumption
1422 return srcLen*BYTES_PER_CHAR;
1423 }
c91830cb 1424
ef199164 1425 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
467e0479
VZ
1426 size_t outLen = 0;
1427 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1428 {
1429 const wxUint32 ch = wxDecodeSurrogate(&src);
1430 if ( !src )
1431 return wxCONV_FAILED;
c91830cb 1432
467e0479 1433 outLen += BYTES_PER_CHAR;
d32a507d 1434
467e0479
VZ
1435 if ( outLen > dstLen )
1436 return wxCONV_FAILED;
b5153fd8 1437
ef199164 1438 *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
467e0479 1439 }
c91830cb 1440
467e0479 1441 return outLen;
c91830cb
VZ
1442}
1443
467e0479 1444#else // !WC_UTF16: wchar_t is UTF-32
c91830cb 1445
35d11700
VZ
1446// ----------------------------------------------------------------------------
1447// conversions without endianness change
1448// ----------------------------------------------------------------------------
1449
1450size_t
1451wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1452 const char *src, size_t srcLen) const
c91830cb 1453{
35d11700
VZ
1454 // use memcpy() as it should be much faster than hand-written loop
1455 srcLen = GetLength(src, srcLen);
1456 if ( srcLen == wxNO_LEN )
1457 return wxCONV_FAILED;
c91830cb 1458
35d11700
VZ
1459 const size_t inLen = srcLen/BYTES_PER_CHAR;
1460 if ( dst )
c91830cb 1461 {
35d11700
VZ
1462 if ( dstLen < inLen )
1463 return wxCONV_FAILED;
b5153fd8 1464
35d11700
VZ
1465 memcpy(dst, src, srcLen);
1466 }
c91830cb 1467
35d11700 1468 return inLen;
c91830cb
VZ
1469}
1470
35d11700
VZ
1471size_t
1472wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1473 const wchar_t *src, size_t srcLen) const
c91830cb 1474{
35d11700
VZ
1475 if ( srcLen == wxNO_LEN )
1476 srcLen = wxWcslen(src) + 1;
1477
1478 srcLen *= BYTES_PER_CHAR;
c91830cb 1479
35d11700 1480 if ( dst )
c91830cb 1481 {
35d11700
VZ
1482 if ( dstLen < srcLen )
1483 return wxCONV_FAILED;
c91830cb 1484
35d11700 1485 memcpy(dst, src, srcLen);
c91830cb
VZ
1486 }
1487
35d11700 1488 return srcLen;
c91830cb
VZ
1489}
1490
35d11700
VZ
1491// ----------------------------------------------------------------------------
1492// endian-reversing conversions
1493// ----------------------------------------------------------------------------
c91830cb 1494
35d11700
VZ
1495size_t
1496wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1497 const char *src, size_t srcLen) const
c91830cb 1498{
35d11700
VZ
1499 srcLen = GetLength(src, srcLen);
1500 if ( srcLen == wxNO_LEN )
1501 return wxCONV_FAILED;
1502
1503 srcLen /= BYTES_PER_CHAR;
c91830cb 1504
35d11700 1505 if ( dst )
c91830cb 1506 {
35d11700
VZ
1507 if ( dstLen < srcLen )
1508 return wxCONV_FAILED;
1509
ef199164
DS
1510 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1511 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
c91830cb 1512 {
ef199164 1513 *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff);
c91830cb 1514 }
c91830cb 1515 }
b5153fd8 1516
35d11700 1517 return srcLen;
c91830cb
VZ
1518}
1519
35d11700
VZ
1520size_t
1521wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1522 const wchar_t *src, size_t srcLen) const
c91830cb 1523{
35d11700
VZ
1524 if ( srcLen == wxNO_LEN )
1525 srcLen = wxWcslen(src) + 1;
1526
1527 srcLen *= BYTES_PER_CHAR;
c91830cb 1528
35d11700 1529 if ( dst )
c91830cb 1530 {
35d11700
VZ
1531 if ( dstLen < srcLen )
1532 return wxCONV_FAILED;
1533
ef199164 1534 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
35d11700 1535 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
c91830cb 1536 {
ef199164 1537 *outBuff++ = wxUINT32_SWAP_ALWAYS(*src);
c91830cb 1538 }
c91830cb 1539 }
b5153fd8 1540
35d11700 1541 return srcLen;
c91830cb
VZ
1542}
1543
467e0479 1544#endif // WC_UTF16/!WC_UTF16
c91830cb
VZ
1545
1546
36acb880
VZ
1547// ============================================================================
1548// The classes doing conversion using the iconv_xxx() functions
1549// ============================================================================
3caec1bb 1550
b040e242 1551#ifdef HAVE_ICONV
3a0d76bc 1552
b1d547eb
VS
1553// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1554// E2BIG if output buffer is _exactly_ as big as needed. Such case is
1555// (unless there's yet another bug in glibc) the only case when iconv()
1556// returns with (size_t)-1 (which means error) and says there are 0 bytes
1557// left in the input buffer -- when _real_ error occurs,
1558// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1559// iconv() failure.
3caec1bb
VS
1560// [This bug does not appear in glibc 2.2.]
1561#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1562#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1563 (errno != E2BIG || bufLeft != 0))
1564#else
1565#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1566#endif
1567
ab217dba 1568#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880 1569
74a7eb0b
VZ
1570#define ICONV_T_INVALID ((iconv_t)-1)
1571
1572#if SIZEOF_WCHAR_T == 4
1573 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1574 #define WC_ENC wxFONTENCODING_UTF32
1575#elif SIZEOF_WCHAR_T == 2
1576 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1577 #define WC_ENC wxFONTENCODING_UTF16
1578#else // sizeof(wchar_t) != 2 nor 4
1579 // does this ever happen?
1580 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1581#endif
1582
36acb880 1583// ----------------------------------------------------------------------------
e95354ec 1584// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1585// ----------------------------------------------------------------------------
1586
e95354ec 1587class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1588{
1589public:
e95354ec
VZ
1590 wxMBConv_iconv(const wxChar *name);
1591 virtual ~wxMBConv_iconv();
36acb880 1592
bde4baac
VZ
1593 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1594 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1595
d36c9347 1596 // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
7ef3ab50
VZ
1597 virtual size_t GetMBNulLen() const;
1598
d36c9347
VZ
1599 virtual wxMBConv *Clone() const
1600 {
1601 wxMBConv_iconv *p = new wxMBConv_iconv(m_name);
1602 p->m_minMBCharWidth = m_minMBCharWidth;
1603 return p;
1604 }
1605
e95354ec 1606 bool IsOk() const
74a7eb0b 1607 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
36acb880
VZ
1608
1609protected:
ef199164
DS
1610 // the iconv handlers used to translate from multibyte
1611 // to wide char and in the other direction
36acb880
VZ
1612 iconv_t m2w,
1613 w2m;
ef199164 1614
b1d547eb
VS
1615#if wxUSE_THREADS
1616 // guards access to m2w and w2m objects
1617 wxMutex m_iconvMutex;
1618#endif
36acb880
VZ
1619
1620private:
e95354ec 1621 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880 1622 // available on this machine, it will remain NULL
74a7eb0b 1623 static wxString ms_wcCharsetName;
36acb880
VZ
1624
1625 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1626 // different endian-ness than the native one
405d8f46 1627 static bool ms_wcNeedsSwap;
eec47cc6 1628
d36c9347
VZ
1629
1630 // name of the encoding handled by this conversion
1631 wxString m_name;
1632
7ef3ab50 1633 // cached result of GetMBNulLen(); set to 0 meaning "unknown"
c1464d9d
VZ
1634 // initially
1635 size_t m_minMBCharWidth;
36acb880
VZ
1636};
1637
8f115891
MW
1638// make the constructor available for unit testing
1639WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1640{
1641 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1642 if ( !result->IsOk() )
1643 {
1644 delete result;
1645 return 0;
1646 }
ef199164 1647
8f115891
MW
1648 return result;
1649}
1650
422e411e 1651wxString wxMBConv_iconv::ms_wcCharsetName;
e95354ec 1652bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1653
e95354ec 1654wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
d36c9347 1655 : m_name(name)
36acb880 1656{
c1464d9d 1657 m_minMBCharWidth = 0;
eec47cc6 1658
0331b385
VZ
1659 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1660 // names for the charsets
200a9923 1661 const wxCharBuffer cname(wxString(name).ToAscii());
04c79127 1662
36acb880 1663 // check for charset that represents wchar_t:
74a7eb0b 1664 if ( ms_wcCharsetName.empty() )
f1339c56 1665 {
c2b83fdd
VZ
1666 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1667
74a7eb0b
VZ
1668#if wxUSE_FONTMAP
1669 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1670#else // !wxUSE_FONTMAP
91cb7f52 1671 static const wxChar *names_static[] =
36acb880 1672 {
74a7eb0b
VZ
1673#if SIZEOF_WCHAR_T == 4
1674 _T("UCS-4"),
1675#elif SIZEOF_WCHAR_T = 2
1676 _T("UCS-2"),
1677#endif
1678 NULL
1679 };
91cb7f52 1680 const wxChar **names = names_static;
74a7eb0b 1681#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
36acb880 1682
d1f024a8 1683 for ( ; *names && ms_wcCharsetName.empty(); ++names )
74a7eb0b 1684 {
17a1ebd1 1685 const wxString nameCS(*names);
74a7eb0b
VZ
1686
1687 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
17a1ebd1 1688 wxString nameXE(nameCS);
ef199164
DS
1689
1690#ifdef WORDS_BIGENDIAN
74a7eb0b 1691 nameXE += _T("BE");
ef199164 1692#else // little endian
74a7eb0b 1693 nameXE += _T("LE");
ef199164 1694#endif
74a7eb0b 1695
c2b83fdd
VZ
1696 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1697 nameXE.c_str());
1698
74a7eb0b
VZ
1699 m2w = iconv_open(nameXE.ToAscii(), cname);
1700 if ( m2w == ICONV_T_INVALID )
3a0d76bc 1701 {
74a7eb0b 1702 // try charset w/o bytesex info (e.g. "UCS4")
c2b83fdd
VZ
1703 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1704 nameCS.c_str());
17a1ebd1 1705 m2w = iconv_open(nameCS.ToAscii(), cname);
3a0d76bc 1706
74a7eb0b
VZ
1707 // and check for bytesex ourselves:
1708 if ( m2w != ICONV_T_INVALID )
3a0d76bc 1709 {
74a7eb0b
VZ
1710 char buf[2], *bufPtr;
1711 wchar_t wbuf[2], *wbufPtr;
1712 size_t insz, outsz;
1713 size_t res;
1714
1715 buf[0] = 'A';
1716 buf[1] = 0;
1717 wbuf[0] = 0;
1718 insz = 2;
1719 outsz = SIZEOF_WCHAR_T * 2;
1720 wbufPtr = wbuf;
1721 bufPtr = buf;
1722
ef199164
DS
1723 res = iconv(
1724 m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1725 (char**)&wbufPtr, &outsz);
74a7eb0b
VZ
1726
1727 if (ICONV_FAILED(res, insz))
1728 {
1729 wxLogLastError(wxT("iconv"));
422e411e 1730 wxLogError(_("Conversion to charset '%s' doesn't work."),
17a1ebd1 1731 nameCS.c_str());
74a7eb0b
VZ
1732 }
1733 else // ok, can convert to this encoding, remember it
1734 {
17a1ebd1 1735 ms_wcCharsetName = nameCS;
74a7eb0b
VZ
1736 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1737 }
3a0d76bc
VS
1738 }
1739 }
74a7eb0b 1740 else // use charset not requiring byte swapping
36acb880 1741 {
74a7eb0b 1742 ms_wcCharsetName = nameXE;
36acb880 1743 }
3a0d76bc 1744 }
74a7eb0b 1745
0944fceb 1746 wxLogTrace(TRACE_STRCONV,
74a7eb0b 1747 wxT("iconv wchar_t charset is \"%s\"%s"),
cae8f1bf 1748 ms_wcCharsetName.empty() ? _T("<none>")
74a7eb0b
VZ
1749 : ms_wcCharsetName.c_str(),
1750 ms_wcNeedsSwap ? _T(" (needs swap)")
1751 : _T(""));
3a0d76bc 1752 }
36acb880 1753 else // we already have ms_wcCharsetName
3caec1bb 1754 {
74a7eb0b 1755 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
f1339c56 1756 }
dccce9ea 1757
74a7eb0b 1758 if ( ms_wcCharsetName.empty() )
f1339c56 1759 {
74a7eb0b 1760 w2m = ICONV_T_INVALID;
36acb880 1761 }
405d8f46
VZ
1762 else
1763 {
74a7eb0b
VZ
1764 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1765 if ( w2m == ICONV_T_INVALID )
1766 {
1767 wxLogTrace(TRACE_STRCONV,
1768 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
422e411e 1769 ms_wcCharsetName.c_str(), cname.data());
74a7eb0b 1770 }
405d8f46 1771 }
36acb880 1772}
3caec1bb 1773
e95354ec 1774wxMBConv_iconv::~wxMBConv_iconv()
36acb880 1775{
74a7eb0b 1776 if ( m2w != ICONV_T_INVALID )
36acb880 1777 iconv_close(m2w);
74a7eb0b 1778 if ( w2m != ICONV_T_INVALID )
36acb880
VZ
1779 iconv_close(w2m);
1780}
3a0d76bc 1781
bde4baac 1782size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1783{
69373110
VZ
1784 // find the string length: notice that must be done differently for
1785 // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1786 size_t inbuf;
7ef3ab50 1787 const size_t nulLen = GetMBNulLen();
69373110
VZ
1788 switch ( nulLen )
1789 {
1790 default:
467e0479 1791 return wxCONV_FAILED;
69373110
VZ
1792
1793 case 1:
1794 inbuf = strlen(psz); // arguably more optimized than our version
1795 break;
1796
1797 case 2:
1798 case 4:
1799 // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1800 // they also have to start at character boundary and not span two
1801 // adjacent characters
1802 const char *p;
1803 for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
1804 ;
1805 inbuf = p - psz;
1806 break;
1807 }
1808
b1d547eb 1809#if wxUSE_THREADS
6a17b868
SN
1810 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1811 // Unfortunately there are a couple of global wxCSConv objects such as
b1d547eb
VS
1812 // wxConvLocal that are used all over wx code, so we have to make sure
1813 // the handle is used by at most one thread at the time. Otherwise
1814 // only a few wx classes would be safe to use from non-main threads
1815 // as MB<->WC conversion would fail "randomly".
1816 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
69373110
VZ
1817#endif // wxUSE_THREADS
1818
36acb880
VZ
1819 size_t outbuf = n * SIZEOF_WCHAR_T;
1820 size_t res, cres;
1821 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1822 wchar_t *bufPtr = buf;
1823 const char *pszPtr = psz;
1824
1825 if (buf)
1826 {
1827 // have destination buffer, convert there
1828 cres = iconv(m2w,
1829 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1830 (char**)&bufPtr, &outbuf);
1831 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1832
36acb880 1833 if (ms_wcNeedsSwap)
3a0d76bc 1834 {
36acb880 1835 // convert to native endianness
17a1ebd1
VZ
1836 for ( unsigned i = 0; i < res; i++ )
1837 buf[n] = WC_BSWAP(buf[i]);
3a0d76bc 1838 }
adb45366 1839
69373110 1840 // NUL-terminate the string if there is any space left
49dd9820
VS
1841 if (res < n)
1842 buf[res] = 0;
36acb880
VZ
1843 }
1844 else
1845 {
1846 // no destination buffer... convert using temp buffer
1847 // to calculate destination buffer requirement
1848 wchar_t tbuf[8];
1849 res = 0;
ef199164
DS
1850
1851 do
1852 {
36acb880 1853 bufPtr = tbuf;
ef199164 1854 outbuf = 8 * SIZEOF_WCHAR_T;
36acb880
VZ
1855
1856 cres = iconv(m2w,
1857 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1858 (char**)&bufPtr, &outbuf );
1859
ef199164
DS
1860 res += 8 - (outbuf / SIZEOF_WCHAR_T);
1861 }
1862 while ((cres == (size_t)-1) && (errno == E2BIG));
f1339c56 1863 }
dccce9ea 1864
36acb880 1865 if (ICONV_FAILED(cres, inbuf))
f1339c56 1866 {
36acb880 1867 //VS: it is ok if iconv fails, hence trace only
ce6f8d6f 1868 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
467e0479 1869 return wxCONV_FAILED;
36acb880
VZ
1870 }
1871
1872 return res;
1873}
1874
bde4baac 1875size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1876{
b1d547eb
VS
1877#if wxUSE_THREADS
1878 // NB: explained in MB2WC
1879 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1880#endif
3698ae71 1881
156162ec
MW
1882 size_t inlen = wxWcslen(psz);
1883 size_t inbuf = inlen * SIZEOF_WCHAR_T;
36acb880
VZ
1884 size_t outbuf = n;
1885 size_t res, cres;
3a0d76bc 1886
36acb880 1887 wchar_t *tmpbuf = 0;
3caec1bb 1888
36acb880
VZ
1889 if (ms_wcNeedsSwap)
1890 {
1891 // need to copy to temp buffer to switch endianness
74a7eb0b 1892 // (doing WC_BSWAP twice on the original buffer won't help, as it
36acb880 1893 // could be in read-only memory, or be accessed in some other thread)
74a7eb0b 1894 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
17a1ebd1
VZ
1895 for ( size_t i = 0; i < inlen; i++ )
1896 tmpbuf[n] = WC_BSWAP(psz[i]);
ef199164 1897
156162ec 1898 tmpbuf[inlen] = L'\0';
74a7eb0b 1899 psz = tmpbuf;
36acb880 1900 }
3a0d76bc 1901
36acb880
VZ
1902 if (buf)
1903 {
1904 // have destination buffer, convert there
1905 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1906
ef199164 1907 res = n - outbuf;
adb45366 1908
49dd9820
VS
1909 // NB: iconv was given only wcslen(psz) characters on input, and so
1910 // it couldn't convert the trailing zero. Let's do it ourselves
1911 // if there's some room left for it in the output buffer.
1912 if (res < n)
1913 buf[0] = 0;
36acb880
VZ
1914 }
1915 else
1916 {
ef199164 1917 // no destination buffer: convert using temp buffer
36acb880
VZ
1918 // to calculate destination buffer requirement
1919 char tbuf[16];
1920 res = 0;
ef199164
DS
1921 do
1922 {
1923 buf = tbuf;
1924 outbuf = 16;
36acb880
VZ
1925
1926 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1927
36acb880 1928 res += 16 - outbuf;
ef199164
DS
1929 }
1930 while ((cres == (size_t)-1) && (errno == E2BIG));
f1339c56 1931 }
dccce9ea 1932
36acb880
VZ
1933 if (ms_wcNeedsSwap)
1934 {
1935 free(tmpbuf);
1936 }
dccce9ea 1937
36acb880
VZ
1938 if (ICONV_FAILED(cres, inbuf))
1939 {
ce6f8d6f 1940 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
467e0479 1941 return wxCONV_FAILED;
36acb880
VZ
1942 }
1943
1944 return res;
1945}
1946
7ef3ab50 1947size_t wxMBConv_iconv::GetMBNulLen() const
eec47cc6 1948{
c1464d9d 1949 if ( m_minMBCharWidth == 0 )
eec47cc6
VZ
1950 {
1951 wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1952
1953#if wxUSE_THREADS
1954 // NB: explained in MB2WC
1955 wxMutexLocker lock(self->m_iconvMutex);
1956#endif
1957
356410fc 1958 wchar_t *wnul = L"";
c1464d9d 1959 char buf[8]; // should be enough for NUL in any encoding
356410fc 1960 size_t inLen = sizeof(wchar_t),
c1464d9d 1961 outLen = WXSIZEOF(buf);
ef199164
DS
1962 char *inBuff = (char *)wnul;
1963 char *outBuff = buf;
1964 if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
356410fc 1965 {
c1464d9d 1966 self->m_minMBCharWidth = (size_t)-1;
356410fc
VZ
1967 }
1968 else // ok
1969 {
ef199164 1970 self->m_minMBCharWidth = outBuff - buf;
356410fc 1971 }
eec47cc6
VZ
1972 }
1973
c1464d9d 1974 return m_minMBCharWidth;
eec47cc6
VZ
1975}
1976
b040e242 1977#endif // HAVE_ICONV
36acb880 1978
e95354ec 1979
36acb880
VZ
1980// ============================================================================
1981// Win32 conversion classes
1982// ============================================================================
1cd52418 1983
e95354ec 1984#ifdef wxHAVE_WIN32_MB2WC
373658eb 1985
8b04d4c4 1986// from utils.cpp
d775fa82 1987#if wxUSE_FONTMAP
8b04d4c4
VZ
1988extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1989extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1990#endif
373658eb 1991
e95354ec 1992class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1993{
1994public:
bde4baac
VZ
1995 wxMBConv_win32()
1996 {
1997 m_CodePage = CP_ACP;
c1464d9d 1998 m_minMBCharWidth = 0;
bde4baac
VZ
1999 }
2000
d36c9347 2001 wxMBConv_win32(const wxMBConv_win32& conv)
1e1c5d62 2002 : wxMBConv()
d36c9347
VZ
2003 {
2004 m_CodePage = conv.m_CodePage;
2005 m_minMBCharWidth = conv.m_minMBCharWidth;
2006 }
2007
7608a683 2008#if wxUSE_FONTMAP
e95354ec 2009 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
2010 {
2011 m_CodePage = wxCharsetToCodepage(name);
c1464d9d 2012 m_minMBCharWidth = 0;
bde4baac 2013 }
dccce9ea 2014
e95354ec 2015 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
2016 {
2017 m_CodePage = wxEncodingToCodepage(encoding);
c1464d9d 2018 m_minMBCharWidth = 0;
bde4baac 2019 }
eec47cc6 2020#endif // wxUSE_FONTMAP
8b04d4c4 2021
d36c9347 2022 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 2023 {
02272c9c
VZ
2024 // note that we have to use MB_ERR_INVALID_CHARS flag as without it
2025 // the behaviour is not compatible with the Unix version (using iconv)
2026 // and breaks the library itself, e.g. wxTextInputStream::NextChar()
2027 // wouldn't work if reading an incomplete MB char didn't result in an
2028 // error
667e5b3e 2029 //
89028980 2030 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
830f8f11
VZ
2031 // Win XP or newer and it is not supported for UTF-[78] so we always
2032 // use our own conversions in this case. See
89028980
VS
2033 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2034 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
830f8f11 2035 if ( m_CodePage == CP_UTF8 )
89028980 2036 {
830f8f11 2037 return wxConvUTF8.MB2WC(buf, psz, n);
89028980 2038 }
830f8f11
VZ
2039
2040 if ( m_CodePage == CP_UTF7 )
2041 {
2042 return wxConvUTF7.MB2WC(buf, psz, n);
2043 }
2044
2045 int flags = 0;
2046 if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
2047 IsAtLeastWin2kSP4() )
89028980 2048 {
830f8f11 2049 flags = MB_ERR_INVALID_CHARS;
89028980 2050 }
667e5b3e 2051
2b5f62a0
VZ
2052 const size_t len = ::MultiByteToWideChar
2053 (
2054 m_CodePage, // code page
667e5b3e 2055 flags, // flags: fall on error
2b5f62a0
VZ
2056 psz, // input string
2057 -1, // its length (NUL-terminated)
b4da152e 2058 buf, // output string
2b5f62a0
VZ
2059 buf ? n : 0 // size of output buffer
2060 );
89028980
VS
2061 if ( !len )
2062 {
2063 // function totally failed
467e0479 2064 return wxCONV_FAILED;
89028980
VS
2065 }
2066
2067 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2068 // check if we succeeded, by doing a double trip:
2069 if ( !flags && buf )
2070 {
53c174fc
VZ
2071 const size_t mbLen = strlen(psz);
2072 wxCharBuffer mbBuf(mbLen);
89028980
VS
2073 if ( ::WideCharToMultiByte
2074 (
2075 m_CodePage,
2076 0,
2077 buf,
2078 -1,
2079 mbBuf.data(),
53c174fc 2080 mbLen + 1, // size in bytes, not length
89028980
VS
2081 NULL,
2082 NULL
2083 ) == 0 ||
2084 strcmp(mbBuf, psz) != 0 )
2085 {
2086 // we didn't obtain the same thing we started from, hence
2087 // the conversion was lossy and we consider that it failed
467e0479 2088 return wxCONV_FAILED;
89028980
VS
2089 }
2090 }
2b5f62a0 2091
03a991bc
VZ
2092 // note that it returns count of written chars for buf != NULL and size
2093 // of the needed buffer for buf == NULL so in either case the length of
2094 // the string (which never includes the terminating NUL) is one less
89028980 2095 return len - 1;
f1339c56 2096 }
dccce9ea 2097
d36c9347 2098 virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 2099 {
13dd924a
VZ
2100 /*
2101 we have a problem here: by default, WideCharToMultiByte() may
2102 replace characters unrepresentable in the target code page with bad
2103 quality approximations such as turning "1/2" symbol (U+00BD) into
2104 "1" for the code pages which don't have it and we, obviously, want
2105 to avoid this at any price
d775fa82 2106
13dd924a
VZ
2107 the trouble is that this function does it _silently_, i.e. it won't
2108 even tell us whether it did or not... Win98/2000 and higher provide
2109 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2110 we have to resort to a round trip, i.e. check that converting back
2111 results in the same string -- this is, of course, expensive but
2112 otherwise we simply can't be sure to not garble the data.
2113 */
2114
2115 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2116 // it doesn't work with CJK encodings (which we test for rather roughly
2117 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2118 // supporting it
907173e5
WS
2119 BOOL usedDef wxDUMMY_INITIALIZE(false);
2120 BOOL *pUsedDef;
13dd924a
VZ
2121 int flags;
2122 if ( CanUseNoBestFit() && m_CodePage < 50000 )
2123 {
2124 // it's our lucky day
2125 flags = WC_NO_BEST_FIT_CHARS;
2126 pUsedDef = &usedDef;
2127 }
2128 else // old system or unsupported encoding
2129 {
2130 flags = 0;
2131 pUsedDef = NULL;
2132 }
2133
2b5f62a0
VZ
2134 const size_t len = ::WideCharToMultiByte
2135 (
2136 m_CodePage, // code page
13dd924a
VZ
2137 flags, // either none or no best fit
2138 pwz, // input string
2b5f62a0
VZ
2139 -1, // it is (wide) NUL-terminated
2140 buf, // output buffer
2141 buf ? n : 0, // and its size
2142 NULL, // default "replacement" char
13dd924a 2143 pUsedDef // [out] was it used?
2b5f62a0
VZ
2144 );
2145
13dd924a
VZ
2146 if ( !len )
2147 {
2148 // function totally failed
467e0479 2149 return wxCONV_FAILED;
13dd924a
VZ
2150 }
2151
2152 // if we were really converting, check if we succeeded
2153 if ( buf )
2154 {
2155 if ( flags )
2156 {
2157 // check if the conversion failed, i.e. if any replacements
2158 // were done
2159 if ( usedDef )
467e0479 2160 return wxCONV_FAILED;
13dd924a
VZ
2161 }
2162 else // we must resort to double tripping...
2163 {
2164 wxWCharBuffer wcBuf(n);
467e0479 2165 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
13dd924a
VZ
2166 wcscmp(wcBuf, pwz) != 0 )
2167 {
2168 // we didn't obtain the same thing we started from, hence
2169 // the conversion was lossy and we consider that it failed
467e0479 2170 return wxCONV_FAILED;
13dd924a
VZ
2171 }
2172 }
2173 }
2174
03a991bc 2175 // see the comment above for the reason of "len - 1"
13dd924a 2176 return len - 1;
f1339c56 2177 }
dccce9ea 2178
7ef3ab50
VZ
2179 virtual size_t GetMBNulLen() const
2180 {
2181 if ( m_minMBCharWidth == 0 )
2182 {
2183 int len = ::WideCharToMultiByte
2184 (
2185 m_CodePage, // code page
2186 0, // no flags
2187 L"", // input string
2188 1, // translate just the NUL
2189 NULL, // output buffer
2190 0, // and its size
2191 NULL, // no replacement char
2192 NULL // [out] don't care if it was used
2193 );
2194
2195 wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2196 switch ( len )
2197 {
2198 default:
2199 wxLogDebug(_T("Unexpected NUL length %d"), len);
ef199164
DS
2200 self->m_minMBCharWidth = (size_t)-1;
2201 break;
7ef3ab50
VZ
2202
2203 case 0:
2204 self->m_minMBCharWidth = (size_t)-1;
2205 break;
2206
2207 case 1:
2208 case 2:
2209 case 4:
2210 self->m_minMBCharWidth = len;
2211 break;
2212 }
2213 }
2214
2215 return m_minMBCharWidth;
2216 }
2217
d36c9347
VZ
2218 virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2219
13dd924a
VZ
2220 bool IsOk() const { return m_CodePage != -1; }
2221
2222private:
2223 static bool CanUseNoBestFit()
2224 {
2225 static int s_isWin98Or2k = -1;
2226
2227 if ( s_isWin98Or2k == -1 )
2228 {
2229 int verMaj, verMin;
2230 switch ( wxGetOsVersion(&verMaj, &verMin) )
2231 {
406d283a 2232 case wxOS_WINDOWS_9X:
13dd924a
VZ
2233 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2234 break;
2235
406d283a 2236 case wxOS_WINDOWS_NT:
13dd924a
VZ
2237 s_isWin98Or2k = verMaj >= 5;
2238 break;
2239
2240 default:
ef199164 2241 // unknown: be conservative by default
13dd924a 2242 s_isWin98Or2k = 0;
ef199164 2243 break;
13dd924a
VZ
2244 }
2245
2246 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
2247 }
2248
2249 return s_isWin98Or2k == 1;
2250 }
f1339c56 2251
89028980
VS
2252 static bool IsAtLeastWin2kSP4()
2253 {
8942f83a
WS
2254#ifdef __WXWINCE__
2255 return false;
2256#else
89028980
VS
2257 static int s_isAtLeastWin2kSP4 = -1;
2258
2259 if ( s_isAtLeastWin2kSP4 == -1 )
2260 {
2261 OSVERSIONINFOEX ver;
2262
2263 memset(&ver, 0, sizeof(ver));
2264 ver.dwOSVersionInfoSize = sizeof(ver);
2265 GetVersionEx((OSVERSIONINFO*)&ver);
2266
2267 s_isAtLeastWin2kSP4 =
2268 ((ver.dwMajorVersion > 5) || // Vista+
2269 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2270 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2271 ver.wServicePackMajor >= 4)) // 2000 SP4+
2272 ? 1 : 0;
2273 }
2274
2275 return s_isAtLeastWin2kSP4 == 1;
8942f83a 2276#endif
89028980
VS
2277 }
2278
eec47cc6 2279
c1464d9d 2280 // the code page we're working with
b1d66b54 2281 long m_CodePage;
c1464d9d 2282
7ef3ab50 2283 // cached result of GetMBNulLen(), set to 0 initially meaning
c1464d9d
VZ
2284 // "unknown"
2285 size_t m_minMBCharWidth;
1cd52418 2286};
e95354ec
VZ
2287
2288#endif // wxHAVE_WIN32_MB2WC
2289
f7e98dee
RN
2290// ============================================================================
2291// Cocoa conversion classes
2292// ============================================================================
2293
2294#if defined(__WXCOCOA__)
2295
ef199164
DS
// RN: There is no UTF-32 support in either Core Foundation or Cocoa.
//     Strangely enough, Core Foundation uses UTF-32 internally
//     quite a bit - it's just not public (yet).
f7e98dee
RN
2299
2300#include <CoreFoundation/CFString.h>
2301#include <CoreFoundation/CFStringEncodingExt.h>
2302
2303CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 2304{
638357a0 2305 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ef199164
DS
2306
2307 switch (encoding)
ecd9653b 2308 {
ef199164
DS
2309 case wxFONTENCODING_DEFAULT :
2310 enc = CFStringGetSystemEncoding();
2311 break ;
2312
ecd9653b
WS
2313 case wxFONTENCODING_ISO8859_1 :
2314 enc = kCFStringEncodingISOLatin1 ;
2315 break ;
2316 case wxFONTENCODING_ISO8859_2 :
2317 enc = kCFStringEncodingISOLatin2;
2318 break ;
2319 case wxFONTENCODING_ISO8859_3 :
2320 enc = kCFStringEncodingISOLatin3 ;
2321 break ;
2322 case wxFONTENCODING_ISO8859_4 :
2323 enc = kCFStringEncodingISOLatin4;
2324 break ;
2325 case wxFONTENCODING_ISO8859_5 :
2326 enc = kCFStringEncodingISOLatinCyrillic;
2327 break ;
2328 case wxFONTENCODING_ISO8859_6 :
2329 enc = kCFStringEncodingISOLatinArabic;
2330 break ;
2331 case wxFONTENCODING_ISO8859_7 :
2332 enc = kCFStringEncodingISOLatinGreek;
2333 break ;
2334 case wxFONTENCODING_ISO8859_8 :
2335 enc = kCFStringEncodingISOLatinHebrew;
2336 break ;
2337 case wxFONTENCODING_ISO8859_9 :
2338 enc = kCFStringEncodingISOLatin5;
2339 break ;
2340 case wxFONTENCODING_ISO8859_10 :
2341 enc = kCFStringEncodingISOLatin6;
2342 break ;
2343 case wxFONTENCODING_ISO8859_11 :
2344 enc = kCFStringEncodingISOLatinThai;
2345 break ;
2346 case wxFONTENCODING_ISO8859_13 :
2347 enc = kCFStringEncodingISOLatin7;
2348 break ;
2349 case wxFONTENCODING_ISO8859_14 :
2350 enc = kCFStringEncodingISOLatin8;
2351 break ;
2352 case wxFONTENCODING_ISO8859_15 :
2353 enc = kCFStringEncodingISOLatin9;
2354 break ;
2355
2356 case wxFONTENCODING_KOI8 :
2357 enc = kCFStringEncodingKOI8_R;
2358 break ;
2359 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2360 enc = kCFStringEncodingDOSRussian;
2361 break ;
2362
2363// case wxFONTENCODING_BULGARIAN :
2364// enc = ;
2365// break ;
2366
2367 case wxFONTENCODING_CP437 :
ef199164 2368 enc = kCFStringEncodingDOSLatinUS ;
ecd9653b
WS
2369 break ;
2370 case wxFONTENCODING_CP850 :
2371 enc = kCFStringEncodingDOSLatin1;
2372 break ;
2373 case wxFONTENCODING_CP852 :
2374 enc = kCFStringEncodingDOSLatin2;
2375 break ;
2376 case wxFONTENCODING_CP855 :
2377 enc = kCFStringEncodingDOSCyrillic;
2378 break ;
2379 case wxFONTENCODING_CP866 :
ef199164 2380 enc = kCFStringEncodingDOSRussian ;
ecd9653b
WS
2381 break ;
2382 case wxFONTENCODING_CP874 :
2383 enc = kCFStringEncodingDOSThai;
2384 break ;
2385 case wxFONTENCODING_CP932 :
2386 enc = kCFStringEncodingDOSJapanese;
2387 break ;
2388 case wxFONTENCODING_CP936 :
ef199164 2389 enc = kCFStringEncodingDOSChineseSimplif ;
ecd9653b
WS
2390 break ;
2391 case wxFONTENCODING_CP949 :
2392 enc = kCFStringEncodingDOSKorean;
2393 break ;
2394 case wxFONTENCODING_CP950 :
2395 enc = kCFStringEncodingDOSChineseTrad;
2396 break ;
ecd9653b
WS
2397 case wxFONTENCODING_CP1250 :
2398 enc = kCFStringEncodingWindowsLatin2;
2399 break ;
2400 case wxFONTENCODING_CP1251 :
ef199164 2401 enc = kCFStringEncodingWindowsCyrillic ;
ecd9653b
WS
2402 break ;
2403 case wxFONTENCODING_CP1252 :
ef199164 2404 enc = kCFStringEncodingWindowsLatin1 ;
ecd9653b
WS
2405 break ;
2406 case wxFONTENCODING_CP1253 :
2407 enc = kCFStringEncodingWindowsGreek;
2408 break ;
2409 case wxFONTENCODING_CP1254 :
2410 enc = kCFStringEncodingWindowsLatin5;
2411 break ;
2412 case wxFONTENCODING_CP1255 :
ef199164 2413 enc = kCFStringEncodingWindowsHebrew ;
ecd9653b
WS
2414 break ;
2415 case wxFONTENCODING_CP1256 :
ef199164 2416 enc = kCFStringEncodingWindowsArabic ;
ecd9653b
WS
2417 break ;
2418 case wxFONTENCODING_CP1257 :
2419 enc = kCFStringEncodingWindowsBalticRim;
2420 break ;
638357a0
RN
2421// This only really encodes to UTF7 (if that) evidently
2422// case wxFONTENCODING_UTF7 :
2423// enc = kCFStringEncodingNonLossyASCII ;
2424// break ;
ecd9653b
WS
2425 case wxFONTENCODING_UTF8 :
2426 enc = kCFStringEncodingUTF8 ;
2427 break ;
2428 case wxFONTENCODING_EUC_JP :
2429 enc = kCFStringEncodingEUC_JP;
2430 break ;
2431 case wxFONTENCODING_UTF16 :
f7e98dee 2432 enc = kCFStringEncodingUnicode ;
ecd9653b 2433 break ;
f7e98dee
RN
2434 case wxFONTENCODING_MACROMAN :
2435 enc = kCFStringEncodingMacRoman ;
2436 break ;
2437 case wxFONTENCODING_MACJAPANESE :
2438 enc = kCFStringEncodingMacJapanese ;
2439 break ;
2440 case wxFONTENCODING_MACCHINESETRAD :
2441 enc = kCFStringEncodingMacChineseTrad ;
2442 break ;
2443 case wxFONTENCODING_MACKOREAN :
2444 enc = kCFStringEncodingMacKorean ;
2445 break ;
2446 case wxFONTENCODING_MACARABIC :
2447 enc = kCFStringEncodingMacArabic ;
2448 break ;
2449 case wxFONTENCODING_MACHEBREW :
2450 enc = kCFStringEncodingMacHebrew ;
2451 break ;
2452 case wxFONTENCODING_MACGREEK :
2453 enc = kCFStringEncodingMacGreek ;
2454 break ;
2455 case wxFONTENCODING_MACCYRILLIC :
2456 enc = kCFStringEncodingMacCyrillic ;
2457 break ;
2458 case wxFONTENCODING_MACDEVANAGARI :
2459 enc = kCFStringEncodingMacDevanagari ;
2460 break ;
2461 case wxFONTENCODING_MACGURMUKHI :
2462 enc = kCFStringEncodingMacGurmukhi ;
2463 break ;
2464 case wxFONTENCODING_MACGUJARATI :
2465 enc = kCFStringEncodingMacGujarati ;
2466 break ;
2467 case wxFONTENCODING_MACORIYA :
2468 enc = kCFStringEncodingMacOriya ;
2469 break ;
2470 case wxFONTENCODING_MACBENGALI :
2471 enc = kCFStringEncodingMacBengali ;
2472 break ;
2473 case wxFONTENCODING_MACTAMIL :
2474 enc = kCFStringEncodingMacTamil ;
2475 break ;
2476 case wxFONTENCODING_MACTELUGU :
2477 enc = kCFStringEncodingMacTelugu ;
2478 break ;
2479 case wxFONTENCODING_MACKANNADA :
2480 enc = kCFStringEncodingMacKannada ;
2481 break ;
2482 case wxFONTENCODING_MACMALAJALAM :
2483 enc = kCFStringEncodingMacMalayalam ;
2484 break ;
2485 case wxFONTENCODING_MACSINHALESE :
2486 enc = kCFStringEncodingMacSinhalese ;
2487 break ;
2488 case wxFONTENCODING_MACBURMESE :
2489 enc = kCFStringEncodingMacBurmese ;
2490 break ;
2491 case wxFONTENCODING_MACKHMER :
2492 enc = kCFStringEncodingMacKhmer ;
2493 break ;
2494 case wxFONTENCODING_MACTHAI :
2495 enc = kCFStringEncodingMacThai ;
2496 break ;
2497 case wxFONTENCODING_MACLAOTIAN :
2498 enc = kCFStringEncodingMacLaotian ;
2499 break ;
2500 case wxFONTENCODING_MACGEORGIAN :
2501 enc = kCFStringEncodingMacGeorgian ;
2502 break ;
2503 case wxFONTENCODING_MACARMENIAN :
2504 enc = kCFStringEncodingMacArmenian ;
2505 break ;
2506 case wxFONTENCODING_MACCHINESESIMP :
2507 enc = kCFStringEncodingMacChineseSimp ;
2508 break ;
2509 case wxFONTENCODING_MACTIBETAN :
2510 enc = kCFStringEncodingMacTibetan ;
2511 break ;
2512 case wxFONTENCODING_MACMONGOLIAN :
2513 enc = kCFStringEncodingMacMongolian ;
2514 break ;
2515 case wxFONTENCODING_MACETHIOPIC :
2516 enc = kCFStringEncodingMacEthiopic ;
2517 break ;
2518 case wxFONTENCODING_MACCENTRALEUR :
2519 enc = kCFStringEncodingMacCentralEurRoman ;
2520 break ;
2521 case wxFONTENCODING_MACVIATNAMESE :
2522 enc = kCFStringEncodingMacVietnamese ;
2523 break ;
2524 case wxFONTENCODING_MACARABICEXT :
2525 enc = kCFStringEncodingMacExtArabic ;
2526 break ;
2527 case wxFONTENCODING_MACSYMBOL :
2528 enc = kCFStringEncodingMacSymbol ;
2529 break ;
2530 case wxFONTENCODING_MACDINGBATS :
2531 enc = kCFStringEncodingMacDingbats ;
2532 break ;
2533 case wxFONTENCODING_MACTURKISH :
2534 enc = kCFStringEncodingMacTurkish ;
2535 break ;
2536 case wxFONTENCODING_MACCROATIAN :
2537 enc = kCFStringEncodingMacCroatian ;
2538 break ;
2539 case wxFONTENCODING_MACICELANDIC :
2540 enc = kCFStringEncodingMacIcelandic ;
2541 break ;
2542 case wxFONTENCODING_MACROMANIAN :
2543 enc = kCFStringEncodingMacRomanian ;
2544 break ;
2545 case wxFONTENCODING_MACCELTIC :
2546 enc = kCFStringEncodingMacCeltic ;
2547 break ;
2548 case wxFONTENCODING_MACGAELIC :
2549 enc = kCFStringEncodingMacGaelic ;
2550 break ;
ecd9653b
WS
2551// case wxFONTENCODING_MACKEYBOARD :
2552// enc = kCFStringEncodingMacKeyboardGlyphs ;
2553// break ;
ef199164 2554
ecd9653b
WS
2555 default :
2556 // because gcc is picky
2557 break ;
ef199164
DS
2558 }
2559
ecd9653b 2560 return enc ;
f7e98dee
RN
2561}
2562
f7e98dee
RN
2563class wxMBConv_cocoa : public wxMBConv
2564{
2565public:
2566 wxMBConv_cocoa()
2567 {
2568 Init(CFStringGetSystemEncoding()) ;
2569 }
2570
d36c9347
VZ
2571 wxMBConv_cocoa(const wxMBConv_cocoa& conv)
2572 {
2573 m_encoding = conv.m_encoding;
2574 }
2575
a6900d10 2576#if wxUSE_FONTMAP
f7e98dee
RN
2577 wxMBConv_cocoa(const wxChar* name)
2578 {
267e11c5 2579 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
f7e98dee 2580 }
a6900d10 2581#endif
f7e98dee
RN
2582
2583 wxMBConv_cocoa(wxFontEncoding encoding)
2584 {
2585 Init( wxCFStringEncFromFontEnc(encoding) );
2586 }
2587
2588 ~wxMBConv_cocoa()
2589 {
2590 }
2591
2592 void Init( CFStringEncoding encoding)
2593 {
638357a0 2594 m_encoding = encoding ;
f7e98dee
RN
2595 }
2596
2597 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2598 {
2599 wxASSERT(szUnConv);
ecd9653b 2600
638357a0
RN
2601 CFStringRef theString = CFStringCreateWithBytes (
2602 NULL, //the allocator
2603 (const UInt8*)szUnConv,
2604 strlen(szUnConv),
2605 m_encoding,
2606 false //no BOM/external representation
f7e98dee
RN
2607 );
2608
2609 wxASSERT(theString);
2610
638357a0
RN
2611 size_t nOutLength = CFStringGetLength(theString);
2612
2613 if (szOut == NULL)
f7e98dee 2614 {
f7e98dee 2615 CFRelease(theString);
638357a0 2616 return nOutLength;
f7e98dee 2617 }
ecd9653b 2618
638357a0 2619 CFRange theRange = { 0, nOutSize };
ecd9653b 2620
638357a0
RN
2621#if SIZEOF_WCHAR_T == 4
2622 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2623#endif
3698ae71 2624
f7e98dee 2625 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
3698ae71 2626
f7e98dee 2627 CFRelease(theString);
ecd9653b 2628
ef199164 2629 szUniCharBuffer[nOutLength] = '\0';
f7e98dee
RN
2630
2631#if SIZEOF_WCHAR_T == 4
ef199164
DS
2632 wxMBConvUTF16 converter;
2633 converter.MB2WC( szOut, (const char*)szUniCharBuffer, nOutSize );
2634 delete [] szUniCharBuffer;
f7e98dee 2635#endif
3698ae71 2636
638357a0 2637 return nOutLength;
f7e98dee
RN
2638 }
2639
2640 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2641 {
638357a0 2642 wxASSERT(szUnConv);
3698ae71 2643
f7e98dee 2644 size_t nRealOutSize;
638357a0 2645 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 2646 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2647
f7e98dee 2648#if SIZEOF_WCHAR_T == 4
d9d488cf 2649 wxMBConvUTF16 converter ;
ef199164
DS
2650 nBufSize = converter.WC2MB( NULL, szUnConv, 0 );
2651 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1];
2652 converter.WC2MB( (char*) szUniBuffer, szUnConv, nBufSize + sizeof(UniChar));
f7e98dee 2653 nBufSize /= sizeof(UniChar);
f7e98dee
RN
2654#endif
2655
2656 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2657 NULL, //allocator
2658 szUniBuffer,
2659 nBufSize,
638357a0 2660 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 2661 );
ecd9653b 2662
f7e98dee 2663 wxASSERT(theString);
ecd9653b 2664
f7e98dee 2665 //Note that CER puts a BOM when converting to unicode
638357a0
RN
2666 //so we check and use getchars instead in that case
2667 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 2668 {
638357a0
RN
2669 if (szOut != NULL)
2670 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
3698ae71 2671
638357a0
RN
2672 nRealOutSize = CFStringGetLength(theString) + 1;
2673 }
2674 else
2675 {
2676 CFStringGetBytes(
2677 theString,
2678 CFRangeMake(0, CFStringGetLength(theString)),
2679 m_encoding,
2680 0, //what to put in characters that can't be converted -
2681 //0 tells CFString to return NULL if it meets such a character
2682 false, //not an external representation
2683 (UInt8*) szOut,
3698ae71 2684 nOutSize,
638357a0
RN
2685 (CFIndex*) &nRealOutSize
2686 );
f7e98dee 2687 }
ecd9653b 2688
638357a0 2689 CFRelease(theString);
ecd9653b 2690
638357a0
RN
2691#if SIZEOF_WCHAR_T == 4
2692 delete[] szUniBuffer;
2693#endif
ecd9653b 2694
f7e98dee
RN
2695 return nRealOutSize - 1;
2696 }
2697
d36c9347
VZ
2698 virtual wxMBConv *Clone() const { return new wxMBConv_cocoa(*this); }
2699
f7e98dee 2700 bool IsOk() const
ecd9653b 2701 {
3698ae71 2702 return m_encoding != kCFStringEncodingInvalidId &&
638357a0 2703 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2704 }
2705
2706private:
638357a0 2707 CFStringEncoding m_encoding ;
f7e98dee
RN
2708};
2709
2710#endif // defined(__WXCOCOA__)
2711
335d31e0
SC
2712// ============================================================================
2713// Mac conversion classes
2714// ============================================================================
2715
2716#if defined(__WXMAC__) && defined(TARGET_CARBON)
2717
2718class wxMBConv_mac : public wxMBConv
2719{
2720public:
2721 wxMBConv_mac()
2722 {
2723 Init(CFStringGetSystemEncoding()) ;
2724 }
2725
d36c9347
VZ
2726 wxMBConv_mac(const wxMBConv_mac& conv)
2727 {
2728 Init(conv.m_char_encoding);
2729 }
2730
2d1659cf 2731#if wxUSE_FONTMAP
335d31e0
SC
2732 wxMBConv_mac(const wxChar* name)
2733 {
ef199164 2734 Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) );
335d31e0 2735 }
2d1659cf 2736#endif
335d31e0
SC
2737
2738 wxMBConv_mac(wxFontEncoding encoding)
2739 {
d775fa82
WS
2740 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2741 }
2742
2743 ~wxMBConv_mac()
2744 {
2745 OSStatus status = noErr ;
739cb14a
SC
2746 if (m_MB2WC_converter)
2747 status = TECDisposeConverter(m_MB2WC_converter);
2748 if (m_WC2MB_converter)
2749 status = TECDisposeConverter(m_WC2MB_converter);
d775fa82
WS
2750 }
2751
739cb14a
SC
2752 void Init( TextEncodingBase encoding,TextEncodingVariant encodingVariant = kTextEncodingDefaultVariant ,
2753 TextEncodingFormat encodingFormat = kTextEncodingDefaultFormat)
d775fa82 2754 {
739cb14a
SC
2755 m_MB2WC_converter = NULL ;
2756 m_WC2MB_converter = NULL ;
2757 m_char_encoding = CreateTextEncoding(encoding, encodingVariant, encodingFormat) ;
ef199164 2758 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 0, kUnicode16BitFormat) ;
739cb14a 2759 }
d775fa82 2760
739cb14a
SC
2761 virtual void CreateIfNeeded() const
2762 {
2763 if ( m_MB2WC_converter == NULL && m_WC2MB_converter == NULL )
2764 {
2765 OSStatus status = noErr ;
2766 status = TECCreateConverter(&m_MB2WC_converter,
d775fa82
WS
2767 m_char_encoding,
2768 m_unicode_encoding);
739cb14a
SC
2769 wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2770 status = TECCreateConverter(&m_WC2MB_converter,
d775fa82
WS
2771 m_unicode_encoding,
2772 m_char_encoding);
739cb14a
SC
2773 wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2774 }
d775fa82 2775 }
739cb14a 2776
335d31e0
SC
2777 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2778 {
739cb14a 2779 CreateIfNeeded() ;
d775fa82
WS
2780 OSStatus status = noErr ;
2781 ByteCount byteOutLen ;
9088c87b 2782 ByteCount byteInLen = strlen(psz) + 1;
d775fa82
WS
2783 wchar_t *tbuf = NULL ;
2784 UniChar* ubuf = NULL ;
2785 size_t res = 0 ;
2786
2787 if (buf == NULL)
2788 {
ef199164
DS
2789 // Apple specs say at least 32
2790 n = wxMax( 32, byteInLen ) ;
2791 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
d775fa82 2792 }
ef199164 2793
d775fa82 2794 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
ef199164 2795
f3a355ce 2796#if SIZEOF_WCHAR_T == 4
d775fa82 2797 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2798#else
d775fa82 2799 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2800#endif
ef199164
DS
2801
2802 status = TECConvertText(
2803 m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
2804 (TextPtr) ubuf, byteBufferLen, &byteOutLen);
2805
f3a355ce 2806#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2807 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2808 // is not properly terminated we get random characters at the end
2809 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d9d488cf 2810 wxMBConvUTF16 converter ;
ef199164 2811 res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
d775fa82 2812 free( ubuf ) ;
f3a355ce 2813#else
d775fa82 2814 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2815#endif
ef199164 2816
d775fa82
WS
2817 if ( buf == NULL )
2818 free(tbuf) ;
335d31e0 2819
335d31e0
SC
2820 if ( buf && res < n)
2821 buf[res] = 0;
2822
d775fa82 2823 return res ;
335d31e0
SC
2824 }
2825
2826 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82 2827 {
739cb14a 2828 CreateIfNeeded() ;
d775fa82
WS
2829 OSStatus status = noErr ;
2830 ByteCount byteOutLen ;
2831 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2832
2833 char *tbuf = NULL ;
2834
2835 if (buf == NULL)
2836 {
ef199164
DS
2837 // Apple specs say at least 32
2838 n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2839 tbuf = (char*) malloc( n ) ;
2840 }
2841
2842 ByteCount byteBufferLen = n ;
2843 UniChar* ubuf = NULL ;
ef199164 2844
f3a355ce 2845#if SIZEOF_WCHAR_T == 4
d9d488cf 2846 wxMBConvUTF16 converter ;
ef199164 2847 size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
d775fa82
WS
2848 byteInLen = unicharlen ;
2849 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
ef199164 2850 converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
f3a355ce 2851#else
d775fa82 2852 ubuf = (UniChar*) psz ;
f3a355ce 2853#endif
ef199164
DS
2854
2855 status = TECConvertText(
2856 m_WC2MB_converter, (ConstTextPtr) ubuf, byteInLen, &byteInLen,
2857 (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2858
f3a355ce 2859#if SIZEOF_WCHAR_T == 4
d775fa82 2860 free( ubuf ) ;
f3a355ce 2861#endif
ef199164 2862
d775fa82
WS
2863 if ( buf == NULL )
2864 free(tbuf) ;
335d31e0 2865
d775fa82 2866 size_t res = byteOutLen ;
335d31e0 2867 if ( buf && res < n)
638357a0 2868 {
335d31e0 2869 buf[res] = 0;
3698ae71 2870
638357a0
RN
2871 //we need to double-trip to verify it didn't insert any ? in place
2872 //of bogus characters
2873 wxWCharBuffer wcBuf(n);
2874 size_t pszlen = wxWcslen(psz);
467e0479 2875 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
638357a0
RN
2876 wxWcslen(wcBuf) != pszlen ||
2877 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2878 {
2879 // we didn't obtain the same thing we started from, hence
2880 // the conversion was lossy and we consider that it failed
467e0479 2881 return wxCONV_FAILED;
638357a0
RN
2882 }
2883 }
335d31e0 2884
d775fa82 2885 return res ;
335d31e0
SC
2886 }
2887
d3478e2c 2888 virtual wxMBConv *Clone() const { return new wxMBConv_mac(*this); }
d36c9347 2889
335d31e0 2890 bool IsOk() const
739cb14a
SC
2891 {
2892 CreateIfNeeded() ;
2893 return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL;
2894 }
335d31e0 2895
739cb14a
SC
2896protected :
2897 mutable TECObjectRef m_MB2WC_converter;
2898 mutable TECObjectRef m_WC2MB_converter;
d775fa82 2899
ef199164
DS
2900 TextEncodingBase m_char_encoding;
2901 TextEncodingBase m_unicode_encoding;
335d31e0
SC
2902};
2903
739cb14a
SC
2904// MB is decomposed (D) normalized UTF8
2905
2906class wxMBConv_macUTF8D : public wxMBConv_mac
2907{
2908public :
2909 wxMBConv_macUTF8D()
2910 {
2911 Init( kTextEncodingUnicodeDefault , kUnicodeNoSubset , kUnicodeUTF8Format ) ;
2912 m_uni = NULL;
fbb0b8af 2913 m_uniBack = NULL ;
739cb14a
SC
2914 }
2915
2916 ~wxMBConv_macUTF8D()
2917 {
fbb0b8af
SC
2918 if (m_uni!=NULL)
2919 DisposeUnicodeToTextInfo(&m_uni);
2920 if (m_uniBack!=NULL)
2921 DisposeUnicodeToTextInfo(&m_uniBack);
739cb14a
SC
2922 }
2923
2924 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2925 {
2926 CreateIfNeeded() ;
2927 OSStatus status = noErr ;
2928 ByteCount byteOutLen ;
2929 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2930
2931 char *tbuf = NULL ;
2932
2933 if (buf == NULL)
2934 {
2935 // Apple specs say at least 32
2936 n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2937 tbuf = (char*) malloc( n ) ;
2938 }
2939
2940 ByteCount byteBufferLen = n ;
2941 UniChar* ubuf = NULL ;
2942
2943#if SIZEOF_WCHAR_T == 4
2944 wxMBConvUTF16 converter ;
2945 size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
2946 byteInLen = unicharlen ;
2947 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2948 converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
2949#else
2950 ubuf = (UniChar*) psz ;
2951#endif
2952
2953 // ubuf is a non-decomposed UniChar buffer
2954
2955 ByteCount dcubuflen = byteInLen * 2 + 2 ;
2956 ByteCount dcubufread , dcubufwritten ;
2957 UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
2958
2959 ConvertFromUnicodeToText( m_uni , byteInLen , ubuf ,
2960 kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , dcubuf ) ;
2961
2962 // we now convert that decomposed buffer into UTF8
2963
2964 status = TECConvertText(
2965 m_WC2MB_converter, (ConstTextPtr) dcubuf, dcubufwritten, &dcubufread,
2966 (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2967
2968 free( dcubuf );
2969
2970#if SIZEOF_WCHAR_T == 4
2971 free( ubuf ) ;
2972#endif
2973
2974 if ( buf == NULL )
2975 free(tbuf) ;
2976
2977 size_t res = byteOutLen ;
2978 if ( buf && res < n)
2979 {
2980 buf[res] = 0;
2981 // don't test for round-trip fidelity yet, we cannot guarantee it yet
2982 }
2983
2984 return res ;
2985 }
2986
fbb0b8af
SC
2987 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2988 {
2989 CreateIfNeeded() ;
2990 OSStatus status = noErr ;
2991 ByteCount byteOutLen ;
2992 ByteCount byteInLen = strlen(psz) + 1;
2993 wchar_t *tbuf = NULL ;
2994 UniChar* ubuf = NULL ;
2995 size_t res = 0 ;
2996
2997 if (buf == NULL)
2998 {
2999 // Apple specs say at least 32
3000 n = wxMax( 32, byteInLen ) ;
3001 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
3002 }
3003
3004 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
3005
3006#if SIZEOF_WCHAR_T == 4
3007 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
3008#else
3009 ubuf = (UniChar*) (buf ? buf : tbuf) ;
3010#endif
3011
3012 ByteCount dcubuflen = byteBufferLen * 2 + 2 ;
3013 ByteCount dcubufread , dcubufwritten ;
3014 UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
3015
3016 status = TECConvertText(
3017 m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
3018 (TextPtr) dcubuf, dcubuflen, &byteOutLen);
3019 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
3020 // is not properly terminated we get random characters at the end
3021 dcubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
3022
3023 // now from the decomposed UniChar to properly composed uniChar
3024 ConvertFromUnicodeToText( m_uniBack , byteOutLen , dcubuf ,
3025 kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , ubuf ) ;
3026
3027 free( dcubuf );
3028 byteOutLen = dcubufwritten ;
3029 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
3030
3031
3032#if SIZEOF_WCHAR_T == 4
3033 wxMBConvUTF16 converter ;
3034 res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
3035 free( ubuf ) ;
3036#else
3037 res = byteOutLen / sizeof( UniChar ) ;
3038#endif
3039
3040 if ( buf == NULL )
3041 free(tbuf) ;
3042
3043 if ( buf && res < n)
3044 buf[res] = 0;
3045
3046 return res ;
3047 }
3048
739cb14a
SC
3049 virtual void CreateIfNeeded() const
3050 {
3051 wxMBConv_mac::CreateIfNeeded() ;
3052 if ( m_uni == NULL )
3053 {
3054 m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3055 kUnicodeNoSubset, kTextEncodingDefaultFormat);
3056 m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3057 kUnicodeCanonicalDecompVariant, kTextEncodingDefaultFormat);
3058 m_map.mappingVersion = kUnicodeUseLatestMapping;
3059
3060 OSStatus err = CreateUnicodeToTextInfo(&m_map, &m_uni);
3061 wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
fbb0b8af
SC
3062
3063 m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3064 kUnicodeNoSubset, kTextEncodingDefaultFormat);
3065 m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3066 kUnicodeCanonicalCompVariant, kTextEncodingDefaultFormat);
3067 m_map.mappingVersion = kUnicodeUseLatestMapping;
3068 err = CreateUnicodeToTextInfo(&m_map, &m_uniBack);
3069 wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
739cb14a
SC
3070 }
3071 }
3072protected :
3073 mutable UnicodeToTextInfo m_uni;
fbb0b8af 3074 mutable UnicodeToTextInfo m_uniBack;
739cb14a
SC
3075 mutable UnicodeMapping m_map;
3076};
335d31e0 3077#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 3078
36acb880
VZ
3079// ============================================================================
3080// wxEncodingConverter based conversion classes
3081// ============================================================================
3082
1e6feb95 3083#if wxUSE_FONTMAP
1cd52418 3084
e95354ec 3085class wxMBConv_wxwin : public wxMBConv
1cd52418 3086{
8b04d4c4
VZ
3087private:
3088 void Init()
3089 {
3090 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
3091 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
3092 }
3093
6001e347 3094public:
f1339c56
RR
3095 // temporarily just use wxEncodingConverter stuff,
3096 // so that it works while a better implementation is built
e95354ec 3097 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
3098 {
3099 if (name)
267e11c5 3100 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
3101 else
3102 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 3103
8b04d4c4
VZ
3104 Init();
3105 }
3106
e95354ec 3107 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
3108 {
3109 m_enc = enc;
3110
3111 Init();
f1339c56 3112 }
dccce9ea 3113
bde4baac 3114 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
3115 {
3116 size_t inbuf = strlen(psz);
dccce9ea 3117 if (buf)
c643a977 3118 {
ef199164 3119 if (!m2w.Convert(psz, buf))
467e0479 3120 return wxCONV_FAILED;
c643a977 3121 }
f1339c56
RR
3122 return inbuf;
3123 }
dccce9ea 3124
bde4baac 3125 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 3126 {
f8d791e0 3127 const size_t inbuf = wxWcslen(psz);
f1339c56 3128 if (buf)
c643a977 3129 {
ef199164 3130 if (!w2m.Convert(psz, buf))
467e0479 3131 return wxCONV_FAILED;
c643a977 3132 }
dccce9ea 3133
f1339c56
RR
3134 return inbuf;
3135 }
dccce9ea 3136
7ef3ab50 3137 virtual size_t GetMBNulLen() const
eec47cc6
VZ
3138 {
3139 switch ( m_enc )
3140 {
3141 case wxFONTENCODING_UTF16BE:
3142 case wxFONTENCODING_UTF16LE:
c1464d9d 3143 return 2;
eec47cc6
VZ
3144
3145 case wxFONTENCODING_UTF32BE:
3146 case wxFONTENCODING_UTF32LE:
c1464d9d 3147 return 4;
eec47cc6
VZ
3148
3149 default:
c1464d9d 3150 return 1;
eec47cc6
VZ
3151 }
3152 }
3153
d36c9347
VZ
3154 virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
3155
7ef3ab50
VZ
3156 bool IsOk() const { return m_ok; }
3157
3158public:
3159 wxFontEncoding m_enc;
3160 wxEncodingConverter m2w, w2m;
3161
3162private:
cafbf6fb
VZ
3163 // were we initialized successfully?
3164 bool m_ok;
fc7a2a60 3165
e95354ec 3166 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 3167};
6001e347 3168
8f115891
MW
3169// make the constructors available for unit testing
3170WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
3171{
3172 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
3173 if ( !result->IsOk() )
3174 {
3175 delete result;
3176 return 0;
3177 }
ef199164 3178
8f115891
MW
3179 return result;
3180}
3181
1e6feb95
VZ
3182#endif // wxUSE_FONTMAP
3183
36acb880
VZ
3184// ============================================================================
3185// wxCSConv implementation
3186// ============================================================================
3187
8b04d4c4 3188void wxCSConv::Init()
6001e347 3189{
e95354ec
VZ
3190 m_name = NULL;
3191 m_convReal = NULL;
3192 m_deferred = true;
3193}
3194
8b04d4c4
VZ
3195wxCSConv::wxCSConv(const wxChar *charset)
3196{
3197 Init();
82713003 3198
e95354ec
VZ
3199 if ( charset )
3200 {
e95354ec
VZ
3201 SetName(charset);
3202 }
bda3d86a 3203
e4277538
VZ
3204#if wxUSE_FONTMAP
3205 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
3206#else
bda3d86a 3207 m_encoding = wxFONTENCODING_SYSTEM;
e4277538 3208#endif
6001e347
RR
3209}
3210
8b04d4c4
VZ
3211wxCSConv::wxCSConv(wxFontEncoding encoding)
3212{
bda3d86a 3213 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
3214 {
3215 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
3216
3217 encoding = wxFONTENCODING_SYSTEM;
3218 }
3219
8b04d4c4
VZ
3220 Init();
3221
bda3d86a 3222 m_encoding = encoding;
8b04d4c4
VZ
3223}
3224
6001e347
RR
3225wxCSConv::~wxCSConv()
3226{
65e50848
JS
3227 Clear();
3228}
3229
54380f29 3230wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 3231 : wxMBConv()
54380f29 3232{
8b04d4c4
VZ
3233 Init();
3234
54380f29 3235 SetName(conv.m_name);
8b04d4c4 3236 m_encoding = conv.m_encoding;
54380f29
GD
3237}
3238
3239wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
3240{
3241 Clear();
8b04d4c4 3242
54380f29 3243 SetName(conv.m_name);
8b04d4c4
VZ
3244 m_encoding = conv.m_encoding;
3245
54380f29
GD
3246 return *this;
3247}
3248
65e50848
JS
3249void wxCSConv::Clear()
3250{
8b04d4c4 3251 free(m_name);
e95354ec 3252 delete m_convReal;
8b04d4c4 3253
65e50848 3254 m_name = NULL;
e95354ec 3255 m_convReal = NULL;
6001e347
RR
3256}
3257
3258void wxCSConv::SetName(const wxChar *charset)
3259{
f1339c56
RR
3260 if (charset)
3261 {
3262 m_name = wxStrdup(charset);
e95354ec 3263 m_deferred = true;
f1339c56 3264 }
6001e347
RR
3265}
3266
8b3eb85d 3267#if wxUSE_FONTMAP
8b3eb85d
VZ
3268
// Hash map type associating a wxString with a wxFontEncoding key.
//
// gs_nameCache is used by wxCSConv::DoCreate() when HAVE_ICONV is defined:
// it stores, for an encoding, the charset name for which a wxMBConv_iconv
// could be created or an empty string if this failed for all of its names.
3269WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
3f5c62f9 3270 wxEncodingNameCache );
8b3eb85d
VZ
3271
3272static wxEncodingNameCache gs_nameCache;
3273#endif
3274
e95354ec
VZ
// Creates the converter really used by this object for its charset name
// (m_name) and/or encoding (m_encoding).
//
// Returns a new converter object allocated with new (CreateConvIfNeeded()
// stores it in m_convReal which is deleted by Clear()) or NULL. NULL is
// returned either if no converter is needed at all, which is the case for
// wxFONTENCODING_ISO8859_1 and wxFONTENCODING_DEFAULT, or if none of the
// methods tried below succeeded, in which case an error is logged too. The
// callers handle NULL by converting as if the charset were Latin-1.
//
// Which of the methods are compiled in depends on HAVE_ICONV,
// wxHAVE_WIN32_MB2WC, __WXMAC__, __WXCOCOA__ and wxUSE_FONTMAP.
3275wxMBConv *wxCSConv::DoCreate() const
3276{
ce6f8d6f
VZ
3277#if wxUSE_FONTMAP
3278 wxLogTrace(TRACE_STRCONV,
3279 wxT("creating conversion for %s"),
3280 (m_name ? m_name
3281 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
3282#endif // wxUSE_FONTMAP
3283
c547282d
VZ
3284 // check for the special case of ASCII or ISO8859-1 charset: as we have
3285 // special knowledge of it anyhow, we don't need to create a special
3286 // conversion object
e4277538
VZ
3287 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
3288 m_encoding == wxFONTENCODING_DEFAULT )
f1339c56 3289 {
e95354ec
VZ
3290 // don't convert at all
3291 return NULL;
3292 }
dccce9ea 3293
e95354ec
VZ
3294 // we trust OS to do conversion better than we can so try external
3295 // conversion methods first
3296 //
3297 // the full order is:
3298 // 1. OS conversion (iconv() under Unix or Win32 API)
3299 // 2. hard coded conversions for UTF
3300 // 3. wxEncodingConverter as fall back
3301
3302 // step (1)
3303#ifdef HAVE_ICONV
c547282d 3304#if !wxUSE_FONTMAP
e95354ec 3305 if ( m_name )
c547282d 3306#endif // !wxUSE_FONTMAP
e95354ec 3307 {
c547282d 3308 wxString name(m_name);
3ef10cfc 3309#if wxUSE_FONTMAP
8b3eb85d 3310 wxFontEncoding encoding(m_encoding);
3ef10cfc 3311#endif
8b3eb85d
VZ
3312
// first try iconv with the charset name exactly as it was given to us
3313 if ( !name.empty() )
3314 {
3315 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
3316 if ( conv->IsOk() )
3317 return conv;
3318
3319 delete conv;
c547282d
VZ
3320
3321#if wxUSE_FONTMAP
8b3eb85d
VZ
3322 encoding =
3323 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
c547282d 3324#endif // wxUSE_FONTMAP
8b3eb85d
VZ
3325 }
3326#if wxUSE_FONTMAP
3327 {
// then look up the encoding in the cache of the iconv names which
// worked (or failed) for it before
3328 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
3329 if ( it != gs_nameCache.end() )
3330 {
// NOTE(review): a cached failure makes us return NULL immediately, i.e.
// without trying the other steps below, although the call which had
// cached the failure did fall through to them -- confirm that this is
// really intended
3331 if ( it->second.empty() )
3332 return NULL;
c547282d 3333
8b3eb85d
VZ
3334 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
3335 if ( conv->IsOk() )
3336 return conv;
e95354ec 3337
8b3eb85d
VZ
3338 delete conv;
3339 }
3340
// finally try all the names known for this encoding and remember the
// first one for which iconv works
3341 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
3c67ec06
SC
3342 // CS : in case this does not return valid names (eg for MacRoman) encoding
3343 // got a 'failure' entry in the cache all the same, although it just has to
3344 // be created using a different method, so only store failed iconv creation
3345 // attempts (or perhaps we shoulnd't do this at all ?)
3346 if ( names[0] != NULL )
8b3eb85d 3347 {
3c67ec06 3348 for ( ; *names; ++names )
8b3eb85d 3349 {
3c67ec06
SC
3350 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
3351 if ( conv->IsOk() )
3352 {
3353 gs_nameCache[encoding] = *names;
3354 return conv;
3355 }
3356
3357 delete conv;
8b3eb85d
VZ
3358 }
3359
3c67ec06 3360 gs_nameCache[encoding] = _T(""); // cache the failure
8b3eb85d 3361 }
8b3eb85d
VZ
3362 }
3363#endif // wxUSE_FONTMAP
e95354ec
VZ
3364 }
3365#endif // HAVE_ICONV
3366
3367#ifdef wxHAVE_WIN32_MB2WC
3368 {
// NB: without wxUSE_FONTMAP this block returns NULL unconditionally, so
// nothing below it is reached in such builds
7608a683 3369#if wxUSE_FONTMAP
e95354ec
VZ
3370 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
3371 : new wxMBConv_win32(m_encoding);
3372 if ( conv->IsOk() )
3373 return conv;
3374
3375 delete conv;
7608a683
WS
3376#else
3377 return NULL;
3378#endif
e95354ec
VZ
3379 }
3380#endif // wxHAVE_WIN32_MB2WC
ef199164 3381
d775fa82
WS
3382#if defined(__WXMAC__)
3383 {
5c3c8676 3384 // leave UTF16 and UTF32 to the built-ins of wx
3698ae71 3385 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
5c3c8676 3386 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
d775fa82 3387 {
2d1659cf 3388#if wxUSE_FONTMAP
d775fa82
WS
3389 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
3390 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
3391#else
3392 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
3393#endif
d775fa82 3394 if ( conv->IsOk() )
f7e98dee
RN
3395 return conv;
3396
3397 delete conv;
3398 }
3399 }
3400#endif
ef199164 3401
f7e98dee
RN
3402#if defined(__WXCOCOA__)
3403 {
3404 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
3405 {
a6900d10 3406#if wxUSE_FONTMAP
f7e98dee
RN
3407 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
3408 : new wxMBConv_cocoa(m_encoding);
a6900d10
RN
3409#else
3410 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
3411#endif
ef199164 3412
f7e98dee 3413 if ( conv->IsOk() )
d775fa82
WS
3414 return conv;
3415
3416 delete conv;
3417 }
335d31e0
SC
3418 }
3419#endif
e95354ec
VZ
3420 // step (2)
3421 wxFontEncoding enc = m_encoding;
3422#if wxUSE_FONTMAP
c547282d
VZ
3423 if ( enc == wxFONTENCODING_SYSTEM && m_name )
3424 {
3425 // use "false" to suppress interactive dialogs -- we can be called from
3426 // anywhere and popping up a dialog from here is the last thing we want to
3427 // do
267e11c5 3428 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 3429 }
e95354ec
VZ
3430#endif // wxUSE_FONTMAP
3431
3432 switch ( enc )
3433 {
3434 case wxFONTENCODING_UTF7:
3435 return new wxMBConvUTF7;
3436
3437 case wxFONTENCODING_UTF8:
3438 return new wxMBConvUTF8;
3439
e95354ec
VZ
3440 case wxFONTENCODING_UTF16BE:
3441 return new wxMBConvUTF16BE;
3442
3443 case wxFONTENCODING_UTF16LE:
3444 return new wxMBConvUTF16LE;
3445
e95354ec
VZ
3446 case wxFONTENCODING_UTF32BE:
3447 return new wxMBConvUTF32BE;
3448
3449 case wxFONTENCODING_UTF32LE:
3450 return new wxMBConvUTF32LE;
3451
3452 default:
3453 // nothing to do but put here to suppress gcc warnings
ef199164 3454 break;
e95354ec
VZ
3455 }
3456
3457 // step (3)
3458#if wxUSE_FONTMAP
3459 {
3460 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
3461 : new wxMBConv_wxwin(m_encoding);
3462 if ( conv->IsOk() )
3463 return conv;
3464
3465 delete conv;
3466 }
3467#endif // wxUSE_FONTMAP
3468
a58d4f4d
VS
3469 // NB: This is a hack to prevent deadlock. What could otherwise happen
3470 // in Unicode build: wxConvLocal creation ends up being here
3471 // because of some failure and logs the error. But wxLog will try to
6a17b868
SN
3472 // attach a timestamp, for which it will need wxConvLocal (to convert
3473 // time to char* and then wchar_t*), but that fails, tries to log the
3474 // error, but wxLog has an (already locked) critical section that
3475 // guards the static buffer.
a58d4f4d
VS
3476 static bool alreadyLoggingError = false;
3477 if (!alreadyLoggingError)
3478 {
3479 alreadyLoggingError = true;
3480 wxLogError(_("Cannot convert from the charset '%s'!"),
3481 m_name ? m_name
e95354ec
VZ
3482 :
3483#if wxUSE_FONTMAP
267e11c5 3484 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
e95354ec 3485#else // !wxUSE_FONTMAP
3ef10cfc 3486 wxString::Format(_("encoding %i"), m_encoding).c_str()
e95354ec
VZ
3487#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
3488 );
ef199164 3489
a58d4f4d
VS
3490 alreadyLoggingError = false;
3491 }
e95354ec
VZ
3492
3493 return NULL;
3494}
3495
3496void wxCSConv::CreateConvIfNeeded() const
3497{
3498 if ( m_deferred )
3499 {
3500 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
3501
3502#if wxUSE_INTL
3503 // if we don't have neither the name nor the encoding, use the default
3504 // encoding for this system
3505 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3506 {
4d312c22 3507 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
3508 }
3509#endif // wxUSE_INTL
3510
e95354ec
VZ
3511 self->m_convReal = DoCreate();
3512 self->m_deferred = false;
6001e347 3513 }
6001e347
RR
3514}
3515
3516size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
3517{
e95354ec 3518 CreateConvIfNeeded();
dccce9ea 3519
e95354ec
VZ
3520 if (m_convReal)
3521 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
3522
3523 // latin-1 (direct)
4def3b35 3524 size_t len = strlen(psz);
dccce9ea 3525
f1339c56
RR
3526 if (buf)
3527 {
4def3b35 3528 for (size_t c = 0; c <= len; c++)
f1339c56
RR
3529 buf[c] = (unsigned char)(psz[c]);
3530 }
dccce9ea 3531
f1339c56 3532 return len;
6001e347
RR
3533}
3534
3535size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
3536{
e95354ec 3537 CreateConvIfNeeded();
dccce9ea 3538
e95354ec
VZ
3539 if (m_convReal)
3540 return m_convReal->WC2MB(buf, psz, n);
1cd52418 3541
f1339c56 3542 // latin-1 (direct)
f8d791e0 3543 const size_t len = wxWcslen(psz);
f1339c56
RR
3544 if (buf)
3545 {
4def3b35 3546 for (size_t c = 0; c <= len; c++)
24642831
VS
3547 {
3548 if (psz[c] > 0xFF)
467e0479 3549 return wxCONV_FAILED;
ef199164 3550
907173e5 3551 buf[c] = (char)psz[c];
24642831
VS
3552 }
3553 }
3554 else
3555 {
3556 for (size_t c = 0; c <= len; c++)
3557 {
3558 if (psz[c] > 0xFF)
467e0479 3559 return wxCONV_FAILED;
24642831 3560 }
f1339c56 3561 }
dccce9ea 3562
f1339c56 3563 return len;
6001e347
RR
3564}
3565
7ef3ab50 3566size_t wxCSConv::GetMBNulLen() const
eec47cc6
VZ
3567{
3568 CreateConvIfNeeded();
3569
3570 if ( m_convReal )
3571 {
7ef3ab50 3572 return m_convReal->GetMBNulLen();
eec47cc6
VZ
3573 }
3574
c1464d9d 3575 return 1;
eec47cc6
VZ
3576}
3577
bde4baac
VZ
3578// ----------------------------------------------------------------------------
3579// globals
3580// ----------------------------------------------------------------------------
3581
// the object behind wxConvLibc (and the initial value of wxConvCurrent):
// its class is selected at compile-time depending on the platform
3582#ifdef __WINDOWS__
3583 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
3584#elif defined(__WXMAC__) && !defined(__MACH__)
3585 static wxMBConv_mac wxConvLibcObj ;
bde4baac 3586#else
dcc8fac0 3587 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
3588#endif
3589
// the objects behind the other predefined converters
3590static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
3591static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
3592static wxMBConvUTF7 wxConvUTF7Obj;
3593static wxMBConvUTF8 wxConvUTF8Obj;
d43d9ee7 3594#if defined(__WXMAC__) && defined(TARGET_CARBON)
739cb14a
SC
3595static wxMBConv_macUTF8D wxConvMacUTF8DObj;
3596#endif
bde4baac
VZ
// the exported converters are references or pointers to the static
// objects defined above
3597WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
3598WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
3599WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
3600WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
3601WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
3602WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
d5bef0a3 3603WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = &wxConvLocal;
f5a1953b
VZ
// the converter used for the file names: under __WXOSX__ this is the
// wxMBConv_macUTF8D object if it is defined (Carbon wxMac) and the UTF-8
// one otherwise, elsewhere it is the same object as wxConvLibc
3604WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
3605#ifdef __WXOSX__
d43d9ee7 3606#if defined(__WXMAC__) && defined(TARGET_CARBON)
739cb14a 3607 wxConvMacUTF8DObj;
d43d9ee7
SC
3608#else
3609 wxConvUTF8Obj;
3610#endif
f5a1953b 3611#else
ea8ce907 3612 wxConvLibcObj;
f5a1953b
VZ
3613#endif
3614
bde4baac
VZ
3615#else // !wxUSE_WCHAR_T
3616
3617// stand-ins in absence of wchar_t
// (these are plain wxMBConv objects and not references or pointers)
3618WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
3619 wxConvISO8859_1,
3620 wxConvLocal,
3621 wxConvUTF8;
3622
3623#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
3622
3623#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T