]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
removed wxLANGUAGE_SERBIAN, we must explicitly use either or wxLANGUAGE_SERBIAN_CYRIL...
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347 1/////////////////////////////////////////////////////////////////////////////
38d4b1e4 2// Name: src/common/strconv.cpp
6001e347 3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
6001e347
RR
15// For compilers that support precompilation, includes "wx.h".
16#include "wx/wxprec.h"
17
373658eb
VZ
18#ifndef WX_PRECOMP
19 #include "wx/intl.h"
20 #include "wx/log.h"
de6185e2 21 #include "wx/utils.h"
df69528b 22 #include "wx/hashmap.h"
ef199164 23#endif
373658eb 24
bde4baac
VZ
25#include "wx/strconv.h"
26
27#if wxUSE_WCHAR_T
28
7608a683 29#ifdef __WINDOWS__
532d575b 30 #include "wx/msw/private.h"
13dd924a 31 #include "wx/msw/missing.h"
0a1c1e62
GRG
32#endif
33
1c193821 34#ifndef __WXWINCE__
1cd52418 35#include <errno.h>
1c193821
JS
36#endif
37
6001e347
RR
38#include <ctype.h>
39#include <string.h>
40#include <stdlib.h>
41
e95354ec
VZ
42#if defined(__WIN32__) && !defined(__WXMICROWIN__)
43 #define wxHAVE_WIN32_MB2WC
ef199164 44#endif
e95354ec 45
6001e347 46#ifdef __SALFORDC__
373658eb 47 #include <clib.h>
6001e347
RR
48#endif
49
b040e242 50#ifdef HAVE_ICONV
373658eb 51 #include <iconv.h>
b1d547eb 52 #include "wx/thread.h"
1cd52418 53#endif
1cd52418 54
373658eb
VZ
55#include "wx/encconv.h"
56#include "wx/fontmap.h"
57
335d31e0 58#ifdef __WXMAC__
40ba2f3b 59#ifndef __DARWIN__
4227afa4
SC
60#include <ATSUnicode.h>
61#include <TextCommon.h>
62#include <TextEncodingConverter.h>
40ba2f3b 63#endif
335d31e0 64
ef199164
DS
65// includes Mac headers
66#include "wx/mac/private.h"
335d31e0 67#endif
ce6f8d6f 68
ef199164 69
ce6f8d6f
VZ
70#define TRACE_STRCONV _T("strconv")
71
467e0479
VZ
72// WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
73// be 4 bytes
4948c2b6 74#if SIZEOF_WCHAR_T == 2
ac11db3a
MW
75 #define WC_UTF16
76#endif
77
ef199164 78
373658eb
VZ
79// ============================================================================
80// implementation
81// ============================================================================
82
69373110
VZ
// Helper of cMB2WC(): returns true if at least one of the n bytes starting at
// p is not NUL and false if all of them are NUL (in particular if n is 0).
static bool NotAllNULs(const char *p, size_t n)
{
    for ( const char * const end = p + n; p != end; ++p )
    {
        if ( *p != '\0' )
            return true;
    }

    return false;
}
91
373658eb 92// ----------------------------------------------------------------------------
467e0479 93// UTF-16 en/decoding to/from UCS-4 with surrogates handling
373658eb 94// ----------------------------------------------------------------------------
6001e347 95
c91830cb 96static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 97{
ef199164 98 if (input <= 0xffff)
4def3b35 99 {
999836aa
VZ
100 if (output)
101 *output = (wxUint16) input;
ef199164 102
4def3b35 103 return 1;
dccce9ea 104 }
ef199164 105 else if (input >= 0x110000)
4def3b35 106 {
467e0479 107 return wxCONV_FAILED;
dccce9ea
VZ
108 }
109 else
4def3b35 110 {
dccce9ea 111 if (output)
4def3b35 112 {
ef199164
DS
113 *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
114 *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
4def3b35 115 }
ef199164 116
4def3b35 117 return 2;
1cd52418 118 }
1cd52418
OK
119}
120
c91830cb 121static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 122{
ef199164 123 if ((*input < 0xd800) || (*input > 0xdfff))
4def3b35
VS
124 {
125 output = *input;
126 return 1;
dccce9ea 127 }
ef199164 128 else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
4def3b35
VS
129 {
130 output = *input;
467e0479 131 return wxCONV_FAILED;
dccce9ea
VZ
132 }
133 else
4def3b35
VS
134 {
135 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
136 return 2;
137 }
1cd52418
OK
138}
139
467e0479 140#ifdef WC_UTF16
35d11700
VZ
141 typedef wchar_t wxDecodeSurrogate_t;
142#else // !WC_UTF16
143 typedef wxUint16 wxDecodeSurrogate_t;
144#endif // WC_UTF16/!WC_UTF16
467e0479
VZ
145
146// returns the next UTF-32 character from the wchar_t buffer and advances the
147// pointer to the character after this one
148//
149// if an invalid character is found, *pSrc is set to NULL, the caller must
150// check for this
35d11700 151static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
467e0479
VZ
152{
153 wxUint32 out;
8d3dd069
VZ
154 const size_t
155 n = decode_utf16(wx_reinterpret_cast(const wxUint16 *, *pSrc), out);
467e0479
VZ
156 if ( n == wxCONV_FAILED )
157 *pSrc = NULL;
158 else
159 *pSrc += n;
160
161 return out;
162}
163
f6bcfd97 164// ----------------------------------------------------------------------------
6001e347 165// wxMBConv
f6bcfd97 166// ----------------------------------------------------------------------------
2c53a80a 167
483b0434
VZ
168size_t
169wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
170 const char *src, size_t srcLen) const
6001e347 171{
483b0434
VZ
172 // although new conversion classes are supposed to implement this function
173 // directly, the existins ones only implement the old MB2WC() and so, to
174 // avoid to have to rewrite all conversion classes at once, we provide a
175 // default (but not efficient) implementation of this one in terms of the
176 // old function by copying the input to ensure that it's NUL-terminated and
177 // then using MB2WC() to convert it
6001e347 178
483b0434
VZ
179 // the number of chars [which would be] written to dst [if it were not NULL]
180 size_t dstWritten = 0;
eec47cc6 181
c1464d9d 182 // the number of NULs terminating this string
a78c43f1 183 size_t nulLen = 0; // not really needed, but just to avoid warnings
eec47cc6 184
c1464d9d
VZ
185 // if we were not given the input size we just have to assume that the
186 // string is properly terminated as we have no way of knowing how long it
187 // is anyhow, but if we do have the size check whether there are enough
188 // NULs at the end
483b0434
VZ
189 wxCharBuffer bufTmp;
190 const char *srcEnd;
467e0479 191 if ( srcLen != wxNO_LEN )
eec47cc6 192 {
c1464d9d 193 // we need to know how to find the end of this string
7ef3ab50 194 nulLen = GetMBNulLen();
483b0434
VZ
195 if ( nulLen == wxCONV_FAILED )
196 return wxCONV_FAILED;
e4e3bbb4 197
c1464d9d 198 // if there are enough NULs we can avoid the copy
483b0434 199 if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
eec47cc6
VZ
200 {
201 // make a copy in order to properly NUL-terminate the string
483b0434 202 bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
c1464d9d 203 char * const p = bufTmp.data();
483b0434
VZ
204 memcpy(p, src, srcLen);
205 for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
c1464d9d 206 *s = '\0';
483b0434
VZ
207
208 src = bufTmp;
eec47cc6 209 }
e4e3bbb4 210
483b0434
VZ
211 srcEnd = src + srcLen;
212 }
213 else // quit after the first loop iteration
214 {
215 srcEnd = NULL;
216 }
e4e3bbb4 217
483b0434 218 for ( ;; )
eec47cc6 219 {
c1464d9d 220 // try to convert the current chunk
483b0434 221 size_t lenChunk = MB2WC(NULL, src, 0);
483b0434
VZ
222 if ( lenChunk == wxCONV_FAILED )
223 return wxCONV_FAILED;
e4e3bbb4 224
467e0479 225 lenChunk++; // for the L'\0' at the end of this chunk
e4e3bbb4 226
483b0434 227 dstWritten += lenChunk;
f5fb6871 228
467e0479
VZ
229 if ( lenChunk == 1 )
230 {
231 // nothing left in the input string, conversion succeeded
232 break;
233 }
234
483b0434
VZ
235 if ( dst )
236 {
237 if ( dstWritten > dstLen )
238 return wxCONV_FAILED;
239
830f8f11 240 if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
483b0434
VZ
241 return wxCONV_FAILED;
242
243 dst += lenChunk;
244 }
c1464d9d 245
483b0434 246 if ( !srcEnd )
c1464d9d 247 {
467e0479
VZ
248 // we convert just one chunk in this case as this is the entire
249 // string anyhow
c1464d9d
VZ
250 break;
251 }
eec47cc6
VZ
252
253 // advance the input pointer past the end of this chunk
483b0434 254 while ( NotAllNULs(src, nulLen) )
c1464d9d
VZ
255 {
256 // notice that we must skip over multiple bytes here as we suppose
257 // that if NUL takes 2 or 4 bytes, then all the other characters do
258 // too and so if advanced by a single byte we might erroneously
259 // detect sequences of NUL bytes in the middle of the input
483b0434 260 src += nulLen;
c1464d9d 261 }
e4e3bbb4 262
483b0434 263 src += nulLen; // skipping over its terminator as well
c1464d9d
VZ
264
265 // note that ">=" (and not just "==") is needed here as the terminator
266 // we skipped just above could be inside or just after the buffer
267 // delimited by inEnd
483b0434 268 if ( src >= srcEnd )
c1464d9d
VZ
269 break;
270 }
271
483b0434 272 return dstWritten;
e4e3bbb4
RN
273}
274
483b0434
VZ
275size_t
276wxMBConv::FromWChar(char *dst, size_t dstLen,
277 const wchar_t *src, size_t srcLen) const
e4e3bbb4 278{
483b0434
VZ
279 // the number of chars [which would be] written to dst [if it were not NULL]
280 size_t dstWritten = 0;
e4e3bbb4 281
eec47cc6
VZ
282 // make a copy of the input string unless it is already properly
283 // NUL-terminated
284 //
285 // if we don't know its length we have no choice but to assume that it is,
286 // indeed, properly terminated
287 wxWCharBuffer bufTmp;
467e0479 288 if ( srcLen == wxNO_LEN )
e4e3bbb4 289 {
483b0434 290 srcLen = wxWcslen(src) + 1;
eec47cc6 291 }
483b0434 292 else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
eec47cc6
VZ
293 {
294 // make a copy in order to properly NUL-terminate the string
483b0434 295 bufTmp = wxWCharBuffer(srcLen);
ef199164 296 memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
483b0434
VZ
297 src = bufTmp;
298 }
299
300 const size_t lenNul = GetMBNulLen();
301 for ( const wchar_t * const srcEnd = src + srcLen;
302 src < srcEnd;
303 src += wxWcslen(src) + 1 /* skip L'\0' too */ )
304 {
305 // try to convert the current chunk
306 size_t lenChunk = WC2MB(NULL, src, 0);
307
308 if ( lenChunk == wxCONV_FAILED )
309 return wxCONV_FAILED;
310
311 lenChunk += lenNul;
312 dstWritten += lenChunk;
313
314 if ( dst )
315 {
316 if ( dstWritten > dstLen )
317 return wxCONV_FAILED;
318
319 if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
320 return wxCONV_FAILED;
321
322 dst += lenChunk;
323 }
eec47cc6 324 }
e4e3bbb4 325
483b0434
VZ
326 return dstWritten;
327}
328
ef199164 329size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
509da451 330{
ef199164 331 size_t rc = ToWChar(outBuff, outLen, inBuff);
467e0479 332 if ( rc != wxCONV_FAILED )
509da451
VZ
333 {
334 // ToWChar() returns the buffer length, i.e. including the trailing
335 // NUL, while this method doesn't take it into account
336 rc--;
337 }
338
339 return rc;
340}
341
ef199164 342size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
509da451 343{
ef199164 344 size_t rc = FromWChar(outBuff, outLen, inBuff);
467e0479 345 if ( rc != wxCONV_FAILED )
509da451
VZ
346 {
347 rc -= GetMBNulLen();
348 }
349
350 return rc;
351}
352
483b0434
VZ
353wxMBConv::~wxMBConv()
354{
355 // nothing to do here (necessary for Darwin linking probably)
356}
e4e3bbb4 357
483b0434
VZ
358const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
359{
360 if ( psz )
eec47cc6 361 {
483b0434
VZ
362 // calculate the length of the buffer needed first
363 const size_t nLen = MB2WC(NULL, psz, 0);
467e0479 364 if ( nLen != wxCONV_FAILED )
f5fb6871 365 {
483b0434
VZ
366 // now do the actual conversion
367 wxWCharBuffer buf(nLen /* +1 added implicitly */);
eec47cc6 368
483b0434
VZ
369 // +1 for the trailing NULL
370 if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
371 return buf;
f5fb6871 372 }
483b0434 373 }
e4e3bbb4 374
483b0434
VZ
375 return wxWCharBuffer();
376}
3698ae71 377
483b0434
VZ
378const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
379{
380 if ( pwz )
381 {
382 const size_t nLen = WC2MB(NULL, pwz, 0);
467e0479 383 if ( nLen != wxCONV_FAILED )
483b0434
VZ
384 {
385 // extra space for trailing NUL(s)
386 static const size_t extraLen = GetMaxMBNulLen();
f5fb6871 387
483b0434
VZ
388 wxCharBuffer buf(nLen + extraLen - 1);
389 if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
390 return buf;
391 }
392 }
393
394 return wxCharBuffer();
395}
e4e3bbb4 396
483b0434 397const wxWCharBuffer
ef199164 398wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
483b0434 399{
ef199164 400 const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
467e0479 401 if ( dstLen != wxCONV_FAILED )
483b0434 402 {
830f8f11 403 wxWCharBuffer wbuf(dstLen - 1);
ef199164 404 if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
483b0434
VZ
405 {
406 if ( outLen )
467e0479
VZ
407 {
408 *outLen = dstLen;
409 if ( wbuf[dstLen - 1] == L'\0' )
410 (*outLen)--;
411 }
412
483b0434
VZ
413 return wbuf;
414 }
415 }
416
417 if ( outLen )
418 *outLen = 0;
419
420 return wxWCharBuffer();
421}
422
423const wxCharBuffer
ef199164 424wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
483b0434 425{
13d92ad6 426 size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
467e0479 427 if ( dstLen != wxCONV_FAILED )
483b0434 428 {
168a76fe
VZ
429 // special case of empty input: can't allocate 0 size buffer below as
430 // wxCharBuffer insists on NUL-terminating it
431 wxCharBuffer buf(dstLen ? dstLen - 1 : 1);
ef199164 432 if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
483b0434
VZ
433 {
434 if ( outLen )
467e0479
VZ
435 {
436 *outLen = dstLen;
437
438 const size_t nulLen = GetMBNulLen();
13d92ad6
VZ
439 if ( dstLen >= nulLen &&
440 !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
467e0479
VZ
441 {
442 // in this case the output is NUL-terminated and we're not
443 // supposed to count NUL
13d92ad6 444 *outLen -= nulLen;
467e0479
VZ
445 }
446 }
d32a507d 447
483b0434
VZ
448 return buf;
449 }
e4e3bbb4
RN
450 }
451
eec47cc6
VZ
452 if ( outLen )
453 *outLen = 0;
454
455 return wxCharBuffer();
e4e3bbb4
RN
456}
457
6001e347 458// ----------------------------------------------------------------------------
bde4baac 459// wxMBConvLibc
6001e347
RR
460// ----------------------------------------------------------------------------
461
bde4baac
VZ
462size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
463{
464 return wxMB2WC(buf, psz, n);
465}
466
467size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
468{
469 return wxWC2MB(buf, psz, n);
470}
e1bfe89e
RR
471
472// ----------------------------------------------------------------------------
532d575b 473// wxConvBrokenFileNames
e1bfe89e
RR
474// ----------------------------------------------------------------------------
475
eec47cc6
VZ
476#ifdef __UNIX__
477
845905d5 478wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
ea8ce907 479{
845905d5
MW
480 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
481 || wxStricmp(charset, _T("UTF8")) == 0 )
482 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
483 else
484 m_conv = new wxCSConv(charset);
ea8ce907
RR
485}
486
eec47cc6 487#endif // __UNIX__
c12b7f79 488
bde4baac 489// ----------------------------------------------------------------------------
3698ae71 490// UTF-7
bde4baac 491// ----------------------------------------------------------------------------
6001e347 492
15f2ee32 493// Implementation (C) 2004 Fredrik Roubert
6001e347 494
15f2ee32
RN
//
// BASE64 decoding table
//
// indexed by the byte value: gives the 6 bit value of the BASE64 digits
// ('A'-'Z', 'a'-'z', '0'-'9', '+' and '/') and 0xff for all the other bytes
//
static const unsigned char utf7unb64[] =
{
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x00
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x08
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x10
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x18
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x20
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f, // 0x28: '+' and '/'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, // 0x30: '0'-'7'
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x38: '8'-'9'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, // 0x40: 'A'-'G'
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, // 0x48: 'H'-'O'
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, // 0x50: 'P'-'W'
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x58: 'X'-'Z'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, // 0x60: 'a'-'g'
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, // 0x68: 'h'-'o'
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, // 0x70: 'p'-'w'
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x78: 'x'-'z'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x80
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x88
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x90
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x98
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0xa0
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0xa8
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0xb0
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0xb8
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0xc0
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0xc8
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0xd0
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0xd8
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0xe0
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0xe8
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0xf0
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff  // 0xf8
};
533
534size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
535{
15f2ee32
RN
536 size_t len = 0;
537
04a37834 538 while ( *psz && (!buf || (len < n)) )
15f2ee32
RN
539 {
540 unsigned char cc = *psz++;
541 if (cc != '+')
542 {
543 // plain ASCII char
544 if (buf)
545 *buf++ = cc;
546 len++;
547 }
548 else if (*psz == '-')
549 {
550 // encoded plus sign
551 if (buf)
552 *buf++ = cc;
553 len++;
554 psz++;
555 }
04a37834 556 else // start of BASE64 encoded string
15f2ee32 557 {
04a37834 558 bool lsb, ok;
15f2ee32 559 unsigned int d, l;
04a37834
VZ
560 for ( ok = lsb = false, d = 0, l = 0;
561 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
562 psz++ )
15f2ee32
RN
563 {
564 d <<= 6;
565 d += cc;
566 for (l += 6; l >= 8; lsb = !lsb)
567 {
04a37834 568 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
569 if (lsb)
570 {
571 if (buf)
572 *buf++ |= c;
573 len ++;
574 }
575 else
04a37834 576 {
15f2ee32 577 if (buf)
6356d52a 578 *buf = (wchar_t)(c << 8);
04a37834
VZ
579 }
580
581 ok = true;
15f2ee32
RN
582 }
583 }
04a37834
VZ
584
585 if ( !ok )
586 {
587 // in valid UTF7 we should have valid characters after '+'
467e0479 588 return wxCONV_FAILED;
04a37834
VZ
589 }
590
15f2ee32
RN
591 if (*psz == '-')
592 psz++;
593 }
594 }
04a37834
VZ
595
596 if ( buf && (len < n) )
597 *buf = '\0';
598
15f2ee32 599 return len;
6001e347
RR
600}
601
15f2ee32
RN
//
// BASE64 encoding table
//
// maps a 6 bit value (0..63) to the corresponding BASE64 digit
//
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', //  0
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', //  8
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', // 16
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', // 24
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', // 32
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v', // 40
    'w', 'x', 'y', 'z', '0', '1', '2', '3', // 48
    '4', '5', '6', '7', '8', '9', '+', '/'  // 56
};
616
//
// UTF-7 encoding table
//
// gives the class of each of the 128 ASCII characters:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, // 0x00
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0x10
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3, // 0x20
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, // 0x30
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1, // 0x50
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3  // 0x70
};
636
667e5b3e 637size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32 638{
15f2ee32
RN
639 size_t len = 0;
640
641 while (*psz && ((!buf) || (len < n)))
642 {
643 wchar_t cc = *psz++;
644 if (cc < 0x80 && utf7encode[cc] < 1)
645 {
646 // plain ASCII char
647 if (buf)
648 *buf++ = (char)cc;
ef199164 649
15f2ee32
RN
650 len++;
651 }
652#ifndef WC_UTF16
79c78d42 653 else if (((wxUint32)cc) > 0xffff)
b2c13097 654 {
15f2ee32 655 // no surrogate pair generation (yet?)
467e0479 656 return wxCONV_FAILED;
15f2ee32
RN
657 }
658#endif
659 else
660 {
661 if (buf)
662 *buf++ = '+';
ef199164 663
15f2ee32
RN
664 len++;
665 if (cc != '+')
666 {
667 // BASE64 encode string
668 unsigned int lsb, d, l;
73c902d6 669 for (d = 0, l = 0; /*nothing*/; psz++)
15f2ee32
RN
670 {
671 for (lsb = 0; lsb < 2; lsb ++)
672 {
673 d <<= 8;
674 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
675
676 for (l += 8; l >= 6; )
677 {
678 l -= 6;
679 if (buf)
680 *buf++ = utf7enb64[(d >> l) % 64];
681 len++;
682 }
683 }
ef199164 684
15f2ee32
RN
685 cc = *psz;
686 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
687 break;
688 }
ef199164 689
15f2ee32
RN
690 if (l != 0)
691 {
692 if (buf)
693 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
ef199164 694
15f2ee32
RN
695 len++;
696 }
697 }
ef199164 698
15f2ee32
RN
699 if (buf)
700 *buf++ = '-';
701 len++;
702 }
703 }
ef199164 704
15f2ee32
RN
705 if (buf && (len < n))
706 *buf = 0;
ef199164 707
15f2ee32 708 return len;
6001e347
RR
709}
710
f6bcfd97 711// ----------------------------------------------------------------------------
6001e347 712// UTF-8
f6bcfd97 713// ----------------------------------------------------------------------------
6001e347 714
dccce9ea 715static wxUint32 utf8_max[]=
4def3b35 716 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347 717
3698ae71
VZ
// boundaries of the private use area we use to (temporarily) remap the bytes
// which are invalid in a UTF-8 encoded string
ea8ce907
RR
720const wxUint32 wxUnicodePUA = 0x100000;
721const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
722
6001e347
RR
723size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
724{
4def3b35
VS
725 size_t len = 0;
726
dccce9ea 727 while (*psz && ((!buf) || (len < n)))
4def3b35 728 {
ea8ce907
RR
729 const char *opsz = psz;
730 bool invalid = false;
4def3b35
VS
731 unsigned char cc = *psz++, fc = cc;
732 unsigned cnt;
dccce9ea 733 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 734 fc <<= 1;
ef199164 735
dccce9ea 736 if (!cnt)
4def3b35
VS
737 {
738 // plain ASCII char
dccce9ea 739 if (buf)
4def3b35
VS
740 *buf++ = cc;
741 len++;
561488ef
MW
742
743 // escape the escape character for octal escapes
744 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
745 && cc == '\\' && (!buf || len < n))
746 {
747 if (buf)
748 *buf++ = cc;
749 len++;
750 }
dccce9ea
VZ
751 }
752 else
4def3b35
VS
753 {
754 cnt--;
dccce9ea 755 if (!cnt)
4def3b35
VS
756 {
757 // invalid UTF-8 sequence
ea8ce907 758 invalid = true;
dccce9ea
VZ
759 }
760 else
4def3b35
VS
761 {
762 unsigned ocnt = cnt - 1;
763 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 764 while (cnt--)
4def3b35 765 {
ea8ce907 766 cc = *psz;
dccce9ea 767 if ((cc & 0xC0) != 0x80)
4def3b35
VS
768 {
769 // invalid UTF-8 sequence
ea8ce907
RR
770 invalid = true;
771 break;
4def3b35 772 }
ef199164 773
ea8ce907 774 psz++;
4def3b35
VS
775 res = (res << 6) | (cc & 0x3f);
776 }
ef199164 777
ea8ce907 778 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
779 {
780 // illegal UTF-8 encoding
ea8ce907 781 invalid = true;
4def3b35 782 }
ea8ce907
RR
783 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
784 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
785 {
786 // if one of our PUA characters turns up externally
787 // it must also be treated as an illegal sequence
788 // (a bit like you have to escape an escape character)
789 invalid = true;
790 }
791 else
792 {
1cd52418 793#ifdef WC_UTF16
ea8ce907
RR
794 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
795 size_t pa = encode_utf16(res, (wxUint16 *)buf);
467e0479 796 if (pa == wxCONV_FAILED)
ea8ce907
RR
797 {
798 invalid = true;
799 }
800 else
801 {
802 if (buf)
803 buf += pa;
804 len += pa;
805 }
373658eb 806#else // !WC_UTF16
ea8ce907 807 if (buf)
38d4b1e4 808 *buf++ = (wchar_t)res;
ea8ce907 809 len++;
373658eb 810#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
811 }
812 }
ef199164 813
ea8ce907
RR
814 if (invalid)
815 {
816 if (m_options & MAP_INVALID_UTF8_TO_PUA)
817 {
818 while (opsz < psz && (!buf || len < n))
819 {
820#ifdef WC_UTF16
821 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
822 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
467e0479 823 wxASSERT(pa != wxCONV_FAILED);
ea8ce907
RR
824 if (buf)
825 buf += pa;
826 opsz++;
827 len += pa;
828#else
829 if (buf)
38d4b1e4 830 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
ea8ce907
RR
831 opsz++;
832 len++;
833#endif
834 }
835 }
3698ae71 836 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
837 {
838 while (opsz < psz && (!buf || len < n))
839 {
3698ae71
VZ
840 if ( buf && len + 3 < n )
841 {
17a1ebd1 842 unsigned char on = *opsz;
3698ae71 843 *buf++ = L'\\';
17a1ebd1
VZ
844 *buf++ = (wchar_t)( L'0' + on / 0100 );
845 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
846 *buf++ = (wchar_t)( L'0' + on % 010 );
3698ae71 847 }
ef199164 848
ea8ce907
RR
849 opsz++;
850 len += 4;
851 }
852 }
3698ae71 853 else // MAP_INVALID_UTF8_NOT
ea8ce907 854 {
467e0479 855 return wxCONV_FAILED;
ea8ce907 856 }
4def3b35
VS
857 }
858 }
6001e347 859 }
ef199164 860
dccce9ea 861 if (buf && (len < n))
4def3b35 862 *buf = 0;
ef199164 863
4def3b35 864 return len;
6001e347
RR
865}
866
3698ae71
VZ
// returns true if wch is one of the octal digits L'0'..L'7'
static inline bool isoctal(wchar_t wch)
{
    if ( wch < L'0' )
        return false;

    return wch <= L'7';
}
871
6001e347
RR
872size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
873{
4def3b35 874 size_t len = 0;
6001e347 875
dccce9ea 876 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
877 {
878 wxUint32 cc;
ef199164 879
1cd52418 880#ifdef WC_UTF16
b5153fd8
VZ
881 // cast is ok for WC_UTF16
882 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
467e0479 883 psz += (pa == wxCONV_FAILED) ? 1 : pa;
1cd52418 884#else
ef199164 885 cc = (*psz++) & 0x7fffffff;
4def3b35 886#endif
3698ae71
VZ
887
888 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
889 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 890 {
dccce9ea 891 if (buf)
ea8ce907 892 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 893 len++;
3698ae71 894 }
561488ef
MW
895 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
896 && cc == L'\\' && psz[0] == L'\\' )
897 {
898 if (buf)
899 *buf++ = (char)cc;
900 psz++;
901 len++;
902 }
3698ae71
VZ
903 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
904 cc == L'\\' &&
905 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 906 {
dccce9ea 907 if (buf)
3698ae71 908 {
ef199164
DS
909 *buf++ = (char) ((psz[0] - L'0') * 0100 +
910 (psz[1] - L'0') * 010 +
b2c13097 911 (psz[2] - L'0'));
3698ae71
VZ
912 }
913
914 psz += 3;
ea8ce907
RR
915 len++;
916 }
917 else
918 {
919 unsigned cnt;
ef199164
DS
920 for (cnt = 0; cc > utf8_max[cnt]; cnt++)
921 {
922 }
923
ea8ce907 924 if (!cnt)
4def3b35 925 {
ea8ce907
RR
926 // plain ASCII char
927 if (buf)
928 *buf++ = (char) cc;
929 len++;
930 }
ea8ce907
RR
931 else
932 {
933 len += cnt + 1;
934 if (buf)
935 {
936 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
937 while (cnt--)
938 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
939 }
4def3b35
VS
940 }
941 }
6001e347 942 }
4def3b35 943
ef199164 944 if (buf && (len < n))
3698ae71 945 *buf = 0;
adb45366 946
4def3b35 947 return len;
6001e347
RR
948}
949
467e0479 950// ============================================================================
c91830cb 951// UTF-16
467e0479 952// ============================================================================
c91830cb
VZ
953
954#ifdef WORDS_BIGENDIAN
bde4baac
VZ
955 #define wxMBConvUTF16straight wxMBConvUTF16BE
956 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 957#else
bde4baac
VZ
958 #define wxMBConvUTF16swap wxMBConvUTF16BE
959 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
960#endif
961
467e0479
VZ
962/* static */
963size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
964{
965 if ( srcLen == wxNO_LEN )
966 {
967 // count the number of bytes in input, including the trailing NULs
ef199164
DS
968 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
969 for ( srcLen = 1; *inBuff++; srcLen++ )
467e0479 970 ;
c91830cb 971
467e0479
VZ
972 srcLen *= BYTES_PER_CHAR;
973 }
974 else // we already have the length
975 {
976 // we can only convert an entire number of UTF-16 characters
977 if ( srcLen % BYTES_PER_CHAR )
978 return wxCONV_FAILED;
979 }
980
981 return srcLen;
982}
983
984// case when in-memory representation is UTF-16 too
c91830cb
VZ
985#ifdef WC_UTF16
986
467e0479
VZ
987// ----------------------------------------------------------------------------
988// conversions without endianness change
989// ----------------------------------------------------------------------------
990
991size_t
992wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
993 const char *src, size_t srcLen) const
c91830cb 994{
467e0479
VZ
995 // set up the scene for using memcpy() (which is presumably more efficient
996 // than copying the bytes one by one)
997 srcLen = GetLength(src, srcLen);
998 if ( srcLen == wxNO_LEN )
999 return wxCONV_FAILED;
c91830cb 1000
ef199164 1001 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479 1002 if ( dst )
c91830cb 1003 {
467e0479
VZ
1004 if ( dstLen < inLen )
1005 return wxCONV_FAILED;
c91830cb 1006
467e0479 1007 memcpy(dst, src, srcLen);
c91830cb 1008 }
d32a507d 1009
467e0479 1010 return inLen;
c91830cb
VZ
1011}
1012
467e0479
VZ
1013size_t
1014wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1015 const wchar_t *src, size_t srcLen) const
c91830cb 1016{
467e0479
VZ
1017 if ( srcLen == wxNO_LEN )
1018 srcLen = wxWcslen(src) + 1;
c91830cb 1019
467e0479
VZ
1020 srcLen *= BYTES_PER_CHAR;
1021
1022 if ( dst )
c91830cb 1023 {
467e0479
VZ
1024 if ( dstLen < srcLen )
1025 return wxCONV_FAILED;
d32a507d 1026
467e0479 1027 memcpy(dst, src, srcLen);
c91830cb 1028 }
d32a507d 1029
467e0479 1030 return srcLen;
c91830cb
VZ
1031}
1032
467e0479
VZ
1033// ----------------------------------------------------------------------------
1034// endian-reversing conversions
1035// ----------------------------------------------------------------------------
c91830cb 1036
467e0479
VZ
1037size_t
1038wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1039 const char *src, size_t srcLen) const
c91830cb 1040{
467e0479
VZ
1041 srcLen = GetLength(src, srcLen);
1042 if ( srcLen == wxNO_LEN )
1043 return wxCONV_FAILED;
c91830cb 1044
467e0479
VZ
1045 srcLen /= BYTES_PER_CHAR;
1046
1047 if ( dst )
c91830cb 1048 {
467e0479
VZ
1049 if ( dstLen < srcLen )
1050 return wxCONV_FAILED;
1051
ef199164
DS
1052 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1053 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
c91830cb 1054 {
ef199164 1055 *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff);
c91830cb 1056 }
c91830cb 1057 }
bfab25d4 1058
467e0479 1059 return srcLen;
c91830cb
VZ
1060}
1061
467e0479
VZ
1062size_t
1063wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1064 const wchar_t *src, size_t srcLen) const
c91830cb 1065{
467e0479
VZ
1066 if ( srcLen == wxNO_LEN )
1067 srcLen = wxWcslen(src) + 1;
c91830cb 1068
467e0479
VZ
1069 srcLen *= BYTES_PER_CHAR;
1070
1071 if ( dst )
c91830cb 1072 {
467e0479
VZ
1073 if ( dstLen < srcLen )
1074 return wxCONV_FAILED;
1075
ef199164 1076 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
467e0479 1077 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
c91830cb 1078 {
ef199164 1079 *outBuff++ = wxUINT16_SWAP_ALWAYS(*src);
c91830cb 1080 }
c91830cb 1081 }
eec47cc6 1082
467e0479 1083 return srcLen;
c91830cb
VZ
1084}
1085
467e0479 1086#else // !WC_UTF16: wchar_t is UTF-32
c91830cb 1087
467e0479
VZ
1088// ----------------------------------------------------------------------------
1089// conversions without endianness change
1090// ----------------------------------------------------------------------------
c91830cb 1091
35d11700
VZ
1092size_t
1093wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1094 const char *src, size_t srcLen) const
c91830cb 1095{
35d11700
VZ
1096 srcLen = GetLength(src, srcLen);
1097 if ( srcLen == wxNO_LEN )
1098 return wxCONV_FAILED;
c91830cb 1099
ef199164 1100 const size_t inLen = srcLen / BYTES_PER_CHAR;
35d11700 1101 if ( !dst )
c91830cb 1102 {
35d11700
VZ
1103 // optimization: return maximal space which could be needed for this
1104 // string even if the real size could be smaller if the buffer contains
1105 // any surrogates
1106 return inLen;
c91830cb 1107 }
c91830cb 1108
35d11700 1109 size_t outLen = 0;
ef199164
DS
1110 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1111 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
35d11700 1112 {
ef199164
DS
1113 const wxUint32 ch = wxDecodeSurrogate(&inBuff);
1114 if ( !inBuff )
35d11700
VZ
1115 return wxCONV_FAILED;
1116
1117 if ( ++outLen > dstLen )
1118 return wxCONV_FAILED;
c91830cb 1119
35d11700
VZ
1120 *dst++ = ch;
1121 }
1122
1123
1124 return outLen;
1125}
c91830cb 1126
35d11700
VZ
1127size_t
1128wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1129 const wchar_t *src, size_t srcLen) const
c91830cb 1130{
35d11700
VZ
1131 if ( srcLen == wxNO_LEN )
1132 srcLen = wxWcslen(src) + 1;
c91830cb 1133
35d11700 1134 size_t outLen = 0;
ef199164 1135 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
35d11700 1136 for ( size_t n = 0; n < srcLen; n++ )
c91830cb
VZ
1137 {
1138 wxUint16 cc[2];
35d11700
VZ
1139 const size_t numChars = encode_utf16(*src++, cc);
1140 if ( numChars == wxCONV_FAILED )
1141 return wxCONV_FAILED;
c91830cb 1142
ef199164
DS
1143 outLen += numChars * BYTES_PER_CHAR;
1144 if ( outBuff )
c91830cb 1145 {
35d11700
VZ
1146 if ( outLen > dstLen )
1147 return wxCONV_FAILED;
1148
ef199164 1149 *outBuff++ = cc[0];
35d11700 1150 if ( numChars == 2 )
69b80d28 1151 {
35d11700 1152 // second character of a surrogate
ef199164 1153 *outBuff++ = cc[1];
69b80d28 1154 }
c91830cb 1155 }
c91830cb 1156 }
c91830cb 1157
35d11700 1158 return outLen;
c91830cb
VZ
1159}
1160
467e0479
VZ
1161// ----------------------------------------------------------------------------
1162// endian-reversing conversions
1163// ----------------------------------------------------------------------------
c91830cb 1164
35d11700
VZ
1165size_t
1166wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1167 const char *src, size_t srcLen) const
c91830cb 1168{
35d11700
VZ
1169 srcLen = GetLength(src, srcLen);
1170 if ( srcLen == wxNO_LEN )
1171 return wxCONV_FAILED;
1172
ef199164 1173 const size_t inLen = srcLen / BYTES_PER_CHAR;
35d11700
VZ
1174 if ( !dst )
1175 {
1176 // optimization: return maximal space which could be needed for this
1177 // string even if the real size could be smaller if the buffer contains
1178 // any surrogates
1179 return inLen;
1180 }
c91830cb 1181
35d11700 1182 size_t outLen = 0;
ef199164
DS
1183 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1184 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
c91830cb 1185 {
35d11700
VZ
1186 wxUint32 ch;
1187 wxUint16 tmp[2];
ef199164
DS
1188
1189 tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
1190 inBuff++;
1191 tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
c91830cb 1192
35d11700
VZ
1193 const size_t numChars = decode_utf16(tmp, ch);
1194 if ( numChars == wxCONV_FAILED )
1195 return wxCONV_FAILED;
c91830cb 1196
35d11700 1197 if ( numChars == 2 )
ef199164 1198 inBuff++;
35d11700
VZ
1199
1200 if ( ++outLen > dstLen )
1201 return wxCONV_FAILED;
c91830cb 1202
35d11700 1203 *dst++ = ch;
c91830cb 1204 }
c91830cb 1205
c91830cb 1206
35d11700
VZ
1207 return outLen;
1208}
c91830cb 1209
35d11700
VZ
1210size_t
1211wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1212 const wchar_t *src, size_t srcLen) const
c91830cb 1213{
35d11700
VZ
1214 if ( srcLen == wxNO_LEN )
1215 srcLen = wxWcslen(src) + 1;
c91830cb 1216
35d11700 1217 size_t outLen = 0;
ef199164 1218 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
35d11700 1219 for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
c91830cb
VZ
1220 {
1221 wxUint16 cc[2];
35d11700
VZ
1222 const size_t numChars = encode_utf16(*src, cc);
1223 if ( numChars == wxCONV_FAILED )
1224 return wxCONV_FAILED;
c91830cb 1225
ef199164
DS
1226 outLen += numChars * BYTES_PER_CHAR;
1227 if ( outBuff )
c91830cb 1228 {
35d11700
VZ
1229 if ( outLen > dstLen )
1230 return wxCONV_FAILED;
1231
ef199164 1232 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
35d11700 1233 if ( numChars == 2 )
c91830cb 1234 {
35d11700 1235 // second character of a surrogate
ef199164 1236 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
c91830cb
VZ
1237 }
1238 }
c91830cb 1239 }
c91830cb 1240
35d11700 1241 return outLen;
c91830cb
VZ
1242}
1243
467e0479 1244#endif // WC_UTF16/!WC_UTF16
c91830cb
VZ
1245
1246
35d11700 1247// ============================================================================
c91830cb 1248// UTF-32
35d11700 1249// ============================================================================
c91830cb
VZ
1250
1251#ifdef WORDS_BIGENDIAN
467e0479
VZ
1252 #define wxMBConvUTF32straight wxMBConvUTF32BE
1253 #define wxMBConvUTF32swap wxMBConvUTF32LE
c91830cb 1254#else
467e0479
VZ
1255 #define wxMBConvUTF32swap wxMBConvUTF32BE
1256 #define wxMBConvUTF32straight wxMBConvUTF32LE
c91830cb
VZ
1257#endif
1258
1259
1260WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1261WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1262
467e0479
VZ
1263/* static */
1264size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
1265{
1266 if ( srcLen == wxNO_LEN )
1267 {
1268 // count the number of bytes in input, including the trailing NULs
ef199164
DS
1269 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1270 for ( srcLen = 1; *inBuff++; srcLen++ )
467e0479 1271 ;
c91830cb 1272
467e0479
VZ
1273 srcLen *= BYTES_PER_CHAR;
1274 }
1275 else // we already have the length
1276 {
1277 // we can only convert an entire number of UTF-32 characters
1278 if ( srcLen % BYTES_PER_CHAR )
1279 return wxCONV_FAILED;
1280 }
1281
1282 return srcLen;
1283}
1284
1285// case when in-memory representation is UTF-16
c91830cb
VZ
1286#ifdef WC_UTF16
1287
467e0479
VZ
1288// ----------------------------------------------------------------------------
1289// conversions without endianness change
1290// ----------------------------------------------------------------------------
1291
1292size_t
1293wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1294 const char *src, size_t srcLen) const
c91830cb 1295{
467e0479
VZ
1296 srcLen = GetLength(src, srcLen);
1297 if ( srcLen == wxNO_LEN )
1298 return wxCONV_FAILED;
c91830cb 1299
ef199164
DS
1300 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1301 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479
VZ
1302 size_t outLen = 0;
1303 for ( size_t n = 0; n < inLen; n++ )
c91830cb
VZ
1304 {
1305 wxUint16 cc[2];
ef199164 1306 const size_t numChars = encode_utf16(*inBuff++, cc);
467e0479
VZ
1307 if ( numChars == wxCONV_FAILED )
1308 return wxCONV_FAILED;
c91830cb 1309
467e0479
VZ
1310 outLen += numChars;
1311 if ( dst )
c91830cb 1312 {
467e0479
VZ
1313 if ( outLen > dstLen )
1314 return wxCONV_FAILED;
d32a507d 1315
467e0479
VZ
1316 *dst++ = cc[0];
1317 if ( numChars == 2 )
1318 {
1319 // second character of a surrogate
1320 *dst++ = cc[1];
1321 }
1322 }
c91830cb 1323 }
d32a507d 1324
467e0479 1325 return outLen;
c91830cb
VZ
1326}
1327
467e0479
VZ
1328size_t
1329wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1330 const wchar_t *src, size_t srcLen) const
c91830cb 1331{
467e0479
VZ
1332 if ( srcLen == wxNO_LEN )
1333 srcLen = wxWcslen(src) + 1;
c91830cb 1334
467e0479 1335 if ( !dst )
c91830cb 1336 {
467e0479
VZ
1337 // optimization: return maximal space which could be needed for this
1338 // string instead of the exact amount which could be less if there are
1339 // any surrogates in the input
1340 //
1341 // we consider that surrogates are rare enough to make it worthwhile to
1342 // avoid running the loop below at the cost of slightly extra memory
1343 // consumption
ef199164 1344 return srcLen * BYTES_PER_CHAR;
467e0479 1345 }
c91830cb 1346
ef199164 1347 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
467e0479
VZ
1348 size_t outLen = 0;
1349 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1350 {
1351 const wxUint32 ch = wxDecodeSurrogate(&src);
1352 if ( !src )
1353 return wxCONV_FAILED;
c91830cb 1354
467e0479 1355 outLen += BYTES_PER_CHAR;
d32a507d 1356
467e0479
VZ
1357 if ( outLen > dstLen )
1358 return wxCONV_FAILED;
b5153fd8 1359
ef199164 1360 *outBuff++ = ch;
467e0479 1361 }
c91830cb 1362
467e0479 1363 return outLen;
c91830cb
VZ
1364}
1365
467e0479
VZ
1366// ----------------------------------------------------------------------------
1367// endian-reversing conversions
1368// ----------------------------------------------------------------------------
c91830cb 1369
467e0479
VZ
1370size_t
1371wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1372 const char *src, size_t srcLen) const
c91830cb 1373{
467e0479
VZ
1374 srcLen = GetLength(src, srcLen);
1375 if ( srcLen == wxNO_LEN )
1376 return wxCONV_FAILED;
c91830cb 1377
ef199164
DS
1378 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1379 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479 1380 size_t outLen = 0;
ef199164 1381 for ( size_t n = 0; n < inLen; n++, inBuff++ )
c91830cb 1382 {
c91830cb 1383 wxUint16 cc[2];
ef199164 1384 const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
467e0479
VZ
1385 if ( numChars == wxCONV_FAILED )
1386 return wxCONV_FAILED;
c91830cb 1387
467e0479
VZ
1388 outLen += numChars;
1389 if ( dst )
c91830cb 1390 {
467e0479
VZ
1391 if ( outLen > dstLen )
1392 return wxCONV_FAILED;
d32a507d 1393
467e0479
VZ
1394 *dst++ = cc[0];
1395 if ( numChars == 2 )
1396 {
1397 // second character of a surrogate
1398 *dst++ = cc[1];
1399 }
1400 }
c91830cb 1401 }
b5153fd8 1402
467e0479 1403 return outLen;
c91830cb
VZ
1404}
1405
467e0479
VZ
1406size_t
1407wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1408 const wchar_t *src, size_t srcLen) const
c91830cb 1409{
467e0479
VZ
1410 if ( srcLen == wxNO_LEN )
1411 srcLen = wxWcslen(src) + 1;
c91830cb 1412
467e0479 1413 if ( !dst )
c91830cb 1414 {
467e0479
VZ
1415 // optimization: return maximal space which could be needed for this
1416 // string instead of the exact amount which could be less if there are
1417 // any surrogates in the input
1418 //
1419 // we consider that surrogates are rare enough to make it worthwhile to
1420 // avoid running the loop below at the cost of slightly extra memory
1421 // consumption
1422 return srcLen*BYTES_PER_CHAR;
1423 }
c91830cb 1424
ef199164 1425 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
467e0479
VZ
1426 size_t outLen = 0;
1427 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1428 {
1429 const wxUint32 ch = wxDecodeSurrogate(&src);
1430 if ( !src )
1431 return wxCONV_FAILED;
c91830cb 1432
467e0479 1433 outLen += BYTES_PER_CHAR;
d32a507d 1434
467e0479
VZ
1435 if ( outLen > dstLen )
1436 return wxCONV_FAILED;
b5153fd8 1437
ef199164 1438 *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
467e0479 1439 }
c91830cb 1440
467e0479 1441 return outLen;
c91830cb
VZ
1442}
1443
467e0479 1444#else // !WC_UTF16: wchar_t is UTF-32
c91830cb 1445
35d11700
VZ
1446// ----------------------------------------------------------------------------
1447// conversions without endianness change
1448// ----------------------------------------------------------------------------
1449
1450size_t
1451wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1452 const char *src, size_t srcLen) const
c91830cb 1453{
35d11700
VZ
1454 // use memcpy() as it should be much faster than hand-written loop
1455 srcLen = GetLength(src, srcLen);
1456 if ( srcLen == wxNO_LEN )
1457 return wxCONV_FAILED;
c91830cb 1458
35d11700
VZ
1459 const size_t inLen = srcLen/BYTES_PER_CHAR;
1460 if ( dst )
c91830cb 1461 {
35d11700
VZ
1462 if ( dstLen < inLen )
1463 return wxCONV_FAILED;
b5153fd8 1464
35d11700
VZ
1465 memcpy(dst, src, srcLen);
1466 }
c91830cb 1467
35d11700 1468 return inLen;
c91830cb
VZ
1469}
1470
35d11700
VZ
1471size_t
1472wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1473 const wchar_t *src, size_t srcLen) const
c91830cb 1474{
35d11700
VZ
1475 if ( srcLen == wxNO_LEN )
1476 srcLen = wxWcslen(src) + 1;
1477
1478 srcLen *= BYTES_PER_CHAR;
c91830cb 1479
35d11700 1480 if ( dst )
c91830cb 1481 {
35d11700
VZ
1482 if ( dstLen < srcLen )
1483 return wxCONV_FAILED;
c91830cb 1484
35d11700 1485 memcpy(dst, src, srcLen);
c91830cb
VZ
1486 }
1487
35d11700 1488 return srcLen;
c91830cb
VZ
1489}
1490
35d11700
VZ
1491// ----------------------------------------------------------------------------
1492// endian-reversing conversions
1493// ----------------------------------------------------------------------------
c91830cb 1494
35d11700
VZ
1495size_t
1496wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1497 const char *src, size_t srcLen) const
c91830cb 1498{
35d11700
VZ
1499 srcLen = GetLength(src, srcLen);
1500 if ( srcLen == wxNO_LEN )
1501 return wxCONV_FAILED;
1502
1503 srcLen /= BYTES_PER_CHAR;
c91830cb 1504
35d11700 1505 if ( dst )
c91830cb 1506 {
35d11700
VZ
1507 if ( dstLen < srcLen )
1508 return wxCONV_FAILED;
1509
ef199164
DS
1510 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1511 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
c91830cb 1512 {
ef199164 1513 *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff);
c91830cb 1514 }
c91830cb 1515 }
b5153fd8 1516
35d11700 1517 return srcLen;
c91830cb
VZ
1518}
1519
35d11700
VZ
1520size_t
1521wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1522 const wchar_t *src, size_t srcLen) const
c91830cb 1523{
35d11700
VZ
1524 if ( srcLen == wxNO_LEN )
1525 srcLen = wxWcslen(src) + 1;
1526
1527 srcLen *= BYTES_PER_CHAR;
c91830cb 1528
35d11700 1529 if ( dst )
c91830cb 1530 {
35d11700
VZ
1531 if ( dstLen < srcLen )
1532 return wxCONV_FAILED;
1533
ef199164 1534 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
35d11700 1535 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
c91830cb 1536 {
ef199164 1537 *outBuff++ = wxUINT32_SWAP_ALWAYS(*src);
c91830cb 1538 }
c91830cb 1539 }
b5153fd8 1540
35d11700 1541 return srcLen;
c91830cb
VZ
1542}
1543
467e0479 1544#endif // WC_UTF16/!WC_UTF16
c91830cb
VZ
1545
1546
36acb880
VZ
1547// ============================================================================
1548// The classes doing conversion using the iconv_xxx() functions
1549// ============================================================================
3caec1bb 1550
b040e242 1551#ifdef HAVE_ICONV
3a0d76bc 1552
b1d547eb
VS
1553// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1554// E2BIG if output buffer is _exactly_ as big as needed. Such case is
1555// (unless there's yet another bug in glibc) the only case when iconv()
1556// returns with (size_t)-1 (which means error) and says there are 0 bytes
1557// left in the input buffer -- when _real_ error occurs,
1558// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1559// iconv() failure.
3caec1bb
VS
1560// [This bug does not appear in glibc 2.2.]
1561#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1562#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1563 (errno != E2BIG || bufLeft != 0))
1564#else
1565#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1566#endif
1567
ab217dba 1568#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880 1569
74a7eb0b
VZ
1570#define ICONV_T_INVALID ((iconv_t)-1)
1571
1572#if SIZEOF_WCHAR_T == 4
1573 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1574 #define WC_ENC wxFONTENCODING_UTF32
1575#elif SIZEOF_WCHAR_T == 2
1576 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1577 #define WC_ENC wxFONTENCODING_UTF16
1578#else // sizeof(wchar_t) != 2 nor 4
1579 // does this ever happen?
1580 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1581#endif
1582
36acb880 1583// ----------------------------------------------------------------------------
e95354ec 1584// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1585// ----------------------------------------------------------------------------
1586
e95354ec 1587class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1588{
1589public:
e95354ec
VZ
1590 wxMBConv_iconv(const wxChar *name);
1591 virtual ~wxMBConv_iconv();
36acb880 1592
bde4baac
VZ
1593 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1594 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1595
d36c9347 1596 // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
7ef3ab50
VZ
1597 virtual size_t GetMBNulLen() const;
1598
d36c9347
VZ
1599 virtual wxMBConv *Clone() const
1600 {
1601 wxMBConv_iconv *p = new wxMBConv_iconv(m_name);
1602 p->m_minMBCharWidth = m_minMBCharWidth;
1603 return p;
1604 }
1605
e95354ec 1606 bool IsOk() const
74a7eb0b 1607 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
36acb880
VZ
1608
1609protected:
ef199164
DS
1610 // the iconv handlers used to translate from multibyte
1611 // to wide char and in the other direction
36acb880
VZ
1612 iconv_t m2w,
1613 w2m;
ef199164 1614
b1d547eb
VS
1615#if wxUSE_THREADS
1616 // guards access to m2w and w2m objects
1617 wxMutex m_iconvMutex;
1618#endif
36acb880
VZ
1619
1620private:
e95354ec 1621 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880 1622 // available on this machine, it will remain NULL
74a7eb0b 1623 static wxString ms_wcCharsetName;
36acb880
VZ
1624
1625 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1626 // different endian-ness than the native one
405d8f46 1627 static bool ms_wcNeedsSwap;
eec47cc6 1628
d36c9347
VZ
1629
1630 // name of the encoding handled by this conversion
1631 wxString m_name;
1632
7ef3ab50 1633 // cached result of GetMBNulLen(); set to 0 meaning "unknown"
c1464d9d
VZ
1634 // initially
1635 size_t m_minMBCharWidth;
36acb880
VZ
1636};
1637
8f115891
MW
1638// make the constructor available for unit testing
1639WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1640{
1641 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1642 if ( !result->IsOk() )
1643 {
1644 delete result;
1645 return 0;
1646 }
ef199164 1647
8f115891
MW
1648 return result;
1649}
1650
422e411e 1651wxString wxMBConv_iconv::ms_wcCharsetName;
e95354ec 1652bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1653
e95354ec 1654wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
d36c9347 1655 : m_name(name)
36acb880 1656{
c1464d9d 1657 m_minMBCharWidth = 0;
eec47cc6 1658
0331b385
VZ
1659 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1660 // names for the charsets
200a9923 1661 const wxCharBuffer cname(wxString(name).ToAscii());
04c79127 1662
36acb880 1663 // check for charset that represents wchar_t:
74a7eb0b 1664 if ( ms_wcCharsetName.empty() )
f1339c56 1665 {
c2b83fdd
VZ
1666 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1667
74a7eb0b
VZ
1668#if wxUSE_FONTMAP
1669 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1670#else // !wxUSE_FONTMAP
1671 static const wxChar *names[] =
36acb880 1672 {
74a7eb0b
VZ
1673#if SIZEOF_WCHAR_T == 4
1674 _T("UCS-4"),
1675#elif SIZEOF_WCHAR_T = 2
1676 _T("UCS-2"),
1677#endif
1678 NULL
1679 };
1680#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
36acb880 1681
d1f024a8 1682 for ( ; *names && ms_wcCharsetName.empty(); ++names )
74a7eb0b 1683 {
17a1ebd1 1684 const wxString nameCS(*names);
74a7eb0b
VZ
1685
1686 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
17a1ebd1 1687 wxString nameXE(nameCS);
ef199164
DS
1688
1689#ifdef WORDS_BIGENDIAN
74a7eb0b 1690 nameXE += _T("BE");
ef199164 1691#else // little endian
74a7eb0b 1692 nameXE += _T("LE");
ef199164 1693#endif
74a7eb0b 1694
c2b83fdd
VZ
1695 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1696 nameXE.c_str());
1697
74a7eb0b
VZ
1698 m2w = iconv_open(nameXE.ToAscii(), cname);
1699 if ( m2w == ICONV_T_INVALID )
3a0d76bc 1700 {
74a7eb0b 1701 // try charset w/o bytesex info (e.g. "UCS4")
c2b83fdd
VZ
1702 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1703 nameCS.c_str());
17a1ebd1 1704 m2w = iconv_open(nameCS.ToAscii(), cname);
3a0d76bc 1705
74a7eb0b
VZ
1706 // and check for bytesex ourselves:
1707 if ( m2w != ICONV_T_INVALID )
3a0d76bc 1708 {
74a7eb0b
VZ
1709 char buf[2], *bufPtr;
1710 wchar_t wbuf[2], *wbufPtr;
1711 size_t insz, outsz;
1712 size_t res;
1713
1714 buf[0] = 'A';
1715 buf[1] = 0;
1716 wbuf[0] = 0;
1717 insz = 2;
1718 outsz = SIZEOF_WCHAR_T * 2;
1719 wbufPtr = wbuf;
1720 bufPtr = buf;
1721
ef199164
DS
1722 res = iconv(
1723 m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1724 (char**)&wbufPtr, &outsz);
74a7eb0b
VZ
1725
1726 if (ICONV_FAILED(res, insz))
1727 {
1728 wxLogLastError(wxT("iconv"));
422e411e 1729 wxLogError(_("Conversion to charset '%s' doesn't work."),
17a1ebd1 1730 nameCS.c_str());
74a7eb0b
VZ
1731 }
1732 else // ok, can convert to this encoding, remember it
1733 {
17a1ebd1 1734 ms_wcCharsetName = nameCS;
74a7eb0b
VZ
1735 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1736 }
3a0d76bc
VS
1737 }
1738 }
74a7eb0b 1739 else // use charset not requiring byte swapping
36acb880 1740 {
74a7eb0b 1741 ms_wcCharsetName = nameXE;
36acb880 1742 }
3a0d76bc 1743 }
74a7eb0b 1744
0944fceb 1745 wxLogTrace(TRACE_STRCONV,
74a7eb0b 1746 wxT("iconv wchar_t charset is \"%s\"%s"),
cae8f1bf 1747 ms_wcCharsetName.empty() ? _T("<none>")
74a7eb0b
VZ
1748 : ms_wcCharsetName.c_str(),
1749 ms_wcNeedsSwap ? _T(" (needs swap)")
1750 : _T(""));
3a0d76bc 1751 }
36acb880 1752 else // we already have ms_wcCharsetName
3caec1bb 1753 {
74a7eb0b 1754 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
f1339c56 1755 }
dccce9ea 1756
74a7eb0b 1757 if ( ms_wcCharsetName.empty() )
f1339c56 1758 {
74a7eb0b 1759 w2m = ICONV_T_INVALID;
36acb880 1760 }
405d8f46
VZ
1761 else
1762 {
74a7eb0b
VZ
1763 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1764 if ( w2m == ICONV_T_INVALID )
1765 {
1766 wxLogTrace(TRACE_STRCONV,
1767 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
422e411e 1768 ms_wcCharsetName.c_str(), cname.data());
74a7eb0b 1769 }
405d8f46 1770 }
36acb880 1771}
3caec1bb 1772
e95354ec 1773wxMBConv_iconv::~wxMBConv_iconv()
36acb880 1774{
74a7eb0b 1775 if ( m2w != ICONV_T_INVALID )
36acb880 1776 iconv_close(m2w);
74a7eb0b 1777 if ( w2m != ICONV_T_INVALID )
36acb880
VZ
1778 iconv_close(w2m);
1779}
3a0d76bc 1780
bde4baac 1781size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1782{
69373110
VZ
1783 // find the string length: notice that must be done differently for
1784 // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1785 size_t inbuf;
7ef3ab50 1786 const size_t nulLen = GetMBNulLen();
69373110
VZ
1787 switch ( nulLen )
1788 {
1789 default:
467e0479 1790 return wxCONV_FAILED;
69373110
VZ
1791
1792 case 1:
1793 inbuf = strlen(psz); // arguably more optimized than our version
1794 break;
1795
1796 case 2:
1797 case 4:
1798 // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1799 // they also have to start at character boundary and not span two
1800 // adjacent characters
1801 const char *p;
1802 for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
1803 ;
1804 inbuf = p - psz;
1805 break;
1806 }
1807
b1d547eb 1808#if wxUSE_THREADS
6a17b868
SN
1809 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1810 // Unfortunately there are a couple of global wxCSConv objects such as
b1d547eb
VS
1811 // wxConvLocal that are used all over wx code, so we have to make sure
1812 // the handle is used by at most one thread at the time. Otherwise
1813 // only a few wx classes would be safe to use from non-main threads
1814 // as MB<->WC conversion would fail "randomly".
1815 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
69373110
VZ
1816#endif // wxUSE_THREADS
1817
36acb880
VZ
1818 size_t outbuf = n * SIZEOF_WCHAR_T;
1819 size_t res, cres;
1820 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1821 wchar_t *bufPtr = buf;
1822 const char *pszPtr = psz;
1823
1824 if (buf)
1825 {
1826 // have destination buffer, convert there
1827 cres = iconv(m2w,
1828 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1829 (char**)&bufPtr, &outbuf);
1830 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1831
36acb880 1832 if (ms_wcNeedsSwap)
3a0d76bc 1833 {
36acb880 1834 // convert to native endianness
17a1ebd1
VZ
1835 for ( unsigned i = 0; i < res; i++ )
1836 buf[n] = WC_BSWAP(buf[i]);
3a0d76bc 1837 }
adb45366 1838
69373110 1839 // NUL-terminate the string if there is any space left
49dd9820
VS
1840 if (res < n)
1841 buf[res] = 0;
36acb880
VZ
1842 }
1843 else
1844 {
1845 // no destination buffer... convert using temp buffer
1846 // to calculate destination buffer requirement
1847 wchar_t tbuf[8];
1848 res = 0;
ef199164
DS
1849
1850 do
1851 {
36acb880 1852 bufPtr = tbuf;
ef199164 1853 outbuf = 8 * SIZEOF_WCHAR_T;
36acb880
VZ
1854
1855 cres = iconv(m2w,
1856 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1857 (char**)&bufPtr, &outbuf );
1858
ef199164
DS
1859 res += 8 - (outbuf / SIZEOF_WCHAR_T);
1860 }
1861 while ((cres == (size_t)-1) && (errno == E2BIG));
f1339c56 1862 }
dccce9ea 1863
36acb880 1864 if (ICONV_FAILED(cres, inbuf))
f1339c56 1865 {
36acb880 1866 //VS: it is ok if iconv fails, hence trace only
ce6f8d6f 1867 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
467e0479 1868 return wxCONV_FAILED;
36acb880
VZ
1869 }
1870
1871 return res;
1872}
1873
bde4baac 1874size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1875{
b1d547eb
VS
1876#if wxUSE_THREADS
1877 // NB: explained in MB2WC
1878 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1879#endif
3698ae71 1880
156162ec
MW
1881 size_t inlen = wxWcslen(psz);
1882 size_t inbuf = inlen * SIZEOF_WCHAR_T;
36acb880
VZ
1883 size_t outbuf = n;
1884 size_t res, cres;
3a0d76bc 1885
36acb880 1886 wchar_t *tmpbuf = 0;
3caec1bb 1887
36acb880
VZ
1888 if (ms_wcNeedsSwap)
1889 {
1890 // need to copy to temp buffer to switch endianness
74a7eb0b 1891 // (doing WC_BSWAP twice on the original buffer won't help, as it
36acb880 1892 // could be in read-only memory, or be accessed in some other thread)
74a7eb0b 1893 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
17a1ebd1
VZ
1894 for ( size_t i = 0; i < inlen; i++ )
1895 tmpbuf[n] = WC_BSWAP(psz[i]);
ef199164 1896
156162ec 1897 tmpbuf[inlen] = L'\0';
74a7eb0b 1898 psz = tmpbuf;
36acb880 1899 }
3a0d76bc 1900
36acb880
VZ
1901 if (buf)
1902 {
1903 // have destination buffer, convert there
1904 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1905
ef199164 1906 res = n - outbuf;
adb45366 1907
49dd9820
VS
1908 // NB: iconv was given only wcslen(psz) characters on input, and so
1909 // it couldn't convert the trailing zero. Let's do it ourselves
1910 // if there's some room left for it in the output buffer.
1911 if (res < n)
1912 buf[0] = 0;
36acb880
VZ
1913 }
1914 else
1915 {
ef199164 1916 // no destination buffer: convert using temp buffer
36acb880
VZ
1917 // to calculate destination buffer requirement
1918 char tbuf[16];
1919 res = 0;
ef199164
DS
1920 do
1921 {
1922 buf = tbuf;
1923 outbuf = 16;
36acb880
VZ
1924
1925 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1926
36acb880 1927 res += 16 - outbuf;
ef199164
DS
1928 }
1929 while ((cres == (size_t)-1) && (errno == E2BIG));
f1339c56 1930 }
dccce9ea 1931
36acb880
VZ
1932 if (ms_wcNeedsSwap)
1933 {
1934 free(tmpbuf);
1935 }
dccce9ea 1936
36acb880
VZ
1937 if (ICONV_FAILED(cres, inbuf))
1938 {
ce6f8d6f 1939 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
467e0479 1940 return wxCONV_FAILED;
36acb880
VZ
1941 }
1942
1943 return res;
1944}
1945
7ef3ab50 1946size_t wxMBConv_iconv::GetMBNulLen() const
eec47cc6 1947{
c1464d9d 1948 if ( m_minMBCharWidth == 0 )
eec47cc6
VZ
1949 {
1950 wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1951
1952#if wxUSE_THREADS
1953 // NB: explained in MB2WC
1954 wxMutexLocker lock(self->m_iconvMutex);
1955#endif
1956
356410fc 1957 wchar_t *wnul = L"";
c1464d9d 1958 char buf[8]; // should be enough for NUL in any encoding
356410fc 1959 size_t inLen = sizeof(wchar_t),
c1464d9d 1960 outLen = WXSIZEOF(buf);
ef199164
DS
1961 char *inBuff = (char *)wnul;
1962 char *outBuff = buf;
1963 if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
356410fc 1964 {
c1464d9d 1965 self->m_minMBCharWidth = (size_t)-1;
356410fc
VZ
1966 }
1967 else // ok
1968 {
ef199164 1969 self->m_minMBCharWidth = outBuff - buf;
356410fc 1970 }
eec47cc6
VZ
1971 }
1972
c1464d9d 1973 return m_minMBCharWidth;
eec47cc6
VZ
1974}
1975
b040e242 1976#endif // HAVE_ICONV
36acb880 1977
e95354ec 1978
36acb880
VZ
1979// ============================================================================
1980// Win32 conversion classes
1981// ============================================================================
1cd52418 1982
e95354ec 1983#ifdef wxHAVE_WIN32_MB2WC
373658eb 1984
8b04d4c4 1985// from utils.cpp
d775fa82 1986#if wxUSE_FONTMAP
8b04d4c4
VZ
1987extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1988extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1989#endif
373658eb 1990
e95354ec 1991class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1992{
1993public:
bde4baac
VZ
1994 wxMBConv_win32()
1995 {
1996 m_CodePage = CP_ACP;
c1464d9d 1997 m_minMBCharWidth = 0;
bde4baac
VZ
1998 }
1999
d36c9347 2000 wxMBConv_win32(const wxMBConv_win32& conv)
1e1c5d62 2001 : wxMBConv()
d36c9347
VZ
2002 {
2003 m_CodePage = conv.m_CodePage;
2004 m_minMBCharWidth = conv.m_minMBCharWidth;
2005 }
2006
7608a683 2007#if wxUSE_FONTMAP
e95354ec 2008 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
2009 {
2010 m_CodePage = wxCharsetToCodepage(name);
c1464d9d 2011 m_minMBCharWidth = 0;
bde4baac 2012 }
dccce9ea 2013
e95354ec 2014 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
2015 {
2016 m_CodePage = wxEncodingToCodepage(encoding);
c1464d9d 2017 m_minMBCharWidth = 0;
bde4baac 2018 }
eec47cc6 2019#endif // wxUSE_FONTMAP
8b04d4c4 2020
d36c9347 2021 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 2022 {
02272c9c
VZ
2023 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
2024 // the behaviour is not compatible with the Unix version (using iconv)
2025 // and break the library itself, e.g. wxTextInputStream::NextChar()
2026 // wouldn't work if reading an incomplete MB char didn't result in an
2027 // error
667e5b3e 2028 //
89028980 2029 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
830f8f11
VZ
2030 // Win XP or newer and it is not supported for UTF-[78] so we always
2031 // use our own conversions in this case. See
89028980
VS
2032 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2033 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
830f8f11 2034 if ( m_CodePage == CP_UTF8 )
89028980 2035 {
830f8f11 2036 return wxConvUTF8.MB2WC(buf, psz, n);
89028980 2037 }
830f8f11
VZ
2038
2039 if ( m_CodePage == CP_UTF7 )
2040 {
2041 return wxConvUTF7.MB2WC(buf, psz, n);
2042 }
2043
2044 int flags = 0;
2045 if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
2046 IsAtLeastWin2kSP4() )
89028980 2047 {
830f8f11 2048 flags = MB_ERR_INVALID_CHARS;
89028980 2049 }
667e5b3e 2050
2b5f62a0
VZ
2051 const size_t len = ::MultiByteToWideChar
2052 (
2053 m_CodePage, // code page
667e5b3e 2054 flags, // flags: fall on error
2b5f62a0
VZ
2055 psz, // input string
2056 -1, // its length (NUL-terminated)
b4da152e 2057 buf, // output string
2b5f62a0
VZ
2058 buf ? n : 0 // size of output buffer
2059 );
89028980
VS
2060 if ( !len )
2061 {
2062 // function totally failed
467e0479 2063 return wxCONV_FAILED;
89028980
VS
2064 }
2065
2066 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2067 // check if we succeeded, by doing a double trip:
2068 if ( !flags && buf )
2069 {
53c174fc
VZ
2070 const size_t mbLen = strlen(psz);
2071 wxCharBuffer mbBuf(mbLen);
89028980
VS
2072 if ( ::WideCharToMultiByte
2073 (
2074 m_CodePage,
2075 0,
2076 buf,
2077 -1,
2078 mbBuf.data(),
53c174fc 2079 mbLen + 1, // size in bytes, not length
89028980
VS
2080 NULL,
2081 NULL
2082 ) == 0 ||
2083 strcmp(mbBuf, psz) != 0 )
2084 {
2085 // we didn't obtain the same thing we started from, hence
2086 // the conversion was lossy and we consider that it failed
467e0479 2087 return wxCONV_FAILED;
89028980
VS
2088 }
2089 }
2b5f62a0 2090
03a991bc
VZ
2091 // note that it returns count of written chars for buf != NULL and size
2092 // of the needed buffer for buf == NULL so in either case the length of
2093 // the string (which never includes the terminating NUL) is one less
89028980 2094 return len - 1;
f1339c56 2095 }
dccce9ea 2096
d36c9347 2097 virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 2098 {
13dd924a
VZ
2099 /*
2100 we have a problem here: by default, WideCharToMultiByte() may
2101 replace characters unrepresentable in the target code page with bad
2102 quality approximations such as turning "1/2" symbol (U+00BD) into
2103 "1" for the code pages which don't have it and we, obviously, want
2104 to avoid this at any price
d775fa82 2105
13dd924a
VZ
2106 the trouble is that this function does it _silently_, i.e. it won't
2107 even tell us whether it did or not... Win98/2000 and higher provide
2108 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2109 we have to resort to a round trip, i.e. check that converting back
2110 results in the same string -- this is, of course, expensive but
2111 otherwise we simply can't be sure to not garble the data.
2112 */
2113
2114 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2115 // it doesn't work with CJK encodings (which we test for rather roughly
2116 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2117 // supporting it
907173e5
WS
2118 BOOL usedDef wxDUMMY_INITIALIZE(false);
2119 BOOL *pUsedDef;
13dd924a
VZ
2120 int flags;
2121 if ( CanUseNoBestFit() && m_CodePage < 50000 )
2122 {
2123 // it's our lucky day
2124 flags = WC_NO_BEST_FIT_CHARS;
2125 pUsedDef = &usedDef;
2126 }
2127 else // old system or unsupported encoding
2128 {
2129 flags = 0;
2130 pUsedDef = NULL;
2131 }
2132
2b5f62a0
VZ
2133 const size_t len = ::WideCharToMultiByte
2134 (
2135 m_CodePage, // code page
13dd924a
VZ
2136 flags, // either none or no best fit
2137 pwz, // input string
2b5f62a0
VZ
2138 -1, // it is (wide) NUL-terminated
2139 buf, // output buffer
2140 buf ? n : 0, // and its size
2141 NULL, // default "replacement" char
13dd924a 2142 pUsedDef // [out] was it used?
2b5f62a0
VZ
2143 );
2144
13dd924a
VZ
2145 if ( !len )
2146 {
2147 // function totally failed
467e0479 2148 return wxCONV_FAILED;
13dd924a
VZ
2149 }
2150
2151 // if we were really converting, check if we succeeded
2152 if ( buf )
2153 {
2154 if ( flags )
2155 {
2156 // check if the conversion failed, i.e. if any replacements
2157 // were done
2158 if ( usedDef )
467e0479 2159 return wxCONV_FAILED;
13dd924a
VZ
2160 }
2161 else // we must resort to double tripping...
2162 {
2163 wxWCharBuffer wcBuf(n);
467e0479 2164 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
13dd924a
VZ
2165 wcscmp(wcBuf, pwz) != 0 )
2166 {
2167 // we didn't obtain the same thing we started from, hence
2168 // the conversion was lossy and we consider that it failed
467e0479 2169 return wxCONV_FAILED;
13dd924a
VZ
2170 }
2171 }
2172 }
2173
03a991bc 2174 // see the comment above for the reason of "len - 1"
13dd924a 2175 return len - 1;
f1339c56 2176 }
dccce9ea 2177
7ef3ab50
VZ
2178 virtual size_t GetMBNulLen() const
2179 {
2180 if ( m_minMBCharWidth == 0 )
2181 {
2182 int len = ::WideCharToMultiByte
2183 (
2184 m_CodePage, // code page
2185 0, // no flags
2186 L"", // input string
2187 1, // translate just the NUL
2188 NULL, // output buffer
2189 0, // and its size
2190 NULL, // no replacement char
2191 NULL // [out] don't care if it was used
2192 );
2193
2194 wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2195 switch ( len )
2196 {
2197 default:
2198 wxLogDebug(_T("Unexpected NUL length %d"), len);
ef199164
DS
2199 self->m_minMBCharWidth = (size_t)-1;
2200 break;
7ef3ab50
VZ
2201
2202 case 0:
2203 self->m_minMBCharWidth = (size_t)-1;
2204 break;
2205
2206 case 1:
2207 case 2:
2208 case 4:
2209 self->m_minMBCharWidth = len;
2210 break;
2211 }
2212 }
2213
2214 return m_minMBCharWidth;
2215 }
2216
d36c9347
VZ
2217 virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2218
13dd924a
VZ
2219 bool IsOk() const { return m_CodePage != -1; }
2220
2221private:
2222 static bool CanUseNoBestFit()
2223 {
2224 static int s_isWin98Or2k = -1;
2225
2226 if ( s_isWin98Or2k == -1 )
2227 {
2228 int verMaj, verMin;
2229 switch ( wxGetOsVersion(&verMaj, &verMin) )
2230 {
2231 case wxWIN95:
2232 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2233 break;
2234
2235 case wxWINDOWS_NT:
2236 s_isWin98Or2k = verMaj >= 5;
2237 break;
2238
2239 default:
ef199164 2240 // unknown: be conservative by default
13dd924a 2241 s_isWin98Or2k = 0;
ef199164 2242 break;
13dd924a
VZ
2243 }
2244
2245 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
2246 }
2247
2248 return s_isWin98Or2k == 1;
2249 }
f1339c56 2250
89028980
VS
2251 static bool IsAtLeastWin2kSP4()
2252 {
8942f83a
WS
2253#ifdef __WXWINCE__
2254 return false;
2255#else
89028980
VS
2256 static int s_isAtLeastWin2kSP4 = -1;
2257
2258 if ( s_isAtLeastWin2kSP4 == -1 )
2259 {
2260 OSVERSIONINFOEX ver;
2261
2262 memset(&ver, 0, sizeof(ver));
2263 ver.dwOSVersionInfoSize = sizeof(ver);
2264 GetVersionEx((OSVERSIONINFO*)&ver);
2265
2266 s_isAtLeastWin2kSP4 =
2267 ((ver.dwMajorVersion > 5) || // Vista+
2268 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2269 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2270 ver.wServicePackMajor >= 4)) // 2000 SP4+
2271 ? 1 : 0;
2272 }
2273
2274 return s_isAtLeastWin2kSP4 == 1;
8942f83a 2275#endif
89028980
VS
2276 }
2277
eec47cc6 2278
c1464d9d 2279 // the code page we're working with
b1d66b54 2280 long m_CodePage;
c1464d9d 2281
7ef3ab50 2282 // cached result of GetMBNulLen(), set to 0 initially meaning
c1464d9d
VZ
2283 // "unknown"
2284 size_t m_minMBCharWidth;
1cd52418 2285};
e95354ec
VZ
2286
2287#endif // wxHAVE_WIN32_MB2WC
2288
f7e98dee
RN
2289// ============================================================================
2290// Cocoa conversion classes
2291// ============================================================================
2292
2293#if defined(__WXCOCOA__)
2294
ef199164
DS
2295// RN: There is no UTF-32 support in either Core Foundation or Cocoa.
2296// Strangely enough, Core Foundation uses UTF-32 internally
2297// quite a bit - it's just not public (yet).
f7e98dee
RN
2298
2299#include <CoreFoundation/CFString.h>
2300#include <CoreFoundation/CFStringEncodingExt.h>
2301
2302CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 2303{
638357a0 2304 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ef199164
DS
2305
2306 switch (encoding)
ecd9653b 2307 {
ef199164
DS
2308 case wxFONTENCODING_DEFAULT :
2309 enc = CFStringGetSystemEncoding();
2310 break ;
2311
ecd9653b
WS
2312 case wxFONTENCODING_ISO8859_1 :
2313 enc = kCFStringEncodingISOLatin1 ;
2314 break ;
2315 case wxFONTENCODING_ISO8859_2 :
2316 enc = kCFStringEncodingISOLatin2;
2317 break ;
2318 case wxFONTENCODING_ISO8859_3 :
2319 enc = kCFStringEncodingISOLatin3 ;
2320 break ;
2321 case wxFONTENCODING_ISO8859_4 :
2322 enc = kCFStringEncodingISOLatin4;
2323 break ;
2324 case wxFONTENCODING_ISO8859_5 :
2325 enc = kCFStringEncodingISOLatinCyrillic;
2326 break ;
2327 case wxFONTENCODING_ISO8859_6 :
2328 enc = kCFStringEncodingISOLatinArabic;
2329 break ;
2330 case wxFONTENCODING_ISO8859_7 :
2331 enc = kCFStringEncodingISOLatinGreek;
2332 break ;
2333 case wxFONTENCODING_ISO8859_8 :
2334 enc = kCFStringEncodingISOLatinHebrew;
2335 break ;
2336 case wxFONTENCODING_ISO8859_9 :
2337 enc = kCFStringEncodingISOLatin5;
2338 break ;
2339 case wxFONTENCODING_ISO8859_10 :
2340 enc = kCFStringEncodingISOLatin6;
2341 break ;
2342 case wxFONTENCODING_ISO8859_11 :
2343 enc = kCFStringEncodingISOLatinThai;
2344 break ;
2345 case wxFONTENCODING_ISO8859_13 :
2346 enc = kCFStringEncodingISOLatin7;
2347 break ;
2348 case wxFONTENCODING_ISO8859_14 :
2349 enc = kCFStringEncodingISOLatin8;
2350 break ;
2351 case wxFONTENCODING_ISO8859_15 :
2352 enc = kCFStringEncodingISOLatin9;
2353 break ;
2354
2355 case wxFONTENCODING_KOI8 :
2356 enc = kCFStringEncodingKOI8_R;
2357 break ;
2358 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2359 enc = kCFStringEncodingDOSRussian;
2360 break ;
2361
2362// case wxFONTENCODING_BULGARIAN :
2363// enc = ;
2364// break ;
2365
2366 case wxFONTENCODING_CP437 :
ef199164 2367 enc = kCFStringEncodingDOSLatinUS ;
ecd9653b
WS
2368 break ;
2369 case wxFONTENCODING_CP850 :
2370 enc = kCFStringEncodingDOSLatin1;
2371 break ;
2372 case wxFONTENCODING_CP852 :
2373 enc = kCFStringEncodingDOSLatin2;
2374 break ;
2375 case wxFONTENCODING_CP855 :
2376 enc = kCFStringEncodingDOSCyrillic;
2377 break ;
2378 case wxFONTENCODING_CP866 :
ef199164 2379 enc = kCFStringEncodingDOSRussian ;
ecd9653b
WS
2380 break ;
2381 case wxFONTENCODING_CP874 :
2382 enc = kCFStringEncodingDOSThai;
2383 break ;
2384 case wxFONTENCODING_CP932 :
2385 enc = kCFStringEncodingDOSJapanese;
2386 break ;
2387 case wxFONTENCODING_CP936 :
ef199164 2388 enc = kCFStringEncodingDOSChineseSimplif ;
ecd9653b
WS
2389 break ;
2390 case wxFONTENCODING_CP949 :
2391 enc = kCFStringEncodingDOSKorean;
2392 break ;
2393 case wxFONTENCODING_CP950 :
2394 enc = kCFStringEncodingDOSChineseTrad;
2395 break ;
ecd9653b
WS
2396 case wxFONTENCODING_CP1250 :
2397 enc = kCFStringEncodingWindowsLatin2;
2398 break ;
2399 case wxFONTENCODING_CP1251 :
ef199164 2400 enc = kCFStringEncodingWindowsCyrillic ;
ecd9653b
WS
2401 break ;
2402 case wxFONTENCODING_CP1252 :
ef199164 2403 enc = kCFStringEncodingWindowsLatin1 ;
ecd9653b
WS
2404 break ;
2405 case wxFONTENCODING_CP1253 :
2406 enc = kCFStringEncodingWindowsGreek;
2407 break ;
2408 case wxFONTENCODING_CP1254 :
2409 enc = kCFStringEncodingWindowsLatin5;
2410 break ;
2411 case wxFONTENCODING_CP1255 :
ef199164 2412 enc = kCFStringEncodingWindowsHebrew ;
ecd9653b
WS
2413 break ;
2414 case wxFONTENCODING_CP1256 :
ef199164 2415 enc = kCFStringEncodingWindowsArabic ;
ecd9653b
WS
2416 break ;
2417 case wxFONTENCODING_CP1257 :
2418 enc = kCFStringEncodingWindowsBalticRim;
2419 break ;
638357a0
RN
2420// This only really encodes to UTF7 (if that) evidently
2421// case wxFONTENCODING_UTF7 :
2422// enc = kCFStringEncodingNonLossyASCII ;
2423// break ;
ecd9653b
WS
2424 case wxFONTENCODING_UTF8 :
2425 enc = kCFStringEncodingUTF8 ;
2426 break ;
2427 case wxFONTENCODING_EUC_JP :
2428 enc = kCFStringEncodingEUC_JP;
2429 break ;
2430 case wxFONTENCODING_UTF16 :
f7e98dee 2431 enc = kCFStringEncodingUnicode ;
ecd9653b 2432 break ;
f7e98dee
RN
2433 case wxFONTENCODING_MACROMAN :
2434 enc = kCFStringEncodingMacRoman ;
2435 break ;
2436 case wxFONTENCODING_MACJAPANESE :
2437 enc = kCFStringEncodingMacJapanese ;
2438 break ;
2439 case wxFONTENCODING_MACCHINESETRAD :
2440 enc = kCFStringEncodingMacChineseTrad ;
2441 break ;
2442 case wxFONTENCODING_MACKOREAN :
2443 enc = kCFStringEncodingMacKorean ;
2444 break ;
2445 case wxFONTENCODING_MACARABIC :
2446 enc = kCFStringEncodingMacArabic ;
2447 break ;
2448 case wxFONTENCODING_MACHEBREW :
2449 enc = kCFStringEncodingMacHebrew ;
2450 break ;
2451 case wxFONTENCODING_MACGREEK :
2452 enc = kCFStringEncodingMacGreek ;
2453 break ;
2454 case wxFONTENCODING_MACCYRILLIC :
2455 enc = kCFStringEncodingMacCyrillic ;
2456 break ;
2457 case wxFONTENCODING_MACDEVANAGARI :
2458 enc = kCFStringEncodingMacDevanagari ;
2459 break ;
2460 case wxFONTENCODING_MACGURMUKHI :
2461 enc = kCFStringEncodingMacGurmukhi ;
2462 break ;
2463 case wxFONTENCODING_MACGUJARATI :
2464 enc = kCFStringEncodingMacGujarati ;
2465 break ;
2466 case wxFONTENCODING_MACORIYA :
2467 enc = kCFStringEncodingMacOriya ;
2468 break ;
2469 case wxFONTENCODING_MACBENGALI :
2470 enc = kCFStringEncodingMacBengali ;
2471 break ;
2472 case wxFONTENCODING_MACTAMIL :
2473 enc = kCFStringEncodingMacTamil ;
2474 break ;
2475 case wxFONTENCODING_MACTELUGU :
2476 enc = kCFStringEncodingMacTelugu ;
2477 break ;
2478 case wxFONTENCODING_MACKANNADA :
2479 enc = kCFStringEncodingMacKannada ;
2480 break ;
2481 case wxFONTENCODING_MACMALAJALAM :
2482 enc = kCFStringEncodingMacMalayalam ;
2483 break ;
2484 case wxFONTENCODING_MACSINHALESE :
2485 enc = kCFStringEncodingMacSinhalese ;
2486 break ;
2487 case wxFONTENCODING_MACBURMESE :
2488 enc = kCFStringEncodingMacBurmese ;
2489 break ;
2490 case wxFONTENCODING_MACKHMER :
2491 enc = kCFStringEncodingMacKhmer ;
2492 break ;
2493 case wxFONTENCODING_MACTHAI :
2494 enc = kCFStringEncodingMacThai ;
2495 break ;
2496 case wxFONTENCODING_MACLAOTIAN :
2497 enc = kCFStringEncodingMacLaotian ;
2498 break ;
2499 case wxFONTENCODING_MACGEORGIAN :
2500 enc = kCFStringEncodingMacGeorgian ;
2501 break ;
2502 case wxFONTENCODING_MACARMENIAN :
2503 enc = kCFStringEncodingMacArmenian ;
2504 break ;
2505 case wxFONTENCODING_MACCHINESESIMP :
2506 enc = kCFStringEncodingMacChineseSimp ;
2507 break ;
2508 case wxFONTENCODING_MACTIBETAN :
2509 enc = kCFStringEncodingMacTibetan ;
2510 break ;
2511 case wxFONTENCODING_MACMONGOLIAN :
2512 enc = kCFStringEncodingMacMongolian ;
2513 break ;
2514 case wxFONTENCODING_MACETHIOPIC :
2515 enc = kCFStringEncodingMacEthiopic ;
2516 break ;
2517 case wxFONTENCODING_MACCENTRALEUR :
2518 enc = kCFStringEncodingMacCentralEurRoman ;
2519 break ;
2520 case wxFONTENCODING_MACVIATNAMESE :
2521 enc = kCFStringEncodingMacVietnamese ;
2522 break ;
2523 case wxFONTENCODING_MACARABICEXT :
2524 enc = kCFStringEncodingMacExtArabic ;
2525 break ;
2526 case wxFONTENCODING_MACSYMBOL :
2527 enc = kCFStringEncodingMacSymbol ;
2528 break ;
2529 case wxFONTENCODING_MACDINGBATS :
2530 enc = kCFStringEncodingMacDingbats ;
2531 break ;
2532 case wxFONTENCODING_MACTURKISH :
2533 enc = kCFStringEncodingMacTurkish ;
2534 break ;
2535 case wxFONTENCODING_MACCROATIAN :
2536 enc = kCFStringEncodingMacCroatian ;
2537 break ;
2538 case wxFONTENCODING_MACICELANDIC :
2539 enc = kCFStringEncodingMacIcelandic ;
2540 break ;
2541 case wxFONTENCODING_MACROMANIAN :
2542 enc = kCFStringEncodingMacRomanian ;
2543 break ;
2544 case wxFONTENCODING_MACCELTIC :
2545 enc = kCFStringEncodingMacCeltic ;
2546 break ;
2547 case wxFONTENCODING_MACGAELIC :
2548 enc = kCFStringEncodingMacGaelic ;
2549 break ;
ecd9653b
WS
2550// case wxFONTENCODING_MACKEYBOARD :
2551// enc = kCFStringEncodingMacKeyboardGlyphs ;
2552// break ;
ef199164 2553
ecd9653b
WS
2554 default :
2555 // because gcc is picky
2556 break ;
ef199164
DS
2557 }
2558
ecd9653b 2559 return enc ;
f7e98dee
RN
2560}
2561
f7e98dee
RN
2562class wxMBConv_cocoa : public wxMBConv
2563{
2564public:
2565 wxMBConv_cocoa()
2566 {
2567 Init(CFStringGetSystemEncoding()) ;
2568 }
2569
d36c9347
VZ
2570 wxMBConv_cocoa(const wxMBConv_cocoa& conv)
2571 {
2572 m_encoding = conv.m_encoding;
2573 }
2574
a6900d10 2575#if wxUSE_FONTMAP
f7e98dee
RN
2576 wxMBConv_cocoa(const wxChar* name)
2577 {
267e11c5 2578 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
f7e98dee 2579 }
a6900d10 2580#endif
f7e98dee
RN
2581
2582 wxMBConv_cocoa(wxFontEncoding encoding)
2583 {
2584 Init( wxCFStringEncFromFontEnc(encoding) );
2585 }
2586
2587 ~wxMBConv_cocoa()
2588 {
2589 }
2590
2591 void Init( CFStringEncoding encoding)
2592 {
638357a0 2593 m_encoding = encoding ;
f7e98dee
RN
2594 }
2595
2596 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2597 {
2598 wxASSERT(szUnConv);
ecd9653b 2599
638357a0
RN
2600 CFStringRef theString = CFStringCreateWithBytes (
2601 NULL, //the allocator
2602 (const UInt8*)szUnConv,
2603 strlen(szUnConv),
2604 m_encoding,
2605 false //no BOM/external representation
f7e98dee
RN
2606 );
2607
2608 wxASSERT(theString);
2609
638357a0
RN
2610 size_t nOutLength = CFStringGetLength(theString);
2611
2612 if (szOut == NULL)
f7e98dee 2613 {
f7e98dee 2614 CFRelease(theString);
638357a0 2615 return nOutLength;
f7e98dee 2616 }
ecd9653b 2617
638357a0 2618 CFRange theRange = { 0, nOutSize };
ecd9653b 2619
638357a0
RN
2620#if SIZEOF_WCHAR_T == 4
2621 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2622#endif
3698ae71 2623
f7e98dee 2624 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
3698ae71 2625
f7e98dee 2626 CFRelease(theString);
ecd9653b 2627
ef199164 2628 szUniCharBuffer[nOutLength] = '\0';
f7e98dee
RN
2629
2630#if SIZEOF_WCHAR_T == 4
ef199164
DS
2631 wxMBConvUTF16 converter;
2632 converter.MB2WC( szOut, (const char*)szUniCharBuffer, nOutSize );
2633 delete [] szUniCharBuffer;
f7e98dee 2634#endif
3698ae71 2635
638357a0 2636 return nOutLength;
f7e98dee
RN
2637 }
2638
2639 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2640 {
638357a0 2641 wxASSERT(szUnConv);
3698ae71 2642
f7e98dee 2643 size_t nRealOutSize;
638357a0 2644 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 2645 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2646
f7e98dee 2647#if SIZEOF_WCHAR_T == 4
d9d488cf 2648 wxMBConvUTF16 converter ;
ef199164
DS
2649 nBufSize = converter.WC2MB( NULL, szUnConv, 0 );
2650 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1];
2651 converter.WC2MB( (char*) szUniBuffer, szUnConv, nBufSize + sizeof(UniChar));
f7e98dee 2652 nBufSize /= sizeof(UniChar);
f7e98dee
RN
2653#endif
2654
2655 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2656 NULL, //allocator
2657 szUniBuffer,
2658 nBufSize,
638357a0 2659 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 2660 );
ecd9653b 2661
f7e98dee 2662 wxASSERT(theString);
ecd9653b 2663
f7e98dee 2664 //Note that CER puts a BOM when converting to unicode
638357a0
RN
2665 //so we check and use getchars instead in that case
2666 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 2667 {
638357a0
RN
2668 if (szOut != NULL)
2669 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
3698ae71 2670
638357a0
RN
2671 nRealOutSize = CFStringGetLength(theString) + 1;
2672 }
2673 else
2674 {
2675 CFStringGetBytes(
2676 theString,
2677 CFRangeMake(0, CFStringGetLength(theString)),
2678 m_encoding,
2679 0, //what to put in characters that can't be converted -
2680 //0 tells CFString to return NULL if it meets such a character
2681 false, //not an external representation
2682 (UInt8*) szOut,
3698ae71 2683 nOutSize,
638357a0
RN
2684 (CFIndex*) &nRealOutSize
2685 );
f7e98dee 2686 }
ecd9653b 2687
638357a0 2688 CFRelease(theString);
ecd9653b 2689
638357a0
RN
2690#if SIZEOF_WCHAR_T == 4
2691 delete[] szUniBuffer;
2692#endif
ecd9653b 2693
f7e98dee
RN
2694 return nRealOutSize - 1;
2695 }
2696
d36c9347
VZ
2697 virtual wxMBConv *Clone() const { return new wxMBConv_cocoa(*this); }
2698
f7e98dee 2699 bool IsOk() const
ecd9653b 2700 {
3698ae71 2701 return m_encoding != kCFStringEncodingInvalidId &&
638357a0 2702 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2703 }
2704
2705private:
638357a0 2706 CFStringEncoding m_encoding ;
f7e98dee
RN
2707};
2708
2709#endif // defined(__WXCOCOA__)
2710
335d31e0
SC
2711// ============================================================================
2712// Mac conversion classes
2713// ============================================================================
2714
2715#if defined(__WXMAC__) && defined(TARGET_CARBON)
2716
2717class wxMBConv_mac : public wxMBConv
2718{
2719public:
2720 wxMBConv_mac()
2721 {
2722 Init(CFStringGetSystemEncoding()) ;
2723 }
2724
d36c9347
VZ
2725 wxMBConv_mac(const wxMBConv_mac& conv)
2726 {
2727 Init(conv.m_char_encoding);
2728 }
2729
2d1659cf 2730#if wxUSE_FONTMAP
335d31e0
SC
2731 wxMBConv_mac(const wxChar* name)
2732 {
ef199164 2733 Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) );
335d31e0 2734 }
2d1659cf 2735#endif
335d31e0
SC
2736
2737 wxMBConv_mac(wxFontEncoding encoding)
2738 {
d775fa82
WS
2739 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2740 }
2741
2742 ~wxMBConv_mac()
2743 {
2744 OSStatus status = noErr ;
739cb14a
SC
2745 if (m_MB2WC_converter)
2746 status = TECDisposeConverter(m_MB2WC_converter);
2747 if (m_WC2MB_converter)
2748 status = TECDisposeConverter(m_WC2MB_converter);
d775fa82
WS
2749 }
2750
739cb14a
SC
2751 void Init( TextEncodingBase encoding,TextEncodingVariant encodingVariant = kTextEncodingDefaultVariant ,
2752 TextEncodingFormat encodingFormat = kTextEncodingDefaultFormat)
d775fa82 2753 {
739cb14a
SC
2754 m_MB2WC_converter = NULL ;
2755 m_WC2MB_converter = NULL ;
2756 m_char_encoding = CreateTextEncoding(encoding, encodingVariant, encodingFormat) ;
ef199164 2757 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 0, kUnicode16BitFormat) ;
739cb14a 2758 }
d775fa82 2759
739cb14a
SC
2760 virtual void CreateIfNeeded() const
2761 {
2762 if ( m_MB2WC_converter == NULL && m_WC2MB_converter == NULL )
2763 {
2764 OSStatus status = noErr ;
2765 status = TECCreateConverter(&m_MB2WC_converter,
d775fa82
WS
2766 m_char_encoding,
2767 m_unicode_encoding);
739cb14a
SC
2768 wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2769 status = TECCreateConverter(&m_WC2MB_converter,
d775fa82
WS
2770 m_unicode_encoding,
2771 m_char_encoding);
739cb14a
SC
2772 wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2773 }
d775fa82 2774 }
739cb14a 2775
335d31e0
SC
2776 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2777 {
739cb14a 2778 CreateIfNeeded() ;
d775fa82
WS
2779 OSStatus status = noErr ;
2780 ByteCount byteOutLen ;
9088c87b 2781 ByteCount byteInLen = strlen(psz) + 1;
d775fa82
WS
2782 wchar_t *tbuf = NULL ;
2783 UniChar* ubuf = NULL ;
2784 size_t res = 0 ;
2785
2786 if (buf == NULL)
2787 {
ef199164
DS
2788 // Apple specs say at least 32
2789 n = wxMax( 32, byteInLen ) ;
2790 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
d775fa82 2791 }
ef199164 2792
d775fa82 2793 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
ef199164 2794
f3a355ce 2795#if SIZEOF_WCHAR_T == 4
d775fa82 2796 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2797#else
d775fa82 2798 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2799#endif
ef199164
DS
2800
2801 status = TECConvertText(
2802 m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
2803 (TextPtr) ubuf, byteBufferLen, &byteOutLen);
2804
f3a355ce 2805#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2806 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2807 // is not properly terminated we get random characters at the end
2808 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d9d488cf 2809 wxMBConvUTF16 converter ;
ef199164 2810 res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
d775fa82 2811 free( ubuf ) ;
f3a355ce 2812#else
d775fa82 2813 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2814#endif
ef199164 2815
d775fa82
WS
2816 if ( buf == NULL )
2817 free(tbuf) ;
335d31e0 2818
335d31e0
SC
2819 if ( buf && res < n)
2820 buf[res] = 0;
2821
d775fa82 2822 return res ;
335d31e0
SC
2823 }
2824
2825 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82 2826 {
739cb14a 2827 CreateIfNeeded() ;
d775fa82
WS
2828 OSStatus status = noErr ;
2829 ByteCount byteOutLen ;
2830 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2831
2832 char *tbuf = NULL ;
2833
2834 if (buf == NULL)
2835 {
ef199164
DS
2836 // Apple specs say at least 32
2837 n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2838 tbuf = (char*) malloc( n ) ;
2839 }
2840
2841 ByteCount byteBufferLen = n ;
2842 UniChar* ubuf = NULL ;
ef199164 2843
f3a355ce 2844#if SIZEOF_WCHAR_T == 4
d9d488cf 2845 wxMBConvUTF16 converter ;
ef199164 2846 size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
d775fa82
WS
2847 byteInLen = unicharlen ;
2848 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
ef199164 2849 converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
f3a355ce 2850#else
d775fa82 2851 ubuf = (UniChar*) psz ;
f3a355ce 2852#endif
ef199164
DS
2853
2854 status = TECConvertText(
2855 m_WC2MB_converter, (ConstTextPtr) ubuf, byteInLen, &byteInLen,
2856 (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2857
f3a355ce 2858#if SIZEOF_WCHAR_T == 4
d775fa82 2859 free( ubuf ) ;
f3a355ce 2860#endif
ef199164 2861
d775fa82
WS
2862 if ( buf == NULL )
2863 free(tbuf) ;
335d31e0 2864
d775fa82 2865 size_t res = byteOutLen ;
335d31e0 2866 if ( buf && res < n)
638357a0 2867 {
335d31e0 2868 buf[res] = 0;
3698ae71 2869
638357a0
RN
2870 //we need to double-trip to verify it didn't insert any ? in place
2871 //of bogus characters
2872 wxWCharBuffer wcBuf(n);
2873 size_t pszlen = wxWcslen(psz);
467e0479 2874 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
638357a0
RN
2875 wxWcslen(wcBuf) != pszlen ||
2876 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2877 {
2878 // we didn't obtain the same thing we started from, hence
2879 // the conversion was lossy and we consider that it failed
467e0479 2880 return wxCONV_FAILED;
638357a0
RN
2881 }
2882 }
335d31e0 2883
d775fa82 2884 return res ;
335d31e0
SC
2885 }
2886
d3478e2c 2887 virtual wxMBConv *Clone() const { return new wxMBConv_mac(*this); }
d36c9347 2888
335d31e0 2889 bool IsOk() const
739cb14a
SC
2890 {
2891 CreateIfNeeded() ;
2892 return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL;
2893 }
335d31e0 2894
739cb14a
SC
2895protected :
2896 mutable TECObjectRef m_MB2WC_converter;
2897 mutable TECObjectRef m_WC2MB_converter;
d775fa82 2898
ef199164
DS
2899 TextEncodingBase m_char_encoding;
2900 TextEncodingBase m_unicode_encoding;
335d31e0
SC
2901};
2902
739cb14a
SC
2903// MB is decomposed (D) normalized UTF8
2904
2905class wxMBConv_macUTF8D : public wxMBConv_mac
2906{
2907public :
2908 wxMBConv_macUTF8D()
2909 {
2910 Init( kTextEncodingUnicodeDefault , kUnicodeNoSubset , kUnicodeUTF8Format ) ;
2911 m_uni = NULL;
fbb0b8af 2912 m_uniBack = NULL ;
739cb14a
SC
2913 }
2914
2915 ~wxMBConv_macUTF8D()
2916 {
fbb0b8af
SC
2917 if (m_uni!=NULL)
2918 DisposeUnicodeToTextInfo(&m_uni);
2919 if (m_uniBack!=NULL)
2920 DisposeUnicodeToTextInfo(&m_uniBack);
739cb14a
SC
2921 }
2922
2923 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2924 {
2925 CreateIfNeeded() ;
2926 OSStatus status = noErr ;
2927 ByteCount byteOutLen ;
2928 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2929
2930 char *tbuf = NULL ;
2931
2932 if (buf == NULL)
2933 {
2934 // Apple specs say at least 32
2935 n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2936 tbuf = (char*) malloc( n ) ;
2937 }
2938
2939 ByteCount byteBufferLen = n ;
2940 UniChar* ubuf = NULL ;
2941
2942#if SIZEOF_WCHAR_T == 4
2943 wxMBConvUTF16 converter ;
2944 size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
2945 byteInLen = unicharlen ;
2946 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2947 converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
2948#else
2949 ubuf = (UniChar*) psz ;
2950#endif
2951
2952 // ubuf is a non-decomposed UniChar buffer
2953
2954 ByteCount dcubuflen = byteInLen * 2 + 2 ;
2955 ByteCount dcubufread , dcubufwritten ;
2956 UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
2957
2958 ConvertFromUnicodeToText( m_uni , byteInLen , ubuf ,
2959 kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , dcubuf ) ;
2960
2961 // we now convert that decomposed buffer into UTF8
2962
2963 status = TECConvertText(
2964 m_WC2MB_converter, (ConstTextPtr) dcubuf, dcubufwritten, &dcubufread,
2965 (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2966
2967 free( dcubuf );
2968
2969#if SIZEOF_WCHAR_T == 4
2970 free( ubuf ) ;
2971#endif
2972
2973 if ( buf == NULL )
2974 free(tbuf) ;
2975
2976 size_t res = byteOutLen ;
2977 if ( buf && res < n)
2978 {
2979 buf[res] = 0;
2980 // don't test for round-trip fidelity yet, we cannot guarantee it yet
2981 }
2982
2983 return res ;
2984 }
2985
fbb0b8af
SC
2986 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2987 {
2988 CreateIfNeeded() ;
2989 OSStatus status = noErr ;
2990 ByteCount byteOutLen ;
2991 ByteCount byteInLen = strlen(psz) + 1;
2992 wchar_t *tbuf = NULL ;
2993 UniChar* ubuf = NULL ;
2994 size_t res = 0 ;
2995
2996 if (buf == NULL)
2997 {
2998 // Apple specs say at least 32
2999 n = wxMax( 32, byteInLen ) ;
3000 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
3001 }
3002
3003 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
3004
3005#if SIZEOF_WCHAR_T == 4
3006 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
3007#else
3008 ubuf = (UniChar*) (buf ? buf : tbuf) ;
3009#endif
3010
3011 ByteCount dcubuflen = byteBufferLen * 2 + 2 ;
3012 ByteCount dcubufread , dcubufwritten ;
3013 UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
3014
3015 status = TECConvertText(
3016 m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
3017 (TextPtr) dcubuf, dcubuflen, &byteOutLen);
3018 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
3019 // is not properly terminated we get random characters at the end
3020 dcubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
3021
3022 // now from the decomposed UniChar to properly composed uniChar
3023 ConvertFromUnicodeToText( m_uniBack , byteOutLen , dcubuf ,
3024 kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , ubuf ) ;
3025
3026 free( dcubuf );
3027 byteOutLen = dcubufwritten ;
3028 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
3029
3030
3031#if SIZEOF_WCHAR_T == 4
3032 wxMBConvUTF16 converter ;
3033 res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
3034 free( ubuf ) ;
3035#else
3036 res = byteOutLen / sizeof( UniChar ) ;
3037#endif
3038
3039 if ( buf == NULL )
3040 free(tbuf) ;
3041
3042 if ( buf && res < n)
3043 buf[res] = 0;
3044
3045 return res ;
3046 }
3047
739cb14a
SC
3048 virtual void CreateIfNeeded() const
3049 {
3050 wxMBConv_mac::CreateIfNeeded() ;
3051 if ( m_uni == NULL )
3052 {
3053 m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3054 kUnicodeNoSubset, kTextEncodingDefaultFormat);
3055 m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3056 kUnicodeCanonicalDecompVariant, kTextEncodingDefaultFormat);
3057 m_map.mappingVersion = kUnicodeUseLatestMapping;
3058
3059 OSStatus err = CreateUnicodeToTextInfo(&m_map, &m_uni);
3060 wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
fbb0b8af
SC
3061
3062 m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3063 kUnicodeNoSubset, kTextEncodingDefaultFormat);
3064 m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3065 kUnicodeCanonicalCompVariant, kTextEncodingDefaultFormat);
3066 m_map.mappingVersion = kUnicodeUseLatestMapping;
3067 err = CreateUnicodeToTextInfo(&m_map, &m_uniBack);
3068 wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
739cb14a
SC
3069 }
3070 }
3071protected :
3072 mutable UnicodeToTextInfo m_uni;
fbb0b8af 3073 mutable UnicodeToTextInfo m_uniBack;
739cb14a
SC
3074 mutable UnicodeMapping m_map;
3075};
335d31e0 3076#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 3077
36acb880
VZ
3078// ============================================================================
3079// wxEncodingConverter based conversion classes
3080// ============================================================================
3081
1e6feb95 3082#if wxUSE_FONTMAP
1cd52418 3083
e95354ec 3084class wxMBConv_wxwin : public wxMBConv
1cd52418 3085{
8b04d4c4
VZ
3086private:
3087 void Init()
3088 {
3089 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
3090 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
3091 }
3092
6001e347 3093public:
f1339c56
RR
3094 // temporarily just use wxEncodingConverter stuff,
3095 // so that it works while a better implementation is built
e95354ec 3096 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
3097 {
3098 if (name)
267e11c5 3099 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
3100 else
3101 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 3102
8b04d4c4
VZ
3103 Init();
3104 }
3105
e95354ec 3106 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
3107 {
3108 m_enc = enc;
3109
3110 Init();
f1339c56 3111 }
dccce9ea 3112
bde4baac 3113 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
3114 {
3115 size_t inbuf = strlen(psz);
dccce9ea 3116 if (buf)
c643a977 3117 {
ef199164 3118 if (!m2w.Convert(psz, buf))
467e0479 3119 return wxCONV_FAILED;
c643a977 3120 }
f1339c56
RR
3121 return inbuf;
3122 }
dccce9ea 3123
bde4baac 3124 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 3125 {
f8d791e0 3126 const size_t inbuf = wxWcslen(psz);
f1339c56 3127 if (buf)
c643a977 3128 {
ef199164 3129 if (!w2m.Convert(psz, buf))
467e0479 3130 return wxCONV_FAILED;
c643a977 3131 }
dccce9ea 3132
f1339c56
RR
3133 return inbuf;
3134 }
dccce9ea 3135
7ef3ab50 3136 virtual size_t GetMBNulLen() const
eec47cc6
VZ
3137 {
3138 switch ( m_enc )
3139 {
3140 case wxFONTENCODING_UTF16BE:
3141 case wxFONTENCODING_UTF16LE:
c1464d9d 3142 return 2;
eec47cc6
VZ
3143
3144 case wxFONTENCODING_UTF32BE:
3145 case wxFONTENCODING_UTF32LE:
c1464d9d 3146 return 4;
eec47cc6
VZ
3147
3148 default:
c1464d9d 3149 return 1;
eec47cc6
VZ
3150 }
3151 }
3152
d36c9347
VZ
3153 virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
3154
7ef3ab50
VZ
3155 bool IsOk() const { return m_ok; }
3156
3157public:
3158 wxFontEncoding m_enc;
3159 wxEncodingConverter m2w, w2m;
3160
3161private:
cafbf6fb
VZ
3162 // were we initialized successfully?
3163 bool m_ok;
fc7a2a60 3164
e95354ec 3165 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 3166};
6001e347 3167
8f115891
MW
3168// make the constructors available for unit testing
3169WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
3170{
3171 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
3172 if ( !result->IsOk() )
3173 {
3174 delete result;
3175 return 0;
3176 }
ef199164 3177
8f115891
MW
3178 return result;
3179}
3180
1e6feb95
VZ
3181#endif // wxUSE_FONTMAP
3182
36acb880
VZ
3183// ============================================================================
3184// wxCSConv implementation
3185// ============================================================================
3186
8b04d4c4 3187void wxCSConv::Init()
6001e347 3188{
e95354ec
VZ
3189 m_name = NULL;
3190 m_convReal = NULL;
3191 m_deferred = true;
3192}
3193
8b04d4c4
VZ
3194wxCSConv::wxCSConv(const wxChar *charset)
3195{
3196 Init();
82713003 3197
e95354ec
VZ
3198 if ( charset )
3199 {
e95354ec
VZ
3200 SetName(charset);
3201 }
bda3d86a 3202
e4277538
VZ
3203#if wxUSE_FONTMAP
3204 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
3205#else
bda3d86a 3206 m_encoding = wxFONTENCODING_SYSTEM;
e4277538 3207#endif
6001e347
RR
3208}
3209
8b04d4c4
VZ
3210wxCSConv::wxCSConv(wxFontEncoding encoding)
3211{
bda3d86a 3212 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
3213 {
3214 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
3215
3216 encoding = wxFONTENCODING_SYSTEM;
3217 }
3218
8b04d4c4
VZ
3219 Init();
3220
bda3d86a 3221 m_encoding = encoding;
8b04d4c4
VZ
3222}
3223
6001e347
RR
3224wxCSConv::~wxCSConv()
3225{
65e50848
JS
3226 Clear();
3227}
3228
54380f29 3229wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 3230 : wxMBConv()
54380f29 3231{
8b04d4c4
VZ
3232 Init();
3233
54380f29 3234 SetName(conv.m_name);
8b04d4c4 3235 m_encoding = conv.m_encoding;
54380f29
GD
3236}
3237
3238wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
3239{
3240 Clear();
8b04d4c4 3241
54380f29 3242 SetName(conv.m_name);
8b04d4c4
VZ
3243 m_encoding = conv.m_encoding;
3244
54380f29
GD
3245 return *this;
3246}
3247
65e50848
JS
3248void wxCSConv::Clear()
3249{
8b04d4c4 3250 free(m_name);
e95354ec 3251 delete m_convReal;
8b04d4c4 3252
65e50848 3253 m_name = NULL;
e95354ec 3254 m_convReal = NULL;
6001e347
RR
3255}
3256
3257void wxCSConv::SetName(const wxChar *charset)
3258{
f1339c56
RR
3259 if (charset)
3260 {
3261 m_name = wxStrdup(charset);
e95354ec 3262 m_deferred = true;
f1339c56 3263 }
6001e347
RR
3264}
3265
#if wxUSE_FONTMAP

// Hash map from a font encoding to a charset name.
//
// wxCSConv::DoCreate() stores here the name under which iconv accepted an
// encoding, or an empty string if none of the names known for this encoding
// worked.
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// the unique instance of the cache
static wxEncodingNameCache gs_nameCache;

#endif // wxUSE_FONTMAP
3273
e95354ec
VZ
3274wxMBConv *wxCSConv::DoCreate() const
3275{
ce6f8d6f
VZ
3276#if wxUSE_FONTMAP
3277 wxLogTrace(TRACE_STRCONV,
3278 wxT("creating conversion for %s"),
3279 (m_name ? m_name
3280 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
3281#endif // wxUSE_FONTMAP
3282
c547282d
VZ
3283 // check for the special case of ASCII or ISO8859-1 charset: as we have
3284 // special knowledge of it anyhow, we don't need to create a special
3285 // conversion object
e4277538
VZ
3286 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
3287 m_encoding == wxFONTENCODING_DEFAULT )
f1339c56 3288 {
e95354ec
VZ
3289 // don't convert at all
3290 return NULL;
3291 }
dccce9ea 3292
e95354ec
VZ
3293 // we trust OS to do conversion better than we can so try external
3294 // conversion methods first
3295 //
3296 // the full order is:
3297 // 1. OS conversion (iconv() under Unix or Win32 API)
3298 // 2. hard coded conversions for UTF
3299 // 3. wxEncodingConverter as fall back
3300
3301 // step (1)
3302#ifdef HAVE_ICONV
c547282d 3303#if !wxUSE_FONTMAP
e95354ec 3304 if ( m_name )
c547282d 3305#endif // !wxUSE_FONTMAP
e95354ec 3306 {
c547282d 3307 wxString name(m_name);
8b3eb85d
VZ
3308 wxFontEncoding encoding(m_encoding);
3309
3310 if ( !name.empty() )
3311 {
3312 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
3313 if ( conv->IsOk() )
3314 return conv;
3315
3316 delete conv;
c547282d
VZ
3317
3318#if wxUSE_FONTMAP
8b3eb85d
VZ
3319 encoding =
3320 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
c547282d 3321#endif // wxUSE_FONTMAP
8b3eb85d
VZ
3322 }
3323#if wxUSE_FONTMAP
3324 {
3325 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
3326 if ( it != gs_nameCache.end() )
3327 {
3328 if ( it->second.empty() )
3329 return NULL;
c547282d 3330
8b3eb85d
VZ
3331 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
3332 if ( conv->IsOk() )
3333 return conv;
e95354ec 3334
8b3eb85d
VZ
3335 delete conv;
3336 }
3337
3338 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
3339
3340 for ( ; *names; ++names )
3341 {
3342 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
3343 if ( conv->IsOk() )
3344 {
3345 gs_nameCache[encoding] = *names;
3346 return conv;
3347 }
3348
3349 delete conv;
3350 }
3351
40711af8 3352 gs_nameCache[encoding] = _T(""); // cache the failure
8b3eb85d
VZ
3353 }
3354#endif // wxUSE_FONTMAP
e95354ec
VZ
3355 }
3356#endif // HAVE_ICONV
3357
3358#ifdef wxHAVE_WIN32_MB2WC
3359 {
7608a683 3360#if wxUSE_FONTMAP
e95354ec
VZ
3361 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
3362 : new wxMBConv_win32(m_encoding);
3363 if ( conv->IsOk() )
3364 return conv;
3365
3366 delete conv;
7608a683
WS
3367#else
3368 return NULL;
3369#endif
e95354ec
VZ
3370 }
3371#endif // wxHAVE_WIN32_MB2WC
ef199164 3372
d775fa82
WS
3373#if defined(__WXMAC__)
3374 {
5c3c8676 3375 // leave UTF16 and UTF32 to the built-ins of wx
3698ae71 3376 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
5c3c8676 3377 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
d775fa82 3378 {
2d1659cf 3379#if wxUSE_FONTMAP
d775fa82
WS
3380 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
3381 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
3382#else
3383 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
3384#endif
d775fa82 3385 if ( conv->IsOk() )
f7e98dee
RN
3386 return conv;
3387
3388 delete conv;
3389 }
3390 }
3391#endif
ef199164 3392
f7e98dee
RN
3393#if defined(__WXCOCOA__)
3394 {
3395 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
3396 {
a6900d10 3397#if wxUSE_FONTMAP
f7e98dee
RN
3398 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
3399 : new wxMBConv_cocoa(m_encoding);
a6900d10
RN
3400#else
3401 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
3402#endif
ef199164 3403
f7e98dee 3404 if ( conv->IsOk() )
d775fa82
WS
3405 return conv;
3406
3407 delete conv;
3408 }
335d31e0
SC
3409 }
3410#endif
e95354ec
VZ
3411 // step (2)
3412 wxFontEncoding enc = m_encoding;
3413#if wxUSE_FONTMAP
c547282d
VZ
3414 if ( enc == wxFONTENCODING_SYSTEM && m_name )
3415 {
3416 // use "false" to suppress interactive dialogs -- we can be called from
3417 // anywhere and popping up a dialog from here is the last thing we want to
3418 // do
267e11c5 3419 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 3420 }
e95354ec
VZ
3421#endif // wxUSE_FONTMAP
3422
3423 switch ( enc )
3424 {
3425 case wxFONTENCODING_UTF7:
3426 return new wxMBConvUTF7;
3427
3428 case wxFONTENCODING_UTF8:
3429 return new wxMBConvUTF8;
3430
e95354ec
VZ
3431 case wxFONTENCODING_UTF16BE:
3432 return new wxMBConvUTF16BE;
3433
3434 case wxFONTENCODING_UTF16LE:
3435 return new wxMBConvUTF16LE;
3436
e95354ec
VZ
3437 case wxFONTENCODING_UTF32BE:
3438 return new wxMBConvUTF32BE;
3439
3440 case wxFONTENCODING_UTF32LE:
3441 return new wxMBConvUTF32LE;
3442
3443 default:
3444 // nothing to do but put here to suppress gcc warnings
ef199164 3445 break;
e95354ec
VZ
3446 }
3447
3448 // step (3)
3449#if wxUSE_FONTMAP
3450 {
3451 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
3452 : new wxMBConv_wxwin(m_encoding);
3453 if ( conv->IsOk() )
3454 return conv;
3455
3456 delete conv;
3457 }
3458#endif // wxUSE_FONTMAP
3459
a58d4f4d
VS
3460 // NB: This is a hack to prevent deadlock. What could otherwise happen
3461 // in Unicode build: wxConvLocal creation ends up being here
3462 // because of some failure and logs the error. But wxLog will try to
6a17b868
SN
3463 // attach a timestamp, for which it will need wxConvLocal (to convert
3464 // time to char* and then wchar_t*), but that fails, tries to log the
3465 // error, but wxLog has an (already locked) critical section that
3466 // guards the static buffer.
a58d4f4d
VS
3467 static bool alreadyLoggingError = false;
3468 if (!alreadyLoggingError)
3469 {
3470 alreadyLoggingError = true;
3471 wxLogError(_("Cannot convert from the charset '%s'!"),
3472 m_name ? m_name
e95354ec
VZ
3473 :
3474#if wxUSE_FONTMAP
267e11c5 3475 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
e95354ec
VZ
3476#else // !wxUSE_FONTMAP
3477 wxString::Format(_("encoding %s"), m_encoding).c_str()
3478#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
3479 );
ef199164 3480
a58d4f4d
VS
3481 alreadyLoggingError = false;
3482 }
e95354ec
VZ
3483
3484 return NULL;
3485}
3486
3487void wxCSConv::CreateConvIfNeeded() const
3488{
3489 if ( m_deferred )
3490 {
3491 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
3492
3493#if wxUSE_INTL
3494 // if we don't have neither the name nor the encoding, use the default
3495 // encoding for this system
3496 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3497 {
4d312c22 3498 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
3499 }
3500#endif // wxUSE_INTL
3501
e95354ec
VZ
3502 self->m_convReal = DoCreate();
3503 self->m_deferred = false;
6001e347 3504 }
6001e347
RR
3505}
3506
3507size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
3508{
e95354ec 3509 CreateConvIfNeeded();
dccce9ea 3510
e95354ec
VZ
3511 if (m_convReal)
3512 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
3513
3514 // latin-1 (direct)
4def3b35 3515 size_t len = strlen(psz);
dccce9ea 3516
f1339c56
RR
3517 if (buf)
3518 {
4def3b35 3519 for (size_t c = 0; c <= len; c++)
f1339c56
RR
3520 buf[c] = (unsigned char)(psz[c]);
3521 }
dccce9ea 3522
f1339c56 3523 return len;
6001e347
RR
3524}
3525
3526size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
3527{
e95354ec 3528 CreateConvIfNeeded();
dccce9ea 3529
e95354ec
VZ
3530 if (m_convReal)
3531 return m_convReal->WC2MB(buf, psz, n);
1cd52418 3532
f1339c56 3533 // latin-1 (direct)
f8d791e0 3534 const size_t len = wxWcslen(psz);
f1339c56
RR
3535 if (buf)
3536 {
4def3b35 3537 for (size_t c = 0; c <= len; c++)
24642831
VS
3538 {
3539 if (psz[c] > 0xFF)
467e0479 3540 return wxCONV_FAILED;
ef199164 3541
907173e5 3542 buf[c] = (char)psz[c];
24642831
VS
3543 }
3544 }
3545 else
3546 {
3547 for (size_t c = 0; c <= len; c++)
3548 {
3549 if (psz[c] > 0xFF)
467e0479 3550 return wxCONV_FAILED;
24642831 3551 }
f1339c56 3552 }
dccce9ea 3553
f1339c56 3554 return len;
6001e347
RR
3555}
3556
7ef3ab50 3557size_t wxCSConv::GetMBNulLen() const
eec47cc6
VZ
3558{
3559 CreateConvIfNeeded();
3560
3561 if ( m_convReal )
3562 {
7ef3ab50 3563 return m_convReal->GetMBNulLen();
eec47cc6
VZ
3564 }
3565
c1464d9d 3566 return 1;
eec47cc6
VZ
3567}
3568
bde4baac
VZ
3569// ----------------------------------------------------------------------------
3570// globals
3571// ----------------------------------------------------------------------------
3572
3573#ifdef __WINDOWS__
3574 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
3575#elif defined(__WXMAC__) && !defined(__MACH__)
3576 static wxMBConv_mac wxConvLibcObj ;
bde4baac 3577#else
dcc8fac0 3578 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
3579#endif
3580
3581static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
3582static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
3583static wxMBConvUTF7 wxConvUTF7Obj;
3584static wxMBConvUTF8 wxConvUTF8Obj;
d43d9ee7 3585#if defined(__WXMAC__) && defined(TARGET_CARBON)
739cb14a
SC
3586static wxMBConv_macUTF8D wxConvMacUTF8DObj;
3587#endif
bde4baac
VZ
3588WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
3589WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
3590WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
3591WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
3592WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
3593WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
d5bef0a3 3594WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = &wxConvLocal;
f5a1953b
VZ
3595WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
3596#ifdef __WXOSX__
d43d9ee7 3597#if defined(__WXMAC__) && defined(TARGET_CARBON)
739cb14a 3598 wxConvMacUTF8DObj;
d43d9ee7
SC
3599#else
3600 wxConvUTF8Obj;
3601#endif
f5a1953b 3602#else
ea8ce907 3603 wxConvLibcObj;
f5a1953b
VZ
3604#endif
3605
bde4baac
VZ
3606#else // !wxUSE_WCHAR_T
3607
3608// stand-ins in absence of wchar_t
3609WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
3610 wxConvISO8859_1,
3611 wxConvLocal,
3612 wxConvUTF8;
3613
3614#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T