]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
changed wxPlatform implicit conversion to string to return const reference to wxStrin...
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347 1/////////////////////////////////////////////////////////////////////////////
38d4b1e4 2// Name: src/common/strconv.cpp
6001e347 3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
6001e347
RR
15// For compilers that support precompilation, includes "wx.h".
16#include "wx/wxprec.h"
17
373658eb
VZ
18#ifndef WX_PRECOMP
19 #include "wx/intl.h"
20 #include "wx/log.h"
de6185e2 21 #include "wx/utils.h"
df69528b 22 #include "wx/hashmap.h"
ef199164 23#endif
373658eb 24
bde4baac
VZ
25#include "wx/strconv.h"
26
27#if wxUSE_WCHAR_T
28
1c193821 29#ifndef __WXWINCE__
1cd52418 30#include <errno.h>
1c193821
JS
31#endif
32
6001e347
RR
33#include <ctype.h>
34#include <string.h>
35#include <stdlib.h>
36
e95354ec 37#if defined(__WIN32__) && !defined(__WXMICROWIN__)
a6c2e2c7
VZ
38 #include "wx/msw/private.h"
39 #include "wx/msw/missing.h"
e95354ec 40 #define wxHAVE_WIN32_MB2WC
ef199164 41#endif
e95354ec 42
6001e347 43#ifdef __SALFORDC__
373658eb 44 #include <clib.h>
6001e347
RR
45#endif
46
b040e242 47#ifdef HAVE_ICONV
373658eb 48 #include <iconv.h>
b1d547eb 49 #include "wx/thread.h"
1cd52418 50#endif
1cd52418 51
373658eb
VZ
52#include "wx/encconv.h"
53#include "wx/fontmap.h"
54
335d31e0 55#ifdef __WXMAC__
40ba2f3b 56#ifndef __DARWIN__
4227afa4
SC
57#include <ATSUnicode.h>
58#include <TextCommon.h>
59#include <TextEncodingConverter.h>
40ba2f3b 60#endif
335d31e0 61
ef199164
DS
62// includes Mac headers
63#include "wx/mac/private.h"
335d31e0 64#endif
ce6f8d6f 65
ef199164 66
ce6f8d6f
VZ
67#define TRACE_STRCONV _T("strconv")
68
467e0479
VZ
69// WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
70// be 4 bytes
4948c2b6 71#if SIZEOF_WCHAR_T == 2
ac11db3a
MW
72 #define WC_UTF16
73#endif
74
ef199164 75
373658eb
VZ
76// ============================================================================
77// implementation
78// ============================================================================
79
69373110
VZ
// Helper of cMB2WC() and ToWChar(): returns true if at least one of the n
// bytes starting at p is not NUL, and false if all of them are NUL (which is
// trivially the case when n is 0).
static bool NotAllNULs(const char *p, size_t n)
{
    // skip over the NUL bytes, stopping at the first non-NUL one
    while ( n && *p++ == '\0' )
        n--;

    // if anything is left, we stopped at a non-NUL byte
    return n != 0;
}
88
373658eb 89// ----------------------------------------------------------------------------
467e0479 90// UTF-16 en/decoding to/from UCS-4 with surrogates handling
373658eb 91// ----------------------------------------------------------------------------
6001e347 92
c91830cb 93static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 94{
ef199164 95 if (input <= 0xffff)
4def3b35 96 {
999836aa
VZ
97 if (output)
98 *output = (wxUint16) input;
ef199164 99
4def3b35 100 return 1;
dccce9ea 101 }
ef199164 102 else if (input >= 0x110000)
4def3b35 103 {
467e0479 104 return wxCONV_FAILED;
dccce9ea
VZ
105 }
106 else
4def3b35 107 {
dccce9ea 108 if (output)
4def3b35 109 {
ef199164
DS
110 *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
111 *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
4def3b35 112 }
ef199164 113
4def3b35 114 return 2;
1cd52418 115 }
1cd52418
OK
116}
117
c91830cb 118static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 119{
ef199164 120 if ((*input < 0xd800) || (*input > 0xdfff))
4def3b35
VS
121 {
122 output = *input;
123 return 1;
dccce9ea 124 }
ef199164 125 else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
4def3b35
VS
126 {
127 output = *input;
467e0479 128 return wxCONV_FAILED;
dccce9ea
VZ
129 }
130 else
4def3b35
VS
131 {
132 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
133 return 2;
134 }
1cd52418
OK
135}
136
467e0479 137#ifdef WC_UTF16
35d11700
VZ
138 typedef wchar_t wxDecodeSurrogate_t;
139#else // !WC_UTF16
140 typedef wxUint16 wxDecodeSurrogate_t;
141#endif // WC_UTF16/!WC_UTF16
467e0479
VZ
142
143// returns the next UTF-32 character from the wchar_t buffer and advances the
144// pointer to the character after this one
145//
146// if an invalid character is found, *pSrc is set to NULL, the caller must
147// check for this
35d11700 148static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
467e0479
VZ
149{
150 wxUint32 out;
8d3dd069
VZ
151 const size_t
152 n = decode_utf16(wx_reinterpret_cast(const wxUint16 *, *pSrc), out);
467e0479
VZ
153 if ( n == wxCONV_FAILED )
154 *pSrc = NULL;
155 else
156 *pSrc += n;
157
158 return out;
159}
160
f6bcfd97 161// ----------------------------------------------------------------------------
6001e347 162// wxMBConv
f6bcfd97 163// ----------------------------------------------------------------------------
2c53a80a 164
483b0434
VZ
165size_t
166wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
167 const char *src, size_t srcLen) const
6001e347 168{
483b0434
VZ
169 // although new conversion classes are supposed to implement this function
170 // directly, the existins ones only implement the old MB2WC() and so, to
171 // avoid to have to rewrite all conversion classes at once, we provide a
172 // default (but not efficient) implementation of this one in terms of the
173 // old function by copying the input to ensure that it's NUL-terminated and
174 // then using MB2WC() to convert it
6001e347 175
483b0434
VZ
176 // the number of chars [which would be] written to dst [if it were not NULL]
177 size_t dstWritten = 0;
eec47cc6 178
c1464d9d 179 // the number of NULs terminating this string
a78c43f1 180 size_t nulLen = 0; // not really needed, but just to avoid warnings
eec47cc6 181
c1464d9d
VZ
182 // if we were not given the input size we just have to assume that the
183 // string is properly terminated as we have no way of knowing how long it
184 // is anyhow, but if we do have the size check whether there are enough
185 // NULs at the end
483b0434
VZ
186 wxCharBuffer bufTmp;
187 const char *srcEnd;
467e0479 188 if ( srcLen != wxNO_LEN )
eec47cc6 189 {
c1464d9d 190 // we need to know how to find the end of this string
7ef3ab50 191 nulLen = GetMBNulLen();
483b0434
VZ
192 if ( nulLen == wxCONV_FAILED )
193 return wxCONV_FAILED;
e4e3bbb4 194
c1464d9d 195 // if there are enough NULs we can avoid the copy
483b0434 196 if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
eec47cc6
VZ
197 {
198 // make a copy in order to properly NUL-terminate the string
483b0434 199 bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
c1464d9d 200 char * const p = bufTmp.data();
483b0434
VZ
201 memcpy(p, src, srcLen);
202 for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
c1464d9d 203 *s = '\0';
483b0434
VZ
204
205 src = bufTmp;
eec47cc6 206 }
e4e3bbb4 207
483b0434
VZ
208 srcEnd = src + srcLen;
209 }
210 else // quit after the first loop iteration
211 {
212 srcEnd = NULL;
213 }
e4e3bbb4 214
483b0434 215 for ( ;; )
eec47cc6 216 {
c1464d9d 217 // try to convert the current chunk
483b0434 218 size_t lenChunk = MB2WC(NULL, src, 0);
483b0434
VZ
219 if ( lenChunk == wxCONV_FAILED )
220 return wxCONV_FAILED;
e4e3bbb4 221
467e0479 222 lenChunk++; // for the L'\0' at the end of this chunk
e4e3bbb4 223
483b0434 224 dstWritten += lenChunk;
f5fb6871 225
467e0479
VZ
226 if ( lenChunk == 1 )
227 {
228 // nothing left in the input string, conversion succeeded
229 break;
230 }
231
483b0434
VZ
232 if ( dst )
233 {
234 if ( dstWritten > dstLen )
235 return wxCONV_FAILED;
236
830f8f11 237 if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
483b0434
VZ
238 return wxCONV_FAILED;
239
240 dst += lenChunk;
241 }
c1464d9d 242
483b0434 243 if ( !srcEnd )
c1464d9d 244 {
467e0479
VZ
245 // we convert just one chunk in this case as this is the entire
246 // string anyhow
c1464d9d
VZ
247 break;
248 }
eec47cc6
VZ
249
250 // advance the input pointer past the end of this chunk
483b0434 251 while ( NotAllNULs(src, nulLen) )
c1464d9d
VZ
252 {
253 // notice that we must skip over multiple bytes here as we suppose
254 // that if NUL takes 2 or 4 bytes, then all the other characters do
255 // too and so if advanced by a single byte we might erroneously
256 // detect sequences of NUL bytes in the middle of the input
483b0434 257 src += nulLen;
c1464d9d 258 }
e4e3bbb4 259
483b0434 260 src += nulLen; // skipping over its terminator as well
c1464d9d
VZ
261
262 // note that ">=" (and not just "==") is needed here as the terminator
263 // we skipped just above could be inside or just after the buffer
264 // delimited by inEnd
483b0434 265 if ( src >= srcEnd )
c1464d9d
VZ
266 break;
267 }
268
483b0434 269 return dstWritten;
e4e3bbb4
RN
270}
271
483b0434
VZ
272size_t
273wxMBConv::FromWChar(char *dst, size_t dstLen,
274 const wchar_t *src, size_t srcLen) const
e4e3bbb4 275{
483b0434
VZ
276 // the number of chars [which would be] written to dst [if it were not NULL]
277 size_t dstWritten = 0;
e4e3bbb4 278
eec47cc6
VZ
279 // make a copy of the input string unless it is already properly
280 // NUL-terminated
281 //
282 // if we don't know its length we have no choice but to assume that it is,
283 // indeed, properly terminated
284 wxWCharBuffer bufTmp;
467e0479 285 if ( srcLen == wxNO_LEN )
e4e3bbb4 286 {
483b0434 287 srcLen = wxWcslen(src) + 1;
eec47cc6 288 }
483b0434 289 else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
eec47cc6
VZ
290 {
291 // make a copy in order to properly NUL-terminate the string
483b0434 292 bufTmp = wxWCharBuffer(srcLen);
ef199164 293 memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
483b0434
VZ
294 src = bufTmp;
295 }
296
297 const size_t lenNul = GetMBNulLen();
298 for ( const wchar_t * const srcEnd = src + srcLen;
299 src < srcEnd;
300 src += wxWcslen(src) + 1 /* skip L'\0' too */ )
301 {
302 // try to convert the current chunk
303 size_t lenChunk = WC2MB(NULL, src, 0);
304
305 if ( lenChunk == wxCONV_FAILED )
306 return wxCONV_FAILED;
307
308 lenChunk += lenNul;
309 dstWritten += lenChunk;
310
311 if ( dst )
312 {
313 if ( dstWritten > dstLen )
314 return wxCONV_FAILED;
315
316 if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
317 return wxCONV_FAILED;
318
319 dst += lenChunk;
320 }
eec47cc6 321 }
e4e3bbb4 322
483b0434
VZ
323 return dstWritten;
324}
325
ef199164 326size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
509da451 327{
ef199164 328 size_t rc = ToWChar(outBuff, outLen, inBuff);
467e0479 329 if ( rc != wxCONV_FAILED )
509da451
VZ
330 {
331 // ToWChar() returns the buffer length, i.e. including the trailing
332 // NUL, while this method doesn't take it into account
333 rc--;
334 }
335
336 return rc;
337}
338
ef199164 339size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
509da451 340{
ef199164 341 size_t rc = FromWChar(outBuff, outLen, inBuff);
467e0479 342 if ( rc != wxCONV_FAILED )
509da451
VZ
343 {
344 rc -= GetMBNulLen();
345 }
346
347 return rc;
348}
349
483b0434
VZ
350wxMBConv::~wxMBConv()
351{
352 // nothing to do here (necessary for Darwin linking probably)
353}
e4e3bbb4 354
483b0434
VZ
355const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
356{
357 if ( psz )
eec47cc6 358 {
483b0434
VZ
359 // calculate the length of the buffer needed first
360 const size_t nLen = MB2WC(NULL, psz, 0);
467e0479 361 if ( nLen != wxCONV_FAILED )
f5fb6871 362 {
483b0434
VZ
363 // now do the actual conversion
364 wxWCharBuffer buf(nLen /* +1 added implicitly */);
eec47cc6 365
483b0434
VZ
366 // +1 for the trailing NULL
367 if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
368 return buf;
f5fb6871 369 }
483b0434 370 }
e4e3bbb4 371
483b0434
VZ
372 return wxWCharBuffer();
373}
3698ae71 374
483b0434
VZ
375const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
376{
377 if ( pwz )
378 {
379 const size_t nLen = WC2MB(NULL, pwz, 0);
467e0479 380 if ( nLen != wxCONV_FAILED )
483b0434
VZ
381 {
382 // extra space for trailing NUL(s)
383 static const size_t extraLen = GetMaxMBNulLen();
f5fb6871 384
483b0434
VZ
385 wxCharBuffer buf(nLen + extraLen - 1);
386 if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
387 return buf;
388 }
389 }
390
391 return wxCharBuffer();
392}
e4e3bbb4 393
483b0434 394const wxWCharBuffer
ef199164 395wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
483b0434 396{
ef199164 397 const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
467e0479 398 if ( dstLen != wxCONV_FAILED )
483b0434 399 {
830f8f11 400 wxWCharBuffer wbuf(dstLen - 1);
ef199164 401 if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
483b0434
VZ
402 {
403 if ( outLen )
467e0479
VZ
404 {
405 *outLen = dstLen;
406 if ( wbuf[dstLen - 1] == L'\0' )
407 (*outLen)--;
408 }
409
483b0434
VZ
410 return wbuf;
411 }
412 }
413
414 if ( outLen )
415 *outLen = 0;
416
417 return wxWCharBuffer();
418}
419
420const wxCharBuffer
ef199164 421wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
483b0434 422{
13d92ad6 423 size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
467e0479 424 if ( dstLen != wxCONV_FAILED )
483b0434 425 {
168a76fe
VZ
426 // special case of empty input: can't allocate 0 size buffer below as
427 // wxCharBuffer insists on NUL-terminating it
428 wxCharBuffer buf(dstLen ? dstLen - 1 : 1);
ef199164 429 if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
483b0434
VZ
430 {
431 if ( outLen )
467e0479
VZ
432 {
433 *outLen = dstLen;
434
435 const size_t nulLen = GetMBNulLen();
13d92ad6
VZ
436 if ( dstLen >= nulLen &&
437 !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
467e0479
VZ
438 {
439 // in this case the output is NUL-terminated and we're not
440 // supposed to count NUL
13d92ad6 441 *outLen -= nulLen;
467e0479
VZ
442 }
443 }
d32a507d 444
483b0434
VZ
445 return buf;
446 }
e4e3bbb4
RN
447 }
448
eec47cc6
VZ
449 if ( outLen )
450 *outLen = 0;
451
452 return wxCharBuffer();
e4e3bbb4
RN
453}
454
6001e347 455// ----------------------------------------------------------------------------
bde4baac 456// wxMBConvLibc
6001e347
RR
457// ----------------------------------------------------------------------------
458
bde4baac
VZ
459size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
460{
461 return wxMB2WC(buf, psz, n);
462}
463
464size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
465{
466 return wxWC2MB(buf, psz, n);
467}
e1bfe89e
RR
468
469// ----------------------------------------------------------------------------
532d575b 470// wxConvBrokenFileNames
e1bfe89e
RR
471// ----------------------------------------------------------------------------
472
eec47cc6
VZ
473#ifdef __UNIX__
474
845905d5 475wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
ea8ce907 476{
845905d5
MW
477 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
478 || wxStricmp(charset, _T("UTF8")) == 0 )
479 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
480 else
481 m_conv = new wxCSConv(charset);
ea8ce907
RR
482}
483
eec47cc6 484#endif // __UNIX__
c12b7f79 485
bde4baac 486// ----------------------------------------------------------------------------
3698ae71 487// UTF-7
bde4baac 488// ----------------------------------------------------------------------------
6001e347 489
15f2ee32 490// Implementation (C) 2004 Fredrik Roubert
6001e347 491
15f2ee32
RN
492//
493// BASE64 decoding table
494//
495static const unsigned char utf7unb64[] =
6001e347 496{
15f2ee32
RN
497 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
498 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
499 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
500 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
501 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
502 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
503 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
504 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
505 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
506 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
507 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
508 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
509 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
510 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
511 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
512 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
513 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
514 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
515 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
516 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
517 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
518 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
519 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
520 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
521 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
522 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
523 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
524 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
525 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
526 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
527 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
528 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
529};
530
531size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
532{
15f2ee32
RN
533 size_t len = 0;
534
04a37834 535 while ( *psz && (!buf || (len < n)) )
15f2ee32
RN
536 {
537 unsigned char cc = *psz++;
538 if (cc != '+')
539 {
540 // plain ASCII char
541 if (buf)
542 *buf++ = cc;
543 len++;
544 }
545 else if (*psz == '-')
546 {
547 // encoded plus sign
548 if (buf)
549 *buf++ = cc;
550 len++;
551 psz++;
552 }
04a37834 553 else // start of BASE64 encoded string
15f2ee32 554 {
04a37834 555 bool lsb, ok;
15f2ee32 556 unsigned int d, l;
04a37834
VZ
557 for ( ok = lsb = false, d = 0, l = 0;
558 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
559 psz++ )
15f2ee32
RN
560 {
561 d <<= 6;
562 d += cc;
563 for (l += 6; l >= 8; lsb = !lsb)
564 {
04a37834 565 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
566 if (lsb)
567 {
568 if (buf)
569 *buf++ |= c;
570 len ++;
571 }
572 else
04a37834 573 {
15f2ee32 574 if (buf)
6356d52a 575 *buf = (wchar_t)(c << 8);
04a37834
VZ
576 }
577
578 ok = true;
15f2ee32
RN
579 }
580 }
04a37834
VZ
581
582 if ( !ok )
583 {
584 // in valid UTF7 we should have valid characters after '+'
467e0479 585 return wxCONV_FAILED;
04a37834
VZ
586 }
587
15f2ee32
RN
588 if (*psz == '-')
589 psz++;
590 }
591 }
04a37834
VZ
592
593 if ( buf && (len < n) )
594 *buf = '\0';
595
15f2ee32 596 return len;
6001e347
RR
597}
598
15f2ee32
RN
599//
600// BASE64 encoding table
601//
602static const unsigned char utf7enb64[] =
603{
604 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
605 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
606 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
607 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
608 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
609 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
610 'w', 'x', 'y', 'z', '0', '1', '2', '3',
611 '4', '5', '6', '7', '8', '9', '+', '/'
612};
613
614//
615// UTF-7 encoding table
616//
617// 0 - Set D (directly encoded characters)
618// 1 - Set O (optional direct characters)
619// 2 - whitespace characters (optional)
620// 3 - special characters
621//
622static const unsigned char utf7encode[128] =
6001e347 623{
15f2ee32
RN
624 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
625 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
626 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
627 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
628 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
630 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
631 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
632};
633
667e5b3e 634size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32 635{
15f2ee32
RN
636 size_t len = 0;
637
638 while (*psz && ((!buf) || (len < n)))
639 {
640 wchar_t cc = *psz++;
641 if (cc < 0x80 && utf7encode[cc] < 1)
642 {
643 // plain ASCII char
644 if (buf)
645 *buf++ = (char)cc;
ef199164 646
15f2ee32
RN
647 len++;
648 }
649#ifndef WC_UTF16
79c78d42 650 else if (((wxUint32)cc) > 0xffff)
b2c13097 651 {
15f2ee32 652 // no surrogate pair generation (yet?)
467e0479 653 return wxCONV_FAILED;
15f2ee32
RN
654 }
655#endif
656 else
657 {
658 if (buf)
659 *buf++ = '+';
ef199164 660
15f2ee32
RN
661 len++;
662 if (cc != '+')
663 {
664 // BASE64 encode string
665 unsigned int lsb, d, l;
73c902d6 666 for (d = 0, l = 0; /*nothing*/; psz++)
15f2ee32
RN
667 {
668 for (lsb = 0; lsb < 2; lsb ++)
669 {
670 d <<= 8;
671 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
672
673 for (l += 8; l >= 6; )
674 {
675 l -= 6;
676 if (buf)
677 *buf++ = utf7enb64[(d >> l) % 64];
678 len++;
679 }
680 }
ef199164 681
15f2ee32
RN
682 cc = *psz;
683 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
684 break;
685 }
ef199164 686
15f2ee32
RN
687 if (l != 0)
688 {
689 if (buf)
690 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
ef199164 691
15f2ee32
RN
692 len++;
693 }
694 }
ef199164 695
15f2ee32
RN
696 if (buf)
697 *buf++ = '-';
698 len++;
699 }
700 }
ef199164 701
15f2ee32
RN
702 if (buf && (len < n))
703 *buf = 0;
ef199164 704
15f2ee32 705 return len;
6001e347
RR
706}
707
f6bcfd97 708// ----------------------------------------------------------------------------
6001e347 709// UTF-8
f6bcfd97 710// ----------------------------------------------------------------------------
6001e347 711
dccce9ea 712static wxUint32 utf8_max[]=
4def3b35 713 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347 714
3698ae71
VZ
715// boundaries of the private use area we use to (temporarily) remap invalid
716// characters invalid in a UTF-8 encoded string
ea8ce907
RR
717const wxUint32 wxUnicodePUA = 0x100000;
718const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
719
6001e347
RR
720size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
721{
4def3b35
VS
722 size_t len = 0;
723
dccce9ea 724 while (*psz && ((!buf) || (len < n)))
4def3b35 725 {
ea8ce907
RR
726 const char *opsz = psz;
727 bool invalid = false;
4def3b35
VS
728 unsigned char cc = *psz++, fc = cc;
729 unsigned cnt;
dccce9ea 730 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 731 fc <<= 1;
ef199164 732
dccce9ea 733 if (!cnt)
4def3b35
VS
734 {
735 // plain ASCII char
dccce9ea 736 if (buf)
4def3b35
VS
737 *buf++ = cc;
738 len++;
561488ef
MW
739
740 // escape the escape character for octal escapes
741 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
742 && cc == '\\' && (!buf || len < n))
743 {
744 if (buf)
745 *buf++ = cc;
746 len++;
747 }
dccce9ea
VZ
748 }
749 else
4def3b35
VS
750 {
751 cnt--;
dccce9ea 752 if (!cnt)
4def3b35
VS
753 {
754 // invalid UTF-8 sequence
ea8ce907 755 invalid = true;
dccce9ea
VZ
756 }
757 else
4def3b35
VS
758 {
759 unsigned ocnt = cnt - 1;
760 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 761 while (cnt--)
4def3b35 762 {
ea8ce907 763 cc = *psz;
dccce9ea 764 if ((cc & 0xC0) != 0x80)
4def3b35
VS
765 {
766 // invalid UTF-8 sequence
ea8ce907
RR
767 invalid = true;
768 break;
4def3b35 769 }
ef199164 770
ea8ce907 771 psz++;
4def3b35
VS
772 res = (res << 6) | (cc & 0x3f);
773 }
ef199164 774
ea8ce907 775 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
776 {
777 // illegal UTF-8 encoding
ea8ce907 778 invalid = true;
4def3b35 779 }
ea8ce907
RR
780 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
781 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
782 {
783 // if one of our PUA characters turns up externally
784 // it must also be treated as an illegal sequence
785 // (a bit like you have to escape an escape character)
786 invalid = true;
787 }
788 else
789 {
1cd52418 790#ifdef WC_UTF16
ea8ce907
RR
791 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
792 size_t pa = encode_utf16(res, (wxUint16 *)buf);
467e0479 793 if (pa == wxCONV_FAILED)
ea8ce907
RR
794 {
795 invalid = true;
796 }
797 else
798 {
799 if (buf)
800 buf += pa;
801 len += pa;
802 }
373658eb 803#else // !WC_UTF16
ea8ce907 804 if (buf)
38d4b1e4 805 *buf++ = (wchar_t)res;
ea8ce907 806 len++;
373658eb 807#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
808 }
809 }
ef199164 810
ea8ce907
RR
811 if (invalid)
812 {
813 if (m_options & MAP_INVALID_UTF8_TO_PUA)
814 {
815 while (opsz < psz && (!buf || len < n))
816 {
817#ifdef WC_UTF16
818 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
819 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
467e0479 820 wxASSERT(pa != wxCONV_FAILED);
ea8ce907
RR
821 if (buf)
822 buf += pa;
823 opsz++;
824 len += pa;
825#else
826 if (buf)
38d4b1e4 827 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
ea8ce907
RR
828 opsz++;
829 len++;
830#endif
831 }
832 }
3698ae71 833 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
834 {
835 while (opsz < psz && (!buf || len < n))
836 {
3698ae71
VZ
837 if ( buf && len + 3 < n )
838 {
17a1ebd1 839 unsigned char on = *opsz;
3698ae71 840 *buf++ = L'\\';
17a1ebd1
VZ
841 *buf++ = (wchar_t)( L'0' + on / 0100 );
842 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
843 *buf++ = (wchar_t)( L'0' + on % 010 );
3698ae71 844 }
ef199164 845
ea8ce907
RR
846 opsz++;
847 len += 4;
848 }
849 }
3698ae71 850 else // MAP_INVALID_UTF8_NOT
ea8ce907 851 {
467e0479 852 return wxCONV_FAILED;
ea8ce907 853 }
4def3b35
VS
854 }
855 }
6001e347 856 }
ef199164 857
dccce9ea 858 if (buf && (len < n))
4def3b35 859 *buf = 0;
ef199164 860
4def3b35 861 return len;
6001e347
RR
862}
863
3698ae71
VZ
// returns true if wch is an octal digit, i.e. one of L'0'..L'7'
static inline bool isoctal(wchar_t wch)
{
    return L'0' <= wch && wch <= L'7';
}
868
6001e347
RR
869size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
870{
4def3b35 871 size_t len = 0;
6001e347 872
dccce9ea 873 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
874 {
875 wxUint32 cc;
ef199164 876
1cd52418 877#ifdef WC_UTF16
b5153fd8
VZ
878 // cast is ok for WC_UTF16
879 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
467e0479 880 psz += (pa == wxCONV_FAILED) ? 1 : pa;
1cd52418 881#else
ef199164 882 cc = (*psz++) & 0x7fffffff;
4def3b35 883#endif
3698ae71
VZ
884
885 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
886 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 887 {
dccce9ea 888 if (buf)
ea8ce907 889 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 890 len++;
3698ae71 891 }
561488ef
MW
892 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
893 && cc == L'\\' && psz[0] == L'\\' )
894 {
895 if (buf)
896 *buf++ = (char)cc;
897 psz++;
898 len++;
899 }
3698ae71
VZ
900 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
901 cc == L'\\' &&
902 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 903 {
dccce9ea 904 if (buf)
3698ae71 905 {
ef199164
DS
906 *buf++ = (char) ((psz[0] - L'0') * 0100 +
907 (psz[1] - L'0') * 010 +
b2c13097 908 (psz[2] - L'0'));
3698ae71
VZ
909 }
910
911 psz += 3;
ea8ce907
RR
912 len++;
913 }
914 else
915 {
916 unsigned cnt;
ef199164
DS
917 for (cnt = 0; cc > utf8_max[cnt]; cnt++)
918 {
919 }
920
ea8ce907 921 if (!cnt)
4def3b35 922 {
ea8ce907
RR
923 // plain ASCII char
924 if (buf)
925 *buf++ = (char) cc;
926 len++;
927 }
ea8ce907
RR
928 else
929 {
930 len += cnt + 1;
931 if (buf)
932 {
933 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
934 while (cnt--)
935 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
936 }
4def3b35
VS
937 }
938 }
6001e347 939 }
4def3b35 940
ef199164 941 if (buf && (len < n))
3698ae71 942 *buf = 0;
adb45366 943
4def3b35 944 return len;
6001e347
RR
945}
946
467e0479 947// ============================================================================
c91830cb 948// UTF-16
467e0479 949// ============================================================================
c91830cb
VZ
950
951#ifdef WORDS_BIGENDIAN
bde4baac
VZ
952 #define wxMBConvUTF16straight wxMBConvUTF16BE
953 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 954#else
bde4baac
VZ
955 #define wxMBConvUTF16swap wxMBConvUTF16BE
956 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
957#endif
958
467e0479
VZ
959/* static */
960size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
961{
962 if ( srcLen == wxNO_LEN )
963 {
964 // count the number of bytes in input, including the trailing NULs
ef199164
DS
965 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
966 for ( srcLen = 1; *inBuff++; srcLen++ )
467e0479 967 ;
c91830cb 968
467e0479
VZ
969 srcLen *= BYTES_PER_CHAR;
970 }
971 else // we already have the length
972 {
973 // we can only convert an entire number of UTF-16 characters
974 if ( srcLen % BYTES_PER_CHAR )
975 return wxCONV_FAILED;
976 }
977
978 return srcLen;
979}
980
981// case when in-memory representation is UTF-16 too
c91830cb
VZ
982#ifdef WC_UTF16
983
467e0479
VZ
984// ----------------------------------------------------------------------------
985// conversions without endianness change
986// ----------------------------------------------------------------------------
987
988size_t
989wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
990 const char *src, size_t srcLen) const
c91830cb 991{
467e0479
VZ
992 // set up the scene for using memcpy() (which is presumably more efficient
993 // than copying the bytes one by one)
994 srcLen = GetLength(src, srcLen);
995 if ( srcLen == wxNO_LEN )
996 return wxCONV_FAILED;
c91830cb 997
ef199164 998 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479 999 if ( dst )
c91830cb 1000 {
467e0479
VZ
1001 if ( dstLen < inLen )
1002 return wxCONV_FAILED;
c91830cb 1003
467e0479 1004 memcpy(dst, src, srcLen);
c91830cb 1005 }
d32a507d 1006
467e0479 1007 return inLen;
c91830cb
VZ
1008}
1009
467e0479
VZ
1010size_t
1011wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1012 const wchar_t *src, size_t srcLen) const
c91830cb 1013{
467e0479
VZ
1014 if ( srcLen == wxNO_LEN )
1015 srcLen = wxWcslen(src) + 1;
c91830cb 1016
467e0479
VZ
1017 srcLen *= BYTES_PER_CHAR;
1018
1019 if ( dst )
c91830cb 1020 {
467e0479
VZ
1021 if ( dstLen < srcLen )
1022 return wxCONV_FAILED;
d32a507d 1023
467e0479 1024 memcpy(dst, src, srcLen);
c91830cb 1025 }
d32a507d 1026
467e0479 1027 return srcLen;
c91830cb
VZ
1028}
1029
467e0479
VZ
1030// ----------------------------------------------------------------------------
1031// endian-reversing conversions
1032// ----------------------------------------------------------------------------
c91830cb 1033
467e0479
VZ
1034size_t
1035wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1036 const char *src, size_t srcLen) const
c91830cb 1037{
467e0479
VZ
1038 srcLen = GetLength(src, srcLen);
1039 if ( srcLen == wxNO_LEN )
1040 return wxCONV_FAILED;
c91830cb 1041
467e0479
VZ
1042 srcLen /= BYTES_PER_CHAR;
1043
1044 if ( dst )
c91830cb 1045 {
467e0479
VZ
1046 if ( dstLen < srcLen )
1047 return wxCONV_FAILED;
1048
ef199164
DS
1049 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1050 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
c91830cb 1051 {
ef199164 1052 *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff);
c91830cb 1053 }
c91830cb 1054 }
bfab25d4 1055
467e0479 1056 return srcLen;
c91830cb
VZ
1057}
1058
467e0479
VZ
1059size_t
1060wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1061 const wchar_t *src, size_t srcLen) const
c91830cb 1062{
467e0479
VZ
1063 if ( srcLen == wxNO_LEN )
1064 srcLen = wxWcslen(src) + 1;
c91830cb 1065
467e0479
VZ
1066 srcLen *= BYTES_PER_CHAR;
1067
1068 if ( dst )
c91830cb 1069 {
467e0479
VZ
1070 if ( dstLen < srcLen )
1071 return wxCONV_FAILED;
1072
ef199164 1073 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
467e0479 1074 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
c91830cb 1075 {
ef199164 1076 *outBuff++ = wxUINT16_SWAP_ALWAYS(*src);
c91830cb 1077 }
c91830cb 1078 }
eec47cc6 1079
467e0479 1080 return srcLen;
c91830cb
VZ
1081}
1082
467e0479 1083#else // !WC_UTF16: wchar_t is UTF-32
c91830cb 1084
467e0479
VZ
1085// ----------------------------------------------------------------------------
1086// conversions without endianness change
1087// ----------------------------------------------------------------------------
c91830cb 1088
35d11700
VZ
1089size_t
1090wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1091 const char *src, size_t srcLen) const
c91830cb 1092{
35d11700
VZ
1093 srcLen = GetLength(src, srcLen);
1094 if ( srcLen == wxNO_LEN )
1095 return wxCONV_FAILED;
c91830cb 1096
ef199164 1097 const size_t inLen = srcLen / BYTES_PER_CHAR;
35d11700 1098 if ( !dst )
c91830cb 1099 {
35d11700
VZ
1100 // optimization: return maximal space which could be needed for this
1101 // string even if the real size could be smaller if the buffer contains
1102 // any surrogates
1103 return inLen;
c91830cb 1104 }
c91830cb 1105
35d11700 1106 size_t outLen = 0;
ef199164
DS
1107 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1108 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
35d11700 1109 {
ef199164
DS
1110 const wxUint32 ch = wxDecodeSurrogate(&inBuff);
1111 if ( !inBuff )
35d11700
VZ
1112 return wxCONV_FAILED;
1113
1114 if ( ++outLen > dstLen )
1115 return wxCONV_FAILED;
c91830cb 1116
35d11700
VZ
1117 *dst++ = ch;
1118 }
1119
1120
1121 return outLen;
1122}
c91830cb 1123
35d11700
VZ
1124size_t
1125wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1126 const wchar_t *src, size_t srcLen) const
c91830cb 1127{
35d11700
VZ
1128 if ( srcLen == wxNO_LEN )
1129 srcLen = wxWcslen(src) + 1;
c91830cb 1130
35d11700 1131 size_t outLen = 0;
ef199164 1132 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
35d11700 1133 for ( size_t n = 0; n < srcLen; n++ )
c91830cb
VZ
1134 {
1135 wxUint16 cc[2];
35d11700
VZ
1136 const size_t numChars = encode_utf16(*src++, cc);
1137 if ( numChars == wxCONV_FAILED )
1138 return wxCONV_FAILED;
c91830cb 1139
ef199164
DS
1140 outLen += numChars * BYTES_PER_CHAR;
1141 if ( outBuff )
c91830cb 1142 {
35d11700
VZ
1143 if ( outLen > dstLen )
1144 return wxCONV_FAILED;
1145
ef199164 1146 *outBuff++ = cc[0];
35d11700 1147 if ( numChars == 2 )
69b80d28 1148 {
35d11700 1149 // second character of a surrogate
ef199164 1150 *outBuff++ = cc[1];
69b80d28 1151 }
c91830cb 1152 }
c91830cb 1153 }
c91830cb 1154
35d11700 1155 return outLen;
c91830cb
VZ
1156}
1157
467e0479
VZ
1158// ----------------------------------------------------------------------------
1159// endian-reversing conversions
1160// ----------------------------------------------------------------------------
c91830cb 1161
35d11700
VZ
1162size_t
1163wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1164 const char *src, size_t srcLen) const
c91830cb 1165{
35d11700
VZ
1166 srcLen = GetLength(src, srcLen);
1167 if ( srcLen == wxNO_LEN )
1168 return wxCONV_FAILED;
1169
ef199164 1170 const size_t inLen = srcLen / BYTES_PER_CHAR;
35d11700
VZ
1171 if ( !dst )
1172 {
1173 // optimization: return maximal space which could be needed for this
1174 // string even if the real size could be smaller if the buffer contains
1175 // any surrogates
1176 return inLen;
1177 }
c91830cb 1178
35d11700 1179 size_t outLen = 0;
ef199164
DS
1180 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1181 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
c91830cb 1182 {
35d11700
VZ
1183 wxUint32 ch;
1184 wxUint16 tmp[2];
ef199164
DS
1185
1186 tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
1187 inBuff++;
1188 tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
c91830cb 1189
35d11700
VZ
1190 const size_t numChars = decode_utf16(tmp, ch);
1191 if ( numChars == wxCONV_FAILED )
1192 return wxCONV_FAILED;
c91830cb 1193
35d11700 1194 if ( numChars == 2 )
ef199164 1195 inBuff++;
35d11700
VZ
1196
1197 if ( ++outLen > dstLen )
1198 return wxCONV_FAILED;
c91830cb 1199
35d11700 1200 *dst++ = ch;
c91830cb 1201 }
c91830cb 1202
c91830cb 1203
35d11700
VZ
1204 return outLen;
1205}
c91830cb 1206
35d11700
VZ
1207size_t
1208wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1209 const wchar_t *src, size_t srcLen) const
c91830cb 1210{
35d11700
VZ
1211 if ( srcLen == wxNO_LEN )
1212 srcLen = wxWcslen(src) + 1;
c91830cb 1213
35d11700 1214 size_t outLen = 0;
ef199164 1215 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
35d11700 1216 for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
c91830cb
VZ
1217 {
1218 wxUint16 cc[2];
35d11700
VZ
1219 const size_t numChars = encode_utf16(*src, cc);
1220 if ( numChars == wxCONV_FAILED )
1221 return wxCONV_FAILED;
c91830cb 1222
ef199164
DS
1223 outLen += numChars * BYTES_PER_CHAR;
1224 if ( outBuff )
c91830cb 1225 {
35d11700
VZ
1226 if ( outLen > dstLen )
1227 return wxCONV_FAILED;
1228
ef199164 1229 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
35d11700 1230 if ( numChars == 2 )
c91830cb 1231 {
35d11700 1232 // second character of a surrogate
ef199164 1233 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
c91830cb
VZ
1234 }
1235 }
c91830cb 1236 }
c91830cb 1237
35d11700 1238 return outLen;
c91830cb
VZ
1239}
1240
467e0479 1241#endif // WC_UTF16/!WC_UTF16
c91830cb
VZ
1242
1243
35d11700 1244// ============================================================================
c91830cb 1245// UTF-32
35d11700 1246// ============================================================================
c91830cb
VZ
1247
1248#ifdef WORDS_BIGENDIAN
467e0479
VZ
1249 #define wxMBConvUTF32straight wxMBConvUTF32BE
1250 #define wxMBConvUTF32swap wxMBConvUTF32LE
c91830cb 1251#else
467e0479
VZ
1252 #define wxMBConvUTF32swap wxMBConvUTF32BE
1253 #define wxMBConvUTF32straight wxMBConvUTF32LE
c91830cb
VZ
1254#endif
1255
1256
1257WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1258WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1259
467e0479
VZ
1260/* static */
1261size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
1262{
1263 if ( srcLen == wxNO_LEN )
1264 {
1265 // count the number of bytes in input, including the trailing NULs
ef199164
DS
1266 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1267 for ( srcLen = 1; *inBuff++; srcLen++ )
467e0479 1268 ;
c91830cb 1269
467e0479
VZ
1270 srcLen *= BYTES_PER_CHAR;
1271 }
1272 else // we already have the length
1273 {
1274 // we can only convert an entire number of UTF-32 characters
1275 if ( srcLen % BYTES_PER_CHAR )
1276 return wxCONV_FAILED;
1277 }
1278
1279 return srcLen;
1280}
1281
1282// case when in-memory representation is UTF-16
c91830cb
VZ
1283#ifdef WC_UTF16
1284
467e0479
VZ
1285// ----------------------------------------------------------------------------
1286// conversions without endianness change
1287// ----------------------------------------------------------------------------
1288
1289size_t
1290wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1291 const char *src, size_t srcLen) const
c91830cb 1292{
467e0479
VZ
1293 srcLen = GetLength(src, srcLen);
1294 if ( srcLen == wxNO_LEN )
1295 return wxCONV_FAILED;
c91830cb 1296
ef199164
DS
1297 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1298 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479
VZ
1299 size_t outLen = 0;
1300 for ( size_t n = 0; n < inLen; n++ )
c91830cb
VZ
1301 {
1302 wxUint16 cc[2];
ef199164 1303 const size_t numChars = encode_utf16(*inBuff++, cc);
467e0479
VZ
1304 if ( numChars == wxCONV_FAILED )
1305 return wxCONV_FAILED;
c91830cb 1306
467e0479
VZ
1307 outLen += numChars;
1308 if ( dst )
c91830cb 1309 {
467e0479
VZ
1310 if ( outLen > dstLen )
1311 return wxCONV_FAILED;
d32a507d 1312
467e0479
VZ
1313 *dst++ = cc[0];
1314 if ( numChars == 2 )
1315 {
1316 // second character of a surrogate
1317 *dst++ = cc[1];
1318 }
1319 }
c91830cb 1320 }
d32a507d 1321
467e0479 1322 return outLen;
c91830cb
VZ
1323}
1324
467e0479
VZ
1325size_t
1326wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1327 const wchar_t *src, size_t srcLen) const
c91830cb 1328{
467e0479
VZ
1329 if ( srcLen == wxNO_LEN )
1330 srcLen = wxWcslen(src) + 1;
c91830cb 1331
467e0479 1332 if ( !dst )
c91830cb 1333 {
467e0479
VZ
1334 // optimization: return maximal space which could be needed for this
1335 // string instead of the exact amount which could be less if there are
1336 // any surrogates in the input
1337 //
1338 // we consider that surrogates are rare enough to make it worthwhile to
1339 // avoid running the loop below at the cost of slightly extra memory
1340 // consumption
ef199164 1341 return srcLen * BYTES_PER_CHAR;
467e0479 1342 }
c91830cb 1343
ef199164 1344 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
467e0479
VZ
1345 size_t outLen = 0;
1346 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1347 {
1348 const wxUint32 ch = wxDecodeSurrogate(&src);
1349 if ( !src )
1350 return wxCONV_FAILED;
c91830cb 1351
467e0479 1352 outLen += BYTES_PER_CHAR;
d32a507d 1353
467e0479
VZ
1354 if ( outLen > dstLen )
1355 return wxCONV_FAILED;
b5153fd8 1356
ef199164 1357 *outBuff++ = ch;
467e0479 1358 }
c91830cb 1359
467e0479 1360 return outLen;
c91830cb
VZ
1361}
1362
467e0479
VZ
1363// ----------------------------------------------------------------------------
1364// endian-reversing conversions
1365// ----------------------------------------------------------------------------
c91830cb 1366
467e0479
VZ
1367size_t
1368wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1369 const char *src, size_t srcLen) const
c91830cb 1370{
467e0479
VZ
1371 srcLen = GetLength(src, srcLen);
1372 if ( srcLen == wxNO_LEN )
1373 return wxCONV_FAILED;
c91830cb 1374
ef199164
DS
1375 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1376 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479 1377 size_t outLen = 0;
ef199164 1378 for ( size_t n = 0; n < inLen; n++, inBuff++ )
c91830cb 1379 {
c91830cb 1380 wxUint16 cc[2];
ef199164 1381 const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
467e0479
VZ
1382 if ( numChars == wxCONV_FAILED )
1383 return wxCONV_FAILED;
c91830cb 1384
467e0479
VZ
1385 outLen += numChars;
1386 if ( dst )
c91830cb 1387 {
467e0479
VZ
1388 if ( outLen > dstLen )
1389 return wxCONV_FAILED;
d32a507d 1390
467e0479
VZ
1391 *dst++ = cc[0];
1392 if ( numChars == 2 )
1393 {
1394 // second character of a surrogate
1395 *dst++ = cc[1];
1396 }
1397 }
c91830cb 1398 }
b5153fd8 1399
467e0479 1400 return outLen;
c91830cb
VZ
1401}
1402
467e0479
VZ
1403size_t
1404wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1405 const wchar_t *src, size_t srcLen) const
c91830cb 1406{
467e0479
VZ
1407 if ( srcLen == wxNO_LEN )
1408 srcLen = wxWcslen(src) + 1;
c91830cb 1409
467e0479 1410 if ( !dst )
c91830cb 1411 {
467e0479
VZ
1412 // optimization: return maximal space which could be needed for this
1413 // string instead of the exact amount which could be less if there are
1414 // any surrogates in the input
1415 //
1416 // we consider that surrogates are rare enough to make it worthwhile to
1417 // avoid running the loop below at the cost of slightly extra memory
1418 // consumption
1419 return srcLen*BYTES_PER_CHAR;
1420 }
c91830cb 1421
ef199164 1422 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
467e0479
VZ
1423 size_t outLen = 0;
1424 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1425 {
1426 const wxUint32 ch = wxDecodeSurrogate(&src);
1427 if ( !src )
1428 return wxCONV_FAILED;
c91830cb 1429
467e0479 1430 outLen += BYTES_PER_CHAR;
d32a507d 1431
467e0479
VZ
1432 if ( outLen > dstLen )
1433 return wxCONV_FAILED;
b5153fd8 1434
ef199164 1435 *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
467e0479 1436 }
c91830cb 1437
467e0479 1438 return outLen;
c91830cb
VZ
1439}
1440
467e0479 1441#else // !WC_UTF16: wchar_t is UTF-32
c91830cb 1442
35d11700
VZ
1443// ----------------------------------------------------------------------------
1444// conversions without endianness change
1445// ----------------------------------------------------------------------------
1446
1447size_t
1448wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1449 const char *src, size_t srcLen) const
c91830cb 1450{
35d11700
VZ
1451 // use memcpy() as it should be much faster than hand-written loop
1452 srcLen = GetLength(src, srcLen);
1453 if ( srcLen == wxNO_LEN )
1454 return wxCONV_FAILED;
c91830cb 1455
35d11700
VZ
1456 const size_t inLen = srcLen/BYTES_PER_CHAR;
1457 if ( dst )
c91830cb 1458 {
35d11700
VZ
1459 if ( dstLen < inLen )
1460 return wxCONV_FAILED;
b5153fd8 1461
35d11700
VZ
1462 memcpy(dst, src, srcLen);
1463 }
c91830cb 1464
35d11700 1465 return inLen;
c91830cb
VZ
1466}
1467
35d11700
VZ
1468size_t
1469wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1470 const wchar_t *src, size_t srcLen) const
c91830cb 1471{
35d11700
VZ
1472 if ( srcLen == wxNO_LEN )
1473 srcLen = wxWcslen(src) + 1;
1474
1475 srcLen *= BYTES_PER_CHAR;
c91830cb 1476
35d11700 1477 if ( dst )
c91830cb 1478 {
35d11700
VZ
1479 if ( dstLen < srcLen )
1480 return wxCONV_FAILED;
c91830cb 1481
35d11700 1482 memcpy(dst, src, srcLen);
c91830cb
VZ
1483 }
1484
35d11700 1485 return srcLen;
c91830cb
VZ
1486}
1487
35d11700
VZ
1488// ----------------------------------------------------------------------------
1489// endian-reversing conversions
1490// ----------------------------------------------------------------------------
c91830cb 1491
35d11700
VZ
1492size_t
1493wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1494 const char *src, size_t srcLen) const
c91830cb 1495{
35d11700
VZ
1496 srcLen = GetLength(src, srcLen);
1497 if ( srcLen == wxNO_LEN )
1498 return wxCONV_FAILED;
1499
1500 srcLen /= BYTES_PER_CHAR;
c91830cb 1501
35d11700 1502 if ( dst )
c91830cb 1503 {
35d11700
VZ
1504 if ( dstLen < srcLen )
1505 return wxCONV_FAILED;
1506
ef199164
DS
1507 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1508 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
c91830cb 1509 {
ef199164 1510 *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff);
c91830cb 1511 }
c91830cb 1512 }
b5153fd8 1513
35d11700 1514 return srcLen;
c91830cb
VZ
1515}
1516
35d11700
VZ
1517size_t
1518wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1519 const wchar_t *src, size_t srcLen) const
c91830cb 1520{
35d11700
VZ
1521 if ( srcLen == wxNO_LEN )
1522 srcLen = wxWcslen(src) + 1;
1523
1524 srcLen *= BYTES_PER_CHAR;
c91830cb 1525
35d11700 1526 if ( dst )
c91830cb 1527 {
35d11700
VZ
1528 if ( dstLen < srcLen )
1529 return wxCONV_FAILED;
1530
ef199164 1531 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
35d11700 1532 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
c91830cb 1533 {
ef199164 1534 *outBuff++ = wxUINT32_SWAP_ALWAYS(*src);
c91830cb 1535 }
c91830cb 1536 }
b5153fd8 1537
35d11700 1538 return srcLen;
c91830cb
VZ
1539}
1540
467e0479 1541#endif // WC_UTF16/!WC_UTF16
c91830cb
VZ
1542
1543
36acb880
VZ
1544// ============================================================================
1545// The classes doing conversion using the iconv_xxx() functions
1546// ============================================================================
3caec1bb 1547
b040e242 1548#ifdef HAVE_ICONV
3a0d76bc 1549
b1d547eb
VS
1550// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1551// E2BIG if output buffer is _exactly_ as big as needed. Such case is
1552// (unless there's yet another bug in glibc) the only case when iconv()
1553// returns with (size_t)-1 (which means error) and says there are 0 bytes
1554// left in the input buffer -- when _real_ error occurs,
1555// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1556// iconv() failure.
3caec1bb
VS
1557// [This bug does not appear in glibc 2.2.]
1558#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1559#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1560 (errno != E2BIG || bufLeft != 0))
1561#else
1562#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1563#endif
1564
ab217dba 1565#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880 1566
74a7eb0b
VZ
1567#define ICONV_T_INVALID ((iconv_t)-1)
1568
1569#if SIZEOF_WCHAR_T == 4
1570 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1571 #define WC_ENC wxFONTENCODING_UTF32
1572#elif SIZEOF_WCHAR_T == 2
1573 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1574 #define WC_ENC wxFONTENCODING_UTF16
1575#else // sizeof(wchar_t) != 2 nor 4
1576 // does this ever happen?
1577 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1578#endif
1579
36acb880 1580// ----------------------------------------------------------------------------
e95354ec 1581// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1582// ----------------------------------------------------------------------------
1583
e95354ec 1584class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1585{
1586public:
e95354ec
VZ
1587 wxMBConv_iconv(const wxChar *name);
1588 virtual ~wxMBConv_iconv();
36acb880 1589
bde4baac
VZ
1590 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1591 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1592
d36c9347 1593 // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
7ef3ab50
VZ
1594 virtual size_t GetMBNulLen() const;
1595
d36c9347
VZ
1596 virtual wxMBConv *Clone() const
1597 {
1598 wxMBConv_iconv *p = new wxMBConv_iconv(m_name);
1599 p->m_minMBCharWidth = m_minMBCharWidth;
1600 return p;
1601 }
1602
e95354ec 1603 bool IsOk() const
74a7eb0b 1604 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
36acb880
VZ
1605
1606protected:
ef199164
DS
1607 // the iconv handlers used to translate from multibyte
1608 // to wide char and in the other direction
36acb880
VZ
1609 iconv_t m2w,
1610 w2m;
ef199164 1611
b1d547eb
VS
1612#if wxUSE_THREADS
1613 // guards access to m2w and w2m objects
1614 wxMutex m_iconvMutex;
1615#endif
36acb880
VZ
1616
1617private:
e95354ec 1618 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880 1619 // available on this machine, it will remain NULL
74a7eb0b 1620 static wxString ms_wcCharsetName;
36acb880
VZ
1621
1622 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1623 // different endian-ness than the native one
405d8f46 1624 static bool ms_wcNeedsSwap;
eec47cc6 1625
d36c9347
VZ
1626
1627 // name of the encoding handled by this conversion
1628 wxString m_name;
1629
7ef3ab50 1630 // cached result of GetMBNulLen(); set to 0 meaning "unknown"
c1464d9d
VZ
1631 // initially
1632 size_t m_minMBCharWidth;
36acb880
VZ
1633};
1634
8f115891
MW
1635// make the constructor available for unit testing
1636WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1637{
1638 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1639 if ( !result->IsOk() )
1640 {
1641 delete result;
1642 return 0;
1643 }
ef199164 1644
8f115891
MW
1645 return result;
1646}
1647
422e411e 1648wxString wxMBConv_iconv::ms_wcCharsetName;
e95354ec 1649bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1650
e95354ec 1651wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
d36c9347 1652 : m_name(name)
36acb880 1653{
c1464d9d 1654 m_minMBCharWidth = 0;
eec47cc6 1655
0331b385
VZ
1656 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1657 // names for the charsets
200a9923 1658 const wxCharBuffer cname(wxString(name).ToAscii());
04c79127 1659
36acb880 1660 // check for charset that represents wchar_t:
74a7eb0b 1661 if ( ms_wcCharsetName.empty() )
f1339c56 1662 {
c2b83fdd
VZ
1663 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1664
74a7eb0b
VZ
1665#if wxUSE_FONTMAP
1666 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1667#else // !wxUSE_FONTMAP
91cb7f52 1668 static const wxChar *names_static[] =
36acb880 1669 {
74a7eb0b
VZ
1670#if SIZEOF_WCHAR_T == 4
1671 _T("UCS-4"),
1672#elif SIZEOF_WCHAR_T = 2
1673 _T("UCS-2"),
1674#endif
1675 NULL
1676 };
91cb7f52 1677 const wxChar **names = names_static;
74a7eb0b 1678#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
36acb880 1679
d1f024a8 1680 for ( ; *names && ms_wcCharsetName.empty(); ++names )
74a7eb0b 1681 {
17a1ebd1 1682 const wxString nameCS(*names);
74a7eb0b
VZ
1683
1684 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
17a1ebd1 1685 wxString nameXE(nameCS);
ef199164
DS
1686
1687#ifdef WORDS_BIGENDIAN
74a7eb0b 1688 nameXE += _T("BE");
ef199164 1689#else // little endian
74a7eb0b 1690 nameXE += _T("LE");
ef199164 1691#endif
74a7eb0b 1692
c2b83fdd
VZ
1693 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1694 nameXE.c_str());
1695
74a7eb0b
VZ
1696 m2w = iconv_open(nameXE.ToAscii(), cname);
1697 if ( m2w == ICONV_T_INVALID )
3a0d76bc 1698 {
74a7eb0b 1699 // try charset w/o bytesex info (e.g. "UCS4")
c2b83fdd
VZ
1700 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1701 nameCS.c_str());
17a1ebd1 1702 m2w = iconv_open(nameCS.ToAscii(), cname);
3a0d76bc 1703
74a7eb0b
VZ
1704 // and check for bytesex ourselves:
1705 if ( m2w != ICONV_T_INVALID )
3a0d76bc 1706 {
74a7eb0b
VZ
1707 char buf[2], *bufPtr;
1708 wchar_t wbuf[2], *wbufPtr;
1709 size_t insz, outsz;
1710 size_t res;
1711
1712 buf[0] = 'A';
1713 buf[1] = 0;
1714 wbuf[0] = 0;
1715 insz = 2;
1716 outsz = SIZEOF_WCHAR_T * 2;
1717 wbufPtr = wbuf;
1718 bufPtr = buf;
1719
ef199164
DS
1720 res = iconv(
1721 m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1722 (char**)&wbufPtr, &outsz);
74a7eb0b
VZ
1723
1724 if (ICONV_FAILED(res, insz))
1725 {
1726 wxLogLastError(wxT("iconv"));
422e411e 1727 wxLogError(_("Conversion to charset '%s' doesn't work."),
17a1ebd1 1728 nameCS.c_str());
74a7eb0b
VZ
1729 }
1730 else // ok, can convert to this encoding, remember it
1731 {
17a1ebd1 1732 ms_wcCharsetName = nameCS;
74a7eb0b
VZ
1733 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1734 }
3a0d76bc
VS
1735 }
1736 }
74a7eb0b 1737 else // use charset not requiring byte swapping
36acb880 1738 {
74a7eb0b 1739 ms_wcCharsetName = nameXE;
36acb880 1740 }
3a0d76bc 1741 }
74a7eb0b 1742
0944fceb 1743 wxLogTrace(TRACE_STRCONV,
74a7eb0b 1744 wxT("iconv wchar_t charset is \"%s\"%s"),
cae8f1bf 1745 ms_wcCharsetName.empty() ? _T("<none>")
74a7eb0b
VZ
1746 : ms_wcCharsetName.c_str(),
1747 ms_wcNeedsSwap ? _T(" (needs swap)")
1748 : _T(""));
3a0d76bc 1749 }
36acb880 1750 else // we already have ms_wcCharsetName
3caec1bb 1751 {
74a7eb0b 1752 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
f1339c56 1753 }
dccce9ea 1754
74a7eb0b 1755 if ( ms_wcCharsetName.empty() )
f1339c56 1756 {
74a7eb0b 1757 w2m = ICONV_T_INVALID;
36acb880 1758 }
405d8f46
VZ
1759 else
1760 {
74a7eb0b
VZ
1761 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1762 if ( w2m == ICONV_T_INVALID )
1763 {
1764 wxLogTrace(TRACE_STRCONV,
1765 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
422e411e 1766 ms_wcCharsetName.c_str(), cname.data());
74a7eb0b 1767 }
405d8f46 1768 }
36acb880 1769}
3caec1bb 1770
e95354ec 1771wxMBConv_iconv::~wxMBConv_iconv()
36acb880 1772{
74a7eb0b 1773 if ( m2w != ICONV_T_INVALID )
36acb880 1774 iconv_close(m2w);
74a7eb0b 1775 if ( w2m != ICONV_T_INVALID )
36acb880
VZ
1776 iconv_close(w2m);
1777}
3a0d76bc 1778
bde4baac 1779size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1780{
69373110
VZ
1781 // find the string length: notice that must be done differently for
1782 // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1783 size_t inbuf;
7ef3ab50 1784 const size_t nulLen = GetMBNulLen();
69373110
VZ
1785 switch ( nulLen )
1786 {
1787 default:
467e0479 1788 return wxCONV_FAILED;
69373110
VZ
1789
1790 case 1:
1791 inbuf = strlen(psz); // arguably more optimized than our version
1792 break;
1793
1794 case 2:
1795 case 4:
1796 // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1797 // they also have to start at character boundary and not span two
1798 // adjacent characters
1799 const char *p;
1800 for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
1801 ;
1802 inbuf = p - psz;
1803 break;
1804 }
1805
b1d547eb 1806#if wxUSE_THREADS
6a17b868
SN
1807 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1808 // Unfortunately there are a couple of global wxCSConv objects such as
b1d547eb
VS
1809 // wxConvLocal that are used all over wx code, so we have to make sure
1810 // the handle is used by at most one thread at the time. Otherwise
1811 // only a few wx classes would be safe to use from non-main threads
1812 // as MB<->WC conversion would fail "randomly".
1813 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
69373110
VZ
1814#endif // wxUSE_THREADS
1815
36acb880
VZ
1816 size_t outbuf = n * SIZEOF_WCHAR_T;
1817 size_t res, cres;
1818 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1819 wchar_t *bufPtr = buf;
1820 const char *pszPtr = psz;
1821
1822 if (buf)
1823 {
1824 // have destination buffer, convert there
1825 cres = iconv(m2w,
1826 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1827 (char**)&bufPtr, &outbuf);
1828 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1829
36acb880 1830 if (ms_wcNeedsSwap)
3a0d76bc 1831 {
36acb880 1832 // convert to native endianness
17a1ebd1
VZ
1833 for ( unsigned i = 0; i < res; i++ )
1834 buf[n] = WC_BSWAP(buf[i]);
3a0d76bc 1835 }
adb45366 1836
69373110 1837 // NUL-terminate the string if there is any space left
49dd9820
VS
1838 if (res < n)
1839 buf[res] = 0;
36acb880
VZ
1840 }
1841 else
1842 {
1843 // no destination buffer... convert using temp buffer
1844 // to calculate destination buffer requirement
1845 wchar_t tbuf[8];
1846 res = 0;
ef199164
DS
1847
1848 do
1849 {
36acb880 1850 bufPtr = tbuf;
ef199164 1851 outbuf = 8 * SIZEOF_WCHAR_T;
36acb880
VZ
1852
1853 cres = iconv(m2w,
1854 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1855 (char**)&bufPtr, &outbuf );
1856
ef199164
DS
1857 res += 8 - (outbuf / SIZEOF_WCHAR_T);
1858 }
1859 while ((cres == (size_t)-1) && (errno == E2BIG));
f1339c56 1860 }
dccce9ea 1861
36acb880 1862 if (ICONV_FAILED(cres, inbuf))
f1339c56 1863 {
36acb880 1864 //VS: it is ok if iconv fails, hence trace only
ce6f8d6f 1865 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
467e0479 1866 return wxCONV_FAILED;
36acb880
VZ
1867 }
1868
1869 return res;
1870}
1871
bde4baac 1872size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1873{
b1d547eb
VS
1874#if wxUSE_THREADS
1875 // NB: explained in MB2WC
1876 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1877#endif
3698ae71 1878
156162ec
MW
1879 size_t inlen = wxWcslen(psz);
1880 size_t inbuf = inlen * SIZEOF_WCHAR_T;
36acb880
VZ
1881 size_t outbuf = n;
1882 size_t res, cres;
3a0d76bc 1883
36acb880 1884 wchar_t *tmpbuf = 0;
3caec1bb 1885
36acb880
VZ
1886 if (ms_wcNeedsSwap)
1887 {
1888 // need to copy to temp buffer to switch endianness
74a7eb0b 1889 // (doing WC_BSWAP twice on the original buffer won't help, as it
36acb880 1890 // could be in read-only memory, or be accessed in some other thread)
74a7eb0b 1891 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
17a1ebd1
VZ
1892 for ( size_t i = 0; i < inlen; i++ )
1893 tmpbuf[n] = WC_BSWAP(psz[i]);
ef199164 1894
156162ec 1895 tmpbuf[inlen] = L'\0';
74a7eb0b 1896 psz = tmpbuf;
36acb880 1897 }
3a0d76bc 1898
36acb880
VZ
1899 if (buf)
1900 {
1901 // have destination buffer, convert there
1902 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1903
ef199164 1904 res = n - outbuf;
adb45366 1905
49dd9820
VS
1906 // NB: iconv was given only wcslen(psz) characters on input, and so
1907 // it couldn't convert the trailing zero. Let's do it ourselves
1908 // if there's some room left for it in the output buffer.
1909 if (res < n)
1910 buf[0] = 0;
36acb880
VZ
1911 }
1912 else
1913 {
ef199164 1914 // no destination buffer: convert using temp buffer
36acb880
VZ
1915 // to calculate destination buffer requirement
1916 char tbuf[16];
1917 res = 0;
ef199164
DS
1918 do
1919 {
1920 buf = tbuf;
1921 outbuf = 16;
36acb880
VZ
1922
1923 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1924
36acb880 1925 res += 16 - outbuf;
ef199164
DS
1926 }
1927 while ((cres == (size_t)-1) && (errno == E2BIG));
f1339c56 1928 }
dccce9ea 1929
36acb880
VZ
1930 if (ms_wcNeedsSwap)
1931 {
1932 free(tmpbuf);
1933 }
dccce9ea 1934
36acb880
VZ
1935 if (ICONV_FAILED(cres, inbuf))
1936 {
ce6f8d6f 1937 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
467e0479 1938 return wxCONV_FAILED;
36acb880
VZ
1939 }
1940
1941 return res;
1942}
1943
7ef3ab50 1944size_t wxMBConv_iconv::GetMBNulLen() const
eec47cc6 1945{
c1464d9d 1946 if ( m_minMBCharWidth == 0 )
eec47cc6
VZ
1947 {
1948 wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1949
1950#if wxUSE_THREADS
1951 // NB: explained in MB2WC
1952 wxMutexLocker lock(self->m_iconvMutex);
1953#endif
1954
356410fc 1955 wchar_t *wnul = L"";
c1464d9d 1956 char buf[8]; // should be enough for NUL in any encoding
356410fc 1957 size_t inLen = sizeof(wchar_t),
c1464d9d 1958 outLen = WXSIZEOF(buf);
ef199164
DS
1959 char *inBuff = (char *)wnul;
1960 char *outBuff = buf;
1961 if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
356410fc 1962 {
c1464d9d 1963 self->m_minMBCharWidth = (size_t)-1;
356410fc
VZ
1964 }
1965 else // ok
1966 {
ef199164 1967 self->m_minMBCharWidth = outBuff - buf;
356410fc 1968 }
eec47cc6
VZ
1969 }
1970
c1464d9d 1971 return m_minMBCharWidth;
eec47cc6
VZ
1972}
1973
b040e242 1974#endif // HAVE_ICONV
36acb880 1975
e95354ec 1976
36acb880
VZ
1977// ============================================================================
1978// Win32 conversion classes
1979// ============================================================================
1cd52418 1980
e95354ec 1981#ifdef wxHAVE_WIN32_MB2WC
373658eb 1982
8b04d4c4 1983// from utils.cpp
d775fa82 1984#if wxUSE_FONTMAP
8b04d4c4
VZ
1985extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1986extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1987#endif
373658eb 1988
e95354ec 1989class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1990{
1991public:
bde4baac
VZ
1992 wxMBConv_win32()
1993 {
1994 m_CodePage = CP_ACP;
c1464d9d 1995 m_minMBCharWidth = 0;
bde4baac
VZ
1996 }
1997
d36c9347 1998 wxMBConv_win32(const wxMBConv_win32& conv)
1e1c5d62 1999 : wxMBConv()
d36c9347
VZ
2000 {
2001 m_CodePage = conv.m_CodePage;
2002 m_minMBCharWidth = conv.m_minMBCharWidth;
2003 }
2004
7608a683 2005#if wxUSE_FONTMAP
e95354ec 2006 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
2007 {
2008 m_CodePage = wxCharsetToCodepage(name);
c1464d9d 2009 m_minMBCharWidth = 0;
bde4baac 2010 }
dccce9ea 2011
e95354ec 2012 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
2013 {
2014 m_CodePage = wxEncodingToCodepage(encoding);
c1464d9d 2015 m_minMBCharWidth = 0;
bde4baac 2016 }
eec47cc6 2017#endif // wxUSE_FONTMAP
8b04d4c4 2018
d36c9347 2019 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 2020 {
02272c9c
VZ
2021 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
2022 // the behaviour is not compatible with the Unix version (using iconv)
2023 // and break the library itself, e.g. wxTextInputStream::NextChar()
2024 // wouldn't work if reading an incomplete MB char didn't result in an
2025 // error
667e5b3e 2026 //
89028980 2027 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
830f8f11
VZ
2028 // Win XP or newer and it is not supported for UTF-[78] so we always
2029 // use our own conversions in this case. See
89028980
VS
2030 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2031 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
830f8f11 2032 if ( m_CodePage == CP_UTF8 )
89028980 2033 {
830f8f11 2034 return wxConvUTF8.MB2WC(buf, psz, n);
89028980 2035 }
830f8f11
VZ
2036
2037 if ( m_CodePage == CP_UTF7 )
2038 {
2039 return wxConvUTF7.MB2WC(buf, psz, n);
2040 }
2041
2042 int flags = 0;
2043 if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
2044 IsAtLeastWin2kSP4() )
89028980 2045 {
830f8f11 2046 flags = MB_ERR_INVALID_CHARS;
89028980 2047 }
667e5b3e 2048
2b5f62a0
VZ
2049 const size_t len = ::MultiByteToWideChar
2050 (
2051 m_CodePage, // code page
667e5b3e 2052 flags, // flags: fall on error
2b5f62a0
VZ
2053 psz, // input string
2054 -1, // its length (NUL-terminated)
b4da152e 2055 buf, // output string
2b5f62a0
VZ
2056 buf ? n : 0 // size of output buffer
2057 );
89028980
VS
2058 if ( !len )
2059 {
2060 // function totally failed
467e0479 2061 return wxCONV_FAILED;
89028980
VS
2062 }
2063
2064 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2065 // check if we succeeded, by doing a double trip:
2066 if ( !flags && buf )
2067 {
53c174fc
VZ
2068 const size_t mbLen = strlen(psz);
2069 wxCharBuffer mbBuf(mbLen);
89028980
VS
2070 if ( ::WideCharToMultiByte
2071 (
2072 m_CodePage,
2073 0,
2074 buf,
2075 -1,
2076 mbBuf.data(),
53c174fc 2077 mbLen + 1, // size in bytes, not length
89028980
VS
2078 NULL,
2079 NULL
2080 ) == 0 ||
2081 strcmp(mbBuf, psz) != 0 )
2082 {
2083 // we didn't obtain the same thing we started from, hence
2084 // the conversion was lossy and we consider that it failed
467e0479 2085 return wxCONV_FAILED;
89028980
VS
2086 }
2087 }
2b5f62a0 2088
03a991bc
VZ
2089 // note that it returns count of written chars for buf != NULL and size
2090 // of the needed buffer for buf == NULL so in either case the length of
2091 // the string (which never includes the terminating NUL) is one less
89028980 2092 return len - 1;
f1339c56 2093 }
dccce9ea 2094
d36c9347 2095 virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 2096 {
13dd924a
VZ
2097 /*
2098 we have a problem here: by default, WideCharToMultiByte() may
2099 replace characters unrepresentable in the target code page with bad
2100 quality approximations such as turning "1/2" symbol (U+00BD) into
2101 "1" for the code pages which don't have it and we, obviously, want
2102 to avoid this at any price
d775fa82 2103
13dd924a
VZ
2104 the trouble is that this function does it _silently_, i.e. it won't
2105 even tell us whether it did or not... Win98/2000 and higher provide
2106 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2107 we have to resort to a round trip, i.e. check that converting back
2108 results in the same string -- this is, of course, expensive but
2109 otherwise we simply can't be sure to not garble the data.
2110 */
2111
2112 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2113 // it doesn't work with CJK encodings (which we test for rather roughly
2114 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2115 // supporting it
907173e5
WS
2116 BOOL usedDef wxDUMMY_INITIALIZE(false);
2117 BOOL *pUsedDef;
13dd924a
VZ
2118 int flags;
2119 if ( CanUseNoBestFit() && m_CodePage < 50000 )
2120 {
2121 // it's our lucky day
2122 flags = WC_NO_BEST_FIT_CHARS;
2123 pUsedDef = &usedDef;
2124 }
2125 else // old system or unsupported encoding
2126 {
2127 flags = 0;
2128 pUsedDef = NULL;
2129 }
2130
2b5f62a0
VZ
2131 const size_t len = ::WideCharToMultiByte
2132 (
2133 m_CodePage, // code page
13dd924a
VZ
2134 flags, // either none or no best fit
2135 pwz, // input string
2b5f62a0
VZ
2136 -1, // it is (wide) NUL-terminated
2137 buf, // output buffer
2138 buf ? n : 0, // and its size
2139 NULL, // default "replacement" char
13dd924a 2140 pUsedDef // [out] was it used?
2b5f62a0
VZ
2141 );
2142
13dd924a
VZ
2143 if ( !len )
2144 {
2145 // function totally failed
467e0479 2146 return wxCONV_FAILED;
13dd924a
VZ
2147 }
2148
2149 // if we were really converting, check if we succeeded
2150 if ( buf )
2151 {
2152 if ( flags )
2153 {
2154 // check if the conversion failed, i.e. if any replacements
2155 // were done
2156 if ( usedDef )
467e0479 2157 return wxCONV_FAILED;
13dd924a
VZ
2158 }
2159 else // we must resort to double tripping...
2160 {
2161 wxWCharBuffer wcBuf(n);
467e0479 2162 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
13dd924a
VZ
2163 wcscmp(wcBuf, pwz) != 0 )
2164 {
2165 // we didn't obtain the same thing we started from, hence
2166 // the conversion was lossy and we consider that it failed
467e0479 2167 return wxCONV_FAILED;
13dd924a
VZ
2168 }
2169 }
2170 }
2171
03a991bc 2172 // see the comment above for the reason of "len - 1"
13dd924a 2173 return len - 1;
f1339c56 2174 }
dccce9ea 2175
7ef3ab50
VZ
2176 virtual size_t GetMBNulLen() const
2177 {
2178 if ( m_minMBCharWidth == 0 )
2179 {
2180 int len = ::WideCharToMultiByte
2181 (
2182 m_CodePage, // code page
2183 0, // no flags
2184 L"", // input string
2185 1, // translate just the NUL
2186 NULL, // output buffer
2187 0, // and its size
2188 NULL, // no replacement char
2189 NULL // [out] don't care if it was used
2190 );
2191
2192 wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2193 switch ( len )
2194 {
2195 default:
2196 wxLogDebug(_T("Unexpected NUL length %d"), len);
ef199164
DS
2197 self->m_minMBCharWidth = (size_t)-1;
2198 break;
7ef3ab50
VZ
2199
2200 case 0:
2201 self->m_minMBCharWidth = (size_t)-1;
2202 break;
2203
2204 case 1:
2205 case 2:
2206 case 4:
2207 self->m_minMBCharWidth = len;
2208 break;
2209 }
2210 }
2211
2212 return m_minMBCharWidth;
2213 }
2214
d36c9347
VZ
2215 virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2216
13dd924a
VZ
2217 bool IsOk() const { return m_CodePage != -1; }
2218
2219private:
2220 static bool CanUseNoBestFit()
2221 {
2222 static int s_isWin98Or2k = -1;
2223
2224 if ( s_isWin98Or2k == -1 )
2225 {
2226 int verMaj, verMin;
2227 switch ( wxGetOsVersion(&verMaj, &verMin) )
2228 {
406d283a 2229 case wxOS_WINDOWS_9X:
13dd924a
VZ
2230 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2231 break;
2232
406d283a 2233 case wxOS_WINDOWS_NT:
13dd924a
VZ
2234 s_isWin98Or2k = verMaj >= 5;
2235 break;
2236
2237 default:
ef199164 2238 // unknown: be conservative by default
13dd924a 2239 s_isWin98Or2k = 0;
ef199164 2240 break;
13dd924a
VZ
2241 }
2242
2243 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
2244 }
2245
2246 return s_isWin98Or2k == 1;
2247 }
f1339c56 2248
89028980
VS
2249 static bool IsAtLeastWin2kSP4()
2250 {
8942f83a
WS
2251#ifdef __WXWINCE__
2252 return false;
2253#else
89028980
VS
2254 static int s_isAtLeastWin2kSP4 = -1;
2255
2256 if ( s_isAtLeastWin2kSP4 == -1 )
2257 {
2258 OSVERSIONINFOEX ver;
2259
2260 memset(&ver, 0, sizeof(ver));
2261 ver.dwOSVersionInfoSize = sizeof(ver);
2262 GetVersionEx((OSVERSIONINFO*)&ver);
2263
2264 s_isAtLeastWin2kSP4 =
2265 ((ver.dwMajorVersion > 5) || // Vista+
2266 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2267 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2268 ver.wServicePackMajor >= 4)) // 2000 SP4+
2269 ? 1 : 0;
2270 }
2271
2272 return s_isAtLeastWin2kSP4 == 1;
8942f83a 2273#endif
89028980
VS
2274 }
2275
eec47cc6 2276
c1464d9d 2277 // the code page we're working with
b1d66b54 2278 long m_CodePage;
c1464d9d 2279
7ef3ab50 2280 // cached result of GetMBNulLen(), set to 0 initially meaning
c1464d9d
VZ
2281 // "unknown"
2282 size_t m_minMBCharWidth;
1cd52418 2283};
e95354ec
VZ
2284
2285#endif // wxHAVE_WIN32_MB2WC
2286
f7e98dee
RN
2287// ============================================================================
2288// Cocoa conversion classes
2289// ============================================================================
2290
2291#if defined(__WXCOCOA__)
2292
ef199164
DS
2293// RN: There is no UTF-32 support in either Core Foundation or Cocoa.
2294// Strangely enough, Core Foundation uses UTF-32 internally
2295// quite a bit - it's just not public (yet).
f7e98dee
RN
2296
2297#include <CoreFoundation/CFString.h>
2298#include <CoreFoundation/CFStringEncodingExt.h>
2299
2300CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 2301{
638357a0 2302 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ef199164
DS
2303
2304 switch (encoding)
ecd9653b 2305 {
ef199164
DS
2306 case wxFONTENCODING_DEFAULT :
2307 enc = CFStringGetSystemEncoding();
2308 break ;
2309
ecd9653b
WS
2310 case wxFONTENCODING_ISO8859_1 :
2311 enc = kCFStringEncodingISOLatin1 ;
2312 break ;
2313 case wxFONTENCODING_ISO8859_2 :
2314 enc = kCFStringEncodingISOLatin2;
2315 break ;
2316 case wxFONTENCODING_ISO8859_3 :
2317 enc = kCFStringEncodingISOLatin3 ;
2318 break ;
2319 case wxFONTENCODING_ISO8859_4 :
2320 enc = kCFStringEncodingISOLatin4;
2321 break ;
2322 case wxFONTENCODING_ISO8859_5 :
2323 enc = kCFStringEncodingISOLatinCyrillic;
2324 break ;
2325 case wxFONTENCODING_ISO8859_6 :
2326 enc = kCFStringEncodingISOLatinArabic;
2327 break ;
2328 case wxFONTENCODING_ISO8859_7 :
2329 enc = kCFStringEncodingISOLatinGreek;
2330 break ;
2331 case wxFONTENCODING_ISO8859_8 :
2332 enc = kCFStringEncodingISOLatinHebrew;
2333 break ;
2334 case wxFONTENCODING_ISO8859_9 :
2335 enc = kCFStringEncodingISOLatin5;
2336 break ;
2337 case wxFONTENCODING_ISO8859_10 :
2338 enc = kCFStringEncodingISOLatin6;
2339 break ;
2340 case wxFONTENCODING_ISO8859_11 :
2341 enc = kCFStringEncodingISOLatinThai;
2342 break ;
2343 case wxFONTENCODING_ISO8859_13 :
2344 enc = kCFStringEncodingISOLatin7;
2345 break ;
2346 case wxFONTENCODING_ISO8859_14 :
2347 enc = kCFStringEncodingISOLatin8;
2348 break ;
2349 case wxFONTENCODING_ISO8859_15 :
2350 enc = kCFStringEncodingISOLatin9;
2351 break ;
2352
2353 case wxFONTENCODING_KOI8 :
2354 enc = kCFStringEncodingKOI8_R;
2355 break ;
2356 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2357 enc = kCFStringEncodingDOSRussian;
2358 break ;
2359
2360// case wxFONTENCODING_BULGARIAN :
2361// enc = ;
2362// break ;
2363
2364 case wxFONTENCODING_CP437 :
ef199164 2365 enc = kCFStringEncodingDOSLatinUS ;
ecd9653b
WS
2366 break ;
2367 case wxFONTENCODING_CP850 :
2368 enc = kCFStringEncodingDOSLatin1;
2369 break ;
2370 case wxFONTENCODING_CP852 :
2371 enc = kCFStringEncodingDOSLatin2;
2372 break ;
2373 case wxFONTENCODING_CP855 :
2374 enc = kCFStringEncodingDOSCyrillic;
2375 break ;
2376 case wxFONTENCODING_CP866 :
ef199164 2377 enc = kCFStringEncodingDOSRussian ;
ecd9653b
WS
2378 break ;
2379 case wxFONTENCODING_CP874 :
2380 enc = kCFStringEncodingDOSThai;
2381 break ;
2382 case wxFONTENCODING_CP932 :
2383 enc = kCFStringEncodingDOSJapanese;
2384 break ;
2385 case wxFONTENCODING_CP936 :
ef199164 2386 enc = kCFStringEncodingDOSChineseSimplif ;
ecd9653b
WS
2387 break ;
2388 case wxFONTENCODING_CP949 :
2389 enc = kCFStringEncodingDOSKorean;
2390 break ;
2391 case wxFONTENCODING_CP950 :
2392 enc = kCFStringEncodingDOSChineseTrad;
2393 break ;
ecd9653b
WS
2394 case wxFONTENCODING_CP1250 :
2395 enc = kCFStringEncodingWindowsLatin2;
2396 break ;
2397 case wxFONTENCODING_CP1251 :
ef199164 2398 enc = kCFStringEncodingWindowsCyrillic ;
ecd9653b
WS
2399 break ;
2400 case wxFONTENCODING_CP1252 :
ef199164 2401 enc = kCFStringEncodingWindowsLatin1 ;
ecd9653b
WS
2402 break ;
2403 case wxFONTENCODING_CP1253 :
2404 enc = kCFStringEncodingWindowsGreek;
2405 break ;
2406 case wxFONTENCODING_CP1254 :
2407 enc = kCFStringEncodingWindowsLatin5;
2408 break ;
2409 case wxFONTENCODING_CP1255 :
ef199164 2410 enc = kCFStringEncodingWindowsHebrew ;
ecd9653b
WS
2411 break ;
2412 case wxFONTENCODING_CP1256 :
ef199164 2413 enc = kCFStringEncodingWindowsArabic ;
ecd9653b
WS
2414 break ;
2415 case wxFONTENCODING_CP1257 :
2416 enc = kCFStringEncodingWindowsBalticRim;
2417 break ;
638357a0
RN
2418// This only really encodes to UTF7 (if that) evidently
2419// case wxFONTENCODING_UTF7 :
2420// enc = kCFStringEncodingNonLossyASCII ;
2421// break ;
ecd9653b
WS
2422 case wxFONTENCODING_UTF8 :
2423 enc = kCFStringEncodingUTF8 ;
2424 break ;
2425 case wxFONTENCODING_EUC_JP :
2426 enc = kCFStringEncodingEUC_JP;
2427 break ;
2428 case wxFONTENCODING_UTF16 :
f7e98dee 2429 enc = kCFStringEncodingUnicode ;
ecd9653b 2430 break ;
f7e98dee
RN
2431 case wxFONTENCODING_MACROMAN :
2432 enc = kCFStringEncodingMacRoman ;
2433 break ;
2434 case wxFONTENCODING_MACJAPANESE :
2435 enc = kCFStringEncodingMacJapanese ;
2436 break ;
2437 case wxFONTENCODING_MACCHINESETRAD :
2438 enc = kCFStringEncodingMacChineseTrad ;
2439 break ;
2440 case wxFONTENCODING_MACKOREAN :
2441 enc = kCFStringEncodingMacKorean ;
2442 break ;
2443 case wxFONTENCODING_MACARABIC :
2444 enc = kCFStringEncodingMacArabic ;
2445 break ;
2446 case wxFONTENCODING_MACHEBREW :
2447 enc = kCFStringEncodingMacHebrew ;
2448 break ;
2449 case wxFONTENCODING_MACGREEK :
2450 enc = kCFStringEncodingMacGreek ;
2451 break ;
2452 case wxFONTENCODING_MACCYRILLIC :
2453 enc = kCFStringEncodingMacCyrillic ;
2454 break ;
2455 case wxFONTENCODING_MACDEVANAGARI :
2456 enc = kCFStringEncodingMacDevanagari ;
2457 break ;
2458 case wxFONTENCODING_MACGURMUKHI :
2459 enc = kCFStringEncodingMacGurmukhi ;
2460 break ;
2461 case wxFONTENCODING_MACGUJARATI :
2462 enc = kCFStringEncodingMacGujarati ;
2463 break ;
2464 case wxFONTENCODING_MACORIYA :
2465 enc = kCFStringEncodingMacOriya ;
2466 break ;
2467 case wxFONTENCODING_MACBENGALI :
2468 enc = kCFStringEncodingMacBengali ;
2469 break ;
2470 case wxFONTENCODING_MACTAMIL :
2471 enc = kCFStringEncodingMacTamil ;
2472 break ;
2473 case wxFONTENCODING_MACTELUGU :
2474 enc = kCFStringEncodingMacTelugu ;
2475 break ;
2476 case wxFONTENCODING_MACKANNADA :
2477 enc = kCFStringEncodingMacKannada ;
2478 break ;
2479 case wxFONTENCODING_MACMALAJALAM :
2480 enc = kCFStringEncodingMacMalayalam ;
2481 break ;
2482 case wxFONTENCODING_MACSINHALESE :
2483 enc = kCFStringEncodingMacSinhalese ;
2484 break ;
2485 case wxFONTENCODING_MACBURMESE :
2486 enc = kCFStringEncodingMacBurmese ;
2487 break ;
2488 case wxFONTENCODING_MACKHMER :
2489 enc = kCFStringEncodingMacKhmer ;
2490 break ;
2491 case wxFONTENCODING_MACTHAI :
2492 enc = kCFStringEncodingMacThai ;
2493 break ;
2494 case wxFONTENCODING_MACLAOTIAN :
2495 enc = kCFStringEncodingMacLaotian ;
2496 break ;
2497 case wxFONTENCODING_MACGEORGIAN :
2498 enc = kCFStringEncodingMacGeorgian ;
2499 break ;
2500 case wxFONTENCODING_MACARMENIAN :
2501 enc = kCFStringEncodingMacArmenian ;
2502 break ;
2503 case wxFONTENCODING_MACCHINESESIMP :
2504 enc = kCFStringEncodingMacChineseSimp ;
2505 break ;
2506 case wxFONTENCODING_MACTIBETAN :
2507 enc = kCFStringEncodingMacTibetan ;
2508 break ;
2509 case wxFONTENCODING_MACMONGOLIAN :
2510 enc = kCFStringEncodingMacMongolian ;
2511 break ;
2512 case wxFONTENCODING_MACETHIOPIC :
2513 enc = kCFStringEncodingMacEthiopic ;
2514 break ;
2515 case wxFONTENCODING_MACCENTRALEUR :
2516 enc = kCFStringEncodingMacCentralEurRoman ;
2517 break ;
2518 case wxFONTENCODING_MACVIATNAMESE :
2519 enc = kCFStringEncodingMacVietnamese ;
2520 break ;
2521 case wxFONTENCODING_MACARABICEXT :
2522 enc = kCFStringEncodingMacExtArabic ;
2523 break ;
2524 case wxFONTENCODING_MACSYMBOL :
2525 enc = kCFStringEncodingMacSymbol ;
2526 break ;
2527 case wxFONTENCODING_MACDINGBATS :
2528 enc = kCFStringEncodingMacDingbats ;
2529 break ;
2530 case wxFONTENCODING_MACTURKISH :
2531 enc = kCFStringEncodingMacTurkish ;
2532 break ;
2533 case wxFONTENCODING_MACCROATIAN :
2534 enc = kCFStringEncodingMacCroatian ;
2535 break ;
2536 case wxFONTENCODING_MACICELANDIC :
2537 enc = kCFStringEncodingMacIcelandic ;
2538 break ;
2539 case wxFONTENCODING_MACROMANIAN :
2540 enc = kCFStringEncodingMacRomanian ;
2541 break ;
2542 case wxFONTENCODING_MACCELTIC :
2543 enc = kCFStringEncodingMacCeltic ;
2544 break ;
2545 case wxFONTENCODING_MACGAELIC :
2546 enc = kCFStringEncodingMacGaelic ;
2547 break ;
ecd9653b
WS
2548// case wxFONTENCODING_MACKEYBOARD :
2549// enc = kCFStringEncodingMacKeyboardGlyphs ;
2550// break ;
ef199164 2551
ecd9653b
WS
2552 default :
2553 // because gcc is picky
2554 break ;
ef199164
DS
2555 }
2556
ecd9653b 2557 return enc ;
f7e98dee
RN
2558}
2559
f7e98dee
RN
2560class wxMBConv_cocoa : public wxMBConv
2561{
2562public:
2563 wxMBConv_cocoa()
2564 {
2565 Init(CFStringGetSystemEncoding()) ;
2566 }
2567
d36c9347
VZ
2568 wxMBConv_cocoa(const wxMBConv_cocoa& conv)
2569 {
2570 m_encoding = conv.m_encoding;
2571 }
2572
a6900d10 2573#if wxUSE_FONTMAP
f7e98dee
RN
2574 wxMBConv_cocoa(const wxChar* name)
2575 {
267e11c5 2576 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
f7e98dee 2577 }
a6900d10 2578#endif
f7e98dee
RN
2579
2580 wxMBConv_cocoa(wxFontEncoding encoding)
2581 {
2582 Init( wxCFStringEncFromFontEnc(encoding) );
2583 }
2584
d3c7fc99 2585 virtual ~wxMBConv_cocoa()
f7e98dee
RN
2586 {
2587 }
2588
2589 void Init( CFStringEncoding encoding)
2590 {
638357a0 2591 m_encoding = encoding ;
f7e98dee
RN
2592 }
2593
2594 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2595 {
2596 wxASSERT(szUnConv);
ecd9653b 2597
638357a0
RN
2598 CFStringRef theString = CFStringCreateWithBytes (
2599 NULL, //the allocator
2600 (const UInt8*)szUnConv,
2601 strlen(szUnConv),
2602 m_encoding,
2603 false //no BOM/external representation
f7e98dee
RN
2604 );
2605
2606 wxASSERT(theString);
2607
638357a0
RN
2608 size_t nOutLength = CFStringGetLength(theString);
2609
2610 if (szOut == NULL)
f7e98dee 2611 {
f7e98dee 2612 CFRelease(theString);
638357a0 2613 return nOutLength;
f7e98dee 2614 }
ecd9653b 2615
638357a0 2616 CFRange theRange = { 0, nOutSize };
ecd9653b 2617
638357a0
RN
2618#if SIZEOF_WCHAR_T == 4
2619 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2620#endif
3698ae71 2621
f7e98dee 2622 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
3698ae71 2623
f7e98dee 2624 CFRelease(theString);
ecd9653b 2625
ef199164 2626 szUniCharBuffer[nOutLength] = '\0';
f7e98dee
RN
2627
2628#if SIZEOF_WCHAR_T == 4
ef199164
DS
2629 wxMBConvUTF16 converter;
2630 converter.MB2WC( szOut, (const char*)szUniCharBuffer, nOutSize );
2631 delete [] szUniCharBuffer;
f7e98dee 2632#endif
3698ae71 2633
638357a0 2634 return nOutLength;
f7e98dee
RN
2635 }
2636
2637 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2638 {
638357a0 2639 wxASSERT(szUnConv);
3698ae71 2640
f7e98dee 2641 size_t nRealOutSize;
638357a0 2642 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 2643 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2644
f7e98dee 2645#if SIZEOF_WCHAR_T == 4
d9d488cf 2646 wxMBConvUTF16 converter ;
ef199164
DS
2647 nBufSize = converter.WC2MB( NULL, szUnConv, 0 );
2648 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1];
2649 converter.WC2MB( (char*) szUniBuffer, szUnConv, nBufSize + sizeof(UniChar));
f7e98dee 2650 nBufSize /= sizeof(UniChar);
f7e98dee
RN
2651#endif
2652
2653 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2654 NULL, //allocator
2655 szUniBuffer,
2656 nBufSize,
638357a0 2657 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 2658 );
ecd9653b 2659
f7e98dee 2660 wxASSERT(theString);
ecd9653b 2661
f7e98dee 2662 //Note that CER puts a BOM when converting to unicode
638357a0
RN
2663 //so we check and use getchars instead in that case
2664 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 2665 {
638357a0
RN
2666 if (szOut != NULL)
2667 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
3698ae71 2668
638357a0
RN
2669 nRealOutSize = CFStringGetLength(theString) + 1;
2670 }
2671 else
2672 {
2673 CFStringGetBytes(
2674 theString,
2675 CFRangeMake(0, CFStringGetLength(theString)),
2676 m_encoding,
2677 0, //what to put in characters that can't be converted -
2678 //0 tells CFString to return NULL if it meets such a character
2679 false, //not an external representation
2680 (UInt8*) szOut,
3698ae71 2681 nOutSize,
638357a0
RN
2682 (CFIndex*) &nRealOutSize
2683 );
f7e98dee 2684 }
ecd9653b 2685
638357a0 2686 CFRelease(theString);
ecd9653b 2687
638357a0
RN
2688#if SIZEOF_WCHAR_T == 4
2689 delete[] szUniBuffer;
2690#endif
ecd9653b 2691
f7e98dee
RN
2692 return nRealOutSize - 1;
2693 }
2694
d36c9347
VZ
2695 virtual wxMBConv *Clone() const { return new wxMBConv_cocoa(*this); }
2696
f7e98dee 2697 bool IsOk() const
ecd9653b 2698 {
3698ae71 2699 return m_encoding != kCFStringEncodingInvalidId &&
638357a0 2700 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2701 }
2702
2703private:
638357a0 2704 CFStringEncoding m_encoding ;
f7e98dee
RN
2705};
2706
2707#endif // defined(__WXCOCOA__)
2708
335d31e0
SC
2709// ============================================================================
2710// Mac conversion classes
2711// ============================================================================
2712
2713#if defined(__WXMAC__) && defined(TARGET_CARBON)
2714
2715class wxMBConv_mac : public wxMBConv
2716{
2717public:
2718 wxMBConv_mac()
2719 {
2720 Init(CFStringGetSystemEncoding()) ;
2721 }
2722
d36c9347
VZ
2723 wxMBConv_mac(const wxMBConv_mac& conv)
2724 {
2725 Init(conv.m_char_encoding);
2726 }
2727
2d1659cf 2728#if wxUSE_FONTMAP
335d31e0
SC
2729 wxMBConv_mac(const wxChar* name)
2730 {
ef199164 2731 Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) );
335d31e0 2732 }
2d1659cf 2733#endif
335d31e0
SC
2734
2735 wxMBConv_mac(wxFontEncoding encoding)
2736 {
d775fa82
WS
2737 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2738 }
2739
d3c7fc99 2740 virtual ~wxMBConv_mac()
d775fa82
WS
2741 {
2742 OSStatus status = noErr ;
739cb14a
SC
2743 if (m_MB2WC_converter)
2744 status = TECDisposeConverter(m_MB2WC_converter);
2745 if (m_WC2MB_converter)
2746 status = TECDisposeConverter(m_WC2MB_converter);
d775fa82
WS
2747 }
2748
739cb14a
SC
2749 void Init( TextEncodingBase encoding,TextEncodingVariant encodingVariant = kTextEncodingDefaultVariant ,
2750 TextEncodingFormat encodingFormat = kTextEncodingDefaultFormat)
d775fa82 2751 {
739cb14a
SC
2752 m_MB2WC_converter = NULL ;
2753 m_WC2MB_converter = NULL ;
2754 m_char_encoding = CreateTextEncoding(encoding, encodingVariant, encodingFormat) ;
ef199164 2755 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 0, kUnicode16BitFormat) ;
739cb14a 2756 }
d775fa82 2757
739cb14a
SC
2758 virtual void CreateIfNeeded() const
2759 {
2760 if ( m_MB2WC_converter == NULL && m_WC2MB_converter == NULL )
2761 {
2762 OSStatus status = noErr ;
2763 status = TECCreateConverter(&m_MB2WC_converter,
d775fa82
WS
2764 m_char_encoding,
2765 m_unicode_encoding);
739cb14a
SC
2766 wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2767 status = TECCreateConverter(&m_WC2MB_converter,
d775fa82
WS
2768 m_unicode_encoding,
2769 m_char_encoding);
739cb14a
SC
2770 wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2771 }
d775fa82 2772 }
57bd4c60 2773
335d31e0
SC
2774 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2775 {
739cb14a 2776 CreateIfNeeded() ;
d775fa82
WS
2777 OSStatus status = noErr ;
2778 ByteCount byteOutLen ;
9088c87b 2779 ByteCount byteInLen = strlen(psz) + 1;
d775fa82
WS
2780 wchar_t *tbuf = NULL ;
2781 UniChar* ubuf = NULL ;
2782 size_t res = 0 ;
2783
2784 if (buf == NULL)
2785 {
ef199164
DS
2786 // Apple specs say at least 32
2787 n = wxMax( 32, byteInLen ) ;
2788 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
d775fa82 2789 }
ef199164 2790
d775fa82 2791 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
ef199164 2792
f3a355ce 2793#if SIZEOF_WCHAR_T == 4
d775fa82 2794 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2795#else
d775fa82 2796 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2797#endif
ef199164
DS
2798
2799 status = TECConvertText(
2800 m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
2801 (TextPtr) ubuf, byteBufferLen, &byteOutLen);
2802
f3a355ce 2803#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2804 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2805 // is not properly terminated we get random characters at the end
2806 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d9d488cf 2807 wxMBConvUTF16 converter ;
ef199164 2808 res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
d775fa82 2809 free( ubuf ) ;
f3a355ce 2810#else
d775fa82 2811 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2812#endif
ef199164 2813
d775fa82
WS
2814 if ( buf == NULL )
2815 free(tbuf) ;
335d31e0 2816
335d31e0
SC
2817 if ( buf && res < n)
2818 buf[res] = 0;
2819
d775fa82 2820 return res ;
335d31e0
SC
2821 }
2822
2823 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82 2824 {
739cb14a 2825 CreateIfNeeded() ;
d775fa82
WS
2826 OSStatus status = noErr ;
2827 ByteCount byteOutLen ;
2828 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2829
2830 char *tbuf = NULL ;
2831
2832 if (buf == NULL)
2833 {
ef199164
DS
2834 // Apple specs say at least 32
2835 n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2836 tbuf = (char*) malloc( n ) ;
2837 }
2838
2839 ByteCount byteBufferLen = n ;
2840 UniChar* ubuf = NULL ;
ef199164 2841
f3a355ce 2842#if SIZEOF_WCHAR_T == 4
d9d488cf 2843 wxMBConvUTF16 converter ;
ef199164 2844 size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
d775fa82
WS
2845 byteInLen = unicharlen ;
2846 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
ef199164 2847 converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
f3a355ce 2848#else
d775fa82 2849 ubuf = (UniChar*) psz ;
f3a355ce 2850#endif
ef199164
DS
2851
2852 status = TECConvertText(
2853 m_WC2MB_converter, (ConstTextPtr) ubuf, byteInLen, &byteInLen,
2854 (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2855
f3a355ce 2856#if SIZEOF_WCHAR_T == 4
d775fa82 2857 free( ubuf ) ;
f3a355ce 2858#endif
ef199164 2859
d775fa82
WS
2860 if ( buf == NULL )
2861 free(tbuf) ;
335d31e0 2862
d775fa82 2863 size_t res = byteOutLen ;
335d31e0 2864 if ( buf && res < n)
638357a0 2865 {
335d31e0 2866 buf[res] = 0;
3698ae71 2867
638357a0
RN
2868 //we need to double-trip to verify it didn't insert any ? in place
2869 //of bogus characters
2870 wxWCharBuffer wcBuf(n);
2871 size_t pszlen = wxWcslen(psz);
467e0479 2872 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
638357a0
RN
2873 wxWcslen(wcBuf) != pszlen ||
2874 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2875 {
2876 // we didn't obtain the same thing we started from, hence
2877 // the conversion was lossy and we consider that it failed
467e0479 2878 return wxCONV_FAILED;
638357a0
RN
2879 }
2880 }
335d31e0 2881
d775fa82 2882 return res ;
335d31e0
SC
2883 }
2884
d3478e2c 2885 virtual wxMBConv *Clone() const { return new wxMBConv_mac(*this); }
d36c9347 2886
335d31e0 2887 bool IsOk() const
57bd4c60 2888 {
739cb14a 2889 CreateIfNeeded() ;
57bd4c60 2890 return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL;
739cb14a 2891 }
335d31e0 2892
739cb14a
SC
2893protected :
2894 mutable TECObjectRef m_MB2WC_converter;
2895 mutable TECObjectRef m_WC2MB_converter;
d775fa82 2896
ef199164
DS
2897 TextEncodingBase m_char_encoding;
2898 TextEncodingBase m_unicode_encoding;
335d31e0
SC
2899};
2900
739cb14a
SC
2901// MB is decomposed (D) normalized UTF8
2902
2903class wxMBConv_macUTF8D : public wxMBConv_mac
2904{
2905public :
57bd4c60 2906 wxMBConv_macUTF8D()
739cb14a
SC
2907 {
2908 Init( kTextEncodingUnicodeDefault , kUnicodeNoSubset , kUnicodeUTF8Format ) ;
2909 m_uni = NULL;
fbb0b8af 2910 m_uniBack = NULL ;
739cb14a 2911 }
57bd4c60 2912
d3c7fc99 2913 virtual ~wxMBConv_macUTF8D()
739cb14a 2914 {
fbb0b8af
SC
2915 if (m_uni!=NULL)
2916 DisposeUnicodeToTextInfo(&m_uni);
2917 if (m_uniBack!=NULL)
2918 DisposeUnicodeToTextInfo(&m_uniBack);
739cb14a 2919 }
57bd4c60 2920
739cb14a
SC
2921 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2922 {
2923 CreateIfNeeded() ;
2924 OSStatus status = noErr ;
2925 ByteCount byteOutLen ;
2926 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2927
2928 char *tbuf = NULL ;
2929
2930 if (buf == NULL)
2931 {
2932 // Apple specs say at least 32
2933 n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2934 tbuf = (char*) malloc( n ) ;
2935 }
2936
2937 ByteCount byteBufferLen = n ;
2938 UniChar* ubuf = NULL ;
2939
2940#if SIZEOF_WCHAR_T == 4
2941 wxMBConvUTF16 converter ;
2942 size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
2943 byteInLen = unicharlen ;
2944 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2945 converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
2946#else
2947 ubuf = (UniChar*) psz ;
2948#endif
2949
57bd4c60
WS
2950 // ubuf is a non-decomposed UniChar buffer
2951
739cb14a
SC
2952 ByteCount dcubuflen = byteInLen * 2 + 2 ;
2953 ByteCount dcubufread , dcubufwritten ;
57bd4c60
WS
2954 UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
2955
2956 ConvertFromUnicodeToText( m_uni , byteInLen , ubuf ,
739cb14a 2957 kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , dcubuf ) ;
57bd4c60 2958
739cb14a
SC
2959 // we now convert that decomposed buffer into UTF8
2960
2961 status = TECConvertText(
2962 m_WC2MB_converter, (ConstTextPtr) dcubuf, dcubufwritten, &dcubufread,
2963 (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2964
2965 free( dcubuf );
2966
2967#if SIZEOF_WCHAR_T == 4
2968 free( ubuf ) ;
2969#endif
2970
2971 if ( buf == NULL )
2972 free(tbuf) ;
2973
2974 size_t res = byteOutLen ;
2975 if ( buf && res < n)
2976 {
2977 buf[res] = 0;
2978 // don't test for round-trip fidelity yet, we cannot guarantee it yet
2979 }
2980
2981 return res ;
2982 }
57bd4c60 2983
fbb0b8af
SC
2984 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2985 {
2986 CreateIfNeeded() ;
2987 OSStatus status = noErr ;
2988 ByteCount byteOutLen ;
2989 ByteCount byteInLen = strlen(psz) + 1;
2990 wchar_t *tbuf = NULL ;
2991 UniChar* ubuf = NULL ;
2992 size_t res = 0 ;
57bd4c60 2993
fbb0b8af
SC
2994 if (buf == NULL)
2995 {
2996 // Apple specs say at least 32
2997 n = wxMax( 32, byteInLen ) ;
2998 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
2999 }
57bd4c60 3000
fbb0b8af 3001 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
57bd4c60 3002
fbb0b8af
SC
3003#if SIZEOF_WCHAR_T == 4
3004 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
3005#else
3006 ubuf = (UniChar*) (buf ? buf : tbuf) ;
3007#endif
57bd4c60 3008
fbb0b8af
SC
3009 ByteCount dcubuflen = byteBufferLen * 2 + 2 ;
3010 ByteCount dcubufread , dcubufwritten ;
57bd4c60 3011 UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
fbb0b8af
SC
3012
3013 status = TECConvertText(
3014 m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
3015 (TextPtr) dcubuf, dcubuflen, &byteOutLen);
3016 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
3017 // is not properly terminated we get random characters at the end
3018 dcubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
57bd4c60 3019
fbb0b8af 3020 // now from the decomposed UniChar to properly composed uniChar
57bd4c60 3021 ConvertFromUnicodeToText( m_uniBack , byteOutLen , dcubuf ,
fbb0b8af
SC
3022 kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , ubuf ) ;
3023
3024 free( dcubuf );
3025 byteOutLen = dcubufwritten ;
3026 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
57bd4c60
WS
3027
3028
fbb0b8af
SC
3029#if SIZEOF_WCHAR_T == 4
3030 wxMBConvUTF16 converter ;
3031 res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
3032 free( ubuf ) ;
3033#else
3034 res = byteOutLen / sizeof( UniChar ) ;
3035#endif
57bd4c60 3036
fbb0b8af
SC
3037 if ( buf == NULL )
3038 free(tbuf) ;
57bd4c60 3039
fbb0b8af
SC
3040 if ( buf && res < n)
3041 buf[res] = 0;
57bd4c60 3042
fbb0b8af
SC
3043 return res ;
3044 }
3045
739cb14a
SC
3046 virtual void CreateIfNeeded() const
3047 {
3048 wxMBConv_mac::CreateIfNeeded() ;
3049 if ( m_uni == NULL )
3050 {
3051 m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3052 kUnicodeNoSubset, kTextEncodingDefaultFormat);
3053 m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3054 kUnicodeCanonicalDecompVariant, kTextEncodingDefaultFormat);
3055 m_map.mappingVersion = kUnicodeUseLatestMapping;
57bd4c60
WS
3056
3057 OSStatus err = CreateUnicodeToTextInfo(&m_map, &m_uni);
739cb14a 3058 wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
57bd4c60 3059
fbb0b8af
SC
3060 m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3061 kUnicodeNoSubset, kTextEncodingDefaultFormat);
3062 m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3063 kUnicodeCanonicalCompVariant, kTextEncodingDefaultFormat);
3064 m_map.mappingVersion = kUnicodeUseLatestMapping;
57bd4c60 3065 err = CreateUnicodeToTextInfo(&m_map, &m_uniBack);
fbb0b8af 3066 wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
739cb14a
SC
3067 }
3068 }
3069protected :
3070 mutable UnicodeToTextInfo m_uni;
fbb0b8af 3071 mutable UnicodeToTextInfo m_uniBack;
739cb14a 3072 mutable UnicodeMapping m_map;
57bd4c60 3073};
335d31e0 3074#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 3075
36acb880
VZ
3076// ============================================================================
3077// wxEncodingConverter based conversion classes
3078// ============================================================================
3079
1e6feb95 3080#if wxUSE_FONTMAP
1cd52418 3081
e95354ec 3082class wxMBConv_wxwin : public wxMBConv
1cd52418 3083{
8b04d4c4
VZ
3084private:
3085 void Init()
3086 {
3087 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
3088 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
3089 }
3090
6001e347 3091public:
f1339c56
RR
3092 // temporarily just use wxEncodingConverter stuff,
3093 // so that it works while a better implementation is built
e95354ec 3094 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
3095 {
3096 if (name)
267e11c5 3097 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
3098 else
3099 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 3100
8b04d4c4
VZ
3101 Init();
3102 }
3103
e95354ec 3104 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
3105 {
3106 m_enc = enc;
3107
3108 Init();
f1339c56 3109 }
dccce9ea 3110
bde4baac 3111 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
3112 {
3113 size_t inbuf = strlen(psz);
dccce9ea 3114 if (buf)
c643a977 3115 {
ef199164 3116 if (!m2w.Convert(psz, buf))
467e0479 3117 return wxCONV_FAILED;
c643a977 3118 }
f1339c56
RR
3119 return inbuf;
3120 }
dccce9ea 3121
bde4baac 3122 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 3123 {
f8d791e0 3124 const size_t inbuf = wxWcslen(psz);
f1339c56 3125 if (buf)
c643a977 3126 {
ef199164 3127 if (!w2m.Convert(psz, buf))
467e0479 3128 return wxCONV_FAILED;
c643a977 3129 }
dccce9ea 3130
f1339c56
RR
3131 return inbuf;
3132 }
dccce9ea 3133
7ef3ab50 3134 virtual size_t GetMBNulLen() const
eec47cc6
VZ
3135 {
3136 switch ( m_enc )
3137 {
3138 case wxFONTENCODING_UTF16BE:
3139 case wxFONTENCODING_UTF16LE:
c1464d9d 3140 return 2;
eec47cc6
VZ
3141
3142 case wxFONTENCODING_UTF32BE:
3143 case wxFONTENCODING_UTF32LE:
c1464d9d 3144 return 4;
eec47cc6
VZ
3145
3146 default:
c1464d9d 3147 return 1;
eec47cc6
VZ
3148 }
3149 }
3150
d36c9347
VZ
3151 virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
3152
7ef3ab50
VZ
3153 bool IsOk() const { return m_ok; }
3154
3155public:
3156 wxFontEncoding m_enc;
3157 wxEncodingConverter m2w, w2m;
3158
3159private:
cafbf6fb
VZ
3160 // were we initialized successfully?
3161 bool m_ok;
fc7a2a60 3162
e95354ec 3163 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 3164};
6001e347 3165
8f115891
MW
3166// make the constructors available for unit testing
3167WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
3168{
3169 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
3170 if ( !result->IsOk() )
3171 {
3172 delete result;
3173 return 0;
3174 }
ef199164 3175
8f115891
MW
3176 return result;
3177}
3178
1e6feb95
VZ
3179#endif // wxUSE_FONTMAP
3180
36acb880
VZ
3181// ============================================================================
3182// wxCSConv implementation
3183// ============================================================================
3184
8b04d4c4 3185void wxCSConv::Init()
6001e347 3186{
e95354ec
VZ
3187 m_name = NULL;
3188 m_convReal = NULL;
3189 m_deferred = true;
3190}
3191
8b04d4c4
VZ
3192wxCSConv::wxCSConv(const wxChar *charset)
3193{
3194 Init();
82713003 3195
e95354ec
VZ
3196 if ( charset )
3197 {
e95354ec
VZ
3198 SetName(charset);
3199 }
bda3d86a 3200
e4277538
VZ
3201#if wxUSE_FONTMAP
3202 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
3203#else
bda3d86a 3204 m_encoding = wxFONTENCODING_SYSTEM;
e4277538 3205#endif
6001e347
RR
3206}
3207
8b04d4c4
VZ
3208wxCSConv::wxCSConv(wxFontEncoding encoding)
3209{
bda3d86a 3210 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
3211 {
3212 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
3213
3214 encoding = wxFONTENCODING_SYSTEM;
3215 }
3216
8b04d4c4
VZ
3217 Init();
3218
bda3d86a 3219 m_encoding = encoding;
8b04d4c4
VZ
3220}
3221
6001e347
RR
3222wxCSConv::~wxCSConv()
3223{
65e50848
JS
3224 Clear();
3225}
3226
54380f29 3227wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 3228 : wxMBConv()
54380f29 3229{
8b04d4c4
VZ
3230 Init();
3231
54380f29 3232 SetName(conv.m_name);
8b04d4c4 3233 m_encoding = conv.m_encoding;
54380f29
GD
3234}
3235
3236wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
3237{
3238 Clear();
8b04d4c4 3239
54380f29 3240 SetName(conv.m_name);
8b04d4c4
VZ
3241 m_encoding = conv.m_encoding;
3242
54380f29
GD
3243 return *this;
3244}
3245
65e50848
JS
3246void wxCSConv::Clear()
3247{
8b04d4c4 3248 free(m_name);
e95354ec 3249 delete m_convReal;
8b04d4c4 3250
65e50848 3251 m_name = NULL;
e95354ec 3252 m_convReal = NULL;
6001e347
RR
3253}
3254
3255void wxCSConv::SetName(const wxChar *charset)
3256{
f1339c56
RR
3257 if (charset)
3258 {
3259 m_name = wxStrdup(charset);
e95354ec 3260 m_deferred = true;
f1339c56 3261 }
6001e347
RR
3262}
3263
#if wxUSE_FONTMAP

// map from an encoding to a charset name
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString,
                     wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// cache used by wxCSConv::DoCreate(): it stores the charset name with which
// an iconv converter could be created for the encoding or an empty string
// if none of its names worked
static wxEncodingNameCache gs_nameCache;

#endif // wxUSE_FONTMAP
3271
e95354ec
VZ
3272wxMBConv *wxCSConv::DoCreate() const
3273{
ce6f8d6f
VZ
3274#if wxUSE_FONTMAP
3275 wxLogTrace(TRACE_STRCONV,
3276 wxT("creating conversion for %s"),
3277 (m_name ? m_name
3278 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
3279#endif // wxUSE_FONTMAP
3280
c547282d
VZ
3281 // check for the special case of ASCII or ISO8859-1 charset: as we have
3282 // special knowledge of it anyhow, we don't need to create a special
3283 // conversion object
e4277538
VZ
3284 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
3285 m_encoding == wxFONTENCODING_DEFAULT )
f1339c56 3286 {
e95354ec
VZ
3287 // don't convert at all
3288 return NULL;
3289 }
dccce9ea 3290
e95354ec
VZ
3291 // we trust OS to do conversion better than we can so try external
3292 // conversion methods first
3293 //
3294 // the full order is:
3295 // 1. OS conversion (iconv() under Unix or Win32 API)
3296 // 2. hard coded conversions for UTF
3297 // 3. wxEncodingConverter as fall back
3298
3299 // step (1)
3300#ifdef HAVE_ICONV
c547282d 3301#if !wxUSE_FONTMAP
e95354ec 3302 if ( m_name )
c547282d 3303#endif // !wxUSE_FONTMAP
e95354ec 3304 {
c547282d 3305 wxString name(m_name);
3ef10cfc 3306#if wxUSE_FONTMAP
8b3eb85d 3307 wxFontEncoding encoding(m_encoding);
3ef10cfc 3308#endif
8b3eb85d
VZ
3309
3310 if ( !name.empty() )
3311 {
3312 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
3313 if ( conv->IsOk() )
3314 return conv;
3315
3316 delete conv;
c547282d
VZ
3317
3318#if wxUSE_FONTMAP
8b3eb85d
VZ
3319 encoding =
3320 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
c547282d 3321#endif // wxUSE_FONTMAP
8b3eb85d
VZ
3322 }
3323#if wxUSE_FONTMAP
3324 {
3325 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
3326 if ( it != gs_nameCache.end() )
3327 {
3328 if ( it->second.empty() )
3329 return NULL;
c547282d 3330
8b3eb85d
VZ
3331 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
3332 if ( conv->IsOk() )
3333 return conv;
e95354ec 3334
8b3eb85d
VZ
3335 delete conv;
3336 }
3337
3338 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
3c67ec06 3339 // CS : in case this does not return valid names (eg for MacRoman) encoding
57bd4c60 3340 // got a 'failure' entry in the cache all the same, although it just has to
3c67ec06
SC
3341 // be created using a different method, so only store failed iconv creation
3342 // attempts (or perhaps we shoulnd't do this at all ?)
3343 if ( names[0] != NULL )
8b3eb85d 3344 {
3c67ec06 3345 for ( ; *names; ++names )
8b3eb85d 3346 {
3c67ec06
SC
3347 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
3348 if ( conv->IsOk() )
3349 {
3350 gs_nameCache[encoding] = *names;
3351 return conv;
3352 }
3353
3354 delete conv;
8b3eb85d
VZ
3355 }
3356
3c67ec06 3357 gs_nameCache[encoding] = _T(""); // cache the failure
8b3eb85d 3358 }
8b3eb85d
VZ
3359 }
3360#endif // wxUSE_FONTMAP
e95354ec
VZ
3361 }
3362#endif // HAVE_ICONV
3363
3364#ifdef wxHAVE_WIN32_MB2WC
3365 {
7608a683 3366#if wxUSE_FONTMAP
e95354ec
VZ
3367 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
3368 : new wxMBConv_win32(m_encoding);
3369 if ( conv->IsOk() )
3370 return conv;
3371
3372 delete conv;
7608a683
WS
3373#else
3374 return NULL;
3375#endif
e95354ec
VZ
3376 }
3377#endif // wxHAVE_WIN32_MB2WC
ef199164 3378
d775fa82
WS
3379#if defined(__WXMAC__)
3380 {
5c3c8676 3381 // leave UTF16 and UTF32 to the built-ins of wx
3698ae71 3382 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
5c3c8676 3383 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
d775fa82 3384 {
2d1659cf 3385#if wxUSE_FONTMAP
d775fa82
WS
3386 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
3387 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
3388#else
3389 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
3390#endif
d775fa82 3391 if ( conv->IsOk() )
f7e98dee
RN
3392 return conv;
3393
3394 delete conv;
3395 }
3396 }
3397#endif
ef199164 3398
f7e98dee
RN
3399#if defined(__WXCOCOA__)
3400 {
3401 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
3402 {
a6900d10 3403#if wxUSE_FONTMAP
f7e98dee
RN
3404 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
3405 : new wxMBConv_cocoa(m_encoding);
a6900d10
RN
3406#else
3407 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
3408#endif
ef199164 3409
f7e98dee 3410 if ( conv->IsOk() )
d775fa82
WS
3411 return conv;
3412
3413 delete conv;
3414 }
335d31e0
SC
3415 }
3416#endif
e95354ec
VZ
3417 // step (2)
3418 wxFontEncoding enc = m_encoding;
3419#if wxUSE_FONTMAP
c547282d
VZ
3420 if ( enc == wxFONTENCODING_SYSTEM && m_name )
3421 {
3422 // use "false" to suppress interactive dialogs -- we can be called from
3423 // anywhere and popping up a dialog from here is the last thing we want to
3424 // do
267e11c5 3425 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 3426 }
e95354ec
VZ
3427#endif // wxUSE_FONTMAP
3428
3429 switch ( enc )
3430 {
3431 case wxFONTENCODING_UTF7:
3432 return new wxMBConvUTF7;
3433
3434 case wxFONTENCODING_UTF8:
3435 return new wxMBConvUTF8;
3436
e95354ec
VZ
3437 case wxFONTENCODING_UTF16BE:
3438 return new wxMBConvUTF16BE;
3439
3440 case wxFONTENCODING_UTF16LE:
3441 return new wxMBConvUTF16LE;
3442
e95354ec
VZ
3443 case wxFONTENCODING_UTF32BE:
3444 return new wxMBConvUTF32BE;
3445
3446 case wxFONTENCODING_UTF32LE:
3447 return new wxMBConvUTF32LE;
3448
3449 default:
3450 // nothing to do but put here to suppress gcc warnings
ef199164 3451 break;
e95354ec
VZ
3452 }
3453
3454 // step (3)
3455#if wxUSE_FONTMAP
3456 {
3457 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
3458 : new wxMBConv_wxwin(m_encoding);
3459 if ( conv->IsOk() )
3460 return conv;
3461
3462 delete conv;
3463 }
3464#endif // wxUSE_FONTMAP
3465
a58d4f4d
VS
3466 // NB: This is a hack to prevent deadlock. What could otherwise happen
3467 // in Unicode build: wxConvLocal creation ends up being here
3468 // because of some failure and logs the error. But wxLog will try to
6a17b868
SN
3469 // attach a timestamp, for which it will need wxConvLocal (to convert
3470 // time to char* and then wchar_t*), but that fails, tries to log the
3471 // error, but wxLog has an (already locked) critical section that
3472 // guards the static buffer.
a58d4f4d
VS
3473 static bool alreadyLoggingError = false;
3474 if (!alreadyLoggingError)
3475 {
3476 alreadyLoggingError = true;
3477 wxLogError(_("Cannot convert from the charset '%s'!"),
3478 m_name ? m_name
e95354ec
VZ
3479 :
3480#if wxUSE_FONTMAP
267e11c5 3481 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
e95354ec 3482#else // !wxUSE_FONTMAP
3ef10cfc 3483 wxString::Format(_("encoding %i"), m_encoding).c_str()
e95354ec
VZ
3484#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
3485 );
ef199164 3486
a58d4f4d
VS
3487 alreadyLoggingError = false;
3488 }
e95354ec
VZ
3489
3490 return NULL;
3491}
3492
3493void wxCSConv::CreateConvIfNeeded() const
3494{
3495 if ( m_deferred )
3496 {
3497 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a 3498
bda3d86a
VZ
3499 // if we don't have neither the name nor the encoding, use the default
3500 // encoding for this system
3501 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3502 {
4c75209f 3503#if wxUSE_INTL
4d312c22 3504 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
4c75209f
VS
3505#else
3506 // fallback to some reasonable default:
3507 self->m_encoding = wxFONTENCODING_ISO8859_1;
bda3d86a 3508#endif // wxUSE_INTL
4c75209f 3509 }
bda3d86a 3510
e95354ec
VZ
3511 self->m_convReal = DoCreate();
3512 self->m_deferred = false;
6001e347 3513 }
6001e347
RR
3514}
3515
0f0298b1
VZ
3516bool wxCSConv::IsOk() const
3517{
3518 CreateConvIfNeeded();
3519
3520 // special case: no convReal created for wxFONTENCODING_ISO8859_1
3521 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
3522 return true; // always ok as we do it ourselves
3523
3524 // m_convReal->IsOk() is called at its own creation, so we know it must
3525 // be ok if m_convReal is non-NULL
3526 return m_convReal != NULL;
3527}
3528
1c714a5d
VZ
3529size_t wxCSConv::ToWChar(wchar_t *dst, size_t dstLen,
3530 const char *src, size_t srcLen) const
3531{
3532 CreateConvIfNeeded();
3533
2c74c558
VS
3534 if (m_convReal)
3535 return m_convReal->ToWChar(dst, dstLen, src, srcLen);
3536
3537 // latin-1 (direct)
3538 return wxMBConv::ToWChar(dst, dstLen, src, srcLen);
1c714a5d
VZ
3539}
3540
3541size_t wxCSConv::FromWChar(char *dst, size_t dstLen,
3542 const wchar_t *src, size_t srcLen) const
3543{
3544 CreateConvIfNeeded();
3545
2c74c558
VS
3546 if (m_convReal)
3547 return m_convReal->FromWChar(dst, dstLen, src, srcLen);
3548
3549 // latin-1 (direct)
3550 return wxMBConv::FromWChar(dst, dstLen, src, srcLen);
1c714a5d
VZ
3551}
3552
6001e347
RR
3553size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
3554{
e95354ec 3555 CreateConvIfNeeded();
dccce9ea 3556
e95354ec
VZ
3557 if (m_convReal)
3558 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
3559
3560 // latin-1 (direct)
4def3b35 3561 size_t len = strlen(psz);
dccce9ea 3562
f1339c56
RR
3563 if (buf)
3564 {
4def3b35 3565 for (size_t c = 0; c <= len; c++)
f1339c56
RR
3566 buf[c] = (unsigned char)(psz[c]);
3567 }
dccce9ea 3568
f1339c56 3569 return len;
6001e347
RR
3570}
3571
3572size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
3573{
e95354ec 3574 CreateConvIfNeeded();
dccce9ea 3575
e95354ec
VZ
3576 if (m_convReal)
3577 return m_convReal->WC2MB(buf, psz, n);
1cd52418 3578
f1339c56 3579 // latin-1 (direct)
f8d791e0 3580 const size_t len = wxWcslen(psz);
f1339c56
RR
3581 if (buf)
3582 {
4def3b35 3583 for (size_t c = 0; c <= len; c++)
24642831
VS
3584 {
3585 if (psz[c] > 0xFF)
467e0479 3586 return wxCONV_FAILED;
ef199164 3587
907173e5 3588 buf[c] = (char)psz[c];
24642831
VS
3589 }
3590 }
3591 else
3592 {
3593 for (size_t c = 0; c <= len; c++)
3594 {
3595 if (psz[c] > 0xFF)
467e0479 3596 return wxCONV_FAILED;
24642831 3597 }
f1339c56 3598 }
dccce9ea 3599
f1339c56 3600 return len;
6001e347
RR
3601}
3602
7ef3ab50 3603size_t wxCSConv::GetMBNulLen() const
eec47cc6
VZ
3604{
3605 CreateConvIfNeeded();
3606
3607 if ( m_convReal )
3608 {
7ef3ab50 3609 return m_convReal->GetMBNulLen();
eec47cc6
VZ
3610 }
3611
c1464d9d 3612 return 1;
eec47cc6
VZ
3613}
3614
bde4baac
VZ
3615// ----------------------------------------------------------------------------
3616// globals
3617// ----------------------------------------------------------------------------
3618
3619#ifdef __WINDOWS__
3620 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
3621#elif defined(__WXMAC__) && !defined(__MACH__)
3622 static wxMBConv_mac wxConvLibcObj ;
bde4baac 3623#else
dcc8fac0 3624 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
3625#endif
3626
3627static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
3628static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
3629static wxMBConvUTF7 wxConvUTF7Obj;
3630static wxMBConvUTF8 wxConvUTF8Obj;
d43d9ee7 3631#if defined(__WXMAC__) && defined(TARGET_CARBON)
739cb14a
SC
3632static wxMBConv_macUTF8D wxConvMacUTF8DObj;
3633#endif
bde4baac
VZ
3634WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
3635WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
3636WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
3637WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
3638WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
3639WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
d5bef0a3 3640WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = &wxConvLocal;
f5a1953b
VZ
3641WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
3642#ifdef __WXOSX__
d43d9ee7 3643#if defined(__WXMAC__) && defined(TARGET_CARBON)
739cb14a 3644 wxConvMacUTF8DObj;
d43d9ee7
SC
3645#else
3646 wxConvUTF8Obj;
3647#endif
69c928ef 3648#else // !__WXOSX__
ea8ce907 3649 wxConvLibcObj;
69c928ef
VZ
3650#endif // __WXOSX__/!__WXOSX__
3651
3652#if wxUSE_UNICODE
3653
3654wxWCharBuffer wxSafeConvertMB2WX(const char *s)
3655{
3656 if ( !s )
3657 return wxWCharBuffer();
3658
3659 wxWCharBuffer wbuf(wxConvLibc.cMB2WX(s));
3660 if ( !wbuf )
3661 wbuf = wxConvUTF8.cMB2WX(s);
3662 if ( !wbuf )
3663 wbuf = wxConvISO8859_1.cMB2WX(s);
3664
3665 return wbuf;
3666}
3667
3668wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
3669{
3670 if ( !ws )
3671 return wxCharBuffer();
3672
3673 wxCharBuffer buf(wxConvLibc.cWX2MB(ws));
3674 if ( !buf )
3675 buf = wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL).cWX2MB(ws);
3676
3677 return buf;
3678}
3679
3680#endif // wxUSE_UNICODE
f5a1953b 3681
bde4baac
VZ
3682#else // !wxUSE_WCHAR_T
3683
3684// stand-ins in absence of wchar_t
3685WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
3686 wxConvISO8859_1,
3687 wxConvLocal,
3688 wxConvUTF8;
3689
3690#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T