]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
adding common search field file
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347 1/////////////////////////////////////////////////////////////////////////////
38d4b1e4 2// Name: src/common/strconv.cpp
6001e347 3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
6001e347
RR
15// For compilers that support precompilation, includes "wx.h".
16#include "wx/wxprec.h"
17
373658eb 18#ifndef WX_PRECOMP
57bd4c60
WS
19 #ifdef __WXMSW__
20 #include "wx/msw/missing.h"
21 #endif
373658eb
VZ
22 #include "wx/intl.h"
23 #include "wx/log.h"
de6185e2 24 #include "wx/utils.h"
df69528b 25 #include "wx/hashmap.h"
ef199164 26#endif
373658eb 27
bde4baac
VZ
28#include "wx/strconv.h"
29
30#if wxUSE_WCHAR_T
31
7608a683 32#ifdef __WINDOWS__
532d575b 33 #include "wx/msw/private.h"
0a1c1e62
GRG
34#endif
35
1c193821 36#ifndef __WXWINCE__
1cd52418 37#include <errno.h>
1c193821
JS
38#endif
39
6001e347
RR
40#include <ctype.h>
41#include <string.h>
42#include <stdlib.h>
43
e95354ec
VZ
44#if defined(__WIN32__) && !defined(__WXMICROWIN__)
45 #define wxHAVE_WIN32_MB2WC
ef199164 46#endif
e95354ec 47
6001e347 48#ifdef __SALFORDC__
373658eb 49 #include <clib.h>
6001e347
RR
50#endif
51
b040e242 52#ifdef HAVE_ICONV
373658eb 53 #include <iconv.h>
b1d547eb 54 #include "wx/thread.h"
1cd52418 55#endif
1cd52418 56
373658eb
VZ
57#include "wx/encconv.h"
58#include "wx/fontmap.h"
59
335d31e0 60#ifdef __WXMAC__
40ba2f3b 61#ifndef __DARWIN__
4227afa4
SC
62#include <ATSUnicode.h>
63#include <TextCommon.h>
64#include <TextEncodingConverter.h>
40ba2f3b 65#endif
335d31e0 66
ef199164
DS
67// includes Mac headers
68#include "wx/mac/private.h"
335d31e0 69#endif
ce6f8d6f 70
ef199164 71
ce6f8d6f
VZ
72#define TRACE_STRCONV _T("strconv")
73
467e0479
VZ
74// WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
75// be 4 bytes
4948c2b6 76#if SIZEOF_WCHAR_T == 2
ac11db3a
MW
77 #define WC_UTF16
78#endif
79
ef199164 80
373658eb
VZ
81// ============================================================================
82// implementation
83// ============================================================================
84
69373110
VZ
// helper function of cMB2WC(): returns true if at least one of the n bytes
// starting at p is not NUL and false if all of them are NUL (or if n is 0)
static bool NotAllNULs(const char *p, size_t n)
{
    for ( const char * const end = p + n; p != end; ++p )
    {
        if ( *p != '\0' )
            return true;
    }

    return false;
}
93
373658eb 94// ----------------------------------------------------------------------------
467e0479 95// UTF-16 en/decoding to/from UCS-4 with surrogates handling
373658eb 96// ----------------------------------------------------------------------------
6001e347 97
c91830cb 98static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 99{
ef199164 100 if (input <= 0xffff)
4def3b35 101 {
999836aa
VZ
102 if (output)
103 *output = (wxUint16) input;
ef199164 104
4def3b35 105 return 1;
dccce9ea 106 }
ef199164 107 else if (input >= 0x110000)
4def3b35 108 {
467e0479 109 return wxCONV_FAILED;
dccce9ea
VZ
110 }
111 else
4def3b35 112 {
dccce9ea 113 if (output)
4def3b35 114 {
ef199164
DS
115 *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
116 *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
4def3b35 117 }
ef199164 118
4def3b35 119 return 2;
1cd52418 120 }
1cd52418
OK
121}
122
c91830cb 123static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 124{
ef199164 125 if ((*input < 0xd800) || (*input > 0xdfff))
4def3b35
VS
126 {
127 output = *input;
128 return 1;
dccce9ea 129 }
ef199164 130 else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
4def3b35
VS
131 {
132 output = *input;
467e0479 133 return wxCONV_FAILED;
dccce9ea
VZ
134 }
135 else
4def3b35
VS
136 {
137 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
138 return 2;
139 }
1cd52418
OK
140}
141
467e0479 142#ifdef WC_UTF16
35d11700
VZ
143 typedef wchar_t wxDecodeSurrogate_t;
144#else // !WC_UTF16
145 typedef wxUint16 wxDecodeSurrogate_t;
146#endif // WC_UTF16/!WC_UTF16
467e0479
VZ
147
148// returns the next UTF-32 character from the wchar_t buffer and advances the
149// pointer to the character after this one
150//
151// if an invalid character is found, *pSrc is set to NULL, the caller must
152// check for this
35d11700 153static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
467e0479
VZ
154{
155 wxUint32 out;
8d3dd069
VZ
156 const size_t
157 n = decode_utf16(wx_reinterpret_cast(const wxUint16 *, *pSrc), out);
467e0479
VZ
158 if ( n == wxCONV_FAILED )
159 *pSrc = NULL;
160 else
161 *pSrc += n;
162
163 return out;
164}
165
f6bcfd97 166// ----------------------------------------------------------------------------
6001e347 167// wxMBConv
f6bcfd97 168// ----------------------------------------------------------------------------
2c53a80a 169
483b0434
VZ
170size_t
171wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
172 const char *src, size_t srcLen) const
6001e347 173{
483b0434
VZ
174 // although new conversion classes are supposed to implement this function
175 // directly, the existins ones only implement the old MB2WC() and so, to
176 // avoid to have to rewrite all conversion classes at once, we provide a
177 // default (but not efficient) implementation of this one in terms of the
178 // old function by copying the input to ensure that it's NUL-terminated and
179 // then using MB2WC() to convert it
6001e347 180
483b0434
VZ
181 // the number of chars [which would be] written to dst [if it were not NULL]
182 size_t dstWritten = 0;
eec47cc6 183
c1464d9d 184 // the number of NULs terminating this string
a78c43f1 185 size_t nulLen = 0; // not really needed, but just to avoid warnings
eec47cc6 186
c1464d9d
VZ
187 // if we were not given the input size we just have to assume that the
188 // string is properly terminated as we have no way of knowing how long it
189 // is anyhow, but if we do have the size check whether there are enough
190 // NULs at the end
483b0434
VZ
191 wxCharBuffer bufTmp;
192 const char *srcEnd;
467e0479 193 if ( srcLen != wxNO_LEN )
eec47cc6 194 {
c1464d9d 195 // we need to know how to find the end of this string
7ef3ab50 196 nulLen = GetMBNulLen();
483b0434
VZ
197 if ( nulLen == wxCONV_FAILED )
198 return wxCONV_FAILED;
e4e3bbb4 199
c1464d9d 200 // if there are enough NULs we can avoid the copy
483b0434 201 if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
eec47cc6
VZ
202 {
203 // make a copy in order to properly NUL-terminate the string
483b0434 204 bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
c1464d9d 205 char * const p = bufTmp.data();
483b0434
VZ
206 memcpy(p, src, srcLen);
207 for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
c1464d9d 208 *s = '\0';
483b0434
VZ
209
210 src = bufTmp;
eec47cc6 211 }
e4e3bbb4 212
483b0434
VZ
213 srcEnd = src + srcLen;
214 }
215 else // quit after the first loop iteration
216 {
217 srcEnd = NULL;
218 }
e4e3bbb4 219
483b0434 220 for ( ;; )
eec47cc6 221 {
c1464d9d 222 // try to convert the current chunk
483b0434 223 size_t lenChunk = MB2WC(NULL, src, 0);
483b0434
VZ
224 if ( lenChunk == wxCONV_FAILED )
225 return wxCONV_FAILED;
e4e3bbb4 226
467e0479 227 lenChunk++; // for the L'\0' at the end of this chunk
e4e3bbb4 228
483b0434 229 dstWritten += lenChunk;
f5fb6871 230
467e0479
VZ
231 if ( lenChunk == 1 )
232 {
233 // nothing left in the input string, conversion succeeded
234 break;
235 }
236
483b0434
VZ
237 if ( dst )
238 {
239 if ( dstWritten > dstLen )
240 return wxCONV_FAILED;
241
830f8f11 242 if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
483b0434
VZ
243 return wxCONV_FAILED;
244
245 dst += lenChunk;
246 }
c1464d9d 247
483b0434 248 if ( !srcEnd )
c1464d9d 249 {
467e0479
VZ
250 // we convert just one chunk in this case as this is the entire
251 // string anyhow
c1464d9d
VZ
252 break;
253 }
eec47cc6
VZ
254
255 // advance the input pointer past the end of this chunk
483b0434 256 while ( NotAllNULs(src, nulLen) )
c1464d9d
VZ
257 {
258 // notice that we must skip over multiple bytes here as we suppose
259 // that if NUL takes 2 or 4 bytes, then all the other characters do
260 // too and so if advanced by a single byte we might erroneously
261 // detect sequences of NUL bytes in the middle of the input
483b0434 262 src += nulLen;
c1464d9d 263 }
e4e3bbb4 264
483b0434 265 src += nulLen; // skipping over its terminator as well
c1464d9d
VZ
266
267 // note that ">=" (and not just "==") is needed here as the terminator
268 // we skipped just above could be inside or just after the buffer
269 // delimited by inEnd
483b0434 270 if ( src >= srcEnd )
c1464d9d
VZ
271 break;
272 }
273
483b0434 274 return dstWritten;
e4e3bbb4
RN
275}
276
483b0434
VZ
277size_t
278wxMBConv::FromWChar(char *dst, size_t dstLen,
279 const wchar_t *src, size_t srcLen) const
e4e3bbb4 280{
483b0434
VZ
281 // the number of chars [which would be] written to dst [if it were not NULL]
282 size_t dstWritten = 0;
e4e3bbb4 283
eec47cc6
VZ
284 // make a copy of the input string unless it is already properly
285 // NUL-terminated
286 //
287 // if we don't know its length we have no choice but to assume that it is,
288 // indeed, properly terminated
289 wxWCharBuffer bufTmp;
467e0479 290 if ( srcLen == wxNO_LEN )
e4e3bbb4 291 {
483b0434 292 srcLen = wxWcslen(src) + 1;
eec47cc6 293 }
483b0434 294 else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
eec47cc6
VZ
295 {
296 // make a copy in order to properly NUL-terminate the string
483b0434 297 bufTmp = wxWCharBuffer(srcLen);
ef199164 298 memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
483b0434
VZ
299 src = bufTmp;
300 }
301
302 const size_t lenNul = GetMBNulLen();
303 for ( const wchar_t * const srcEnd = src + srcLen;
304 src < srcEnd;
305 src += wxWcslen(src) + 1 /* skip L'\0' too */ )
306 {
307 // try to convert the current chunk
308 size_t lenChunk = WC2MB(NULL, src, 0);
309
310 if ( lenChunk == wxCONV_FAILED )
311 return wxCONV_FAILED;
312
313 lenChunk += lenNul;
314 dstWritten += lenChunk;
315
316 if ( dst )
317 {
318 if ( dstWritten > dstLen )
319 return wxCONV_FAILED;
320
321 if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
322 return wxCONV_FAILED;
323
324 dst += lenChunk;
325 }
eec47cc6 326 }
e4e3bbb4 327
483b0434
VZ
328 return dstWritten;
329}
330
ef199164 331size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
509da451 332{
ef199164 333 size_t rc = ToWChar(outBuff, outLen, inBuff);
467e0479 334 if ( rc != wxCONV_FAILED )
509da451
VZ
335 {
336 // ToWChar() returns the buffer length, i.e. including the trailing
337 // NUL, while this method doesn't take it into account
338 rc--;
339 }
340
341 return rc;
342}
343
ef199164 344size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
509da451 345{
ef199164 346 size_t rc = FromWChar(outBuff, outLen, inBuff);
467e0479 347 if ( rc != wxCONV_FAILED )
509da451
VZ
348 {
349 rc -= GetMBNulLen();
350 }
351
352 return rc;
353}
354
483b0434
VZ
355wxMBConv::~wxMBConv()
356{
357 // nothing to do here (necessary for Darwin linking probably)
358}
e4e3bbb4 359
483b0434
VZ
360const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
361{
362 if ( psz )
eec47cc6 363 {
483b0434
VZ
364 // calculate the length of the buffer needed first
365 const size_t nLen = MB2WC(NULL, psz, 0);
467e0479 366 if ( nLen != wxCONV_FAILED )
f5fb6871 367 {
483b0434
VZ
368 // now do the actual conversion
369 wxWCharBuffer buf(nLen /* +1 added implicitly */);
eec47cc6 370
483b0434
VZ
371 // +1 for the trailing NULL
372 if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
373 return buf;
f5fb6871 374 }
483b0434 375 }
e4e3bbb4 376
483b0434
VZ
377 return wxWCharBuffer();
378}
3698ae71 379
483b0434
VZ
380const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
381{
382 if ( pwz )
383 {
384 const size_t nLen = WC2MB(NULL, pwz, 0);
467e0479 385 if ( nLen != wxCONV_FAILED )
483b0434
VZ
386 {
387 // extra space for trailing NUL(s)
388 static const size_t extraLen = GetMaxMBNulLen();
f5fb6871 389
483b0434
VZ
390 wxCharBuffer buf(nLen + extraLen - 1);
391 if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
392 return buf;
393 }
394 }
395
396 return wxCharBuffer();
397}
e4e3bbb4 398
483b0434 399const wxWCharBuffer
ef199164 400wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
483b0434 401{
ef199164 402 const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
467e0479 403 if ( dstLen != wxCONV_FAILED )
483b0434 404 {
830f8f11 405 wxWCharBuffer wbuf(dstLen - 1);
ef199164 406 if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
483b0434
VZ
407 {
408 if ( outLen )
467e0479
VZ
409 {
410 *outLen = dstLen;
411 if ( wbuf[dstLen - 1] == L'\0' )
412 (*outLen)--;
413 }
414
483b0434
VZ
415 return wbuf;
416 }
417 }
418
419 if ( outLen )
420 *outLen = 0;
421
422 return wxWCharBuffer();
423}
424
425const wxCharBuffer
ef199164 426wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
483b0434 427{
13d92ad6 428 size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
467e0479 429 if ( dstLen != wxCONV_FAILED )
483b0434 430 {
168a76fe
VZ
431 // special case of empty input: can't allocate 0 size buffer below as
432 // wxCharBuffer insists on NUL-terminating it
433 wxCharBuffer buf(dstLen ? dstLen - 1 : 1);
ef199164 434 if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
483b0434
VZ
435 {
436 if ( outLen )
467e0479
VZ
437 {
438 *outLen = dstLen;
439
440 const size_t nulLen = GetMBNulLen();
13d92ad6
VZ
441 if ( dstLen >= nulLen &&
442 !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
467e0479
VZ
443 {
444 // in this case the output is NUL-terminated and we're not
445 // supposed to count NUL
13d92ad6 446 *outLen -= nulLen;
467e0479
VZ
447 }
448 }
d32a507d 449
483b0434
VZ
450 return buf;
451 }
e4e3bbb4
RN
452 }
453
eec47cc6
VZ
454 if ( outLen )
455 *outLen = 0;
456
457 return wxCharBuffer();
e4e3bbb4
RN
458}
459
6001e347 460// ----------------------------------------------------------------------------
bde4baac 461// wxMBConvLibc
6001e347
RR
462// ----------------------------------------------------------------------------
463
bde4baac
VZ
464size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
465{
466 return wxMB2WC(buf, psz, n);
467}
468
469size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
470{
471 return wxWC2MB(buf, psz, n);
472}
e1bfe89e
RR
473
474// ----------------------------------------------------------------------------
532d575b 475// wxConvBrokenFileNames
e1bfe89e
RR
476// ----------------------------------------------------------------------------
477
eec47cc6
VZ
478#ifdef __UNIX__
479
845905d5 480wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
ea8ce907 481{
845905d5
MW
482 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
483 || wxStricmp(charset, _T("UTF8")) == 0 )
484 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
485 else
486 m_conv = new wxCSConv(charset);
ea8ce907
RR
487}
488
eec47cc6 489#endif // __UNIX__
c12b7f79 490
bde4baac 491// ----------------------------------------------------------------------------
3698ae71 492// UTF-7
bde4baac 493// ----------------------------------------------------------------------------
6001e347 494
15f2ee32 495// Implementation (C) 2004 Fredrik Roubert
6001e347 496
15f2ee32
RN
//
// BASE64 decoding table
//
// Indexed by the byte value: contains the 6 bit value of the BASE64 digits
// ('A'-'Z', 'a'-'z', '0'-'9', '+' and '/') and 0xff for all the other bytes.
//
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x27
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x28 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0' - '9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x5f: 'A' - 'Z'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x7f: 'a' - 'z'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
535
536size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
537{
15f2ee32
RN
538 size_t len = 0;
539
04a37834 540 while ( *psz && (!buf || (len < n)) )
15f2ee32
RN
541 {
542 unsigned char cc = *psz++;
543 if (cc != '+')
544 {
545 // plain ASCII char
546 if (buf)
547 *buf++ = cc;
548 len++;
549 }
550 else if (*psz == '-')
551 {
552 // encoded plus sign
553 if (buf)
554 *buf++ = cc;
555 len++;
556 psz++;
557 }
04a37834 558 else // start of BASE64 encoded string
15f2ee32 559 {
04a37834 560 bool lsb, ok;
15f2ee32 561 unsigned int d, l;
04a37834
VZ
562 for ( ok = lsb = false, d = 0, l = 0;
563 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
564 psz++ )
15f2ee32
RN
565 {
566 d <<= 6;
567 d += cc;
568 for (l += 6; l >= 8; lsb = !lsb)
569 {
04a37834 570 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
571 if (lsb)
572 {
573 if (buf)
574 *buf++ |= c;
575 len ++;
576 }
577 else
04a37834 578 {
15f2ee32 579 if (buf)
6356d52a 580 *buf = (wchar_t)(c << 8);
04a37834
VZ
581 }
582
583 ok = true;
15f2ee32
RN
584 }
585 }
04a37834
VZ
586
587 if ( !ok )
588 {
589 // in valid UTF7 we should have valid characters after '+'
467e0479 590 return wxCONV_FAILED;
04a37834
VZ
591 }
592
15f2ee32
RN
593 if (*psz == '-')
594 psz++;
595 }
596 }
04a37834
VZ
597
598 if ( buf && (len < n) )
599 *buf = '\0';
600
15f2ee32 601 return len;
6001e347
RR
602}
603
15f2ee32
RN
//
// BASE64 encoding table
//
// Maps each 6 bit value (0..63) to the BASE64 digit representing it.
//
static const unsigned char utf7enb64[] =
{
    // 0 - 25
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z',
    // 26 - 51
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
    'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
    'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
    'y', 'z',
    // 52 - 61
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9',
    // 62 - 63
    '+', '/'
};
618
//
// UTF-7 encoding table
//
// Gives the class of each of the 128 ASCII characters:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    // 0x00 - 0x1f
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    // 0x20 - 0x3f
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    // 0x40 - 0x5f
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    // 0x60 - 0x7f
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
638
667e5b3e 639size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32 640{
15f2ee32
RN
641 size_t len = 0;
642
643 while (*psz && ((!buf) || (len < n)))
644 {
645 wchar_t cc = *psz++;
646 if (cc < 0x80 && utf7encode[cc] < 1)
647 {
648 // plain ASCII char
649 if (buf)
650 *buf++ = (char)cc;
ef199164 651
15f2ee32
RN
652 len++;
653 }
654#ifndef WC_UTF16
79c78d42 655 else if (((wxUint32)cc) > 0xffff)
b2c13097 656 {
15f2ee32 657 // no surrogate pair generation (yet?)
467e0479 658 return wxCONV_FAILED;
15f2ee32
RN
659 }
660#endif
661 else
662 {
663 if (buf)
664 *buf++ = '+';
ef199164 665
15f2ee32
RN
666 len++;
667 if (cc != '+')
668 {
669 // BASE64 encode string
670 unsigned int lsb, d, l;
73c902d6 671 for (d = 0, l = 0; /*nothing*/; psz++)
15f2ee32
RN
672 {
673 for (lsb = 0; lsb < 2; lsb ++)
674 {
675 d <<= 8;
676 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
677
678 for (l += 8; l >= 6; )
679 {
680 l -= 6;
681 if (buf)
682 *buf++ = utf7enb64[(d >> l) % 64];
683 len++;
684 }
685 }
ef199164 686
15f2ee32
RN
687 cc = *psz;
688 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
689 break;
690 }
ef199164 691
15f2ee32
RN
692 if (l != 0)
693 {
694 if (buf)
695 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
ef199164 696
15f2ee32
RN
697 len++;
698 }
699 }
ef199164 700
15f2ee32
RN
701 if (buf)
702 *buf++ = '-';
703 len++;
704 }
705 }
ef199164 706
15f2ee32
RN
707 if (buf && (len < n))
708 *buf = 0;
ef199164 709
15f2ee32 710 return len;
6001e347
RR
711}
712
f6bcfd97 713// ----------------------------------------------------------------------------
6001e347 714// UTF-8
f6bcfd97 715// ----------------------------------------------------------------------------
6001e347 716
dccce9ea 717static wxUint32 utf8_max[]=
4def3b35 718 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347 719
3698ae71
VZ
720// boundaries of the private use area we use to (temporarily) remap invalid
721// characters invalid in a UTF-8 encoded string
ea8ce907
RR
722const wxUint32 wxUnicodePUA = 0x100000;
723const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
724
6001e347
RR
725size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
726{
4def3b35
VS
727 size_t len = 0;
728
dccce9ea 729 while (*psz && ((!buf) || (len < n)))
4def3b35 730 {
ea8ce907
RR
731 const char *opsz = psz;
732 bool invalid = false;
4def3b35
VS
733 unsigned char cc = *psz++, fc = cc;
734 unsigned cnt;
dccce9ea 735 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 736 fc <<= 1;
ef199164 737
dccce9ea 738 if (!cnt)
4def3b35
VS
739 {
740 // plain ASCII char
dccce9ea 741 if (buf)
4def3b35
VS
742 *buf++ = cc;
743 len++;
561488ef
MW
744
745 // escape the escape character for octal escapes
746 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
747 && cc == '\\' && (!buf || len < n))
748 {
749 if (buf)
750 *buf++ = cc;
751 len++;
752 }
dccce9ea
VZ
753 }
754 else
4def3b35
VS
755 {
756 cnt--;
dccce9ea 757 if (!cnt)
4def3b35
VS
758 {
759 // invalid UTF-8 sequence
ea8ce907 760 invalid = true;
dccce9ea
VZ
761 }
762 else
4def3b35
VS
763 {
764 unsigned ocnt = cnt - 1;
765 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 766 while (cnt--)
4def3b35 767 {
ea8ce907 768 cc = *psz;
dccce9ea 769 if ((cc & 0xC0) != 0x80)
4def3b35
VS
770 {
771 // invalid UTF-8 sequence
ea8ce907
RR
772 invalid = true;
773 break;
4def3b35 774 }
ef199164 775
ea8ce907 776 psz++;
4def3b35
VS
777 res = (res << 6) | (cc & 0x3f);
778 }
ef199164 779
ea8ce907 780 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
781 {
782 // illegal UTF-8 encoding
ea8ce907 783 invalid = true;
4def3b35 784 }
ea8ce907
RR
785 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
786 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
787 {
788 // if one of our PUA characters turns up externally
789 // it must also be treated as an illegal sequence
790 // (a bit like you have to escape an escape character)
791 invalid = true;
792 }
793 else
794 {
1cd52418 795#ifdef WC_UTF16
ea8ce907
RR
796 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
797 size_t pa = encode_utf16(res, (wxUint16 *)buf);
467e0479 798 if (pa == wxCONV_FAILED)
ea8ce907
RR
799 {
800 invalid = true;
801 }
802 else
803 {
804 if (buf)
805 buf += pa;
806 len += pa;
807 }
373658eb 808#else // !WC_UTF16
ea8ce907 809 if (buf)
38d4b1e4 810 *buf++ = (wchar_t)res;
ea8ce907 811 len++;
373658eb 812#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
813 }
814 }
ef199164 815
ea8ce907
RR
816 if (invalid)
817 {
818 if (m_options & MAP_INVALID_UTF8_TO_PUA)
819 {
820 while (opsz < psz && (!buf || len < n))
821 {
822#ifdef WC_UTF16
823 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
824 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
467e0479 825 wxASSERT(pa != wxCONV_FAILED);
ea8ce907
RR
826 if (buf)
827 buf += pa;
828 opsz++;
829 len += pa;
830#else
831 if (buf)
38d4b1e4 832 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
ea8ce907
RR
833 opsz++;
834 len++;
835#endif
836 }
837 }
3698ae71 838 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
839 {
840 while (opsz < psz && (!buf || len < n))
841 {
3698ae71
VZ
842 if ( buf && len + 3 < n )
843 {
17a1ebd1 844 unsigned char on = *opsz;
3698ae71 845 *buf++ = L'\\';
17a1ebd1
VZ
846 *buf++ = (wchar_t)( L'0' + on / 0100 );
847 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
848 *buf++ = (wchar_t)( L'0' + on % 010 );
3698ae71 849 }
ef199164 850
ea8ce907
RR
851 opsz++;
852 len += 4;
853 }
854 }
3698ae71 855 else // MAP_INVALID_UTF8_NOT
ea8ce907 856 {
467e0479 857 return wxCONV_FAILED;
ea8ce907 858 }
4def3b35
VS
859 }
860 }
6001e347 861 }
ef199164 862
dccce9ea 863 if (buf && (len < n))
4def3b35 864 *buf = 0;
ef199164 865
4def3b35 866 return len;
6001e347
RR
867}
868
3698ae71
VZ
// returns true if wch is one of the octal digits L'0' to L'7'
static inline bool isoctal(wchar_t wch)
{
    if ( wch < L'0' )
        return false;

    return wch <= L'7';
}
873
6001e347
RR
874size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
875{
4def3b35 876 size_t len = 0;
6001e347 877
dccce9ea 878 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
879 {
880 wxUint32 cc;
ef199164 881
1cd52418 882#ifdef WC_UTF16
b5153fd8
VZ
883 // cast is ok for WC_UTF16
884 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
467e0479 885 psz += (pa == wxCONV_FAILED) ? 1 : pa;
1cd52418 886#else
ef199164 887 cc = (*psz++) & 0x7fffffff;
4def3b35 888#endif
3698ae71
VZ
889
890 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
891 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 892 {
dccce9ea 893 if (buf)
ea8ce907 894 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 895 len++;
3698ae71 896 }
561488ef
MW
897 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
898 && cc == L'\\' && psz[0] == L'\\' )
899 {
900 if (buf)
901 *buf++ = (char)cc;
902 psz++;
903 len++;
904 }
3698ae71
VZ
905 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
906 cc == L'\\' &&
907 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 908 {
dccce9ea 909 if (buf)
3698ae71 910 {
ef199164
DS
911 *buf++ = (char) ((psz[0] - L'0') * 0100 +
912 (psz[1] - L'0') * 010 +
b2c13097 913 (psz[2] - L'0'));
3698ae71
VZ
914 }
915
916 psz += 3;
ea8ce907
RR
917 len++;
918 }
919 else
920 {
921 unsigned cnt;
ef199164
DS
922 for (cnt = 0; cc > utf8_max[cnt]; cnt++)
923 {
924 }
925
ea8ce907 926 if (!cnt)
4def3b35 927 {
ea8ce907
RR
928 // plain ASCII char
929 if (buf)
930 *buf++ = (char) cc;
931 len++;
932 }
ea8ce907
RR
933 else
934 {
935 len += cnt + 1;
936 if (buf)
937 {
938 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
939 while (cnt--)
940 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
941 }
4def3b35
VS
942 }
943 }
6001e347 944 }
4def3b35 945
ef199164 946 if (buf && (len < n))
3698ae71 947 *buf = 0;
adb45366 948
4def3b35 949 return len;
6001e347
RR
950}
951
467e0479 952// ============================================================================
c91830cb 953// UTF-16
467e0479 954// ============================================================================
c91830cb
VZ
955
956#ifdef WORDS_BIGENDIAN
bde4baac
VZ
957 #define wxMBConvUTF16straight wxMBConvUTF16BE
958 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 959#else
bde4baac
VZ
960 #define wxMBConvUTF16swap wxMBConvUTF16BE
961 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
962#endif
963
467e0479
VZ
964/* static */
965size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
966{
967 if ( srcLen == wxNO_LEN )
968 {
969 // count the number of bytes in input, including the trailing NULs
ef199164
DS
970 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
971 for ( srcLen = 1; *inBuff++; srcLen++ )
467e0479 972 ;
c91830cb 973
467e0479
VZ
974 srcLen *= BYTES_PER_CHAR;
975 }
976 else // we already have the length
977 {
978 // we can only convert an entire number of UTF-16 characters
979 if ( srcLen % BYTES_PER_CHAR )
980 return wxCONV_FAILED;
981 }
982
983 return srcLen;
984}
985
986// case when in-memory representation is UTF-16 too
c91830cb
VZ
987#ifdef WC_UTF16
988
467e0479
VZ
989// ----------------------------------------------------------------------------
990// conversions without endianness change
991// ----------------------------------------------------------------------------
992
993size_t
994wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
995 const char *src, size_t srcLen) const
c91830cb 996{
467e0479
VZ
997 // set up the scene for using memcpy() (which is presumably more efficient
998 // than copying the bytes one by one)
999 srcLen = GetLength(src, srcLen);
1000 if ( srcLen == wxNO_LEN )
1001 return wxCONV_FAILED;
c91830cb 1002
ef199164 1003 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479 1004 if ( dst )
c91830cb 1005 {
467e0479
VZ
1006 if ( dstLen < inLen )
1007 return wxCONV_FAILED;
c91830cb 1008
467e0479 1009 memcpy(dst, src, srcLen);
c91830cb 1010 }
d32a507d 1011
467e0479 1012 return inLen;
c91830cb
VZ
1013}
1014
467e0479
VZ
1015size_t
1016wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1017 const wchar_t *src, size_t srcLen) const
c91830cb 1018{
467e0479
VZ
1019 if ( srcLen == wxNO_LEN )
1020 srcLen = wxWcslen(src) + 1;
c91830cb 1021
467e0479
VZ
1022 srcLen *= BYTES_PER_CHAR;
1023
1024 if ( dst )
c91830cb 1025 {
467e0479
VZ
1026 if ( dstLen < srcLen )
1027 return wxCONV_FAILED;
d32a507d 1028
467e0479 1029 memcpy(dst, src, srcLen);
c91830cb 1030 }
d32a507d 1031
467e0479 1032 return srcLen;
c91830cb
VZ
1033}
1034
467e0479
VZ
1035// ----------------------------------------------------------------------------
1036// endian-reversing conversions
1037// ----------------------------------------------------------------------------
c91830cb 1038
467e0479
VZ
1039size_t
1040wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1041 const char *src, size_t srcLen) const
c91830cb 1042{
467e0479
VZ
1043 srcLen = GetLength(src, srcLen);
1044 if ( srcLen == wxNO_LEN )
1045 return wxCONV_FAILED;
c91830cb 1046
467e0479
VZ
1047 srcLen /= BYTES_PER_CHAR;
1048
1049 if ( dst )
c91830cb 1050 {
467e0479
VZ
1051 if ( dstLen < srcLen )
1052 return wxCONV_FAILED;
1053
ef199164
DS
1054 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1055 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
c91830cb 1056 {
ef199164 1057 *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff);
c91830cb 1058 }
c91830cb 1059 }
bfab25d4 1060
467e0479 1061 return srcLen;
c91830cb
VZ
1062}
1063
467e0479
VZ
1064size_t
1065wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1066 const wchar_t *src, size_t srcLen) const
c91830cb 1067{
467e0479
VZ
1068 if ( srcLen == wxNO_LEN )
1069 srcLen = wxWcslen(src) + 1;
c91830cb 1070
467e0479
VZ
1071 srcLen *= BYTES_PER_CHAR;
1072
1073 if ( dst )
c91830cb 1074 {
467e0479
VZ
1075 if ( dstLen < srcLen )
1076 return wxCONV_FAILED;
1077
ef199164 1078 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
467e0479 1079 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
c91830cb 1080 {
ef199164 1081 *outBuff++ = wxUINT16_SWAP_ALWAYS(*src);
c91830cb 1082 }
c91830cb 1083 }
eec47cc6 1084
467e0479 1085 return srcLen;
c91830cb
VZ
1086}
1087
467e0479 1088#else // !WC_UTF16: wchar_t is UTF-32
c91830cb 1089
467e0479
VZ
1090// ----------------------------------------------------------------------------
1091// conversions without endianness change
1092// ----------------------------------------------------------------------------
c91830cb 1093
35d11700
VZ
1094size_t
1095wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1096 const char *src, size_t srcLen) const
c91830cb 1097{
35d11700
VZ
1098 srcLen = GetLength(src, srcLen);
1099 if ( srcLen == wxNO_LEN )
1100 return wxCONV_FAILED;
c91830cb 1101
ef199164 1102 const size_t inLen = srcLen / BYTES_PER_CHAR;
35d11700 1103 if ( !dst )
c91830cb 1104 {
35d11700
VZ
1105 // optimization: return maximal space which could be needed for this
1106 // string even if the real size could be smaller if the buffer contains
1107 // any surrogates
1108 return inLen;
c91830cb 1109 }
c91830cb 1110
35d11700 1111 size_t outLen = 0;
ef199164
DS
1112 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1113 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
35d11700 1114 {
ef199164
DS
1115 const wxUint32 ch = wxDecodeSurrogate(&inBuff);
1116 if ( !inBuff )
35d11700
VZ
1117 return wxCONV_FAILED;
1118
1119 if ( ++outLen > dstLen )
1120 return wxCONV_FAILED;
c91830cb 1121
35d11700
VZ
1122 *dst++ = ch;
1123 }
1124
1125
1126 return outLen;
1127}
c91830cb 1128
35d11700
VZ
1129size_t
1130wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1131 const wchar_t *src, size_t srcLen) const
c91830cb 1132{
35d11700
VZ
1133 if ( srcLen == wxNO_LEN )
1134 srcLen = wxWcslen(src) + 1;
c91830cb 1135
35d11700 1136 size_t outLen = 0;
ef199164 1137 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
35d11700 1138 for ( size_t n = 0; n < srcLen; n++ )
c91830cb
VZ
1139 {
1140 wxUint16 cc[2];
35d11700
VZ
1141 const size_t numChars = encode_utf16(*src++, cc);
1142 if ( numChars == wxCONV_FAILED )
1143 return wxCONV_FAILED;
c91830cb 1144
ef199164
DS
1145 outLen += numChars * BYTES_PER_CHAR;
1146 if ( outBuff )
c91830cb 1147 {
35d11700
VZ
1148 if ( outLen > dstLen )
1149 return wxCONV_FAILED;
1150
ef199164 1151 *outBuff++ = cc[0];
35d11700 1152 if ( numChars == 2 )
69b80d28 1153 {
35d11700 1154 // second character of a surrogate
ef199164 1155 *outBuff++ = cc[1];
69b80d28 1156 }
c91830cb 1157 }
c91830cb 1158 }
c91830cb 1159
35d11700 1160 return outLen;
c91830cb
VZ
1161}
1162
467e0479
VZ
1163// ----------------------------------------------------------------------------
1164// endian-reversing conversions
1165// ----------------------------------------------------------------------------
c91830cb 1166
35d11700
VZ
1167size_t
1168wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1169 const char *src, size_t srcLen) const
c91830cb 1170{
35d11700
VZ
1171 srcLen = GetLength(src, srcLen);
1172 if ( srcLen == wxNO_LEN )
1173 return wxCONV_FAILED;
1174
ef199164 1175 const size_t inLen = srcLen / BYTES_PER_CHAR;
35d11700
VZ
1176 if ( !dst )
1177 {
1178 // optimization: return maximal space which could be needed for this
1179 // string even if the real size could be smaller if the buffer contains
1180 // any surrogates
1181 return inLen;
1182 }
c91830cb 1183
35d11700 1184 size_t outLen = 0;
ef199164
DS
1185 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1186 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
c91830cb 1187 {
35d11700
VZ
1188 wxUint32 ch;
1189 wxUint16 tmp[2];
ef199164
DS
1190
1191 tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
1192 inBuff++;
1193 tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
c91830cb 1194
35d11700
VZ
1195 const size_t numChars = decode_utf16(tmp, ch);
1196 if ( numChars == wxCONV_FAILED )
1197 return wxCONV_FAILED;
c91830cb 1198
35d11700 1199 if ( numChars == 2 )
ef199164 1200 inBuff++;
35d11700
VZ
1201
1202 if ( ++outLen > dstLen )
1203 return wxCONV_FAILED;
c91830cb 1204
35d11700 1205 *dst++ = ch;
c91830cb 1206 }
c91830cb 1207
c91830cb 1208
35d11700
VZ
1209 return outLen;
1210}
c91830cb 1211
35d11700
VZ
1212size_t
1213wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1214 const wchar_t *src, size_t srcLen) const
c91830cb 1215{
35d11700
VZ
1216 if ( srcLen == wxNO_LEN )
1217 srcLen = wxWcslen(src) + 1;
c91830cb 1218
35d11700 1219 size_t outLen = 0;
ef199164 1220 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
35d11700 1221 for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
c91830cb
VZ
1222 {
1223 wxUint16 cc[2];
35d11700
VZ
1224 const size_t numChars = encode_utf16(*src, cc);
1225 if ( numChars == wxCONV_FAILED )
1226 return wxCONV_FAILED;
c91830cb 1227
ef199164
DS
1228 outLen += numChars * BYTES_PER_CHAR;
1229 if ( outBuff )
c91830cb 1230 {
35d11700
VZ
1231 if ( outLen > dstLen )
1232 return wxCONV_FAILED;
1233
ef199164 1234 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
35d11700 1235 if ( numChars == 2 )
c91830cb 1236 {
35d11700 1237 // second character of a surrogate
ef199164 1238 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
c91830cb
VZ
1239 }
1240 }
c91830cb 1241 }
c91830cb 1242
35d11700 1243 return outLen;
c91830cb
VZ
1244}
1245
467e0479 1246#endif // WC_UTF16/!WC_UTF16
c91830cb
VZ
1247
1248
35d11700 1249// ============================================================================
c91830cb 1250// UTF-32
35d11700 1251// ============================================================================
c91830cb
VZ
1252
1253#ifdef WORDS_BIGENDIAN
467e0479
VZ
1254 #define wxMBConvUTF32straight wxMBConvUTF32BE
1255 #define wxMBConvUTF32swap wxMBConvUTF32LE
c91830cb 1256#else
467e0479
VZ
1257 #define wxMBConvUTF32swap wxMBConvUTF32BE
1258 #define wxMBConvUTF32straight wxMBConvUTF32LE
c91830cb
VZ
1259#endif
1260
1261
1262WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1263WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1264
467e0479
VZ
1265/* static */
1266size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
1267{
1268 if ( srcLen == wxNO_LEN )
1269 {
1270 // count the number of bytes in input, including the trailing NULs
ef199164
DS
1271 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1272 for ( srcLen = 1; *inBuff++; srcLen++ )
467e0479 1273 ;
c91830cb 1274
467e0479
VZ
1275 srcLen *= BYTES_PER_CHAR;
1276 }
1277 else // we already have the length
1278 {
1279 // we can only convert an entire number of UTF-32 characters
1280 if ( srcLen % BYTES_PER_CHAR )
1281 return wxCONV_FAILED;
1282 }
1283
1284 return srcLen;
1285}
1286
1287// case when in-memory representation is UTF-16
c91830cb
VZ
1288#ifdef WC_UTF16
1289
467e0479
VZ
1290// ----------------------------------------------------------------------------
1291// conversions without endianness change
1292// ----------------------------------------------------------------------------
1293
1294size_t
1295wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1296 const char *src, size_t srcLen) const
c91830cb 1297{
467e0479
VZ
1298 srcLen = GetLength(src, srcLen);
1299 if ( srcLen == wxNO_LEN )
1300 return wxCONV_FAILED;
c91830cb 1301
ef199164
DS
1302 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1303 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479
VZ
1304 size_t outLen = 0;
1305 for ( size_t n = 0; n < inLen; n++ )
c91830cb
VZ
1306 {
1307 wxUint16 cc[2];
ef199164 1308 const size_t numChars = encode_utf16(*inBuff++, cc);
467e0479
VZ
1309 if ( numChars == wxCONV_FAILED )
1310 return wxCONV_FAILED;
c91830cb 1311
467e0479
VZ
1312 outLen += numChars;
1313 if ( dst )
c91830cb 1314 {
467e0479
VZ
1315 if ( outLen > dstLen )
1316 return wxCONV_FAILED;
d32a507d 1317
467e0479
VZ
1318 *dst++ = cc[0];
1319 if ( numChars == 2 )
1320 {
1321 // second character of a surrogate
1322 *dst++ = cc[1];
1323 }
1324 }
c91830cb 1325 }
d32a507d 1326
467e0479 1327 return outLen;
c91830cb
VZ
1328}
1329
467e0479
VZ
1330size_t
1331wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1332 const wchar_t *src, size_t srcLen) const
c91830cb 1333{
467e0479
VZ
1334 if ( srcLen == wxNO_LEN )
1335 srcLen = wxWcslen(src) + 1;
c91830cb 1336
467e0479 1337 if ( !dst )
c91830cb 1338 {
467e0479
VZ
1339 // optimization: return maximal space which could be needed for this
1340 // string instead of the exact amount which could be less if there are
1341 // any surrogates in the input
1342 //
1343 // we consider that surrogates are rare enough to make it worthwhile to
1344 // avoid running the loop below at the cost of slightly extra memory
1345 // consumption
ef199164 1346 return srcLen * BYTES_PER_CHAR;
467e0479 1347 }
c91830cb 1348
ef199164 1349 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
467e0479
VZ
1350 size_t outLen = 0;
1351 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1352 {
1353 const wxUint32 ch = wxDecodeSurrogate(&src);
1354 if ( !src )
1355 return wxCONV_FAILED;
c91830cb 1356
467e0479 1357 outLen += BYTES_PER_CHAR;
d32a507d 1358
467e0479
VZ
1359 if ( outLen > dstLen )
1360 return wxCONV_FAILED;
b5153fd8 1361
ef199164 1362 *outBuff++ = ch;
467e0479 1363 }
c91830cb 1364
467e0479 1365 return outLen;
c91830cb
VZ
1366}
1367
467e0479
VZ
1368// ----------------------------------------------------------------------------
1369// endian-reversing conversions
1370// ----------------------------------------------------------------------------
c91830cb 1371
467e0479
VZ
1372size_t
1373wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1374 const char *src, size_t srcLen) const
c91830cb 1375{
467e0479
VZ
1376 srcLen = GetLength(src, srcLen);
1377 if ( srcLen == wxNO_LEN )
1378 return wxCONV_FAILED;
c91830cb 1379
ef199164
DS
1380 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1381 const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479 1382 size_t outLen = 0;
ef199164 1383 for ( size_t n = 0; n < inLen; n++, inBuff++ )
c91830cb 1384 {
c91830cb 1385 wxUint16 cc[2];
ef199164 1386 const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
467e0479
VZ
1387 if ( numChars == wxCONV_FAILED )
1388 return wxCONV_FAILED;
c91830cb 1389
467e0479
VZ
1390 outLen += numChars;
1391 if ( dst )
c91830cb 1392 {
467e0479
VZ
1393 if ( outLen > dstLen )
1394 return wxCONV_FAILED;
d32a507d 1395
467e0479
VZ
1396 *dst++ = cc[0];
1397 if ( numChars == 2 )
1398 {
1399 // second character of a surrogate
1400 *dst++ = cc[1];
1401 }
1402 }
c91830cb 1403 }
b5153fd8 1404
467e0479 1405 return outLen;
c91830cb
VZ
1406}
1407
467e0479
VZ
1408size_t
1409wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1410 const wchar_t *src, size_t srcLen) const
c91830cb 1411{
467e0479
VZ
1412 if ( srcLen == wxNO_LEN )
1413 srcLen = wxWcslen(src) + 1;
c91830cb 1414
467e0479 1415 if ( !dst )
c91830cb 1416 {
467e0479
VZ
1417 // optimization: return maximal space which could be needed for this
1418 // string instead of the exact amount which could be less if there are
1419 // any surrogates in the input
1420 //
1421 // we consider that surrogates are rare enough to make it worthwhile to
1422 // avoid running the loop below at the cost of slightly extra memory
1423 // consumption
1424 return srcLen*BYTES_PER_CHAR;
1425 }
c91830cb 1426
ef199164 1427 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
467e0479
VZ
1428 size_t outLen = 0;
1429 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1430 {
1431 const wxUint32 ch = wxDecodeSurrogate(&src);
1432 if ( !src )
1433 return wxCONV_FAILED;
c91830cb 1434
467e0479 1435 outLen += BYTES_PER_CHAR;
d32a507d 1436
467e0479
VZ
1437 if ( outLen > dstLen )
1438 return wxCONV_FAILED;
b5153fd8 1439
ef199164 1440 *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
467e0479 1441 }
c91830cb 1442
467e0479 1443 return outLen;
c91830cb
VZ
1444}
1445
467e0479 1446#else // !WC_UTF16: wchar_t is UTF-32
c91830cb 1447
35d11700
VZ
1448// ----------------------------------------------------------------------------
1449// conversions without endianness change
1450// ----------------------------------------------------------------------------
1451
1452size_t
1453wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1454 const char *src, size_t srcLen) const
c91830cb 1455{
35d11700
VZ
1456 // use memcpy() as it should be much faster than hand-written loop
1457 srcLen = GetLength(src, srcLen);
1458 if ( srcLen == wxNO_LEN )
1459 return wxCONV_FAILED;
c91830cb 1460
35d11700
VZ
1461 const size_t inLen = srcLen/BYTES_PER_CHAR;
1462 if ( dst )
c91830cb 1463 {
35d11700
VZ
1464 if ( dstLen < inLen )
1465 return wxCONV_FAILED;
b5153fd8 1466
35d11700
VZ
1467 memcpy(dst, src, srcLen);
1468 }
c91830cb 1469
35d11700 1470 return inLen;
c91830cb
VZ
1471}
1472
35d11700
VZ
1473size_t
1474wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1475 const wchar_t *src, size_t srcLen) const
c91830cb 1476{
35d11700
VZ
1477 if ( srcLen == wxNO_LEN )
1478 srcLen = wxWcslen(src) + 1;
1479
1480 srcLen *= BYTES_PER_CHAR;
c91830cb 1481
35d11700 1482 if ( dst )
c91830cb 1483 {
35d11700
VZ
1484 if ( dstLen < srcLen )
1485 return wxCONV_FAILED;
c91830cb 1486
35d11700 1487 memcpy(dst, src, srcLen);
c91830cb
VZ
1488 }
1489
35d11700 1490 return srcLen;
c91830cb
VZ
1491}
1492
35d11700
VZ
1493// ----------------------------------------------------------------------------
1494// endian-reversing conversions
1495// ----------------------------------------------------------------------------
c91830cb 1496
35d11700
VZ
1497size_t
1498wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1499 const char *src, size_t srcLen) const
c91830cb 1500{
35d11700
VZ
1501 srcLen = GetLength(src, srcLen);
1502 if ( srcLen == wxNO_LEN )
1503 return wxCONV_FAILED;
1504
1505 srcLen /= BYTES_PER_CHAR;
c91830cb 1506
35d11700 1507 if ( dst )
c91830cb 1508 {
35d11700
VZ
1509 if ( dstLen < srcLen )
1510 return wxCONV_FAILED;
1511
ef199164
DS
1512 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1513 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
c91830cb 1514 {
ef199164 1515 *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff);
c91830cb 1516 }
c91830cb 1517 }
b5153fd8 1518
35d11700 1519 return srcLen;
c91830cb
VZ
1520}
1521
35d11700
VZ
1522size_t
1523wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1524 const wchar_t *src, size_t srcLen) const
c91830cb 1525{
35d11700
VZ
1526 if ( srcLen == wxNO_LEN )
1527 srcLen = wxWcslen(src) + 1;
1528
1529 srcLen *= BYTES_PER_CHAR;
c91830cb 1530
35d11700 1531 if ( dst )
c91830cb 1532 {
35d11700
VZ
1533 if ( dstLen < srcLen )
1534 return wxCONV_FAILED;
1535
ef199164 1536 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
35d11700 1537 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
c91830cb 1538 {
ef199164 1539 *outBuff++ = wxUINT32_SWAP_ALWAYS(*src);
c91830cb 1540 }
c91830cb 1541 }
b5153fd8 1542
35d11700 1543 return srcLen;
c91830cb
VZ
1544}
1545
467e0479 1546#endif // WC_UTF16/!WC_UTF16
c91830cb
VZ
1547
1548
36acb880
VZ
1549// ============================================================================
1550// The classes doing conversion using the iconv_xxx() functions
1551// ============================================================================
3caec1bb 1552
b040e242 1553#ifdef HAVE_ICONV
3a0d76bc 1554
b1d547eb
VS
1555// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1556// E2BIG if output buffer is _exactly_ as big as needed. Such case is
1557// (unless there's yet another bug in glibc) the only case when iconv()
1558// returns with (size_t)-1 (which means error) and says there are 0 bytes
1559// left in the input buffer -- when _real_ error occurs,
1560// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1561// iconv() failure.
3caec1bb
VS
1562// [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft): true if the iconv() call which returned cres
// and left bufLeft bytes of input really failed. For glibc 2.0/2.1 an E2BIG
// error with no input left is not treated as a failure.
//
// The macro arguments are parenthesized so that arbitrary expressions can be
// passed safely.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft) ((cres) == (size_t)-1)
#endif

// cast a pointer to the input buffer pointer to the type of the second
// parameter of iconv(), as selected by ICONV_CONST
#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))

// value compared against iconv descriptors to detect a failed iconv_open()
#define ICONV_T_INVALID ((iconv_t)-1)
1573
1574#if SIZEOF_WCHAR_T == 4
1575 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1576 #define WC_ENC wxFONTENCODING_UTF32
1577#elif SIZEOF_WCHAR_T == 2
1578 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1579 #define WC_ENC wxFONTENCODING_UTF16
1580#else // sizeof(wchar_t) != 2 nor 4
1581 // does this ever happen?
1582 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1583#endif
1584
36acb880 1585// ----------------------------------------------------------------------------
e95354ec 1586// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1587// ----------------------------------------------------------------------------
1588
e95354ec 1589class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1590{
1591public:
e95354ec
VZ
1592 wxMBConv_iconv(const wxChar *name);
1593 virtual ~wxMBConv_iconv();
36acb880 1594
bde4baac
VZ
1595 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1596 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1597
d36c9347 1598 // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
7ef3ab50
VZ
1599 virtual size_t GetMBNulLen() const;
1600
d36c9347
VZ
1601 virtual wxMBConv *Clone() const
1602 {
1603 wxMBConv_iconv *p = new wxMBConv_iconv(m_name);
1604 p->m_minMBCharWidth = m_minMBCharWidth;
1605 return p;
1606 }
1607
e95354ec 1608 bool IsOk() const
74a7eb0b 1609 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
36acb880
VZ
1610
1611protected:
ef199164
DS
1612 // the iconv handlers used to translate from multibyte
1613 // to wide char and in the other direction
36acb880
VZ
1614 iconv_t m2w,
1615 w2m;
ef199164 1616
b1d547eb
VS
1617#if wxUSE_THREADS
1618 // guards access to m2w and w2m objects
1619 wxMutex m_iconvMutex;
1620#endif
36acb880
VZ
1621
1622private:
e95354ec 1623 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880 1624 // available on this machine, it will remain NULL
74a7eb0b 1625 static wxString ms_wcCharsetName;
36acb880
VZ
1626
1627 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1628 // different endian-ness than the native one
405d8f46 1629 static bool ms_wcNeedsSwap;
eec47cc6 1630
d36c9347
VZ
1631
1632 // name of the encoding handled by this conversion
1633 wxString m_name;
1634
7ef3ab50 1635 // cached result of GetMBNulLen(); set to 0 meaning "unknown"
c1464d9d
VZ
1636 // initially
1637 size_t m_minMBCharWidth;
36acb880
VZ
1638};
1639
8f115891
MW
1640// make the constructor available for unit testing
1641WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1642{
1643 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1644 if ( !result->IsOk() )
1645 {
1646 delete result;
1647 return 0;
1648 }
ef199164 1649
8f115891
MW
1650 return result;
1651}
1652
422e411e 1653wxString wxMBConv_iconv::ms_wcCharsetName;
e95354ec 1654bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1655
e95354ec 1656wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
d36c9347 1657 : m_name(name)
36acb880 1658{
c1464d9d 1659 m_minMBCharWidth = 0;
eec47cc6 1660
0331b385
VZ
1661 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1662 // names for the charsets
200a9923 1663 const wxCharBuffer cname(wxString(name).ToAscii());
04c79127 1664
36acb880 1665 // check for charset that represents wchar_t:
74a7eb0b 1666 if ( ms_wcCharsetName.empty() )
f1339c56 1667 {
c2b83fdd
VZ
1668 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1669
74a7eb0b
VZ
1670#if wxUSE_FONTMAP
1671 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1672#else // !wxUSE_FONTMAP
91cb7f52 1673 static const wxChar *names_static[] =
36acb880 1674 {
74a7eb0b
VZ
1675#if SIZEOF_WCHAR_T == 4
1676 _T("UCS-4"),
1677#elif SIZEOF_WCHAR_T = 2
1678 _T("UCS-2"),
1679#endif
1680 NULL
1681 };
91cb7f52 1682 const wxChar **names = names_static;
74a7eb0b 1683#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
36acb880 1684
d1f024a8 1685 for ( ; *names && ms_wcCharsetName.empty(); ++names )
74a7eb0b 1686 {
17a1ebd1 1687 const wxString nameCS(*names);
74a7eb0b
VZ
1688
1689 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
17a1ebd1 1690 wxString nameXE(nameCS);
ef199164
DS
1691
1692#ifdef WORDS_BIGENDIAN
74a7eb0b 1693 nameXE += _T("BE");
ef199164 1694#else // little endian
74a7eb0b 1695 nameXE += _T("LE");
ef199164 1696#endif
74a7eb0b 1697
c2b83fdd
VZ
1698 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1699 nameXE.c_str());
1700
74a7eb0b
VZ
1701 m2w = iconv_open(nameXE.ToAscii(), cname);
1702 if ( m2w == ICONV_T_INVALID )
3a0d76bc 1703 {
74a7eb0b 1704 // try charset w/o bytesex info (e.g. "UCS4")
c2b83fdd
VZ
1705 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1706 nameCS.c_str());
17a1ebd1 1707 m2w = iconv_open(nameCS.ToAscii(), cname);
3a0d76bc 1708
74a7eb0b
VZ
1709 // and check for bytesex ourselves:
1710 if ( m2w != ICONV_T_INVALID )
3a0d76bc 1711 {
74a7eb0b
VZ
1712 char buf[2], *bufPtr;
1713 wchar_t wbuf[2], *wbufPtr;
1714 size_t insz, outsz;
1715 size_t res;
1716
1717 buf[0] = 'A';
1718 buf[1] = 0;
1719 wbuf[0] = 0;
1720 insz = 2;
1721 outsz = SIZEOF_WCHAR_T * 2;
1722 wbufPtr = wbuf;
1723 bufPtr = buf;
1724
ef199164
DS
1725 res = iconv(
1726 m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1727 (char**)&wbufPtr, &outsz);
74a7eb0b
VZ
1728
1729 if (ICONV_FAILED(res, insz))
1730 {
1731 wxLogLastError(wxT("iconv"));
422e411e 1732 wxLogError(_("Conversion to charset '%s' doesn't work."),
17a1ebd1 1733 nameCS.c_str());
74a7eb0b
VZ
1734 }
1735 else // ok, can convert to this encoding, remember it
1736 {
17a1ebd1 1737 ms_wcCharsetName = nameCS;
74a7eb0b
VZ
1738 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1739 }
3a0d76bc
VS
1740 }
1741 }
74a7eb0b 1742 else // use charset not requiring byte swapping
36acb880 1743 {
74a7eb0b 1744 ms_wcCharsetName = nameXE;
36acb880 1745 }
3a0d76bc 1746 }
74a7eb0b 1747
0944fceb 1748 wxLogTrace(TRACE_STRCONV,
74a7eb0b 1749 wxT("iconv wchar_t charset is \"%s\"%s"),
cae8f1bf 1750 ms_wcCharsetName.empty() ? _T("<none>")
74a7eb0b
VZ
1751 : ms_wcCharsetName.c_str(),
1752 ms_wcNeedsSwap ? _T(" (needs swap)")
1753 : _T(""));
3a0d76bc 1754 }
36acb880 1755 else // we already have ms_wcCharsetName
3caec1bb 1756 {
74a7eb0b 1757 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
f1339c56 1758 }
dccce9ea 1759
74a7eb0b 1760 if ( ms_wcCharsetName.empty() )
f1339c56 1761 {
74a7eb0b 1762 w2m = ICONV_T_INVALID;
36acb880 1763 }
405d8f46
VZ
1764 else
1765 {
74a7eb0b
VZ
1766 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1767 if ( w2m == ICONV_T_INVALID )
1768 {
1769 wxLogTrace(TRACE_STRCONV,
1770 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
422e411e 1771 ms_wcCharsetName.c_str(), cname.data());
74a7eb0b 1772 }
405d8f46 1773 }
36acb880 1774}
3caec1bb 1775
e95354ec 1776wxMBConv_iconv::~wxMBConv_iconv()
36acb880 1777{
74a7eb0b 1778 if ( m2w != ICONV_T_INVALID )
36acb880 1779 iconv_close(m2w);
74a7eb0b 1780 if ( w2m != ICONV_T_INVALID )
36acb880
VZ
1781 iconv_close(w2m);
1782}
3a0d76bc 1783
bde4baac 1784size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1785{
69373110
VZ
1786 // find the string length: notice that must be done differently for
1787 // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1788 size_t inbuf;
7ef3ab50 1789 const size_t nulLen = GetMBNulLen();
69373110
VZ
1790 switch ( nulLen )
1791 {
1792 default:
467e0479 1793 return wxCONV_FAILED;
69373110
VZ
1794
1795 case 1:
1796 inbuf = strlen(psz); // arguably more optimized than our version
1797 break;
1798
1799 case 2:
1800 case 4:
1801 // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1802 // they also have to start at character boundary and not span two
1803 // adjacent characters
1804 const char *p;
1805 for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
1806 ;
1807 inbuf = p - psz;
1808 break;
1809 }
1810
b1d547eb 1811#if wxUSE_THREADS
6a17b868
SN
1812 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1813 // Unfortunately there are a couple of global wxCSConv objects such as
b1d547eb
VS
1814 // wxConvLocal that are used all over wx code, so we have to make sure
1815 // the handle is used by at most one thread at the time. Otherwise
1816 // only a few wx classes would be safe to use from non-main threads
1817 // as MB<->WC conversion would fail "randomly".
1818 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
69373110
VZ
1819#endif // wxUSE_THREADS
1820
36acb880
VZ
1821 size_t outbuf = n * SIZEOF_WCHAR_T;
1822 size_t res, cres;
1823 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1824 wchar_t *bufPtr = buf;
1825 const char *pszPtr = psz;
1826
1827 if (buf)
1828 {
1829 // have destination buffer, convert there
1830 cres = iconv(m2w,
1831 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1832 (char**)&bufPtr, &outbuf);
1833 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1834
36acb880 1835 if (ms_wcNeedsSwap)
3a0d76bc 1836 {
36acb880 1837 // convert to native endianness
17a1ebd1
VZ
1838 for ( unsigned i = 0; i < res; i++ )
1839 buf[n] = WC_BSWAP(buf[i]);
3a0d76bc 1840 }
adb45366 1841
69373110 1842 // NUL-terminate the string if there is any space left
49dd9820
VS
1843 if (res < n)
1844 buf[res] = 0;
36acb880
VZ
1845 }
1846 else
1847 {
1848 // no destination buffer... convert using temp buffer
1849 // to calculate destination buffer requirement
1850 wchar_t tbuf[8];
1851 res = 0;
ef199164
DS
1852
1853 do
1854 {
36acb880 1855 bufPtr = tbuf;
ef199164 1856 outbuf = 8 * SIZEOF_WCHAR_T;
36acb880
VZ
1857
1858 cres = iconv(m2w,
1859 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1860 (char**)&bufPtr, &outbuf );
1861
ef199164
DS
1862 res += 8 - (outbuf / SIZEOF_WCHAR_T);
1863 }
1864 while ((cres == (size_t)-1) && (errno == E2BIG));
f1339c56 1865 }
dccce9ea 1866
36acb880 1867 if (ICONV_FAILED(cres, inbuf))
f1339c56 1868 {
36acb880 1869 //VS: it is ok if iconv fails, hence trace only
ce6f8d6f 1870 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
467e0479 1871 return wxCONV_FAILED;
36acb880
VZ
1872 }
1873
1874 return res;
1875}
1876
bde4baac 1877size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1878{
b1d547eb
VS
1879#if wxUSE_THREADS
1880 // NB: explained in MB2WC
1881 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1882#endif
3698ae71 1883
156162ec
MW
1884 size_t inlen = wxWcslen(psz);
1885 size_t inbuf = inlen * SIZEOF_WCHAR_T;
36acb880
VZ
1886 size_t outbuf = n;
1887 size_t res, cres;
3a0d76bc 1888
36acb880 1889 wchar_t *tmpbuf = 0;
3caec1bb 1890
36acb880
VZ
1891 if (ms_wcNeedsSwap)
1892 {
1893 // need to copy to temp buffer to switch endianness
74a7eb0b 1894 // (doing WC_BSWAP twice on the original buffer won't help, as it
36acb880 1895 // could be in read-only memory, or be accessed in some other thread)
74a7eb0b 1896 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
17a1ebd1
VZ
1897 for ( size_t i = 0; i < inlen; i++ )
1898 tmpbuf[n] = WC_BSWAP(psz[i]);
ef199164 1899
156162ec 1900 tmpbuf[inlen] = L'\0';
74a7eb0b 1901 psz = tmpbuf;
36acb880 1902 }
3a0d76bc 1903
36acb880
VZ
1904 if (buf)
1905 {
1906 // have destination buffer, convert there
1907 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1908
ef199164 1909 res = n - outbuf;
adb45366 1910
49dd9820
VS
1911 // NB: iconv was given only wcslen(psz) characters on input, and so
1912 // it couldn't convert the trailing zero. Let's do it ourselves
1913 // if there's some room left for it in the output buffer.
1914 if (res < n)
1915 buf[0] = 0;
36acb880
VZ
1916 }
1917 else
1918 {
ef199164 1919 // no destination buffer: convert using temp buffer
36acb880
VZ
1920 // to calculate destination buffer requirement
1921 char tbuf[16];
1922 res = 0;
ef199164
DS
1923 do
1924 {
1925 buf = tbuf;
1926 outbuf = 16;
36acb880
VZ
1927
1928 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1929
36acb880 1930 res += 16 - outbuf;
ef199164
DS
1931 }
1932 while ((cres == (size_t)-1) && (errno == E2BIG));
f1339c56 1933 }
dccce9ea 1934
36acb880
VZ
1935 if (ms_wcNeedsSwap)
1936 {
1937 free(tmpbuf);
1938 }
dccce9ea 1939
36acb880
VZ
1940 if (ICONV_FAILED(cres, inbuf))
1941 {
ce6f8d6f 1942 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
467e0479 1943 return wxCONV_FAILED;
36acb880
VZ
1944 }
1945
1946 return res;
1947}
1948
7ef3ab50 1949size_t wxMBConv_iconv::GetMBNulLen() const
eec47cc6 1950{
c1464d9d 1951 if ( m_minMBCharWidth == 0 )
eec47cc6
VZ
1952 {
1953 wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1954
1955#if wxUSE_THREADS
1956 // NB: explained in MB2WC
1957 wxMutexLocker lock(self->m_iconvMutex);
1958#endif
1959
356410fc 1960 wchar_t *wnul = L"";
c1464d9d 1961 char buf[8]; // should be enough for NUL in any encoding
356410fc 1962 size_t inLen = sizeof(wchar_t),
c1464d9d 1963 outLen = WXSIZEOF(buf);
ef199164
DS
1964 char *inBuff = (char *)wnul;
1965 char *outBuff = buf;
1966 if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
356410fc 1967 {
c1464d9d 1968 self->m_minMBCharWidth = (size_t)-1;
356410fc
VZ
1969 }
1970 else // ok
1971 {
ef199164 1972 self->m_minMBCharWidth = outBuff - buf;
356410fc 1973 }
eec47cc6
VZ
1974 }
1975
c1464d9d 1976 return m_minMBCharWidth;
eec47cc6
VZ
1977}
1978
b040e242 1979#endif // HAVE_ICONV
36acb880 1980
e95354ec 1981
36acb880
VZ
1982// ============================================================================
1983// Win32 conversion classes
1984// ============================================================================
1cd52418 1985
e95354ec 1986#ifdef wxHAVE_WIN32_MB2WC
373658eb 1987
8b04d4c4 1988// from utils.cpp
d775fa82 1989#if wxUSE_FONTMAP
8b04d4c4
VZ
1990extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1991extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1992#endif
373658eb 1993
e95354ec 1994class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1995{
1996public:
bde4baac
VZ
1997 wxMBConv_win32()
1998 {
1999 m_CodePage = CP_ACP;
c1464d9d 2000 m_minMBCharWidth = 0;
bde4baac
VZ
2001 }
2002
d36c9347 2003 wxMBConv_win32(const wxMBConv_win32& conv)
1e1c5d62 2004 : wxMBConv()
d36c9347
VZ
2005 {
2006 m_CodePage = conv.m_CodePage;
2007 m_minMBCharWidth = conv.m_minMBCharWidth;
2008 }
2009
7608a683 2010#if wxUSE_FONTMAP
e95354ec 2011 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
2012 {
2013 m_CodePage = wxCharsetToCodepage(name);
c1464d9d 2014 m_minMBCharWidth = 0;
bde4baac 2015 }
dccce9ea 2016
e95354ec 2017 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
2018 {
2019 m_CodePage = wxEncodingToCodepage(encoding);
c1464d9d 2020 m_minMBCharWidth = 0;
bde4baac 2021 }
eec47cc6 2022#endif // wxUSE_FONTMAP
8b04d4c4 2023
d36c9347 2024 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 2025 {
02272c9c
VZ
2026 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
2027 // the behaviour is not compatible with the Unix version (using iconv)
2028 // and break the library itself, e.g. wxTextInputStream::NextChar()
2029 // wouldn't work if reading an incomplete MB char didn't result in an
2030 // error
667e5b3e 2031 //
89028980 2032 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
830f8f11
VZ
2033 // Win XP or newer and it is not supported for UTF-[78] so we always
2034 // use our own conversions in this case. See
89028980
VS
2035 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2036 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
830f8f11 2037 if ( m_CodePage == CP_UTF8 )
89028980 2038 {
830f8f11 2039 return wxConvUTF8.MB2WC(buf, psz, n);
89028980 2040 }
830f8f11
VZ
2041
2042 if ( m_CodePage == CP_UTF7 )
2043 {
2044 return wxConvUTF7.MB2WC(buf, psz, n);
2045 }
2046
2047 int flags = 0;
2048 if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
2049 IsAtLeastWin2kSP4() )
89028980 2050 {
830f8f11 2051 flags = MB_ERR_INVALID_CHARS;
89028980 2052 }
667e5b3e 2053
2b5f62a0
VZ
2054 const size_t len = ::MultiByteToWideChar
2055 (
2056 m_CodePage, // code page
667e5b3e 2057 flags, // flags: fall on error
2b5f62a0
VZ
2058 psz, // input string
2059 -1, // its length (NUL-terminated)
b4da152e 2060 buf, // output string
2b5f62a0
VZ
2061 buf ? n : 0 // size of output buffer
2062 );
89028980
VS
2063 if ( !len )
2064 {
2065 // function totally failed
467e0479 2066 return wxCONV_FAILED;
89028980
VS
2067 }
2068
2069 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2070 // check if we succeeded, by doing a double trip:
2071 if ( !flags && buf )
2072 {
53c174fc
VZ
2073 const size_t mbLen = strlen(psz);
2074 wxCharBuffer mbBuf(mbLen);
89028980
VS
2075 if ( ::WideCharToMultiByte
2076 (
2077 m_CodePage,
2078 0,
2079 buf,
2080 -1,
2081 mbBuf.data(),
53c174fc 2082 mbLen + 1, // size in bytes, not length
89028980
VS
2083 NULL,
2084 NULL
2085 ) == 0 ||
2086 strcmp(mbBuf, psz) != 0 )
2087 {
2088 // we didn't obtain the same thing we started from, hence
2089 // the conversion was lossy and we consider that it failed
467e0479 2090 return wxCONV_FAILED;
89028980
VS
2091 }
2092 }
2b5f62a0 2093
03a991bc
VZ
2094 // note that it returns count of written chars for buf != NULL and size
2095 // of the needed buffer for buf == NULL so in either case the length of
2096 // the string (which never includes the terminating NUL) is one less
89028980 2097 return len - 1;
f1339c56 2098 }
dccce9ea 2099
d36c9347 2100 virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 2101 {
13dd924a
VZ
2102 /*
2103 we have a problem here: by default, WideCharToMultiByte() may
2104 replace characters unrepresentable in the target code page with bad
2105 quality approximations such as turning "1/2" symbol (U+00BD) into
2106 "1" for the code pages which don't have it and we, obviously, want
2107 to avoid this at any price
d775fa82 2108
13dd924a
VZ
2109 the trouble is that this function does it _silently_, i.e. it won't
2110 even tell us whether it did or not... Win98/2000 and higher provide
2111 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2112 we have to resort to a round trip, i.e. check that converting back
2113 results in the same string -- this is, of course, expensive but
2114 otherwise we simply can't be sure to not garble the data.
2115 */
2116
2117 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2118 // it doesn't work with CJK encodings (which we test for rather roughly
2119 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2120 // supporting it
907173e5
WS
2121 BOOL usedDef wxDUMMY_INITIALIZE(false);
2122 BOOL *pUsedDef;
13dd924a
VZ
2123 int flags;
2124 if ( CanUseNoBestFit() && m_CodePage < 50000 )
2125 {
2126 // it's our lucky day
2127 flags = WC_NO_BEST_FIT_CHARS;
2128 pUsedDef = &usedDef;
2129 }
2130 else // old system or unsupported encoding
2131 {
2132 flags = 0;
2133 pUsedDef = NULL;
2134 }
2135
2b5f62a0
VZ
2136 const size_t len = ::WideCharToMultiByte
2137 (
2138 m_CodePage, // code page
13dd924a
VZ
2139 flags, // either none or no best fit
2140 pwz, // input string
2b5f62a0
VZ
2141 -1, // it is (wide) NUL-terminated
2142 buf, // output buffer
2143 buf ? n : 0, // and its size
2144 NULL, // default "replacement" char
13dd924a 2145 pUsedDef // [out] was it used?
2b5f62a0
VZ
2146 );
2147
13dd924a
VZ
2148 if ( !len )
2149 {
2150 // function totally failed
467e0479 2151 return wxCONV_FAILED;
13dd924a
VZ
2152 }
2153
2154 // if we were really converting, check if we succeeded
2155 if ( buf )
2156 {
2157 if ( flags )
2158 {
2159 // check if the conversion failed, i.e. if any replacements
2160 // were done
2161 if ( usedDef )
467e0479 2162 return wxCONV_FAILED;
13dd924a
VZ
2163 }
2164 else // we must resort to double tripping...
2165 {
2166 wxWCharBuffer wcBuf(n);
467e0479 2167 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
13dd924a
VZ
2168 wcscmp(wcBuf, pwz) != 0 )
2169 {
2170 // we didn't obtain the same thing we started from, hence
2171 // the conversion was lossy and we consider that it failed
467e0479 2172 return wxCONV_FAILED;
13dd924a
VZ
2173 }
2174 }
2175 }
2176
03a991bc 2177 // see the comment above for the reason of "len - 1"
13dd924a 2178 return len - 1;
f1339c56 2179 }
dccce9ea 2180
7ef3ab50
VZ
2181 virtual size_t GetMBNulLen() const
2182 {
2183 if ( m_minMBCharWidth == 0 )
2184 {
2185 int len = ::WideCharToMultiByte
2186 (
2187 m_CodePage, // code page
2188 0, // no flags
2189 L"", // input string
2190 1, // translate just the NUL
2191 NULL, // output buffer
2192 0, // and its size
2193 NULL, // no replacement char
2194 NULL // [out] don't care if it was used
2195 );
2196
2197 wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2198 switch ( len )
2199 {
2200 default:
2201 wxLogDebug(_T("Unexpected NUL length %d"), len);
ef199164
DS
2202 self->m_minMBCharWidth = (size_t)-1;
2203 break;
7ef3ab50
VZ
2204
2205 case 0:
2206 self->m_minMBCharWidth = (size_t)-1;
2207 break;
2208
2209 case 1:
2210 case 2:
2211 case 4:
2212 self->m_minMBCharWidth = len;
2213 break;
2214 }
2215 }
2216
2217 return m_minMBCharWidth;
2218 }
2219
d36c9347
VZ
2220 virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2221
13dd924a
VZ
2222 bool IsOk() const { return m_CodePage != -1; }
2223
2224private:
2225 static bool CanUseNoBestFit()
2226 {
2227 static int s_isWin98Or2k = -1;
2228
2229 if ( s_isWin98Or2k == -1 )
2230 {
2231 int verMaj, verMin;
2232 switch ( wxGetOsVersion(&verMaj, &verMin) )
2233 {
406d283a 2234 case wxOS_WINDOWS_9X:
13dd924a
VZ
2235 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2236 break;
2237
406d283a 2238 case wxOS_WINDOWS_NT:
13dd924a
VZ
2239 s_isWin98Or2k = verMaj >= 5;
2240 break;
2241
2242 default:
ef199164 2243 // unknown: be conservative by default
13dd924a 2244 s_isWin98Or2k = 0;
ef199164 2245 break;
13dd924a
VZ
2246 }
2247
2248 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
2249 }
2250
2251 return s_isWin98Or2k == 1;
2252 }
f1339c56 2253
89028980
VS
2254 static bool IsAtLeastWin2kSP4()
2255 {
8942f83a
WS
2256#ifdef __WXWINCE__
2257 return false;
2258#else
89028980
VS
2259 static int s_isAtLeastWin2kSP4 = -1;
2260
2261 if ( s_isAtLeastWin2kSP4 == -1 )
2262 {
2263 OSVERSIONINFOEX ver;
2264
2265 memset(&ver, 0, sizeof(ver));
2266 ver.dwOSVersionInfoSize = sizeof(ver);
2267 GetVersionEx((OSVERSIONINFO*)&ver);
2268
2269 s_isAtLeastWin2kSP4 =
2270 ((ver.dwMajorVersion > 5) || // Vista+
2271 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2272 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2273 ver.wServicePackMajor >= 4)) // 2000 SP4+
2274 ? 1 : 0;
2275 }
2276
2277 return s_isAtLeastWin2kSP4 == 1;
8942f83a 2278#endif
89028980
VS
2279 }
2280
eec47cc6 2281
c1464d9d 2282 // the code page we're working with
b1d66b54 2283 long m_CodePage;
c1464d9d 2284
7ef3ab50 2285 // cached result of GetMBNulLen(), set to 0 initially meaning
c1464d9d
VZ
2286 // "unknown"
2287 size_t m_minMBCharWidth;
1cd52418 2288};
e95354ec
VZ
2289
2290#endif // wxHAVE_WIN32_MB2WC
2291
f7e98dee
RN
2292// ============================================================================
2293// Cocoa conversion classes
2294// ============================================================================
2295
2296#if defined(__WXCOCOA__)
2297
ef199164
DS
2298// RN: There is no UTF-32 support in either Core Foundation or Cocoa.
2299// Strangely enough, internally Core Foundation uses
2300// UTF-32 internally quite a bit - its just not public (yet).
f7e98dee
RN
2301
2302#include <CoreFoundation/CFString.h>
2303#include <CoreFoundation/CFStringEncodingExt.h>
2304
2305CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 2306{
638357a0 2307 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ef199164
DS
2308
2309 switch (encoding)
ecd9653b 2310 {
ef199164
DS
2311 case wxFONTENCODING_DEFAULT :
2312 enc = CFStringGetSystemEncoding();
2313 break ;
2314
ecd9653b
WS
2315 case wxFONTENCODING_ISO8859_1 :
2316 enc = kCFStringEncodingISOLatin1 ;
2317 break ;
2318 case wxFONTENCODING_ISO8859_2 :
2319 enc = kCFStringEncodingISOLatin2;
2320 break ;
2321 case wxFONTENCODING_ISO8859_3 :
2322 enc = kCFStringEncodingISOLatin3 ;
2323 break ;
2324 case wxFONTENCODING_ISO8859_4 :
2325 enc = kCFStringEncodingISOLatin4;
2326 break ;
2327 case wxFONTENCODING_ISO8859_5 :
2328 enc = kCFStringEncodingISOLatinCyrillic;
2329 break ;
2330 case wxFONTENCODING_ISO8859_6 :
2331 enc = kCFStringEncodingISOLatinArabic;
2332 break ;
2333 case wxFONTENCODING_ISO8859_7 :
2334 enc = kCFStringEncodingISOLatinGreek;
2335 break ;
2336 case wxFONTENCODING_ISO8859_8 :
2337 enc = kCFStringEncodingISOLatinHebrew;
2338 break ;
2339 case wxFONTENCODING_ISO8859_9 :
2340 enc = kCFStringEncodingISOLatin5;
2341 break ;
2342 case wxFONTENCODING_ISO8859_10 :
2343 enc = kCFStringEncodingISOLatin6;
2344 break ;
2345 case wxFONTENCODING_ISO8859_11 :
2346 enc = kCFStringEncodingISOLatinThai;
2347 break ;
2348 case wxFONTENCODING_ISO8859_13 :
2349 enc = kCFStringEncodingISOLatin7;
2350 break ;
2351 case wxFONTENCODING_ISO8859_14 :
2352 enc = kCFStringEncodingISOLatin8;
2353 break ;
2354 case wxFONTENCODING_ISO8859_15 :
2355 enc = kCFStringEncodingISOLatin9;
2356 break ;
2357
2358 case wxFONTENCODING_KOI8 :
2359 enc = kCFStringEncodingKOI8_R;
2360 break ;
2361 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2362 enc = kCFStringEncodingDOSRussian;
2363 break ;
2364
2365// case wxFONTENCODING_BULGARIAN :
2366// enc = ;
2367// break ;
2368
2369 case wxFONTENCODING_CP437 :
ef199164 2370 enc = kCFStringEncodingDOSLatinUS ;
ecd9653b
WS
2371 break ;
2372 case wxFONTENCODING_CP850 :
2373 enc = kCFStringEncodingDOSLatin1;
2374 break ;
2375 case wxFONTENCODING_CP852 :
2376 enc = kCFStringEncodingDOSLatin2;
2377 break ;
2378 case wxFONTENCODING_CP855 :
2379 enc = kCFStringEncodingDOSCyrillic;
2380 break ;
2381 case wxFONTENCODING_CP866 :
ef199164 2382 enc = kCFStringEncodingDOSRussian ;
ecd9653b
WS
2383 break ;
2384 case wxFONTENCODING_CP874 :
2385 enc = kCFStringEncodingDOSThai;
2386 break ;
2387 case wxFONTENCODING_CP932 :
2388 enc = kCFStringEncodingDOSJapanese;
2389 break ;
2390 case wxFONTENCODING_CP936 :
ef199164 2391 enc = kCFStringEncodingDOSChineseSimplif ;
ecd9653b
WS
2392 break ;
2393 case wxFONTENCODING_CP949 :
2394 enc = kCFStringEncodingDOSKorean;
2395 break ;
2396 case wxFONTENCODING_CP950 :
2397 enc = kCFStringEncodingDOSChineseTrad;
2398 break ;
ecd9653b
WS
2399 case wxFONTENCODING_CP1250 :
2400 enc = kCFStringEncodingWindowsLatin2;
2401 break ;
2402 case wxFONTENCODING_CP1251 :
ef199164 2403 enc = kCFStringEncodingWindowsCyrillic ;
ecd9653b
WS
2404 break ;
2405 case wxFONTENCODING_CP1252 :
ef199164 2406 enc = kCFStringEncodingWindowsLatin1 ;
ecd9653b
WS
2407 break ;
2408 case wxFONTENCODING_CP1253 :
2409 enc = kCFStringEncodingWindowsGreek;
2410 break ;
2411 case wxFONTENCODING_CP1254 :
2412 enc = kCFStringEncodingWindowsLatin5;
2413 break ;
2414 case wxFONTENCODING_CP1255 :
ef199164 2415 enc = kCFStringEncodingWindowsHebrew ;
ecd9653b
WS
2416 break ;
2417 case wxFONTENCODING_CP1256 :
ef199164 2418 enc = kCFStringEncodingWindowsArabic ;
ecd9653b
WS
2419 break ;
2420 case wxFONTENCODING_CP1257 :
2421 enc = kCFStringEncodingWindowsBalticRim;
2422 break ;
638357a0
RN
2423// This only really encodes to UTF7 (if that) evidently
2424// case wxFONTENCODING_UTF7 :
2425// enc = kCFStringEncodingNonLossyASCII ;
2426// break ;
ecd9653b
WS
2427 case wxFONTENCODING_UTF8 :
2428 enc = kCFStringEncodingUTF8 ;
2429 break ;
2430 case wxFONTENCODING_EUC_JP :
2431 enc = kCFStringEncodingEUC_JP;
2432 break ;
2433 case wxFONTENCODING_UTF16 :
f7e98dee 2434 enc = kCFStringEncodingUnicode ;
ecd9653b 2435 break ;
f7e98dee
RN
2436 case wxFONTENCODING_MACROMAN :
2437 enc = kCFStringEncodingMacRoman ;
2438 break ;
2439 case wxFONTENCODING_MACJAPANESE :
2440 enc = kCFStringEncodingMacJapanese ;
2441 break ;
2442 case wxFONTENCODING_MACCHINESETRAD :
2443 enc = kCFStringEncodingMacChineseTrad ;
2444 break ;
2445 case wxFONTENCODING_MACKOREAN :
2446 enc = kCFStringEncodingMacKorean ;
2447 break ;
2448 case wxFONTENCODING_MACARABIC :
2449 enc = kCFStringEncodingMacArabic ;
2450 break ;
2451 case wxFONTENCODING_MACHEBREW :
2452 enc = kCFStringEncodingMacHebrew ;
2453 break ;
2454 case wxFONTENCODING_MACGREEK :
2455 enc = kCFStringEncodingMacGreek ;
2456 break ;
2457 case wxFONTENCODING_MACCYRILLIC :
2458 enc = kCFStringEncodingMacCyrillic ;
2459 break ;
2460 case wxFONTENCODING_MACDEVANAGARI :
2461 enc = kCFStringEncodingMacDevanagari ;
2462 break ;
2463 case wxFONTENCODING_MACGURMUKHI :
2464 enc = kCFStringEncodingMacGurmukhi ;
2465 break ;
2466 case wxFONTENCODING_MACGUJARATI :
2467 enc = kCFStringEncodingMacGujarati ;
2468 break ;
2469 case wxFONTENCODING_MACORIYA :
2470 enc = kCFStringEncodingMacOriya ;
2471 break ;
2472 case wxFONTENCODING_MACBENGALI :
2473 enc = kCFStringEncodingMacBengali ;
2474 break ;
2475 case wxFONTENCODING_MACTAMIL :
2476 enc = kCFStringEncodingMacTamil ;
2477 break ;
2478 case wxFONTENCODING_MACTELUGU :
2479 enc = kCFStringEncodingMacTelugu ;
2480 break ;
2481 case wxFONTENCODING_MACKANNADA :
2482 enc = kCFStringEncodingMacKannada ;
2483 break ;
2484 case wxFONTENCODING_MACMALAJALAM :
2485 enc = kCFStringEncodingMacMalayalam ;
2486 break ;
2487 case wxFONTENCODING_MACSINHALESE :
2488 enc = kCFStringEncodingMacSinhalese ;
2489 break ;
2490 case wxFONTENCODING_MACBURMESE :
2491 enc = kCFStringEncodingMacBurmese ;
2492 break ;
2493 case wxFONTENCODING_MACKHMER :
2494 enc = kCFStringEncodingMacKhmer ;
2495 break ;
2496 case wxFONTENCODING_MACTHAI :
2497 enc = kCFStringEncodingMacThai ;
2498 break ;
2499 case wxFONTENCODING_MACLAOTIAN :
2500 enc = kCFStringEncodingMacLaotian ;
2501 break ;
2502 case wxFONTENCODING_MACGEORGIAN :
2503 enc = kCFStringEncodingMacGeorgian ;
2504 break ;
2505 case wxFONTENCODING_MACARMENIAN :
2506 enc = kCFStringEncodingMacArmenian ;
2507 break ;
2508 case wxFONTENCODING_MACCHINESESIMP :
2509 enc = kCFStringEncodingMacChineseSimp ;
2510 break ;
2511 case wxFONTENCODING_MACTIBETAN :
2512 enc = kCFStringEncodingMacTibetan ;
2513 break ;
2514 case wxFONTENCODING_MACMONGOLIAN :
2515 enc = kCFStringEncodingMacMongolian ;
2516 break ;
2517 case wxFONTENCODING_MACETHIOPIC :
2518 enc = kCFStringEncodingMacEthiopic ;
2519 break ;
2520 case wxFONTENCODING_MACCENTRALEUR :
2521 enc = kCFStringEncodingMacCentralEurRoman ;
2522 break ;
2523 case wxFONTENCODING_MACVIATNAMESE :
2524 enc = kCFStringEncodingMacVietnamese ;
2525 break ;
2526 case wxFONTENCODING_MACARABICEXT :
2527 enc = kCFStringEncodingMacExtArabic ;
2528 break ;
2529 case wxFONTENCODING_MACSYMBOL :
2530 enc = kCFStringEncodingMacSymbol ;
2531 break ;
2532 case wxFONTENCODING_MACDINGBATS :
2533 enc = kCFStringEncodingMacDingbats ;
2534 break ;
2535 case wxFONTENCODING_MACTURKISH :
2536 enc = kCFStringEncodingMacTurkish ;
2537 break ;
2538 case wxFONTENCODING_MACCROATIAN :
2539 enc = kCFStringEncodingMacCroatian ;
2540 break ;
2541 case wxFONTENCODING_MACICELANDIC :
2542 enc = kCFStringEncodingMacIcelandic ;
2543 break ;
2544 case wxFONTENCODING_MACROMANIAN :
2545 enc = kCFStringEncodingMacRomanian ;
2546 break ;
2547 case wxFONTENCODING_MACCELTIC :
2548 enc = kCFStringEncodingMacCeltic ;
2549 break ;
2550 case wxFONTENCODING_MACGAELIC :
2551 enc = kCFStringEncodingMacGaelic ;
2552 break ;
ecd9653b
WS
2553// case wxFONTENCODING_MACKEYBOARD :
2554// enc = kCFStringEncodingMacKeyboardGlyphs ;
2555// break ;
ef199164 2556
ecd9653b
WS
2557 default :
2558 // because gcc is picky
2559 break ;
ef199164
DS
2560 }
2561
ecd9653b 2562 return enc ;
f7e98dee
RN
2563}
2564
f7e98dee
RN
2565class wxMBConv_cocoa : public wxMBConv
2566{
2567public:
2568 wxMBConv_cocoa()
2569 {
2570 Init(CFStringGetSystemEncoding()) ;
2571 }
2572
d36c9347
VZ
2573 wxMBConv_cocoa(const wxMBConv_cocoa& conv)
2574 {
2575 m_encoding = conv.m_encoding;
2576 }
2577
a6900d10 2578#if wxUSE_FONTMAP
f7e98dee
RN
2579 wxMBConv_cocoa(const wxChar* name)
2580 {
267e11c5 2581 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
f7e98dee 2582 }
a6900d10 2583#endif
f7e98dee
RN
2584
2585 wxMBConv_cocoa(wxFontEncoding encoding)
2586 {
2587 Init( wxCFStringEncFromFontEnc(encoding) );
2588 }
2589
d3c7fc99 2590 virtual ~wxMBConv_cocoa()
f7e98dee
RN
2591 {
2592 }
2593
2594 void Init( CFStringEncoding encoding)
2595 {
638357a0 2596 m_encoding = encoding ;
f7e98dee
RN
2597 }
2598
2599 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2600 {
2601 wxASSERT(szUnConv);
ecd9653b 2602
638357a0
RN
2603 CFStringRef theString = CFStringCreateWithBytes (
2604 NULL, //the allocator
2605 (const UInt8*)szUnConv,
2606 strlen(szUnConv),
2607 m_encoding,
2608 false //no BOM/external representation
f7e98dee
RN
2609 );
2610
2611 wxASSERT(theString);
2612
638357a0
RN
2613 size_t nOutLength = CFStringGetLength(theString);
2614
2615 if (szOut == NULL)
f7e98dee 2616 {
f7e98dee 2617 CFRelease(theString);
638357a0 2618 return nOutLength;
f7e98dee 2619 }
ecd9653b 2620
638357a0 2621 CFRange theRange = { 0, nOutSize };
ecd9653b 2622
638357a0
RN
2623#if SIZEOF_WCHAR_T == 4
2624 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2625#endif
3698ae71 2626
f7e98dee 2627 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
3698ae71 2628
f7e98dee 2629 CFRelease(theString);
ecd9653b 2630
ef199164 2631 szUniCharBuffer[nOutLength] = '\0';
f7e98dee
RN
2632
2633#if SIZEOF_WCHAR_T == 4
ef199164
DS
2634 wxMBConvUTF16 converter;
2635 converter.MB2WC( szOut, (const char*)szUniCharBuffer, nOutSize );
2636 delete [] szUniCharBuffer;
f7e98dee 2637#endif
3698ae71 2638
638357a0 2639 return nOutLength;
f7e98dee
RN
2640 }
2641
2642 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2643 {
638357a0 2644 wxASSERT(szUnConv);
3698ae71 2645
f7e98dee 2646 size_t nRealOutSize;
638357a0 2647 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 2648 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2649
f7e98dee 2650#if SIZEOF_WCHAR_T == 4
d9d488cf 2651 wxMBConvUTF16 converter ;
ef199164
DS
2652 nBufSize = converter.WC2MB( NULL, szUnConv, 0 );
2653 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1];
2654 converter.WC2MB( (char*) szUniBuffer, szUnConv, nBufSize + sizeof(UniChar));
f7e98dee 2655 nBufSize /= sizeof(UniChar);
f7e98dee
RN
2656#endif
2657
2658 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2659 NULL, //allocator
2660 szUniBuffer,
2661 nBufSize,
638357a0 2662 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 2663 );
ecd9653b 2664
f7e98dee 2665 wxASSERT(theString);
ecd9653b 2666
f7e98dee 2667 //Note that CER puts a BOM when converting to unicode
638357a0
RN
2668 //so we check and use getchars instead in that case
2669 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 2670 {
638357a0
RN
2671 if (szOut != NULL)
2672 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
3698ae71 2673
638357a0
RN
2674 nRealOutSize = CFStringGetLength(theString) + 1;
2675 }
2676 else
2677 {
2678 CFStringGetBytes(
2679 theString,
2680 CFRangeMake(0, CFStringGetLength(theString)),
2681 m_encoding,
2682 0, //what to put in characters that can't be converted -
2683 //0 tells CFString to return NULL if it meets such a character
2684 false, //not an external representation
2685 (UInt8*) szOut,
3698ae71 2686 nOutSize,
638357a0
RN
2687 (CFIndex*) &nRealOutSize
2688 );
f7e98dee 2689 }
ecd9653b 2690
638357a0 2691 CFRelease(theString);
ecd9653b 2692
638357a0
RN
2693#if SIZEOF_WCHAR_T == 4
2694 delete[] szUniBuffer;
2695#endif
ecd9653b 2696
f7e98dee
RN
2697 return nRealOutSize - 1;
2698 }
2699
d36c9347
VZ
2700 virtual wxMBConv *Clone() const { return new wxMBConv_cocoa(*this); }
2701
f7e98dee 2702 bool IsOk() const
ecd9653b 2703 {
3698ae71 2704 return m_encoding != kCFStringEncodingInvalidId &&
638357a0 2705 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2706 }
2707
2708private:
638357a0 2709 CFStringEncoding m_encoding ;
f7e98dee
RN
2710};
2711
2712#endif // defined(__WXCOCOA__)
2713
335d31e0
SC
2714// ============================================================================
2715// Mac conversion classes
2716// ============================================================================
2717
2718#if defined(__WXMAC__) && defined(TARGET_CARBON)
2719
2720class wxMBConv_mac : public wxMBConv
2721{
2722public:
2723 wxMBConv_mac()
2724 {
2725 Init(CFStringGetSystemEncoding()) ;
2726 }
2727
d36c9347
VZ
2728 wxMBConv_mac(const wxMBConv_mac& conv)
2729 {
2730 Init(conv.m_char_encoding);
2731 }
2732
2d1659cf 2733#if wxUSE_FONTMAP
335d31e0
SC
2734 wxMBConv_mac(const wxChar* name)
2735 {
ef199164 2736 Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) );
335d31e0 2737 }
2d1659cf 2738#endif
335d31e0
SC
2739
2740 wxMBConv_mac(wxFontEncoding encoding)
2741 {
d775fa82
WS
2742 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2743 }
2744
d3c7fc99 2745 virtual ~wxMBConv_mac()
d775fa82
WS
2746 {
2747 OSStatus status = noErr ;
739cb14a
SC
2748 if (m_MB2WC_converter)
2749 status = TECDisposeConverter(m_MB2WC_converter);
2750 if (m_WC2MB_converter)
2751 status = TECDisposeConverter(m_WC2MB_converter);
d775fa82
WS
2752 }
2753
739cb14a
SC
2754 void Init( TextEncodingBase encoding,TextEncodingVariant encodingVariant = kTextEncodingDefaultVariant ,
2755 TextEncodingFormat encodingFormat = kTextEncodingDefaultFormat)
d775fa82 2756 {
739cb14a
SC
2757 m_MB2WC_converter = NULL ;
2758 m_WC2MB_converter = NULL ;
2759 m_char_encoding = CreateTextEncoding(encoding, encodingVariant, encodingFormat) ;
ef199164 2760 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 0, kUnicode16BitFormat) ;
739cb14a 2761 }
d775fa82 2762
739cb14a
SC
2763 virtual void CreateIfNeeded() const
2764 {
2765 if ( m_MB2WC_converter == NULL && m_WC2MB_converter == NULL )
2766 {
2767 OSStatus status = noErr ;
2768 status = TECCreateConverter(&m_MB2WC_converter,
d775fa82
WS
2769 m_char_encoding,
2770 m_unicode_encoding);
739cb14a
SC
2771 wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2772 status = TECCreateConverter(&m_WC2MB_converter,
d775fa82
WS
2773 m_unicode_encoding,
2774 m_char_encoding);
739cb14a
SC
2775 wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2776 }
d775fa82 2777 }
57bd4c60 2778
335d31e0
SC
2779 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2780 {
739cb14a 2781 CreateIfNeeded() ;
d775fa82
WS
2782 OSStatus status = noErr ;
2783 ByteCount byteOutLen ;
9088c87b 2784 ByteCount byteInLen = strlen(psz) + 1;
d775fa82
WS
2785 wchar_t *tbuf = NULL ;
2786 UniChar* ubuf = NULL ;
2787 size_t res = 0 ;
2788
2789 if (buf == NULL)
2790 {
ef199164
DS
2791 // Apple specs say at least 32
2792 n = wxMax( 32, byteInLen ) ;
2793 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
d775fa82 2794 }
ef199164 2795
d775fa82 2796 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
ef199164 2797
f3a355ce 2798#if SIZEOF_WCHAR_T == 4
d775fa82 2799 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2800#else
d775fa82 2801 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2802#endif
ef199164
DS
2803
2804 status = TECConvertText(
2805 m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
2806 (TextPtr) ubuf, byteBufferLen, &byteOutLen);
2807
f3a355ce 2808#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2809 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2810 // is not properly terminated we get random characters at the end
2811 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d9d488cf 2812 wxMBConvUTF16 converter ;
ef199164 2813 res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
d775fa82 2814 free( ubuf ) ;
f3a355ce 2815#else
d775fa82 2816 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2817#endif
ef199164 2818
d775fa82
WS
2819 if ( buf == NULL )
2820 free(tbuf) ;
335d31e0 2821
335d31e0
SC
2822 if ( buf && res < n)
2823 buf[res] = 0;
2824
d775fa82 2825 return res ;
335d31e0
SC
2826 }
2827
2828 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82 2829 {
739cb14a 2830 CreateIfNeeded() ;
d775fa82
WS
2831 OSStatus status = noErr ;
2832 ByteCount byteOutLen ;
2833 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2834
2835 char *tbuf = NULL ;
2836
2837 if (buf == NULL)
2838 {
ef199164
DS
2839 // Apple specs say at least 32
2840 n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2841 tbuf = (char*) malloc( n ) ;
2842 }
2843
2844 ByteCount byteBufferLen = n ;
2845 UniChar* ubuf = NULL ;
ef199164 2846
f3a355ce 2847#if SIZEOF_WCHAR_T == 4
d9d488cf 2848 wxMBConvUTF16 converter ;
ef199164 2849 size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
d775fa82
WS
2850 byteInLen = unicharlen ;
2851 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
ef199164 2852 converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
f3a355ce 2853#else
d775fa82 2854 ubuf = (UniChar*) psz ;
f3a355ce 2855#endif
ef199164
DS
2856
2857 status = TECConvertText(
2858 m_WC2MB_converter, (ConstTextPtr) ubuf, byteInLen, &byteInLen,
2859 (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2860
f3a355ce 2861#if SIZEOF_WCHAR_T == 4
d775fa82 2862 free( ubuf ) ;
f3a355ce 2863#endif
ef199164 2864
d775fa82
WS
2865 if ( buf == NULL )
2866 free(tbuf) ;
335d31e0 2867
d775fa82 2868 size_t res = byteOutLen ;
335d31e0 2869 if ( buf && res < n)
638357a0 2870 {
335d31e0 2871 buf[res] = 0;
3698ae71 2872
638357a0
RN
2873 //we need to double-trip to verify it didn't insert any ? in place
2874 //of bogus characters
2875 wxWCharBuffer wcBuf(n);
2876 size_t pszlen = wxWcslen(psz);
467e0479 2877 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
638357a0
RN
2878 wxWcslen(wcBuf) != pszlen ||
2879 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2880 {
2881 // we didn't obtain the same thing we started from, hence
2882 // the conversion was lossy and we consider that it failed
467e0479 2883 return wxCONV_FAILED;
638357a0
RN
2884 }
2885 }
335d31e0 2886
d775fa82 2887 return res ;
335d31e0
SC
2888 }
2889
d3478e2c 2890 virtual wxMBConv *Clone() const { return new wxMBConv_mac(*this); }
d36c9347 2891
335d31e0 2892 bool IsOk() const
57bd4c60 2893 {
739cb14a 2894 CreateIfNeeded() ;
57bd4c60 2895 return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL;
739cb14a 2896 }
335d31e0 2897
739cb14a
SC
2898protected :
2899 mutable TECObjectRef m_MB2WC_converter;
2900 mutable TECObjectRef m_WC2MB_converter;
d775fa82 2901
ef199164
DS
2902 TextEncodingBase m_char_encoding;
2903 TextEncodingBase m_unicode_encoding;
335d31e0
SC
2904};
2905
739cb14a
SC
2906// MB is decomposed (D) normalized UTF8
2907
2908class wxMBConv_macUTF8D : public wxMBConv_mac
2909{
2910public :
57bd4c60 2911 wxMBConv_macUTF8D()
739cb14a
SC
2912 {
2913 Init( kTextEncodingUnicodeDefault , kUnicodeNoSubset , kUnicodeUTF8Format ) ;
2914 m_uni = NULL;
fbb0b8af 2915 m_uniBack = NULL ;
739cb14a 2916 }
57bd4c60 2917
d3c7fc99 2918 virtual ~wxMBConv_macUTF8D()
739cb14a 2919 {
fbb0b8af
SC
2920 if (m_uni!=NULL)
2921 DisposeUnicodeToTextInfo(&m_uni);
2922 if (m_uniBack!=NULL)
2923 DisposeUnicodeToTextInfo(&m_uniBack);
739cb14a 2924 }
57bd4c60 2925
739cb14a
SC
2926 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2927 {
2928 CreateIfNeeded() ;
2929 OSStatus status = noErr ;
2930 ByteCount byteOutLen ;
2931 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2932
2933 char *tbuf = NULL ;
2934
2935 if (buf == NULL)
2936 {
2937 // Apple specs say at least 32
2938 n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2939 tbuf = (char*) malloc( n ) ;
2940 }
2941
2942 ByteCount byteBufferLen = n ;
2943 UniChar* ubuf = NULL ;
2944
2945#if SIZEOF_WCHAR_T == 4
2946 wxMBConvUTF16 converter ;
2947 size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
2948 byteInLen = unicharlen ;
2949 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2950 converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
2951#else
2952 ubuf = (UniChar*) psz ;
2953#endif
2954
57bd4c60
WS
2955 // ubuf is a non-decomposed UniChar buffer
2956
739cb14a
SC
2957 ByteCount dcubuflen = byteInLen * 2 + 2 ;
2958 ByteCount dcubufread , dcubufwritten ;
57bd4c60
WS
2959 UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
2960
2961 ConvertFromUnicodeToText( m_uni , byteInLen , ubuf ,
739cb14a 2962 kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , dcubuf ) ;
57bd4c60 2963
739cb14a
SC
2964 // we now convert that decomposed buffer into UTF8
2965
2966 status = TECConvertText(
2967 m_WC2MB_converter, (ConstTextPtr) dcubuf, dcubufwritten, &dcubufread,
2968 (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2969
2970 free( dcubuf );
2971
2972#if SIZEOF_WCHAR_T == 4
2973 free( ubuf ) ;
2974#endif
2975
2976 if ( buf == NULL )
2977 free(tbuf) ;
2978
2979 size_t res = byteOutLen ;
2980 if ( buf && res < n)
2981 {
2982 buf[res] = 0;
2983 // don't test for round-trip fidelity yet, we cannot guarantee it yet
2984 }
2985
2986 return res ;
2987 }
57bd4c60 2988
fbb0b8af
SC
2989 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2990 {
2991 CreateIfNeeded() ;
2992 OSStatus status = noErr ;
2993 ByteCount byteOutLen ;
2994 ByteCount byteInLen = strlen(psz) + 1;
2995 wchar_t *tbuf = NULL ;
2996 UniChar* ubuf = NULL ;
2997 size_t res = 0 ;
57bd4c60 2998
fbb0b8af
SC
2999 if (buf == NULL)
3000 {
3001 // Apple specs say at least 32
3002 n = wxMax( 32, byteInLen ) ;
3003 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
3004 }
57bd4c60 3005
fbb0b8af 3006 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
57bd4c60 3007
fbb0b8af
SC
3008#if SIZEOF_WCHAR_T == 4
3009 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
3010#else
3011 ubuf = (UniChar*) (buf ? buf : tbuf) ;
3012#endif
57bd4c60 3013
fbb0b8af
SC
3014 ByteCount dcubuflen = byteBufferLen * 2 + 2 ;
3015 ByteCount dcubufread , dcubufwritten ;
57bd4c60 3016 UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
fbb0b8af
SC
3017
3018 status = TECConvertText(
3019 m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
3020 (TextPtr) dcubuf, dcubuflen, &byteOutLen);
3021 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
3022 // is not properly terminated we get random characters at the end
3023 dcubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
57bd4c60 3024
fbb0b8af 3025 // now from the decomposed UniChar to properly composed uniChar
57bd4c60 3026 ConvertFromUnicodeToText( m_uniBack , byteOutLen , dcubuf ,
fbb0b8af
SC
3027 kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , ubuf ) ;
3028
3029 free( dcubuf );
3030 byteOutLen = dcubufwritten ;
3031 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
57bd4c60
WS
3032
3033
fbb0b8af
SC
3034#if SIZEOF_WCHAR_T == 4
3035 wxMBConvUTF16 converter ;
3036 res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
3037 free( ubuf ) ;
3038#else
3039 res = byteOutLen / sizeof( UniChar ) ;
3040#endif
57bd4c60 3041
fbb0b8af
SC
3042 if ( buf == NULL )
3043 free(tbuf) ;
57bd4c60 3044
fbb0b8af
SC
3045 if ( buf && res < n)
3046 buf[res] = 0;
57bd4c60 3047
fbb0b8af
SC
3048 return res ;
3049 }
3050
739cb14a
SC
3051 virtual void CreateIfNeeded() const
3052 {
3053 wxMBConv_mac::CreateIfNeeded() ;
3054 if ( m_uni == NULL )
3055 {
3056 m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3057 kUnicodeNoSubset, kTextEncodingDefaultFormat);
3058 m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3059 kUnicodeCanonicalDecompVariant, kTextEncodingDefaultFormat);
3060 m_map.mappingVersion = kUnicodeUseLatestMapping;
57bd4c60
WS
3061
3062 OSStatus err = CreateUnicodeToTextInfo(&m_map, &m_uni);
739cb14a 3063 wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
57bd4c60 3064
fbb0b8af
SC
3065 m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3066 kUnicodeNoSubset, kTextEncodingDefaultFormat);
3067 m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3068 kUnicodeCanonicalCompVariant, kTextEncodingDefaultFormat);
3069 m_map.mappingVersion = kUnicodeUseLatestMapping;
57bd4c60 3070 err = CreateUnicodeToTextInfo(&m_map, &m_uniBack);
fbb0b8af 3071 wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
739cb14a
SC
3072 }
3073 }
3074protected :
3075 mutable UnicodeToTextInfo m_uni;
fbb0b8af 3076 mutable UnicodeToTextInfo m_uniBack;
739cb14a 3077 mutable UnicodeMapping m_map;
57bd4c60 3078};
335d31e0 3079#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 3080
36acb880
VZ
3081// ============================================================================
3082// wxEncodingConverter based conversion classes
3083// ============================================================================
3084
1e6feb95 3085#if wxUSE_FONTMAP
1cd52418 3086
e95354ec 3087class wxMBConv_wxwin : public wxMBConv
1cd52418 3088{
8b04d4c4
VZ
3089private:
3090 void Init()
3091 {
3092 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
3093 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
3094 }
3095
6001e347 3096public:
f1339c56
RR
3097 // temporarily just use wxEncodingConverter stuff,
3098 // so that it works while a better implementation is built
e95354ec 3099 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
3100 {
3101 if (name)
267e11c5 3102 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
3103 else
3104 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 3105
8b04d4c4
VZ
3106 Init();
3107 }
3108
e95354ec 3109 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
3110 {
3111 m_enc = enc;
3112
3113 Init();
f1339c56 3114 }
dccce9ea 3115
bde4baac 3116 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
3117 {
3118 size_t inbuf = strlen(psz);
dccce9ea 3119 if (buf)
c643a977 3120 {
ef199164 3121 if (!m2w.Convert(psz, buf))
467e0479 3122 return wxCONV_FAILED;
c643a977 3123 }
f1339c56
RR
3124 return inbuf;
3125 }
dccce9ea 3126
bde4baac 3127 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 3128 {
f8d791e0 3129 const size_t inbuf = wxWcslen(psz);
f1339c56 3130 if (buf)
c643a977 3131 {
ef199164 3132 if (!w2m.Convert(psz, buf))
467e0479 3133 return wxCONV_FAILED;
c643a977 3134 }
dccce9ea 3135
f1339c56
RR
3136 return inbuf;
3137 }
dccce9ea 3138
7ef3ab50 3139 virtual size_t GetMBNulLen() const
eec47cc6
VZ
3140 {
3141 switch ( m_enc )
3142 {
3143 case wxFONTENCODING_UTF16BE:
3144 case wxFONTENCODING_UTF16LE:
c1464d9d 3145 return 2;
eec47cc6
VZ
3146
3147 case wxFONTENCODING_UTF32BE:
3148 case wxFONTENCODING_UTF32LE:
c1464d9d 3149 return 4;
eec47cc6
VZ
3150
3151 default:
c1464d9d 3152 return 1;
eec47cc6
VZ
3153 }
3154 }
3155
d36c9347
VZ
3156 virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
3157
7ef3ab50
VZ
3158 bool IsOk() const { return m_ok; }
3159
3160public:
3161 wxFontEncoding m_enc;
3162 wxEncodingConverter m2w, w2m;
3163
3164private:
cafbf6fb
VZ
3165 // were we initialized successfully?
3166 bool m_ok;
fc7a2a60 3167
e95354ec 3168 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 3169};
6001e347 3170
8f115891
MW
3171// make the constructors available for unit testing
3172WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
3173{
3174 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
3175 if ( !result->IsOk() )
3176 {
3177 delete result;
3178 return 0;
3179 }
ef199164 3180
8f115891
MW
3181 return result;
3182}
3183
1e6feb95
VZ
3184#endif // wxUSE_FONTMAP
3185
36acb880
VZ
3186// ============================================================================
3187// wxCSConv implementation
3188// ============================================================================
3189
8b04d4c4 3190void wxCSConv::Init()
6001e347 3191{
e95354ec
VZ
3192 m_name = NULL;
3193 m_convReal = NULL;
3194 m_deferred = true;
3195}
3196
8b04d4c4
VZ
3197wxCSConv::wxCSConv(const wxChar *charset)
3198{
3199 Init();
82713003 3200
e95354ec
VZ
3201 if ( charset )
3202 {
e95354ec
VZ
3203 SetName(charset);
3204 }
bda3d86a 3205
e4277538
VZ
3206#if wxUSE_FONTMAP
3207 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
3208#else
bda3d86a 3209 m_encoding = wxFONTENCODING_SYSTEM;
e4277538 3210#endif
6001e347
RR
3211}
3212
8b04d4c4
VZ
3213wxCSConv::wxCSConv(wxFontEncoding encoding)
3214{
bda3d86a 3215 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
3216 {
3217 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
3218
3219 encoding = wxFONTENCODING_SYSTEM;
3220 }
3221
8b04d4c4
VZ
3222 Init();
3223
bda3d86a 3224 m_encoding = encoding;
8b04d4c4
VZ
3225}
3226
6001e347
RR
3227wxCSConv::~wxCSConv()
3228{
65e50848
JS
3229 Clear();
3230}
3231
54380f29 3232wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 3233 : wxMBConv()
54380f29 3234{
8b04d4c4
VZ
3235 Init();
3236
54380f29 3237 SetName(conv.m_name);
8b04d4c4 3238 m_encoding = conv.m_encoding;
54380f29
GD
3239}
3240
3241wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
3242{
3243 Clear();
8b04d4c4 3244
54380f29 3245 SetName(conv.m_name);
8b04d4c4
VZ
3246 m_encoding = conv.m_encoding;
3247
54380f29
GD
3248 return *this;
3249}
3250
65e50848
JS
3251void wxCSConv::Clear()
3252{
8b04d4c4 3253 free(m_name);
e95354ec 3254 delete m_convReal;
8b04d4c4 3255
65e50848 3256 m_name = NULL;
e95354ec 3257 m_convReal = NULL;
6001e347
RR
3258}
3259
3260void wxCSConv::SetName(const wxChar *charset)
3261{
f1339c56
RR
3262 if (charset)
3263 {
3264 m_name = wxStrdup(charset);
e95354ec 3265 m_deferred = true;
f1339c56 3266 }
6001e347
RR
3267}
3268
#if wxUSE_FONTMAP

// map from a font encoding to a charset name
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString,
                     wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// filled by wxCSConv::DoCreate() with the name which worked with iconv for
// the given encoding; an empty name records that none of them did
static wxEncodingNameCache gs_nameCache;

#endif // wxUSE_FONTMAP
3276
e95354ec
VZ
3277wxMBConv *wxCSConv::DoCreate() const
3278{
ce6f8d6f
VZ
3279#if wxUSE_FONTMAP
3280 wxLogTrace(TRACE_STRCONV,
3281 wxT("creating conversion for %s"),
3282 (m_name ? m_name
3283 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
3284#endif // wxUSE_FONTMAP
3285
c547282d
VZ
3286 // check for the special case of ASCII or ISO8859-1 charset: as we have
3287 // special knowledge of it anyhow, we don't need to create a special
3288 // conversion object
e4277538
VZ
3289 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
3290 m_encoding == wxFONTENCODING_DEFAULT )
f1339c56 3291 {
e95354ec
VZ
3292 // don't convert at all
3293 return NULL;
3294 }
dccce9ea 3295
e95354ec
VZ
3296 // we trust OS to do conversion better than we can so try external
3297 // conversion methods first
3298 //
3299 // the full order is:
3300 // 1. OS conversion (iconv() under Unix or Win32 API)
3301 // 2. hard coded conversions for UTF
3302 // 3. wxEncodingConverter as fall back
3303
3304 // step (1)
3305#ifdef HAVE_ICONV
c547282d 3306#if !wxUSE_FONTMAP
e95354ec 3307 if ( m_name )
c547282d 3308#endif // !wxUSE_FONTMAP
e95354ec 3309 {
c547282d 3310 wxString name(m_name);
3ef10cfc 3311#if wxUSE_FONTMAP
8b3eb85d 3312 wxFontEncoding encoding(m_encoding);
3ef10cfc 3313#endif
8b3eb85d
VZ
3314
3315 if ( !name.empty() )
3316 {
3317 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
3318 if ( conv->IsOk() )
3319 return conv;
3320
3321 delete conv;
c547282d
VZ
3322
3323#if wxUSE_FONTMAP
8b3eb85d
VZ
3324 encoding =
3325 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
c547282d 3326#endif // wxUSE_FONTMAP
8b3eb85d
VZ
3327 }
3328#if wxUSE_FONTMAP
3329 {
3330 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
3331 if ( it != gs_nameCache.end() )
3332 {
3333 if ( it->second.empty() )
3334 return NULL;
c547282d 3335
8b3eb85d
VZ
3336 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
3337 if ( conv->IsOk() )
3338 return conv;
e95354ec 3339
8b3eb85d
VZ
3340 delete conv;
3341 }
3342
3343 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
3c67ec06 3344 // CS : in case this does not return valid names (eg for MacRoman) encoding
57bd4c60 3345 // got a 'failure' entry in the cache all the same, although it just has to
3c67ec06
SC
3346 // be created using a different method, so only store failed iconv creation
3347 // attempts (or perhaps we shoulnd't do this at all ?)
3348 if ( names[0] != NULL )
8b3eb85d 3349 {
3c67ec06 3350 for ( ; *names; ++names )
8b3eb85d 3351 {
3c67ec06
SC
3352 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
3353 if ( conv->IsOk() )
3354 {
3355 gs_nameCache[encoding] = *names;
3356 return conv;
3357 }
3358
3359 delete conv;
8b3eb85d
VZ
3360 }
3361
3c67ec06 3362 gs_nameCache[encoding] = _T(""); // cache the failure
8b3eb85d 3363 }
8b3eb85d
VZ
3364 }
3365#endif // wxUSE_FONTMAP
e95354ec
VZ
3366 }
3367#endif // HAVE_ICONV
3368
3369#ifdef wxHAVE_WIN32_MB2WC
3370 {
7608a683 3371#if wxUSE_FONTMAP
e95354ec
VZ
3372 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
3373 : new wxMBConv_win32(m_encoding);
3374 if ( conv->IsOk() )
3375 return conv;
3376
3377 delete conv;
7608a683
WS
3378#else
3379 return NULL;
3380#endif
e95354ec
VZ
3381 }
3382#endif // wxHAVE_WIN32_MB2WC
ef199164 3383
d775fa82
WS
3384#if defined(__WXMAC__)
3385 {
5c3c8676 3386 // leave UTF16 and UTF32 to the built-ins of wx
3698ae71 3387 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
5c3c8676 3388 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
d775fa82 3389 {
2d1659cf 3390#if wxUSE_FONTMAP
d775fa82
WS
3391 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
3392 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
3393#else
3394 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
3395#endif
d775fa82 3396 if ( conv->IsOk() )
f7e98dee
RN
3397 return conv;
3398
3399 delete conv;
3400 }
3401 }
3402#endif
ef199164 3403
f7e98dee
RN
3404#if defined(__WXCOCOA__)
3405 {
3406 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
3407 {
a6900d10 3408#if wxUSE_FONTMAP
f7e98dee
RN
3409 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
3410 : new wxMBConv_cocoa(m_encoding);
a6900d10
RN
3411#else
3412 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
3413#endif
ef199164 3414
f7e98dee 3415 if ( conv->IsOk() )
d775fa82
WS
3416 return conv;
3417
3418 delete conv;
3419 }
335d31e0
SC
3420 }
3421#endif
e95354ec
VZ
3422 // step (2)
3423 wxFontEncoding enc = m_encoding;
3424#if wxUSE_FONTMAP
c547282d
VZ
3425 if ( enc == wxFONTENCODING_SYSTEM && m_name )
3426 {
3427 // use "false" to suppress interactive dialogs -- we can be called from
3428 // anywhere and popping up a dialog from here is the last thing we want to
3429 // do
267e11c5 3430 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 3431 }
e95354ec
VZ
3432#endif // wxUSE_FONTMAP
3433
3434 switch ( enc )
3435 {
3436 case wxFONTENCODING_UTF7:
3437 return new wxMBConvUTF7;
3438
3439 case wxFONTENCODING_UTF8:
3440 return new wxMBConvUTF8;
3441
e95354ec
VZ
3442 case wxFONTENCODING_UTF16BE:
3443 return new wxMBConvUTF16BE;
3444
3445 case wxFONTENCODING_UTF16LE:
3446 return new wxMBConvUTF16LE;
3447
e95354ec
VZ
3448 case wxFONTENCODING_UTF32BE:
3449 return new wxMBConvUTF32BE;
3450
3451 case wxFONTENCODING_UTF32LE:
3452 return new wxMBConvUTF32LE;
3453
3454 default:
3455 // nothing to do but put here to suppress gcc warnings
ef199164 3456 break;
e95354ec
VZ
3457 }
3458
3459 // step (3)
3460#if wxUSE_FONTMAP
3461 {
3462 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
3463 : new wxMBConv_wxwin(m_encoding);
3464 if ( conv->IsOk() )
3465 return conv;
3466
3467 delete conv;
3468 }
3469#endif // wxUSE_FONTMAP
3470
a58d4f4d
VS
3471 // NB: This is a hack to prevent deadlock. What could otherwise happen
3472 // in Unicode build: wxConvLocal creation ends up being here
3473 // because of some failure and logs the error. But wxLog will try to
6a17b868
SN
3474 // attach a timestamp, for which it will need wxConvLocal (to convert
3475 // time to char* and then wchar_t*), but that fails, tries to log the
3476 // error, but wxLog has an (already locked) critical section that
3477 // guards the static buffer.
a58d4f4d
VS
3478 static bool alreadyLoggingError = false;
3479 if (!alreadyLoggingError)
3480 {
3481 alreadyLoggingError = true;
3482 wxLogError(_("Cannot convert from the charset '%s'!"),
3483 m_name ? m_name
e95354ec
VZ
3484 :
3485#if wxUSE_FONTMAP
267e11c5 3486 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
e95354ec 3487#else // !wxUSE_FONTMAP
3ef10cfc 3488 wxString::Format(_("encoding %i"), m_encoding).c_str()
e95354ec
VZ
3489#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
3490 );
ef199164 3491
a58d4f4d
VS
3492 alreadyLoggingError = false;
3493 }
e95354ec
VZ
3494
3495 return NULL;
3496}
3497
3498void wxCSConv::CreateConvIfNeeded() const
3499{
3500 if ( m_deferred )
3501 {
3502 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a 3503
bda3d86a
VZ
3504 // if we don't have neither the name nor the encoding, use the default
3505 // encoding for this system
3506 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3507 {
4c75209f 3508#if wxUSE_INTL
4d312c22 3509 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
4c75209f
VS
3510#else
3511 // fallback to some reasonable default:
3512 self->m_encoding = wxFONTENCODING_ISO8859_1;
bda3d86a 3513#endif // wxUSE_INTL
4c75209f 3514 }
bda3d86a 3515
e95354ec
VZ
3516 self->m_convReal = DoCreate();
3517 self->m_deferred = false;
6001e347 3518 }
6001e347
RR
3519}
3520
1c714a5d
VZ
3521size_t wxCSConv::ToWChar(wchar_t *dst, size_t dstLen,
3522 const char *src, size_t srcLen) const
3523{
3524 CreateConvIfNeeded();
3525
2c74c558
VS
3526 if (m_convReal)
3527 return m_convReal->ToWChar(dst, dstLen, src, srcLen);
3528
3529 // latin-1 (direct)
3530 return wxMBConv::ToWChar(dst, dstLen, src, srcLen);
1c714a5d
VZ
3531}
3532
3533size_t wxCSConv::FromWChar(char *dst, size_t dstLen,
3534 const wchar_t *src, size_t srcLen) const
3535{
3536 CreateConvIfNeeded();
3537
2c74c558
VS
3538 if (m_convReal)
3539 return m_convReal->FromWChar(dst, dstLen, src, srcLen);
3540
3541 // latin-1 (direct)
3542 return wxMBConv::FromWChar(dst, dstLen, src, srcLen);
1c714a5d
VZ
3543}
3544
6001e347
RR
3545size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
3546{
e95354ec 3547 CreateConvIfNeeded();
dccce9ea 3548
e95354ec
VZ
3549 if (m_convReal)
3550 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
3551
3552 // latin-1 (direct)
4def3b35 3553 size_t len = strlen(psz);
dccce9ea 3554
f1339c56
RR
3555 if (buf)
3556 {
4def3b35 3557 for (size_t c = 0; c <= len; c++)
f1339c56
RR
3558 buf[c] = (unsigned char)(psz[c]);
3559 }
dccce9ea 3560
f1339c56 3561 return len;
6001e347
RR
3562}
3563
3564size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
3565{
e95354ec 3566 CreateConvIfNeeded();
dccce9ea 3567
e95354ec
VZ
3568 if (m_convReal)
3569 return m_convReal->WC2MB(buf, psz, n);
1cd52418 3570
f1339c56 3571 // latin-1 (direct)
f8d791e0 3572 const size_t len = wxWcslen(psz);
f1339c56
RR
3573 if (buf)
3574 {
4def3b35 3575 for (size_t c = 0; c <= len; c++)
24642831
VS
3576 {
3577 if (psz[c] > 0xFF)
467e0479 3578 return wxCONV_FAILED;
ef199164 3579
907173e5 3580 buf[c] = (char)psz[c];
24642831
VS
3581 }
3582 }
3583 else
3584 {
3585 for (size_t c = 0; c <= len; c++)
3586 {
3587 if (psz[c] > 0xFF)
467e0479 3588 return wxCONV_FAILED;
24642831 3589 }
f1339c56 3590 }
dccce9ea 3591
f1339c56 3592 return len;
6001e347
RR
3593}
3594
7ef3ab50 3595size_t wxCSConv::GetMBNulLen() const
eec47cc6
VZ
3596{
3597 CreateConvIfNeeded();
3598
3599 if ( m_convReal )
3600 {
7ef3ab50 3601 return m_convReal->GetMBNulLen();
eec47cc6
VZ
3602 }
3603
c1464d9d 3604 return 1;
eec47cc6
VZ
3605}
3606
bde4baac
VZ
3607// ----------------------------------------------------------------------------
3608// globals
3609// ----------------------------------------------------------------------------
3610
3611#ifdef __WINDOWS__
3612 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
3613#elif defined(__WXMAC__) && !defined(__MACH__)
3614 static wxMBConv_mac wxConvLibcObj ;
bde4baac 3615#else
dcc8fac0 3616 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
3617#endif
3618
3619static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
3620static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
3621static wxMBConvUTF7 wxConvUTF7Obj;
3622static wxMBConvUTF8 wxConvUTF8Obj;
d43d9ee7 3623#if defined(__WXMAC__) && defined(TARGET_CARBON)
739cb14a
SC
3624static wxMBConv_macUTF8D wxConvMacUTF8DObj;
3625#endif
bde4baac
VZ
3626WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
3627WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
3628WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
3629WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
3630WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
3631WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
d5bef0a3 3632WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = &wxConvLocal;
f5a1953b
VZ
3633WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
3634#ifdef __WXOSX__
d43d9ee7 3635#if defined(__WXMAC__) && defined(TARGET_CARBON)
739cb14a 3636 wxConvMacUTF8DObj;
d43d9ee7
SC
3637#else
3638 wxConvUTF8Obj;
3639#endif
f5a1953b 3640#else
ea8ce907 3641 wxConvLibcObj;
f5a1953b
VZ
3642#endif
3643
bde4baac
VZ
3644#else // !wxUSE_WCHAR_T
3645
3646// stand-ins in absence of wchar_t
3647WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
3648 wxConvISO8859_1,
3649 wxConvLocal,
3650 wxConvUTF8;
3651
3652#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T