]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
encconv.cpp: Remove comment about 5 being big enough value for ENC_PER_PLATFORM ...
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347 1/////////////////////////////////////////////////////////////////////////////
38d4b1e4 2// Name: src/common/strconv.cpp
6001e347 3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
f6bcfd97
BP
15// ============================================================================
16// declarations
17// ============================================================================
18
19// ----------------------------------------------------------------------------
20// headers
21// ----------------------------------------------------------------------------
22
6001e347
RR
23// For compilers that support precompilation, includes "wx.h".
24#include "wx/wxprec.h"
25
26#ifdef __BORLANDC__
27 #pragma hdrstop
28#endif
29
373658eb
VZ
30#ifndef WX_PRECOMP
31 #include "wx/intl.h"
32 #include "wx/log.h"
33#endif // WX_PRECOMP
34
bde4baac
VZ
35#include "wx/strconv.h"
36
37#if wxUSE_WCHAR_T
38
7608a683 39#ifdef __WINDOWS__
532d575b 40 #include "wx/msw/private.h"
13dd924a 41 #include "wx/msw/missing.h"
0a1c1e62
GRG
42#endif
43
1c193821 44#ifndef __WXWINCE__
1cd52418 45#include <errno.h>
1c193821
JS
46#endif
47
6001e347
RR
48#include <ctype.h>
49#include <string.h>
50#include <stdlib.h>
51
e95354ec
VZ
52#if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54#endif // __WIN32__ but !__WXMICROWIN__
55
6001e347 56#ifdef __SALFORDC__
373658eb 57 #include <clib.h>
6001e347
RR
58#endif
59
b040e242 60#ifdef HAVE_ICONV
373658eb 61 #include <iconv.h>
b1d547eb 62 #include "wx/thread.h"
1cd52418 63#endif
1cd52418 64
373658eb
VZ
65#include "wx/encconv.h"
66#include "wx/fontmap.h"
7608a683 67#include "wx/utils.h"
373658eb 68
335d31e0 69#ifdef __WXMAC__
40ba2f3b 70#ifndef __DARWIN__
4227afa4
SC
71#include <ATSUnicode.h>
72#include <TextCommon.h>
73#include <TextEncodingConverter.h>
40ba2f3b 74#endif
335d31e0
SC
75
76#include "wx/mac/private.h" // includes mac headers
77#endif
ce6f8d6f
VZ
78
79#define TRACE_STRCONV _T("strconv")
80
4948c2b6 81#if SIZEOF_WCHAR_T == 2
ac11db3a
MW
82 #define WC_UTF16
83#endif
84
373658eb
VZ
85// ============================================================================
86// implementation
87// ============================================================================
88
69373110
VZ
// Helper used when looking for string terminators: returns true if at least
// one of the n bytes starting at p is not NUL and false if all of them are
// (in particular, false is returned for n == 0).
static bool NotAllNULs(const char *p, size_t n)
{
    for ( size_t i = 0; i < n; i++ )
    {
        if ( p[i] != '\0' )
            return true;
    }

    return false;
}
97
373658eb 98// ----------------------------------------------------------------------------
c91830cb 99// UTF-16 en/decoding to/from UCS-4
373658eb 100// ----------------------------------------------------------------------------
6001e347 101
b0a6bb75 102
c91830cb 103static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 104{
dccce9ea 105 if (input<=0xffff)
4def3b35 106 {
999836aa
VZ
107 if (output)
108 *output = (wxUint16) input;
4def3b35 109 return 1;
dccce9ea
VZ
110 }
111 else if (input>=0x110000)
4def3b35
VS
112 {
113 return (size_t)-1;
dccce9ea
VZ
114 }
115 else
4def3b35 116 {
dccce9ea 117 if (output)
4def3b35 118 {
c91830cb 119 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
999836aa 120 *output = (wxUint16) ((input&0x3ff)+0xdc00);
4def3b35
VS
121 }
122 return 2;
1cd52418 123 }
1cd52418
OK
124}
125
c91830cb 126static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 127{
dccce9ea 128 if ((*input<0xd800) || (*input>0xdfff))
4def3b35
VS
129 {
130 output = *input;
131 return 1;
dccce9ea 132 }
cdb14ecb 133 else if ((input[1]<0xdc00) || (input[1]>0xdfff))
4def3b35
VS
134 {
135 output = *input;
136 return (size_t)-1;
dccce9ea
VZ
137 }
138 else
4def3b35
VS
139 {
140 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
141 return 2;
142 }
1cd52418
OK
143}
144
b0a6bb75 145
f6bcfd97 146// ----------------------------------------------------------------------------
6001e347 147// wxMBConv
f6bcfd97 148// ----------------------------------------------------------------------------
2c53a80a 149
483b0434
VZ
150size_t
151wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
152 const char *src, size_t srcLen) const
6001e347 153{
483b0434
VZ
154 // although new conversion classes are supposed to implement this function
155 // directly, the existins ones only implement the old MB2WC() and so, to
156 // avoid to have to rewrite all conversion classes at once, we provide a
157 // default (but not efficient) implementation of this one in terms of the
158 // old function by copying the input to ensure that it's NUL-terminated and
159 // then using MB2WC() to convert it
6001e347 160
483b0434
VZ
161 // the number of chars [which would be] written to dst [if it were not NULL]
162 size_t dstWritten = 0;
eec47cc6 163
c1464d9d 164 // the number of NULs terminating this string
483b0434 165 size_t nulLen wxDUMMY_INITIALIZE(0);
eec47cc6 166
c1464d9d
VZ
167 // if we were not given the input size we just have to assume that the
168 // string is properly terminated as we have no way of knowing how long it
169 // is anyhow, but if we do have the size check whether there are enough
170 // NULs at the end
483b0434
VZ
171 wxCharBuffer bufTmp;
172 const char *srcEnd;
173 if ( srcLen != (size_t)-1 )
eec47cc6 174 {
c1464d9d 175 // we need to know how to find the end of this string
7ef3ab50 176 nulLen = GetMBNulLen();
483b0434
VZ
177 if ( nulLen == wxCONV_FAILED )
178 return wxCONV_FAILED;
e4e3bbb4 179
c1464d9d 180 // if there are enough NULs we can avoid the copy
483b0434 181 if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
eec47cc6
VZ
182 {
183 // make a copy in order to properly NUL-terminate the string
483b0434 184 bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
c1464d9d 185 char * const p = bufTmp.data();
483b0434
VZ
186 memcpy(p, src, srcLen);
187 for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
c1464d9d 188 *s = '\0';
483b0434
VZ
189
190 src = bufTmp;
eec47cc6 191 }
e4e3bbb4 192
483b0434
VZ
193 srcEnd = src + srcLen;
194 }
195 else // quit after the first loop iteration
196 {
197 srcEnd = NULL;
198 }
e4e3bbb4 199
483b0434 200 for ( ;; )
eec47cc6 201 {
c1464d9d 202 // try to convert the current chunk
483b0434 203 size_t lenChunk = MB2WC(NULL, src, 0);
eec47cc6 204 if ( lenChunk == 0 )
f5fb6871 205 {
eec47cc6 206 // nothing left in the input string, conversion succeeded
c1464d9d 207 break;
f5fb6871
RN
208 }
209
483b0434
VZ
210 if ( lenChunk == wxCONV_FAILED )
211 return wxCONV_FAILED;
e4e3bbb4 212
c1464d9d
VZ
213 // if we already have a previous chunk, leave the NUL separating it
214 // from this one
483b0434 215 if ( dstWritten )
c1464d9d 216 {
483b0434
VZ
217 dstWritten++;
218 if ( dst )
219 dst++;
c1464d9d 220 }
e4e3bbb4 221
483b0434 222 dstWritten += lenChunk;
f5fb6871 223
483b0434
VZ
224 if ( dst )
225 {
226 if ( dstWritten > dstLen )
227 return wxCONV_FAILED;
228
229 lenChunk = MB2WC(dst, src, lenChunk + 1 /* for NUL */);
230 if ( lenChunk == wxCONV_FAILED )
231 return wxCONV_FAILED;
232
233 dst += lenChunk;
234 }
c1464d9d 235
483b0434 236 if ( !srcEnd )
c1464d9d 237 {
483b0434
VZ
238 // we convert the entire string in this cas, as we suppose that the
239 // string is NUL-terminated and so srcEnd is not used at all
c1464d9d
VZ
240 break;
241 }
eec47cc6
VZ
242
243 // advance the input pointer past the end of this chunk
483b0434 244 while ( NotAllNULs(src, nulLen) )
c1464d9d
VZ
245 {
246 // notice that we must skip over multiple bytes here as we suppose
247 // that if NUL takes 2 or 4 bytes, then all the other characters do
248 // too and so if advanced by a single byte we might erroneously
249 // detect sequences of NUL bytes in the middle of the input
483b0434 250 src += nulLen;
c1464d9d 251 }
e4e3bbb4 252
483b0434 253 src += nulLen; // skipping over its terminator as well
c1464d9d
VZ
254
255 // note that ">=" (and not just "==") is needed here as the terminator
256 // we skipped just above could be inside or just after the buffer
257 // delimited by inEnd
483b0434 258 if ( src >= srcEnd )
c1464d9d
VZ
259 break;
260 }
261
483b0434 262 return dstWritten;
e4e3bbb4
RN
263}
264
483b0434
VZ
265size_t
266wxMBConv::FromWChar(char *dst, size_t dstLen,
267 const wchar_t *src, size_t srcLen) const
e4e3bbb4 268{
483b0434
VZ
269 // the number of chars [which would be] written to dst [if it were not NULL]
270 size_t dstWritten = 0;
e4e3bbb4 271
eec47cc6
VZ
272 // make a copy of the input string unless it is already properly
273 // NUL-terminated
274 //
275 // if we don't know its length we have no choice but to assume that it is,
276 // indeed, properly terminated
277 wxWCharBuffer bufTmp;
483b0434 278 if ( srcLen == (size_t)-1 )
e4e3bbb4 279 {
483b0434 280 srcLen = wxWcslen(src) + 1;
eec47cc6 281 }
483b0434 282 else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
eec47cc6
VZ
283 {
284 // make a copy in order to properly NUL-terminate the string
483b0434
VZ
285 bufTmp = wxWCharBuffer(srcLen);
286 memcpy(bufTmp.data(), src, srcLen*sizeof(wchar_t));
287 src = bufTmp;
288 }
289
290 const size_t lenNul = GetMBNulLen();
291 for ( const wchar_t * const srcEnd = src + srcLen;
292 src < srcEnd;
293 src += wxWcslen(src) + 1 /* skip L'\0' too */ )
294 {
295 // try to convert the current chunk
296 size_t lenChunk = WC2MB(NULL, src, 0);
297
298 if ( lenChunk == wxCONV_FAILED )
299 return wxCONV_FAILED;
300
301 lenChunk += lenNul;
302 dstWritten += lenChunk;
303
304 if ( dst )
305 {
306 if ( dstWritten > dstLen )
307 return wxCONV_FAILED;
308
309 if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
310 return wxCONV_FAILED;
311
312 dst += lenChunk;
313 }
eec47cc6 314 }
e4e3bbb4 315
483b0434
VZ
316 return dstWritten;
317}
318
509da451
VZ
319size_t wxMBConv::MB2WC(wchar_t *out, const char *in, size_t outLen) const
320{
321 size_t rc = ToWChar(out, outLen, in);
322 if ( rc != wxCONV_FAILED )
323 {
324 // ToWChar() returns the buffer length, i.e. including the trailing
325 // NUL, while this method doesn't take it into account
326 rc--;
327 }
328
329 return rc;
330}
331
332size_t wxMBConv::WC2MB(char *out, const wchar_t *in, size_t outLen) const
333{
334 size_t rc = FromWChar(out, outLen, in);
335 if ( rc != wxCONV_FAILED )
336 {
337 rc -= GetMBNulLen();
338 }
339
340 return rc;
341}
342
483b0434
VZ
343wxMBConv::~wxMBConv()
344{
345 // nothing to do here (necessary for Darwin linking probably)
346}
e4e3bbb4 347
483b0434
VZ
348const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
349{
350 if ( psz )
eec47cc6 351 {
483b0434
VZ
352 // calculate the length of the buffer needed first
353 const size_t nLen = MB2WC(NULL, psz, 0);
354 if ( nLen != wxCONV_FAILED )
f5fb6871 355 {
483b0434
VZ
356 // now do the actual conversion
357 wxWCharBuffer buf(nLen /* +1 added implicitly */);
eec47cc6 358
483b0434
VZ
359 // +1 for the trailing NULL
360 if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
361 return buf;
f5fb6871 362 }
483b0434 363 }
e4e3bbb4 364
483b0434
VZ
365 return wxWCharBuffer();
366}
3698ae71 367
483b0434
VZ
368const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
369{
370 if ( pwz )
371 {
372 const size_t nLen = WC2MB(NULL, pwz, 0);
373 if ( nLen != wxCONV_FAILED )
374 {
375 // extra space for trailing NUL(s)
376 static const size_t extraLen = GetMaxMBNulLen();
f5fb6871 377
483b0434
VZ
378 wxCharBuffer buf(nLen + extraLen - 1);
379 if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
380 return buf;
381 }
382 }
383
384 return wxCharBuffer();
385}
e4e3bbb4 386
483b0434
VZ
387const wxWCharBuffer
388wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
389{
390 const size_t dstLen = ToWChar(NULL, 0, in, inLen);
391 if ( dstLen != wxCONV_FAILED )
392 {
393 wxWCharBuffer wbuf(dstLen);
394 if ( ToWChar(wbuf.data(), dstLen, in, inLen) )
395 {
396 if ( outLen )
397 *outLen = dstLen;
398 return wbuf;
399 }
400 }
401
402 if ( outLen )
403 *outLen = 0;
404
405 return wxWCharBuffer();
406}
407
408const wxCharBuffer
409wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
410{
411 const size_t dstLen = FromWChar(NULL, 0, in, inLen);
412 if ( dstLen != wxCONV_FAILED )
413 {
414 wxCharBuffer buf(dstLen);
415 if ( FromWChar(buf.data(), dstLen, in, inLen) )
416 {
417 if ( outLen )
418 *outLen = dstLen;
419 return buf;
420 }
e4e3bbb4
RN
421 }
422
eec47cc6
VZ
423 if ( outLen )
424 *outLen = 0;
425
426 return wxCharBuffer();
e4e3bbb4
RN
427}
428
6001e347 429// ----------------------------------------------------------------------------
bde4baac 430// wxMBConvLibc
6001e347
RR
431// ----------------------------------------------------------------------------
432
bde4baac
VZ
433size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
434{
435 return wxMB2WC(buf, psz, n);
436}
437
438size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
439{
440 return wxWC2MB(buf, psz, n);
441}
e1bfe89e
RR
442
443// ----------------------------------------------------------------------------
532d575b 444// wxConvBrokenFileNames
e1bfe89e
RR
445// ----------------------------------------------------------------------------
446
eec47cc6
VZ
447#ifdef __UNIX__
448
845905d5 449wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
ea8ce907 450{
845905d5
MW
451 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
452 || wxStricmp(charset, _T("UTF8")) == 0 )
453 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
454 else
455 m_conv = new wxCSConv(charset);
ea8ce907
RR
456}
457
eec47cc6 458#endif // __UNIX__
c12b7f79 459
bde4baac 460// ----------------------------------------------------------------------------
3698ae71 461// UTF-7
bde4baac 462// ----------------------------------------------------------------------------
6001e347 463
15f2ee32 464// Implementation (C) 2004 Fredrik Roubert
6001e347 465
15f2ee32
RN
//
// BASE64 decoding table: maps each byte value to the 6 bit value of the
// corresponding BASE64 digit or to 0xff if the byte is not a BASE64 digit
//
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: only '+' and '/' are digits
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0' to '9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x4f: 'A' to 'O'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50 - 0x5f: 'P' to 'Z'
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x6f: 'a' to 'o'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70 - 0x7f: 'p' to 'z'
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff: no digits at all here
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
504
505size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
506{
15f2ee32
RN
507 size_t len = 0;
508
04a37834 509 while ( *psz && (!buf || (len < n)) )
15f2ee32
RN
510 {
511 unsigned char cc = *psz++;
512 if (cc != '+')
513 {
514 // plain ASCII char
515 if (buf)
516 *buf++ = cc;
517 len++;
518 }
519 else if (*psz == '-')
520 {
521 // encoded plus sign
522 if (buf)
523 *buf++ = cc;
524 len++;
525 psz++;
526 }
04a37834 527 else // start of BASE64 encoded string
15f2ee32 528 {
04a37834 529 bool lsb, ok;
15f2ee32 530 unsigned int d, l;
04a37834
VZ
531 for ( ok = lsb = false, d = 0, l = 0;
532 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
533 psz++ )
15f2ee32
RN
534 {
535 d <<= 6;
536 d += cc;
537 for (l += 6; l >= 8; lsb = !lsb)
538 {
04a37834 539 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
540 if (lsb)
541 {
542 if (buf)
543 *buf++ |= c;
544 len ++;
545 }
546 else
04a37834 547 {
15f2ee32 548 if (buf)
6356d52a 549 *buf = (wchar_t)(c << 8);
04a37834
VZ
550 }
551
552 ok = true;
15f2ee32
RN
553 }
554 }
04a37834
VZ
555
556 if ( !ok )
557 {
558 // in valid UTF7 we should have valid characters after '+'
559 return (size_t)-1;
560 }
561
15f2ee32
RN
562 if (*psz == '-')
563 psz++;
564 }
565 }
04a37834
VZ
566
567 if ( buf && (len < n) )
568 *buf = '\0';
569
15f2ee32 570 return len;
6001e347
RR
571}
572
15f2ee32
RN
//
// BASE64 encoding table: maps each 6 bit value to its BASE64 digit
//
static const unsigned char utf7enb64[] =
{
    // 0 - 25
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 26 - 51
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // 52 - 61
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 62 - 63
    '+', '/'
};
587
//
// UTF-7 encoding table giving the class of each of the 128 ASCII characters:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    /* 0x00 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    /* 0x10 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x20 */ 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    /* 0x40 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    /* 0x60 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
607
667e5b3e 608size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32 609{
15f2ee32
RN
610 size_t len = 0;
611
612 while (*psz && ((!buf) || (len < n)))
613 {
614 wchar_t cc = *psz++;
615 if (cc < 0x80 && utf7encode[cc] < 1)
616 {
617 // plain ASCII char
618 if (buf)
619 *buf++ = (char)cc;
620 len++;
621 }
622#ifndef WC_UTF16
79c78d42 623 else if (((wxUint32)cc) > 0xffff)
b2c13097 624 {
15f2ee32
RN
625 // no surrogate pair generation (yet?)
626 return (size_t)-1;
627 }
628#endif
629 else
630 {
631 if (buf)
632 *buf++ = '+';
633 len++;
634 if (cc != '+')
635 {
636 // BASE64 encode string
637 unsigned int lsb, d, l;
73c902d6 638 for (d = 0, l = 0; /*nothing*/; psz++)
15f2ee32
RN
639 {
640 for (lsb = 0; lsb < 2; lsb ++)
641 {
642 d <<= 8;
643 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
644
645 for (l += 8; l >= 6; )
646 {
647 l -= 6;
648 if (buf)
649 *buf++ = utf7enb64[(d >> l) % 64];
650 len++;
651 }
652 }
653 cc = *psz;
654 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
655 break;
656 }
657 if (l != 0)
658 {
659 if (buf)
660 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
661 len++;
662 }
663 }
664 if (buf)
665 *buf++ = '-';
666 len++;
667 }
668 }
669 if (buf && (len < n))
670 *buf = 0;
671 return len;
6001e347
RR
672}
673
f6bcfd97 674// ----------------------------------------------------------------------------
6001e347 675// UTF-8
f6bcfd97 676// ----------------------------------------------------------------------------
6001e347 677
dccce9ea 678static wxUint32 utf8_max[]=
4def3b35 679 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347 680
3698ae71
VZ
681// boundaries of the private use area we use to (temporarily) remap invalid
682// characters invalid in a UTF-8 encoded string
ea8ce907
RR
683const wxUint32 wxUnicodePUA = 0x100000;
684const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
685
6001e347
RR
686size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
687{
4def3b35
VS
688 size_t len = 0;
689
dccce9ea 690 while (*psz && ((!buf) || (len < n)))
4def3b35 691 {
ea8ce907
RR
692 const char *opsz = psz;
693 bool invalid = false;
4def3b35
VS
694 unsigned char cc = *psz++, fc = cc;
695 unsigned cnt;
dccce9ea 696 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 697 fc <<= 1;
dccce9ea 698 if (!cnt)
4def3b35
VS
699 {
700 // plain ASCII char
dccce9ea 701 if (buf)
4def3b35
VS
702 *buf++ = cc;
703 len++;
561488ef
MW
704
705 // escape the escape character for octal escapes
706 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
707 && cc == '\\' && (!buf || len < n))
708 {
709 if (buf)
710 *buf++ = cc;
711 len++;
712 }
dccce9ea
VZ
713 }
714 else
4def3b35
VS
715 {
716 cnt--;
dccce9ea 717 if (!cnt)
4def3b35
VS
718 {
719 // invalid UTF-8 sequence
ea8ce907 720 invalid = true;
dccce9ea
VZ
721 }
722 else
4def3b35
VS
723 {
724 unsigned ocnt = cnt - 1;
725 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 726 while (cnt--)
4def3b35 727 {
ea8ce907 728 cc = *psz;
dccce9ea 729 if ((cc & 0xC0) != 0x80)
4def3b35
VS
730 {
731 // invalid UTF-8 sequence
ea8ce907
RR
732 invalid = true;
733 break;
4def3b35 734 }
ea8ce907 735 psz++;
4def3b35
VS
736 res = (res << 6) | (cc & 0x3f);
737 }
ea8ce907 738 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
739 {
740 // illegal UTF-8 encoding
ea8ce907 741 invalid = true;
4def3b35 742 }
ea8ce907
RR
743 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
744 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
745 {
746 // if one of our PUA characters turns up externally
747 // it must also be treated as an illegal sequence
748 // (a bit like you have to escape an escape character)
749 invalid = true;
750 }
751 else
752 {
1cd52418 753#ifdef WC_UTF16
ea8ce907
RR
754 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
755 size_t pa = encode_utf16(res, (wxUint16 *)buf);
756 if (pa == (size_t)-1)
757 {
758 invalid = true;
759 }
760 else
761 {
762 if (buf)
763 buf += pa;
764 len += pa;
765 }
373658eb 766#else // !WC_UTF16
ea8ce907 767 if (buf)
38d4b1e4 768 *buf++ = (wchar_t)res;
ea8ce907 769 len++;
373658eb 770#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
771 }
772 }
773 if (invalid)
774 {
775 if (m_options & MAP_INVALID_UTF8_TO_PUA)
776 {
777 while (opsz < psz && (!buf || len < n))
778 {
779#ifdef WC_UTF16
780 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
781 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
782 wxASSERT(pa != (size_t)-1);
783 if (buf)
784 buf += pa;
785 opsz++;
786 len += pa;
787#else
788 if (buf)
38d4b1e4 789 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
ea8ce907
RR
790 opsz++;
791 len++;
792#endif
793 }
794 }
3698ae71 795 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
796 {
797 while (opsz < psz && (!buf || len < n))
798 {
3698ae71
VZ
799 if ( buf && len + 3 < n )
800 {
17a1ebd1 801 unsigned char on = *opsz;
3698ae71 802 *buf++ = L'\\';
17a1ebd1
VZ
803 *buf++ = (wchar_t)( L'0' + on / 0100 );
804 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
805 *buf++ = (wchar_t)( L'0' + on % 010 );
3698ae71 806 }
ea8ce907
RR
807 opsz++;
808 len += 4;
809 }
810 }
3698ae71 811 else // MAP_INVALID_UTF8_NOT
ea8ce907
RR
812 {
813 return (size_t)-1;
814 }
4def3b35
VS
815 }
816 }
6001e347 817 }
dccce9ea 818 if (buf && (len < n))
4def3b35
VS
819 *buf = 0;
820 return len;
6001e347
RR
821}
822
3698ae71
VZ
// Return true if wch is an octal digit, i.e. one of L'0' to L'7' inclusive.
static inline bool isoctal(wchar_t wch)
{
    return wch >= L'0' && wch <= L'7';
}
827
6001e347
RR
828size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
829{
4def3b35 830 size_t len = 0;
6001e347 831
dccce9ea 832 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
833 {
834 wxUint32 cc;
1cd52418 835#ifdef WC_UTF16
b5153fd8
VZ
836 // cast is ok for WC_UTF16
837 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 838 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 839#else
4def3b35
VS
840 cc=(*psz++) & 0x7fffffff;
841#endif
3698ae71
VZ
842
843 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
844 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 845 {
dccce9ea 846 if (buf)
ea8ce907 847 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 848 len++;
3698ae71 849 }
561488ef
MW
850 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
851 && cc == L'\\' && psz[0] == L'\\' )
852 {
853 if (buf)
854 *buf++ = (char)cc;
855 psz++;
856 len++;
857 }
3698ae71
VZ
858 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
859 cc == L'\\' &&
860 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 861 {
dccce9ea 862 if (buf)
3698ae71 863 {
b2c13097
WS
864 *buf++ = (char) ((psz[0] - L'0')*0100 +
865 (psz[1] - L'0')*010 +
866 (psz[2] - L'0'));
3698ae71
VZ
867 }
868
869 psz += 3;
ea8ce907
RR
870 len++;
871 }
872 else
873 {
874 unsigned cnt;
875 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
876 if (!cnt)
4def3b35 877 {
ea8ce907
RR
878 // plain ASCII char
879 if (buf)
880 *buf++ = (char) cc;
881 len++;
882 }
883
884 else
885 {
886 len += cnt + 1;
887 if (buf)
888 {
889 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
890 while (cnt--)
891 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
892 }
4def3b35
VS
893 }
894 }
6001e347 895 }
4def3b35 896
3698ae71
VZ
897 if (buf && (len<n))
898 *buf = 0;
adb45366 899
4def3b35 900 return len;
6001e347
RR
901}
902
c91830cb
VZ
903// ----------------------------------------------------------------------------
904// UTF-16
905// ----------------------------------------------------------------------------
906
// the "straight" UTF-16 converter is the one using the byte order selected
// by WORDS_BIGENDIAN and the "swap" one is the converter using the opposite
// byte order
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else // !WORDS_BIGENDIAN
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif // WORDS_BIGENDIAN/!WORDS_BIGENDIAN
914
915
c91830cb
VZ
916#ifdef WC_UTF16
917
c91830cb
VZ
918// copy 16bit MB to 16bit String
919size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
920{
921 size_t len=0;
922
923 while (*(wxUint16*)psz && (!buf || len < n))
924 {
925 if (buf)
926 *buf++ = *(wxUint16*)psz;
927 len++;
928
929 psz += sizeof(wxUint16);
930 }
931 if (buf && len<n) *buf=0;
932
933 return len;
934}
935
936
937// copy 16bit String to 16bit MB
938size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
939{
940 size_t len=0;
941
942 while (*psz && (!buf || len < n))
943 {
944 if (buf)
945 {
946 *(wxUint16*)buf = *psz;
947 buf += sizeof(wxUint16);
948 }
949 len += sizeof(wxUint16);
950 psz++;
951 }
952 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
953
954 return len;
955}
956
957
958// swap 16bit MB to 16bit String
959size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
960{
bfab25d4 961 size_t len = 0;
c91830cb 962
da12017a
VZ
963 // UTF16 string must be terminated by 2 NULs as single NULs may occur
964 // inside the string
965 while ( (psz[0] || psz[1]) && (!buf || len < n) )
c91830cb 966 {
bfab25d4 967 if ( buf )
c91830cb
VZ
968 {
969 ((char *)buf)[0] = psz[1];
970 ((char *)buf)[1] = psz[0];
971 buf++;
972 }
973 len++;
bfab25d4 974 psz += 2;
c91830cb 975 }
bfab25d4
VZ
976
977 if ( buf && len < n )
978 *buf = L'\0';
c91830cb
VZ
979
980 return len;
981}
982
983
984// swap 16bit MB to 16bit String
985size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
986{
eec47cc6 987 size_t len = 0;
c91830cb 988
eec47cc6 989 while ( *psz && (!buf || len < n) )
c91830cb 990 {
eec47cc6 991 if ( buf )
c91830cb
VZ
992 {
993 *buf++ = ((char*)psz)[1];
994 *buf++ = ((char*)psz)[0];
995 }
eec47cc6 996 len += 2;
c91830cb
VZ
997 psz++;
998 }
eec47cc6
VZ
999
1000 if ( buf && len < n )
1001 *buf = '\0';
c91830cb
VZ
1002
1003 return len;
1004}
1005
1006
1007#else // WC_UTF16
1008
1009
1010// copy 16bit MB to 32bit String
1011size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1012{
1013 size_t len=0;
1014
1015 while (*(wxUint16*)psz && (!buf || len < n))
1016 {
1017 wxUint32 cc;
1018 size_t pa=decode_utf16((wxUint16*)psz, cc);
1019 if (pa == (size_t)-1)
1020 return pa;
1021
1022 if (buf)
38d4b1e4 1023 *buf++ = (wchar_t)cc;
c91830cb
VZ
1024 len++;
1025 psz += pa * sizeof(wxUint16);
1026 }
1027 if (buf && len<n) *buf=0;
1028
1029 return len;
1030}
1031
1032
1033// copy 32bit String to 16bit MB
1034size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1035{
1036 size_t len=0;
1037
1038 while (*psz && (!buf || len < n))
1039 {
1040 wxUint16 cc[2];
1041 size_t pa=encode_utf16(*psz, cc);
1042
1043 if (pa == (size_t)-1)
1044 return pa;
1045
1046 if (buf)
1047 {
69b80d28 1048 *(wxUint16*)buf = cc[0];
b5153fd8 1049 buf += sizeof(wxUint16);
c91830cb 1050 if (pa > 1)
69b80d28
VZ
1051 {
1052 *(wxUint16*)buf = cc[1];
1053 buf += sizeof(wxUint16);
1054 }
c91830cb
VZ
1055 }
1056
1057 len += pa*sizeof(wxUint16);
1058 psz++;
1059 }
1060 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1061
1062 return len;
1063}
1064
1065
1066// swap 16bit MB to 32bit String
1067size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1068{
1069 size_t len=0;
1070
1071 while (*(wxUint16*)psz && (!buf || len < n))
1072 {
1073 wxUint32 cc;
1074 char tmp[4];
1075 tmp[0]=psz[1]; tmp[1]=psz[0];
1076 tmp[2]=psz[3]; tmp[3]=psz[2];
1077
1078 size_t pa=decode_utf16((wxUint16*)tmp, cc);
1079 if (pa == (size_t)-1)
1080 return pa;
1081
1082 if (buf)
38d4b1e4 1083 *buf++ = (wchar_t)cc;
c91830cb
VZ
1084
1085 len++;
1086 psz += pa * sizeof(wxUint16);
1087 }
1088 if (buf && len<n) *buf=0;
1089
1090 return len;
1091}
1092
1093
1094// swap 32bit String to 16bit MB
1095size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1096{
1097 size_t len=0;
1098
1099 while (*psz && (!buf || len < n))
1100 {
1101 wxUint16 cc[2];
1102 size_t pa=encode_utf16(*psz, cc);
1103
1104 if (pa == (size_t)-1)
1105 return pa;
1106
1107 if (buf)
1108 {
1109 *buf++ = ((char*)cc)[1];
1110 *buf++ = ((char*)cc)[0];
1111 if (pa > 1)
1112 {
1113 *buf++ = ((char*)cc)[3];
1114 *buf++ = ((char*)cc)[2];
1115 }
1116 }
1117
1118 len += pa*sizeof(wxUint16);
1119 psz++;
1120 }
1121 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1122
1123 return len;
1124}
1125
1126#endif // WC_UTF16
1127
1128
1129// ----------------------------------------------------------------------------
1130// UTF-32
1131// ----------------------------------------------------------------------------
1132
1133#ifdef WORDS_BIGENDIAN
1134#define wxMBConvUTF32straight wxMBConvUTF32BE
1135#define wxMBConvUTF32swap wxMBConvUTF32LE
1136#else
1137#define wxMBConvUTF32swap wxMBConvUTF32BE
1138#define wxMBConvUTF32straight wxMBConvUTF32LE
1139#endif
1140
1141
1142WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1143WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1144
1145
1146#ifdef WC_UTF16
1147
1148// copy 32bit MB to 16bit String
1149size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1150{
1151 size_t len=0;
1152
1153 while (*(wxUint32*)psz && (!buf || len < n))
1154 {
1155 wxUint16 cc[2];
1156
1157 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1158 if (pa == (size_t)-1)
1159 return pa;
1160
1161 if (buf)
1162 {
1163 *buf++ = cc[0];
1164 if (pa > 1)
1165 *buf++ = cc[1];
1166 }
1167 len += pa;
1168 psz += sizeof(wxUint32);
1169 }
1170 if (buf && len<n) *buf=0;
1171
1172 return len;
1173}
1174
1175
1176// copy 16bit String to 32bit MB
1177size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1178{
1179 size_t len=0;
1180
1181 while (*psz && (!buf || len < n))
1182 {
1183 wxUint32 cc;
1184
b5153fd8
VZ
1185 // cast is ok for WC_UTF16
1186 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
1187 if (pa == (size_t)-1)
1188 return pa;
1189
1190 if (buf)
1191 {
1192 *(wxUint32*)buf = cc;
1193 buf += sizeof(wxUint32);
1194 }
1195 len += sizeof(wxUint32);
1196 psz += pa;
1197 }
b5153fd8
VZ
1198
1199 if (buf && len<=n-sizeof(wxUint32))
1200 *(wxUint32*)buf=0;
c91830cb
VZ
1201
1202 return len;
1203}
1204
1205
1206
1207// swap 32bit MB to 16bit String
1208size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1209{
1210 size_t len=0;
1211
1212 while (*(wxUint32*)psz && (!buf || len < n))
1213 {
1214 char tmp[4];
1215 tmp[0] = psz[3]; tmp[1] = psz[2];
1216 tmp[2] = psz[1]; tmp[3] = psz[0];
1217
1218
1219 wxUint16 cc[2];
1220
1221 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1222 if (pa == (size_t)-1)
1223 return pa;
1224
1225 if (buf)
1226 {
1227 *buf++ = cc[0];
1228 if (pa > 1)
1229 *buf++ = cc[1];
1230 }
1231 len += pa;
1232 psz += sizeof(wxUint32);
1233 }
b5153fd8
VZ
1234
1235 if (buf && len<n)
1236 *buf=0;
c91830cb
VZ
1237
1238 return len;
1239}
1240
1241
1242// swap 16bit String to 32bit MB
1243size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1244{
1245 size_t len=0;
1246
1247 while (*psz && (!buf || len < n))
1248 {
1249 char cc[4];
1250
b5153fd8
VZ
1251 // cast is ok for WC_UTF16
1252 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
1253 if (pa == (size_t)-1)
1254 return pa;
1255
1256 if (buf)
1257 {
1258 *buf++ = cc[3];
1259 *buf++ = cc[2];
1260 *buf++ = cc[1];
1261 *buf++ = cc[0];
1262 }
1263 len += sizeof(wxUint32);
1264 psz += pa;
1265 }
b5153fd8
VZ
1266
1267 if (buf && len<=n-sizeof(wxUint32))
1268 *(wxUint32*)buf=0;
c91830cb
VZ
1269
1270 return len;
1271}
1272
1273#else // WC_UTF16
1274
1275
1276// copy 32bit MB to 32bit String
1277size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1278{
1279 size_t len=0;
1280
1281 while (*(wxUint32*)psz && (!buf || len < n))
1282 {
1283 if (buf)
38d4b1e4 1284 *buf++ = (wchar_t)(*(wxUint32*)psz);
c91830cb
VZ
1285 len++;
1286 psz += sizeof(wxUint32);
1287 }
b5153fd8
VZ
1288
1289 if (buf && len<n)
1290 *buf=0;
c91830cb
VZ
1291
1292 return len;
1293}
1294
1295
1296// copy 32bit String to 32bit MB
1297size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1298{
1299 size_t len=0;
1300
1301 while (*psz && (!buf || len < n))
1302 {
1303 if (buf)
1304 {
1305 *(wxUint32*)buf = *psz;
1306 buf += sizeof(wxUint32);
1307 }
1308
1309 len += sizeof(wxUint32);
1310 psz++;
1311 }
1312
b5153fd8
VZ
1313 if (buf && len<=n-sizeof(wxUint32))
1314 *(wxUint32*)buf=0;
c91830cb
VZ
1315
1316 return len;
1317}
1318
1319
1320// swap 32bit MB to 32bit String
1321size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1322{
1323 size_t len=0;
1324
1325 while (*(wxUint32*)psz && (!buf || len < n))
1326 {
1327 if (buf)
1328 {
1329 ((char *)buf)[0] = psz[3];
1330 ((char *)buf)[1] = psz[2];
1331 ((char *)buf)[2] = psz[1];
1332 ((char *)buf)[3] = psz[0];
1333 buf++;
1334 }
1335 len++;
1336 psz += sizeof(wxUint32);
1337 }
b5153fd8
VZ
1338
1339 if (buf && len<n)
1340 *buf=0;
c91830cb
VZ
1341
1342 return len;
1343}
1344
1345
1346// swap 32bit String to 32bit MB
1347size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1348{
1349 size_t len=0;
1350
1351 while (*psz && (!buf || len < n))
1352 {
1353 if (buf)
1354 {
1355 *buf++ = ((char *)psz)[3];
1356 *buf++ = ((char *)psz)[2];
1357 *buf++ = ((char *)psz)[1];
1358 *buf++ = ((char *)psz)[0];
1359 }
1360 len += sizeof(wxUint32);
1361 psz++;
1362 }
b5153fd8
VZ
1363
1364 if (buf && len<=n-sizeof(wxUint32))
1365 *(wxUint32*)buf=0;
c91830cb
VZ
1366
1367 return len;
1368}
1369
1370
1371#endif // WC_UTF16
1372
1373
36acb880
VZ
1374// ============================================================================
1375// The classes doing conversion using the iconv_xxx() functions
1376// ============================================================================
3caec1bb 1377
b040e242 1378#ifdef HAVE_ICONV
3a0d76bc 1379
b1d547eb
VS
1380// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1381// E2BIG if output buffer is _exactly_ as big as needed. Such case is
1382// (unless there's yet another bug in glibc) the only case when iconv()
1383// returns with (size_t)-1 (which means error) and says there are 0 bytes
1384// left in the input buffer -- when _real_ error occurs,
1385// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1386// iconv() failure.
3caec1bb
VS
1387// [This bug does not appear in glibc 2.2.]
1388#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1389#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1390 (errno != E2BIG || bufLeft != 0))
1391#else
1392#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1393#endif
1394
ab217dba 1395#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880 1396
74a7eb0b
VZ
1397#define ICONV_T_INVALID ((iconv_t)-1)
1398
1399#if SIZEOF_WCHAR_T == 4
1400 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1401 #define WC_ENC wxFONTENCODING_UTF32
1402#elif SIZEOF_WCHAR_T == 2
1403 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1404 #define WC_ENC wxFONTENCODING_UTF16
1405#else // sizeof(wchar_t) != 2 nor 4
1406 // does this ever happen?
1407 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1408#endif
1409
36acb880 1410// ----------------------------------------------------------------------------
e95354ec 1411// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1412// ----------------------------------------------------------------------------
1413
e95354ec 1414class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1415{
1416public:
e95354ec
VZ
1417 wxMBConv_iconv(const wxChar *name);
1418 virtual ~wxMBConv_iconv();
36acb880 1419
bde4baac
VZ
1420 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1421 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1422
7ef3ab50
VZ
1423 // classify this encoding as explained in wxMBConv::GetMBNulLen()
1424 // comment
1425 virtual size_t GetMBNulLen() const;
1426
e95354ec 1427 bool IsOk() const
74a7eb0b 1428 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
36acb880
VZ
1429
1430protected:
1431 // the iconv handlers used to translate from multibyte to wide char and in
1432 // the other direction
1433 iconv_t m2w,
1434 w2m;
b1d547eb
VS
1435#if wxUSE_THREADS
1436 // guards access to m2w and w2m objects
1437 wxMutex m_iconvMutex;
1438#endif
36acb880
VZ
1439
1440private:
e95354ec 1441 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880 1442 // available on this machine, it will remain NULL
74a7eb0b 1443 static wxString ms_wcCharsetName;
36acb880
VZ
1444
1445 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1446 // different endian-ness than the native one
405d8f46 1447 static bool ms_wcNeedsSwap;
eec47cc6 1448
7ef3ab50 1449 // cached result of GetMBNulLen(); set to 0 meaning "unknown"
c1464d9d
VZ
1450 // initially
1451 size_t m_minMBCharWidth;
36acb880
VZ
1452};
1453
8f115891
MW
1454// make the constructor available for unit testing
1455WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1456{
1457 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1458 if ( !result->IsOk() )
1459 {
1460 delete result;
1461 return 0;
1462 }
1463 return result;
1464}
1465
422e411e 1466wxString wxMBConv_iconv::ms_wcCharsetName;
e95354ec 1467bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1468
e95354ec 1469wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 1470{
c1464d9d 1471 m_minMBCharWidth = 0;
eec47cc6 1472
0331b385
VZ
1473 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1474 // names for the charsets
200a9923 1475 const wxCharBuffer cname(wxString(name).ToAscii());
04c79127 1476
36acb880 1477 // check for charset that represents wchar_t:
74a7eb0b 1478 if ( ms_wcCharsetName.empty() )
f1339c56 1479 {
c2b83fdd
VZ
1480 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1481
74a7eb0b
VZ
1482#if wxUSE_FONTMAP
1483 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1484#else // !wxUSE_FONTMAP
1485 static const wxChar *names[] =
36acb880 1486 {
74a7eb0b
VZ
1487#if SIZEOF_WCHAR_T == 4
1488 _T("UCS-4"),
1489#elif SIZEOF_WCHAR_T = 2
1490 _T("UCS-2"),
1491#endif
1492 NULL
1493 };
1494#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
36acb880 1495
d1f024a8 1496 for ( ; *names && ms_wcCharsetName.empty(); ++names )
74a7eb0b 1497 {
17a1ebd1 1498 const wxString nameCS(*names);
74a7eb0b
VZ
1499
1500 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
17a1ebd1 1501 wxString nameXE(nameCS);
74a7eb0b
VZ
1502 #ifdef WORDS_BIGENDIAN
1503 nameXE += _T("BE");
1504 #else // little endian
1505 nameXE += _T("LE");
1506 #endif
1507
c2b83fdd
VZ
1508 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1509 nameXE.c_str());
1510
74a7eb0b
VZ
1511 m2w = iconv_open(nameXE.ToAscii(), cname);
1512 if ( m2w == ICONV_T_INVALID )
3a0d76bc 1513 {
74a7eb0b 1514 // try charset w/o bytesex info (e.g. "UCS4")
c2b83fdd
VZ
1515 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1516 nameCS.c_str());
17a1ebd1 1517 m2w = iconv_open(nameCS.ToAscii(), cname);
3a0d76bc 1518
74a7eb0b
VZ
1519 // and check for bytesex ourselves:
1520 if ( m2w != ICONV_T_INVALID )
3a0d76bc 1521 {
74a7eb0b
VZ
1522 char buf[2], *bufPtr;
1523 wchar_t wbuf[2], *wbufPtr;
1524 size_t insz, outsz;
1525 size_t res;
1526
1527 buf[0] = 'A';
1528 buf[1] = 0;
1529 wbuf[0] = 0;
1530 insz = 2;
1531 outsz = SIZEOF_WCHAR_T * 2;
1532 wbufPtr = wbuf;
1533 bufPtr = buf;
1534
1535 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1536 (char**)&wbufPtr, &outsz);
1537
1538 if (ICONV_FAILED(res, insz))
1539 {
1540 wxLogLastError(wxT("iconv"));
422e411e 1541 wxLogError(_("Conversion to charset '%s' doesn't work."),
17a1ebd1 1542 nameCS.c_str());
74a7eb0b
VZ
1543 }
1544 else // ok, can convert to this encoding, remember it
1545 {
17a1ebd1 1546 ms_wcCharsetName = nameCS;
74a7eb0b
VZ
1547 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1548 }
3a0d76bc
VS
1549 }
1550 }
74a7eb0b 1551 else // use charset not requiring byte swapping
36acb880 1552 {
74a7eb0b 1553 ms_wcCharsetName = nameXE;
36acb880 1554 }
3a0d76bc 1555 }
74a7eb0b 1556
0944fceb 1557 wxLogTrace(TRACE_STRCONV,
74a7eb0b 1558 wxT("iconv wchar_t charset is \"%s\"%s"),
cae8f1bf 1559 ms_wcCharsetName.empty() ? _T("<none>")
74a7eb0b
VZ
1560 : ms_wcCharsetName.c_str(),
1561 ms_wcNeedsSwap ? _T(" (needs swap)")
1562 : _T(""));
3a0d76bc 1563 }
36acb880 1564 else // we already have ms_wcCharsetName
3caec1bb 1565 {
74a7eb0b 1566 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
f1339c56 1567 }
dccce9ea 1568
74a7eb0b 1569 if ( ms_wcCharsetName.empty() )
f1339c56 1570 {
74a7eb0b 1571 w2m = ICONV_T_INVALID;
36acb880 1572 }
405d8f46
VZ
1573 else
1574 {
74a7eb0b
VZ
1575 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1576 if ( w2m == ICONV_T_INVALID )
1577 {
1578 wxLogTrace(TRACE_STRCONV,
1579 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
422e411e 1580 ms_wcCharsetName.c_str(), cname.data());
74a7eb0b 1581 }
405d8f46 1582 }
36acb880 1583}
3caec1bb 1584
e95354ec 1585wxMBConv_iconv::~wxMBConv_iconv()
36acb880 1586{
74a7eb0b 1587 if ( m2w != ICONV_T_INVALID )
36acb880 1588 iconv_close(m2w);
74a7eb0b 1589 if ( w2m != ICONV_T_INVALID )
36acb880
VZ
1590 iconv_close(w2m);
1591}
3a0d76bc 1592
bde4baac 1593size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1594{
69373110
VZ
1595 // find the string length: notice that must be done differently for
1596 // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1597 size_t inbuf;
7ef3ab50 1598 const size_t nulLen = GetMBNulLen();
69373110
VZ
1599 switch ( nulLen )
1600 {
1601 default:
1602 return (size_t)-1;
1603
1604 case 1:
1605 inbuf = strlen(psz); // arguably more optimized than our version
1606 break;
1607
1608 case 2:
1609 case 4:
1610 // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1611 // they also have to start at character boundary and not span two
1612 // adjacent characters
1613 const char *p;
1614 for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
1615 ;
1616 inbuf = p - psz;
1617 break;
1618 }
1619
b1d547eb
VS
1620#if wxUSE_THREADS
1621 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1622 // Unfortunately there is a couple of global wxCSConv objects such as
1623 // wxConvLocal that are used all over wx code, so we have to make sure
1624 // the handle is used by at most one thread at the time. Otherwise
1625 // only a few wx classes would be safe to use from non-main threads
1626 // as MB<->WC conversion would fail "randomly".
1627 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
69373110
VZ
1628#endif // wxUSE_THREADS
1629
3698ae71 1630
36acb880
VZ
1631 size_t outbuf = n * SIZEOF_WCHAR_T;
1632 size_t res, cres;
1633 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1634 wchar_t *bufPtr = buf;
1635 const char *pszPtr = psz;
1636
1637 if (buf)
1638 {
1639 // have destination buffer, convert there
1640 cres = iconv(m2w,
1641 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1642 (char**)&bufPtr, &outbuf);
1643 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1644
36acb880 1645 if (ms_wcNeedsSwap)
3a0d76bc 1646 {
36acb880 1647 // convert to native endianness
17a1ebd1
VZ
1648 for ( unsigned i = 0; i < res; i++ )
1649 buf[n] = WC_BSWAP(buf[i]);
3a0d76bc 1650 }
adb45366 1651
69373110 1652 // NUL-terminate the string if there is any space left
49dd9820
VS
1653 if (res < n)
1654 buf[res] = 0;
36acb880
VZ
1655 }
1656 else
1657 {
1658 // no destination buffer... convert using temp buffer
1659 // to calculate destination buffer requirement
1660 wchar_t tbuf[8];
1661 res = 0;
1662 do {
1663 bufPtr = tbuf;
1664 outbuf = 8*SIZEOF_WCHAR_T;
1665
1666 cres = iconv(m2w,
1667 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1668 (char**)&bufPtr, &outbuf );
1669
1670 res += 8-(outbuf/SIZEOF_WCHAR_T);
1671 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1672 }
dccce9ea 1673
36acb880 1674 if (ICONV_FAILED(cres, inbuf))
f1339c56 1675 {
36acb880 1676 //VS: it is ok if iconv fails, hence trace only
ce6f8d6f 1677 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
36acb880
VZ
1678 return (size_t)-1;
1679 }
1680
1681 return res;
1682}
1683
bde4baac 1684size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1685{
b1d547eb
VS
1686#if wxUSE_THREADS
1687 // NB: explained in MB2WC
1688 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1689#endif
3698ae71 1690
156162ec
MW
1691 size_t inlen = wxWcslen(psz);
1692 size_t inbuf = inlen * SIZEOF_WCHAR_T;
36acb880
VZ
1693 size_t outbuf = n;
1694 size_t res, cres;
3a0d76bc 1695
36acb880 1696 wchar_t *tmpbuf = 0;
3caec1bb 1697
36acb880
VZ
1698 if (ms_wcNeedsSwap)
1699 {
1700 // need to copy to temp buffer to switch endianness
74a7eb0b 1701 // (doing WC_BSWAP twice on the original buffer won't help, as it
36acb880 1702 // could be in read-only memory, or be accessed in some other thread)
74a7eb0b 1703 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
17a1ebd1
VZ
1704 for ( size_t i = 0; i < inlen; i++ )
1705 tmpbuf[n] = WC_BSWAP(psz[i]);
156162ec 1706 tmpbuf[inlen] = L'\0';
74a7eb0b 1707 psz = tmpbuf;
36acb880 1708 }
3a0d76bc 1709
36acb880
VZ
1710 if (buf)
1711 {
1712 // have destination buffer, convert there
1713 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1714
36acb880 1715 res = n-outbuf;
adb45366 1716
49dd9820
VS
1717 // NB: iconv was given only wcslen(psz) characters on input, and so
1718 // it couldn't convert the trailing zero. Let's do it ourselves
1719 // if there's some room left for it in the output buffer.
1720 if (res < n)
1721 buf[0] = 0;
36acb880
VZ
1722 }
1723 else
1724 {
1725 // no destination buffer... convert using temp buffer
1726 // to calculate destination buffer requirement
1727 char tbuf[16];
1728 res = 0;
1729 do {
1730 buf = tbuf; outbuf = 16;
1731
1732 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1733
36acb880
VZ
1734 res += 16 - outbuf;
1735 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1736 }
dccce9ea 1737
36acb880
VZ
1738 if (ms_wcNeedsSwap)
1739 {
1740 free(tmpbuf);
1741 }
dccce9ea 1742
36acb880
VZ
1743 if (ICONV_FAILED(cres, inbuf))
1744 {
ce6f8d6f 1745 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
36acb880
VZ
1746 return (size_t)-1;
1747 }
1748
1749 return res;
1750}
1751
7ef3ab50 1752size_t wxMBConv_iconv::GetMBNulLen() const
eec47cc6 1753{
c1464d9d 1754 if ( m_minMBCharWidth == 0 )
eec47cc6
VZ
1755 {
1756 wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1757
1758#if wxUSE_THREADS
1759 // NB: explained in MB2WC
1760 wxMutexLocker lock(self->m_iconvMutex);
1761#endif
1762
356410fc 1763 wchar_t *wnul = L"";
c1464d9d 1764 char buf[8]; // should be enough for NUL in any encoding
356410fc 1765 size_t inLen = sizeof(wchar_t),
c1464d9d 1766 outLen = WXSIZEOF(buf);
39406a5d 1767 char *in = (char *)wnul;
c1464d9d 1768 char *out = buf;
39406a5d 1769 if ( iconv(w2m, ICONV_CHAR_CAST(&in), &inLen, &out, &outLen) == (size_t)-1 )
356410fc 1770 {
c1464d9d 1771 self->m_minMBCharWidth = (size_t)-1;
356410fc
VZ
1772 }
1773 else // ok
1774 {
c1464d9d 1775 self->m_minMBCharWidth = out - buf;
356410fc 1776 }
eec47cc6
VZ
1777 }
1778
c1464d9d 1779 return m_minMBCharWidth;
eec47cc6
VZ
1780}
1781
b040e242 1782#endif // HAVE_ICONV
36acb880 1783
e95354ec 1784
36acb880
VZ
1785// ============================================================================
1786// Win32 conversion classes
1787// ============================================================================
1cd52418 1788
e95354ec 1789#ifdef wxHAVE_WIN32_MB2WC
373658eb 1790
8b04d4c4 1791// from utils.cpp
d775fa82 1792#if wxUSE_FONTMAP
8b04d4c4
VZ
1793extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1794extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1795#endif
373658eb 1796
e95354ec 1797class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1798{
1799public:
bde4baac
VZ
1800 wxMBConv_win32()
1801 {
1802 m_CodePage = CP_ACP;
c1464d9d 1803 m_minMBCharWidth = 0;
bde4baac
VZ
1804 }
1805
7608a683 1806#if wxUSE_FONTMAP
e95354ec 1807 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1808 {
1809 m_CodePage = wxCharsetToCodepage(name);
c1464d9d 1810 m_minMBCharWidth = 0;
bde4baac 1811 }
dccce9ea 1812
e95354ec 1813 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1814 {
1815 m_CodePage = wxEncodingToCodepage(encoding);
c1464d9d 1816 m_minMBCharWidth = 0;
bde4baac 1817 }
eec47cc6 1818#endif // wxUSE_FONTMAP
8b04d4c4 1819
bde4baac 1820 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1821 {
02272c9c
VZ
1822 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1823 // the behaviour is not compatible with the Unix version (using iconv)
1824 // and break the library itself, e.g. wxTextInputStream::NextChar()
1825 // wouldn't work if reading an incomplete MB char didn't result in an
1826 // error
667e5b3e
VZ
1827 //
1828 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1829 // an error (tested under Windows Server 2003) and apparently it is
1830 // done on purpose, i.e. the function accepts any input in this case
1831 // and although I'd prefer to return error on ill-formed output, our
1832 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1833 // explicitly ill-formed according to RFC 2152) neither so we don't
1834 // even have any fallback here...
89028980
VS
1835 //
1836 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1837 // Win XP or newer and if it is specified on older versions, conversion
1838 // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1839 // fails. So we can only use the flag on newer Windows versions.
1840 // Additionally, the flag is not supported by UTF7, symbol and CJK
1841 // encodings. See here:
1842 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1843 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1844 int flags = 0;
1845 if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
1846 m_CodePage < 50000 &&
1847 IsAtLeastWin2kSP4() )
1848 {
1849 flags = MB_ERR_INVALID_CHARS;
1850 }
1851 else if ( m_CodePage == CP_UTF8 )
1852 {
1853 // Avoid round-trip in the special case of UTF-8 by using our
1854 // own UTF-8 conversion code:
1855 return wxMBConvUTF8().MB2WC(buf, psz, n);
1856 }
667e5b3e 1857
2b5f62a0
VZ
1858 const size_t len = ::MultiByteToWideChar
1859 (
1860 m_CodePage, // code page
667e5b3e 1861 flags, // flags: fall on error
2b5f62a0
VZ
1862 psz, // input string
1863 -1, // its length (NUL-terminated)
b4da152e 1864 buf, // output string
2b5f62a0
VZ
1865 buf ? n : 0 // size of output buffer
1866 );
89028980
VS
1867 if ( !len )
1868 {
1869 // function totally failed
1870 return (size_t)-1;
1871 }
1872
1873 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1874 // check if we succeeded, by doing a double trip:
1875 if ( !flags && buf )
1876 {
53c174fc
VZ
1877 const size_t mbLen = strlen(psz);
1878 wxCharBuffer mbBuf(mbLen);
89028980
VS
1879 if ( ::WideCharToMultiByte
1880 (
1881 m_CodePage,
1882 0,
1883 buf,
1884 -1,
1885 mbBuf.data(),
53c174fc 1886 mbLen + 1, // size in bytes, not length
89028980
VS
1887 NULL,
1888 NULL
1889 ) == 0 ||
1890 strcmp(mbBuf, psz) != 0 )
1891 {
1892 // we didn't obtain the same thing we started from, hence
1893 // the conversion was lossy and we consider that it failed
1894 return (size_t)-1;
1895 }
1896 }
2b5f62a0 1897
03a991bc
VZ
1898 // note that it returns count of written chars for buf != NULL and size
1899 // of the needed buffer for buf == NULL so in either case the length of
1900 // the string (which never includes the terminating NUL) is one less
89028980 1901 return len - 1;
f1339c56 1902 }
dccce9ea 1903
13dd924a 1904 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1905 {
13dd924a
VZ
1906 /*
1907 we have a problem here: by default, WideCharToMultiByte() may
1908 replace characters unrepresentable in the target code page with bad
1909 quality approximations such as turning "1/2" symbol (U+00BD) into
1910 "1" for the code pages which don't have it and we, obviously, want
1911 to avoid this at any price
d775fa82 1912
13dd924a
VZ
1913 the trouble is that this function does it _silently_, i.e. it won't
1914 even tell us whether it did or not... Win98/2000 and higher provide
1915 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1916 we have to resort to a round trip, i.e. check that converting back
1917 results in the same string -- this is, of course, expensive but
1918 otherwise we simply can't be sure to not garble the data.
1919 */
1920
1921 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1922 // it doesn't work with CJK encodings (which we test for rather roughly
1923 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1924 // supporting it
907173e5
WS
1925 BOOL usedDef wxDUMMY_INITIALIZE(false);
1926 BOOL *pUsedDef;
13dd924a
VZ
1927 int flags;
1928 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1929 {
1930 // it's our lucky day
1931 flags = WC_NO_BEST_FIT_CHARS;
1932 pUsedDef = &usedDef;
1933 }
1934 else // old system or unsupported encoding
1935 {
1936 flags = 0;
1937 pUsedDef = NULL;
1938 }
1939
2b5f62a0
VZ
1940 const size_t len = ::WideCharToMultiByte
1941 (
1942 m_CodePage, // code page
13dd924a
VZ
1943 flags, // either none or no best fit
1944 pwz, // input string
2b5f62a0
VZ
1945 -1, // it is (wide) NUL-terminated
1946 buf, // output buffer
1947 buf ? n : 0, // and its size
1948 NULL, // default "replacement" char
13dd924a 1949 pUsedDef // [out] was it used?
2b5f62a0
VZ
1950 );
1951
13dd924a
VZ
1952 if ( !len )
1953 {
1954 // function totally failed
1955 return (size_t)-1;
1956 }
1957
1958 // if we were really converting, check if we succeeded
1959 if ( buf )
1960 {
1961 if ( flags )
1962 {
1963 // check if the conversion failed, i.e. if any replacements
1964 // were done
1965 if ( usedDef )
1966 return (size_t)-1;
1967 }
1968 else // we must resort to double tripping...
1969 {
1970 wxWCharBuffer wcBuf(n);
1971 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1972 wcscmp(wcBuf, pwz) != 0 )
1973 {
1974 // we didn't obtain the same thing we started from, hence
1975 // the conversion was lossy and we consider that it failed
1976 return (size_t)-1;
1977 }
1978 }
1979 }
1980
03a991bc 1981 // see the comment above for the reason of "len - 1"
13dd924a 1982 return len - 1;
f1339c56 1983 }
dccce9ea 1984
7ef3ab50
VZ
1985 virtual size_t GetMBNulLen() const
1986 {
1987 if ( m_minMBCharWidth == 0 )
1988 {
1989 int len = ::WideCharToMultiByte
1990 (
1991 m_CodePage, // code page
1992 0, // no flags
1993 L"", // input string
1994 1, // translate just the NUL
1995 NULL, // output buffer
1996 0, // and its size
1997 NULL, // no replacement char
1998 NULL // [out] don't care if it was used
1999 );
2000
2001 wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2002 switch ( len )
2003 {
2004 default:
2005 wxLogDebug(_T("Unexpected NUL length %d"), len);
2006 // fall through
2007
2008 case 0:
2009 self->m_minMBCharWidth = (size_t)-1;
2010 break;
2011
2012 case 1:
2013 case 2:
2014 case 4:
2015 self->m_minMBCharWidth = len;
2016 break;
2017 }
2018 }
2019
2020 return m_minMBCharWidth;
2021 }
2022
13dd924a
VZ
2023 bool IsOk() const { return m_CodePage != -1; }
2024
2025private:
2026 static bool CanUseNoBestFit()
2027 {
2028 static int s_isWin98Or2k = -1;
2029
2030 if ( s_isWin98Or2k == -1 )
2031 {
2032 int verMaj, verMin;
2033 switch ( wxGetOsVersion(&verMaj, &verMin) )
2034 {
2035 case wxWIN95:
2036 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2037 break;
2038
2039 case wxWINDOWS_NT:
2040 s_isWin98Or2k = verMaj >= 5;
2041 break;
2042
2043 default:
2044 // unknown, be conseravtive by default
2045 s_isWin98Or2k = 0;
2046 }
2047
2048 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
2049 }
2050
2051 return s_isWin98Or2k == 1;
2052 }
f1339c56 2053
89028980
VS
2054 static bool IsAtLeastWin2kSP4()
2055 {
8942f83a
WS
2056#ifdef __WXWINCE__
2057 return false;
2058#else
89028980
VS
2059 static int s_isAtLeastWin2kSP4 = -1;
2060
2061 if ( s_isAtLeastWin2kSP4 == -1 )
2062 {
2063 OSVERSIONINFOEX ver;
2064
2065 memset(&ver, 0, sizeof(ver));
2066 ver.dwOSVersionInfoSize = sizeof(ver);
2067 GetVersionEx((OSVERSIONINFO*)&ver);
2068
2069 s_isAtLeastWin2kSP4 =
2070 ((ver.dwMajorVersion > 5) || // Vista+
2071 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2072 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2073 ver.wServicePackMajor >= 4)) // 2000 SP4+
2074 ? 1 : 0;
2075 }
2076
2077 return s_isAtLeastWin2kSP4 == 1;
8942f83a 2078#endif
89028980
VS
2079 }
2080
eec47cc6 2081
c1464d9d 2082 // the code page we're working with
b1d66b54 2083 long m_CodePage;
c1464d9d 2084
7ef3ab50 2085 // cached result of GetMBNulLen(), set to 0 initially meaning
c1464d9d
VZ
2086 // "unknown"
2087 size_t m_minMBCharWidth;
1cd52418 2088};
e95354ec
VZ
2089
2090#endif // wxHAVE_WIN32_MB2WC
2091
f7e98dee
RN
2092// ============================================================================
2093// Cocoa conversion classes
2094// ============================================================================
2095
2096#if defined(__WXCOCOA__)
2097
// RN: There is no UTF-32 support in either Core Foundation or
// Cocoa. Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
2101
2102#include <CoreFoundation/CFString.h>
2103#include <CoreFoundation/CFStringEncodingExt.h>
2104
2105CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 2106{
638357a0 2107 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ecd9653b
WS
2108 if ( encoding == wxFONTENCODING_DEFAULT )
2109 {
638357a0 2110 enc = CFStringGetSystemEncoding();
ecd9653b
WS
2111 }
2112 else switch( encoding)
2113 {
2114 case wxFONTENCODING_ISO8859_1 :
2115 enc = kCFStringEncodingISOLatin1 ;
2116 break ;
2117 case wxFONTENCODING_ISO8859_2 :
2118 enc = kCFStringEncodingISOLatin2;
2119 break ;
2120 case wxFONTENCODING_ISO8859_3 :
2121 enc = kCFStringEncodingISOLatin3 ;
2122 break ;
2123 case wxFONTENCODING_ISO8859_4 :
2124 enc = kCFStringEncodingISOLatin4;
2125 break ;
2126 case wxFONTENCODING_ISO8859_5 :
2127 enc = kCFStringEncodingISOLatinCyrillic;
2128 break ;
2129 case wxFONTENCODING_ISO8859_6 :
2130 enc = kCFStringEncodingISOLatinArabic;
2131 break ;
2132 case wxFONTENCODING_ISO8859_7 :
2133 enc = kCFStringEncodingISOLatinGreek;
2134 break ;
2135 case wxFONTENCODING_ISO8859_8 :
2136 enc = kCFStringEncodingISOLatinHebrew;
2137 break ;
2138 case wxFONTENCODING_ISO8859_9 :
2139 enc = kCFStringEncodingISOLatin5;
2140 break ;
2141 case wxFONTENCODING_ISO8859_10 :
2142 enc = kCFStringEncodingISOLatin6;
2143 break ;
2144 case wxFONTENCODING_ISO8859_11 :
2145 enc = kCFStringEncodingISOLatinThai;
2146 break ;
2147 case wxFONTENCODING_ISO8859_13 :
2148 enc = kCFStringEncodingISOLatin7;
2149 break ;
2150 case wxFONTENCODING_ISO8859_14 :
2151 enc = kCFStringEncodingISOLatin8;
2152 break ;
2153 case wxFONTENCODING_ISO8859_15 :
2154 enc = kCFStringEncodingISOLatin9;
2155 break ;
2156
2157 case wxFONTENCODING_KOI8 :
2158 enc = kCFStringEncodingKOI8_R;
2159 break ;
2160 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2161 enc = kCFStringEncodingDOSRussian;
2162 break ;
2163
2164// case wxFONTENCODING_BULGARIAN :
2165// enc = ;
2166// break ;
2167
2168 case wxFONTENCODING_CP437 :
2169 enc =kCFStringEncodingDOSLatinUS ;
2170 break ;
2171 case wxFONTENCODING_CP850 :
2172 enc = kCFStringEncodingDOSLatin1;
2173 break ;
2174 case wxFONTENCODING_CP852 :
2175 enc = kCFStringEncodingDOSLatin2;
2176 break ;
2177 case wxFONTENCODING_CP855 :
2178 enc = kCFStringEncodingDOSCyrillic;
2179 break ;
2180 case wxFONTENCODING_CP866 :
2181 enc =kCFStringEncodingDOSRussian ;
2182 break ;
2183 case wxFONTENCODING_CP874 :
2184 enc = kCFStringEncodingDOSThai;
2185 break ;
2186 case wxFONTENCODING_CP932 :
2187 enc = kCFStringEncodingDOSJapanese;
2188 break ;
2189 case wxFONTENCODING_CP936 :
2190 enc =kCFStringEncodingDOSChineseSimplif ;
2191 break ;
2192 case wxFONTENCODING_CP949 :
2193 enc = kCFStringEncodingDOSKorean;
2194 break ;
2195 case wxFONTENCODING_CP950 :
2196 enc = kCFStringEncodingDOSChineseTrad;
2197 break ;
ecd9653b
WS
2198 case wxFONTENCODING_CP1250 :
2199 enc = kCFStringEncodingWindowsLatin2;
2200 break ;
2201 case wxFONTENCODING_CP1251 :
2202 enc =kCFStringEncodingWindowsCyrillic ;
2203 break ;
2204 case wxFONTENCODING_CP1252 :
2205 enc =kCFStringEncodingWindowsLatin1 ;
2206 break ;
2207 case wxFONTENCODING_CP1253 :
2208 enc = kCFStringEncodingWindowsGreek;
2209 break ;
2210 case wxFONTENCODING_CP1254 :
2211 enc = kCFStringEncodingWindowsLatin5;
2212 break ;
2213 case wxFONTENCODING_CP1255 :
2214 enc =kCFStringEncodingWindowsHebrew ;
2215 break ;
2216 case wxFONTENCODING_CP1256 :
2217 enc =kCFStringEncodingWindowsArabic ;
2218 break ;
2219 case wxFONTENCODING_CP1257 :
2220 enc = kCFStringEncodingWindowsBalticRim;
2221 break ;
638357a0
RN
2222// This only really encodes to UTF7 (if that) evidently
2223// case wxFONTENCODING_UTF7 :
2224// enc = kCFStringEncodingNonLossyASCII ;
2225// break ;
ecd9653b
WS
2226 case wxFONTENCODING_UTF8 :
2227 enc = kCFStringEncodingUTF8 ;
2228 break ;
2229 case wxFONTENCODING_EUC_JP :
2230 enc = kCFStringEncodingEUC_JP;
2231 break ;
2232 case wxFONTENCODING_UTF16 :
f7e98dee 2233 enc = kCFStringEncodingUnicode ;
ecd9653b 2234 break ;
f7e98dee
RN
2235 case wxFONTENCODING_MACROMAN :
2236 enc = kCFStringEncodingMacRoman ;
2237 break ;
2238 case wxFONTENCODING_MACJAPANESE :
2239 enc = kCFStringEncodingMacJapanese ;
2240 break ;
2241 case wxFONTENCODING_MACCHINESETRAD :
2242 enc = kCFStringEncodingMacChineseTrad ;
2243 break ;
2244 case wxFONTENCODING_MACKOREAN :
2245 enc = kCFStringEncodingMacKorean ;
2246 break ;
2247 case wxFONTENCODING_MACARABIC :
2248 enc = kCFStringEncodingMacArabic ;
2249 break ;
2250 case wxFONTENCODING_MACHEBREW :
2251 enc = kCFStringEncodingMacHebrew ;
2252 break ;
2253 case wxFONTENCODING_MACGREEK :
2254 enc = kCFStringEncodingMacGreek ;
2255 break ;
2256 case wxFONTENCODING_MACCYRILLIC :
2257 enc = kCFStringEncodingMacCyrillic ;
2258 break ;
2259 case wxFONTENCODING_MACDEVANAGARI :
2260 enc = kCFStringEncodingMacDevanagari ;
2261 break ;
2262 case wxFONTENCODING_MACGURMUKHI :
2263 enc = kCFStringEncodingMacGurmukhi ;
2264 break ;
2265 case wxFONTENCODING_MACGUJARATI :
2266 enc = kCFStringEncodingMacGujarati ;
2267 break ;
2268 case wxFONTENCODING_MACORIYA :
2269 enc = kCFStringEncodingMacOriya ;
2270 break ;
2271 case wxFONTENCODING_MACBENGALI :
2272 enc = kCFStringEncodingMacBengali ;
2273 break ;
2274 case wxFONTENCODING_MACTAMIL :
2275 enc = kCFStringEncodingMacTamil ;
2276 break ;
2277 case wxFONTENCODING_MACTELUGU :
2278 enc = kCFStringEncodingMacTelugu ;
2279 break ;
2280 case wxFONTENCODING_MACKANNADA :
2281 enc = kCFStringEncodingMacKannada ;
2282 break ;
2283 case wxFONTENCODING_MACMALAJALAM :
2284 enc = kCFStringEncodingMacMalayalam ;
2285 break ;
2286 case wxFONTENCODING_MACSINHALESE :
2287 enc = kCFStringEncodingMacSinhalese ;
2288 break ;
2289 case wxFONTENCODING_MACBURMESE :
2290 enc = kCFStringEncodingMacBurmese ;
2291 break ;
2292 case wxFONTENCODING_MACKHMER :
2293 enc = kCFStringEncodingMacKhmer ;
2294 break ;
2295 case wxFONTENCODING_MACTHAI :
2296 enc = kCFStringEncodingMacThai ;
2297 break ;
2298 case wxFONTENCODING_MACLAOTIAN :
2299 enc = kCFStringEncodingMacLaotian ;
2300 break ;
2301 case wxFONTENCODING_MACGEORGIAN :
2302 enc = kCFStringEncodingMacGeorgian ;
2303 break ;
2304 case wxFONTENCODING_MACARMENIAN :
2305 enc = kCFStringEncodingMacArmenian ;
2306 break ;
2307 case wxFONTENCODING_MACCHINESESIMP :
2308 enc = kCFStringEncodingMacChineseSimp ;
2309 break ;
2310 case wxFONTENCODING_MACTIBETAN :
2311 enc = kCFStringEncodingMacTibetan ;
2312 break ;
2313 case wxFONTENCODING_MACMONGOLIAN :
2314 enc = kCFStringEncodingMacMongolian ;
2315 break ;
2316 case wxFONTENCODING_MACETHIOPIC :
2317 enc = kCFStringEncodingMacEthiopic ;
2318 break ;
2319 case wxFONTENCODING_MACCENTRALEUR :
2320 enc = kCFStringEncodingMacCentralEurRoman ;
2321 break ;
2322 case wxFONTENCODING_MACVIATNAMESE :
2323 enc = kCFStringEncodingMacVietnamese ;
2324 break ;
2325 case wxFONTENCODING_MACARABICEXT :
2326 enc = kCFStringEncodingMacExtArabic ;
2327 break ;
2328 case wxFONTENCODING_MACSYMBOL :
2329 enc = kCFStringEncodingMacSymbol ;
2330 break ;
2331 case wxFONTENCODING_MACDINGBATS :
2332 enc = kCFStringEncodingMacDingbats ;
2333 break ;
2334 case wxFONTENCODING_MACTURKISH :
2335 enc = kCFStringEncodingMacTurkish ;
2336 break ;
2337 case wxFONTENCODING_MACCROATIAN :
2338 enc = kCFStringEncodingMacCroatian ;
2339 break ;
2340 case wxFONTENCODING_MACICELANDIC :
2341 enc = kCFStringEncodingMacIcelandic ;
2342 break ;
2343 case wxFONTENCODING_MACROMANIAN :
2344 enc = kCFStringEncodingMacRomanian ;
2345 break ;
2346 case wxFONTENCODING_MACCELTIC :
2347 enc = kCFStringEncodingMacCeltic ;
2348 break ;
2349 case wxFONTENCODING_MACGAELIC :
2350 enc = kCFStringEncodingMacGaelic ;
2351 break ;
ecd9653b
WS
2352// case wxFONTENCODING_MACKEYBOARD :
2353// enc = kCFStringEncodingMacKeyboardGlyphs ;
2354// break ;
2355 default :
2356 // because gcc is picky
2357 break ;
2358 } ;
2359 return enc ;
f7e98dee
RN
2360}
2361
f7e98dee
RN
2362class wxMBConv_cocoa : public wxMBConv
2363{
2364public:
2365 wxMBConv_cocoa()
2366 {
2367 Init(CFStringGetSystemEncoding()) ;
2368 }
2369
a6900d10 2370#if wxUSE_FONTMAP
f7e98dee
RN
2371 wxMBConv_cocoa(const wxChar* name)
2372 {
267e11c5 2373 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
f7e98dee 2374 }
a6900d10 2375#endif
f7e98dee
RN
2376
2377 wxMBConv_cocoa(wxFontEncoding encoding)
2378 {
2379 Init( wxCFStringEncFromFontEnc(encoding) );
2380 }
2381
2382 ~wxMBConv_cocoa()
2383 {
2384 }
2385
2386 void Init( CFStringEncoding encoding)
2387 {
638357a0 2388 m_encoding = encoding ;
f7e98dee
RN
2389 }
2390
2391 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2392 {
2393 wxASSERT(szUnConv);
ecd9653b 2394
638357a0
RN
2395 CFStringRef theString = CFStringCreateWithBytes (
2396 NULL, //the allocator
2397 (const UInt8*)szUnConv,
2398 strlen(szUnConv),
2399 m_encoding,
2400 false //no BOM/external representation
f7e98dee
RN
2401 );
2402
2403 wxASSERT(theString);
2404
638357a0
RN
2405 size_t nOutLength = CFStringGetLength(theString);
2406
2407 if (szOut == NULL)
f7e98dee 2408 {
f7e98dee 2409 CFRelease(theString);
638357a0 2410 return nOutLength;
f7e98dee 2411 }
ecd9653b 2412
638357a0 2413 CFRange theRange = { 0, nOutSize };
ecd9653b 2414
638357a0
RN
2415#if SIZEOF_WCHAR_T == 4
2416 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2417#endif
3698ae71 2418
f7e98dee 2419 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
3698ae71 2420
f7e98dee 2421 CFRelease(theString);
ecd9653b 2422
638357a0 2423 szUniCharBuffer[nOutLength] = '\0' ;
f7e98dee
RN
2424
2425#if SIZEOF_WCHAR_T == 4
2426 wxMBConvUTF16 converter ;
638357a0 2427 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
f7e98dee
RN
2428 delete[] szUniCharBuffer;
2429#endif
3698ae71 2430
638357a0 2431 return nOutLength;
f7e98dee
RN
2432 }
2433
2434 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2435 {
638357a0 2436 wxASSERT(szUnConv);
3698ae71 2437
f7e98dee 2438 size_t nRealOutSize;
638357a0 2439 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 2440 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2441
f7e98dee 2442#if SIZEOF_WCHAR_T == 4
d9d488cf 2443 wxMBConvUTF16 converter ;
f7e98dee
RN
2444 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2445 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2446 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2447 nBufSize /= sizeof(UniChar);
f7e98dee
RN
2448#endif
2449
2450 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2451 NULL, //allocator
2452 szUniBuffer,
2453 nBufSize,
638357a0 2454 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 2455 );
ecd9653b 2456
f7e98dee 2457 wxASSERT(theString);
ecd9653b 2458
f7e98dee 2459 //Note that CER puts a BOM when converting to unicode
638357a0
RN
2460 //so we check and use getchars instead in that case
2461 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 2462 {
638357a0
RN
2463 if (szOut != NULL)
2464 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
3698ae71 2465
638357a0
RN
2466 nRealOutSize = CFStringGetLength(theString) + 1;
2467 }
2468 else
2469 {
2470 CFStringGetBytes(
2471 theString,
2472 CFRangeMake(0, CFStringGetLength(theString)),
2473 m_encoding,
2474 0, //what to put in characters that can't be converted -
2475 //0 tells CFString to return NULL if it meets such a character
2476 false, //not an external representation
2477 (UInt8*) szOut,
3698ae71 2478 nOutSize,
638357a0
RN
2479 (CFIndex*) &nRealOutSize
2480 );
f7e98dee 2481 }
ecd9653b 2482
638357a0 2483 CFRelease(theString);
ecd9653b 2484
638357a0
RN
2485#if SIZEOF_WCHAR_T == 4
2486 delete[] szUniBuffer;
2487#endif
ecd9653b 2488
f7e98dee
RN
2489 return nRealOutSize - 1;
2490 }
2491
2492 bool IsOk() const
ecd9653b 2493 {
3698ae71 2494 return m_encoding != kCFStringEncodingInvalidId &&
638357a0 2495 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2496 }
2497
2498private:
638357a0 2499 CFStringEncoding m_encoding ;
f7e98dee
RN
2500};
2501
2502#endif // defined(__WXCOCOA__)
2503
335d31e0
SC
2504// ============================================================================
2505// Mac conversion classes
2506// ============================================================================
2507
2508#if defined(__WXMAC__) && defined(TARGET_CARBON)
2509
2510class wxMBConv_mac : public wxMBConv
2511{
2512public:
2513 wxMBConv_mac()
2514 {
2515 Init(CFStringGetSystemEncoding()) ;
2516 }
2517
2d1659cf 2518#if wxUSE_FONTMAP
335d31e0
SC
2519 wxMBConv_mac(const wxChar* name)
2520 {
267e11c5 2521 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
335d31e0 2522 }
2d1659cf 2523#endif
335d31e0
SC
2524
2525 wxMBConv_mac(wxFontEncoding encoding)
2526 {
d775fa82
WS
2527 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2528 }
2529
2530 ~wxMBConv_mac()
2531 {
2532 OSStatus status = noErr ;
2533 status = TECDisposeConverter(m_MB2WC_converter);
2534 status = TECDisposeConverter(m_WC2MB_converter);
2535 }
2536
2537
2538 void Init( TextEncodingBase encoding)
2539 {
2540 OSStatus status = noErr ;
2541 m_char_encoding = encoding ;
2542 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2543
2544 status = TECCreateConverter(&m_MB2WC_converter,
2545 m_char_encoding,
2546 m_unicode_encoding);
2547 status = TECCreateConverter(&m_WC2MB_converter,
2548 m_unicode_encoding,
2549 m_char_encoding);
2550 }
2551
335d31e0
SC
2552 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2553 {
d775fa82
WS
2554 OSStatus status = noErr ;
2555 ByteCount byteOutLen ;
2556 ByteCount byteInLen = strlen(psz) ;
2557 wchar_t *tbuf = NULL ;
2558 UniChar* ubuf = NULL ;
2559 size_t res = 0 ;
2560
2561 if (buf == NULL)
2562 {
638357a0 2563 //apple specs say at least 32
c543817b 2564 n = wxMax( 32 , byteInLen ) ;
d775fa82
WS
2565 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2566 }
2567 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
f3a355ce 2568#if SIZEOF_WCHAR_T == 4
d775fa82 2569 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2570#else
d775fa82 2571 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2572#endif
d775fa82
WS
2573 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2574 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
f3a355ce 2575#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2576 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2577 // is not properly terminated we get random characters at the end
2578 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d9d488cf 2579 wxMBConvUTF16 converter ;
d775fa82
WS
2580 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2581 free( ubuf ) ;
f3a355ce 2582#else
d775fa82 2583 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2584#endif
d775fa82
WS
2585 if ( buf == NULL )
2586 free(tbuf) ;
335d31e0 2587
335d31e0
SC
2588 if ( buf && res < n)
2589 buf[res] = 0;
2590
d775fa82 2591 return res ;
335d31e0
SC
2592 }
2593
2594 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82
WS
2595 {
2596 OSStatus status = noErr ;
2597 ByteCount byteOutLen ;
2598 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2599
2600 char *tbuf = NULL ;
2601
2602 if (buf == NULL)
2603 {
638357a0 2604 //apple specs say at least 32
c543817b 2605 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2606 tbuf = (char*) malloc( n ) ;
2607 }
2608
2609 ByteCount byteBufferLen = n ;
2610 UniChar* ubuf = NULL ;
f3a355ce 2611#if SIZEOF_WCHAR_T == 4
d9d488cf 2612 wxMBConvUTF16 converter ;
d775fa82
WS
2613 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2614 byteInLen = unicharlen ;
2615 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2616 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce 2617#else
d775fa82 2618 ubuf = (UniChar*) psz ;
f3a355ce 2619#endif
d775fa82
WS
2620 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2621 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
f3a355ce 2622#if SIZEOF_WCHAR_T == 4
d775fa82 2623 free( ubuf ) ;
f3a355ce 2624#endif
d775fa82
WS
2625 if ( buf == NULL )
2626 free(tbuf) ;
335d31e0 2627
d775fa82 2628 size_t res = byteOutLen ;
335d31e0 2629 if ( buf && res < n)
638357a0 2630 {
335d31e0 2631 buf[res] = 0;
3698ae71 2632
638357a0
RN
2633 //we need to double-trip to verify it didn't insert any ? in place
2634 //of bogus characters
2635 wxWCharBuffer wcBuf(n);
2636 size_t pszlen = wxWcslen(psz);
2637 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2638 wxWcslen(wcBuf) != pszlen ||
2639 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2640 {
2641 // we didn't obtain the same thing we started from, hence
2642 // the conversion was lossy and we consider that it failed
2643 return (size_t)-1;
2644 }
2645 }
335d31e0 2646
d775fa82 2647 return res ;
335d31e0
SC
2648 }
2649
2650 bool IsOk() const
2651 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2652
2653private:
d775fa82
WS
2654 TECObjectRef m_MB2WC_converter ;
2655 TECObjectRef m_WC2MB_converter ;
2656
2657 TextEncodingBase m_char_encoding ;
2658 TextEncodingBase m_unicode_encoding ;
335d31e0
SC
2659};
2660
2661#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 2662
36acb880
VZ
2663// ============================================================================
2664// wxEncodingConverter based conversion classes
2665// ============================================================================
2666
1e6feb95 2667#if wxUSE_FONTMAP
1cd52418 2668
e95354ec 2669class wxMBConv_wxwin : public wxMBConv
1cd52418 2670{
8b04d4c4
VZ
2671private:
2672 void Init()
2673 {
2674 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2675 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2676 }
2677
6001e347 2678public:
f1339c56
RR
2679 // temporarily just use wxEncodingConverter stuff,
2680 // so that it works while a better implementation is built
e95354ec 2681 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
2682 {
2683 if (name)
267e11c5 2684 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
2685 else
2686 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 2687
8b04d4c4
VZ
2688 Init();
2689 }
2690
e95354ec 2691 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
2692 {
2693 m_enc = enc;
2694
2695 Init();
f1339c56 2696 }
dccce9ea 2697
bde4baac 2698 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
2699 {
2700 size_t inbuf = strlen(psz);
dccce9ea 2701 if (buf)
c643a977
VS
2702 {
2703 if (!m2w.Convert(psz,buf))
2704 return (size_t)-1;
2705 }
f1339c56
RR
2706 return inbuf;
2707 }
dccce9ea 2708
bde4baac 2709 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 2710 {
f8d791e0 2711 const size_t inbuf = wxWcslen(psz);
f1339c56 2712 if (buf)
c643a977
VS
2713 {
2714 if (!w2m.Convert(psz,buf))
2715 return (size_t)-1;
2716 }
dccce9ea 2717
f1339c56
RR
2718 return inbuf;
2719 }
dccce9ea 2720
7ef3ab50 2721 virtual size_t GetMBNulLen() const
eec47cc6
VZ
2722 {
2723 switch ( m_enc )
2724 {
2725 case wxFONTENCODING_UTF16BE:
2726 case wxFONTENCODING_UTF16LE:
c1464d9d 2727 return 2;
eec47cc6
VZ
2728
2729 case wxFONTENCODING_UTF32BE:
2730 case wxFONTENCODING_UTF32LE:
c1464d9d 2731 return 4;
eec47cc6
VZ
2732
2733 default:
c1464d9d 2734 return 1;
eec47cc6
VZ
2735 }
2736 }
2737
7ef3ab50
VZ
2738 bool IsOk() const { return m_ok; }
2739
2740public:
2741 wxFontEncoding m_enc;
2742 wxEncodingConverter m2w, w2m;
2743
2744private:
cafbf6fb
VZ
2745 // were we initialized successfully?
2746 bool m_ok;
fc7a2a60 2747
e95354ec 2748 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 2749};
6001e347 2750
8f115891
MW
2751// make the constructors available for unit testing
2752WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2753{
2754 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2755 if ( !result->IsOk() )
2756 {
2757 delete result;
2758 return 0;
2759 }
2760 return result;
2761}
2762
1e6feb95
VZ
2763#endif // wxUSE_FONTMAP
2764
36acb880
VZ
2765// ============================================================================
2766// wxCSConv implementation
2767// ============================================================================
2768
8b04d4c4 2769void wxCSConv::Init()
6001e347 2770{
e95354ec
VZ
2771 m_name = NULL;
2772 m_convReal = NULL;
2773 m_deferred = true;
2774}
2775
8b04d4c4
VZ
2776wxCSConv::wxCSConv(const wxChar *charset)
2777{
2778 Init();
82713003 2779
e95354ec
VZ
2780 if ( charset )
2781 {
e95354ec
VZ
2782 SetName(charset);
2783 }
bda3d86a 2784
e4277538
VZ
2785#if wxUSE_FONTMAP
2786 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2787#else
bda3d86a 2788 m_encoding = wxFONTENCODING_SYSTEM;
e4277538 2789#endif
6001e347
RR
2790}
2791
8b04d4c4
VZ
2792wxCSConv::wxCSConv(wxFontEncoding encoding)
2793{
bda3d86a 2794 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
2795 {
2796 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2797
2798 encoding = wxFONTENCODING_SYSTEM;
2799 }
2800
8b04d4c4
VZ
2801 Init();
2802
bda3d86a 2803 m_encoding = encoding;
8b04d4c4
VZ
2804}
2805
6001e347
RR
2806wxCSConv::~wxCSConv()
2807{
65e50848
JS
2808 Clear();
2809}
2810
54380f29 2811wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 2812 : wxMBConv()
54380f29 2813{
8b04d4c4
VZ
2814 Init();
2815
54380f29 2816 SetName(conv.m_name);
8b04d4c4 2817 m_encoding = conv.m_encoding;
54380f29
GD
2818}
2819
2820wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2821{
2822 Clear();
8b04d4c4 2823
54380f29 2824 SetName(conv.m_name);
8b04d4c4
VZ
2825 m_encoding = conv.m_encoding;
2826
54380f29
GD
2827 return *this;
2828}
2829
65e50848
JS
2830void wxCSConv::Clear()
2831{
8b04d4c4 2832 free(m_name);
e95354ec 2833 delete m_convReal;
8b04d4c4 2834
65e50848 2835 m_name = NULL;
e95354ec 2836 m_convReal = NULL;
6001e347
RR
2837}
2838
2839void wxCSConv::SetName(const wxChar *charset)
2840{
f1339c56
RR
2841 if (charset)
2842 {
2843 m_name = wxStrdup(charset);
e95354ec 2844 m_deferred = true;
f1339c56 2845 }
6001e347
RR
2846}
2847
8b3eb85d
VZ
#if wxUSE_FONTMAP
#include "wx/hashmap.h"

// hash map from a font encoding to a charset name
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual, wxEncodingNameCache );

// the single instance of the map used in this file
static wxEncodingNameCache gs_nameCache;
#endif
2856
e95354ec
VZ
2857wxMBConv *wxCSConv::DoCreate() const
2858{
ce6f8d6f
VZ
2859#if wxUSE_FONTMAP
2860 wxLogTrace(TRACE_STRCONV,
2861 wxT("creating conversion for %s"),
2862 (m_name ? m_name
2863 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2864#endif // wxUSE_FONTMAP
2865
c547282d
VZ
2866 // check for the special case of ASCII or ISO8859-1 charset: as we have
2867 // special knowledge of it anyhow, we don't need to create a special
2868 // conversion object
e4277538
VZ
2869 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
2870 m_encoding == wxFONTENCODING_DEFAULT )
f1339c56 2871 {
e95354ec
VZ
2872 // don't convert at all
2873 return NULL;
2874 }
dccce9ea 2875
e95354ec
VZ
2876 // we trust OS to do conversion better than we can so try external
2877 // conversion methods first
2878 //
2879 // the full order is:
2880 // 1. OS conversion (iconv() under Unix or Win32 API)
2881 // 2. hard coded conversions for UTF
2882 // 3. wxEncodingConverter as fall back
2883
2884 // step (1)
2885#ifdef HAVE_ICONV
c547282d 2886#if !wxUSE_FONTMAP
e95354ec 2887 if ( m_name )
c547282d 2888#endif // !wxUSE_FONTMAP
e95354ec 2889 {
c547282d 2890 wxString name(m_name);
8b3eb85d
VZ
2891 wxFontEncoding encoding(m_encoding);
2892
2893 if ( !name.empty() )
2894 {
2895 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2896 if ( conv->IsOk() )
2897 return conv;
2898
2899 delete conv;
c547282d
VZ
2900
2901#if wxUSE_FONTMAP
8b3eb85d
VZ
2902 encoding =
2903 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
c547282d 2904#endif // wxUSE_FONTMAP
8b3eb85d
VZ
2905 }
2906#if wxUSE_FONTMAP
2907 {
2908 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2909 if ( it != gs_nameCache.end() )
2910 {
2911 if ( it->second.empty() )
2912 return NULL;
c547282d 2913
8b3eb85d
VZ
2914 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2915 if ( conv->IsOk() )
2916 return conv;
e95354ec 2917
8b3eb85d
VZ
2918 delete conv;
2919 }
2920
2921 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2922
2923 for ( ; *names; ++names )
2924 {
2925 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2926 if ( conv->IsOk() )
2927 {
2928 gs_nameCache[encoding] = *names;
2929 return conv;
2930 }
2931
2932 delete conv;
2933 }
2934
40711af8 2935 gs_nameCache[encoding] = _T(""); // cache the failure
8b3eb85d
VZ
2936 }
2937#endif // wxUSE_FONTMAP
e95354ec
VZ
2938 }
2939#endif // HAVE_ICONV
2940
2941#ifdef wxHAVE_WIN32_MB2WC
2942 {
7608a683 2943#if wxUSE_FONTMAP
e95354ec
VZ
2944 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2945 : new wxMBConv_win32(m_encoding);
2946 if ( conv->IsOk() )
2947 return conv;
2948
2949 delete conv;
7608a683
WS
2950#else
2951 return NULL;
2952#endif
e95354ec
VZ
2953 }
2954#endif // wxHAVE_WIN32_MB2WC
d775fa82
WS
2955#if defined(__WXMAC__)
2956 {
5c3c8676 2957 // leave UTF16 and UTF32 to the built-ins of wx
3698ae71 2958 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
5c3c8676 2959 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
d775fa82
WS
2960 {
2961
2d1659cf 2962#if wxUSE_FONTMAP
d775fa82
WS
2963 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2964 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
2965#else
2966 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2967#endif
d775fa82 2968 if ( conv->IsOk() )
f7e98dee
RN
2969 return conv;
2970
2971 delete conv;
2972 }
2973 }
2974#endif
2975#if defined(__WXCOCOA__)
2976 {
2977 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2978 {
2979
a6900d10 2980#if wxUSE_FONTMAP
f7e98dee
RN
2981 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2982 : new wxMBConv_cocoa(m_encoding);
a6900d10
RN
2983#else
2984 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2985#endif
f7e98dee 2986 if ( conv->IsOk() )
d775fa82
WS
2987 return conv;
2988
2989 delete conv;
2990 }
335d31e0
SC
2991 }
2992#endif
e95354ec
VZ
2993 // step (2)
2994 wxFontEncoding enc = m_encoding;
2995#if wxUSE_FONTMAP
c547282d
VZ
2996 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2997 {
2998 // use "false" to suppress interactive dialogs -- we can be called from
2999 // anywhere and popping up a dialog from here is the last thing we want to
3000 // do
267e11c5 3001 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 3002 }
e95354ec
VZ
3003#endif // wxUSE_FONTMAP
3004
3005 switch ( enc )
3006 {
3007 case wxFONTENCODING_UTF7:
3008 return new wxMBConvUTF7;
3009
3010 case wxFONTENCODING_UTF8:
3011 return new wxMBConvUTF8;
3012
e95354ec
VZ
3013 case wxFONTENCODING_UTF16BE:
3014 return new wxMBConvUTF16BE;
3015
3016 case wxFONTENCODING_UTF16LE:
3017 return new wxMBConvUTF16LE;
3018
e95354ec
VZ
3019 case wxFONTENCODING_UTF32BE:
3020 return new wxMBConvUTF32BE;
3021
3022 case wxFONTENCODING_UTF32LE:
3023 return new wxMBConvUTF32LE;
3024
3025 default:
3026 // nothing to do but put here to suppress gcc warnings
3027 ;
3028 }
3029
3030 // step (3)
3031#if wxUSE_FONTMAP
3032 {
3033 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
3034 : new wxMBConv_wxwin(m_encoding);
3035 if ( conv->IsOk() )
3036 return conv;
3037
3038 delete conv;
3039 }
3040#endif // wxUSE_FONTMAP
3041
a58d4f4d
VS
3042 // NB: This is a hack to prevent deadlock. What could otherwise happen
3043 // in Unicode build: wxConvLocal creation ends up being here
3044 // because of some failure and logs the error. But wxLog will try to
3045 // attach timestamp, for which it will need wxConvLocal (to convert
3046 // time to char* and then wchar_t*), but that fails, tries to log
3047 // error, but wxLog has a (already locked) critical section that
3048 // guards static buffer.
3049 static bool alreadyLoggingError = false;
3050 if (!alreadyLoggingError)
3051 {
3052 alreadyLoggingError = true;
3053 wxLogError(_("Cannot convert from the charset '%s'!"),
3054 m_name ? m_name
e95354ec
VZ
3055 :
3056#if wxUSE_FONTMAP
267e11c5 3057 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
e95354ec
VZ
3058#else // !wxUSE_FONTMAP
3059 wxString::Format(_("encoding %s"), m_encoding).c_str()
3060#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
3061 );
a58d4f4d
VS
3062 alreadyLoggingError = false;
3063 }
e95354ec
VZ
3064
3065 return NULL;
3066}
3067
3068void wxCSConv::CreateConvIfNeeded() const
3069{
3070 if ( m_deferred )
3071 {
3072 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
3073
3074#if wxUSE_INTL
3075 // if we don't have neither the name nor the encoding, use the default
3076 // encoding for this system
3077 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3078 {
4d312c22 3079 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
3080 }
3081#endif // wxUSE_INTL
3082
e95354ec
VZ
3083 self->m_convReal = DoCreate();
3084 self->m_deferred = false;
6001e347 3085 }
6001e347
RR
3086}
3087
3088size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
3089{
e95354ec 3090 CreateConvIfNeeded();
dccce9ea 3091
e95354ec
VZ
3092 if (m_convReal)
3093 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
3094
3095 // latin-1 (direct)
4def3b35 3096 size_t len = strlen(psz);
dccce9ea 3097
f1339c56
RR
3098 if (buf)
3099 {
4def3b35 3100 for (size_t c = 0; c <= len; c++)
f1339c56
RR
3101 buf[c] = (unsigned char)(psz[c]);
3102 }
dccce9ea 3103
f1339c56 3104 return len;
6001e347
RR
3105}
3106
3107size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
3108{
e95354ec 3109 CreateConvIfNeeded();
dccce9ea 3110
e95354ec
VZ
3111 if (m_convReal)
3112 return m_convReal->WC2MB(buf, psz, n);
1cd52418 3113
f1339c56 3114 // latin-1 (direct)
f8d791e0 3115 const size_t len = wxWcslen(psz);
f1339c56
RR
3116 if (buf)
3117 {
4def3b35 3118 for (size_t c = 0; c <= len; c++)
24642831
VS
3119 {
3120 if (psz[c] > 0xFF)
3121 return (size_t)-1;
907173e5 3122 buf[c] = (char)psz[c];
24642831
VS
3123 }
3124 }
3125 else
3126 {
3127 for (size_t c = 0; c <= len; c++)
3128 {
3129 if (psz[c] > 0xFF)
3130 return (size_t)-1;
3131 }
f1339c56 3132 }
dccce9ea 3133
f1339c56 3134 return len;
6001e347
RR
3135}
3136
7ef3ab50 3137size_t wxCSConv::GetMBNulLen() const
eec47cc6
VZ
3138{
3139 CreateConvIfNeeded();
3140
3141 if ( m_convReal )
3142 {
7ef3ab50 3143 return m_convReal->GetMBNulLen();
eec47cc6
VZ
3144 }
3145
c1464d9d 3146 return 1;
eec47cc6
VZ
3147}
3148
bde4baac
VZ
3149// ----------------------------------------------------------------------------
3150// globals
3151// ----------------------------------------------------------------------------
3152
3153#ifdef __WINDOWS__
3154 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
3155#elif defined(__WXMAC__) && !defined(__MACH__)
3156 static wxMBConv_mac wxConvLibcObj ;
bde4baac 3157#else
dcc8fac0 3158 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
3159#endif
3160
3161static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
3162static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
3163static wxMBConvUTF7 wxConvUTF7Obj;
3164static wxMBConvUTF8 wxConvUTF8Obj;
c12b7f79 3165
bde4baac
VZ
3166WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
3167WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
3168WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
3169WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
3170WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
3171WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
f5a1953b
VZ
3172WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
3173#ifdef __WXOSX__
ea8ce907 3174 wxConvUTF8Obj;
f5a1953b 3175#else
ea8ce907 3176 wxConvLibcObj;
f5a1953b
VZ
3177#endif
3178
bde4baac
VZ
3179
3180#else // !wxUSE_WCHAR_T
3181
3182// stand-ins in absence of wchar_t
3183WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
3184 wxConvISO8859_1,
3185 wxConvLocal,
3186 wxConvUTF8;
3187
3188#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T