]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
implemented locating objects in gridbag sizer,
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347 1/////////////////////////////////////////////////////////////////////////////
38d4b1e4 2// Name: src/common/strconv.cpp
6001e347 3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
f6bcfd97
BP
15// ============================================================================
16// declarations
17// ============================================================================
18
19// ----------------------------------------------------------------------------
20// headers
21// ----------------------------------------------------------------------------
22
6001e347
RR
23// For compilers that support precompilation, includes "wx.h".
24#include "wx/wxprec.h"
25
26#ifdef __BORLANDC__
27 #pragma hdrstop
28#endif
29
373658eb
VZ
30#ifndef WX_PRECOMP
31 #include "wx/intl.h"
32 #include "wx/log.h"
33#endif // WX_PRECOMP
34
bde4baac
VZ
35#include "wx/strconv.h"
36
37#if wxUSE_WCHAR_T
38
7608a683 39#ifdef __WINDOWS__
532d575b 40 #include "wx/msw/private.h"
13dd924a 41 #include "wx/msw/missing.h"
0a1c1e62
GRG
42#endif
43
1c193821 44#ifndef __WXWINCE__
1cd52418 45#include <errno.h>
1c193821
JS
46#endif
47
6001e347
RR
48#include <ctype.h>
49#include <string.h>
50#include <stdlib.h>
51
e95354ec
VZ
52#if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54#endif // __WIN32__ but !__WXMICROWIN__
55
6001e347 56#ifdef __SALFORDC__
373658eb 57 #include <clib.h>
6001e347
RR
58#endif
59
b040e242 60#ifdef HAVE_ICONV
373658eb 61 #include <iconv.h>
b1d547eb 62 #include "wx/thread.h"
1cd52418 63#endif
1cd52418 64
373658eb
VZ
65#include "wx/encconv.h"
66#include "wx/fontmap.h"
7608a683 67#include "wx/utils.h"
373658eb 68
335d31e0 69#ifdef __WXMAC__
40ba2f3b 70#ifndef __DARWIN__
4227afa4
SC
71#include <ATSUnicode.h>
72#include <TextCommon.h>
73#include <TextEncodingConverter.h>
40ba2f3b 74#endif
335d31e0
SC
75
76#include "wx/mac/private.h" // includes mac headers
77#endif
ce6f8d6f
VZ
78
79#define TRACE_STRCONV _T("strconv")
80
4948c2b6 81#if SIZEOF_WCHAR_T == 2
ac11db3a
MW
82 #define WC_UTF16
83#endif
84
373658eb
VZ
85// ============================================================================
86// implementation
87// ============================================================================
88
69373110
VZ
// helper used when looking for string terminators: returns true if at least
// one of the n bytes starting at p is not NUL and false if all of them are NUL
// (which is trivially the case when n is 0)
static bool NotAllNULs(const char *p, size_t n)
{
    for ( size_t i = 0; i < n; i++ )
    {
        if ( p[i] != '\0' )
            return true;
    }

    return false;
}
97
373658eb 98// ----------------------------------------------------------------------------
c91830cb 99// UTF-16 en/decoding to/from UCS-4
373658eb 100// ----------------------------------------------------------------------------
6001e347 101
b0a6bb75 102
c91830cb 103static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 104{
dccce9ea 105 if (input<=0xffff)
4def3b35 106 {
999836aa
VZ
107 if (output)
108 *output = (wxUint16) input;
4def3b35 109 return 1;
dccce9ea
VZ
110 }
111 else if (input>=0x110000)
4def3b35
VS
112 {
113 return (size_t)-1;
dccce9ea
VZ
114 }
115 else
4def3b35 116 {
dccce9ea 117 if (output)
4def3b35 118 {
c91830cb 119 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
999836aa 120 *output = (wxUint16) ((input&0x3ff)+0xdc00);
4def3b35
VS
121 }
122 return 2;
1cd52418 123 }
1cd52418
OK
124}
125
c91830cb 126static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 127{
dccce9ea 128 if ((*input<0xd800) || (*input>0xdfff))
4def3b35
VS
129 {
130 output = *input;
131 return 1;
dccce9ea 132 }
cdb14ecb 133 else if ((input[1]<0xdc00) || (input[1]>0xdfff))
4def3b35
VS
134 {
135 output = *input;
136 return (size_t)-1;
dccce9ea
VZ
137 }
138 else
4def3b35
VS
139 {
140 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
141 return 2;
142 }
1cd52418
OK
143}
144
b0a6bb75 145
f6bcfd97 146// ----------------------------------------------------------------------------
6001e347 147// wxMBConv
f6bcfd97 148// ----------------------------------------------------------------------------
2c53a80a 149
483b0434
VZ
150size_t
151wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
152 const char *src, size_t srcLen) const
6001e347 153{
483b0434
VZ
154 // although new conversion classes are supposed to implement this function
155 // directly, the existins ones only implement the old MB2WC() and so, to
156 // avoid to have to rewrite all conversion classes at once, we provide a
157 // default (but not efficient) implementation of this one in terms of the
158 // old function by copying the input to ensure that it's NUL-terminated and
159 // then using MB2WC() to convert it
6001e347 160
483b0434
VZ
161 // the number of chars [which would be] written to dst [if it were not NULL]
162 size_t dstWritten = 0;
eec47cc6 163
c1464d9d 164 // the number of NULs terminating this string
483b0434 165 size_t nulLen wxDUMMY_INITIALIZE(0);
eec47cc6 166
c1464d9d
VZ
167 // if we were not given the input size we just have to assume that the
168 // string is properly terminated as we have no way of knowing how long it
169 // is anyhow, but if we do have the size check whether there are enough
170 // NULs at the end
483b0434
VZ
171 wxCharBuffer bufTmp;
172 const char *srcEnd;
173 if ( srcLen != (size_t)-1 )
eec47cc6 174 {
c1464d9d 175 // we need to know how to find the end of this string
7ef3ab50 176 nulLen = GetMBNulLen();
483b0434
VZ
177 if ( nulLen == wxCONV_FAILED )
178 return wxCONV_FAILED;
e4e3bbb4 179
c1464d9d 180 // if there are enough NULs we can avoid the copy
483b0434 181 if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
eec47cc6
VZ
182 {
183 // make a copy in order to properly NUL-terminate the string
483b0434 184 bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
c1464d9d 185 char * const p = bufTmp.data();
483b0434
VZ
186 memcpy(p, src, srcLen);
187 for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
c1464d9d 188 *s = '\0';
483b0434
VZ
189
190 src = bufTmp;
eec47cc6 191 }
e4e3bbb4 192
483b0434
VZ
193 srcEnd = src + srcLen;
194 }
195 else // quit after the first loop iteration
196 {
197 srcEnd = NULL;
198 }
e4e3bbb4 199
483b0434 200 for ( ;; )
eec47cc6 201 {
c1464d9d 202 // try to convert the current chunk
483b0434 203 size_t lenChunk = MB2WC(NULL, src, 0);
eec47cc6 204 if ( lenChunk == 0 )
f5fb6871 205 {
830f8f11
VZ
206 // nothing left in the input string, conversion succeeded; but
207 // still account for the trailing NULL
208 dstWritten++;
c1464d9d 209 break;
f5fb6871
RN
210 }
211
483b0434
VZ
212 if ( lenChunk == wxCONV_FAILED )
213 return wxCONV_FAILED;
e4e3bbb4 214
830f8f11 215 lenChunk++; // for trailing NUL
e4e3bbb4 216
483b0434 217 dstWritten += lenChunk;
f5fb6871 218
483b0434
VZ
219 if ( dst )
220 {
221 if ( dstWritten > dstLen )
222 return wxCONV_FAILED;
223
830f8f11 224 if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
483b0434
VZ
225 return wxCONV_FAILED;
226
227 dst += lenChunk;
228 }
c1464d9d 229
483b0434 230 if ( !srcEnd )
c1464d9d 231 {
483b0434
VZ
232 // we convert the entire string in this cas, as we suppose that the
233 // string is NUL-terminated and so srcEnd is not used at all
c1464d9d
VZ
234 break;
235 }
eec47cc6
VZ
236
237 // advance the input pointer past the end of this chunk
483b0434 238 while ( NotAllNULs(src, nulLen) )
c1464d9d
VZ
239 {
240 // notice that we must skip over multiple bytes here as we suppose
241 // that if NUL takes 2 or 4 bytes, then all the other characters do
242 // too and so if advanced by a single byte we might erroneously
243 // detect sequences of NUL bytes in the middle of the input
483b0434 244 src += nulLen;
c1464d9d 245 }
e4e3bbb4 246
483b0434 247 src += nulLen; // skipping over its terminator as well
c1464d9d
VZ
248
249 // note that ">=" (and not just "==") is needed here as the terminator
250 // we skipped just above could be inside or just after the buffer
251 // delimited by inEnd
483b0434 252 if ( src >= srcEnd )
c1464d9d
VZ
253 break;
254 }
255
483b0434 256 return dstWritten;
e4e3bbb4
RN
257}
258
483b0434
VZ
259size_t
260wxMBConv::FromWChar(char *dst, size_t dstLen,
261 const wchar_t *src, size_t srcLen) const
e4e3bbb4 262{
483b0434
VZ
263 // the number of chars [which would be] written to dst [if it were not NULL]
264 size_t dstWritten = 0;
e4e3bbb4 265
eec47cc6
VZ
266 // make a copy of the input string unless it is already properly
267 // NUL-terminated
268 //
269 // if we don't know its length we have no choice but to assume that it is,
270 // indeed, properly terminated
271 wxWCharBuffer bufTmp;
483b0434 272 if ( srcLen == (size_t)-1 )
e4e3bbb4 273 {
483b0434 274 srcLen = wxWcslen(src) + 1;
eec47cc6 275 }
483b0434 276 else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
eec47cc6
VZ
277 {
278 // make a copy in order to properly NUL-terminate the string
483b0434
VZ
279 bufTmp = wxWCharBuffer(srcLen);
280 memcpy(bufTmp.data(), src, srcLen*sizeof(wchar_t));
281 src = bufTmp;
282 }
283
284 const size_t lenNul = GetMBNulLen();
285 for ( const wchar_t * const srcEnd = src + srcLen;
286 src < srcEnd;
287 src += wxWcslen(src) + 1 /* skip L'\0' too */ )
288 {
289 // try to convert the current chunk
290 size_t lenChunk = WC2MB(NULL, src, 0);
291
292 if ( lenChunk == wxCONV_FAILED )
293 return wxCONV_FAILED;
294
295 lenChunk += lenNul;
296 dstWritten += lenChunk;
297
298 if ( dst )
299 {
300 if ( dstWritten > dstLen )
301 return wxCONV_FAILED;
302
303 if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
304 return wxCONV_FAILED;
305
306 dst += lenChunk;
307 }
eec47cc6 308 }
e4e3bbb4 309
483b0434
VZ
310 return dstWritten;
311}
312
509da451
VZ
313size_t wxMBConv::MB2WC(wchar_t *out, const char *in, size_t outLen) const
314{
315 size_t rc = ToWChar(out, outLen, in);
316 if ( rc != wxCONV_FAILED )
317 {
318 // ToWChar() returns the buffer length, i.e. including the trailing
319 // NUL, while this method doesn't take it into account
320 rc--;
321 }
322
323 return rc;
324}
325
326size_t wxMBConv::WC2MB(char *out, const wchar_t *in, size_t outLen) const
327{
328 size_t rc = FromWChar(out, outLen, in);
329 if ( rc != wxCONV_FAILED )
330 {
331 rc -= GetMBNulLen();
332 }
333
334 return rc;
335}
336
483b0434
VZ
337wxMBConv::~wxMBConv()
338{
339 // nothing to do here (necessary for Darwin linking probably)
340}
e4e3bbb4 341
483b0434
VZ
342const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
343{
344 if ( psz )
eec47cc6 345 {
483b0434
VZ
346 // calculate the length of the buffer needed first
347 const size_t nLen = MB2WC(NULL, psz, 0);
348 if ( nLen != wxCONV_FAILED )
f5fb6871 349 {
483b0434
VZ
350 // now do the actual conversion
351 wxWCharBuffer buf(nLen /* +1 added implicitly */);
eec47cc6 352
483b0434
VZ
353 // +1 for the trailing NULL
354 if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
355 return buf;
f5fb6871 356 }
483b0434 357 }
e4e3bbb4 358
483b0434
VZ
359 return wxWCharBuffer();
360}
3698ae71 361
483b0434
VZ
362const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
363{
364 if ( pwz )
365 {
366 const size_t nLen = WC2MB(NULL, pwz, 0);
367 if ( nLen != wxCONV_FAILED )
368 {
369 // extra space for trailing NUL(s)
370 static const size_t extraLen = GetMaxMBNulLen();
f5fb6871 371
483b0434
VZ
372 wxCharBuffer buf(nLen + extraLen - 1);
373 if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
374 return buf;
375 }
376 }
377
378 return wxCharBuffer();
379}
e4e3bbb4 380
483b0434
VZ
381const wxWCharBuffer
382wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
383{
384 const size_t dstLen = ToWChar(NULL, 0, in, inLen);
385 if ( dstLen != wxCONV_FAILED )
386 {
830f8f11 387 wxWCharBuffer wbuf(dstLen - 1);
483b0434
VZ
388 if ( ToWChar(wbuf.data(), dstLen, in, inLen) )
389 {
390 if ( outLen )
830f8f11 391 *outLen = dstLen - 1;
483b0434
VZ
392 return wbuf;
393 }
394 }
395
396 if ( outLen )
397 *outLen = 0;
398
399 return wxWCharBuffer();
400}
401
402const wxCharBuffer
403wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
404{
405 const size_t dstLen = FromWChar(NULL, 0, in, inLen);
406 if ( dstLen != wxCONV_FAILED )
407 {
830f8f11 408 wxCharBuffer buf(dstLen - 1);
483b0434
VZ
409 if ( FromWChar(buf.data(), dstLen, in, inLen) )
410 {
411 if ( outLen )
830f8f11 412 *outLen = dstLen - 1;
483b0434
VZ
413 return buf;
414 }
e4e3bbb4
RN
415 }
416
eec47cc6
VZ
417 if ( outLen )
418 *outLen = 0;
419
420 return wxCharBuffer();
e4e3bbb4
RN
421}
422
6001e347 423// ----------------------------------------------------------------------------
bde4baac 424// wxMBConvLibc
6001e347
RR
425// ----------------------------------------------------------------------------
426
bde4baac
VZ
427size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
428{
429 return wxMB2WC(buf, psz, n);
430}
431
432size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
433{
434 return wxWC2MB(buf, psz, n);
435}
e1bfe89e
RR
436
437// ----------------------------------------------------------------------------
532d575b 438// wxConvBrokenFileNames
e1bfe89e
RR
439// ----------------------------------------------------------------------------
440
eec47cc6
VZ
441#ifdef __UNIX__
442
845905d5 443wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
ea8ce907 444{
845905d5
MW
445 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
446 || wxStricmp(charset, _T("UTF8")) == 0 )
447 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
448 else
449 m_conv = new wxCSConv(charset);
ea8ce907
RR
450}
451
eec47cc6 452#endif // __UNIX__
c12b7f79 453
bde4baac 454// ----------------------------------------------------------------------------
3698ae71 455// UTF-7
bde4baac 456// ----------------------------------------------------------------------------
6001e347 457
15f2ee32 458// Implementation (C) 2004 Fredrik Roubert
6001e347 459
15f2ee32
RN
//
// BASE64 decoding table: maps each byte value to the 6 bit value it encodes,
// or to 0xff if the byte is not one of the BASE64 characters
//
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x27: nothing valid here
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,

    // 0x28 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,

    // 0x30 - 0x3f: digits
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,

    // 0x40 - 0x5f: upper case letters
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,

    // 0x60 - 0x7f: lower case letters
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,

    // 0x80 - 0xff: nothing valid here neither
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
498
499size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
500{
15f2ee32
RN
501 size_t len = 0;
502
04a37834 503 while ( *psz && (!buf || (len < n)) )
15f2ee32
RN
504 {
505 unsigned char cc = *psz++;
506 if (cc != '+')
507 {
508 // plain ASCII char
509 if (buf)
510 *buf++ = cc;
511 len++;
512 }
513 else if (*psz == '-')
514 {
515 // encoded plus sign
516 if (buf)
517 *buf++ = cc;
518 len++;
519 psz++;
520 }
04a37834 521 else // start of BASE64 encoded string
15f2ee32 522 {
04a37834 523 bool lsb, ok;
15f2ee32 524 unsigned int d, l;
04a37834
VZ
525 for ( ok = lsb = false, d = 0, l = 0;
526 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
527 psz++ )
15f2ee32
RN
528 {
529 d <<= 6;
530 d += cc;
531 for (l += 6; l >= 8; lsb = !lsb)
532 {
04a37834 533 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
534 if (lsb)
535 {
536 if (buf)
537 *buf++ |= c;
538 len ++;
539 }
540 else
04a37834 541 {
15f2ee32 542 if (buf)
6356d52a 543 *buf = (wchar_t)(c << 8);
04a37834
VZ
544 }
545
546 ok = true;
15f2ee32
RN
547 }
548 }
04a37834
VZ
549
550 if ( !ok )
551 {
552 // in valid UTF7 we should have valid characters after '+'
553 return (size_t)-1;
554 }
555
15f2ee32
RN
556 if (*psz == '-')
557 psz++;
558 }
559 }
04a37834
VZ
560
561 if ( buf && (len < n) )
562 *buf = '\0';
563
15f2ee32 564 return len;
6001e347
RR
565}
566
15f2ee32
RN
//
// BASE64 encoding table: maps a 6 bit value to the character representing it
//
static const unsigned char utf7enb64[] =
{
    // 0 - 25
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z',
    // 26 - 51
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
    'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
    'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
    'y', 'z',
    // 52 - 61
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9',
    // 62 and 63
    '+', '/'
};
581
//
// UTF-7 encoding table, indexed by the ASCII character code; the values are:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    // 0x00 - 0x1f
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    // 0x20 - 0x3f
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    // 0x40 - 0x5f
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    // 0x60 - 0x7f
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
601
667e5b3e 602size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32 603{
15f2ee32
RN
604 size_t len = 0;
605
606 while (*psz && ((!buf) || (len < n)))
607 {
608 wchar_t cc = *psz++;
609 if (cc < 0x80 && utf7encode[cc] < 1)
610 {
611 // plain ASCII char
612 if (buf)
613 *buf++ = (char)cc;
614 len++;
615 }
616#ifndef WC_UTF16
79c78d42 617 else if (((wxUint32)cc) > 0xffff)
b2c13097 618 {
15f2ee32
RN
619 // no surrogate pair generation (yet?)
620 return (size_t)-1;
621 }
622#endif
623 else
624 {
625 if (buf)
626 *buf++ = '+';
627 len++;
628 if (cc != '+')
629 {
630 // BASE64 encode string
631 unsigned int lsb, d, l;
73c902d6 632 for (d = 0, l = 0; /*nothing*/; psz++)
15f2ee32
RN
633 {
634 for (lsb = 0; lsb < 2; lsb ++)
635 {
636 d <<= 8;
637 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
638
639 for (l += 8; l >= 6; )
640 {
641 l -= 6;
642 if (buf)
643 *buf++ = utf7enb64[(d >> l) % 64];
644 len++;
645 }
646 }
647 cc = *psz;
648 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
649 break;
650 }
651 if (l != 0)
652 {
653 if (buf)
654 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
655 len++;
656 }
657 }
658 if (buf)
659 *buf++ = '-';
660 len++;
661 }
662 }
663 if (buf && (len < n))
664 *buf = 0;
665 return len;
6001e347
RR
666}
667
f6bcfd97 668// ----------------------------------------------------------------------------
6001e347 669// UTF-8
f6bcfd97 670// ----------------------------------------------------------------------------
6001e347 671
dccce9ea 672static wxUint32 utf8_max[]=
4def3b35 673 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347 674
3698ae71
VZ
675// boundaries of the private use area we use to (temporarily) remap invalid
676// characters invalid in a UTF-8 encoded string
ea8ce907
RR
677const wxUint32 wxUnicodePUA = 0x100000;
678const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
679
6001e347
RR
680size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
681{
4def3b35
VS
682 size_t len = 0;
683
dccce9ea 684 while (*psz && ((!buf) || (len < n)))
4def3b35 685 {
ea8ce907
RR
686 const char *opsz = psz;
687 bool invalid = false;
4def3b35
VS
688 unsigned char cc = *psz++, fc = cc;
689 unsigned cnt;
dccce9ea 690 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 691 fc <<= 1;
dccce9ea 692 if (!cnt)
4def3b35
VS
693 {
694 // plain ASCII char
dccce9ea 695 if (buf)
4def3b35
VS
696 *buf++ = cc;
697 len++;
561488ef
MW
698
699 // escape the escape character for octal escapes
700 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
701 && cc == '\\' && (!buf || len < n))
702 {
703 if (buf)
704 *buf++ = cc;
705 len++;
706 }
dccce9ea
VZ
707 }
708 else
4def3b35
VS
709 {
710 cnt--;
dccce9ea 711 if (!cnt)
4def3b35
VS
712 {
713 // invalid UTF-8 sequence
ea8ce907 714 invalid = true;
dccce9ea
VZ
715 }
716 else
4def3b35
VS
717 {
718 unsigned ocnt = cnt - 1;
719 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 720 while (cnt--)
4def3b35 721 {
ea8ce907 722 cc = *psz;
dccce9ea 723 if ((cc & 0xC0) != 0x80)
4def3b35
VS
724 {
725 // invalid UTF-8 sequence
ea8ce907
RR
726 invalid = true;
727 break;
4def3b35 728 }
ea8ce907 729 psz++;
4def3b35
VS
730 res = (res << 6) | (cc & 0x3f);
731 }
ea8ce907 732 if (invalid || res <= utf8_max[ocnt])
4def3b35
VS
733 {
734 // illegal UTF-8 encoding
ea8ce907 735 invalid = true;
4def3b35 736 }
ea8ce907
RR
737 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
738 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
739 {
740 // if one of our PUA characters turns up externally
741 // it must also be treated as an illegal sequence
742 // (a bit like you have to escape an escape character)
743 invalid = true;
744 }
745 else
746 {
1cd52418 747#ifdef WC_UTF16
ea8ce907
RR
748 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
749 size_t pa = encode_utf16(res, (wxUint16 *)buf);
750 if (pa == (size_t)-1)
751 {
752 invalid = true;
753 }
754 else
755 {
756 if (buf)
757 buf += pa;
758 len += pa;
759 }
373658eb 760#else // !WC_UTF16
ea8ce907 761 if (buf)
38d4b1e4 762 *buf++ = (wchar_t)res;
ea8ce907 763 len++;
373658eb 764#endif // WC_UTF16/!WC_UTF16
ea8ce907
RR
765 }
766 }
767 if (invalid)
768 {
769 if (m_options & MAP_INVALID_UTF8_TO_PUA)
770 {
771 while (opsz < psz && (!buf || len < n))
772 {
773#ifdef WC_UTF16
774 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
775 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
776 wxASSERT(pa != (size_t)-1);
777 if (buf)
778 buf += pa;
779 opsz++;
780 len += pa;
781#else
782 if (buf)
38d4b1e4 783 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
ea8ce907
RR
784 opsz++;
785 len++;
786#endif
787 }
788 }
3698ae71 789 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907
RR
790 {
791 while (opsz < psz && (!buf || len < n))
792 {
3698ae71
VZ
793 if ( buf && len + 3 < n )
794 {
17a1ebd1 795 unsigned char on = *opsz;
3698ae71 796 *buf++ = L'\\';
17a1ebd1
VZ
797 *buf++ = (wchar_t)( L'0' + on / 0100 );
798 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
799 *buf++ = (wchar_t)( L'0' + on % 010 );
3698ae71 800 }
ea8ce907
RR
801 opsz++;
802 len += 4;
803 }
804 }
3698ae71 805 else // MAP_INVALID_UTF8_NOT
ea8ce907
RR
806 {
807 return (size_t)-1;
808 }
4def3b35
VS
809 }
810 }
6001e347 811 }
dccce9ea 812 if (buf && (len < n))
4def3b35
VS
813 *buf = 0;
814 return len;
6001e347
RR
815}
816
3698ae71
VZ
// return true if the character is an octal digit, i.e. one of '0'..'7'
static inline bool isoctal(wchar_t wch)
{
    if ( wch < L'0' )
        return false;

    return wch <= L'7';
}
821
6001e347
RR
822size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
823{
4def3b35 824 size_t len = 0;
6001e347 825
dccce9ea 826 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
827 {
828 wxUint32 cc;
1cd52418 829#ifdef WC_UTF16
b5153fd8
VZ
830 // cast is ok for WC_UTF16
831 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 832 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 833#else
4def3b35
VS
834 cc=(*psz++) & 0x7fffffff;
835#endif
3698ae71
VZ
836
837 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
838 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35 839 {
dccce9ea 840 if (buf)
ea8ce907 841 *buf++ = (char)(cc - wxUnicodePUA);
4def3b35 842 len++;
3698ae71 843 }
561488ef
MW
844 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
845 && cc == L'\\' && psz[0] == L'\\' )
846 {
847 if (buf)
848 *buf++ = (char)cc;
849 psz++;
850 len++;
851 }
3698ae71
VZ
852 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
853 cc == L'\\' &&
854 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35 855 {
dccce9ea 856 if (buf)
3698ae71 857 {
b2c13097
WS
858 *buf++ = (char) ((psz[0] - L'0')*0100 +
859 (psz[1] - L'0')*010 +
860 (psz[2] - L'0'));
3698ae71
VZ
861 }
862
863 psz += 3;
ea8ce907
RR
864 len++;
865 }
866 else
867 {
868 unsigned cnt;
869 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
870 if (!cnt)
4def3b35 871 {
ea8ce907
RR
872 // plain ASCII char
873 if (buf)
874 *buf++ = (char) cc;
875 len++;
876 }
877
878 else
879 {
880 len += cnt + 1;
881 if (buf)
882 {
883 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
884 while (cnt--)
885 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
886 }
4def3b35
VS
887 }
888 }
6001e347 889 }
4def3b35 890
3698ae71
VZ
891 if (buf && (len<n))
892 *buf = 0;
adb45366 893
4def3b35 894 return len;
6001e347
RR
895}
896
c91830cb
VZ
897// ----------------------------------------------------------------------------
898// UTF-16
899// ----------------------------------------------------------------------------
900
901#ifdef WORDS_BIGENDIAN
bde4baac
VZ
902 #define wxMBConvUTF16straight wxMBConvUTF16BE
903 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 904#else
bde4baac
VZ
905 #define wxMBConvUTF16swap wxMBConvUTF16BE
906 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
907#endif
908
909
c91830cb
VZ
910#ifdef WC_UTF16
911
c91830cb
VZ
912// copy 16bit MB to 16bit String
913size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
914{
915 size_t len=0;
916
917 while (*(wxUint16*)psz && (!buf || len < n))
918 {
919 if (buf)
920 *buf++ = *(wxUint16*)psz;
921 len++;
922
923 psz += sizeof(wxUint16);
924 }
925 if (buf && len<n) *buf=0;
926
927 return len;
928}
929
930
931// copy 16bit String to 16bit MB
932size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
933{
934 size_t len=0;
935
936 while (*psz && (!buf || len < n))
937 {
938 if (buf)
939 {
940 *(wxUint16*)buf = *psz;
941 buf += sizeof(wxUint16);
942 }
943 len += sizeof(wxUint16);
944 psz++;
945 }
946 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
947
948 return len;
949}
950
951
952// swap 16bit MB to 16bit String
953size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
954{
bfab25d4 955 size_t len = 0;
c91830cb 956
da12017a
VZ
957 // UTF16 string must be terminated by 2 NULs as single NULs may occur
958 // inside the string
959 while ( (psz[0] || psz[1]) && (!buf || len < n) )
c91830cb 960 {
bfab25d4 961 if ( buf )
c91830cb
VZ
962 {
963 ((char *)buf)[0] = psz[1];
964 ((char *)buf)[1] = psz[0];
965 buf++;
966 }
967 len++;
bfab25d4 968 psz += 2;
c91830cb 969 }
bfab25d4
VZ
970
971 if ( buf && len < n )
972 *buf = L'\0';
c91830cb
VZ
973
974 return len;
975}
976
977
978// swap 16bit MB to 16bit String
979size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
980{
eec47cc6 981 size_t len = 0;
c91830cb 982
eec47cc6 983 while ( *psz && (!buf || len < n) )
c91830cb 984 {
eec47cc6 985 if ( buf )
c91830cb
VZ
986 {
987 *buf++ = ((char*)psz)[1];
988 *buf++ = ((char*)psz)[0];
989 }
eec47cc6 990 len += 2;
c91830cb
VZ
991 psz++;
992 }
eec47cc6 993
64f56529
VZ
994 if ( buf && len < n - 1 )
995 {
996 buf[0] =
997 buf[1] = '\0';
998 }
c91830cb
VZ
999
1000 return len;
1001}
1002
1003
1004#else // WC_UTF16
1005
1006
1007// copy 16bit MB to 32bit String
1008size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1009{
1010 size_t len=0;
1011
1012 while (*(wxUint16*)psz && (!buf || len < n))
1013 {
1014 wxUint32 cc;
1015 size_t pa=decode_utf16((wxUint16*)psz, cc);
1016 if (pa == (size_t)-1)
1017 return pa;
1018
1019 if (buf)
38d4b1e4 1020 *buf++ = (wchar_t)cc;
c91830cb
VZ
1021 len++;
1022 psz += pa * sizeof(wxUint16);
1023 }
1024 if (buf && len<n) *buf=0;
1025
1026 return len;
1027}
1028
1029
1030// copy 32bit String to 16bit MB
1031size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1032{
1033 size_t len=0;
1034
1035 while (*psz && (!buf || len < n))
1036 {
1037 wxUint16 cc[2];
1038 size_t pa=encode_utf16(*psz, cc);
1039
1040 if (pa == (size_t)-1)
1041 return pa;
1042
1043 if (buf)
1044 {
69b80d28 1045 *(wxUint16*)buf = cc[0];
b5153fd8 1046 buf += sizeof(wxUint16);
c91830cb 1047 if (pa > 1)
69b80d28
VZ
1048 {
1049 *(wxUint16*)buf = cc[1];
1050 buf += sizeof(wxUint16);
1051 }
c91830cb
VZ
1052 }
1053
1054 len += pa*sizeof(wxUint16);
1055 psz++;
1056 }
1057 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1058
1059 return len;
1060}
1061
1062
1063// swap 16bit MB to 32bit String
1064size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1065{
1066 size_t len=0;
1067
1068 while (*(wxUint16*)psz && (!buf || len < n))
1069 {
1070 wxUint32 cc;
1071 char tmp[4];
1072 tmp[0]=psz[1]; tmp[1]=psz[0];
1073 tmp[2]=psz[3]; tmp[3]=psz[2];
1074
1075 size_t pa=decode_utf16((wxUint16*)tmp, cc);
1076 if (pa == (size_t)-1)
1077 return pa;
1078
1079 if (buf)
38d4b1e4 1080 *buf++ = (wchar_t)cc;
c91830cb
VZ
1081
1082 len++;
1083 psz += pa * sizeof(wxUint16);
1084 }
1085 if (buf && len<n) *buf=0;
1086
1087 return len;
1088}
1089
1090
1091// swap 32bit String to 16bit MB
1092size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1093{
1094 size_t len=0;
1095
1096 while (*psz && (!buf || len < n))
1097 {
1098 wxUint16 cc[2];
1099 size_t pa=encode_utf16(*psz, cc);
1100
1101 if (pa == (size_t)-1)
1102 return pa;
1103
1104 if (buf)
1105 {
1106 *buf++ = ((char*)cc)[1];
1107 *buf++ = ((char*)cc)[0];
1108 if (pa > 1)
1109 {
1110 *buf++ = ((char*)cc)[3];
1111 *buf++ = ((char*)cc)[2];
1112 }
1113 }
1114
1115 len += pa*sizeof(wxUint16);
1116 psz++;
1117 }
1118 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1119
1120 return len;
1121}
1122
1123#endif // WC_UTF16
1124
1125
1126// ----------------------------------------------------------------------------
1127// UTF-32
1128// ----------------------------------------------------------------------------
1129
1130#ifdef WORDS_BIGENDIAN
1131#define wxMBConvUTF32straight wxMBConvUTF32BE
1132#define wxMBConvUTF32swap wxMBConvUTF32LE
1133#else
1134#define wxMBConvUTF32swap wxMBConvUTF32BE
1135#define wxMBConvUTF32straight wxMBConvUTF32LE
1136#endif
1137
1138
1139WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1140WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1141
1142
1143#ifdef WC_UTF16
1144
1145// copy 32bit MB to 16bit String
1146size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1147{
1148 size_t len=0;
1149
1150 while (*(wxUint32*)psz && (!buf || len < n))
1151 {
1152 wxUint16 cc[2];
1153
1154 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1155 if (pa == (size_t)-1)
1156 return pa;
1157
1158 if (buf)
1159 {
1160 *buf++ = cc[0];
1161 if (pa > 1)
1162 *buf++ = cc[1];
1163 }
1164 len += pa;
1165 psz += sizeof(wxUint32);
1166 }
1167 if (buf && len<n) *buf=0;
1168
1169 return len;
1170}
1171
1172
1173// copy 16bit String to 32bit MB
1174size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1175{
1176 size_t len=0;
1177
1178 while (*psz && (!buf || len < n))
1179 {
1180 wxUint32 cc;
1181
b5153fd8
VZ
1182 // cast is ok for WC_UTF16
1183 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
1184 if (pa == (size_t)-1)
1185 return pa;
1186
1187 if (buf)
1188 {
1189 *(wxUint32*)buf = cc;
1190 buf += sizeof(wxUint32);
1191 }
1192 len += sizeof(wxUint32);
1193 psz += pa;
1194 }
b5153fd8
VZ
1195
1196 if (buf && len<=n-sizeof(wxUint32))
1197 *(wxUint32*)buf=0;
c91830cb
VZ
1198
1199 return len;
1200}
1201
1202
1203
1204// swap 32bit MB to 16bit String
1205size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1206{
1207 size_t len=0;
1208
1209 while (*(wxUint32*)psz && (!buf || len < n))
1210 {
1211 char tmp[4];
1212 tmp[0] = psz[3]; tmp[1] = psz[2];
1213 tmp[2] = psz[1]; tmp[3] = psz[0];
1214
1215
1216 wxUint16 cc[2];
1217
1218 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1219 if (pa == (size_t)-1)
1220 return pa;
1221
1222 if (buf)
1223 {
1224 *buf++ = cc[0];
1225 if (pa > 1)
1226 *buf++ = cc[1];
1227 }
1228 len += pa;
1229 psz += sizeof(wxUint32);
1230 }
b5153fd8
VZ
1231
1232 if (buf && len<n)
1233 *buf=0;
c91830cb
VZ
1234
1235 return len;
1236}
1237
1238
1239// swap 16bit String to 32bit MB
1240size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1241{
1242 size_t len=0;
1243
1244 while (*psz && (!buf || len < n))
1245 {
1246 char cc[4];
1247
b5153fd8
VZ
1248 // cast is ok for WC_UTF16
1249 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
1250 if (pa == (size_t)-1)
1251 return pa;
1252
1253 if (buf)
1254 {
1255 *buf++ = cc[3];
1256 *buf++ = cc[2];
1257 *buf++ = cc[1];
1258 *buf++ = cc[0];
1259 }
1260 len += sizeof(wxUint32);
1261 psz += pa;
1262 }
b5153fd8
VZ
1263
1264 if (buf && len<=n-sizeof(wxUint32))
1265 *(wxUint32*)buf=0;
c91830cb
VZ
1266
1267 return len;
1268}
1269
1270#else // WC_UTF16
1271
1272
1273// copy 32bit MB to 32bit String
1274size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1275{
1276 size_t len=0;
1277
1278 while (*(wxUint32*)psz && (!buf || len < n))
1279 {
1280 if (buf)
38d4b1e4 1281 *buf++ = (wchar_t)(*(wxUint32*)psz);
c91830cb
VZ
1282 len++;
1283 psz += sizeof(wxUint32);
1284 }
b5153fd8
VZ
1285
1286 if (buf && len<n)
1287 *buf=0;
c91830cb
VZ
1288
1289 return len;
1290}
1291
1292
1293// copy 32bit String to 32bit MB
1294size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1295{
1296 size_t len=0;
1297
1298 while (*psz && (!buf || len < n))
1299 {
1300 if (buf)
1301 {
1302 *(wxUint32*)buf = *psz;
1303 buf += sizeof(wxUint32);
1304 }
1305
1306 len += sizeof(wxUint32);
1307 psz++;
1308 }
1309
b5153fd8
VZ
1310 if (buf && len<=n-sizeof(wxUint32))
1311 *(wxUint32*)buf=0;
c91830cb
VZ
1312
1313 return len;
1314}
1315
1316
1317// swap 32bit MB to 32bit String
1318size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1319{
1320 size_t len=0;
1321
1322 while (*(wxUint32*)psz && (!buf || len < n))
1323 {
1324 if (buf)
1325 {
1326 ((char *)buf)[0] = psz[3];
1327 ((char *)buf)[1] = psz[2];
1328 ((char *)buf)[2] = psz[1];
1329 ((char *)buf)[3] = psz[0];
1330 buf++;
1331 }
1332 len++;
1333 psz += sizeof(wxUint32);
1334 }
b5153fd8
VZ
1335
1336 if (buf && len<n)
1337 *buf=0;
c91830cb
VZ
1338
1339 return len;
1340}
1341
1342
1343// swap 32bit String to 32bit MB
1344size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1345{
1346 size_t len=0;
1347
1348 while (*psz && (!buf || len < n))
1349 {
1350 if (buf)
1351 {
1352 *buf++ = ((char *)psz)[3];
1353 *buf++ = ((char *)psz)[2];
1354 *buf++ = ((char *)psz)[1];
1355 *buf++ = ((char *)psz)[0];
1356 }
1357 len += sizeof(wxUint32);
1358 psz++;
1359 }
b5153fd8
VZ
1360
1361 if (buf && len<=n-sizeof(wxUint32))
1362 *(wxUint32*)buf=0;
c91830cb
VZ
1363
1364 return len;
1365}
1366
1367
1368#endif // WC_UTF16
1369
1370
36acb880
VZ
1371// ============================================================================
1372// The classes doing conversion using the iconv_xxx() functions
1373// ============================================================================
3caec1bb 1374
b040e242 1375#ifdef HAVE_ICONV
3a0d76bc 1376
b1d547eb
VS
1377// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1378// E2BIG if output buffer is _exactly_ as big as needed. Such case is
1379// (unless there's yet another bug in glibc) the only case when iconv()
1380// returns with (size_t)-1 (which means error) and says there are 0 bytes
1381// left in the input buffer -- when _real_ error occurs,
1382// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1383// iconv() failure.
3caec1bb
VS
1384// [This bug does not appear in glibc 2.2.]
1385#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1386#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1387 (errno != E2BIG || bufLeft != 0))
1388#else
1389#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1390#endif
1391
ab217dba 1392#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880 1393
74a7eb0b
VZ
1394#define ICONV_T_INVALID ((iconv_t)-1)
1395
1396#if SIZEOF_WCHAR_T == 4
1397 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1398 #define WC_ENC wxFONTENCODING_UTF32
1399#elif SIZEOF_WCHAR_T == 2
1400 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1401 #define WC_ENC wxFONTENCODING_UTF16
1402#else // sizeof(wchar_t) != 2 nor 4
1403 // does this ever happen?
1404 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1405#endif
1406
36acb880 1407// ----------------------------------------------------------------------------
e95354ec 1408// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1409// ----------------------------------------------------------------------------
1410
e95354ec 1411class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1412{
1413public:
e95354ec
VZ
1414 wxMBConv_iconv(const wxChar *name);
1415 virtual ~wxMBConv_iconv();
36acb880 1416
bde4baac
VZ
1417 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1418 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1419
d36c9347 1420 // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
7ef3ab50
VZ
1421 virtual size_t GetMBNulLen() const;
1422
d36c9347
VZ
1423 virtual wxMBConv *Clone() const
1424 {
1425 wxMBConv_iconv *p = new wxMBConv_iconv(m_name);
1426 p->m_minMBCharWidth = m_minMBCharWidth;
1427 return p;
1428 }
1429
e95354ec 1430 bool IsOk() const
74a7eb0b 1431 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
36acb880
VZ
1432
1433protected:
1434 // the iconv handlers used to translate from multibyte to wide char and in
1435 // the other direction
1436 iconv_t m2w,
1437 w2m;
b1d547eb
VS
1438#if wxUSE_THREADS
1439 // guards access to m2w and w2m objects
1440 wxMutex m_iconvMutex;
1441#endif
36acb880
VZ
1442
1443private:
e95354ec 1444 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880 1445 // available on this machine, it will remain NULL
74a7eb0b 1446 static wxString ms_wcCharsetName;
36acb880
VZ
1447
1448 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1449 // different endian-ness than the native one
405d8f46 1450 static bool ms_wcNeedsSwap;
eec47cc6 1451
d36c9347
VZ
1452
1453 // name of the encoding handled by this conversion
1454 wxString m_name;
1455
7ef3ab50 1456 // cached result of GetMBNulLen(); set to 0 meaning "unknown"
c1464d9d
VZ
1457 // initially
1458 size_t m_minMBCharWidth;
36acb880
VZ
1459};
1460
8f115891
MW
1461// make the constructor available for unit testing
1462WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1463{
1464 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1465 if ( !result->IsOk() )
1466 {
1467 delete result;
1468 return 0;
1469 }
1470 return result;
1471}
1472
422e411e 1473wxString wxMBConv_iconv::ms_wcCharsetName;
e95354ec 1474bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1475
e95354ec 1476wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
d36c9347 1477 : m_name(name)
36acb880 1478{
c1464d9d 1479 m_minMBCharWidth = 0;
eec47cc6 1480
0331b385
VZ
1481 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1482 // names for the charsets
200a9923 1483 const wxCharBuffer cname(wxString(name).ToAscii());
04c79127 1484
36acb880 1485 // check for charset that represents wchar_t:
74a7eb0b 1486 if ( ms_wcCharsetName.empty() )
f1339c56 1487 {
c2b83fdd
VZ
1488 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1489
74a7eb0b
VZ
1490#if wxUSE_FONTMAP
1491 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1492#else // !wxUSE_FONTMAP
1493 static const wxChar *names[] =
36acb880 1494 {
74a7eb0b
VZ
1495#if SIZEOF_WCHAR_T == 4
1496 _T("UCS-4"),
1497#elif SIZEOF_WCHAR_T = 2
1498 _T("UCS-2"),
1499#endif
1500 NULL
1501 };
1502#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
36acb880 1503
d1f024a8 1504 for ( ; *names && ms_wcCharsetName.empty(); ++names )
74a7eb0b 1505 {
17a1ebd1 1506 const wxString nameCS(*names);
74a7eb0b
VZ
1507
1508 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
17a1ebd1 1509 wxString nameXE(nameCS);
74a7eb0b
VZ
1510 #ifdef WORDS_BIGENDIAN
1511 nameXE += _T("BE");
1512 #else // little endian
1513 nameXE += _T("LE");
1514 #endif
1515
c2b83fdd
VZ
1516 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1517 nameXE.c_str());
1518
74a7eb0b
VZ
1519 m2w = iconv_open(nameXE.ToAscii(), cname);
1520 if ( m2w == ICONV_T_INVALID )
3a0d76bc 1521 {
74a7eb0b 1522 // try charset w/o bytesex info (e.g. "UCS4")
c2b83fdd
VZ
1523 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1524 nameCS.c_str());
17a1ebd1 1525 m2w = iconv_open(nameCS.ToAscii(), cname);
3a0d76bc 1526
74a7eb0b
VZ
1527 // and check for bytesex ourselves:
1528 if ( m2w != ICONV_T_INVALID )
3a0d76bc 1529 {
74a7eb0b
VZ
1530 char buf[2], *bufPtr;
1531 wchar_t wbuf[2], *wbufPtr;
1532 size_t insz, outsz;
1533 size_t res;
1534
1535 buf[0] = 'A';
1536 buf[1] = 0;
1537 wbuf[0] = 0;
1538 insz = 2;
1539 outsz = SIZEOF_WCHAR_T * 2;
1540 wbufPtr = wbuf;
1541 bufPtr = buf;
1542
1543 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1544 (char**)&wbufPtr, &outsz);
1545
1546 if (ICONV_FAILED(res, insz))
1547 {
1548 wxLogLastError(wxT("iconv"));
422e411e 1549 wxLogError(_("Conversion to charset '%s' doesn't work."),
17a1ebd1 1550 nameCS.c_str());
74a7eb0b
VZ
1551 }
1552 else // ok, can convert to this encoding, remember it
1553 {
17a1ebd1 1554 ms_wcCharsetName = nameCS;
74a7eb0b
VZ
1555 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1556 }
3a0d76bc
VS
1557 }
1558 }
74a7eb0b 1559 else // use charset not requiring byte swapping
36acb880 1560 {
74a7eb0b 1561 ms_wcCharsetName = nameXE;
36acb880 1562 }
3a0d76bc 1563 }
74a7eb0b 1564
0944fceb 1565 wxLogTrace(TRACE_STRCONV,
74a7eb0b 1566 wxT("iconv wchar_t charset is \"%s\"%s"),
cae8f1bf 1567 ms_wcCharsetName.empty() ? _T("<none>")
74a7eb0b
VZ
1568 : ms_wcCharsetName.c_str(),
1569 ms_wcNeedsSwap ? _T(" (needs swap)")
1570 : _T(""));
3a0d76bc 1571 }
36acb880 1572 else // we already have ms_wcCharsetName
3caec1bb 1573 {
74a7eb0b 1574 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
f1339c56 1575 }
dccce9ea 1576
74a7eb0b 1577 if ( ms_wcCharsetName.empty() )
f1339c56 1578 {
74a7eb0b 1579 w2m = ICONV_T_INVALID;
36acb880 1580 }
405d8f46
VZ
1581 else
1582 {
74a7eb0b
VZ
1583 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1584 if ( w2m == ICONV_T_INVALID )
1585 {
1586 wxLogTrace(TRACE_STRCONV,
1587 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
422e411e 1588 ms_wcCharsetName.c_str(), cname.data());
74a7eb0b 1589 }
405d8f46 1590 }
36acb880 1591}
3caec1bb 1592
e95354ec 1593wxMBConv_iconv::~wxMBConv_iconv()
36acb880 1594{
74a7eb0b 1595 if ( m2w != ICONV_T_INVALID )
36acb880 1596 iconv_close(m2w);
74a7eb0b 1597 if ( w2m != ICONV_T_INVALID )
36acb880
VZ
1598 iconv_close(w2m);
1599}
3a0d76bc 1600
bde4baac 1601size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1602{
69373110
VZ
1603 // find the string length: notice that must be done differently for
1604 // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1605 size_t inbuf;
7ef3ab50 1606 const size_t nulLen = GetMBNulLen();
69373110
VZ
1607 switch ( nulLen )
1608 {
1609 default:
1610 return (size_t)-1;
1611
1612 case 1:
1613 inbuf = strlen(psz); // arguably more optimized than our version
1614 break;
1615
1616 case 2:
1617 case 4:
1618 // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1619 // they also have to start at character boundary and not span two
1620 // adjacent characters
1621 const char *p;
1622 for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
1623 ;
1624 inbuf = p - psz;
1625 break;
1626 }
1627
b1d547eb
VS
1628#if wxUSE_THREADS
1629 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1630 // Unfortunately there is a couple of global wxCSConv objects such as
1631 // wxConvLocal that are used all over wx code, so we have to make sure
1632 // the handle is used by at most one thread at the time. Otherwise
1633 // only a few wx classes would be safe to use from non-main threads
1634 // as MB<->WC conversion would fail "randomly".
1635 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
69373110
VZ
1636#endif // wxUSE_THREADS
1637
3698ae71 1638
36acb880
VZ
1639 size_t outbuf = n * SIZEOF_WCHAR_T;
1640 size_t res, cres;
1641 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1642 wchar_t *bufPtr = buf;
1643 const char *pszPtr = psz;
1644
1645 if (buf)
1646 {
1647 // have destination buffer, convert there
1648 cres = iconv(m2w,
1649 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1650 (char**)&bufPtr, &outbuf);
1651 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1652
36acb880 1653 if (ms_wcNeedsSwap)
3a0d76bc 1654 {
36acb880 1655 // convert to native endianness
17a1ebd1
VZ
1656 for ( unsigned i = 0; i < res; i++ )
1657 buf[n] = WC_BSWAP(buf[i]);
3a0d76bc 1658 }
adb45366 1659
69373110 1660 // NUL-terminate the string if there is any space left
49dd9820
VS
1661 if (res < n)
1662 buf[res] = 0;
36acb880
VZ
1663 }
1664 else
1665 {
1666 // no destination buffer... convert using temp buffer
1667 // to calculate destination buffer requirement
1668 wchar_t tbuf[8];
1669 res = 0;
1670 do {
1671 bufPtr = tbuf;
1672 outbuf = 8*SIZEOF_WCHAR_T;
1673
1674 cres = iconv(m2w,
1675 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1676 (char**)&bufPtr, &outbuf );
1677
1678 res += 8-(outbuf/SIZEOF_WCHAR_T);
1679 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1680 }
dccce9ea 1681
36acb880 1682 if (ICONV_FAILED(cres, inbuf))
f1339c56 1683 {
36acb880 1684 //VS: it is ok if iconv fails, hence trace only
ce6f8d6f 1685 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
36acb880
VZ
1686 return (size_t)-1;
1687 }
1688
1689 return res;
1690}
1691
bde4baac 1692size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1693{
b1d547eb
VS
1694#if wxUSE_THREADS
1695 // NB: explained in MB2WC
1696 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1697#endif
3698ae71 1698
156162ec
MW
1699 size_t inlen = wxWcslen(psz);
1700 size_t inbuf = inlen * SIZEOF_WCHAR_T;
36acb880
VZ
1701 size_t outbuf = n;
1702 size_t res, cres;
3a0d76bc 1703
36acb880 1704 wchar_t *tmpbuf = 0;
3caec1bb 1705
36acb880
VZ
1706 if (ms_wcNeedsSwap)
1707 {
1708 // need to copy to temp buffer to switch endianness
74a7eb0b 1709 // (doing WC_BSWAP twice on the original buffer won't help, as it
36acb880 1710 // could be in read-only memory, or be accessed in some other thread)
74a7eb0b 1711 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
17a1ebd1
VZ
1712 for ( size_t i = 0; i < inlen; i++ )
1713 tmpbuf[n] = WC_BSWAP(psz[i]);
156162ec 1714 tmpbuf[inlen] = L'\0';
74a7eb0b 1715 psz = tmpbuf;
36acb880 1716 }
3a0d76bc 1717
36acb880
VZ
1718 if (buf)
1719 {
1720 // have destination buffer, convert there
1721 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1722
36acb880 1723 res = n-outbuf;
adb45366 1724
49dd9820
VS
1725 // NB: iconv was given only wcslen(psz) characters on input, and so
1726 // it couldn't convert the trailing zero. Let's do it ourselves
1727 // if there's some room left for it in the output buffer.
1728 if (res < n)
1729 buf[0] = 0;
36acb880
VZ
1730 }
1731 else
1732 {
1733 // no destination buffer... convert using temp buffer
1734 // to calculate destination buffer requirement
1735 char tbuf[16];
1736 res = 0;
1737 do {
1738 buf = tbuf; outbuf = 16;
1739
1740 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1741
36acb880
VZ
1742 res += 16 - outbuf;
1743 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1744 }
dccce9ea 1745
36acb880
VZ
1746 if (ms_wcNeedsSwap)
1747 {
1748 free(tmpbuf);
1749 }
dccce9ea 1750
36acb880
VZ
1751 if (ICONV_FAILED(cres, inbuf))
1752 {
ce6f8d6f 1753 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
36acb880
VZ
1754 return (size_t)-1;
1755 }
1756
1757 return res;
1758}
1759
7ef3ab50 1760size_t wxMBConv_iconv::GetMBNulLen() const
eec47cc6 1761{
c1464d9d 1762 if ( m_minMBCharWidth == 0 )
eec47cc6
VZ
1763 {
1764 wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1765
1766#if wxUSE_THREADS
1767 // NB: explained in MB2WC
1768 wxMutexLocker lock(self->m_iconvMutex);
1769#endif
1770
356410fc 1771 wchar_t *wnul = L"";
c1464d9d 1772 char buf[8]; // should be enough for NUL in any encoding
356410fc 1773 size_t inLen = sizeof(wchar_t),
c1464d9d 1774 outLen = WXSIZEOF(buf);
39406a5d 1775 char *in = (char *)wnul;
c1464d9d 1776 char *out = buf;
39406a5d 1777 if ( iconv(w2m, ICONV_CHAR_CAST(&in), &inLen, &out, &outLen) == (size_t)-1 )
356410fc 1778 {
c1464d9d 1779 self->m_minMBCharWidth = (size_t)-1;
356410fc
VZ
1780 }
1781 else // ok
1782 {
c1464d9d 1783 self->m_minMBCharWidth = out - buf;
356410fc 1784 }
eec47cc6
VZ
1785 }
1786
c1464d9d 1787 return m_minMBCharWidth;
eec47cc6
VZ
1788}
1789
b040e242 1790#endif // HAVE_ICONV
36acb880 1791
e95354ec 1792
36acb880
VZ
1793// ============================================================================
1794// Win32 conversion classes
1795// ============================================================================
1cd52418 1796
e95354ec 1797#ifdef wxHAVE_WIN32_MB2WC
373658eb 1798
8b04d4c4 1799// from utils.cpp
d775fa82 1800#if wxUSE_FONTMAP
8b04d4c4
VZ
1801extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1802extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1803#endif
373658eb 1804
e95354ec 1805class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1806{
1807public:
bde4baac
VZ
1808 wxMBConv_win32()
1809 {
1810 m_CodePage = CP_ACP;
c1464d9d 1811 m_minMBCharWidth = 0;
bde4baac
VZ
1812 }
1813
d36c9347
VZ
1814 wxMBConv_win32(const wxMBConv_win32& conv)
1815 {
1816 m_CodePage = conv.m_CodePage;
1817 m_minMBCharWidth = conv.m_minMBCharWidth;
1818 }
1819
7608a683 1820#if wxUSE_FONTMAP
e95354ec 1821 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1822 {
1823 m_CodePage = wxCharsetToCodepage(name);
c1464d9d 1824 m_minMBCharWidth = 0;
bde4baac 1825 }
dccce9ea 1826
e95354ec 1827 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1828 {
1829 m_CodePage = wxEncodingToCodepage(encoding);
c1464d9d 1830 m_minMBCharWidth = 0;
bde4baac 1831 }
eec47cc6 1832#endif // wxUSE_FONTMAP
8b04d4c4 1833
d36c9347 1834 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1835 {
02272c9c
VZ
1836 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1837 // the behaviour is not compatible with the Unix version (using iconv)
1838 // and break the library itself, e.g. wxTextInputStream::NextChar()
1839 // wouldn't work if reading an incomplete MB char didn't result in an
1840 // error
667e5b3e 1841 //
89028980 1842 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
830f8f11
VZ
1843 // Win XP or newer and it is not supported for UTF-[78] so we always
1844 // use our own conversions in this case. See
89028980
VS
1845 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1846 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
830f8f11 1847 if ( m_CodePage == CP_UTF8 )
89028980 1848 {
830f8f11 1849 return wxConvUTF8.MB2WC(buf, psz, n);
89028980 1850 }
830f8f11
VZ
1851
1852 if ( m_CodePage == CP_UTF7 )
1853 {
1854 return wxConvUTF7.MB2WC(buf, psz, n);
1855 }
1856
1857 int flags = 0;
1858 if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
1859 IsAtLeastWin2kSP4() )
89028980 1860 {
830f8f11 1861 flags = MB_ERR_INVALID_CHARS;
89028980 1862 }
667e5b3e 1863
2b5f62a0
VZ
1864 const size_t len = ::MultiByteToWideChar
1865 (
1866 m_CodePage, // code page
667e5b3e 1867 flags, // flags: fall on error
2b5f62a0
VZ
1868 psz, // input string
1869 -1, // its length (NUL-terminated)
b4da152e 1870 buf, // output string
2b5f62a0
VZ
1871 buf ? n : 0 // size of output buffer
1872 );
89028980
VS
1873 if ( !len )
1874 {
1875 // function totally failed
1876 return (size_t)-1;
1877 }
1878
1879 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1880 // check if we succeeded, by doing a double trip:
1881 if ( !flags && buf )
1882 {
53c174fc
VZ
1883 const size_t mbLen = strlen(psz);
1884 wxCharBuffer mbBuf(mbLen);
89028980
VS
1885 if ( ::WideCharToMultiByte
1886 (
1887 m_CodePage,
1888 0,
1889 buf,
1890 -1,
1891 mbBuf.data(),
53c174fc 1892 mbLen + 1, // size in bytes, not length
89028980
VS
1893 NULL,
1894 NULL
1895 ) == 0 ||
1896 strcmp(mbBuf, psz) != 0 )
1897 {
1898 // we didn't obtain the same thing we started from, hence
1899 // the conversion was lossy and we consider that it failed
1900 return (size_t)-1;
1901 }
1902 }
2b5f62a0 1903
03a991bc
VZ
1904 // note that it returns count of written chars for buf != NULL and size
1905 // of the needed buffer for buf == NULL so in either case the length of
1906 // the string (which never includes the terminating NUL) is one less
89028980 1907 return len - 1;
f1339c56 1908 }
dccce9ea 1909
d36c9347 1910 virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1911 {
13dd924a
VZ
1912 /*
1913 we have a problem here: by default, WideCharToMultiByte() may
1914 replace characters unrepresentable in the target code page with bad
1915 quality approximations such as turning "1/2" symbol (U+00BD) into
1916 "1" for the code pages which don't have it and we, obviously, want
1917 to avoid this at any price
d775fa82 1918
13dd924a
VZ
1919 the trouble is that this function does it _silently_, i.e. it won't
1920 even tell us whether it did or not... Win98/2000 and higher provide
1921 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1922 we have to resort to a round trip, i.e. check that converting back
1923 results in the same string -- this is, of course, expensive but
1924 otherwise we simply can't be sure to not garble the data.
1925 */
1926
1927 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1928 // it doesn't work with CJK encodings (which we test for rather roughly
1929 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1930 // supporting it
907173e5
WS
1931 BOOL usedDef wxDUMMY_INITIALIZE(false);
1932 BOOL *pUsedDef;
13dd924a
VZ
1933 int flags;
1934 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1935 {
1936 // it's our lucky day
1937 flags = WC_NO_BEST_FIT_CHARS;
1938 pUsedDef = &usedDef;
1939 }
1940 else // old system or unsupported encoding
1941 {
1942 flags = 0;
1943 pUsedDef = NULL;
1944 }
1945
2b5f62a0
VZ
1946 const size_t len = ::WideCharToMultiByte
1947 (
1948 m_CodePage, // code page
13dd924a
VZ
1949 flags, // either none or no best fit
1950 pwz, // input string
2b5f62a0
VZ
1951 -1, // it is (wide) NUL-terminated
1952 buf, // output buffer
1953 buf ? n : 0, // and its size
1954 NULL, // default "replacement" char
13dd924a 1955 pUsedDef // [out] was it used?
2b5f62a0
VZ
1956 );
1957
13dd924a
VZ
1958 if ( !len )
1959 {
1960 // function totally failed
1961 return (size_t)-1;
1962 }
1963
1964 // if we were really converting, check if we succeeded
1965 if ( buf )
1966 {
1967 if ( flags )
1968 {
1969 // check if the conversion failed, i.e. if any replacements
1970 // were done
1971 if ( usedDef )
1972 return (size_t)-1;
1973 }
1974 else // we must resort to double tripping...
1975 {
1976 wxWCharBuffer wcBuf(n);
1977 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1978 wcscmp(wcBuf, pwz) != 0 )
1979 {
1980 // we didn't obtain the same thing we started from, hence
1981 // the conversion was lossy and we consider that it failed
1982 return (size_t)-1;
1983 }
1984 }
1985 }
1986
03a991bc 1987 // see the comment above for the reason of "len - 1"
13dd924a 1988 return len - 1;
f1339c56 1989 }
dccce9ea 1990
7ef3ab50
VZ
1991 virtual size_t GetMBNulLen() const
1992 {
1993 if ( m_minMBCharWidth == 0 )
1994 {
1995 int len = ::WideCharToMultiByte
1996 (
1997 m_CodePage, // code page
1998 0, // no flags
1999 L"", // input string
2000 1, // translate just the NUL
2001 NULL, // output buffer
2002 0, // and its size
2003 NULL, // no replacement char
2004 NULL // [out] don't care if it was used
2005 );
2006
2007 wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2008 switch ( len )
2009 {
2010 default:
2011 wxLogDebug(_T("Unexpected NUL length %d"), len);
2012 // fall through
2013
2014 case 0:
2015 self->m_minMBCharWidth = (size_t)-1;
2016 break;
2017
2018 case 1:
2019 case 2:
2020 case 4:
2021 self->m_minMBCharWidth = len;
2022 break;
2023 }
2024 }
2025
2026 return m_minMBCharWidth;
2027 }
2028
d36c9347
VZ
2029 virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2030
13dd924a
VZ
2031 bool IsOk() const { return m_CodePage != -1; }
2032
2033private:
2034 static bool CanUseNoBestFit()
2035 {
2036 static int s_isWin98Or2k = -1;
2037
2038 if ( s_isWin98Or2k == -1 )
2039 {
2040 int verMaj, verMin;
2041 switch ( wxGetOsVersion(&verMaj, &verMin) )
2042 {
2043 case wxWIN95:
2044 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2045 break;
2046
2047 case wxWINDOWS_NT:
2048 s_isWin98Or2k = verMaj >= 5;
2049 break;
2050
2051 default:
2052 // unknown, be conseravtive by default
2053 s_isWin98Or2k = 0;
2054 }
2055
2056 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
2057 }
2058
2059 return s_isWin98Or2k == 1;
2060 }
f1339c56 2061
89028980
VS
2062 static bool IsAtLeastWin2kSP4()
2063 {
8942f83a
WS
2064#ifdef __WXWINCE__
2065 return false;
2066#else
89028980
VS
2067 static int s_isAtLeastWin2kSP4 = -1;
2068
2069 if ( s_isAtLeastWin2kSP4 == -1 )
2070 {
2071 OSVERSIONINFOEX ver;
2072
2073 memset(&ver, 0, sizeof(ver));
2074 ver.dwOSVersionInfoSize = sizeof(ver);
2075 GetVersionEx((OSVERSIONINFO*)&ver);
2076
2077 s_isAtLeastWin2kSP4 =
2078 ((ver.dwMajorVersion > 5) || // Vista+
2079 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2080 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2081 ver.wServicePackMajor >= 4)) // 2000 SP4+
2082 ? 1 : 0;
2083 }
2084
2085 return s_isAtLeastWin2kSP4 == 1;
8942f83a 2086#endif
89028980
VS
2087 }
2088
eec47cc6 2089
c1464d9d 2090 // the code page we're working with
b1d66b54 2091 long m_CodePage;
c1464d9d 2092
7ef3ab50 2093 // cached result of GetMBNulLen(), set to 0 initially meaning
c1464d9d
VZ
2094 // "unknown"
2095 size_t m_minMBCharWidth;
1cd52418 2096};
e95354ec
VZ
2097
2098#endif // wxHAVE_WIN32_MB2WC
2099
f7e98dee
RN
2100// ============================================================================
2101// Cocoa conversion classes
2102// ============================================================================
2103
2104#if defined(__WXCOCOA__)
2105
// RN: There is no UTF-32 support in either Core Foundation or
// Cocoa. Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
2109
2110#include <CoreFoundation/CFString.h>
2111#include <CoreFoundation/CFStringEncodingExt.h>
2112
2113CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 2114{
638357a0 2115 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ecd9653b
WS
2116 if ( encoding == wxFONTENCODING_DEFAULT )
2117 {
638357a0 2118 enc = CFStringGetSystemEncoding();
ecd9653b
WS
2119 }
2120 else switch( encoding)
2121 {
2122 case wxFONTENCODING_ISO8859_1 :
2123 enc = kCFStringEncodingISOLatin1 ;
2124 break ;
2125 case wxFONTENCODING_ISO8859_2 :
2126 enc = kCFStringEncodingISOLatin2;
2127 break ;
2128 case wxFONTENCODING_ISO8859_3 :
2129 enc = kCFStringEncodingISOLatin3 ;
2130 break ;
2131 case wxFONTENCODING_ISO8859_4 :
2132 enc = kCFStringEncodingISOLatin4;
2133 break ;
2134 case wxFONTENCODING_ISO8859_5 :
2135 enc = kCFStringEncodingISOLatinCyrillic;
2136 break ;
2137 case wxFONTENCODING_ISO8859_6 :
2138 enc = kCFStringEncodingISOLatinArabic;
2139 break ;
2140 case wxFONTENCODING_ISO8859_7 :
2141 enc = kCFStringEncodingISOLatinGreek;
2142 break ;
2143 case wxFONTENCODING_ISO8859_8 :
2144 enc = kCFStringEncodingISOLatinHebrew;
2145 break ;
2146 case wxFONTENCODING_ISO8859_9 :
2147 enc = kCFStringEncodingISOLatin5;
2148 break ;
2149 case wxFONTENCODING_ISO8859_10 :
2150 enc = kCFStringEncodingISOLatin6;
2151 break ;
2152 case wxFONTENCODING_ISO8859_11 :
2153 enc = kCFStringEncodingISOLatinThai;
2154 break ;
2155 case wxFONTENCODING_ISO8859_13 :
2156 enc = kCFStringEncodingISOLatin7;
2157 break ;
2158 case wxFONTENCODING_ISO8859_14 :
2159 enc = kCFStringEncodingISOLatin8;
2160 break ;
2161 case wxFONTENCODING_ISO8859_15 :
2162 enc = kCFStringEncodingISOLatin9;
2163 break ;
2164
2165 case wxFONTENCODING_KOI8 :
2166 enc = kCFStringEncodingKOI8_R;
2167 break ;
2168 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2169 enc = kCFStringEncodingDOSRussian;
2170 break ;
2171
2172// case wxFONTENCODING_BULGARIAN :
2173// enc = ;
2174// break ;
2175
2176 case wxFONTENCODING_CP437 :
2177 enc =kCFStringEncodingDOSLatinUS ;
2178 break ;
2179 case wxFONTENCODING_CP850 :
2180 enc = kCFStringEncodingDOSLatin1;
2181 break ;
2182 case wxFONTENCODING_CP852 :
2183 enc = kCFStringEncodingDOSLatin2;
2184 break ;
2185 case wxFONTENCODING_CP855 :
2186 enc = kCFStringEncodingDOSCyrillic;
2187 break ;
2188 case wxFONTENCODING_CP866 :
2189 enc =kCFStringEncodingDOSRussian ;
2190 break ;
2191 case wxFONTENCODING_CP874 :
2192 enc = kCFStringEncodingDOSThai;
2193 break ;
2194 case wxFONTENCODING_CP932 :
2195 enc = kCFStringEncodingDOSJapanese;
2196 break ;
2197 case wxFONTENCODING_CP936 :
2198 enc =kCFStringEncodingDOSChineseSimplif ;
2199 break ;
2200 case wxFONTENCODING_CP949 :
2201 enc = kCFStringEncodingDOSKorean;
2202 break ;
2203 case wxFONTENCODING_CP950 :
2204 enc = kCFStringEncodingDOSChineseTrad;
2205 break ;
ecd9653b
WS
2206 case wxFONTENCODING_CP1250 :
2207 enc = kCFStringEncodingWindowsLatin2;
2208 break ;
2209 case wxFONTENCODING_CP1251 :
2210 enc =kCFStringEncodingWindowsCyrillic ;
2211 break ;
2212 case wxFONTENCODING_CP1252 :
2213 enc =kCFStringEncodingWindowsLatin1 ;
2214 break ;
2215 case wxFONTENCODING_CP1253 :
2216 enc = kCFStringEncodingWindowsGreek;
2217 break ;
2218 case wxFONTENCODING_CP1254 :
2219 enc = kCFStringEncodingWindowsLatin5;
2220 break ;
2221 case wxFONTENCODING_CP1255 :
2222 enc =kCFStringEncodingWindowsHebrew ;
2223 break ;
2224 case wxFONTENCODING_CP1256 :
2225 enc =kCFStringEncodingWindowsArabic ;
2226 break ;
2227 case wxFONTENCODING_CP1257 :
2228 enc = kCFStringEncodingWindowsBalticRim;
2229 break ;
638357a0
RN
2230// This only really encodes to UTF7 (if that) evidently
2231// case wxFONTENCODING_UTF7 :
2232// enc = kCFStringEncodingNonLossyASCII ;
2233// break ;
ecd9653b
WS
2234 case wxFONTENCODING_UTF8 :
2235 enc = kCFStringEncodingUTF8 ;
2236 break ;
2237 case wxFONTENCODING_EUC_JP :
2238 enc = kCFStringEncodingEUC_JP;
2239 break ;
2240 case wxFONTENCODING_UTF16 :
f7e98dee 2241 enc = kCFStringEncodingUnicode ;
ecd9653b 2242 break ;
f7e98dee
RN
2243 case wxFONTENCODING_MACROMAN :
2244 enc = kCFStringEncodingMacRoman ;
2245 break ;
2246 case wxFONTENCODING_MACJAPANESE :
2247 enc = kCFStringEncodingMacJapanese ;
2248 break ;
2249 case wxFONTENCODING_MACCHINESETRAD :
2250 enc = kCFStringEncodingMacChineseTrad ;
2251 break ;
2252 case wxFONTENCODING_MACKOREAN :
2253 enc = kCFStringEncodingMacKorean ;
2254 break ;
2255 case wxFONTENCODING_MACARABIC :
2256 enc = kCFStringEncodingMacArabic ;
2257 break ;
2258 case wxFONTENCODING_MACHEBREW :
2259 enc = kCFStringEncodingMacHebrew ;
2260 break ;
2261 case wxFONTENCODING_MACGREEK :
2262 enc = kCFStringEncodingMacGreek ;
2263 break ;
2264 case wxFONTENCODING_MACCYRILLIC :
2265 enc = kCFStringEncodingMacCyrillic ;
2266 break ;
2267 case wxFONTENCODING_MACDEVANAGARI :
2268 enc = kCFStringEncodingMacDevanagari ;
2269 break ;
2270 case wxFONTENCODING_MACGURMUKHI :
2271 enc = kCFStringEncodingMacGurmukhi ;
2272 break ;
2273 case wxFONTENCODING_MACGUJARATI :
2274 enc = kCFStringEncodingMacGujarati ;
2275 break ;
2276 case wxFONTENCODING_MACORIYA :
2277 enc = kCFStringEncodingMacOriya ;
2278 break ;
2279 case wxFONTENCODING_MACBENGALI :
2280 enc = kCFStringEncodingMacBengali ;
2281 break ;
2282 case wxFONTENCODING_MACTAMIL :
2283 enc = kCFStringEncodingMacTamil ;
2284 break ;
2285 case wxFONTENCODING_MACTELUGU :
2286 enc = kCFStringEncodingMacTelugu ;
2287 break ;
2288 case wxFONTENCODING_MACKANNADA :
2289 enc = kCFStringEncodingMacKannada ;
2290 break ;
2291 case wxFONTENCODING_MACMALAJALAM :
2292 enc = kCFStringEncodingMacMalayalam ;
2293 break ;
2294 case wxFONTENCODING_MACSINHALESE :
2295 enc = kCFStringEncodingMacSinhalese ;
2296 break ;
2297 case wxFONTENCODING_MACBURMESE :
2298 enc = kCFStringEncodingMacBurmese ;
2299 break ;
2300 case wxFONTENCODING_MACKHMER :
2301 enc = kCFStringEncodingMacKhmer ;
2302 break ;
2303 case wxFONTENCODING_MACTHAI :
2304 enc = kCFStringEncodingMacThai ;
2305 break ;
2306 case wxFONTENCODING_MACLAOTIAN :
2307 enc = kCFStringEncodingMacLaotian ;
2308 break ;
2309 case wxFONTENCODING_MACGEORGIAN :
2310 enc = kCFStringEncodingMacGeorgian ;
2311 break ;
2312 case wxFONTENCODING_MACARMENIAN :
2313 enc = kCFStringEncodingMacArmenian ;
2314 break ;
2315 case wxFONTENCODING_MACCHINESESIMP :
2316 enc = kCFStringEncodingMacChineseSimp ;
2317 break ;
2318 case wxFONTENCODING_MACTIBETAN :
2319 enc = kCFStringEncodingMacTibetan ;
2320 break ;
2321 case wxFONTENCODING_MACMONGOLIAN :
2322 enc = kCFStringEncodingMacMongolian ;
2323 break ;
2324 case wxFONTENCODING_MACETHIOPIC :
2325 enc = kCFStringEncodingMacEthiopic ;
2326 break ;
2327 case wxFONTENCODING_MACCENTRALEUR :
2328 enc = kCFStringEncodingMacCentralEurRoman ;
2329 break ;
2330 case wxFONTENCODING_MACVIATNAMESE :
2331 enc = kCFStringEncodingMacVietnamese ;
2332 break ;
2333 case wxFONTENCODING_MACARABICEXT :
2334 enc = kCFStringEncodingMacExtArabic ;
2335 break ;
2336 case wxFONTENCODING_MACSYMBOL :
2337 enc = kCFStringEncodingMacSymbol ;
2338 break ;
2339 case wxFONTENCODING_MACDINGBATS :
2340 enc = kCFStringEncodingMacDingbats ;
2341 break ;
2342 case wxFONTENCODING_MACTURKISH :
2343 enc = kCFStringEncodingMacTurkish ;
2344 break ;
2345 case wxFONTENCODING_MACCROATIAN :
2346 enc = kCFStringEncodingMacCroatian ;
2347 break ;
2348 case wxFONTENCODING_MACICELANDIC :
2349 enc = kCFStringEncodingMacIcelandic ;
2350 break ;
2351 case wxFONTENCODING_MACROMANIAN :
2352 enc = kCFStringEncodingMacRomanian ;
2353 break ;
2354 case wxFONTENCODING_MACCELTIC :
2355 enc = kCFStringEncodingMacCeltic ;
2356 break ;
2357 case wxFONTENCODING_MACGAELIC :
2358 enc = kCFStringEncodingMacGaelic ;
2359 break ;
ecd9653b
WS
2360// case wxFONTENCODING_MACKEYBOARD :
2361// enc = kCFStringEncodingMacKeyboardGlyphs ;
2362// break ;
2363 default :
2364 // because gcc is picky
2365 break ;
2366 } ;
2367 return enc ;
f7e98dee
RN
2368}
2369
f7e98dee
RN
2370class wxMBConv_cocoa : public wxMBConv
2371{
2372public:
2373 wxMBConv_cocoa()
2374 {
2375 Init(CFStringGetSystemEncoding()) ;
2376 }
2377
d36c9347
VZ
2378 wxMBConv_cocoa(const wxMBConv_cocoa& conv)
2379 {
2380 m_encoding = conv.m_encoding;
2381 }
2382
a6900d10 2383#if wxUSE_FONTMAP
f7e98dee
RN
2384 wxMBConv_cocoa(const wxChar* name)
2385 {
267e11c5 2386 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
f7e98dee 2387 }
a6900d10 2388#endif
f7e98dee
RN
2389
2390 wxMBConv_cocoa(wxFontEncoding encoding)
2391 {
2392 Init( wxCFStringEncFromFontEnc(encoding) );
2393 }
2394
2395 ~wxMBConv_cocoa()
2396 {
2397 }
2398
2399 void Init( CFStringEncoding encoding)
2400 {
638357a0 2401 m_encoding = encoding ;
f7e98dee
RN
2402 }
2403
2404 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2405 {
2406 wxASSERT(szUnConv);
ecd9653b 2407
638357a0
RN
2408 CFStringRef theString = CFStringCreateWithBytes (
2409 NULL, //the allocator
2410 (const UInt8*)szUnConv,
2411 strlen(szUnConv),
2412 m_encoding,
2413 false //no BOM/external representation
f7e98dee
RN
2414 );
2415
2416 wxASSERT(theString);
2417
638357a0
RN
2418 size_t nOutLength = CFStringGetLength(theString);
2419
2420 if (szOut == NULL)
f7e98dee 2421 {
f7e98dee 2422 CFRelease(theString);
638357a0 2423 return nOutLength;
f7e98dee 2424 }
ecd9653b 2425
638357a0 2426 CFRange theRange = { 0, nOutSize };
ecd9653b 2427
638357a0
RN
2428#if SIZEOF_WCHAR_T == 4
2429 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2430#endif
3698ae71 2431
f7e98dee 2432 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
3698ae71 2433
f7e98dee 2434 CFRelease(theString);
ecd9653b 2435
638357a0 2436 szUniCharBuffer[nOutLength] = '\0' ;
f7e98dee
RN
2437
2438#if SIZEOF_WCHAR_T == 4
2439 wxMBConvUTF16 converter ;
638357a0 2440 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
f7e98dee
RN
2441 delete[] szUniCharBuffer;
2442#endif
3698ae71 2443
638357a0 2444 return nOutLength;
f7e98dee
RN
2445 }
2446
2447 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2448 {
638357a0 2449 wxASSERT(szUnConv);
3698ae71 2450
f7e98dee 2451 size_t nRealOutSize;
638357a0 2452 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 2453 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 2454
f7e98dee 2455#if SIZEOF_WCHAR_T == 4
d9d488cf 2456 wxMBConvUTF16 converter ;
f7e98dee
RN
2457 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2458 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2459 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2460 nBufSize /= sizeof(UniChar);
f7e98dee
RN
2461#endif
2462
2463 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2464 NULL, //allocator
2465 szUniBuffer,
2466 nBufSize,
638357a0 2467 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 2468 );
ecd9653b 2469
f7e98dee 2470 wxASSERT(theString);
ecd9653b 2471
f7e98dee 2472 //Note that CER puts a BOM when converting to unicode
638357a0
RN
2473 //so we check and use getchars instead in that case
2474 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 2475 {
638357a0
RN
2476 if (szOut != NULL)
2477 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
3698ae71 2478
638357a0
RN
2479 nRealOutSize = CFStringGetLength(theString) + 1;
2480 }
2481 else
2482 {
2483 CFStringGetBytes(
2484 theString,
2485 CFRangeMake(0, CFStringGetLength(theString)),
2486 m_encoding,
2487 0, //what to put in characters that can't be converted -
2488 //0 tells CFString to return NULL if it meets such a character
2489 false, //not an external representation
2490 (UInt8*) szOut,
3698ae71 2491 nOutSize,
638357a0
RN
2492 (CFIndex*) &nRealOutSize
2493 );
f7e98dee 2494 }
ecd9653b 2495
638357a0 2496 CFRelease(theString);
ecd9653b 2497
638357a0
RN
2498#if SIZEOF_WCHAR_T == 4
2499 delete[] szUniBuffer;
2500#endif
ecd9653b 2501
f7e98dee
RN
2502 return nRealOutSize - 1;
2503 }
2504
d36c9347
VZ
2505 virtual wxMBConv *Clone() const { return new wxMBConv_cocoa(*this); }
2506
f7e98dee 2507 bool IsOk() const
ecd9653b 2508 {
3698ae71 2509 return m_encoding != kCFStringEncodingInvalidId &&
638357a0 2510 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2511 }
2512
2513private:
638357a0 2514 CFStringEncoding m_encoding ;
f7e98dee
RN
2515};
2516
2517#endif // defined(__WXCOCOA__)
2518
335d31e0
SC
2519// ============================================================================
2520// Mac conversion classes
2521// ============================================================================
2522
2523#if defined(__WXMAC__) && defined(TARGET_CARBON)
2524
2525class wxMBConv_mac : public wxMBConv
2526{
2527public:
2528 wxMBConv_mac()
2529 {
2530 Init(CFStringGetSystemEncoding()) ;
2531 }
2532
d36c9347
VZ
2533 wxMBConv_mac(const wxMBConv_mac& conv)
2534 {
2535 Init(conv.m_char_encoding);
2536 }
2537
2d1659cf 2538#if wxUSE_FONTMAP
335d31e0
SC
2539 wxMBConv_mac(const wxChar* name)
2540 {
267e11c5 2541 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
335d31e0 2542 }
2d1659cf 2543#endif
335d31e0
SC
2544
2545 wxMBConv_mac(wxFontEncoding encoding)
2546 {
d775fa82
WS
2547 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2548 }
2549
2550 ~wxMBConv_mac()
2551 {
2552 OSStatus status = noErr ;
2553 status = TECDisposeConverter(m_MB2WC_converter);
2554 status = TECDisposeConverter(m_WC2MB_converter);
2555 }
2556
2557
2558 void Init( TextEncodingBase encoding)
2559 {
2560 OSStatus status = noErr ;
2561 m_char_encoding = encoding ;
2562 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2563
2564 status = TECCreateConverter(&m_MB2WC_converter,
2565 m_char_encoding,
2566 m_unicode_encoding);
2567 status = TECCreateConverter(&m_WC2MB_converter,
2568 m_unicode_encoding,
2569 m_char_encoding);
2570 }
2571
335d31e0
SC
2572 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2573 {
d775fa82
WS
2574 OSStatus status = noErr ;
2575 ByteCount byteOutLen ;
2576 ByteCount byteInLen = strlen(psz) ;
2577 wchar_t *tbuf = NULL ;
2578 UniChar* ubuf = NULL ;
2579 size_t res = 0 ;
2580
2581 if (buf == NULL)
2582 {
638357a0 2583 //apple specs say at least 32
c543817b 2584 n = wxMax( 32 , byteInLen ) ;
d775fa82
WS
2585 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2586 }
2587 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
f3a355ce 2588#if SIZEOF_WCHAR_T == 4
d775fa82 2589 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2590#else
d775fa82 2591 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2592#endif
d775fa82
WS
2593 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2594 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
f3a355ce 2595#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2596 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2597 // is not properly terminated we get random characters at the end
2598 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d9d488cf 2599 wxMBConvUTF16 converter ;
d775fa82
WS
2600 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2601 free( ubuf ) ;
f3a355ce 2602#else
d775fa82 2603 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2604#endif
d775fa82
WS
2605 if ( buf == NULL )
2606 free(tbuf) ;
335d31e0 2607
335d31e0
SC
2608 if ( buf && res < n)
2609 buf[res] = 0;
2610
d775fa82 2611 return res ;
335d31e0
SC
2612 }
2613
2614 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82
WS
2615 {
2616 OSStatus status = noErr ;
2617 ByteCount byteOutLen ;
2618 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2619
2620 char *tbuf = NULL ;
2621
2622 if (buf == NULL)
2623 {
638357a0 2624 //apple specs say at least 32
c543817b 2625 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2626 tbuf = (char*) malloc( n ) ;
2627 }
2628
2629 ByteCount byteBufferLen = n ;
2630 UniChar* ubuf = NULL ;
f3a355ce 2631#if SIZEOF_WCHAR_T == 4
d9d488cf 2632 wxMBConvUTF16 converter ;
d775fa82
WS
2633 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2634 byteInLen = unicharlen ;
2635 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2636 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce 2637#else
d775fa82 2638 ubuf = (UniChar*) psz ;
f3a355ce 2639#endif
d775fa82
WS
2640 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2641 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
f3a355ce 2642#if SIZEOF_WCHAR_T == 4
d775fa82 2643 free( ubuf ) ;
f3a355ce 2644#endif
d775fa82
WS
2645 if ( buf == NULL )
2646 free(tbuf) ;
335d31e0 2647
d775fa82 2648 size_t res = byteOutLen ;
335d31e0 2649 if ( buf && res < n)
638357a0 2650 {
335d31e0 2651 buf[res] = 0;
3698ae71 2652
638357a0
RN
2653 //we need to double-trip to verify it didn't insert any ? in place
2654 //of bogus characters
2655 wxWCharBuffer wcBuf(n);
2656 size_t pszlen = wxWcslen(psz);
2657 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2658 wxWcslen(wcBuf) != pszlen ||
2659 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2660 {
2661 // we didn't obtain the same thing we started from, hence
2662 // the conversion was lossy and we consider that it failed
2663 return (size_t)-1;
2664 }
2665 }
335d31e0 2666
d775fa82 2667 return res ;
335d31e0
SC
2668 }
2669
d3478e2c 2670 virtual wxMBConv *Clone() const { return new wxMBConv_mac(*this); }
d36c9347 2671
335d31e0
SC
2672 bool IsOk() const
2673 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2674
2675private:
d775fa82
WS
2676 TECObjectRef m_MB2WC_converter ;
2677 TECObjectRef m_WC2MB_converter ;
2678
2679 TextEncodingBase m_char_encoding ;
2680 TextEncodingBase m_unicode_encoding ;
335d31e0
SC
2681};
2682
2683#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 2684
36acb880
VZ
2685// ============================================================================
2686// wxEncodingConverter based conversion classes
2687// ============================================================================
2688
1e6feb95 2689#if wxUSE_FONTMAP
1cd52418 2690
e95354ec 2691class wxMBConv_wxwin : public wxMBConv
1cd52418 2692{
8b04d4c4
VZ
2693private:
2694 void Init()
2695 {
2696 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2697 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2698 }
2699
6001e347 2700public:
f1339c56
RR
2701 // temporarily just use wxEncodingConverter stuff,
2702 // so that it works while a better implementation is built
e95354ec 2703 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
2704 {
2705 if (name)
267e11c5 2706 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
2707 else
2708 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 2709
8b04d4c4
VZ
2710 Init();
2711 }
2712
e95354ec 2713 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
2714 {
2715 m_enc = enc;
2716
2717 Init();
f1339c56 2718 }
dccce9ea 2719
bde4baac 2720 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
2721 {
2722 size_t inbuf = strlen(psz);
dccce9ea 2723 if (buf)
c643a977
VS
2724 {
2725 if (!m2w.Convert(psz,buf))
2726 return (size_t)-1;
2727 }
f1339c56
RR
2728 return inbuf;
2729 }
dccce9ea 2730
bde4baac 2731 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 2732 {
f8d791e0 2733 const size_t inbuf = wxWcslen(psz);
f1339c56 2734 if (buf)
c643a977
VS
2735 {
2736 if (!w2m.Convert(psz,buf))
2737 return (size_t)-1;
2738 }
dccce9ea 2739
f1339c56
RR
2740 return inbuf;
2741 }
dccce9ea 2742
7ef3ab50 2743 virtual size_t GetMBNulLen() const
eec47cc6
VZ
2744 {
2745 switch ( m_enc )
2746 {
2747 case wxFONTENCODING_UTF16BE:
2748 case wxFONTENCODING_UTF16LE:
c1464d9d 2749 return 2;
eec47cc6
VZ
2750
2751 case wxFONTENCODING_UTF32BE:
2752 case wxFONTENCODING_UTF32LE:
c1464d9d 2753 return 4;
eec47cc6
VZ
2754
2755 default:
c1464d9d 2756 return 1;
eec47cc6
VZ
2757 }
2758 }
2759
d36c9347
VZ
2760 virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
2761
7ef3ab50
VZ
2762 bool IsOk() const { return m_ok; }
2763
2764public:
2765 wxFontEncoding m_enc;
2766 wxEncodingConverter m2w, w2m;
2767
2768private:
cafbf6fb
VZ
2769 // were we initialized successfully?
2770 bool m_ok;
fc7a2a60 2771
e95354ec 2772 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 2773};
6001e347 2774
8f115891
MW
2775// make the constructors available for unit testing
2776WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2777{
2778 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2779 if ( !result->IsOk() )
2780 {
2781 delete result;
2782 return 0;
2783 }
2784 return result;
2785}
2786
1e6feb95
VZ
2787#endif // wxUSE_FONTMAP
2788
36acb880
VZ
2789// ============================================================================
2790// wxCSConv implementation
2791// ============================================================================
2792
8b04d4c4 2793void wxCSConv::Init()
6001e347 2794{
e95354ec
VZ
2795 m_name = NULL;
2796 m_convReal = NULL;
2797 m_deferred = true;
2798}
2799
8b04d4c4
VZ
2800wxCSConv::wxCSConv(const wxChar *charset)
2801{
2802 Init();
82713003 2803
e95354ec
VZ
2804 if ( charset )
2805 {
e95354ec
VZ
2806 SetName(charset);
2807 }
bda3d86a 2808
e4277538
VZ
2809#if wxUSE_FONTMAP
2810 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2811#else
bda3d86a 2812 m_encoding = wxFONTENCODING_SYSTEM;
e4277538 2813#endif
6001e347
RR
2814}
2815
8b04d4c4
VZ
2816wxCSConv::wxCSConv(wxFontEncoding encoding)
2817{
bda3d86a 2818 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
2819 {
2820 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2821
2822 encoding = wxFONTENCODING_SYSTEM;
2823 }
2824
8b04d4c4
VZ
2825 Init();
2826
bda3d86a 2827 m_encoding = encoding;
8b04d4c4
VZ
2828}
2829
6001e347
RR
2830wxCSConv::~wxCSConv()
2831{
65e50848
JS
2832 Clear();
2833}
2834
54380f29 2835wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 2836 : wxMBConv()
54380f29 2837{
8b04d4c4
VZ
2838 Init();
2839
54380f29 2840 SetName(conv.m_name);
8b04d4c4 2841 m_encoding = conv.m_encoding;
54380f29
GD
2842}
2843
2844wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2845{
2846 Clear();
8b04d4c4 2847
54380f29 2848 SetName(conv.m_name);
8b04d4c4
VZ
2849 m_encoding = conv.m_encoding;
2850
54380f29
GD
2851 return *this;
2852}
2853
65e50848
JS
2854void wxCSConv::Clear()
2855{
8b04d4c4 2856 free(m_name);
e95354ec 2857 delete m_convReal;
8b04d4c4 2858
65e50848 2859 m_name = NULL;
e95354ec 2860 m_convReal = NULL;
6001e347
RR
2861}
2862
2863void wxCSConv::SetName(const wxChar *charset)
2864{
f1339c56
RR
2865 if (charset)
2866 {
2867 m_name = wxStrdup(charset);
e95354ec 2868 m_deferred = true;
f1339c56 2869 }
6001e347
RR
2870}
2871
8b3eb85d
VZ
#if wxUSE_FONTMAP
#include "wx/hashmap.h"

// map from an encoding to a charset name; wxCSConv::DoCreate() stores here
// the name with which wxMBConv_iconv could be created for this encoding or
// an empty string if none of the names worked
WX_DECLARE_HASH_MAP( wxFontEncoding,
                     wxString,
                     wxIntegerHash,
                     wxIntegerEqual,
                     wxEncodingNameCache );

static wxEncodingNameCache gs_nameCache;
#endif
2880
e95354ec
VZ
2881wxMBConv *wxCSConv::DoCreate() const
2882{
ce6f8d6f
VZ
2883#if wxUSE_FONTMAP
2884 wxLogTrace(TRACE_STRCONV,
2885 wxT("creating conversion for %s"),
2886 (m_name ? m_name
2887 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2888#endif // wxUSE_FONTMAP
2889
c547282d
VZ
2890 // check for the special case of ASCII or ISO8859-1 charset: as we have
2891 // special knowledge of it anyhow, we don't need to create a special
2892 // conversion object
e4277538
VZ
2893 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
2894 m_encoding == wxFONTENCODING_DEFAULT )
f1339c56 2895 {
e95354ec
VZ
2896 // don't convert at all
2897 return NULL;
2898 }
dccce9ea 2899
e95354ec
VZ
2900 // we trust OS to do conversion better than we can so try external
2901 // conversion methods first
2902 //
2903 // the full order is:
2904 // 1. OS conversion (iconv() under Unix or Win32 API)
2905 // 2. hard coded conversions for UTF
2906 // 3. wxEncodingConverter as fall back
2907
2908 // step (1)
2909#ifdef HAVE_ICONV
c547282d 2910#if !wxUSE_FONTMAP
e95354ec 2911 if ( m_name )
c547282d 2912#endif // !wxUSE_FONTMAP
e95354ec 2913 {
c547282d 2914 wxString name(m_name);
8b3eb85d
VZ
2915 wxFontEncoding encoding(m_encoding);
2916
2917 if ( !name.empty() )
2918 {
2919 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2920 if ( conv->IsOk() )
2921 return conv;
2922
2923 delete conv;
c547282d
VZ
2924
2925#if wxUSE_FONTMAP
8b3eb85d
VZ
2926 encoding =
2927 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
c547282d 2928#endif // wxUSE_FONTMAP
8b3eb85d
VZ
2929 }
2930#if wxUSE_FONTMAP
2931 {
2932 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2933 if ( it != gs_nameCache.end() )
2934 {
2935 if ( it->second.empty() )
2936 return NULL;
c547282d 2937
8b3eb85d
VZ
2938 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2939 if ( conv->IsOk() )
2940 return conv;
e95354ec 2941
8b3eb85d
VZ
2942 delete conv;
2943 }
2944
2945 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2946
2947 for ( ; *names; ++names )
2948 {
2949 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2950 if ( conv->IsOk() )
2951 {
2952 gs_nameCache[encoding] = *names;
2953 return conv;
2954 }
2955
2956 delete conv;
2957 }
2958
40711af8 2959 gs_nameCache[encoding] = _T(""); // cache the failure
8b3eb85d
VZ
2960 }
2961#endif // wxUSE_FONTMAP
e95354ec
VZ
2962 }
2963#endif // HAVE_ICONV
2964
2965#ifdef wxHAVE_WIN32_MB2WC
2966 {
7608a683 2967#if wxUSE_FONTMAP
e95354ec
VZ
2968 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2969 : new wxMBConv_win32(m_encoding);
2970 if ( conv->IsOk() )
2971 return conv;
2972
2973 delete conv;
7608a683
WS
2974#else
2975 return NULL;
2976#endif
e95354ec
VZ
2977 }
2978#endif // wxHAVE_WIN32_MB2WC
d775fa82
WS
2979#if defined(__WXMAC__)
2980 {
5c3c8676 2981 // leave UTF16 and UTF32 to the built-ins of wx
3698ae71 2982 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
5c3c8676 2983 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
d775fa82
WS
2984 {
2985
2d1659cf 2986#if wxUSE_FONTMAP
d775fa82
WS
2987 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2988 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
2989#else
2990 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2991#endif
d775fa82 2992 if ( conv->IsOk() )
f7e98dee
RN
2993 return conv;
2994
2995 delete conv;
2996 }
2997 }
2998#endif
2999#if defined(__WXCOCOA__)
3000 {
3001 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
3002 {
3003
a6900d10 3004#if wxUSE_FONTMAP
f7e98dee
RN
3005 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
3006 : new wxMBConv_cocoa(m_encoding);
a6900d10
RN
3007#else
3008 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
3009#endif
f7e98dee 3010 if ( conv->IsOk() )
d775fa82
WS
3011 return conv;
3012
3013 delete conv;
3014 }
335d31e0
SC
3015 }
3016#endif
e95354ec
VZ
3017 // step (2)
3018 wxFontEncoding enc = m_encoding;
3019#if wxUSE_FONTMAP
c547282d
VZ
3020 if ( enc == wxFONTENCODING_SYSTEM && m_name )
3021 {
3022 // use "false" to suppress interactive dialogs -- we can be called from
3023 // anywhere and popping up a dialog from here is the last thing we want to
3024 // do
267e11c5 3025 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d 3026 }
e95354ec
VZ
3027#endif // wxUSE_FONTMAP
3028
3029 switch ( enc )
3030 {
3031 case wxFONTENCODING_UTF7:
3032 return new wxMBConvUTF7;
3033
3034 case wxFONTENCODING_UTF8:
3035 return new wxMBConvUTF8;
3036
e95354ec
VZ
3037 case wxFONTENCODING_UTF16BE:
3038 return new wxMBConvUTF16BE;
3039
3040 case wxFONTENCODING_UTF16LE:
3041 return new wxMBConvUTF16LE;
3042
e95354ec
VZ
3043 case wxFONTENCODING_UTF32BE:
3044 return new wxMBConvUTF32BE;
3045
3046 case wxFONTENCODING_UTF32LE:
3047 return new wxMBConvUTF32LE;
3048
3049 default:
3050 // nothing to do but put here to suppress gcc warnings
3051 ;
3052 }
3053
3054 // step (3)
3055#if wxUSE_FONTMAP
3056 {
3057 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
3058 : new wxMBConv_wxwin(m_encoding);
3059 if ( conv->IsOk() )
3060 return conv;
3061
3062 delete conv;
3063 }
3064#endif // wxUSE_FONTMAP
3065
a58d4f4d
VS
3066 // NB: This is a hack to prevent deadlock. What could otherwise happen
3067 // in Unicode build: wxConvLocal creation ends up being here
3068 // because of some failure and logs the error. But wxLog will try to
3069 // attach timestamp, for which it will need wxConvLocal (to convert
3070 // time to char* and then wchar_t*), but that fails, tries to log
3071 // error, but wxLog has a (already locked) critical section that
3072 // guards static buffer.
3073 static bool alreadyLoggingError = false;
3074 if (!alreadyLoggingError)
3075 {
3076 alreadyLoggingError = true;
3077 wxLogError(_("Cannot convert from the charset '%s'!"),
3078 m_name ? m_name
e95354ec
VZ
3079 :
3080#if wxUSE_FONTMAP
267e11c5 3081 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
e95354ec
VZ
3082#else // !wxUSE_FONTMAP
3083 wxString::Format(_("encoding %s"), m_encoding).c_str()
3084#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
3085 );
a58d4f4d
VS
3086 alreadyLoggingError = false;
3087 }
e95354ec
VZ
3088
3089 return NULL;
3090}
3091
3092void wxCSConv::CreateConvIfNeeded() const
3093{
3094 if ( m_deferred )
3095 {
3096 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
3097
3098#if wxUSE_INTL
3099 // if we don't have neither the name nor the encoding, use the default
3100 // encoding for this system
3101 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3102 {
4d312c22 3103 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
3104 }
3105#endif // wxUSE_INTL
3106
e95354ec
VZ
3107 self->m_convReal = DoCreate();
3108 self->m_deferred = false;
6001e347 3109 }
6001e347
RR
3110}
3111
3112size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
3113{
e95354ec 3114 CreateConvIfNeeded();
dccce9ea 3115
e95354ec
VZ
3116 if (m_convReal)
3117 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
3118
3119 // latin-1 (direct)
4def3b35 3120 size_t len = strlen(psz);
dccce9ea 3121
f1339c56
RR
3122 if (buf)
3123 {
4def3b35 3124 for (size_t c = 0; c <= len; c++)
f1339c56
RR
3125 buf[c] = (unsigned char)(psz[c]);
3126 }
dccce9ea 3127
f1339c56 3128 return len;
6001e347
RR
3129}
3130
3131size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
3132{
e95354ec 3133 CreateConvIfNeeded();
dccce9ea 3134
e95354ec
VZ
3135 if (m_convReal)
3136 return m_convReal->WC2MB(buf, psz, n);
1cd52418 3137
f1339c56 3138 // latin-1 (direct)
f8d791e0 3139 const size_t len = wxWcslen(psz);
f1339c56
RR
3140 if (buf)
3141 {
4def3b35 3142 for (size_t c = 0; c <= len; c++)
24642831
VS
3143 {
3144 if (psz[c] > 0xFF)
3145 return (size_t)-1;
907173e5 3146 buf[c] = (char)psz[c];
24642831
VS
3147 }
3148 }
3149 else
3150 {
3151 for (size_t c = 0; c <= len; c++)
3152 {
3153 if (psz[c] > 0xFF)
3154 return (size_t)-1;
3155 }
f1339c56 3156 }
dccce9ea 3157
f1339c56 3158 return len;
6001e347
RR
3159}
3160
7ef3ab50 3161size_t wxCSConv::GetMBNulLen() const
eec47cc6
VZ
3162{
3163 CreateConvIfNeeded();
3164
3165 if ( m_convReal )
3166 {
7ef3ab50 3167 return m_convReal->GetMBNulLen();
eec47cc6
VZ
3168 }
3169
c1464d9d 3170 return 1;
eec47cc6
VZ
3171}
3172
bde4baac
VZ
3173// ----------------------------------------------------------------------------
3174// globals
3175// ----------------------------------------------------------------------------
3176
3177#ifdef __WINDOWS__
3178 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
3179#elif defined(__WXMAC__) && !defined(__MACH__)
3180 static wxMBConv_mac wxConvLibcObj ;
bde4baac 3181#else
dcc8fac0 3182 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
3183#endif
3184
3185static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
3186static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
3187static wxMBConvUTF7 wxConvUTF7Obj;
3188static wxMBConvUTF8 wxConvUTF8Obj;
c12b7f79 3189
bde4baac
VZ
3190WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
3191WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
3192WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
3193WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
3194WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
3195WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
f5a1953b
VZ
3196WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
3197#ifdef __WXOSX__
ea8ce907 3198 wxConvUTF8Obj;
f5a1953b 3199#else
ea8ce907 3200 wxConvLibcObj;
f5a1953b
VZ
3201#endif
3202
bde4baac
VZ
3203
3204#else // !wxUSE_WCHAR_T
3205
3206// stand-ins in absence of wchar_t
3207WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
3208 wxConvISO8859_1,
3209 wxConvLocal,
3210 wxConvUTF8;
3211
3212#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T