]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
Forgot to free string after usage.
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
f6bcfd97
BP
15// ============================================================================
16// declarations
17// ============================================================================
18
19// ----------------------------------------------------------------------------
20// headers
21// ----------------------------------------------------------------------------
22
14f355c2 23#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
24 #pragma implementation "strconv.h"
25#endif
26
27// For compilers that support precompilation, includes "wx.h".
28#include "wx/wxprec.h"
29
30#ifdef __BORLANDC__
31 #pragma hdrstop
32#endif
33
373658eb
VZ
34#ifndef WX_PRECOMP
35 #include "wx/intl.h"
36 #include "wx/log.h"
37#endif // WX_PRECOMP
38
bde4baac
VZ
39#include "wx/strconv.h"
40
41#if wxUSE_WCHAR_T
42
0a1c1e62 43#ifdef __WXMSW__
373658eb 44 #include "wx/msw/private.h"
7608a683
WS
45#endif
46
47#ifdef __WINDOWS__
13dd924a 48 #include "wx/msw/missing.h"
0a1c1e62
GRG
49#endif
50
1c193821 51#ifndef __WXWINCE__
1cd52418 52#include <errno.h>
1c193821
JS
53#endif
54
6001e347
RR
55#include <ctype.h>
56#include <string.h>
57#include <stdlib.h>
58
e95354ec
VZ
59#if defined(__WIN32__) && !defined(__WXMICROWIN__)
60 #define wxHAVE_WIN32_MB2WC
61#endif // __WIN32__ but !__WXMICROWIN__
62
373658eb
VZ
63// ----------------------------------------------------------------------------
64// headers
65// ----------------------------------------------------------------------------
7af284fd 66
6001e347 67#ifdef __SALFORDC__
373658eb 68 #include <clib.h>
6001e347
RR
69#endif
70
b040e242 71#ifdef HAVE_ICONV
373658eb 72 #include <iconv.h>
b1d547eb 73 #include "wx/thread.h"
1cd52418 74#endif
1cd52418 75
373658eb
VZ
76#include "wx/encconv.h"
77#include "wx/fontmap.h"
7608a683 78#include "wx/utils.h"
373658eb 79
335d31e0 80#ifdef __WXMAC__
4227afa4
SC
81#include <ATSUnicode.h>
82#include <TextCommon.h>
83#include <TextEncodingConverter.h>
335d31e0
SC
84
85#include "wx/mac/private.h" // includes mac headers
86#endif
373658eb
VZ
87// ----------------------------------------------------------------------------
88// macros
89// ----------------------------------------------------------------------------
3e61dfb0 90
1cd52418 91#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
3a0d76bc 92#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
1cd52418
OK
93
94#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
95 #define WC_NAME "UCS4"
96 #define WC_BSWAP BSWAP_UCS4
97 #ifdef WORDS_BIGENDIAN
98 #define WC_NAME_BEST "UCS-4BE"
99 #else
100 #define WC_NAME_BEST "UCS-4LE"
101 #endif
1cd52418 102#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
103 #define WC_NAME "UTF16"
104 #define WC_BSWAP BSWAP_UTF16
a3f2769e 105 #define WC_UTF16
3a0d76bc
VS
106 #ifdef WORDS_BIGENDIAN
107 #define WC_NAME_BEST "UTF-16BE"
108 #else
109 #define WC_NAME_BEST "UTF-16LE"
110 #endif
bab1e722 111#else // sizeof(wchar_t) != 2 nor 4
bde4baac
VZ
112 // does this ever happen?
113 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1cd52418
OK
114#endif
115
373658eb
VZ
116// ============================================================================
117// implementation
118// ============================================================================
119
120// ----------------------------------------------------------------------------
c91830cb 121// UTF-16 en/decoding to/from UCS-4
373658eb 122// ----------------------------------------------------------------------------
6001e347 123
b0a6bb75 124
c91830cb 125static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 126{
dccce9ea 127 if (input<=0xffff)
4def3b35 128 {
999836aa
VZ
129 if (output)
130 *output = (wxUint16) input;
4def3b35 131 return 1;
dccce9ea
VZ
132 }
133 else if (input>=0x110000)
4def3b35
VS
134 {
135 return (size_t)-1;
dccce9ea
VZ
136 }
137 else
4def3b35 138 {
dccce9ea 139 if (output)
4def3b35 140 {
c91830cb 141 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
999836aa 142 *output = (wxUint16) ((input&0x3ff)+0xdc00);
4def3b35
VS
143 }
144 return 2;
1cd52418 145 }
1cd52418
OK
146}
147
c91830cb 148static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 149{
dccce9ea 150 if ((*input<0xd800) || (*input>0xdfff))
4def3b35
VS
151 {
152 output = *input;
153 return 1;
dccce9ea
VZ
154 }
155 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
4def3b35
VS
156 {
157 output = *input;
158 return (size_t)-1;
dccce9ea
VZ
159 }
160 else
4def3b35
VS
161 {
162 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
163 return 2;
164 }
1cd52418
OK
165}
166
b0a6bb75 167
f6bcfd97 168// ----------------------------------------------------------------------------
6001e347 169// wxMBConv
f6bcfd97 170// ----------------------------------------------------------------------------
2c53a80a
WS
171
172wxMBConv::~wxMBConv()
173{
174 // nothing to do here (necessary for Darwin linking probably)
175}
6001e347 176
6001e347
RR
177const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
178{
2b5f62a0 179 if ( psz )
6001e347 180 {
2b5f62a0
VZ
181 // calculate the length of the buffer needed first
182 size_t nLen = MB2WC(NULL, psz, 0);
183 if ( nLen != (size_t)-1 )
184 {
185 // now do the actual conversion
186 wxWCharBuffer buf(nLen);
635f33ce
VS
187 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
188 if ( nLen != (size_t)-1 )
189 {
190 return buf;
191 }
2b5f62a0 192 }
f6bcfd97 193 }
2b5f62a0
VZ
194
195 wxWCharBuffer buf((wchar_t *)NULL);
196
197 return buf;
6001e347
RR
198}
199
e5cceba0 200const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 201{
2b5f62a0
VZ
202 if ( pwz )
203 {
204 size_t nLen = WC2MB(NULL, pwz, 0);
205 if ( nLen != (size_t)-1 )
206 {
c91830cb 207 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
635f33ce
VS
208 nLen = WC2MB(buf.data(), pwz, nLen + 4);
209 if ( nLen != (size_t)-1 )
210 {
211 return buf;
212 }
2b5f62a0
VZ
213 }
214 }
215
216 wxCharBuffer buf((char *)NULL);
e5cceba0 217
e5cceba0 218 return buf;
6001e347
RR
219}
220
f5fb6871 221const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 222{
f5fb6871
RN
223 wxASSERT(pOutSize != NULL);
224
e4e3bbb4
RN
225 const char* szEnd = szString + nStringLen + 1;
226 const char* szPos = szString;
227 const char* szStart = szPos;
228
229 size_t nActualLength = 0;
f5fb6871
RN
230 size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
231
232 wxWCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
233
234 //Convert the string until the length() is reached, continuing the
235 //loop every time a null character is reached
236 while(szPos != szEnd)
237 {
238 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
239
240 //Get the length of the current (sub)string
241 size_t nLen = MB2WC(NULL, szPos, 0);
242
243 //Invalid conversion?
244 if( nLen == (size_t)-1 )
f5fb6871
RN
245 {
246 *pOutSize = 0;
247 theBuffer.data()[0u] = wxT('\0');
248 return theBuffer;
249 }
250
e4e3bbb4
RN
251
252 //Increase the actual length (+1 for current null character)
253 nActualLength += nLen + 1;
254
f5fb6871
RN
255 //if buffer too big, realloc the buffer
256 if (nActualLength > (nCurrentSize+1))
257 {
258 wxWCharBuffer theNewBuffer(nCurrentSize << 1);
259 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
260 theBuffer = theNewBuffer;
261 nCurrentSize <<= 1;
262 }
263
264 //Convert the current (sub)string
265 if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 266 {
f5fb6871
RN
267 *pOutSize = 0;
268 theBuffer.data()[0u] = wxT('\0');
269 return theBuffer;
e4e3bbb4
RN
270 }
271
272 //Increment to next (sub)string
273 //Note that we have to use strlen here instead of nLen
274 //here because XX2XX gives us the size of the output buffer,
275 //not neccessarly the length of the string
276 szPos += strlen(szPos) + 1;
277 }
278
f5fb6871
RN
279 //success - return actual length and the buffer
280 *pOutSize = nActualLength;
281 return theBuffer;
e4e3bbb4
RN
282}
283
f5fb6871 284const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 285{
f5fb6871
RN
286 wxASSERT(pOutSize != NULL);
287
e4e3bbb4
RN
288 const wchar_t* szEnd = szString + nStringLen + 1;
289 const wchar_t* szPos = szString;
290 const wchar_t* szStart = szPos;
291
292 size_t nActualLength = 0;
f5fb6871
RN
293 size_t nCurrentSize = nStringLen << 2; //try * 4 first
294
295 wxCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
296
297 //Convert the string until the length() is reached, continuing the
298 //loop every time a null character is reached
299 while(szPos != szEnd)
300 {
301 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
302
303 //Get the length of the current (sub)string
304 size_t nLen = WC2MB(NULL, szPos, 0);
305
306 //Invalid conversion?
307 if( nLen == (size_t)-1 )
f5fb6871
RN
308 {
309 *pOutSize = 0;
310 theBuffer.data()[0u] = wxT('\0');
311 return theBuffer;
312 }
e4e3bbb4
RN
313
314 //Increase the actual length (+1 for current null character)
315 nActualLength += nLen + 1;
316
f5fb6871
RN
317 //if buffer too big, realloc the buffer
318 if (nActualLength > (nCurrentSize+1))
319 {
320 wxCharBuffer theNewBuffer(nCurrentSize << 1);
321 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
322 theBuffer = theNewBuffer;
323 nCurrentSize <<= 1;
324 }
325
326 //Convert the current (sub)string
327 if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 328 {
f5fb6871
RN
329 *pOutSize = 0;
330 theBuffer.data()[0u] = wxT('\0');
331 return theBuffer;
e4e3bbb4
RN
332 }
333
334 //Increment to next (sub)string
335 //Note that we have to use wxWcslen here instead of nLen
336 //here because XX2XX gives us the size of the output buffer,
337 //not neccessarly the length of the string
338 szPos += wxWcslen(szPos) + 1;
339 }
340
f5fb6871
RN
341 //success - return actual length and the buffer
342 *pOutSize = nActualLength;
343 return theBuffer;
e4e3bbb4
RN
344}
345
6001e347 346// ----------------------------------------------------------------------------
bde4baac 347// wxMBConvLibc
6001e347
RR
348// ----------------------------------------------------------------------------
349
bde4baac
VZ
350size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
351{
352 return wxMB2WC(buf, psz, n);
353}
354
355size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
356{
357 return wxWC2MB(buf, psz, n);
358}
bde4baac 359// ----------------------------------------------------------------------------
15f2ee32 360// UTF-7
bde4baac 361// ----------------------------------------------------------------------------
6001e347 362
15f2ee32 363// Implementation (C) 2004 Fredrik Roubert
6001e347 364
15f2ee32
RN
365//
366// BASE64 decoding table
367//
// Lookup table indexed by byte value: gives the 6-bit BASE64 value of the
// characters 'A'-'Z', 'a'-'z', '0'-'9', '+' and '/', and 0xff for every other
// byte.
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f ('+' and '/')
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f ('0' - '9')
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x4f ('A' - 'O')
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50 - 0x5f ('P' - 'Z')
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x6f ('a' - 'o')
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70 - 0x7f ('p' - 'z')
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
403
404size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
405{
406
407 size_t len = 0;
408
409 while (*psz && ((!buf) || (len < n)))
410 {
411 unsigned char cc = *psz++;
412 if (cc != '+')
413 {
414 // plain ASCII char
415 if (buf)
416 *buf++ = cc;
417 len++;
418 }
419 else if (*psz == '-')
420 {
421 // encoded plus sign
422 if (buf)
423 *buf++ = cc;
424 len++;
425 psz++;
426 }
427 else
428 {
429 // BASE64 encoded string
430 bool lsb;
431 unsigned char c;
432 unsigned int d, l;
433 for (lsb = false, d = 0, l = 0;
434 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
435 {
436 d <<= 6;
437 d += cc;
438 for (l += 6; l >= 8; lsb = !lsb)
439 {
6356d52a 440 c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
441 if (lsb)
442 {
443 if (buf)
444 *buf++ |= c;
445 len ++;
446 }
447 else
448 if (buf)
6356d52a 449 *buf = (wchar_t)(c << 8);
15f2ee32
RN
450 }
451 }
452 if (*psz == '-')
453 psz++;
454 }
455 }
456 if (buf && (len < n))
457 *buf = 0;
458 return len;
6001e347
RR
459}
460
15f2ee32
RN
461//
462// BASE64 encoding table
463//
// Maps a 6-bit value (0..63) to the corresponding BASE64 character.
static const unsigned char utf7enb64[] =
{
    // 0 - 25
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 26 - 51
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // 52 - 61
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 62, 63
    '+', '/'
};
475
476//
477// UTF-7 encoding table
478//
479// 0 - Set D (directly encoded characters)
480// 1 - Set O (optional direct characters)
481// 2 - whitespace characters (optional)
482// 3 - special characters
483//
// Class of each of the 128 ASCII characters for UTF-7 encoding:
//   0 - Set D (directly encoded characters)
//   1 - Set O (optional direct characters)
//   2 - whitespace characters (optional)
//   3 - special characters
static const unsigned char utf7encode[128] =
{
    /* 0x00 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    /* 0x10 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x20 */ 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    /* 0x40 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    /* 0x60 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
495
496size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t
497*psz, size_t n) const
498{
499
500
501 size_t len = 0;
502
503 while (*psz && ((!buf) || (len < n)))
504 {
505 wchar_t cc = *psz++;
506 if (cc < 0x80 && utf7encode[cc] < 1)
507 {
508 // plain ASCII char
509 if (buf)
510 *buf++ = (char)cc;
511 len++;
512 }
513#ifndef WC_UTF16
79c78d42 514 else if (((wxUint32)cc) > 0xffff)
6e394fc6 515 {
15f2ee32
RN
516 // no surrogate pair generation (yet?)
517 return (size_t)-1;
518 }
519#endif
520 else
521 {
522 if (buf)
523 *buf++ = '+';
524 len++;
525 if (cc != '+')
526 {
527 // BASE64 encode string
528 unsigned int lsb, d, l;
529 for (d = 0, l = 0;; psz++)
530 {
531 for (lsb = 0; lsb < 2; lsb ++)
532 {
533 d <<= 8;
534 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
535
536 for (l += 8; l >= 6; )
537 {
538 l -= 6;
539 if (buf)
540 *buf++ = utf7enb64[(d >> l) % 64];
541 len++;
542 }
543 }
544 cc = *psz;
545 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
546 break;
547 }
548 if (l != 0)
549 {
550 if (buf)
551 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
552 len++;
553 }
554 }
555 if (buf)
556 *buf++ = '-';
557 len++;
558 }
559 }
560 if (buf && (len < n))
561 *buf = 0;
562 return len;
6001e347
RR
563}
564
f6bcfd97 565// ----------------------------------------------------------------------------
6001e347 566// UTF-8
f6bcfd97 567// ----------------------------------------------------------------------------
6001e347 568
dccce9ea 569static wxUint32 utf8_max[]=
4def3b35 570 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347
RR
571
572size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
573{
4def3b35
VS
574 size_t len = 0;
575
dccce9ea 576 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
577 {
578 unsigned char cc = *psz++, fc = cc;
579 unsigned cnt;
dccce9ea 580 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 581 fc <<= 1;
dccce9ea 582 if (!cnt)
4def3b35
VS
583 {
584 // plain ASCII char
dccce9ea 585 if (buf)
4def3b35
VS
586 *buf++ = cc;
587 len++;
dccce9ea
VZ
588 }
589 else
4def3b35
VS
590 {
591 cnt--;
dccce9ea 592 if (!cnt)
4def3b35
VS
593 {
594 // invalid UTF-8 sequence
595 return (size_t)-1;
dccce9ea
VZ
596 }
597 else
4def3b35
VS
598 {
599 unsigned ocnt = cnt - 1;
600 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 601 while (cnt--)
4def3b35
VS
602 {
603 cc = *psz++;
dccce9ea 604 if ((cc & 0xC0) != 0x80)
4def3b35
VS
605 {
606 // invalid UTF-8 sequence
607 return (size_t)-1;
608 }
609 res = (res << 6) | (cc & 0x3f);
610 }
dccce9ea 611 if (res <= utf8_max[ocnt])
4def3b35
VS
612 {
613 // illegal UTF-8 encoding
614 return (size_t)-1;
615 }
1cd52418 616#ifdef WC_UTF16
b5153fd8
VZ
617 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
618 size_t pa = encode_utf16(res, (wxUint16 *)buf);
4def3b35
VS
619 if (pa == (size_t)-1)
620 return (size_t)-1;
dccce9ea 621 if (buf)
4def3b35
VS
622 buf += pa;
623 len += pa;
373658eb 624#else // !WC_UTF16
dccce9ea 625 if (buf)
4def3b35
VS
626 *buf++ = res;
627 len++;
373658eb 628#endif // WC_UTF16/!WC_UTF16
4def3b35
VS
629 }
630 }
6001e347 631 }
dccce9ea 632 if (buf && (len < n))
4def3b35
VS
633 *buf = 0;
634 return len;
6001e347
RR
635}
636
637size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
638{
4def3b35 639 size_t len = 0;
6001e347 640
dccce9ea 641 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
642 {
643 wxUint32 cc;
1cd52418 644#ifdef WC_UTF16
b5153fd8
VZ
645 // cast is ok for WC_UTF16
646 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 647 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 648#else
4def3b35
VS
649 cc=(*psz++) & 0x7fffffff;
650#endif
651 unsigned cnt;
652 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
dccce9ea 653 if (!cnt)
4def3b35
VS
654 {
655 // plain ASCII char
dccce9ea 656 if (buf)
574c939e 657 *buf++ = (char) cc;
4def3b35 658 len++;
dccce9ea
VZ
659 }
660
661 else
4def3b35
VS
662 {
663 len += cnt + 1;
dccce9ea 664 if (buf)
4def3b35 665 {
574c939e 666 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
4def3b35 667 while (cnt--)
574c939e 668 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
4def3b35
VS
669 }
670 }
6001e347 671 }
4def3b35
VS
672
673 if (buf && (len<n)) *buf = 0;
adb45366 674
4def3b35 675 return len;
6001e347
RR
676}
677
c91830cb
VZ
678
679
680
681// ----------------------------------------------------------------------------
682// UTF-16
683// ----------------------------------------------------------------------------
684
685#ifdef WORDS_BIGENDIAN
bde4baac
VZ
686 #define wxMBConvUTF16straight wxMBConvUTF16BE
687 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 688#else
bde4baac
VZ
689 #define wxMBConvUTF16swap wxMBConvUTF16BE
690 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
691#endif
692
693
c91830cb
VZ
694#ifdef WC_UTF16
695
c91830cb
VZ
696// copy 16bit MB to 16bit String
697size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
698{
699 size_t len=0;
700
701 while (*(wxUint16*)psz && (!buf || len < n))
702 {
703 if (buf)
704 *buf++ = *(wxUint16*)psz;
705 len++;
706
707 psz += sizeof(wxUint16);
708 }
709 if (buf && len<n) *buf=0;
710
711 return len;
712}
713
714
715// copy 16bit String to 16bit MB
716size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
717{
718 size_t len=0;
719
720 while (*psz && (!buf || len < n))
721 {
722 if (buf)
723 {
724 *(wxUint16*)buf = *psz;
725 buf += sizeof(wxUint16);
726 }
727 len += sizeof(wxUint16);
728 psz++;
729 }
730 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
731
732 return len;
733}
734
735
736// swap 16bit MB to 16bit String
737size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
738{
739 size_t len=0;
740
741 while (*(wxUint16*)psz && (!buf || len < n))
742 {
743 if (buf)
744 {
745 ((char *)buf)[0] = psz[1];
746 ((char *)buf)[1] = psz[0];
747 buf++;
748 }
749 len++;
750 psz += sizeof(wxUint16);
751 }
752 if (buf && len<n) *buf=0;
753
754 return len;
755}
756
757
// swap 16bit String to 16bit MB
759size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
760{
761 size_t len=0;
762
763 while (*psz && (!buf || len < n))
764 {
765 if (buf)
766 {
767 *buf++ = ((char*)psz)[1];
768 *buf++ = ((char*)psz)[0];
769 }
770 len += sizeof(wxUint16);
771 psz++;
772 }
773 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
774
775 return len;
776}
777
778
779#else // WC_UTF16
780
781
782// copy 16bit MB to 32bit String
783size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
784{
785 size_t len=0;
786
787 while (*(wxUint16*)psz && (!buf || len < n))
788 {
789 wxUint32 cc;
790 size_t pa=decode_utf16((wxUint16*)psz, cc);
791 if (pa == (size_t)-1)
792 return pa;
793
794 if (buf)
795 *buf++ = cc;
796 len++;
797 psz += pa * sizeof(wxUint16);
798 }
799 if (buf && len<n) *buf=0;
800
801 return len;
802}
803
804
805// copy 32bit String to 16bit MB
806size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
807{
808 size_t len=0;
809
810 while (*psz && (!buf || len < n))
811 {
812 wxUint16 cc[2];
813 size_t pa=encode_utf16(*psz, cc);
814
815 if (pa == (size_t)-1)
816 return pa;
817
818 if (buf)
819 {
69b80d28 820 *(wxUint16*)buf = cc[0];
b5153fd8 821 buf += sizeof(wxUint16);
c91830cb 822 if (pa > 1)
69b80d28
VZ
823 {
824 *(wxUint16*)buf = cc[1];
825 buf += sizeof(wxUint16);
826 }
c91830cb
VZ
827 }
828
829 len += pa*sizeof(wxUint16);
830 psz++;
831 }
832 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
833
834 return len;
835}
836
837
838// swap 16bit MB to 32bit String
839size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
840{
841 size_t len=0;
842
843 while (*(wxUint16*)psz && (!buf || len < n))
844 {
845 wxUint32 cc;
846 char tmp[4];
847 tmp[0]=psz[1]; tmp[1]=psz[0];
848 tmp[2]=psz[3]; tmp[3]=psz[2];
849
850 size_t pa=decode_utf16((wxUint16*)tmp, cc);
851 if (pa == (size_t)-1)
852 return pa;
853
854 if (buf)
855 *buf++ = cc;
856
857 len++;
858 psz += pa * sizeof(wxUint16);
859 }
860 if (buf && len<n) *buf=0;
861
862 return len;
863}
864
865
866// swap 32bit String to 16bit MB
867size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
868{
869 size_t len=0;
870
871 while (*psz && (!buf || len < n))
872 {
873 wxUint16 cc[2];
874 size_t pa=encode_utf16(*psz, cc);
875
876 if (pa == (size_t)-1)
877 return pa;
878
879 if (buf)
880 {
881 *buf++ = ((char*)cc)[1];
882 *buf++ = ((char*)cc)[0];
883 if (pa > 1)
884 {
885 *buf++ = ((char*)cc)[3];
886 *buf++ = ((char*)cc)[2];
887 }
888 }
889
890 len += pa*sizeof(wxUint16);
891 psz++;
892 }
893 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
894
895 return len;
896}
897
898#endif // WC_UTF16
899
900
901// ----------------------------------------------------------------------------
902// UTF-32
903// ----------------------------------------------------------------------------
904
905#ifdef WORDS_BIGENDIAN
906#define wxMBConvUTF32straight wxMBConvUTF32BE
907#define wxMBConvUTF32swap wxMBConvUTF32LE
908#else
909#define wxMBConvUTF32swap wxMBConvUTF32BE
910#define wxMBConvUTF32straight wxMBConvUTF32LE
911#endif
912
913
914WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
915WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
916
917
918#ifdef WC_UTF16
919
920// copy 32bit MB to 16bit String
921size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
922{
923 size_t len=0;
924
925 while (*(wxUint32*)psz && (!buf || len < n))
926 {
927 wxUint16 cc[2];
928
929 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
930 if (pa == (size_t)-1)
931 return pa;
932
933 if (buf)
934 {
935 *buf++ = cc[0];
936 if (pa > 1)
937 *buf++ = cc[1];
938 }
939 len += pa;
940 psz += sizeof(wxUint32);
941 }
942 if (buf && len<n) *buf=0;
943
944 return len;
945}
946
947
948// copy 16bit String to 32bit MB
949size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
950{
951 size_t len=0;
952
953 while (*psz && (!buf || len < n))
954 {
955 wxUint32 cc;
956
b5153fd8
VZ
957 // cast is ok for WC_UTF16
958 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
959 if (pa == (size_t)-1)
960 return pa;
961
962 if (buf)
963 {
964 *(wxUint32*)buf = cc;
965 buf += sizeof(wxUint32);
966 }
967 len += sizeof(wxUint32);
968 psz += pa;
969 }
b5153fd8
VZ
970
971 if (buf && len<=n-sizeof(wxUint32))
972 *(wxUint32*)buf=0;
c91830cb
VZ
973
974 return len;
975}
976
977
978
979// swap 32bit MB to 16bit String
980size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
981{
982 size_t len=0;
983
984 while (*(wxUint32*)psz && (!buf || len < n))
985 {
986 char tmp[4];
987 tmp[0] = psz[3]; tmp[1] = psz[2];
988 tmp[2] = psz[1]; tmp[3] = psz[0];
989
990
991 wxUint16 cc[2];
992
993 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
994 if (pa == (size_t)-1)
995 return pa;
996
997 if (buf)
998 {
999 *buf++ = cc[0];
1000 if (pa > 1)
1001 *buf++ = cc[1];
1002 }
1003 len += pa;
1004 psz += sizeof(wxUint32);
1005 }
b5153fd8
VZ
1006
1007 if (buf && len<n)
1008 *buf=0;
c91830cb
VZ
1009
1010 return len;
1011}
1012
1013
1014// swap 16bit String to 32bit MB
1015size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1016{
1017 size_t len=0;
1018
1019 while (*psz && (!buf || len < n))
1020 {
1021 char cc[4];
1022
b5153fd8
VZ
1023 // cast is ok for WC_UTF16
1024 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
1025 if (pa == (size_t)-1)
1026 return pa;
1027
1028 if (buf)
1029 {
1030 *buf++ = cc[3];
1031 *buf++ = cc[2];
1032 *buf++ = cc[1];
1033 *buf++ = cc[0];
1034 }
1035 len += sizeof(wxUint32);
1036 psz += pa;
1037 }
b5153fd8
VZ
1038
1039 if (buf && len<=n-sizeof(wxUint32))
1040 *(wxUint32*)buf=0;
c91830cb
VZ
1041
1042 return len;
1043}
1044
1045#else // WC_UTF16
1046
1047
1048// copy 32bit MB to 32bit String
1049size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1050{
1051 size_t len=0;
1052
1053 while (*(wxUint32*)psz && (!buf || len < n))
1054 {
1055 if (buf)
1056 *buf++ = *(wxUint32*)psz;
1057 len++;
1058 psz += sizeof(wxUint32);
1059 }
b5153fd8
VZ
1060
1061 if (buf && len<n)
1062 *buf=0;
c91830cb
VZ
1063
1064 return len;
1065}
1066
1067
1068// copy 32bit String to 32bit MB
1069size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1070{
1071 size_t len=0;
1072
1073 while (*psz && (!buf || len < n))
1074 {
1075 if (buf)
1076 {
1077 *(wxUint32*)buf = *psz;
1078 buf += sizeof(wxUint32);
1079 }
1080
1081 len += sizeof(wxUint32);
1082 psz++;
1083 }
1084
b5153fd8
VZ
1085 if (buf && len<=n-sizeof(wxUint32))
1086 *(wxUint32*)buf=0;
c91830cb
VZ
1087
1088 return len;
1089}
1090
1091
1092// swap 32bit MB to 32bit String
1093size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1094{
1095 size_t len=0;
1096
1097 while (*(wxUint32*)psz && (!buf || len < n))
1098 {
1099 if (buf)
1100 {
1101 ((char *)buf)[0] = psz[3];
1102 ((char *)buf)[1] = psz[2];
1103 ((char *)buf)[2] = psz[1];
1104 ((char *)buf)[3] = psz[0];
1105 buf++;
1106 }
1107 len++;
1108 psz += sizeof(wxUint32);
1109 }
b5153fd8
VZ
1110
1111 if (buf && len<n)
1112 *buf=0;
c91830cb
VZ
1113
1114 return len;
1115}
1116
1117
1118// swap 32bit String to 32bit MB
1119size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1120{
1121 size_t len=0;
1122
1123 while (*psz && (!buf || len < n))
1124 {
1125 if (buf)
1126 {
1127 *buf++ = ((char *)psz)[3];
1128 *buf++ = ((char *)psz)[2];
1129 *buf++ = ((char *)psz)[1];
1130 *buf++ = ((char *)psz)[0];
1131 }
1132 len += sizeof(wxUint32);
1133 psz++;
1134 }
b5153fd8
VZ
1135
1136 if (buf && len<=n-sizeof(wxUint32))
1137 *(wxUint32*)buf=0;
c91830cb
VZ
1138
1139 return len;
1140}
1141
1142
1143#endif // WC_UTF16
1144
1145
36acb880
VZ
1146// ============================================================================
1147// The classes doing conversion using the iconv_xxx() functions
1148// ============================================================================
3caec1bb 1149
b040e242 1150#ifdef HAVE_ICONV
3a0d76bc 1151
b1d547eb
VS
1152// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1153// E2BIG if output buffer is _exactly_ as big as needed. Such case is
1154// (unless there's yet another bug in glibc) the only case when iconv()
1155// returns with (size_t)-1 (which means error) and says there are 0 bytes
1156// left in the input buffer -- when _real_ error occurs,
1157// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1158// iconv() failure.
3caec1bb
VS
1159// [This bug does not appear in glibc 2.2.]
1160#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1161#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1162 (errno != E2BIG || bufLeft != 0))
1163#else
1164#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1165#endif
1166
ab217dba 1167#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880
VZ
1168
1169// ----------------------------------------------------------------------------
e95354ec 1170// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1171// ----------------------------------------------------------------------------
1172
e95354ec 1173class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1174{
1175public:
e95354ec
VZ
1176 wxMBConv_iconv(const wxChar *name);
1177 virtual ~wxMBConv_iconv();
36acb880 1178
bde4baac
VZ
1179 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1180 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1181
e95354ec 1182 bool IsOk() const
36acb880
VZ
1183 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1184
1185protected:
1186 // the iconv handlers used to translate from multibyte to wide char and in
1187 // the other direction
1188 iconv_t m2w,
1189 w2m;
b1d547eb
VS
1190#if wxUSE_THREADS
1191 // guards access to m2w and w2m objects
1192 wxMutex m_iconvMutex;
1193#endif
36acb880
VZ
1194
1195private:
e95354ec 1196 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
1197 // available on this machine, it will remain NULL
1198 static const char *ms_wcCharsetName;
1199
1200 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1201 // different endian-ness than the native one
405d8f46 1202 static bool ms_wcNeedsSwap;
36acb880
VZ
1203};
1204
e95354ec
VZ
1205const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1206bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1207
e95354ec 1208wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 1209{
04c79127
RR
1210 // Do it the hard way
1211 char cname[100];
1212 for (size_t i = 0; i < wxStrlen(name)+1; i++)
1213 cname[i] = (char) name[i];
1214
36acb880
VZ
1215 // check for charset that represents wchar_t:
1216 if (ms_wcCharsetName == NULL)
f1339c56 1217 {
e95354ec 1218 ms_wcNeedsSwap = false;
dccce9ea 1219
36acb880
VZ
1220 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1221 ms_wcCharsetName = WC_NAME_BEST;
04c79127 1222 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 1223
36acb880
VZ
1224 if (m2w == (iconv_t)-1)
1225 {
1226 // try charset w/o bytesex info (e.g. "UCS4")
1227 // and check for bytesex ourselves:
1228 ms_wcCharsetName = WC_NAME;
04c79127 1229 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
1230
1231 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
1232 if (m2w == (iconv_t)-1)
1233 {
36acb880 1234 ms_wcCharsetName = "WCHAR_T";
04c79127 1235 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 1236 }
3a0d76bc 1237
36acb880
VZ
1238 if (m2w != (iconv_t)-1)
1239 {
1240 char buf[2], *bufPtr;
1241 wchar_t wbuf[2], *wbufPtr;
1242 size_t insz, outsz;
1243 size_t res;
1244
1245 buf[0] = 'A';
1246 buf[1] = 0;
1247 wbuf[0] = 0;
1248 insz = 2;
1249 outsz = SIZEOF_WCHAR_T * 2;
1250 wbufPtr = wbuf;
1251 bufPtr = buf;
1252
1253 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1254 (char**)&wbufPtr, &outsz);
1255
1256 if (ICONV_FAILED(res, insz))
3a0d76bc 1257 {
36acb880
VZ
1258 ms_wcCharsetName = NULL;
1259 wxLogLastError(wxT("iconv"));
2b5f62a0 1260 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
1261 }
1262 else
1263 {
36acb880 1264 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
1265 }
1266 }
36acb880
VZ
1267 else
1268 {
1269 ms_wcCharsetName = NULL;
373658eb 1270
77ffb593 1271 // VS: we must not output an error here, since wxWidgets will safely
957686c8
VS
1272 // fall back to using wxEncodingConverter.
1273 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1274 //wxLogError(
36acb880 1275 }
3a0d76bc 1276 }
36acb880 1277 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 1278 }
36acb880 1279 else // we already have ms_wcCharsetName
3caec1bb 1280 {
04c79127 1281 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 1282 }
dccce9ea 1283
36acb880
VZ
1284 // NB: don't ever pass NULL to iconv_open(), it may crash!
1285 if ( ms_wcCharsetName )
f1339c56 1286 {
04c79127 1287 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 1288 }
405d8f46
VZ
1289 else
1290 {
1291 w2m = (iconv_t)-1;
1292 }
36acb880 1293}
3caec1bb 1294
e95354ec 1295wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
1296{
1297 if ( m2w != (iconv_t)-1 )
1298 iconv_close(m2w);
1299 if ( w2m != (iconv_t)-1 )
1300 iconv_close(w2m);
1301}
3a0d76bc 1302
bde4baac 1303size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1304{
b1d547eb
VS
1305#if wxUSE_THREADS
1306 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1307 // Unfortunately there is a couple of global wxCSConv objects such as
1308 // wxConvLocal that are used all over wx code, so we have to make sure
1309 // the handle is used by at most one thread at the time. Otherwise
1310 // only a few wx classes would be safe to use from non-main threads
1311 // as MB<->WC conversion would fail "randomly".
1312 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1313#endif
1314
36acb880
VZ
1315 size_t inbuf = strlen(psz);
1316 size_t outbuf = n * SIZEOF_WCHAR_T;
1317 size_t res, cres;
1318 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1319 wchar_t *bufPtr = buf;
1320 const char *pszPtr = psz;
1321
1322 if (buf)
1323 {
1324 // have destination buffer, convert there
1325 cres = iconv(m2w,
1326 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1327 (char**)&bufPtr, &outbuf);
1328 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1329
36acb880 1330 if (ms_wcNeedsSwap)
3a0d76bc 1331 {
36acb880
VZ
1332 // convert to native endianness
1333 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 1334 }
adb45366 1335
49dd9820
VS
1336 // NB: iconv was given only strlen(psz) characters on input, and so
1337 // it couldn't convert the trailing zero. Let's do it ourselves
1338 // if there's some room left for it in the output buffer.
1339 if (res < n)
1340 buf[res] = 0;
36acb880
VZ
1341 }
1342 else
1343 {
1344 // no destination buffer... convert using temp buffer
1345 // to calculate destination buffer requirement
1346 wchar_t tbuf[8];
1347 res = 0;
1348 do {
1349 bufPtr = tbuf;
1350 outbuf = 8*SIZEOF_WCHAR_T;
1351
1352 cres = iconv(m2w,
1353 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1354 (char**)&bufPtr, &outbuf );
1355
1356 res += 8-(outbuf/SIZEOF_WCHAR_T);
1357 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1358 }
dccce9ea 1359
36acb880 1360 if (ICONV_FAILED(cres, inbuf))
f1339c56 1361 {
36acb880
VZ
1362 //VS: it is ok if iconv fails, hence trace only
1363 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1364 return (size_t)-1;
1365 }
1366
1367 return res;
1368}
1369
bde4baac 1370size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1371{
b1d547eb
VS
1372#if wxUSE_THREADS
1373 // NB: explained in MB2WC
1374 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1375#endif
1376
f8d791e0 1377 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1378 size_t outbuf = n;
1379 size_t res, cres;
3a0d76bc 1380
36acb880 1381 wchar_t *tmpbuf = 0;
3caec1bb 1382
36acb880
VZ
1383 if (ms_wcNeedsSwap)
1384 {
1385 // need to copy to temp buffer to switch endianness
1386 // this absolutely doesn't rock!
1387 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1388 // could be in read-only memory, or be accessed in some other thread)
1389 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1390 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1391 WC_BSWAP(tmpbuf, inbuf)
1392 psz=tmpbuf;
1393 }
3a0d76bc 1394
36acb880
VZ
1395 if (buf)
1396 {
1397 // have destination buffer, convert there
1398 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1399
36acb880 1400 res = n-outbuf;
adb45366 1401
49dd9820
VS
1402 // NB: iconv was given only wcslen(psz) characters on input, and so
1403 // it couldn't convert the trailing zero. Let's do it ourselves
1404 // if there's some room left for it in the output buffer.
1405 if (res < n)
1406 buf[0] = 0;
36acb880
VZ
1407 }
1408 else
1409 {
1410 // no destination buffer... convert using temp buffer
1411 // to calculate destination buffer requirement
1412 char tbuf[16];
1413 res = 0;
1414 do {
1415 buf = tbuf; outbuf = 16;
1416
1417 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1418
36acb880
VZ
1419 res += 16 - outbuf;
1420 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1421 }
dccce9ea 1422
36acb880
VZ
1423 if (ms_wcNeedsSwap)
1424 {
1425 free(tmpbuf);
1426 }
dccce9ea 1427
36acb880
VZ
1428 if (ICONV_FAILED(cres, inbuf))
1429 {
1430 //VS: it is ok if iconv fails, hence trace only
1431 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1432 return (size_t)-1;
1433 }
1434
1435 return res;
1436}
1437
b040e242 1438#endif // HAVE_ICONV
36acb880 1439
e95354ec 1440
36acb880
VZ
1441// ============================================================================
1442// Win32 conversion classes
1443// ============================================================================
1cd52418 1444
e95354ec 1445#ifdef wxHAVE_WIN32_MB2WC
373658eb 1446
8b04d4c4 1447// from utils.cpp
d775fa82 1448#if wxUSE_FONTMAP
8b04d4c4
VZ
1449extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1450extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1451#endif
373658eb 1452
e95354ec 1453class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1454{
1455public:
bde4baac
VZ
1456 wxMBConv_win32()
1457 {
1458 m_CodePage = CP_ACP;
1459 }
1460
7608a683 1461#if wxUSE_FONTMAP
e95354ec 1462 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1463 {
1464 m_CodePage = wxCharsetToCodepage(name);
1465 }
dccce9ea 1466
e95354ec 1467 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1468 {
1469 m_CodePage = wxEncodingToCodepage(encoding);
1470 }
7608a683 1471#endif
8b04d4c4 1472
bde4baac 1473 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1474 {
02272c9c
VZ
1475 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1476 // the behaviour is not compatible with the Unix version (using iconv)
1477 // and break the library itself, e.g. wxTextInputStream::NextChar()
1478 // wouldn't work if reading an incomplete MB char didn't result in an
1479 // error
2b5f62a0
VZ
1480 const size_t len = ::MultiByteToWideChar
1481 (
1482 m_CodePage, // code page
02272c9c 1483 MB_ERR_INVALID_CHARS, // flags: fall on error
2b5f62a0
VZ
1484 psz, // input string
1485 -1, // its length (NUL-terminated)
b4da152e 1486 buf, // output string
2b5f62a0
VZ
1487 buf ? n : 0 // size of output buffer
1488 );
1489
03a991bc
VZ
1490 // note that it returns count of written chars for buf != NULL and size
1491 // of the needed buffer for buf == NULL so in either case the length of
1492 // the string (which never includes the terminating NUL) is one less
1493 return len ? len - 1 : (size_t)-1;
f1339c56 1494 }
dccce9ea 1495
13dd924a 1496 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1497 {
13dd924a
VZ
1498 /*
1499 we have a problem here: by default, WideCharToMultiByte() may
1500 replace characters unrepresentable in the target code page with bad
1501 quality approximations such as turning "1/2" symbol (U+00BD) into
1502 "1" for the code pages which don't have it and we, obviously, want
1503 to avoid this at any price
d775fa82 1504
13dd924a
VZ
1505 the trouble is that this function does it _silently_, i.e. it won't
1506 even tell us whether it did or not... Win98/2000 and higher provide
1507 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1508 we have to resort to a round trip, i.e. check that converting back
1509 results in the same string -- this is, of course, expensive but
1510 otherwise we simply can't be sure to not garble the data.
1511 */
1512
1513 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1514 // it doesn't work with CJK encodings (which we test for rather roughly
1515 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1516 // supporting it
907173e5
WS
1517 BOOL usedDef wxDUMMY_INITIALIZE(false);
1518 BOOL *pUsedDef;
13dd924a
VZ
1519 int flags;
1520 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1521 {
1522 // it's our lucky day
1523 flags = WC_NO_BEST_FIT_CHARS;
1524 pUsedDef = &usedDef;
1525 }
1526 else // old system or unsupported encoding
1527 {
1528 flags = 0;
1529 pUsedDef = NULL;
1530 }
1531
2b5f62a0
VZ
1532 const size_t len = ::WideCharToMultiByte
1533 (
1534 m_CodePage, // code page
13dd924a
VZ
1535 flags, // either none or no best fit
1536 pwz, // input string
2b5f62a0
VZ
1537 -1, // it is (wide) NUL-terminated
1538 buf, // output buffer
1539 buf ? n : 0, // and its size
1540 NULL, // default "replacement" char
13dd924a 1541 pUsedDef // [out] was it used?
2b5f62a0
VZ
1542 );
1543
13dd924a
VZ
1544 if ( !len )
1545 {
1546 // function totally failed
1547 return (size_t)-1;
1548 }
1549
1550 // if we were really converting, check if we succeeded
1551 if ( buf )
1552 {
1553 if ( flags )
1554 {
1555 // check if the conversion failed, i.e. if any replacements
1556 // were done
1557 if ( usedDef )
1558 return (size_t)-1;
1559 }
1560 else // we must resort to double tripping...
1561 {
1562 wxWCharBuffer wcBuf(n);
1563 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1564 wcscmp(wcBuf, pwz) != 0 )
1565 {
1566 // we didn't obtain the same thing we started from, hence
1567 // the conversion was lossy and we consider that it failed
1568 return (size_t)-1;
1569 }
1570 }
1571 }
1572
03a991bc 1573 // see the comment above for the reason of "len - 1"
13dd924a 1574 return len - 1;
f1339c56 1575 }
dccce9ea 1576
13dd924a
VZ
1577 bool IsOk() const { return m_CodePage != -1; }
1578
1579private:
1580 static bool CanUseNoBestFit()
1581 {
1582 static int s_isWin98Or2k = -1;
1583
1584 if ( s_isWin98Or2k == -1 )
1585 {
1586 int verMaj, verMin;
1587 switch ( wxGetOsVersion(&verMaj, &verMin) )
1588 {
1589 case wxWIN95:
1590 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1591 break;
1592
1593 case wxWINDOWS_NT:
1594 s_isWin98Or2k = verMaj >= 5;
1595 break;
1596
1597 default:
1598 // unknown, be conseravtive by default
1599 s_isWin98Or2k = 0;
1600 }
1601
1602 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1603 }
1604
1605 return s_isWin98Or2k == 1;
1606 }
f1339c56 1607
b1d66b54 1608 long m_CodePage;
1cd52418 1609};
e95354ec
VZ
1610
1611#endif // wxHAVE_WIN32_MB2WC
1612
f7e98dee
RN
1613// ============================================================================
1614// Cocoa conversion classes
1615// ============================================================================
1616
1617#if defined(__WXCOCOA__)
1618
// RN: There is no UTF-32 support in either Core Foundation or
// Cocoa. Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
1622
1623#include <CoreFoundation/CFString.h>
1624#include <CoreFoundation/CFStringEncodingExt.h>
1625
1626CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 1627{
638357a0 1628 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ecd9653b
WS
1629 if ( encoding == wxFONTENCODING_DEFAULT )
1630 {
638357a0 1631 enc = CFStringGetSystemEncoding();
ecd9653b
WS
1632 }
1633 else switch( encoding)
1634 {
1635 case wxFONTENCODING_ISO8859_1 :
1636 enc = kCFStringEncodingISOLatin1 ;
1637 break ;
1638 case wxFONTENCODING_ISO8859_2 :
1639 enc = kCFStringEncodingISOLatin2;
1640 break ;
1641 case wxFONTENCODING_ISO8859_3 :
1642 enc = kCFStringEncodingISOLatin3 ;
1643 break ;
1644 case wxFONTENCODING_ISO8859_4 :
1645 enc = kCFStringEncodingISOLatin4;
1646 break ;
1647 case wxFONTENCODING_ISO8859_5 :
1648 enc = kCFStringEncodingISOLatinCyrillic;
1649 break ;
1650 case wxFONTENCODING_ISO8859_6 :
1651 enc = kCFStringEncodingISOLatinArabic;
1652 break ;
1653 case wxFONTENCODING_ISO8859_7 :
1654 enc = kCFStringEncodingISOLatinGreek;
1655 break ;
1656 case wxFONTENCODING_ISO8859_8 :
1657 enc = kCFStringEncodingISOLatinHebrew;
1658 break ;
1659 case wxFONTENCODING_ISO8859_9 :
1660 enc = kCFStringEncodingISOLatin5;
1661 break ;
1662 case wxFONTENCODING_ISO8859_10 :
1663 enc = kCFStringEncodingISOLatin6;
1664 break ;
1665 case wxFONTENCODING_ISO8859_11 :
1666 enc = kCFStringEncodingISOLatinThai;
1667 break ;
1668 case wxFONTENCODING_ISO8859_13 :
1669 enc = kCFStringEncodingISOLatin7;
1670 break ;
1671 case wxFONTENCODING_ISO8859_14 :
1672 enc = kCFStringEncodingISOLatin8;
1673 break ;
1674 case wxFONTENCODING_ISO8859_15 :
1675 enc = kCFStringEncodingISOLatin9;
1676 break ;
1677
1678 case wxFONTENCODING_KOI8 :
1679 enc = kCFStringEncodingKOI8_R;
1680 break ;
1681 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1682 enc = kCFStringEncodingDOSRussian;
1683 break ;
1684
1685// case wxFONTENCODING_BULGARIAN :
1686// enc = ;
1687// break ;
1688
1689 case wxFONTENCODING_CP437 :
1690 enc =kCFStringEncodingDOSLatinUS ;
1691 break ;
1692 case wxFONTENCODING_CP850 :
1693 enc = kCFStringEncodingDOSLatin1;
1694 break ;
1695 case wxFONTENCODING_CP852 :
1696 enc = kCFStringEncodingDOSLatin2;
1697 break ;
1698 case wxFONTENCODING_CP855 :
1699 enc = kCFStringEncodingDOSCyrillic;
1700 break ;
1701 case wxFONTENCODING_CP866 :
1702 enc =kCFStringEncodingDOSRussian ;
1703 break ;
1704 case wxFONTENCODING_CP874 :
1705 enc = kCFStringEncodingDOSThai;
1706 break ;
1707 case wxFONTENCODING_CP932 :
1708 enc = kCFStringEncodingDOSJapanese;
1709 break ;
1710 case wxFONTENCODING_CP936 :
1711 enc =kCFStringEncodingDOSChineseSimplif ;
1712 break ;
1713 case wxFONTENCODING_CP949 :
1714 enc = kCFStringEncodingDOSKorean;
1715 break ;
1716 case wxFONTENCODING_CP950 :
1717 enc = kCFStringEncodingDOSChineseTrad;
1718 break ;
ecd9653b
WS
1719 case wxFONTENCODING_CP1250 :
1720 enc = kCFStringEncodingWindowsLatin2;
1721 break ;
1722 case wxFONTENCODING_CP1251 :
1723 enc =kCFStringEncodingWindowsCyrillic ;
1724 break ;
1725 case wxFONTENCODING_CP1252 :
1726 enc =kCFStringEncodingWindowsLatin1 ;
1727 break ;
1728 case wxFONTENCODING_CP1253 :
1729 enc = kCFStringEncodingWindowsGreek;
1730 break ;
1731 case wxFONTENCODING_CP1254 :
1732 enc = kCFStringEncodingWindowsLatin5;
1733 break ;
1734 case wxFONTENCODING_CP1255 :
1735 enc =kCFStringEncodingWindowsHebrew ;
1736 break ;
1737 case wxFONTENCODING_CP1256 :
1738 enc =kCFStringEncodingWindowsArabic ;
1739 break ;
1740 case wxFONTENCODING_CP1257 :
1741 enc = kCFStringEncodingWindowsBalticRim;
1742 break ;
638357a0
RN
1743// This only really encodes to UTF7 (if that) evidently
1744// case wxFONTENCODING_UTF7 :
1745// enc = kCFStringEncodingNonLossyASCII ;
1746// break ;
ecd9653b
WS
1747 case wxFONTENCODING_UTF8 :
1748 enc = kCFStringEncodingUTF8 ;
1749 break ;
1750 case wxFONTENCODING_EUC_JP :
1751 enc = kCFStringEncodingEUC_JP;
1752 break ;
1753 case wxFONTENCODING_UTF16 :
f7e98dee 1754 enc = kCFStringEncodingUnicode ;
ecd9653b 1755 break ;
f7e98dee
RN
1756 case wxFONTENCODING_MACROMAN :
1757 enc = kCFStringEncodingMacRoman ;
1758 break ;
1759 case wxFONTENCODING_MACJAPANESE :
1760 enc = kCFStringEncodingMacJapanese ;
1761 break ;
1762 case wxFONTENCODING_MACCHINESETRAD :
1763 enc = kCFStringEncodingMacChineseTrad ;
1764 break ;
1765 case wxFONTENCODING_MACKOREAN :
1766 enc = kCFStringEncodingMacKorean ;
1767 break ;
1768 case wxFONTENCODING_MACARABIC :
1769 enc = kCFStringEncodingMacArabic ;
1770 break ;
1771 case wxFONTENCODING_MACHEBREW :
1772 enc = kCFStringEncodingMacHebrew ;
1773 break ;
1774 case wxFONTENCODING_MACGREEK :
1775 enc = kCFStringEncodingMacGreek ;
1776 break ;
1777 case wxFONTENCODING_MACCYRILLIC :
1778 enc = kCFStringEncodingMacCyrillic ;
1779 break ;
1780 case wxFONTENCODING_MACDEVANAGARI :
1781 enc = kCFStringEncodingMacDevanagari ;
1782 break ;
1783 case wxFONTENCODING_MACGURMUKHI :
1784 enc = kCFStringEncodingMacGurmukhi ;
1785 break ;
1786 case wxFONTENCODING_MACGUJARATI :
1787 enc = kCFStringEncodingMacGujarati ;
1788 break ;
1789 case wxFONTENCODING_MACORIYA :
1790 enc = kCFStringEncodingMacOriya ;
1791 break ;
1792 case wxFONTENCODING_MACBENGALI :
1793 enc = kCFStringEncodingMacBengali ;
1794 break ;
1795 case wxFONTENCODING_MACTAMIL :
1796 enc = kCFStringEncodingMacTamil ;
1797 break ;
1798 case wxFONTENCODING_MACTELUGU :
1799 enc = kCFStringEncodingMacTelugu ;
1800 break ;
1801 case wxFONTENCODING_MACKANNADA :
1802 enc = kCFStringEncodingMacKannada ;
1803 break ;
1804 case wxFONTENCODING_MACMALAJALAM :
1805 enc = kCFStringEncodingMacMalayalam ;
1806 break ;
1807 case wxFONTENCODING_MACSINHALESE :
1808 enc = kCFStringEncodingMacSinhalese ;
1809 break ;
1810 case wxFONTENCODING_MACBURMESE :
1811 enc = kCFStringEncodingMacBurmese ;
1812 break ;
1813 case wxFONTENCODING_MACKHMER :
1814 enc = kCFStringEncodingMacKhmer ;
1815 break ;
1816 case wxFONTENCODING_MACTHAI :
1817 enc = kCFStringEncodingMacThai ;
1818 break ;
1819 case wxFONTENCODING_MACLAOTIAN :
1820 enc = kCFStringEncodingMacLaotian ;
1821 break ;
1822 case wxFONTENCODING_MACGEORGIAN :
1823 enc = kCFStringEncodingMacGeorgian ;
1824 break ;
1825 case wxFONTENCODING_MACARMENIAN :
1826 enc = kCFStringEncodingMacArmenian ;
1827 break ;
1828 case wxFONTENCODING_MACCHINESESIMP :
1829 enc = kCFStringEncodingMacChineseSimp ;
1830 break ;
1831 case wxFONTENCODING_MACTIBETAN :
1832 enc = kCFStringEncodingMacTibetan ;
1833 break ;
1834 case wxFONTENCODING_MACMONGOLIAN :
1835 enc = kCFStringEncodingMacMongolian ;
1836 break ;
1837 case wxFONTENCODING_MACETHIOPIC :
1838 enc = kCFStringEncodingMacEthiopic ;
1839 break ;
1840 case wxFONTENCODING_MACCENTRALEUR :
1841 enc = kCFStringEncodingMacCentralEurRoman ;
1842 break ;
1843 case wxFONTENCODING_MACVIATNAMESE :
1844 enc = kCFStringEncodingMacVietnamese ;
1845 break ;
1846 case wxFONTENCODING_MACARABICEXT :
1847 enc = kCFStringEncodingMacExtArabic ;
1848 break ;
1849 case wxFONTENCODING_MACSYMBOL :
1850 enc = kCFStringEncodingMacSymbol ;
1851 break ;
1852 case wxFONTENCODING_MACDINGBATS :
1853 enc = kCFStringEncodingMacDingbats ;
1854 break ;
1855 case wxFONTENCODING_MACTURKISH :
1856 enc = kCFStringEncodingMacTurkish ;
1857 break ;
1858 case wxFONTENCODING_MACCROATIAN :
1859 enc = kCFStringEncodingMacCroatian ;
1860 break ;
1861 case wxFONTENCODING_MACICELANDIC :
1862 enc = kCFStringEncodingMacIcelandic ;
1863 break ;
1864 case wxFONTENCODING_MACROMANIAN :
1865 enc = kCFStringEncodingMacRomanian ;
1866 break ;
1867 case wxFONTENCODING_MACCELTIC :
1868 enc = kCFStringEncodingMacCeltic ;
1869 break ;
1870 case wxFONTENCODING_MACGAELIC :
1871 enc = kCFStringEncodingMacGaelic ;
1872 break ;
ecd9653b
WS
1873// case wxFONTENCODING_MACKEYBOARD :
1874// enc = kCFStringEncodingMacKeyboardGlyphs ;
1875// break ;
1876 default :
1877 // because gcc is picky
1878 break ;
1879 } ;
1880 return enc ;
f7e98dee
RN
1881}
1882
f7e98dee
RN
1883class wxMBConv_cocoa : public wxMBConv
1884{
1885public:
1886 wxMBConv_cocoa()
1887 {
1888 Init(CFStringGetSystemEncoding()) ;
1889 }
1890
1891 wxMBConv_cocoa(const wxChar* name)
1892 {
1893 Init( wxCFStringEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
1894 }
1895
1896 wxMBConv_cocoa(wxFontEncoding encoding)
1897 {
1898 Init( wxCFStringEncFromFontEnc(encoding) );
1899 }
1900
1901 ~wxMBConv_cocoa()
1902 {
1903 }
1904
1905 void Init( CFStringEncoding encoding)
1906 {
638357a0 1907 m_encoding = encoding ;
f7e98dee
RN
1908 }
1909
1910 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
1911 {
1912 wxASSERT(szUnConv);
ecd9653b 1913
638357a0
RN
1914 CFStringRef theString = CFStringCreateWithBytes (
1915 NULL, //the allocator
1916 (const UInt8*)szUnConv,
1917 strlen(szUnConv),
1918 m_encoding,
1919 false //no BOM/external representation
f7e98dee
RN
1920 );
1921
1922 wxASSERT(theString);
1923
638357a0
RN
1924 size_t nOutLength = CFStringGetLength(theString);
1925
1926 if (szOut == NULL)
f7e98dee 1927 {
f7e98dee 1928 CFRelease(theString);
638357a0 1929 return nOutLength;
f7e98dee 1930 }
ecd9653b 1931
638357a0 1932 CFRange theRange = { 0, nOutSize };
ecd9653b 1933
638357a0
RN
1934#if SIZEOF_WCHAR_T == 4
1935 UniChar* szUniCharBuffer = new UniChar[nOutSize];
1936#endif
1937
f7e98dee 1938 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
638357a0 1939
f7e98dee 1940 CFRelease(theString);
ecd9653b 1941
638357a0 1942 szUniCharBuffer[nOutLength] = '\0' ;
f7e98dee
RN
1943
1944#if SIZEOF_WCHAR_T == 4
1945 wxMBConvUTF16 converter ;
638357a0 1946 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
f7e98dee
RN
1947 delete[] szUniCharBuffer;
1948#endif
638357a0
RN
1949
1950 return nOutLength;
f7e98dee
RN
1951 }
1952
1953 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
1954 {
638357a0
RN
1955 wxASSERT(szUnConv);
1956
f7e98dee 1957 size_t nRealOutSize;
638357a0 1958 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 1959 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 1960
f7e98dee
RN
1961#if SIZEOF_WCHAR_T == 4
1962 wxMBConvUTF16BE converter ;
1963 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
1964 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
1965 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
1966 nBufSize /= sizeof(UniChar);
f7e98dee
RN
1967#endif
1968
1969 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
1970 NULL, //allocator
1971 szUniBuffer,
1972 nBufSize,
638357a0 1973 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 1974 );
ecd9653b 1975
f7e98dee 1976 wxASSERT(theString);
ecd9653b 1977
f7e98dee 1978 //Note that CER puts a BOM when converting to unicode
638357a0
RN
1979 //so we check and use getchars instead in that case
1980 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 1981 {
638357a0
RN
1982 if (szOut != NULL)
1983 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
1984
1985 nRealOutSize = CFStringGetLength(theString) + 1;
1986 }
1987 else
1988 {
1989 CFStringGetBytes(
1990 theString,
1991 CFRangeMake(0, CFStringGetLength(theString)),
1992 m_encoding,
1993 0, //what to put in characters that can't be converted -
1994 //0 tells CFString to return NULL if it meets such a character
1995 false, //not an external representation
1996 (UInt8*) szOut,
1997 nOutSize,
1998 (CFIndex*) &nRealOutSize
1999 );
f7e98dee 2000 }
ecd9653b 2001
638357a0 2002 CFRelease(theString);
ecd9653b 2003
638357a0
RN
2004#if SIZEOF_WCHAR_T == 4
2005 delete[] szUniBuffer;
2006#endif
ecd9653b 2007
f7e98dee
RN
2008 return nRealOutSize - 1;
2009 }
2010
2011 bool IsOk() const
ecd9653b 2012 {
638357a0
RN
2013 return m_encoding != kCFStringEncodingInvalidId &&
2014 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2015 }
2016
2017private:
638357a0 2018 CFStringEncoding m_encoding ;
f7e98dee
RN
2019};
2020
2021#endif // defined(__WXCOCOA__)
2022
335d31e0
SC
2023// ============================================================================
2024// Mac conversion classes
2025// ============================================================================
2026
2027#if defined(__WXMAC__) && defined(TARGET_CARBON)
2028
2029class wxMBConv_mac : public wxMBConv
2030{
2031public:
2032 wxMBConv_mac()
2033 {
2034 Init(CFStringGetSystemEncoding()) ;
2035 }
2036
2037 wxMBConv_mac(const wxChar* name)
2038 {
d775fa82 2039 Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
335d31e0
SC
2040 }
2041
2042 wxMBConv_mac(wxFontEncoding encoding)
2043 {
d775fa82
WS
2044 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2045 }
2046
2047 ~wxMBConv_mac()
2048 {
2049 OSStatus status = noErr ;
2050 status = TECDisposeConverter(m_MB2WC_converter);
2051 status = TECDisposeConverter(m_WC2MB_converter);
2052 }
2053
2054
2055 void Init( TextEncodingBase encoding)
2056 {
2057 OSStatus status = noErr ;
2058 m_char_encoding = encoding ;
2059 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2060
2061 status = TECCreateConverter(&m_MB2WC_converter,
2062 m_char_encoding,
2063 m_unicode_encoding);
2064 status = TECCreateConverter(&m_WC2MB_converter,
2065 m_unicode_encoding,
2066 m_char_encoding);
2067 }
2068
335d31e0
SC
2069 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2070 {
d775fa82
WS
2071 OSStatus status = noErr ;
2072 ByteCount byteOutLen ;
2073 ByteCount byteInLen = strlen(psz) ;
2074 wchar_t *tbuf = NULL ;
2075 UniChar* ubuf = NULL ;
2076 size_t res = 0 ;
2077
2078 if (buf == NULL)
2079 {
638357a0 2080 //apple specs say at least 32
c543817b 2081 n = wxMax( 32 , byteInLen ) ;
d775fa82
WS
2082 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2083 }
2084 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
f3a355ce 2085#if SIZEOF_WCHAR_T == 4
d775fa82 2086 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2087#else
d775fa82 2088 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2089#endif
d775fa82
WS
2090 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2091 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
f3a355ce 2092#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2093 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2094 // is not properly terminated we get random characters at the end
2095 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d775fa82
WS
2096 wxMBConvUTF16BE converter ;
2097 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2098 free( ubuf ) ;
f3a355ce 2099#else
d775fa82 2100 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2101#endif
d775fa82
WS
2102 if ( buf == NULL )
2103 free(tbuf) ;
335d31e0 2104
335d31e0
SC
2105 if ( buf && res < n)
2106 buf[res] = 0;
2107
d775fa82 2108 return res ;
335d31e0
SC
2109 }
2110
2111 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82
WS
2112 {
2113 OSStatus status = noErr ;
2114 ByteCount byteOutLen ;
2115 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2116
2117 char *tbuf = NULL ;
2118
2119 if (buf == NULL)
2120 {
638357a0 2121 //apple specs say at least 32
c543817b 2122 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2123 tbuf = (char*) malloc( n ) ;
2124 }
2125
2126 ByteCount byteBufferLen = n ;
2127 UniChar* ubuf = NULL ;
f3a355ce 2128#if SIZEOF_WCHAR_T == 4
d775fa82
WS
2129 wxMBConvUTF16BE converter ;
2130 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2131 byteInLen = unicharlen ;
2132 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2133 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce 2134#else
d775fa82 2135 ubuf = (UniChar*) psz ;
f3a355ce 2136#endif
d775fa82
WS
2137 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2138 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
f3a355ce 2139#if SIZEOF_WCHAR_T == 4
d775fa82 2140 free( ubuf ) ;
f3a355ce 2141#endif
d775fa82
WS
2142 if ( buf == NULL )
2143 free(tbuf) ;
335d31e0 2144
d775fa82 2145 size_t res = byteOutLen ;
335d31e0 2146 if ( buf && res < n)
638357a0 2147 {
335d31e0 2148 buf[res] = 0;
638357a0
RN
2149
2150 //we need to double-trip to verify it didn't insert any ? in place
2151 //of bogus characters
2152 wxWCharBuffer wcBuf(n);
2153 size_t pszlen = wxWcslen(psz);
2154 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2155 wxWcslen(wcBuf) != pszlen ||
2156 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2157 {
2158 // we didn't obtain the same thing we started from, hence
2159 // the conversion was lossy and we consider that it failed
2160 return (size_t)-1;
2161 }
2162 }
335d31e0 2163
d775fa82 2164 return res ;
335d31e0
SC
2165 }
2166
2167 bool IsOk() const
2168 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2169
2170private:
d775fa82
WS
2171 TECObjectRef m_MB2WC_converter ;
2172 TECObjectRef m_WC2MB_converter ;
2173
2174 TextEncodingBase m_char_encoding ;
2175 TextEncodingBase m_unicode_encoding ;
335d31e0
SC
2176};
2177
2178#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 2179
36acb880
VZ
2180// ============================================================================
2181// wxEncodingConverter based conversion classes
2182// ============================================================================
2183
1e6feb95 2184#if wxUSE_FONTMAP
1cd52418 2185
e95354ec 2186class wxMBConv_wxwin : public wxMBConv
1cd52418 2187{
8b04d4c4
VZ
2188private:
2189 void Init()
2190 {
2191 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2192 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2193 }
2194
6001e347 2195public:
f1339c56
RR
2196 // temporarily just use wxEncodingConverter stuff,
2197 // so that it works while a better implementation is built
e95354ec 2198 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
2199 {
2200 if (name)
e95354ec 2201 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
2202 else
2203 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 2204
8b04d4c4
VZ
2205 Init();
2206 }
2207
e95354ec 2208 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
2209 {
2210 m_enc = enc;
2211
2212 Init();
f1339c56 2213 }
dccce9ea 2214
bde4baac 2215 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
2216 {
2217 size_t inbuf = strlen(psz);
dccce9ea 2218 if (buf)
c643a977
VS
2219 {
2220 if (!m2w.Convert(psz,buf))
2221 return (size_t)-1;
2222 }
f1339c56
RR
2223 return inbuf;
2224 }
dccce9ea 2225
bde4baac 2226 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 2227 {
f8d791e0 2228 const size_t inbuf = wxWcslen(psz);
f1339c56 2229 if (buf)
c643a977
VS
2230 {
2231 if (!w2m.Convert(psz,buf))
2232 return (size_t)-1;
2233 }
dccce9ea 2234
f1339c56
RR
2235 return inbuf;
2236 }
dccce9ea 2237
e95354ec 2238 bool IsOk() const { return m_ok; }
f1339c56
RR
2239
2240public:
8b04d4c4 2241 wxFontEncoding m_enc;
f1339c56 2242 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
2243
2244 // were we initialized successfully?
2245 bool m_ok;
fc7a2a60 2246
e95354ec 2247 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 2248};
6001e347 2249
1e6feb95
VZ
2250#endif // wxUSE_FONTMAP
2251
36acb880
VZ
2252// ============================================================================
2253// wxCSConv implementation
2254// ============================================================================
2255
8b04d4c4 2256void wxCSConv::Init()
6001e347 2257{
e95354ec
VZ
2258 m_name = NULL;
2259 m_convReal = NULL;
2260 m_deferred = true;
2261}
2262
8b04d4c4
VZ
2263wxCSConv::wxCSConv(const wxChar *charset)
2264{
2265 Init();
82713003 2266
e95354ec
VZ
2267 if ( charset )
2268 {
e95354ec
VZ
2269 SetName(charset);
2270 }
bda3d86a
VZ
2271
2272 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
2273}
2274
8b04d4c4
VZ
2275wxCSConv::wxCSConv(wxFontEncoding encoding)
2276{
bda3d86a 2277 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
2278 {
2279 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2280
2281 encoding = wxFONTENCODING_SYSTEM;
2282 }
2283
8b04d4c4
VZ
2284 Init();
2285
bda3d86a 2286 m_encoding = encoding;
8b04d4c4
VZ
2287}
2288
6001e347
RR
2289wxCSConv::~wxCSConv()
2290{
65e50848
JS
2291 Clear();
2292}
2293
54380f29 2294wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 2295 : wxMBConv()
54380f29 2296{
8b04d4c4
VZ
2297 Init();
2298
54380f29 2299 SetName(conv.m_name);
8b04d4c4 2300 m_encoding = conv.m_encoding;
54380f29
GD
2301}
2302
2303wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2304{
2305 Clear();
8b04d4c4 2306
54380f29 2307 SetName(conv.m_name);
8b04d4c4
VZ
2308 m_encoding = conv.m_encoding;
2309
54380f29
GD
2310 return *this;
2311}
2312
65e50848
JS
2313void wxCSConv::Clear()
2314{
8b04d4c4 2315 free(m_name);
e95354ec 2316 delete m_convReal;
8b04d4c4 2317
65e50848 2318 m_name = NULL;
e95354ec 2319 m_convReal = NULL;
6001e347
RR
2320}
2321
2322void wxCSConv::SetName(const wxChar *charset)
2323{
f1339c56
RR
2324 if (charset)
2325 {
2326 m_name = wxStrdup(charset);
e95354ec 2327 m_deferred = true;
f1339c56 2328 }
6001e347
RR
2329}
2330
e95354ec
VZ
2331wxMBConv *wxCSConv::DoCreate() const
2332{
c547282d
VZ
2333 // check for the special case of ASCII or ISO8859-1 charset: as we have
2334 // special knowledge of it anyhow, we don't need to create a special
2335 // conversion object
2336 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 2337 {
e95354ec
VZ
2338 // don't convert at all
2339 return NULL;
2340 }
dccce9ea 2341
e95354ec
VZ
2342 // we trust OS to do conversion better than we can so try external
2343 // conversion methods first
2344 //
2345 // the full order is:
2346 // 1. OS conversion (iconv() under Unix or Win32 API)
2347 // 2. hard coded conversions for UTF
2348 // 3. wxEncodingConverter as fall back
2349
2350 // step (1)
2351#ifdef HAVE_ICONV
c547282d 2352#if !wxUSE_FONTMAP
e95354ec 2353 if ( m_name )
c547282d 2354#endif // !wxUSE_FONTMAP
e95354ec 2355 {
c547282d
VZ
2356 wxString name(m_name);
2357
2358#if wxUSE_FONTMAP
2359 if ( name.empty() )
2360 name = wxFontMapper::Get()->GetEncodingName(m_encoding);
2361#endif // wxUSE_FONTMAP
2362
2363 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
e95354ec
VZ
2364 if ( conv->IsOk() )
2365 return conv;
2366
2367 delete conv;
2368 }
2369#endif // HAVE_ICONV
2370
2371#ifdef wxHAVE_WIN32_MB2WC
2372 {
7608a683 2373#if wxUSE_FONTMAP
e95354ec
VZ
2374 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2375 : new wxMBConv_win32(m_encoding);
2376 if ( conv->IsOk() )
2377 return conv;
2378
2379 delete conv;
7608a683
WS
2380#else
2381 return NULL;
2382#endif
e95354ec
VZ
2383 }
2384#endif // wxHAVE_WIN32_MB2WC
d775fa82
WS
2385#if defined(__WXMAC__)
2386 {
2387 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
2388 {
2389
2390 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2391 : new wxMBConv_mac(m_encoding);
2392 if ( conv->IsOk() )
f7e98dee
RN
2393 return conv;
2394
2395 delete conv;
2396 }
2397 }
2398#endif
2399#if defined(__WXCOCOA__)
2400 {
2401 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2402 {
2403
2404 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2405 : new wxMBConv_cocoa(m_encoding);
2406 if ( conv->IsOk() )
d775fa82
WS
2407 return conv;
2408
2409 delete conv;
2410 }
335d31e0
SC
2411 }
2412#endif
e95354ec
VZ
2413 // step (2)
2414 wxFontEncoding enc = m_encoding;
2415#if wxUSE_FONTMAP
c547282d
VZ
2416 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2417 {
2418 // use "false" to suppress interactive dialogs -- we can be called from
2419 // anywhere and popping up a dialog from here is the last thing we want to
2420 // do
2421 enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
2422 }
e95354ec
VZ
2423#endif // wxUSE_FONTMAP
2424
2425 switch ( enc )
2426 {
2427 case wxFONTENCODING_UTF7:
2428 return new wxMBConvUTF7;
2429
2430 case wxFONTENCODING_UTF8:
2431 return new wxMBConvUTF8;
2432
e95354ec
VZ
2433 case wxFONTENCODING_UTF16BE:
2434 return new wxMBConvUTF16BE;
2435
2436 case wxFONTENCODING_UTF16LE:
2437 return new wxMBConvUTF16LE;
2438
e95354ec
VZ
2439 case wxFONTENCODING_UTF32BE:
2440 return new wxMBConvUTF32BE;
2441
2442 case wxFONTENCODING_UTF32LE:
2443 return new wxMBConvUTF32LE;
2444
2445 default:
2446 // nothing to do but put here to suppress gcc warnings
2447 ;
2448 }
2449
2450 // step (3)
2451#if wxUSE_FONTMAP
2452 {
2453 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2454 : new wxMBConv_wxwin(m_encoding);
2455 if ( conv->IsOk() )
2456 return conv;
2457
2458 delete conv;
2459 }
2460#endif // wxUSE_FONTMAP
2461
a58d4f4d
VS
2462 // NB: This is a hack to prevent deadlock. What could otherwise happen
2463 // in Unicode build: wxConvLocal creation ends up being here
2464 // because of some failure and logs the error. But wxLog will try to
2465 // attach timestamp, for which it will need wxConvLocal (to convert
2466 // time to char* and then wchar_t*), but that fails, tries to log
2467 // error, but wxLog has a (already locked) critical section that
2468 // guards static buffer.
2469 static bool alreadyLoggingError = false;
2470 if (!alreadyLoggingError)
2471 {
2472 alreadyLoggingError = true;
2473 wxLogError(_("Cannot convert from the charset '%s'!"),
2474 m_name ? m_name
e95354ec
VZ
2475 :
2476#if wxUSE_FONTMAP
2477 wxFontMapper::GetEncodingDescription(m_encoding).c_str()
2478#else // !wxUSE_FONTMAP
2479 wxString::Format(_("encoding %s"), m_encoding).c_str()
2480#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2481 );
a58d4f4d
VS
2482 alreadyLoggingError = false;
2483 }
e95354ec
VZ
2484
2485 return NULL;
2486}
2487
2488void wxCSConv::CreateConvIfNeeded() const
2489{
2490 if ( m_deferred )
2491 {
2492 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
2493
2494#if wxUSE_INTL
2495 // if we don't have neither the name nor the encoding, use the default
2496 // encoding for this system
2497 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2498 {
4d312c22 2499 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
2500 }
2501#endif // wxUSE_INTL
2502
e95354ec
VZ
2503 self->m_convReal = DoCreate();
2504 self->m_deferred = false;
6001e347 2505 }
6001e347
RR
2506}
2507
2508size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2509{
e95354ec 2510 CreateConvIfNeeded();
dccce9ea 2511
e95354ec
VZ
2512 if (m_convReal)
2513 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
2514
2515 // latin-1 (direct)
4def3b35 2516 size_t len = strlen(psz);
dccce9ea 2517
f1339c56
RR
2518 if (buf)
2519 {
4def3b35 2520 for (size_t c = 0; c <= len; c++)
f1339c56
RR
2521 buf[c] = (unsigned char)(psz[c]);
2522 }
dccce9ea 2523
f1339c56 2524 return len;
6001e347
RR
2525}
2526
2527size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2528{
e95354ec 2529 CreateConvIfNeeded();
dccce9ea 2530
e95354ec
VZ
2531 if (m_convReal)
2532 return m_convReal->WC2MB(buf, psz, n);
1cd52418 2533
f1339c56 2534 // latin-1 (direct)
f8d791e0 2535 const size_t len = wxWcslen(psz);
f1339c56
RR
2536 if (buf)
2537 {
4def3b35 2538 for (size_t c = 0; c <= len; c++)
24642831
VS
2539 {
2540 if (psz[c] > 0xFF)
2541 return (size_t)-1;
907173e5 2542 buf[c] = (char)psz[c];
24642831
VS
2543 }
2544 }
2545 else
2546 {
2547 for (size_t c = 0; c <= len; c++)
2548 {
2549 if (psz[c] > 0xFF)
2550 return (size_t)-1;
2551 }
f1339c56 2552 }
dccce9ea 2553
f1339c56 2554 return len;
6001e347
RR
2555}
2556
bde4baac
VZ
2557// ----------------------------------------------------------------------------
2558// globals
2559// ----------------------------------------------------------------------------
2560
2561#ifdef __WINDOWS__
2562 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
2563#elif defined(__WXMAC__) && !defined(__MACH__)
2564 static wxMBConv_mac wxConvLibcObj ;
bde4baac 2565#else
dcc8fac0 2566 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
2567#endif
2568
2569static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2570static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2571static wxMBConvUTF7 wxConvUTF7Obj;
2572static wxMBConvUTF8 wxConvUTF8Obj;
2573
2574
2575WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2576WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2577WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2578WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2579WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2580WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2581
2582#else // !wxUSE_WCHAR_T
2583
2584// stand-ins in absence of wchar_t
2585WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2586 wxConvISO8859_1,
2587 wxConvLocal,
2588 wxConvUTF8;
2589
2590#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
2591
2592