]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
Warning fix.
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
15f2ee32
RN
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5// Ryan Norton, Fredrik Roubert (UTF7)
6001e347
RR
6// Modified by:
7// Created: 29/01/98
8// RCS-ID: $Id$
e95354ec
VZ
9// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10// (c) 2000-2003 Vadim Zeitlin
15f2ee32 11// (c) 2004 Ryan Norton, Fredrik Roubert
65571936 12// Licence: wxWindows licence
6001e347
RR
13/////////////////////////////////////////////////////////////////////////////
14
f6bcfd97
BP
15// ============================================================================
16// declarations
17// ============================================================================
18
19// ----------------------------------------------------------------------------
20// headers
21// ----------------------------------------------------------------------------
22
14f355c2 23#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
6001e347
RR
24 #pragma implementation "strconv.h"
25#endif
26
27// For compilers that support precompilation, includes "wx.h".
28#include "wx/wxprec.h"
29
30#ifdef __BORLANDC__
31 #pragma hdrstop
32#endif
33
373658eb
VZ
34#ifndef WX_PRECOMP
35 #include "wx/intl.h"
36 #include "wx/log.h"
37#endif // WX_PRECOMP
38
bde4baac
VZ
39#include "wx/strconv.h"
40
41#if wxUSE_WCHAR_T
42
0a1c1e62 43#ifdef __WXMSW__
373658eb 44 #include "wx/msw/private.h"
7608a683
WS
45#endif
46
47#ifdef __WINDOWS__
13dd924a 48 #include "wx/msw/missing.h"
0a1c1e62
GRG
49#endif
50
1c193821 51#ifndef __WXWINCE__
1cd52418 52#include <errno.h>
1c193821
JS
53#endif
54
6001e347
RR
55#include <ctype.h>
56#include <string.h>
57#include <stdlib.h>
58
e95354ec
VZ
59#if defined(__WIN32__) && !defined(__WXMICROWIN__)
60 #define wxHAVE_WIN32_MB2WC
61#endif // __WIN32__ but !__WXMICROWIN__
62
373658eb
VZ
63// ----------------------------------------------------------------------------
64// headers
65// ----------------------------------------------------------------------------
7af284fd 66
6001e347 67#ifdef __SALFORDC__
373658eb 68 #include <clib.h>
6001e347
RR
69#endif
70
b040e242 71#ifdef HAVE_ICONV
373658eb 72 #include <iconv.h>
b1d547eb 73 #include "wx/thread.h"
1cd52418 74#endif
1cd52418 75
373658eb
VZ
76#include "wx/encconv.h"
77#include "wx/fontmap.h"
7608a683 78#include "wx/utils.h"
373658eb 79
335d31e0 80#ifdef __WXMAC__
4227afa4
SC
81#include <ATSUnicode.h>
82#include <TextCommon.h>
83#include <TextEncodingConverter.h>
335d31e0
SC
84
85#include "wx/mac/private.h" // includes mac headers
86#endif
373658eb
VZ
87// ----------------------------------------------------------------------------
88// macros
89// ----------------------------------------------------------------------------
3e61dfb0 90
1cd52418 91#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
3a0d76bc 92#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
1cd52418
OK
93
94#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
95 #define WC_NAME "UCS4"
96 #define WC_BSWAP BSWAP_UCS4
97 #ifdef WORDS_BIGENDIAN
98 #define WC_NAME_BEST "UCS-4BE"
99 #else
100 #define WC_NAME_BEST "UCS-4LE"
101 #endif
1cd52418 102#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
103 #define WC_NAME "UTF16"
104 #define WC_BSWAP BSWAP_UTF16
a3f2769e 105 #define WC_UTF16
3a0d76bc
VS
106 #ifdef WORDS_BIGENDIAN
107 #define WC_NAME_BEST "UTF-16BE"
108 #else
109 #define WC_NAME_BEST "UTF-16LE"
110 #endif
bab1e722 111#else // sizeof(wchar_t) != 2 nor 4
bde4baac
VZ
112 // does this ever happen?
113 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1cd52418
OK
114#endif
115
373658eb
VZ
116// ============================================================================
117// implementation
118// ============================================================================
119
120// ----------------------------------------------------------------------------
c91830cb 121// UTF-16 en/decoding to/from UCS-4
373658eb 122// ----------------------------------------------------------------------------
6001e347 123
b0a6bb75 124
c91830cb 125static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418 126{
dccce9ea 127 if (input<=0xffff)
4def3b35 128 {
999836aa
VZ
129 if (output)
130 *output = (wxUint16) input;
4def3b35 131 return 1;
dccce9ea
VZ
132 }
133 else if (input>=0x110000)
4def3b35
VS
134 {
135 return (size_t)-1;
dccce9ea
VZ
136 }
137 else
4def3b35 138 {
dccce9ea 139 if (output)
4def3b35 140 {
c91830cb 141 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
999836aa 142 *output = (wxUint16) ((input&0x3ff)+0xdc00);
4def3b35
VS
143 }
144 return 2;
1cd52418 145 }
1cd52418
OK
146}
147
c91830cb 148static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418 149{
dccce9ea 150 if ((*input<0xd800) || (*input>0xdfff))
4def3b35
VS
151 {
152 output = *input;
153 return 1;
dccce9ea
VZ
154 }
155 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
4def3b35
VS
156 {
157 output = *input;
158 return (size_t)-1;
dccce9ea
VZ
159 }
160 else
4def3b35
VS
161 {
162 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
163 return 2;
164 }
1cd52418
OK
165}
166
b0a6bb75 167
f6bcfd97 168// ----------------------------------------------------------------------------
6001e347 169// wxMBConv
f6bcfd97 170// ----------------------------------------------------------------------------
2c53a80a
WS
171
172wxMBConv::~wxMBConv()
173{
174 // nothing to do here (necessary for Darwin linking probably)
175}
6001e347 176
6001e347
RR
177const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
178{
2b5f62a0 179 if ( psz )
6001e347 180 {
2b5f62a0
VZ
181 // calculate the length of the buffer needed first
182 size_t nLen = MB2WC(NULL, psz, 0);
183 if ( nLen != (size_t)-1 )
184 {
185 // now do the actual conversion
186 wxWCharBuffer buf(nLen);
635f33ce
VS
187 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
188 if ( nLen != (size_t)-1 )
189 {
190 return buf;
191 }
2b5f62a0 192 }
f6bcfd97 193 }
2b5f62a0
VZ
194
195 wxWCharBuffer buf((wchar_t *)NULL);
196
197 return buf;
6001e347
RR
198}
199
e5cceba0 200const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
6001e347 201{
2b5f62a0
VZ
202 if ( pwz )
203 {
204 size_t nLen = WC2MB(NULL, pwz, 0);
205 if ( nLen != (size_t)-1 )
206 {
c91830cb 207 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
635f33ce
VS
208 nLen = WC2MB(buf.data(), pwz, nLen + 4);
209 if ( nLen != (size_t)-1 )
210 {
211 return buf;
212 }
2b5f62a0
VZ
213 }
214 }
215
216 wxCharBuffer buf((char *)NULL);
e5cceba0 217
e5cceba0 218 return buf;
6001e347
RR
219}
220
f5fb6871 221const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 222{
f5fb6871
RN
223 wxASSERT(pOutSize != NULL);
224
e4e3bbb4
RN
225 const char* szEnd = szString + nStringLen + 1;
226 const char* szPos = szString;
227 const char* szStart = szPos;
228
229 size_t nActualLength = 0;
f5fb6871
RN
230 size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
231
232 wxWCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
233
234 //Convert the string until the length() is reached, continuing the
235 //loop every time a null character is reached
236 while(szPos != szEnd)
237 {
238 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
239
240 //Get the length of the current (sub)string
241 size_t nLen = MB2WC(NULL, szPos, 0);
242
243 //Invalid conversion?
244 if( nLen == (size_t)-1 )
f5fb6871
RN
245 {
246 *pOutSize = 0;
247 theBuffer.data()[0u] = wxT('\0');
248 return theBuffer;
249 }
250
e4e3bbb4
RN
251
252 //Increase the actual length (+1 for current null character)
253 nActualLength += nLen + 1;
254
f5fb6871
RN
255 //if buffer too big, realloc the buffer
256 if (nActualLength > (nCurrentSize+1))
257 {
258 wxWCharBuffer theNewBuffer(nCurrentSize << 1);
259 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
260 theBuffer = theNewBuffer;
261 nCurrentSize <<= 1;
262 }
263
264 //Convert the current (sub)string
265 if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 266 {
f5fb6871
RN
267 *pOutSize = 0;
268 theBuffer.data()[0u] = wxT('\0');
269 return theBuffer;
e4e3bbb4
RN
270 }
271
272 //Increment to next (sub)string
273 //Note that we have to use strlen here instead of nLen
274 //here because XX2XX gives us the size of the output buffer,
275 //not neccessarly the length of the string
276 szPos += strlen(szPos) + 1;
277 }
278
f5fb6871
RN
279 //success - return actual length and the buffer
280 *pOutSize = nActualLength;
281 return theBuffer;
e4e3bbb4
RN
282}
283
f5fb6871 284const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
e4e3bbb4 285{
f5fb6871
RN
286 wxASSERT(pOutSize != NULL);
287
e4e3bbb4
RN
288 const wchar_t* szEnd = szString + nStringLen + 1;
289 const wchar_t* szPos = szString;
290 const wchar_t* szStart = szPos;
291
292 size_t nActualLength = 0;
f5fb6871
RN
293 size_t nCurrentSize = nStringLen << 2; //try * 4 first
294
295 wxCharBuffer theBuffer(nCurrentSize);
e4e3bbb4
RN
296
297 //Convert the string until the length() is reached, continuing the
298 //loop every time a null character is reached
299 while(szPos != szEnd)
300 {
301 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
302
303 //Get the length of the current (sub)string
304 size_t nLen = WC2MB(NULL, szPos, 0);
305
306 //Invalid conversion?
307 if( nLen == (size_t)-1 )
f5fb6871
RN
308 {
309 *pOutSize = 0;
310 theBuffer.data()[0u] = wxT('\0');
311 return theBuffer;
312 }
e4e3bbb4
RN
313
314 //Increase the actual length (+1 for current null character)
315 nActualLength += nLen + 1;
316
f5fb6871
RN
317 //if buffer too big, realloc the buffer
318 if (nActualLength > (nCurrentSize+1))
319 {
320 wxCharBuffer theNewBuffer(nCurrentSize << 1);
321 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
322 theBuffer = theNewBuffer;
323 nCurrentSize <<= 1;
324 }
325
326 //Convert the current (sub)string
327 if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
e4e3bbb4 328 {
f5fb6871
RN
329 *pOutSize = 0;
330 theBuffer.data()[0u] = wxT('\0');
331 return theBuffer;
e4e3bbb4
RN
332 }
333
334 //Increment to next (sub)string
335 //Note that we have to use wxWcslen here instead of nLen
336 //here because XX2XX gives us the size of the output buffer,
337 //not neccessarly the length of the string
338 szPos += wxWcslen(szPos) + 1;
339 }
340
f5fb6871
RN
341 //success - return actual length and the buffer
342 *pOutSize = nActualLength;
343 return theBuffer;
e4e3bbb4
RN
344}
345
6001e347 346// ----------------------------------------------------------------------------
bde4baac 347// wxMBConvLibc
6001e347
RR
348// ----------------------------------------------------------------------------
349
bde4baac
VZ
350size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
351{
352 return wxMB2WC(buf, psz, n);
353}
354
355size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
356{
357 return wxWC2MB(buf, psz, n);
358}
bde4baac 359// ----------------------------------------------------------------------------
15f2ee32 360// UTF-7
bde4baac 361// ----------------------------------------------------------------------------
6001e347 362
15f2ee32 363// Implementation (C) 2004 Fredrik Roubert
6001e347 364
15f2ee32
RN
//
// BASE64 decoding table
//
// Indexed by the (unsigned) character value: gives the 6 bit value of a
// BASE64 character or 0xff for all characters which are not BASE64 ones.
//
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0' to '9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x5f: 'A' to 'Z'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x7f: 'a' to 'z'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
403
404size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
405{
15f2ee32
RN
406 size_t len = 0;
407
408 while (*psz && ((!buf) || (len < n)))
409 {
410 unsigned char cc = *psz++;
411 if (cc != '+')
412 {
413 // plain ASCII char
414 if (buf)
415 *buf++ = cc;
416 len++;
417 }
418 else if (*psz == '-')
419 {
420 // encoded plus sign
421 if (buf)
422 *buf++ = cc;
423 len++;
424 psz++;
425 }
426 else
427 {
428 // BASE64 encoded string
429 bool lsb;
430 unsigned char c;
431 unsigned int d, l;
432 for (lsb = false, d = 0, l = 0;
433 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
434 {
435 d <<= 6;
436 d += cc;
437 for (l += 6; l >= 8; lsb = !lsb)
438 {
6356d52a 439 c = (unsigned char)((d >> (l -= 8)) % 256);
15f2ee32
RN
440 if (lsb)
441 {
442 if (buf)
443 *buf++ |= c;
444 len ++;
445 }
446 else
447 if (buf)
6356d52a 448 *buf = (wchar_t)(c << 8);
15f2ee32
RN
449 }
450 }
451 if (*psz == '-')
452 psz++;
453 }
454 }
455 if (buf && (len < n))
456 *buf = 0;
457 return len;
6001e347
RR
458}
459
15f2ee32
RN
//
// BASE64 encoding table
//
// Maps a 6 bit value to the BASE64 character representing it.
//
static const unsigned char utf7enb64[] =
{
    // 0 - 25
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 26 - 51
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // 52 - 61
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 62 - 63
    '+', '/'
};
474
//
// UTF-7 encoding table
//
// Indexed by the ASCII character code, the values mean:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    /* 0x00 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    /* 0x10 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x20 */ 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    /* 0x40 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    /* 0x60 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
494
667e5b3e 495size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
15f2ee32
RN
496{
497
498
499 size_t len = 0;
500
501 while (*psz && ((!buf) || (len < n)))
502 {
503 wchar_t cc = *psz++;
504 if (cc < 0x80 && utf7encode[cc] < 1)
505 {
506 // plain ASCII char
507 if (buf)
508 *buf++ = (char)cc;
509 len++;
510 }
511#ifndef WC_UTF16
79c78d42 512 else if (((wxUint32)cc) > 0xffff)
6e394fc6 513 {
15f2ee32
RN
514 // no surrogate pair generation (yet?)
515 return (size_t)-1;
516 }
517#endif
518 else
519 {
520 if (buf)
521 *buf++ = '+';
522 len++;
523 if (cc != '+')
524 {
525 // BASE64 encode string
526 unsigned int lsb, d, l;
527 for (d = 0, l = 0;; psz++)
528 {
529 for (lsb = 0; lsb < 2; lsb ++)
530 {
531 d <<= 8;
532 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
533
534 for (l += 8; l >= 6; )
535 {
536 l -= 6;
537 if (buf)
538 *buf++ = utf7enb64[(d >> l) % 64];
539 len++;
540 }
541 }
542 cc = *psz;
543 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
544 break;
545 }
546 if (l != 0)
547 {
548 if (buf)
549 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
550 len++;
551 }
552 }
553 if (buf)
554 *buf++ = '-';
555 len++;
556 }
557 }
558 if (buf && (len < n))
559 *buf = 0;
560 return len;
6001e347
RR
561}
562
f6bcfd97 563// ----------------------------------------------------------------------------
6001e347 564// UTF-8
f6bcfd97 565// ----------------------------------------------------------------------------
6001e347 566
dccce9ea 567static wxUint32 utf8_max[]=
4def3b35 568 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347
RR
569
570size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
571{
4def3b35
VS
572 size_t len = 0;
573
dccce9ea 574 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
575 {
576 unsigned char cc = *psz++, fc = cc;
577 unsigned cnt;
dccce9ea 578 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 579 fc <<= 1;
dccce9ea 580 if (!cnt)
4def3b35
VS
581 {
582 // plain ASCII char
dccce9ea 583 if (buf)
4def3b35
VS
584 *buf++ = cc;
585 len++;
dccce9ea
VZ
586 }
587 else
4def3b35
VS
588 {
589 cnt--;
dccce9ea 590 if (!cnt)
4def3b35
VS
591 {
592 // invalid UTF-8 sequence
593 return (size_t)-1;
dccce9ea
VZ
594 }
595 else
4def3b35
VS
596 {
597 unsigned ocnt = cnt - 1;
598 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 599 while (cnt--)
4def3b35
VS
600 {
601 cc = *psz++;
dccce9ea 602 if ((cc & 0xC0) != 0x80)
4def3b35
VS
603 {
604 // invalid UTF-8 sequence
605 return (size_t)-1;
606 }
607 res = (res << 6) | (cc & 0x3f);
608 }
dccce9ea 609 if (res <= utf8_max[ocnt])
4def3b35
VS
610 {
611 // illegal UTF-8 encoding
612 return (size_t)-1;
613 }
1cd52418 614#ifdef WC_UTF16
b5153fd8
VZ
615 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
616 size_t pa = encode_utf16(res, (wxUint16 *)buf);
4def3b35
VS
617 if (pa == (size_t)-1)
618 return (size_t)-1;
dccce9ea 619 if (buf)
4def3b35
VS
620 buf += pa;
621 len += pa;
373658eb 622#else // !WC_UTF16
dccce9ea 623 if (buf)
4def3b35
VS
624 *buf++ = res;
625 len++;
373658eb 626#endif // WC_UTF16/!WC_UTF16
4def3b35
VS
627 }
628 }
6001e347 629 }
dccce9ea 630 if (buf && (len < n))
4def3b35
VS
631 *buf = 0;
632 return len;
6001e347
RR
633}
634
635size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
636{
4def3b35 637 size_t len = 0;
6001e347 638
dccce9ea 639 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
640 {
641 wxUint32 cc;
1cd52418 642#ifdef WC_UTF16
b5153fd8
VZ
643 // cast is ok for WC_UTF16
644 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
4def3b35 645 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 646#else
4def3b35
VS
647 cc=(*psz++) & 0x7fffffff;
648#endif
649 unsigned cnt;
650 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
dccce9ea 651 if (!cnt)
4def3b35
VS
652 {
653 // plain ASCII char
dccce9ea 654 if (buf)
574c939e 655 *buf++ = (char) cc;
4def3b35 656 len++;
dccce9ea
VZ
657 }
658
659 else
4def3b35
VS
660 {
661 len += cnt + 1;
dccce9ea 662 if (buf)
4def3b35 663 {
574c939e 664 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
4def3b35 665 while (cnt--)
574c939e 666 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
4def3b35
VS
667 }
668 }
6001e347 669 }
4def3b35
VS
670
671 if (buf && (len<n)) *buf = 0;
adb45366 672
4def3b35 673 return len;
6001e347
RR
674}
675
c91830cb
VZ
676
677
678
679// ----------------------------------------------------------------------------
680// UTF-16
681// ----------------------------------------------------------------------------
682
683#ifdef WORDS_BIGENDIAN
bde4baac
VZ
684 #define wxMBConvUTF16straight wxMBConvUTF16BE
685 #define wxMBConvUTF16swap wxMBConvUTF16LE
c91830cb 686#else
bde4baac
VZ
687 #define wxMBConvUTF16swap wxMBConvUTF16BE
688 #define wxMBConvUTF16straight wxMBConvUTF16LE
c91830cb
VZ
689#endif
690
691
c91830cb
VZ
692#ifdef WC_UTF16
693
c91830cb
VZ
694// copy 16bit MB to 16bit String
695size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
696{
697 size_t len=0;
698
699 while (*(wxUint16*)psz && (!buf || len < n))
700 {
701 if (buf)
702 *buf++ = *(wxUint16*)psz;
703 len++;
704
705 psz += sizeof(wxUint16);
706 }
707 if (buf && len<n) *buf=0;
708
709 return len;
710}
711
712
713// copy 16bit String to 16bit MB
714size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
715{
716 size_t len=0;
717
718 while (*psz && (!buf || len < n))
719 {
720 if (buf)
721 {
722 *(wxUint16*)buf = *psz;
723 buf += sizeof(wxUint16);
724 }
725 len += sizeof(wxUint16);
726 psz++;
727 }
728 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
729
730 return len;
731}
732
733
734// swap 16bit MB to 16bit String
735size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
736{
737 size_t len=0;
738
739 while (*(wxUint16*)psz && (!buf || len < n))
740 {
741 if (buf)
742 {
743 ((char *)buf)[0] = psz[1];
744 ((char *)buf)[1] = psz[0];
745 buf++;
746 }
747 len++;
748 psz += sizeof(wxUint16);
749 }
750 if (buf && len<n) *buf=0;
751
752 return len;
753}
754
755
756// swap 16bit MB to 16bit String
757size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
758{
759 size_t len=0;
760
761 while (*psz && (!buf || len < n))
762 {
763 if (buf)
764 {
765 *buf++ = ((char*)psz)[1];
766 *buf++ = ((char*)psz)[0];
767 }
768 len += sizeof(wxUint16);
769 psz++;
770 }
771 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
772
773 return len;
774}
775
776
777#else // WC_UTF16
778
779
780// copy 16bit MB to 32bit String
781size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
782{
783 size_t len=0;
784
785 while (*(wxUint16*)psz && (!buf || len < n))
786 {
787 wxUint32 cc;
788 size_t pa=decode_utf16((wxUint16*)psz, cc);
789 if (pa == (size_t)-1)
790 return pa;
791
792 if (buf)
793 *buf++ = cc;
794 len++;
795 psz += pa * sizeof(wxUint16);
796 }
797 if (buf && len<n) *buf=0;
798
799 return len;
800}
801
802
803// copy 32bit String to 16bit MB
804size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
805{
806 size_t len=0;
807
808 while (*psz && (!buf || len < n))
809 {
810 wxUint16 cc[2];
811 size_t pa=encode_utf16(*psz, cc);
812
813 if (pa == (size_t)-1)
814 return pa;
815
816 if (buf)
817 {
69b80d28 818 *(wxUint16*)buf = cc[0];
b5153fd8 819 buf += sizeof(wxUint16);
c91830cb 820 if (pa > 1)
69b80d28
VZ
821 {
822 *(wxUint16*)buf = cc[1];
823 buf += sizeof(wxUint16);
824 }
c91830cb
VZ
825 }
826
827 len += pa*sizeof(wxUint16);
828 psz++;
829 }
830 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
831
832 return len;
833}
834
835
836// swap 16bit MB to 32bit String
837size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
838{
839 size_t len=0;
840
841 while (*(wxUint16*)psz && (!buf || len < n))
842 {
843 wxUint32 cc;
844 char tmp[4];
845 tmp[0]=psz[1]; tmp[1]=psz[0];
846 tmp[2]=psz[3]; tmp[3]=psz[2];
847
848 size_t pa=decode_utf16((wxUint16*)tmp, cc);
849 if (pa == (size_t)-1)
850 return pa;
851
852 if (buf)
853 *buf++ = cc;
854
855 len++;
856 psz += pa * sizeof(wxUint16);
857 }
858 if (buf && len<n) *buf=0;
859
860 return len;
861}
862
863
864// swap 32bit String to 16bit MB
865size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
866{
867 size_t len=0;
868
869 while (*psz && (!buf || len < n))
870 {
871 wxUint16 cc[2];
872 size_t pa=encode_utf16(*psz, cc);
873
874 if (pa == (size_t)-1)
875 return pa;
876
877 if (buf)
878 {
879 *buf++ = ((char*)cc)[1];
880 *buf++ = ((char*)cc)[0];
881 if (pa > 1)
882 {
883 *buf++ = ((char*)cc)[3];
884 *buf++ = ((char*)cc)[2];
885 }
886 }
887
888 len += pa*sizeof(wxUint16);
889 psz++;
890 }
891 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
892
893 return len;
894}
895
896#endif // WC_UTF16
897
898
899// ----------------------------------------------------------------------------
900// UTF-32
901// ----------------------------------------------------------------------------
902
903#ifdef WORDS_BIGENDIAN
904#define wxMBConvUTF32straight wxMBConvUTF32BE
905#define wxMBConvUTF32swap wxMBConvUTF32LE
906#else
907#define wxMBConvUTF32swap wxMBConvUTF32BE
908#define wxMBConvUTF32straight wxMBConvUTF32LE
909#endif
910
911
912WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
913WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
914
915
916#ifdef WC_UTF16
917
918// copy 32bit MB to 16bit String
919size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
920{
921 size_t len=0;
922
923 while (*(wxUint32*)psz && (!buf || len < n))
924 {
925 wxUint16 cc[2];
926
927 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
928 if (pa == (size_t)-1)
929 return pa;
930
931 if (buf)
932 {
933 *buf++ = cc[0];
934 if (pa > 1)
935 *buf++ = cc[1];
936 }
937 len += pa;
938 psz += sizeof(wxUint32);
939 }
940 if (buf && len<n) *buf=0;
941
942 return len;
943}
944
945
946// copy 16bit String to 32bit MB
947size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
948{
949 size_t len=0;
950
951 while (*psz && (!buf || len < n))
952 {
953 wxUint32 cc;
954
b5153fd8
VZ
955 // cast is ok for WC_UTF16
956 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
c91830cb
VZ
957 if (pa == (size_t)-1)
958 return pa;
959
960 if (buf)
961 {
962 *(wxUint32*)buf = cc;
963 buf += sizeof(wxUint32);
964 }
965 len += sizeof(wxUint32);
966 psz += pa;
967 }
b5153fd8
VZ
968
969 if (buf && len<=n-sizeof(wxUint32))
970 *(wxUint32*)buf=0;
c91830cb
VZ
971
972 return len;
973}
974
975
976
977// swap 32bit MB to 16bit String
978size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
979{
980 size_t len=0;
981
982 while (*(wxUint32*)psz && (!buf || len < n))
983 {
984 char tmp[4];
985 tmp[0] = psz[3]; tmp[1] = psz[2];
986 tmp[2] = psz[1]; tmp[3] = psz[0];
987
988
989 wxUint16 cc[2];
990
991 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
992 if (pa == (size_t)-1)
993 return pa;
994
995 if (buf)
996 {
997 *buf++ = cc[0];
998 if (pa > 1)
999 *buf++ = cc[1];
1000 }
1001 len += pa;
1002 psz += sizeof(wxUint32);
1003 }
b5153fd8
VZ
1004
1005 if (buf && len<n)
1006 *buf=0;
c91830cb
VZ
1007
1008 return len;
1009}
1010
1011
1012// swap 16bit String to 32bit MB
1013size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1014{
1015 size_t len=0;
1016
1017 while (*psz && (!buf || len < n))
1018 {
1019 char cc[4];
1020
b5153fd8
VZ
1021 // cast is ok for WC_UTF16
1022 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
c91830cb
VZ
1023 if (pa == (size_t)-1)
1024 return pa;
1025
1026 if (buf)
1027 {
1028 *buf++ = cc[3];
1029 *buf++ = cc[2];
1030 *buf++ = cc[1];
1031 *buf++ = cc[0];
1032 }
1033 len += sizeof(wxUint32);
1034 psz += pa;
1035 }
b5153fd8
VZ
1036
1037 if (buf && len<=n-sizeof(wxUint32))
1038 *(wxUint32*)buf=0;
c91830cb
VZ
1039
1040 return len;
1041}
1042
1043#else // WC_UTF16
1044
1045
1046// copy 32bit MB to 32bit String
1047size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1048{
1049 size_t len=0;
1050
1051 while (*(wxUint32*)psz && (!buf || len < n))
1052 {
1053 if (buf)
1054 *buf++ = *(wxUint32*)psz;
1055 len++;
1056 psz += sizeof(wxUint32);
1057 }
b5153fd8
VZ
1058
1059 if (buf && len<n)
1060 *buf=0;
c91830cb
VZ
1061
1062 return len;
1063}
1064
1065
1066// copy 32bit String to 32bit MB
1067size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1068{
1069 size_t len=0;
1070
1071 while (*psz && (!buf || len < n))
1072 {
1073 if (buf)
1074 {
1075 *(wxUint32*)buf = *psz;
1076 buf += sizeof(wxUint32);
1077 }
1078
1079 len += sizeof(wxUint32);
1080 psz++;
1081 }
1082
b5153fd8
VZ
1083 if (buf && len<=n-sizeof(wxUint32))
1084 *(wxUint32*)buf=0;
c91830cb
VZ
1085
1086 return len;
1087}
1088
1089
1090// swap 32bit MB to 32bit String
1091size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1092{
1093 size_t len=0;
1094
1095 while (*(wxUint32*)psz && (!buf || len < n))
1096 {
1097 if (buf)
1098 {
1099 ((char *)buf)[0] = psz[3];
1100 ((char *)buf)[1] = psz[2];
1101 ((char *)buf)[2] = psz[1];
1102 ((char *)buf)[3] = psz[0];
1103 buf++;
1104 }
1105 len++;
1106 psz += sizeof(wxUint32);
1107 }
b5153fd8
VZ
1108
1109 if (buf && len<n)
1110 *buf=0;
c91830cb
VZ
1111
1112 return len;
1113}
1114
1115
1116// swap 32bit String to 32bit MB
1117size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1118{
1119 size_t len=0;
1120
1121 while (*psz && (!buf || len < n))
1122 {
1123 if (buf)
1124 {
1125 *buf++ = ((char *)psz)[3];
1126 *buf++ = ((char *)psz)[2];
1127 *buf++ = ((char *)psz)[1];
1128 *buf++ = ((char *)psz)[0];
1129 }
1130 len += sizeof(wxUint32);
1131 psz++;
1132 }
b5153fd8
VZ
1133
1134 if (buf && len<=n-sizeof(wxUint32))
1135 *(wxUint32*)buf=0;
c91830cb
VZ
1136
1137 return len;
1138}
1139
1140
1141#endif // WC_UTF16
1142
1143
36acb880
VZ
1144// ============================================================================
1145// The classes doing conversion using the iconv_xxx() functions
1146// ============================================================================
3caec1bb 1147
b040e242 1148#ifdef HAVE_ICONV
3a0d76bc 1149
b1d547eb
VS
1150// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1151// E2BIG if output buffer is _exactly_ as big as needed. Such case is
1152// (unless there's yet another bug in glibc) the only case when iconv()
1153// returns with (size_t)-1 (which means error) and says there are 0 bytes
1154// left in the input buffer -- when _real_ error occurs,
1155// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1156// iconv() failure.
3caec1bb
VS
1157// [This bug does not appear in glibc 2.2.]
1158#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1159#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1160 (errno != E2BIG || bufLeft != 0))
1161#else
1162#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1163#endif
1164
ab217dba 1165#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880
VZ
1166
1167// ----------------------------------------------------------------------------
e95354ec 1168// wxMBConv_iconv: encapsulates an iconv character set
36acb880
VZ
1169// ----------------------------------------------------------------------------
1170
e95354ec 1171class wxMBConv_iconv : public wxMBConv
1cd52418
OK
1172{
1173public:
e95354ec
VZ
1174 wxMBConv_iconv(const wxChar *name);
1175 virtual ~wxMBConv_iconv();
36acb880 1176
bde4baac
VZ
1177 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1178 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
36acb880 1179
e95354ec 1180 bool IsOk() const
36acb880
VZ
1181 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1182
1183protected:
1184 // the iconv handlers used to translate from multibyte to wide char and in
1185 // the other direction
1186 iconv_t m2w,
1187 w2m;
b1d547eb
VS
1188#if wxUSE_THREADS
1189 // guards access to m2w and w2m objects
1190 wxMutex m_iconvMutex;
1191#endif
36acb880
VZ
1192
1193private:
e95354ec 1194 // the name (for iconv_open()) of a wide char charset -- if none is
36acb880
VZ
1195 // available on this machine, it will remain NULL
1196 static const char *ms_wcCharsetName;
1197
1198 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1199 // different endian-ness than the native one
405d8f46 1200 static bool ms_wcNeedsSwap;
36acb880
VZ
1201};
1202
e95354ec
VZ
1203const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1204bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880 1205
e95354ec 1206wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
36acb880 1207{
04c79127
RR
1208 // Do it the hard way
1209 char cname[100];
1210 for (size_t i = 0; i < wxStrlen(name)+1; i++)
1211 cname[i] = (char) name[i];
1212
36acb880
VZ
1213 // check for charset that represents wchar_t:
1214 if (ms_wcCharsetName == NULL)
f1339c56 1215 {
e95354ec 1216 ms_wcNeedsSwap = false;
dccce9ea 1217
36acb880
VZ
1218 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1219 ms_wcCharsetName = WC_NAME_BEST;
04c79127 1220 m2w = iconv_open(ms_wcCharsetName, cname);
3a0d76bc 1221
36acb880
VZ
1222 if (m2w == (iconv_t)-1)
1223 {
1224 // try charset w/o bytesex info (e.g. "UCS4")
1225 // and check for bytesex ourselves:
1226 ms_wcCharsetName = WC_NAME;
04c79127 1227 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880
VZ
1228
1229 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
1230 if (m2w == (iconv_t)-1)
1231 {
36acb880 1232 ms_wcCharsetName = "WCHAR_T";
04c79127 1233 m2w = iconv_open(ms_wcCharsetName, cname);
36acb880 1234 }
3a0d76bc 1235
36acb880
VZ
1236 if (m2w != (iconv_t)-1)
1237 {
1238 char buf[2], *bufPtr;
1239 wchar_t wbuf[2], *wbufPtr;
1240 size_t insz, outsz;
1241 size_t res;
1242
1243 buf[0] = 'A';
1244 buf[1] = 0;
1245 wbuf[0] = 0;
1246 insz = 2;
1247 outsz = SIZEOF_WCHAR_T * 2;
1248 wbufPtr = wbuf;
1249 bufPtr = buf;
1250
1251 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1252 (char**)&wbufPtr, &outsz);
1253
1254 if (ICONV_FAILED(res, insz))
3a0d76bc 1255 {
36acb880
VZ
1256 ms_wcCharsetName = NULL;
1257 wxLogLastError(wxT("iconv"));
2b5f62a0 1258 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
3a0d76bc
VS
1259 }
1260 else
1261 {
36acb880 1262 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
1263 }
1264 }
36acb880
VZ
1265 else
1266 {
1267 ms_wcCharsetName = NULL;
373658eb 1268
77ffb593 1269 // VS: we must not output an error here, since wxWidgets will safely
957686c8
VS
1270 // fall back to using wxEncodingConverter.
1271 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1272 //wxLogError(
36acb880 1273 }
3a0d76bc 1274 }
36acb880 1275 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 1276 }
36acb880 1277 else // we already have ms_wcCharsetName
3caec1bb 1278 {
04c79127 1279 m2w = iconv_open(ms_wcCharsetName, cname);
f1339c56 1280 }
dccce9ea 1281
36acb880
VZ
1282 // NB: don't ever pass NULL to iconv_open(), it may crash!
1283 if ( ms_wcCharsetName )
f1339c56 1284 {
04c79127 1285 w2m = iconv_open( cname, ms_wcCharsetName);
36acb880 1286 }
405d8f46
VZ
1287 else
1288 {
1289 w2m = (iconv_t)-1;
1290 }
36acb880 1291}
3caec1bb 1292
e95354ec 1293wxMBConv_iconv::~wxMBConv_iconv()
36acb880
VZ
1294{
1295 if ( m2w != (iconv_t)-1 )
1296 iconv_close(m2w);
1297 if ( w2m != (iconv_t)-1 )
1298 iconv_close(w2m);
1299}
3a0d76bc 1300
bde4baac 1301size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
36acb880 1302{
b1d547eb
VS
1303#if wxUSE_THREADS
1304 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1305 // Unfortunately there is a couple of global wxCSConv objects such as
1306 // wxConvLocal that are used all over wx code, so we have to make sure
1307 // the handle is used by at most one thread at the time. Otherwise
1308 // only a few wx classes would be safe to use from non-main threads
1309 // as MB<->WC conversion would fail "randomly".
1310 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1311#endif
1312
36acb880
VZ
1313 size_t inbuf = strlen(psz);
1314 size_t outbuf = n * SIZEOF_WCHAR_T;
1315 size_t res, cres;
1316 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1317 wchar_t *bufPtr = buf;
1318 const char *pszPtr = psz;
1319
1320 if (buf)
1321 {
1322 // have destination buffer, convert there
1323 cres = iconv(m2w,
1324 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1325 (char**)&bufPtr, &outbuf);
1326 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 1327
36acb880 1328 if (ms_wcNeedsSwap)
3a0d76bc 1329 {
36acb880
VZ
1330 // convert to native endianness
1331 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 1332 }
adb45366 1333
49dd9820
VS
1334 // NB: iconv was given only strlen(psz) characters on input, and so
1335 // it couldn't convert the trailing zero. Let's do it ourselves
1336 // if there's some room left for it in the output buffer.
1337 if (res < n)
1338 buf[res] = 0;
36acb880
VZ
1339 }
1340 else
1341 {
1342 // no destination buffer... convert using temp buffer
1343 // to calculate destination buffer requirement
1344 wchar_t tbuf[8];
1345 res = 0;
1346 do {
1347 bufPtr = tbuf;
1348 outbuf = 8*SIZEOF_WCHAR_T;
1349
1350 cres = iconv(m2w,
1351 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1352 (char**)&bufPtr, &outbuf );
1353
1354 res += 8-(outbuf/SIZEOF_WCHAR_T);
1355 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1356 }
dccce9ea 1357
36acb880 1358 if (ICONV_FAILED(cres, inbuf))
f1339c56 1359 {
36acb880
VZ
1360 //VS: it is ok if iconv fails, hence trace only
1361 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1362 return (size_t)-1;
1363 }
1364
1365 return res;
1366}
1367
bde4baac 1368size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
36acb880 1369{
b1d547eb
VS
1370#if wxUSE_THREADS
1371 // NB: explained in MB2WC
1372 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1373#endif
1374
f8d791e0 1375 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
36acb880
VZ
1376 size_t outbuf = n;
1377 size_t res, cres;
3a0d76bc 1378
36acb880 1379 wchar_t *tmpbuf = 0;
3caec1bb 1380
36acb880
VZ
1381 if (ms_wcNeedsSwap)
1382 {
1383 // need to copy to temp buffer to switch endianness
1384 // this absolutely doesn't rock!
1385 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1386 // could be in read-only memory, or be accessed in some other thread)
1387 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1388 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1389 WC_BSWAP(tmpbuf, inbuf)
1390 psz=tmpbuf;
1391 }
3a0d76bc 1392
36acb880
VZ
1393 if (buf)
1394 {
1395 // have destination buffer, convert there
1396 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 1397
36acb880 1398 res = n-outbuf;
adb45366 1399
49dd9820
VS
1400 // NB: iconv was given only wcslen(psz) characters on input, and so
1401 // it couldn't convert the trailing zero. Let's do it ourselves
1402 // if there's some room left for it in the output buffer.
1403 if (res < n)
1404 buf[0] = 0;
36acb880
VZ
1405 }
1406 else
1407 {
1408 // no destination buffer... convert using temp buffer
1409 // to calculate destination buffer requirement
1410 char tbuf[16];
1411 res = 0;
1412 do {
1413 buf = tbuf; outbuf = 16;
1414
1415 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 1416
36acb880
VZ
1417 res += 16 - outbuf;
1418 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 1419 }
dccce9ea 1420
36acb880
VZ
1421 if (ms_wcNeedsSwap)
1422 {
1423 free(tmpbuf);
1424 }
dccce9ea 1425
36acb880
VZ
1426 if (ICONV_FAILED(cres, inbuf))
1427 {
1428 //VS: it is ok if iconv fails, hence trace only
1429 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1430 return (size_t)-1;
1431 }
1432
1433 return res;
1434}
1435
b040e242 1436#endif // HAVE_ICONV
36acb880 1437
e95354ec 1438
36acb880
VZ
1439// ============================================================================
1440// Win32 conversion classes
1441// ============================================================================
1cd52418 1442
e95354ec 1443#ifdef wxHAVE_WIN32_MB2WC
373658eb 1444
8b04d4c4 1445// from utils.cpp
d775fa82 1446#if wxUSE_FONTMAP
8b04d4c4
VZ
1447extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1448extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683 1449#endif
373658eb 1450
e95354ec 1451class wxMBConv_win32 : public wxMBConv
1cd52418
OK
1452{
1453public:
bde4baac
VZ
1454 wxMBConv_win32()
1455 {
1456 m_CodePage = CP_ACP;
1457 }
1458
7608a683 1459#if wxUSE_FONTMAP
e95354ec 1460 wxMBConv_win32(const wxChar* name)
bde4baac
VZ
1461 {
1462 m_CodePage = wxCharsetToCodepage(name);
1463 }
dccce9ea 1464
e95354ec 1465 wxMBConv_win32(wxFontEncoding encoding)
bde4baac
VZ
1466 {
1467 m_CodePage = wxEncodingToCodepage(encoding);
1468 }
7608a683 1469#endif
8b04d4c4 1470
bde4baac 1471 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
f1339c56 1472 {
02272c9c
VZ
1473 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1474 // the behaviour is not compatible with the Unix version (using iconv)
1475 // and break the library itself, e.g. wxTextInputStream::NextChar()
1476 // wouldn't work if reading an incomplete MB char didn't result in an
1477 // error
667e5b3e
VZ
1478 //
1479 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1480 // an error (tested under Windows Server 2003) and apparently it is
1481 // done on purpose, i.e. the function accepts any input in this case
1482 // and although I'd prefer to return error on ill-formed output, our
1483 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1484 // explicitly ill-formed according to RFC 2152) neither so we don't
1485 // even have any fallback here...
1486 int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1487
2b5f62a0
VZ
1488 const size_t len = ::MultiByteToWideChar
1489 (
1490 m_CodePage, // code page
667e5b3e 1491 flags, // flags: fall on error
2b5f62a0
VZ
1492 psz, // input string
1493 -1, // its length (NUL-terminated)
b4da152e 1494 buf, // output string
2b5f62a0
VZ
1495 buf ? n : 0 // size of output buffer
1496 );
1497
03a991bc
VZ
1498 // note that it returns count of written chars for buf != NULL and size
1499 // of the needed buffer for buf == NULL so in either case the length of
1500 // the string (which never includes the terminating NUL) is one less
1501 return len ? len - 1 : (size_t)-1;
f1339c56 1502 }
dccce9ea 1503
13dd924a 1504 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
f1339c56 1505 {
13dd924a
VZ
1506 /*
1507 we have a problem here: by default, WideCharToMultiByte() may
1508 replace characters unrepresentable in the target code page with bad
1509 quality approximations such as turning "1/2" symbol (U+00BD) into
1510 "1" for the code pages which don't have it and we, obviously, want
1511 to avoid this at any price
d775fa82 1512
13dd924a
VZ
1513 the trouble is that this function does it _silently_, i.e. it won't
1514 even tell us whether it did or not... Win98/2000 and higher provide
1515 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1516 we have to resort to a round trip, i.e. check that converting back
1517 results in the same string -- this is, of course, expensive but
1518 otherwise we simply can't be sure to not garble the data.
1519 */
1520
1521 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1522 // it doesn't work with CJK encodings (which we test for rather roughly
1523 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1524 // supporting it
907173e5
WS
1525 BOOL usedDef wxDUMMY_INITIALIZE(false);
1526 BOOL *pUsedDef;
13dd924a
VZ
1527 int flags;
1528 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1529 {
1530 // it's our lucky day
1531 flags = WC_NO_BEST_FIT_CHARS;
1532 pUsedDef = &usedDef;
1533 }
1534 else // old system or unsupported encoding
1535 {
1536 flags = 0;
1537 pUsedDef = NULL;
1538 }
1539
2b5f62a0
VZ
1540 const size_t len = ::WideCharToMultiByte
1541 (
1542 m_CodePage, // code page
13dd924a
VZ
1543 flags, // either none or no best fit
1544 pwz, // input string
2b5f62a0
VZ
1545 -1, // it is (wide) NUL-terminated
1546 buf, // output buffer
1547 buf ? n : 0, // and its size
1548 NULL, // default "replacement" char
13dd924a 1549 pUsedDef // [out] was it used?
2b5f62a0
VZ
1550 );
1551
13dd924a
VZ
1552 if ( !len )
1553 {
1554 // function totally failed
1555 return (size_t)-1;
1556 }
1557
1558 // if we were really converting, check if we succeeded
1559 if ( buf )
1560 {
1561 if ( flags )
1562 {
1563 // check if the conversion failed, i.e. if any replacements
1564 // were done
1565 if ( usedDef )
1566 return (size_t)-1;
1567 }
1568 else // we must resort to double tripping...
1569 {
1570 wxWCharBuffer wcBuf(n);
1571 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1572 wcscmp(wcBuf, pwz) != 0 )
1573 {
1574 // we didn't obtain the same thing we started from, hence
1575 // the conversion was lossy and we consider that it failed
1576 return (size_t)-1;
1577 }
1578 }
1579 }
1580
03a991bc 1581 // see the comment above for the reason of "len - 1"
13dd924a 1582 return len - 1;
f1339c56 1583 }
dccce9ea 1584
13dd924a
VZ
1585 bool IsOk() const { return m_CodePage != -1; }
1586
1587private:
1588 static bool CanUseNoBestFit()
1589 {
1590 static int s_isWin98Or2k = -1;
1591
1592 if ( s_isWin98Or2k == -1 )
1593 {
1594 int verMaj, verMin;
1595 switch ( wxGetOsVersion(&verMaj, &verMin) )
1596 {
1597 case wxWIN95:
1598 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1599 break;
1600
1601 case wxWINDOWS_NT:
1602 s_isWin98Or2k = verMaj >= 5;
1603 break;
1604
1605 default:
1606 // unknown, be conseravtive by default
1607 s_isWin98Or2k = 0;
1608 }
1609
1610 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1611 }
1612
1613 return s_isWin98Or2k == 1;
1614 }
f1339c56 1615
b1d66b54 1616 long m_CodePage;
1cd52418 1617};
e95354ec
VZ
1618
1619#endif // wxHAVE_WIN32_MB2WC
1620
f7e98dee
RN
1621// ============================================================================
1622// Cocoa conversion classes
1623// ============================================================================
1624
1625#if defined(__WXCOCOA__)
1626
ecd9653b 1627// RN: There is no UTF-32 support in either Core Foundation or
f7e98dee
RN
1628// Cocoa. Strangely enough, internally Core Foundation uses
1629// UTF 32 internally quite a bit - its just not public (yet).
1630
1631#include <CoreFoundation/CFString.h>
1632#include <CoreFoundation/CFStringEncodingExt.h>
1633
1634CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
ecd9653b 1635{
638357a0 1636 CFStringEncoding enc = kCFStringEncodingInvalidId ;
ecd9653b
WS
1637 if ( encoding == wxFONTENCODING_DEFAULT )
1638 {
638357a0 1639 enc = CFStringGetSystemEncoding();
ecd9653b
WS
1640 }
1641 else switch( encoding)
1642 {
1643 case wxFONTENCODING_ISO8859_1 :
1644 enc = kCFStringEncodingISOLatin1 ;
1645 break ;
1646 case wxFONTENCODING_ISO8859_2 :
1647 enc = kCFStringEncodingISOLatin2;
1648 break ;
1649 case wxFONTENCODING_ISO8859_3 :
1650 enc = kCFStringEncodingISOLatin3 ;
1651 break ;
1652 case wxFONTENCODING_ISO8859_4 :
1653 enc = kCFStringEncodingISOLatin4;
1654 break ;
1655 case wxFONTENCODING_ISO8859_5 :
1656 enc = kCFStringEncodingISOLatinCyrillic;
1657 break ;
1658 case wxFONTENCODING_ISO8859_6 :
1659 enc = kCFStringEncodingISOLatinArabic;
1660 break ;
1661 case wxFONTENCODING_ISO8859_7 :
1662 enc = kCFStringEncodingISOLatinGreek;
1663 break ;
1664 case wxFONTENCODING_ISO8859_8 :
1665 enc = kCFStringEncodingISOLatinHebrew;
1666 break ;
1667 case wxFONTENCODING_ISO8859_9 :
1668 enc = kCFStringEncodingISOLatin5;
1669 break ;
1670 case wxFONTENCODING_ISO8859_10 :
1671 enc = kCFStringEncodingISOLatin6;
1672 break ;
1673 case wxFONTENCODING_ISO8859_11 :
1674 enc = kCFStringEncodingISOLatinThai;
1675 break ;
1676 case wxFONTENCODING_ISO8859_13 :
1677 enc = kCFStringEncodingISOLatin7;
1678 break ;
1679 case wxFONTENCODING_ISO8859_14 :
1680 enc = kCFStringEncodingISOLatin8;
1681 break ;
1682 case wxFONTENCODING_ISO8859_15 :
1683 enc = kCFStringEncodingISOLatin9;
1684 break ;
1685
1686 case wxFONTENCODING_KOI8 :
1687 enc = kCFStringEncodingKOI8_R;
1688 break ;
1689 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1690 enc = kCFStringEncodingDOSRussian;
1691 break ;
1692
1693// case wxFONTENCODING_BULGARIAN :
1694// enc = ;
1695// break ;
1696
1697 case wxFONTENCODING_CP437 :
1698 enc =kCFStringEncodingDOSLatinUS ;
1699 break ;
1700 case wxFONTENCODING_CP850 :
1701 enc = kCFStringEncodingDOSLatin1;
1702 break ;
1703 case wxFONTENCODING_CP852 :
1704 enc = kCFStringEncodingDOSLatin2;
1705 break ;
1706 case wxFONTENCODING_CP855 :
1707 enc = kCFStringEncodingDOSCyrillic;
1708 break ;
1709 case wxFONTENCODING_CP866 :
1710 enc =kCFStringEncodingDOSRussian ;
1711 break ;
1712 case wxFONTENCODING_CP874 :
1713 enc = kCFStringEncodingDOSThai;
1714 break ;
1715 case wxFONTENCODING_CP932 :
1716 enc = kCFStringEncodingDOSJapanese;
1717 break ;
1718 case wxFONTENCODING_CP936 :
1719 enc =kCFStringEncodingDOSChineseSimplif ;
1720 break ;
1721 case wxFONTENCODING_CP949 :
1722 enc = kCFStringEncodingDOSKorean;
1723 break ;
1724 case wxFONTENCODING_CP950 :
1725 enc = kCFStringEncodingDOSChineseTrad;
1726 break ;
ecd9653b
WS
1727 case wxFONTENCODING_CP1250 :
1728 enc = kCFStringEncodingWindowsLatin2;
1729 break ;
1730 case wxFONTENCODING_CP1251 :
1731 enc =kCFStringEncodingWindowsCyrillic ;
1732 break ;
1733 case wxFONTENCODING_CP1252 :
1734 enc =kCFStringEncodingWindowsLatin1 ;
1735 break ;
1736 case wxFONTENCODING_CP1253 :
1737 enc = kCFStringEncodingWindowsGreek;
1738 break ;
1739 case wxFONTENCODING_CP1254 :
1740 enc = kCFStringEncodingWindowsLatin5;
1741 break ;
1742 case wxFONTENCODING_CP1255 :
1743 enc =kCFStringEncodingWindowsHebrew ;
1744 break ;
1745 case wxFONTENCODING_CP1256 :
1746 enc =kCFStringEncodingWindowsArabic ;
1747 break ;
1748 case wxFONTENCODING_CP1257 :
1749 enc = kCFStringEncodingWindowsBalticRim;
1750 break ;
638357a0
RN
1751// This only really encodes to UTF7 (if that) evidently
1752// case wxFONTENCODING_UTF7 :
1753// enc = kCFStringEncodingNonLossyASCII ;
1754// break ;
ecd9653b
WS
1755 case wxFONTENCODING_UTF8 :
1756 enc = kCFStringEncodingUTF8 ;
1757 break ;
1758 case wxFONTENCODING_EUC_JP :
1759 enc = kCFStringEncodingEUC_JP;
1760 break ;
1761 case wxFONTENCODING_UTF16 :
f7e98dee 1762 enc = kCFStringEncodingUnicode ;
ecd9653b 1763 break ;
f7e98dee
RN
1764 case wxFONTENCODING_MACROMAN :
1765 enc = kCFStringEncodingMacRoman ;
1766 break ;
1767 case wxFONTENCODING_MACJAPANESE :
1768 enc = kCFStringEncodingMacJapanese ;
1769 break ;
1770 case wxFONTENCODING_MACCHINESETRAD :
1771 enc = kCFStringEncodingMacChineseTrad ;
1772 break ;
1773 case wxFONTENCODING_MACKOREAN :
1774 enc = kCFStringEncodingMacKorean ;
1775 break ;
1776 case wxFONTENCODING_MACARABIC :
1777 enc = kCFStringEncodingMacArabic ;
1778 break ;
1779 case wxFONTENCODING_MACHEBREW :
1780 enc = kCFStringEncodingMacHebrew ;
1781 break ;
1782 case wxFONTENCODING_MACGREEK :
1783 enc = kCFStringEncodingMacGreek ;
1784 break ;
1785 case wxFONTENCODING_MACCYRILLIC :
1786 enc = kCFStringEncodingMacCyrillic ;
1787 break ;
1788 case wxFONTENCODING_MACDEVANAGARI :
1789 enc = kCFStringEncodingMacDevanagari ;
1790 break ;
1791 case wxFONTENCODING_MACGURMUKHI :
1792 enc = kCFStringEncodingMacGurmukhi ;
1793 break ;
1794 case wxFONTENCODING_MACGUJARATI :
1795 enc = kCFStringEncodingMacGujarati ;
1796 break ;
1797 case wxFONTENCODING_MACORIYA :
1798 enc = kCFStringEncodingMacOriya ;
1799 break ;
1800 case wxFONTENCODING_MACBENGALI :
1801 enc = kCFStringEncodingMacBengali ;
1802 break ;
1803 case wxFONTENCODING_MACTAMIL :
1804 enc = kCFStringEncodingMacTamil ;
1805 break ;
1806 case wxFONTENCODING_MACTELUGU :
1807 enc = kCFStringEncodingMacTelugu ;
1808 break ;
1809 case wxFONTENCODING_MACKANNADA :
1810 enc = kCFStringEncodingMacKannada ;
1811 break ;
1812 case wxFONTENCODING_MACMALAJALAM :
1813 enc = kCFStringEncodingMacMalayalam ;
1814 break ;
1815 case wxFONTENCODING_MACSINHALESE :
1816 enc = kCFStringEncodingMacSinhalese ;
1817 break ;
1818 case wxFONTENCODING_MACBURMESE :
1819 enc = kCFStringEncodingMacBurmese ;
1820 break ;
1821 case wxFONTENCODING_MACKHMER :
1822 enc = kCFStringEncodingMacKhmer ;
1823 break ;
1824 case wxFONTENCODING_MACTHAI :
1825 enc = kCFStringEncodingMacThai ;
1826 break ;
1827 case wxFONTENCODING_MACLAOTIAN :
1828 enc = kCFStringEncodingMacLaotian ;
1829 break ;
1830 case wxFONTENCODING_MACGEORGIAN :
1831 enc = kCFStringEncodingMacGeorgian ;
1832 break ;
1833 case wxFONTENCODING_MACARMENIAN :
1834 enc = kCFStringEncodingMacArmenian ;
1835 break ;
1836 case wxFONTENCODING_MACCHINESESIMP :
1837 enc = kCFStringEncodingMacChineseSimp ;
1838 break ;
1839 case wxFONTENCODING_MACTIBETAN :
1840 enc = kCFStringEncodingMacTibetan ;
1841 break ;
1842 case wxFONTENCODING_MACMONGOLIAN :
1843 enc = kCFStringEncodingMacMongolian ;
1844 break ;
1845 case wxFONTENCODING_MACETHIOPIC :
1846 enc = kCFStringEncodingMacEthiopic ;
1847 break ;
1848 case wxFONTENCODING_MACCENTRALEUR :
1849 enc = kCFStringEncodingMacCentralEurRoman ;
1850 break ;
1851 case wxFONTENCODING_MACVIATNAMESE :
1852 enc = kCFStringEncodingMacVietnamese ;
1853 break ;
1854 case wxFONTENCODING_MACARABICEXT :
1855 enc = kCFStringEncodingMacExtArabic ;
1856 break ;
1857 case wxFONTENCODING_MACSYMBOL :
1858 enc = kCFStringEncodingMacSymbol ;
1859 break ;
1860 case wxFONTENCODING_MACDINGBATS :
1861 enc = kCFStringEncodingMacDingbats ;
1862 break ;
1863 case wxFONTENCODING_MACTURKISH :
1864 enc = kCFStringEncodingMacTurkish ;
1865 break ;
1866 case wxFONTENCODING_MACCROATIAN :
1867 enc = kCFStringEncodingMacCroatian ;
1868 break ;
1869 case wxFONTENCODING_MACICELANDIC :
1870 enc = kCFStringEncodingMacIcelandic ;
1871 break ;
1872 case wxFONTENCODING_MACROMANIAN :
1873 enc = kCFStringEncodingMacRomanian ;
1874 break ;
1875 case wxFONTENCODING_MACCELTIC :
1876 enc = kCFStringEncodingMacCeltic ;
1877 break ;
1878 case wxFONTENCODING_MACGAELIC :
1879 enc = kCFStringEncodingMacGaelic ;
1880 break ;
ecd9653b
WS
1881// case wxFONTENCODING_MACKEYBOARD :
1882// enc = kCFStringEncodingMacKeyboardGlyphs ;
1883// break ;
1884 default :
1885 // because gcc is picky
1886 break ;
1887 } ;
1888 return enc ;
f7e98dee
RN
1889}
1890
f7e98dee
RN
1891class wxMBConv_cocoa : public wxMBConv
1892{
1893public:
1894 wxMBConv_cocoa()
1895 {
1896 Init(CFStringGetSystemEncoding()) ;
1897 }
1898
a6900d10 1899#if wxUSE_FONTMAP
f7e98dee
RN
1900 wxMBConv_cocoa(const wxChar* name)
1901 {
1902 Init( wxCFStringEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
1903 }
a6900d10 1904#endif
f7e98dee
RN
1905
1906 wxMBConv_cocoa(wxFontEncoding encoding)
1907 {
1908 Init( wxCFStringEncFromFontEnc(encoding) );
1909 }
1910
1911 ~wxMBConv_cocoa()
1912 {
1913 }
1914
1915 void Init( CFStringEncoding encoding)
1916 {
638357a0 1917 m_encoding = encoding ;
f7e98dee
RN
1918 }
1919
1920 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
1921 {
1922 wxASSERT(szUnConv);
ecd9653b 1923
638357a0
RN
1924 CFStringRef theString = CFStringCreateWithBytes (
1925 NULL, //the allocator
1926 (const UInt8*)szUnConv,
1927 strlen(szUnConv),
1928 m_encoding,
1929 false //no BOM/external representation
f7e98dee
RN
1930 );
1931
1932 wxASSERT(theString);
1933
638357a0
RN
1934 size_t nOutLength = CFStringGetLength(theString);
1935
1936 if (szOut == NULL)
f7e98dee 1937 {
f7e98dee 1938 CFRelease(theString);
638357a0 1939 return nOutLength;
f7e98dee 1940 }
ecd9653b 1941
638357a0 1942 CFRange theRange = { 0, nOutSize };
ecd9653b 1943
638357a0
RN
1944#if SIZEOF_WCHAR_T == 4
1945 UniChar* szUniCharBuffer = new UniChar[nOutSize];
1946#endif
1947
f7e98dee 1948 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
638357a0 1949
f7e98dee 1950 CFRelease(theString);
ecd9653b 1951
638357a0 1952 szUniCharBuffer[nOutLength] = '\0' ;
f7e98dee
RN
1953
1954#if SIZEOF_WCHAR_T == 4
1955 wxMBConvUTF16 converter ;
638357a0 1956 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
f7e98dee
RN
1957 delete[] szUniCharBuffer;
1958#endif
638357a0
RN
1959
1960 return nOutLength;
f7e98dee
RN
1961 }
1962
1963 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
1964 {
638357a0
RN
1965 wxASSERT(szUnConv);
1966
f7e98dee 1967 size_t nRealOutSize;
638357a0 1968 size_t nBufSize = wxWcslen(szUnConv);
f7e98dee 1969 UniChar* szUniBuffer = (UniChar*) szUnConv;
ecd9653b 1970
f7e98dee
RN
1971#if SIZEOF_WCHAR_T == 4
1972 wxMBConvUTF16BE converter ;
1973 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
1974 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
1975 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
1976 nBufSize /= sizeof(UniChar);
f7e98dee
RN
1977#endif
1978
1979 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
1980 NULL, //allocator
1981 szUniBuffer,
1982 nBufSize,
638357a0 1983 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
f7e98dee 1984 );
ecd9653b 1985
f7e98dee 1986 wxASSERT(theString);
ecd9653b 1987
f7e98dee 1988 //Note that CER puts a BOM when converting to unicode
638357a0
RN
1989 //so we check and use getchars instead in that case
1990 if (m_encoding == kCFStringEncodingUnicode)
f7e98dee 1991 {
638357a0
RN
1992 if (szOut != NULL)
1993 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
1994
1995 nRealOutSize = CFStringGetLength(theString) + 1;
1996 }
1997 else
1998 {
1999 CFStringGetBytes(
2000 theString,
2001 CFRangeMake(0, CFStringGetLength(theString)),
2002 m_encoding,
2003 0, //what to put in characters that can't be converted -
2004 //0 tells CFString to return NULL if it meets such a character
2005 false, //not an external representation
2006 (UInt8*) szOut,
2007 nOutSize,
2008 (CFIndex*) &nRealOutSize
2009 );
f7e98dee 2010 }
ecd9653b 2011
638357a0 2012 CFRelease(theString);
ecd9653b 2013
638357a0
RN
2014#if SIZEOF_WCHAR_T == 4
2015 delete[] szUniBuffer;
2016#endif
ecd9653b 2017
f7e98dee
RN
2018 return nRealOutSize - 1;
2019 }
2020
2021 bool IsOk() const
ecd9653b 2022 {
638357a0
RN
2023 return m_encoding != kCFStringEncodingInvalidId &&
2024 CFStringIsEncodingAvailable(m_encoding);
f7e98dee
RN
2025 }
2026
2027private:
638357a0 2028 CFStringEncoding m_encoding ;
f7e98dee
RN
2029};
2030
2031#endif // defined(__WXCOCOA__)
2032
335d31e0
SC
2033// ============================================================================
2034// Mac conversion classes
2035// ============================================================================
2036
2037#if defined(__WXMAC__) && defined(TARGET_CARBON)
2038
2039class wxMBConv_mac : public wxMBConv
2040{
2041public:
2042 wxMBConv_mac()
2043 {
2044 Init(CFStringGetSystemEncoding()) ;
2045 }
2046
2d1659cf 2047#if wxUSE_FONTMAP
335d31e0
SC
2048 wxMBConv_mac(const wxChar* name)
2049 {
d775fa82 2050 Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
335d31e0 2051 }
2d1659cf 2052#endif
335d31e0
SC
2053
2054 wxMBConv_mac(wxFontEncoding encoding)
2055 {
d775fa82
WS
2056 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2057 }
2058
2059 ~wxMBConv_mac()
2060 {
2061 OSStatus status = noErr ;
2062 status = TECDisposeConverter(m_MB2WC_converter);
2063 status = TECDisposeConverter(m_WC2MB_converter);
2064 }
2065
2066
2067 void Init( TextEncodingBase encoding)
2068 {
2069 OSStatus status = noErr ;
2070 m_char_encoding = encoding ;
2071 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2072
2073 status = TECCreateConverter(&m_MB2WC_converter,
2074 m_char_encoding,
2075 m_unicode_encoding);
2076 status = TECCreateConverter(&m_WC2MB_converter,
2077 m_unicode_encoding,
2078 m_char_encoding);
2079 }
2080
335d31e0
SC
2081 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2082 {
d775fa82
WS
2083 OSStatus status = noErr ;
2084 ByteCount byteOutLen ;
2085 ByteCount byteInLen = strlen(psz) ;
2086 wchar_t *tbuf = NULL ;
2087 UniChar* ubuf = NULL ;
2088 size_t res = 0 ;
2089
2090 if (buf == NULL)
2091 {
638357a0 2092 //apple specs say at least 32
c543817b 2093 n = wxMax( 32 , byteInLen ) ;
d775fa82
WS
2094 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2095 }
2096 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
f3a355ce 2097#if SIZEOF_WCHAR_T == 4
d775fa82 2098 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
f3a355ce 2099#else
d775fa82 2100 ubuf = (UniChar*) (buf ? buf : tbuf) ;
f3a355ce 2101#endif
d775fa82
WS
2102 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2103 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
f3a355ce 2104#if SIZEOF_WCHAR_T == 4
8471ea90
SC
2105 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2106 // is not properly terminated we get random characters at the end
2107 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
d775fa82
WS
2108 wxMBConvUTF16BE converter ;
2109 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2110 free( ubuf ) ;
f3a355ce 2111#else
d775fa82 2112 res = byteOutLen / sizeof( UniChar ) ;
f3a355ce 2113#endif
d775fa82
WS
2114 if ( buf == NULL )
2115 free(tbuf) ;
335d31e0 2116
335d31e0
SC
2117 if ( buf && res < n)
2118 buf[res] = 0;
2119
d775fa82 2120 return res ;
335d31e0
SC
2121 }
2122
2123 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
d775fa82
WS
2124 {
2125 OSStatus status = noErr ;
2126 ByteCount byteOutLen ;
2127 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2128
2129 char *tbuf = NULL ;
2130
2131 if (buf == NULL)
2132 {
638357a0 2133 //apple specs say at least 32
c543817b 2134 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
d775fa82
WS
2135 tbuf = (char*) malloc( n ) ;
2136 }
2137
2138 ByteCount byteBufferLen = n ;
2139 UniChar* ubuf = NULL ;
f3a355ce 2140#if SIZEOF_WCHAR_T == 4
d775fa82
WS
2141 wxMBConvUTF16BE converter ;
2142 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2143 byteInLen = unicharlen ;
2144 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2145 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
f3a355ce 2146#else
d775fa82 2147 ubuf = (UniChar*) psz ;
f3a355ce 2148#endif
d775fa82
WS
2149 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2150 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
f3a355ce 2151#if SIZEOF_WCHAR_T == 4
d775fa82 2152 free( ubuf ) ;
f3a355ce 2153#endif
d775fa82
WS
2154 if ( buf == NULL )
2155 free(tbuf) ;
335d31e0 2156
d775fa82 2157 size_t res = byteOutLen ;
335d31e0 2158 if ( buf && res < n)
638357a0 2159 {
335d31e0 2160 buf[res] = 0;
638357a0
RN
2161
2162 //we need to double-trip to verify it didn't insert any ? in place
2163 //of bogus characters
2164 wxWCharBuffer wcBuf(n);
2165 size_t pszlen = wxWcslen(psz);
2166 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2167 wxWcslen(wcBuf) != pszlen ||
2168 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2169 {
2170 // we didn't obtain the same thing we started from, hence
2171 // the conversion was lossy and we consider that it failed
2172 return (size_t)-1;
2173 }
2174 }
335d31e0 2175
d775fa82 2176 return res ;
335d31e0
SC
2177 }
2178
2179 bool IsOk() const
2180 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2181
2182private:
d775fa82
WS
2183 TECObjectRef m_MB2WC_converter ;
2184 TECObjectRef m_WC2MB_converter ;
2185
2186 TextEncodingBase m_char_encoding ;
2187 TextEncodingBase m_unicode_encoding ;
335d31e0
SC
2188};
2189
2190#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1e6feb95 2191
36acb880
VZ
2192// ============================================================================
2193// wxEncodingConverter based conversion classes
2194// ============================================================================
2195
1e6feb95 2196#if wxUSE_FONTMAP
1cd52418 2197
e95354ec 2198class wxMBConv_wxwin : public wxMBConv
1cd52418 2199{
8b04d4c4
VZ
2200private:
2201 void Init()
2202 {
2203 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2204 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2205 }
2206
6001e347 2207public:
f1339c56
RR
2208 // temporarily just use wxEncodingConverter stuff,
2209 // so that it works while a better implementation is built
e95354ec 2210 wxMBConv_wxwin(const wxChar* name)
f1339c56
RR
2211 {
2212 if (name)
e95354ec 2213 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
8b04d4c4
VZ
2214 else
2215 m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb 2216
8b04d4c4
VZ
2217 Init();
2218 }
2219
e95354ec 2220 wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4
VZ
2221 {
2222 m_enc = enc;
2223
2224 Init();
f1339c56 2225 }
dccce9ea 2226
bde4baac 2227 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
f1339c56
RR
2228 {
2229 size_t inbuf = strlen(psz);
dccce9ea 2230 if (buf)
c643a977
VS
2231 {
2232 if (!m2w.Convert(psz,buf))
2233 return (size_t)-1;
2234 }
f1339c56
RR
2235 return inbuf;
2236 }
dccce9ea 2237
bde4baac 2238 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
f1339c56 2239 {
f8d791e0 2240 const size_t inbuf = wxWcslen(psz);
f1339c56 2241 if (buf)
c643a977
VS
2242 {
2243 if (!w2m.Convert(psz,buf))
2244 return (size_t)-1;
2245 }
dccce9ea 2246
f1339c56
RR
2247 return inbuf;
2248 }
dccce9ea 2249
e95354ec 2250 bool IsOk() const { return m_ok; }
f1339c56
RR
2251
2252public:
8b04d4c4 2253 wxFontEncoding m_enc;
f1339c56 2254 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
2255
2256 // were we initialized successfully?
2257 bool m_ok;
fc7a2a60 2258
e95354ec 2259 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
f6bcfd97 2260};
6001e347 2261
1e6feb95
VZ
2262#endif // wxUSE_FONTMAP
2263
36acb880
VZ
2264// ============================================================================
2265// wxCSConv implementation
2266// ============================================================================
2267
8b04d4c4 2268void wxCSConv::Init()
6001e347 2269{
e95354ec
VZ
2270 m_name = NULL;
2271 m_convReal = NULL;
2272 m_deferred = true;
2273}
2274
8b04d4c4
VZ
2275wxCSConv::wxCSConv(const wxChar *charset)
2276{
2277 Init();
82713003 2278
e95354ec
VZ
2279 if ( charset )
2280 {
e95354ec
VZ
2281 SetName(charset);
2282 }
bda3d86a
VZ
2283
2284 m_encoding = wxFONTENCODING_SYSTEM;
6001e347
RR
2285}
2286
8b04d4c4
VZ
2287wxCSConv::wxCSConv(wxFontEncoding encoding)
2288{
bda3d86a 2289 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
e95354ec
VZ
2290 {
2291 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2292
2293 encoding = wxFONTENCODING_SYSTEM;
2294 }
2295
8b04d4c4
VZ
2296 Init();
2297
bda3d86a 2298 m_encoding = encoding;
8b04d4c4
VZ
2299}
2300
6001e347
RR
2301wxCSConv::~wxCSConv()
2302{
65e50848
JS
2303 Clear();
2304}
2305
54380f29 2306wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4 2307 : wxMBConv()
54380f29 2308{
8b04d4c4
VZ
2309 Init();
2310
54380f29 2311 SetName(conv.m_name);
8b04d4c4 2312 m_encoding = conv.m_encoding;
54380f29
GD
2313}
2314
2315wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2316{
2317 Clear();
8b04d4c4 2318
54380f29 2319 SetName(conv.m_name);
8b04d4c4
VZ
2320 m_encoding = conv.m_encoding;
2321
54380f29
GD
2322 return *this;
2323}
2324
65e50848
JS
2325void wxCSConv::Clear()
2326{
8b04d4c4 2327 free(m_name);
e95354ec 2328 delete m_convReal;
8b04d4c4 2329
65e50848 2330 m_name = NULL;
e95354ec 2331 m_convReal = NULL;
6001e347
RR
2332}
2333
2334void wxCSConv::SetName(const wxChar *charset)
2335{
f1339c56
RR
2336 if (charset)
2337 {
2338 m_name = wxStrdup(charset);
e95354ec 2339 m_deferred = true;
f1339c56 2340 }
6001e347
RR
2341}
2342
e95354ec
VZ
2343wxMBConv *wxCSConv::DoCreate() const
2344{
c547282d
VZ
2345 // check for the special case of ASCII or ISO8859-1 charset: as we have
2346 // special knowledge of it anyhow, we don't need to create a special
2347 // conversion object
2348 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56 2349 {
e95354ec
VZ
2350 // don't convert at all
2351 return NULL;
2352 }
dccce9ea 2353
e95354ec
VZ
2354 // we trust OS to do conversion better than we can so try external
2355 // conversion methods first
2356 //
2357 // the full order is:
2358 // 1. OS conversion (iconv() under Unix or Win32 API)
2359 // 2. hard coded conversions for UTF
2360 // 3. wxEncodingConverter as fall back
2361
2362 // step (1)
2363#ifdef HAVE_ICONV
c547282d 2364#if !wxUSE_FONTMAP
e95354ec 2365 if ( m_name )
c547282d 2366#endif // !wxUSE_FONTMAP
e95354ec 2367 {
c547282d
VZ
2368 wxString name(m_name);
2369
2370#if wxUSE_FONTMAP
2371 if ( name.empty() )
2372 name = wxFontMapper::Get()->GetEncodingName(m_encoding);
2373#endif // wxUSE_FONTMAP
2374
2375 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
e95354ec
VZ
2376 if ( conv->IsOk() )
2377 return conv;
2378
2379 delete conv;
2380 }
2381#endif // HAVE_ICONV
2382
2383#ifdef wxHAVE_WIN32_MB2WC
2384 {
7608a683 2385#if wxUSE_FONTMAP
e95354ec
VZ
2386 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2387 : new wxMBConv_win32(m_encoding);
2388 if ( conv->IsOk() )
2389 return conv;
2390
2391 delete conv;
7608a683
WS
2392#else
2393 return NULL;
2394#endif
e95354ec
VZ
2395 }
2396#endif // wxHAVE_WIN32_MB2WC
d775fa82
WS
2397#if defined(__WXMAC__)
2398 {
2399 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
2400 {
2401
2d1659cf 2402#if wxUSE_FONTMAP
d775fa82
WS
2403 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2404 : new wxMBConv_mac(m_encoding);
2d1659cf
RN
2405#else
2406 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2407#endif
d775fa82 2408 if ( conv->IsOk() )
f7e98dee
RN
2409 return conv;
2410
2411 delete conv;
2412 }
2413 }
2414#endif
2415#if defined(__WXCOCOA__)
2416 {
2417 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2418 {
2419
a6900d10 2420#if wxUSE_FONTMAP
f7e98dee
RN
2421 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2422 : new wxMBConv_cocoa(m_encoding);
a6900d10
RN
2423#else
2424 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2425#endif
f7e98dee 2426 if ( conv->IsOk() )
d775fa82
WS
2427 return conv;
2428
2429 delete conv;
2430 }
335d31e0
SC
2431 }
2432#endif
e95354ec
VZ
2433 // step (2)
2434 wxFontEncoding enc = m_encoding;
2435#if wxUSE_FONTMAP
c547282d
VZ
2436 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2437 {
2438 // use "false" to suppress interactive dialogs -- we can be called from
2439 // anywhere and popping up a dialog from here is the last thing we want to
2440 // do
2441 enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
2442 }
e95354ec
VZ
2443#endif // wxUSE_FONTMAP
2444
2445 switch ( enc )
2446 {
2447 case wxFONTENCODING_UTF7:
2448 return new wxMBConvUTF7;
2449
2450 case wxFONTENCODING_UTF8:
2451 return new wxMBConvUTF8;
2452
e95354ec
VZ
2453 case wxFONTENCODING_UTF16BE:
2454 return new wxMBConvUTF16BE;
2455
2456 case wxFONTENCODING_UTF16LE:
2457 return new wxMBConvUTF16LE;
2458
e95354ec
VZ
2459 case wxFONTENCODING_UTF32BE:
2460 return new wxMBConvUTF32BE;
2461
2462 case wxFONTENCODING_UTF32LE:
2463 return new wxMBConvUTF32LE;
2464
2465 default:
2466 // nothing to do but put here to suppress gcc warnings
2467 ;
2468 }
2469
2470 // step (3)
2471#if wxUSE_FONTMAP
2472 {
2473 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2474 : new wxMBConv_wxwin(m_encoding);
2475 if ( conv->IsOk() )
2476 return conv;
2477
2478 delete conv;
2479 }
2480#endif // wxUSE_FONTMAP
2481
a58d4f4d
VS
2482 // NB: This is a hack to prevent deadlock. What could otherwise happen
2483 // in Unicode build: wxConvLocal creation ends up being here
2484 // because of some failure and logs the error. But wxLog will try to
2485 // attach timestamp, for which it will need wxConvLocal (to convert
2486 // time to char* and then wchar_t*), but that fails, tries to log
2487 // error, but wxLog has a (already locked) critical section that
2488 // guards static buffer.
2489 static bool alreadyLoggingError = false;
2490 if (!alreadyLoggingError)
2491 {
2492 alreadyLoggingError = true;
2493 wxLogError(_("Cannot convert from the charset '%s'!"),
2494 m_name ? m_name
e95354ec
VZ
2495 :
2496#if wxUSE_FONTMAP
2497 wxFontMapper::GetEncodingDescription(m_encoding).c_str()
2498#else // !wxUSE_FONTMAP
2499 wxString::Format(_("encoding %s"), m_encoding).c_str()
2500#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2501 );
a58d4f4d
VS
2502 alreadyLoggingError = false;
2503 }
e95354ec
VZ
2504
2505 return NULL;
2506}
2507
2508void wxCSConv::CreateConvIfNeeded() const
2509{
2510 if ( m_deferred )
2511 {
2512 wxCSConv *self = (wxCSConv *)this; // const_cast
bda3d86a
VZ
2513
2514#if wxUSE_INTL
2515 // if we don't have neither the name nor the encoding, use the default
2516 // encoding for this system
2517 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2518 {
4d312c22 2519 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
bda3d86a
VZ
2520 }
2521#endif // wxUSE_INTL
2522
e95354ec
VZ
2523 self->m_convReal = DoCreate();
2524 self->m_deferred = false;
6001e347 2525 }
6001e347
RR
2526}
2527
2528size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2529{
e95354ec 2530 CreateConvIfNeeded();
dccce9ea 2531
e95354ec
VZ
2532 if (m_convReal)
2533 return m_convReal->MB2WC(buf, psz, n);
f1339c56
RR
2534
2535 // latin-1 (direct)
4def3b35 2536 size_t len = strlen(psz);
dccce9ea 2537
f1339c56
RR
2538 if (buf)
2539 {
4def3b35 2540 for (size_t c = 0; c <= len; c++)
f1339c56
RR
2541 buf[c] = (unsigned char)(psz[c]);
2542 }
dccce9ea 2543
f1339c56 2544 return len;
6001e347
RR
2545}
2546
2547size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2548{
e95354ec 2549 CreateConvIfNeeded();
dccce9ea 2550
e95354ec
VZ
2551 if (m_convReal)
2552 return m_convReal->WC2MB(buf, psz, n);
1cd52418 2553
f1339c56 2554 // latin-1 (direct)
f8d791e0 2555 const size_t len = wxWcslen(psz);
f1339c56
RR
2556 if (buf)
2557 {
4def3b35 2558 for (size_t c = 0; c <= len; c++)
24642831
VS
2559 {
2560 if (psz[c] > 0xFF)
2561 return (size_t)-1;
907173e5 2562 buf[c] = (char)psz[c];
24642831
VS
2563 }
2564 }
2565 else
2566 {
2567 for (size_t c = 0; c <= len; c++)
2568 {
2569 if (psz[c] > 0xFF)
2570 return (size_t)-1;
2571 }
f1339c56 2572 }
dccce9ea 2573
f1339c56 2574 return len;
6001e347
RR
2575}
2576
bde4baac
VZ
2577// ----------------------------------------------------------------------------
2578// globals
2579// ----------------------------------------------------------------------------
2580
2581#ifdef __WINDOWS__
2582 static wxMBConv_win32 wxConvLibcObj;
f81f5901
SC
2583#elif defined(__WXMAC__) && !defined(__MACH__)
2584 static wxMBConv_mac wxConvLibcObj ;
bde4baac 2585#else
dcc8fac0 2586 static wxMBConvLibc wxConvLibcObj;
bde4baac
VZ
2587#endif
2588
2589static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2590static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2591static wxMBConvUTF7 wxConvUTF7Obj;
2592static wxMBConvUTF8 wxConvUTF8Obj;
2593
bde4baac
VZ
2594WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2595WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2596WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2597WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2598WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2599WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
f5a1953b
VZ
2600WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2601#ifdef __WXOSX__
2602 wxConvUTF8Obj;
2603#else
2604 wxConvLibcObj;
2605#endif
2606
bde4baac
VZ
2607
2608#else // !wxUSE_WCHAR_T
2609
2610// stand-ins in absence of wchar_t
2611WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2612 wxConvISO8859_1,
2613 wxConvLocal,
2614 wxConvUTF8;
2615
2616#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
6001e347
RR
2617
2618