]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
undid last (wrong) change to wxMBConvUTF16swap::MB2WC(); added comment to explain why
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
6 // Modified by:
7 // Created: 29/01/98
8 // RCS-ID: $Id$
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
14
15 // ============================================================================
16 // declarations
17 // ============================================================================
18
19 // ----------------------------------------------------------------------------
20 // headers
21 // ----------------------------------------------------------------------------
22
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
25
26 #ifdef __BORLANDC__
27 #pragma hdrstop
28 #endif
29
30 #ifndef WX_PRECOMP
31 #include "wx/intl.h"
32 #include "wx/log.h"
33 #endif // WX_PRECOMP
34
35 #include "wx/strconv.h"
36
37 #if wxUSE_WCHAR_T
38
39 #ifdef __WINDOWS__
40 #include "wx/msw/private.h"
41 #include "wx/msw/missing.h"
42 #endif
43
44 #ifndef __WXWINCE__
45 #include <errno.h>
46 #endif
47
48 #include <ctype.h>
49 #include <string.h>
50 #include <stdlib.h>
51
52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54 #endif // __WIN32__ but !__WXMICROWIN__
55
56 #ifdef __SALFORDC__
57 #include <clib.h>
58 #endif
59
60 #ifdef HAVE_ICONV
61 #include <iconv.h>
62 #include "wx/thread.h"
63 #endif
64
65 #include "wx/encconv.h"
66 #include "wx/fontmap.h"
67 #include "wx/utils.h"
68
69 #ifdef __WXMAC__
70 #ifndef __DARWIN__
71 #include <ATSUnicode.h>
72 #include <TextCommon.h>
73 #include <TextEncodingConverter.h>
74 #endif
75
76 #include "wx/mac/private.h" // includes mac headers
77 #endif
78
79 #define TRACE_STRCONV _T("strconv")
80
81 #if SIZEOF_WCHAR_T == 2
82 #define WC_UTF16
83 #endif
84
85 // ============================================================================
86 // implementation
87 // ============================================================================
88
89 // ----------------------------------------------------------------------------
90 // UTF-16 en/decoding to/from UCS-4
91 // ----------------------------------------------------------------------------
92
93
94 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
95 {
96 if (input<=0xffff)
97 {
98 if (output)
99 *output = (wxUint16) input;
100 return 1;
101 }
102 else if (input>=0x110000)
103 {
104 return (size_t)-1;
105 }
106 else
107 {
108 if (output)
109 {
110 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
111 *output = (wxUint16) ((input&0x3ff)+0xdc00);
112 }
113 return 2;
114 }
115 }
116
117 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
118 {
119 if ((*input<0xd800) || (*input>0xdfff))
120 {
121 output = *input;
122 return 1;
123 }
124 else if ((input[1]<0xdc00) || (input[1]>0xdfff))
125 {
126 output = *input;
127 return (size_t)-1;
128 }
129 else
130 {
131 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
132 return 2;
133 }
134 }
135
136
137 // ----------------------------------------------------------------------------
138 // wxMBConv
139 // ----------------------------------------------------------------------------
140
141 wxMBConv::~wxMBConv()
142 {
143 // nothing to do here (necessary for Darwin linking probably)
144 }
145
146 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
147 {
148 if ( psz )
149 {
150 // calculate the length of the buffer needed first
151 size_t nLen = MB2WC(NULL, psz, 0);
152 if ( nLen != (size_t)-1 )
153 {
154 // now do the actual conversion
155 wxWCharBuffer buf(nLen);
156 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
157 if ( nLen != (size_t)-1 )
158 {
159 return buf;
160 }
161 }
162 }
163
164 wxWCharBuffer buf((wchar_t *)NULL);
165
166 return buf;
167 }
168
169 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
170 {
171 if ( pwz )
172 {
173 size_t nLen = WC2MB(NULL, pwz, 0);
174 if ( nLen != (size_t)-1 )
175 {
176 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
177 nLen = WC2MB(buf.data(), pwz, nLen + 4);
178 if ( nLen != (size_t)-1 )
179 {
180 return buf;
181 }
182 }
183 }
184
185 wxCharBuffer buf((char *)NULL);
186
187 return buf;
188 }
189
190 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
191 {
192 wxASSERT(pOutSize != NULL);
193
194 const char* szEnd = szString + nStringLen + 1;
195 const char* szPos = szString;
196 const char* szStart = szPos;
197
198 size_t nActualLength = 0;
199 size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
200
201 wxWCharBuffer theBuffer(nCurrentSize);
202
203 //Convert the string until the length() is reached, continuing the
204 //loop every time a null character is reached
205 while(szPos != szEnd)
206 {
207 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
208
209 //Get the length of the current (sub)string
210 size_t nLen = MB2WC(NULL, szPos, 0);
211
212 //Invalid conversion?
213 if( nLen == (size_t)-1 )
214 {
215 *pOutSize = 0;
216 theBuffer.data()[0u] = wxT('\0');
217 return theBuffer;
218 }
219
220
221 //Increase the actual length (+1 for current null character)
222 nActualLength += nLen + 1;
223
224 //if buffer too big, realloc the buffer
225 if (nActualLength > (nCurrentSize+1))
226 {
227 wxWCharBuffer theNewBuffer(nCurrentSize << 1);
228 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
229 theBuffer = theNewBuffer;
230 nCurrentSize <<= 1;
231 }
232
233 //Convert the current (sub)string
234 if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
235 {
236 *pOutSize = 0;
237 theBuffer.data()[0u] = wxT('\0');
238 return theBuffer;
239 }
240
241 //Increment to next (sub)string
242 //Note that we have to use strlen instead of nLen here
243 //because XX2XX gives us the size of the output buffer,
244 //which is not necessarily the length of the string
245 szPos += strlen(szPos) + 1;
246 }
247
248 //success - return actual length and the buffer
249 *pOutSize = nActualLength;
250 return theBuffer;
251 }
252
253 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
254 {
255 wxASSERT(pOutSize != NULL);
256
257 const wchar_t* szEnd = szString + nStringLen + 1;
258 const wchar_t* szPos = szString;
259 const wchar_t* szStart = szPos;
260
261 size_t nActualLength = 0;
262 size_t nCurrentSize = nStringLen << 2; //try * 4 first
263
264 wxCharBuffer theBuffer(nCurrentSize);
265
266 //Convert the string until the length() is reached, continuing the
267 //loop every time a null character is reached
268 while(szPos != szEnd)
269 {
270 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
271
272 //Get the length of the current (sub)string
273 size_t nLen = WC2MB(NULL, szPos, 0);
274
275 //Invalid conversion?
276 if( nLen == (size_t)-1 )
277 {
278 *pOutSize = 0;
279 theBuffer.data()[0u] = wxT('\0');
280 return theBuffer;
281 }
282
283 //Increase the actual length (+1 for current null character)
284 nActualLength += nLen + 1;
285
286 //if buffer too big, realloc the buffer
287 if (nActualLength > (nCurrentSize+1))
288 {
289 wxCharBuffer theNewBuffer(nCurrentSize << 1);
290 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
291 theBuffer = theNewBuffer;
292 nCurrentSize <<= 1;
293 }
294
295 //Convert the current (sub)string
296 if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
297 {
298 *pOutSize = 0;
299 theBuffer.data()[0u] = wxT('\0');
300 return theBuffer;
301 }
302
303 //Increment to next (sub)string
304 //Note that we have to use wxWcslen instead of nLen here
305 //because XX2XX gives us the size of the output buffer,
306 //which is not necessarily the length of the string
307 szPos += wxWcslen(szPos) + 1;
308 }
309
310 //success - return actual length and the buffer
311 *pOutSize = nActualLength;
312 return theBuffer;
313 }
314
315 // ----------------------------------------------------------------------------
316 // wxMBConvLibc
317 // ----------------------------------------------------------------------------
318
319 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
320 {
321 return wxMB2WC(buf, psz, n);
322 }
323
324 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
325 {
326 return wxWC2MB(buf, psz, n);
327 }
328
329 #ifdef __UNIX__
330
331 // ----------------------------------------------------------------------------
332 // wxConvBrokenFileNames
333 // ----------------------------------------------------------------------------
334
335 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
336 {
337 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
338 || wxStricmp(charset, _T("UTF8")) == 0 )
339 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
340 else
341 m_conv = new wxCSConv(charset);
342 }
343
344 size_t
345 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
346 const char *psz,
347 size_t outputSize) const
348 {
349 return m_conv->MB2WC( outputBuf, psz, outputSize );
350 }
351
352 size_t
353 wxConvBrokenFileNames::WC2MB(char *outputBuf,
354 const wchar_t *psz,
355 size_t outputSize) const
356 {
357 return m_conv->WC2MB( outputBuf, psz, outputSize );
358 }
359
360 #endif
361
362 // ----------------------------------------------------------------------------
363 // UTF-7
364 // ----------------------------------------------------------------------------
365
366 // Implementation (C) 2004 Fredrik Roubert
367
368 //
369 // BASE64 decoding table
370 //
// Indexed by the byte value: gives the 6 bit value of a BASE64 character or
// 0xff for the bytes which are not part of the BASE64 alphabet.
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: digits
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x4f: upper case letters
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50 - 0x5f
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x6f: lower case letters
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70 - 0x7f
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff: nothing valid here
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
406
407 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
408 {
409 size_t len = 0;
410
411 while ( *psz && (!buf || (len < n)) )
412 {
413 unsigned char cc = *psz++;
414 if (cc != '+')
415 {
416 // plain ASCII char
417 if (buf)
418 *buf++ = cc;
419 len++;
420 }
421 else if (*psz == '-')
422 {
423 // encoded plus sign
424 if (buf)
425 *buf++ = cc;
426 len++;
427 psz++;
428 }
429 else // start of BASE64 encoded string
430 {
431 bool lsb, ok;
432 unsigned int d, l;
433 for ( ok = lsb = false, d = 0, l = 0;
434 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
435 psz++ )
436 {
437 d <<= 6;
438 d += cc;
439 for (l += 6; l >= 8; lsb = !lsb)
440 {
441 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
442 if (lsb)
443 {
444 if (buf)
445 *buf++ |= c;
446 len ++;
447 }
448 else
449 {
450 if (buf)
451 *buf = (wchar_t)(c << 8);
452 }
453
454 ok = true;
455 }
456 }
457
458 if ( !ok )
459 {
460 // in valid UTF7 we should have valid characters after '+'
461 return (size_t)-1;
462 }
463
464 if (*psz == '-')
465 psz++;
466 }
467 }
468
469 if ( buf && (len < n) )
470 *buf = '\0';
471
472 return len;
473 }
474
475 //
476 // BASE64 encoding table
477 //
// Indexed by a 6 bit value: gives the BASE64 character representing it.
static const unsigned char utf7enb64[] =
{
    // 0 - 15
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    // 16 - 31
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    // 32 - 47
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    // 48 - 63
    'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
};
489
490 //
491 // UTF-7 encoding table
492 //
493 // 0 - Set D (directly encoded characters)
494 // 1 - Set O (optional direct characters)
495 // 2 - whitespace characters (optional)
496 // 3 - special characters
497 //
// Class of each of the 128 ASCII characters, using the values described in
// the legend above (0 = Set D, 1 = Set O, 2 = whitespace, 3 = special).
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,     // 0x00 - 0x0f
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,     // 0x10 - 0x1f
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,     // 0x20 - 0x2f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,     // 0x30 - 0x3f
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     // 0x40 - 0x4f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,     // 0x50 - 0x5f
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     // 0x60 - 0x6f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3      // 0x70 - 0x7f
};
509
510 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
511 {
512
513
514 size_t len = 0;
515
516 while (*psz && ((!buf) || (len < n)))
517 {
518 wchar_t cc = *psz++;
519 if (cc < 0x80 && utf7encode[cc] < 1)
520 {
521 // plain ASCII char
522 if (buf)
523 *buf++ = (char)cc;
524 len++;
525 }
526 #ifndef WC_UTF16
527 else if (((wxUint32)cc) > 0xffff)
528 {
529 // no surrogate pair generation (yet?)
530 return (size_t)-1;
531 }
532 #endif
533 else
534 {
535 if (buf)
536 *buf++ = '+';
537 len++;
538 if (cc != '+')
539 {
540 // BASE64 encode string
541 unsigned int lsb, d, l;
542 for (d = 0, l = 0; /*nothing*/; psz++)
543 {
544 for (lsb = 0; lsb < 2; lsb ++)
545 {
546 d <<= 8;
547 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
548
549 for (l += 8; l >= 6; )
550 {
551 l -= 6;
552 if (buf)
553 *buf++ = utf7enb64[(d >> l) % 64];
554 len++;
555 }
556 }
557 cc = *psz;
558 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
559 break;
560 }
561 if (l != 0)
562 {
563 if (buf)
564 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
565 len++;
566 }
567 }
568 if (buf)
569 *buf++ = '-';
570 len++;
571 }
572 }
573 if (buf && (len < n))
574 *buf = 0;
575 return len;
576 }
577
578 // ----------------------------------------------------------------------------
579 // UTF-8
580 // ----------------------------------------------------------------------------
581
582 static wxUint32 utf8_max[]=
583 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
584
585 // boundaries of the private use area we use to (temporarily) remap invalid
586 // characters invalid in a UTF-8 encoded string
587 const wxUint32 wxUnicodePUA = 0x100000;
588 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
589
590 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
591 {
592 size_t len = 0;
593
594 while (*psz && ((!buf) || (len < n)))
595 {
596 const char *opsz = psz;
597 bool invalid = false;
598 unsigned char cc = *psz++, fc = cc;
599 unsigned cnt;
600 for (cnt = 0; fc & 0x80; cnt++)
601 fc <<= 1;
602 if (!cnt)
603 {
604 // plain ASCII char
605 if (buf)
606 *buf++ = cc;
607 len++;
608
609 // escape the escape character for octal escapes
610 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
611 && cc == '\\' && (!buf || len < n))
612 {
613 if (buf)
614 *buf++ = cc;
615 len++;
616 }
617 }
618 else
619 {
620 cnt--;
621 if (!cnt)
622 {
623 // invalid UTF-8 sequence
624 invalid = true;
625 }
626 else
627 {
628 unsigned ocnt = cnt - 1;
629 wxUint32 res = cc & (0x3f >> cnt);
630 while (cnt--)
631 {
632 cc = *psz;
633 if ((cc & 0xC0) != 0x80)
634 {
635 // invalid UTF-8 sequence
636 invalid = true;
637 break;
638 }
639 psz++;
640 res = (res << 6) | (cc & 0x3f);
641 }
642 if (invalid || res <= utf8_max[ocnt])
643 {
644 // illegal UTF-8 encoding
645 invalid = true;
646 }
647 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
648 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
649 {
650 // if one of our PUA characters turns up externally
651 // it must also be treated as an illegal sequence
652 // (a bit like you have to escape an escape character)
653 invalid = true;
654 }
655 else
656 {
657 #ifdef WC_UTF16
658 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
659 size_t pa = encode_utf16(res, (wxUint16 *)buf);
660 if (pa == (size_t)-1)
661 {
662 invalid = true;
663 }
664 else
665 {
666 if (buf)
667 buf += pa;
668 len += pa;
669 }
670 #else // !WC_UTF16
671 if (buf)
672 *buf++ = (wchar_t)res;
673 len++;
674 #endif // WC_UTF16/!WC_UTF16
675 }
676 }
677 if (invalid)
678 {
679 if (m_options & MAP_INVALID_UTF8_TO_PUA)
680 {
681 while (opsz < psz && (!buf || len < n))
682 {
683 #ifdef WC_UTF16
684 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
685 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
686 wxASSERT(pa != (size_t)-1);
687 if (buf)
688 buf += pa;
689 opsz++;
690 len += pa;
691 #else
692 if (buf)
693 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
694 opsz++;
695 len++;
696 #endif
697 }
698 }
699 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
700 {
701 while (opsz < psz && (!buf || len < n))
702 {
703 if ( buf && len + 3 < n )
704 {
705 unsigned char on = *opsz;
706 *buf++ = L'\\';
707 *buf++ = (wchar_t)( L'0' + on / 0100 );
708 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
709 *buf++ = (wchar_t)( L'0' + on % 010 );
710 }
711 opsz++;
712 len += 4;
713 }
714 }
715 else // MAP_INVALID_UTF8_NOT
716 {
717 return (size_t)-1;
718 }
719 }
720 }
721 }
722 if (buf && (len < n))
723 *buf = 0;
724 return len;
725 }
726
// Return true if the given wide character is one of the octal digits,
// i.e. lies between L'0' and L'7' inclusive.
static inline bool isoctal(wchar_t wch)
{
    if ( wch < L'0' )
        return false;

    return !(wch > L'7');
}
731
732 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
733 {
734 size_t len = 0;
735
736 while (*psz && ((!buf) || (len < n)))
737 {
738 wxUint32 cc;
739 #ifdef WC_UTF16
740 // cast is ok for WC_UTF16
741 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
742 psz += (pa == (size_t)-1) ? 1 : pa;
743 #else
744 cc=(*psz++) & 0x7fffffff;
745 #endif
746
747 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
748 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
749 {
750 if (buf)
751 *buf++ = (char)(cc - wxUnicodePUA);
752 len++;
753 }
754 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
755 && cc == L'\\' && psz[0] == L'\\' )
756 {
757 if (buf)
758 *buf++ = (char)cc;
759 psz++;
760 len++;
761 }
762 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
763 cc == L'\\' &&
764 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
765 {
766 if (buf)
767 {
768 *buf++ = (char) ((psz[0] - L'0')*0100 +
769 (psz[1] - L'0')*010 +
770 (psz[2] - L'0'));
771 }
772
773 psz += 3;
774 len++;
775 }
776 else
777 {
778 unsigned cnt;
779 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
780 if (!cnt)
781 {
782 // plain ASCII char
783 if (buf)
784 *buf++ = (char) cc;
785 len++;
786 }
787
788 else
789 {
790 len += cnt + 1;
791 if (buf)
792 {
793 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
794 while (cnt--)
795 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
796 }
797 }
798 }
799 }
800
801 if (buf && (len<n))
802 *buf = 0;
803
804 return len;
805 }
806
807 // ----------------------------------------------------------------------------
808 // UTF-16
809 // ----------------------------------------------------------------------------
810
811 #ifdef WORDS_BIGENDIAN
812 #define wxMBConvUTF16straight wxMBConvUTF16BE
813 #define wxMBConvUTF16swap wxMBConvUTF16LE
814 #else
815 #define wxMBConvUTF16swap wxMBConvUTF16BE
816 #define wxMBConvUTF16straight wxMBConvUTF16LE
817 #endif
818
819
820 #ifdef WC_UTF16
821
822 // copy 16bit MB to 16bit String
823 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
824 {
825 size_t len=0;
826
827 while (*(wxUint16*)psz && (!buf || len < n))
828 {
829 if (buf)
830 *buf++ = *(wxUint16*)psz;
831 len++;
832
833 psz += sizeof(wxUint16);
834 }
835 if (buf && len<n) *buf=0;
836
837 return len;
838 }
839
840
841 // copy 16bit String to 16bit MB
842 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
843 {
844 size_t len=0;
845
846 while (*psz && (!buf || len < n))
847 {
848 if (buf)
849 {
850 *(wxUint16*)buf = *psz;
851 buf += sizeof(wxUint16);
852 }
853 len += sizeof(wxUint16);
854 psz++;
855 }
856 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
857
858 return len;
859 }
860
861
862 // swap 16bit MB to 16bit String
863 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
864 {
865 size_t len = 0;
866
867 // UTF16 string must be terminated by 2 NULs as single NULs may occur
868 // inside the string
869 while ( (psz[0] || psz[1]) && (!buf || len < n) )
870 {
871 if ( buf )
872 {
873 ((char *)buf)[0] = psz[1];
874 ((char *)buf)[1] = psz[0];
875 buf++;
876 }
877 len++;
878 psz += 2;
879 }
880
881 if ( buf && len < n )
882 *buf = L'\0';
883
884 return len;
885 }
886
887
888 // swap 16bit MB to 16bit String
889 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
890 {
891 size_t len=0;
892
893 while (*psz && (!buf || len < n))
894 {
895 if (buf)
896 {
897 *buf++ = ((char*)psz)[1];
898 *buf++ = ((char*)psz)[0];
899 }
900 len += sizeof(wxUint16);
901 psz++;
902 }
903 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
904
905 return len;
906 }
907
908
909 #else // WC_UTF16
910
911
912 // copy 16bit MB to 32bit String
913 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
914 {
915 size_t len=0;
916
917 while (*(wxUint16*)psz && (!buf || len < n))
918 {
919 wxUint32 cc;
920 size_t pa=decode_utf16((wxUint16*)psz, cc);
921 if (pa == (size_t)-1)
922 return pa;
923
924 if (buf)
925 *buf++ = (wchar_t)cc;
926 len++;
927 psz += pa * sizeof(wxUint16);
928 }
929 if (buf && len<n) *buf=0;
930
931 return len;
932 }
933
934
935 // copy 32bit String to 16bit MB
936 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
937 {
938 size_t len=0;
939
940 while (*psz && (!buf || len < n))
941 {
942 wxUint16 cc[2];
943 size_t pa=encode_utf16(*psz, cc);
944
945 if (pa == (size_t)-1)
946 return pa;
947
948 if (buf)
949 {
950 *(wxUint16*)buf = cc[0];
951 buf += sizeof(wxUint16);
952 if (pa > 1)
953 {
954 *(wxUint16*)buf = cc[1];
955 buf += sizeof(wxUint16);
956 }
957 }
958
959 len += pa*sizeof(wxUint16);
960 psz++;
961 }
962 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
963
964 return len;
965 }
966
967
968 // swap 16bit MB to 32bit String
969 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
970 {
971 size_t len=0;
972
973 while (*(wxUint16*)psz && (!buf || len < n))
974 {
975 wxUint32 cc;
976 char tmp[4];
977 tmp[0]=psz[1]; tmp[1]=psz[0];
978 tmp[2]=psz[3]; tmp[3]=psz[2];
979
980 size_t pa=decode_utf16((wxUint16*)tmp, cc);
981 if (pa == (size_t)-1)
982 return pa;
983
984 if (buf)
985 *buf++ = (wchar_t)cc;
986
987 len++;
988 psz += pa * sizeof(wxUint16);
989 }
990 if (buf && len<n) *buf=0;
991
992 return len;
993 }
994
995
996 // swap 32bit String to 16bit MB
997 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
998 {
999 size_t len=0;
1000
1001 while (*psz && (!buf || len < n))
1002 {
1003 wxUint16 cc[2];
1004 size_t pa=encode_utf16(*psz, cc);
1005
1006 if (pa == (size_t)-1)
1007 return pa;
1008
1009 if (buf)
1010 {
1011 *buf++ = ((char*)cc)[1];
1012 *buf++ = ((char*)cc)[0];
1013 if (pa > 1)
1014 {
1015 *buf++ = ((char*)cc)[3];
1016 *buf++ = ((char*)cc)[2];
1017 }
1018 }
1019
1020 len += pa*sizeof(wxUint16);
1021 psz++;
1022 }
1023 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1024
1025 return len;
1026 }
1027
1028 #endif // WC_UTF16
1029
1030
1031 // ----------------------------------------------------------------------------
1032 // UTF-32
1033 // ----------------------------------------------------------------------------
1034
1035 #ifdef WORDS_BIGENDIAN
1036 #define wxMBConvUTF32straight wxMBConvUTF32BE
1037 #define wxMBConvUTF32swap wxMBConvUTF32LE
1038 #else
1039 #define wxMBConvUTF32swap wxMBConvUTF32BE
1040 #define wxMBConvUTF32straight wxMBConvUTF32LE
1041 #endif
1042
1043
1044 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1045 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1046
1047
1048 #ifdef WC_UTF16
1049
1050 // copy 32bit MB to 16bit String
1051 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1052 {
1053 size_t len=0;
1054
1055 while (*(wxUint32*)psz && (!buf || len < n))
1056 {
1057 wxUint16 cc[2];
1058
1059 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1060 if (pa == (size_t)-1)
1061 return pa;
1062
1063 if (buf)
1064 {
1065 *buf++ = cc[0];
1066 if (pa > 1)
1067 *buf++ = cc[1];
1068 }
1069 len += pa;
1070 psz += sizeof(wxUint32);
1071 }
1072 if (buf && len<n) *buf=0;
1073
1074 return len;
1075 }
1076
1077
1078 // copy 16bit String to 32bit MB
1079 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1080 {
1081 size_t len=0;
1082
1083 while (*psz && (!buf || len < n))
1084 {
1085 wxUint32 cc;
1086
1087 // cast is ok for WC_UTF16
1088 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1089 if (pa == (size_t)-1)
1090 return pa;
1091
1092 if (buf)
1093 {
1094 *(wxUint32*)buf = cc;
1095 buf += sizeof(wxUint32);
1096 }
1097 len += sizeof(wxUint32);
1098 psz += pa;
1099 }
1100
1101 if (buf && len<=n-sizeof(wxUint32))
1102 *(wxUint32*)buf=0;
1103
1104 return len;
1105 }
1106
1107
1108
1109 // swap 32bit MB to 16bit String
1110 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1111 {
1112 size_t len=0;
1113
1114 while (*(wxUint32*)psz && (!buf || len < n))
1115 {
1116 char tmp[4];
1117 tmp[0] = psz[3]; tmp[1] = psz[2];
1118 tmp[2] = psz[1]; tmp[3] = psz[0];
1119
1120
1121 wxUint16 cc[2];
1122
1123 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1124 if (pa == (size_t)-1)
1125 return pa;
1126
1127 if (buf)
1128 {
1129 *buf++ = cc[0];
1130 if (pa > 1)
1131 *buf++ = cc[1];
1132 }
1133 len += pa;
1134 psz += sizeof(wxUint32);
1135 }
1136
1137 if (buf && len<n)
1138 *buf=0;
1139
1140 return len;
1141 }
1142
1143
1144 // swap 16bit String to 32bit MB
1145 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1146 {
1147 size_t len=0;
1148
1149 while (*psz && (!buf || len < n))
1150 {
1151 char cc[4];
1152
1153 // cast is ok for WC_UTF16
1154 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1155 if (pa == (size_t)-1)
1156 return pa;
1157
1158 if (buf)
1159 {
1160 *buf++ = cc[3];
1161 *buf++ = cc[2];
1162 *buf++ = cc[1];
1163 *buf++ = cc[0];
1164 }
1165 len += sizeof(wxUint32);
1166 psz += pa;
1167 }
1168
1169 if (buf && len<=n-sizeof(wxUint32))
1170 *(wxUint32*)buf=0;
1171
1172 return len;
1173 }
1174
1175 #else // WC_UTF16
1176
1177
1178 // copy 32bit MB to 32bit String
1179 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1180 {
1181 size_t len=0;
1182
1183 while (*(wxUint32*)psz && (!buf || len < n))
1184 {
1185 if (buf)
1186 *buf++ = (wchar_t)(*(wxUint32*)psz);
1187 len++;
1188 psz += sizeof(wxUint32);
1189 }
1190
1191 if (buf && len<n)
1192 *buf=0;
1193
1194 return len;
1195 }
1196
1197
1198 // copy 32bit String to 32bit MB
1199 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1200 {
1201 size_t len=0;
1202
1203 while (*psz && (!buf || len < n))
1204 {
1205 if (buf)
1206 {
1207 *(wxUint32*)buf = *psz;
1208 buf += sizeof(wxUint32);
1209 }
1210
1211 len += sizeof(wxUint32);
1212 psz++;
1213 }
1214
1215 if (buf && len<=n-sizeof(wxUint32))
1216 *(wxUint32*)buf=0;
1217
1218 return len;
1219 }
1220
1221
1222 // swap 32bit MB to 32bit String
1223 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1224 {
1225 size_t len=0;
1226
1227 while (*(wxUint32*)psz && (!buf || len < n))
1228 {
1229 if (buf)
1230 {
1231 ((char *)buf)[0] = psz[3];
1232 ((char *)buf)[1] = psz[2];
1233 ((char *)buf)[2] = psz[1];
1234 ((char *)buf)[3] = psz[0];
1235 buf++;
1236 }
1237 len++;
1238 psz += sizeof(wxUint32);
1239 }
1240
1241 if (buf && len<n)
1242 *buf=0;
1243
1244 return len;
1245 }
1246
1247
1248 // swap 32bit String to 32bit MB
1249 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1250 {
1251 size_t len=0;
1252
1253 while (*psz && (!buf || len < n))
1254 {
1255 if (buf)
1256 {
1257 *buf++ = ((char *)psz)[3];
1258 *buf++ = ((char *)psz)[2];
1259 *buf++ = ((char *)psz)[1];
1260 *buf++ = ((char *)psz)[0];
1261 }
1262 len += sizeof(wxUint32);
1263 psz++;
1264 }
1265
1266 if (buf && len<=n-sizeof(wxUint32))
1267 *(wxUint32*)buf=0;
1268
1269 return len;
1270 }
1271
1272
1273 #endif // WC_UTF16
1274
1275
1276 // ============================================================================
1277 // The classes doing conversion using the iconv_xxx() functions
1278 // ============================================================================
1279
1280 #ifdef HAVE_ICONV
1281
1282 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1283 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1284 // (unless there's yet another bug in glibc) the only case when iconv()
1285 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1286 // left in the input buffer -- when _real_ error occurs,
1287 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1288 // iconv() failure.
1289 // [This bug does not appear in glibc 2.2.]
1290 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1291 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1292 (errno != E2BIG || bufLeft != 0))
1293 #else
1294 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1295 #endif
1296
1297 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1298
1299 #define ICONV_T_INVALID ((iconv_t)-1)
1300
1301 #if SIZEOF_WCHAR_T == 4
1302 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1303 #define WC_ENC wxFONTENCODING_UTF32
1304 #elif SIZEOF_WCHAR_T == 2
1305 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1306 #define WC_ENC wxFONTENCODING_UTF16
1307 #else // sizeof(wchar_t) != 2 nor 4
1308 // does this ever happen?
1309 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1310 #endif
1311
1312 // ----------------------------------------------------------------------------
1313 // wxMBConv_iconv: encapsulates an iconv character set
1314 // ----------------------------------------------------------------------------
1315
1316 class wxMBConv_iconv : public wxMBConv
1317 {
1318 public:
1319 wxMBConv_iconv(const wxChar *name);
1320 virtual ~wxMBConv_iconv();
1321
1322 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1323 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1324
1325 bool IsOk() const
1326 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1327
1328 protected:
1329 // the iconv handlers used to translate from multibyte to wide char and in
1330 // the other direction
1331 iconv_t m2w,
1332 w2m;
1333 #if wxUSE_THREADS
1334 // guards access to m2w and w2m objects
1335 wxMutex m_iconvMutex;
1336 #endif
1337
1338 private:
1339 // the name (for iconv_open()) of a wide char charset -- if none is
1340 // available on this machine, it will remain NULL
1341 static wxString ms_wcCharsetName;
1342
1343 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1344 // different endian-ness than the native one
1345 static bool ms_wcNeedsSwap;
1346 };
1347
1348 // make the constructor available for unit testing
1349 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1350 {
1351 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1352 if ( !result->IsOk() )
1353 {
1354 delete result;
1355 return 0;
1356 }
1357 return result;
1358 }
1359
1360 wxString wxMBConv_iconv::ms_wcCharsetName;
1361 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1362
1363 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1364 {
1365 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1366 // names for the charsets
1367 const wxCharBuffer cname(wxString(name).ToAscii());
1368
1369 // check for charset that represents wchar_t:
1370 if ( ms_wcCharsetName.empty() )
1371 {
1372 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1373
1374 #if wxUSE_FONTMAP
1375 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1376 #else // !wxUSE_FONTMAP
1377 static const wxChar *names[] =
1378 {
1379 #if SIZEOF_WCHAR_T == 4
1380 _T("UCS-4"),
1381 #elif SIZEOF_WCHAR_T = 2
1382 _T("UCS-2"),
1383 #endif
1384 NULL
1385 };
1386 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1387
1388 for ( ; *names && ms_wcCharsetName.empty(); ++names )
1389 {
1390 const wxString nameCS(*names);
1391
1392 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1393 wxString nameXE(nameCS);
1394 #ifdef WORDS_BIGENDIAN
1395 nameXE += _T("BE");
1396 #else // little endian
1397 nameXE += _T("LE");
1398 #endif
1399
1400 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1401 nameXE.c_str());
1402
1403 m2w = iconv_open(nameXE.ToAscii(), cname);
1404 if ( m2w == ICONV_T_INVALID )
1405 {
1406 // try charset w/o bytesex info (e.g. "UCS4")
1407 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1408 nameCS.c_str());
1409 m2w = iconv_open(nameCS.ToAscii(), cname);
1410
1411 // and check for bytesex ourselves:
1412 if ( m2w != ICONV_T_INVALID )
1413 {
1414 char buf[2], *bufPtr;
1415 wchar_t wbuf[2], *wbufPtr;
1416 size_t insz, outsz;
1417 size_t res;
1418
1419 buf[0] = 'A';
1420 buf[1] = 0;
1421 wbuf[0] = 0;
1422 insz = 2;
1423 outsz = SIZEOF_WCHAR_T * 2;
1424 wbufPtr = wbuf;
1425 bufPtr = buf;
1426
1427 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1428 (char**)&wbufPtr, &outsz);
1429
1430 if (ICONV_FAILED(res, insz))
1431 {
1432 wxLogLastError(wxT("iconv"));
1433 wxLogError(_("Conversion to charset '%s' doesn't work."),
1434 nameCS.c_str());
1435 }
1436 else // ok, can convert to this encoding, remember it
1437 {
1438 ms_wcCharsetName = nameCS;
1439 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1440 }
1441 }
1442 }
1443 else // use charset not requiring byte swapping
1444 {
1445 ms_wcCharsetName = nameXE;
1446 }
1447 }
1448
1449 wxLogTrace(TRACE_STRCONV,
1450 wxT("iconv wchar_t charset is \"%s\"%s"),
1451 ms_wcCharsetName.empty() ? _T("<none>")
1452 : ms_wcCharsetName.c_str(),
1453 ms_wcNeedsSwap ? _T(" (needs swap)")
1454 : _T(""));
1455 }
1456 else // we already have ms_wcCharsetName
1457 {
1458 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1459 }
1460
1461 if ( ms_wcCharsetName.empty() )
1462 {
1463 w2m = ICONV_T_INVALID;
1464 }
1465 else
1466 {
1467 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1468 if ( w2m == ICONV_T_INVALID )
1469 {
1470 wxLogTrace(TRACE_STRCONV,
1471 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1472 ms_wcCharsetName.c_str(), cname.data());
1473 }
1474 }
1475 }
1476
1477 wxMBConv_iconv::~wxMBConv_iconv()
1478 {
1479 if ( m2w != ICONV_T_INVALID )
1480 iconv_close(m2w);
1481 if ( w2m != ICONV_T_INVALID )
1482 iconv_close(w2m);
1483 }
1484
1485 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1486 {
1487 #if wxUSE_THREADS
1488 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1489 // Unfortunately there is a couple of global wxCSConv objects such as
1490 // wxConvLocal that are used all over wx code, so we have to make sure
1491 // the handle is used by at most one thread at the time. Otherwise
1492 // only a few wx classes would be safe to use from non-main threads
1493 // as MB<->WC conversion would fail "randomly".
1494 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1495 #endif
1496
1497 size_t inbuf = strlen(psz);
1498 size_t outbuf = n * SIZEOF_WCHAR_T;
1499 size_t res, cres;
1500 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1501 wchar_t *bufPtr = buf;
1502 const char *pszPtr = psz;
1503
1504 if (buf)
1505 {
1506 // have destination buffer, convert there
1507 cres = iconv(m2w,
1508 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1509 (char**)&bufPtr, &outbuf);
1510 res = n - (outbuf / SIZEOF_WCHAR_T);
1511
1512 if (ms_wcNeedsSwap)
1513 {
1514 // convert to native endianness
1515 for ( unsigned i = 0; i < res; i++ )
1516 buf[n] = WC_BSWAP(buf[i]);
1517 }
1518
1519 // NB: iconv was given only strlen(psz) characters on input, and so
1520 // it couldn't convert the trailing zero. Let's do it ourselves
1521 // if there's some room left for it in the output buffer.
1522 if (res < n)
1523 buf[res] = 0;
1524 }
1525 else
1526 {
1527 // no destination buffer... convert using temp buffer
1528 // to calculate destination buffer requirement
1529 wchar_t tbuf[8];
1530 res = 0;
1531 do {
1532 bufPtr = tbuf;
1533 outbuf = 8*SIZEOF_WCHAR_T;
1534
1535 cres = iconv(m2w,
1536 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1537 (char**)&bufPtr, &outbuf );
1538
1539 res += 8-(outbuf/SIZEOF_WCHAR_T);
1540 } while ((cres==(size_t)-1) && (errno==E2BIG));
1541 }
1542
1543 if (ICONV_FAILED(cres, inbuf))
1544 {
1545 //VS: it is ok if iconv fails, hence trace only
1546 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1547 return (size_t)-1;
1548 }
1549
1550 return res;
1551 }
1552
1553 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1554 {
1555 #if wxUSE_THREADS
1556 // NB: explained in MB2WC
1557 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1558 #endif
1559
1560 size_t inlen = wxWcslen(psz);
1561 size_t inbuf = inlen * SIZEOF_WCHAR_T;
1562 size_t outbuf = n;
1563 size_t res, cres;
1564
1565 wchar_t *tmpbuf = 0;
1566
1567 if (ms_wcNeedsSwap)
1568 {
1569 // need to copy to temp buffer to switch endianness
1570 // (doing WC_BSWAP twice on the original buffer won't help, as it
1571 // could be in read-only memory, or be accessed in some other thread)
1572 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1573 for ( size_t i = 0; i < inlen; i++ )
1574 tmpbuf[n] = WC_BSWAP(psz[i]);
1575 tmpbuf[inlen] = L'\0';
1576 psz = tmpbuf;
1577 }
1578
1579 if (buf)
1580 {
1581 // have destination buffer, convert there
1582 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1583
1584 res = n-outbuf;
1585
1586 // NB: iconv was given only wcslen(psz) characters on input, and so
1587 // it couldn't convert the trailing zero. Let's do it ourselves
1588 // if there's some room left for it in the output buffer.
1589 if (res < n)
1590 buf[0] = 0;
1591 }
1592 else
1593 {
1594 // no destination buffer... convert using temp buffer
1595 // to calculate destination buffer requirement
1596 char tbuf[16];
1597 res = 0;
1598 do {
1599 buf = tbuf; outbuf = 16;
1600
1601 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1602
1603 res += 16 - outbuf;
1604 } while ((cres==(size_t)-1) && (errno==E2BIG));
1605 }
1606
1607 if (ms_wcNeedsSwap)
1608 {
1609 free(tmpbuf);
1610 }
1611
1612 if (ICONV_FAILED(cres, inbuf))
1613 {
1614 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1615 return (size_t)-1;
1616 }
1617
1618 return res;
1619 }
1620
1621 #endif // HAVE_ICONV
1622
1623
1624 // ============================================================================
1625 // Win32 conversion classes
1626 // ============================================================================
1627
1628 #ifdef wxHAVE_WIN32_MB2WC
1629
1630 // from utils.cpp
1631 #if wxUSE_FONTMAP
1632 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1633 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1634 #endif
1635
1636 class wxMBConv_win32 : public wxMBConv
1637 {
1638 public:
1639 wxMBConv_win32()
1640 {
1641 m_CodePage = CP_ACP;
1642 }
1643
1644 #if wxUSE_FONTMAP
1645 wxMBConv_win32(const wxChar* name)
1646 {
1647 m_CodePage = wxCharsetToCodepage(name);
1648 }
1649
1650 wxMBConv_win32(wxFontEncoding encoding)
1651 {
1652 m_CodePage = wxEncodingToCodepage(encoding);
1653 }
1654 #endif
1655
1656 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1657 {
1658 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1659 // the behaviour is not compatible with the Unix version (using iconv)
1660 // and break the library itself, e.g. wxTextInputStream::NextChar()
1661 // wouldn't work if reading an incomplete MB char didn't result in an
1662 // error
1663 //
1664 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1665 // an error (tested under Windows Server 2003) and apparently it is
1666 // done on purpose, i.e. the function accepts any input in this case
1667 // and although I'd prefer to return error on ill-formed output, our
1668 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1669 // explicitly ill-formed according to RFC 2152) neither so we don't
1670 // even have any fallback here...
1671 //
1672 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1673 // Win XP or newer and if it is specified on older versions, conversion
1674 // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1675 // fails. So we can only use the flag on newer Windows versions.
1676 // Additionally, the flag is not supported by UTF7, symbol and CJK
1677 // encodings. See here:
1678 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1679 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1680 int flags = 0;
1681 if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
1682 m_CodePage < 50000 &&
1683 IsAtLeastWin2kSP4() )
1684 {
1685 flags = MB_ERR_INVALID_CHARS;
1686 }
1687 else if ( m_CodePage == CP_UTF8 )
1688 {
1689 // Avoid round-trip in the special case of UTF-8 by using our
1690 // own UTF-8 conversion code:
1691 return wxMBConvUTF8().MB2WC(buf, psz, n);
1692 }
1693
1694 const size_t len = ::MultiByteToWideChar
1695 (
1696 m_CodePage, // code page
1697 flags, // flags: fall on error
1698 psz, // input string
1699 -1, // its length (NUL-terminated)
1700 buf, // output string
1701 buf ? n : 0 // size of output buffer
1702 );
1703 if ( !len )
1704 {
1705 // function totally failed
1706 return (size_t)-1;
1707 }
1708
1709 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1710 // check if we succeeded, by doing a double trip:
1711 if ( !flags && buf )
1712 {
1713 const size_t mbLen = strlen(psz);
1714 wxCharBuffer mbBuf(mbLen);
1715 if ( ::WideCharToMultiByte
1716 (
1717 m_CodePage,
1718 0,
1719 buf,
1720 -1,
1721 mbBuf.data(),
1722 mbLen + 1, // size in bytes, not length
1723 NULL,
1724 NULL
1725 ) == 0 ||
1726 strcmp(mbBuf, psz) != 0 )
1727 {
1728 // we didn't obtain the same thing we started from, hence
1729 // the conversion was lossy and we consider that it failed
1730 return (size_t)-1;
1731 }
1732 }
1733
1734 // note that it returns count of written chars for buf != NULL and size
1735 // of the needed buffer for buf == NULL so in either case the length of
1736 // the string (which never includes the terminating NUL) is one less
1737 return len - 1;
1738 }
1739
1740 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1741 {
1742 /*
1743 we have a problem here: by default, WideCharToMultiByte() may
1744 replace characters unrepresentable in the target code page with bad
1745 quality approximations such as turning "1/2" symbol (U+00BD) into
1746 "1" for the code pages which don't have it and we, obviously, want
1747 to avoid this at any price
1748
1749 the trouble is that this function does it _silently_, i.e. it won't
1750 even tell us whether it did or not... Win98/2000 and higher provide
1751 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1752 we have to resort to a round trip, i.e. check that converting back
1753 results in the same string -- this is, of course, expensive but
1754 otherwise we simply can't be sure to not garble the data.
1755 */
1756
1757 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1758 // it doesn't work with CJK encodings (which we test for rather roughly
1759 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1760 // supporting it
1761 BOOL usedDef wxDUMMY_INITIALIZE(false);
1762 BOOL *pUsedDef;
1763 int flags;
1764 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1765 {
1766 // it's our lucky day
1767 flags = WC_NO_BEST_FIT_CHARS;
1768 pUsedDef = &usedDef;
1769 }
1770 else // old system or unsupported encoding
1771 {
1772 flags = 0;
1773 pUsedDef = NULL;
1774 }
1775
1776 const size_t len = ::WideCharToMultiByte
1777 (
1778 m_CodePage, // code page
1779 flags, // either none or no best fit
1780 pwz, // input string
1781 -1, // it is (wide) NUL-terminated
1782 buf, // output buffer
1783 buf ? n : 0, // and its size
1784 NULL, // default "replacement" char
1785 pUsedDef // [out] was it used?
1786 );
1787
1788 if ( !len )
1789 {
1790 // function totally failed
1791 return (size_t)-1;
1792 }
1793
1794 // if we were really converting, check if we succeeded
1795 if ( buf )
1796 {
1797 if ( flags )
1798 {
1799 // check if the conversion failed, i.e. if any replacements
1800 // were done
1801 if ( usedDef )
1802 return (size_t)-1;
1803 }
1804 else // we must resort to double tripping...
1805 {
1806 wxWCharBuffer wcBuf(n);
1807 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1808 wcscmp(wcBuf, pwz) != 0 )
1809 {
1810 // we didn't obtain the same thing we started from, hence
1811 // the conversion was lossy and we consider that it failed
1812 return (size_t)-1;
1813 }
1814 }
1815 }
1816
1817 // see the comment above for the reason of "len - 1"
1818 return len - 1;
1819 }
1820
1821 bool IsOk() const { return m_CodePage != -1; }
1822
1823 private:
1824 static bool CanUseNoBestFit()
1825 {
1826 static int s_isWin98Or2k = -1;
1827
1828 if ( s_isWin98Or2k == -1 )
1829 {
1830 int verMaj, verMin;
1831 switch ( wxGetOsVersion(&verMaj, &verMin) )
1832 {
1833 case wxWIN95:
1834 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1835 break;
1836
1837 case wxWINDOWS_NT:
1838 s_isWin98Or2k = verMaj >= 5;
1839 break;
1840
1841 default:
1842 // unknown, be conseravtive by default
1843 s_isWin98Or2k = 0;
1844 }
1845
1846 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1847 }
1848
1849 return s_isWin98Or2k == 1;
1850 }
1851
1852 static bool IsAtLeastWin2kSP4()
1853 {
1854 #ifdef __WXWINCE__
1855 return false;
1856 #else
1857 static int s_isAtLeastWin2kSP4 = -1;
1858
1859 if ( s_isAtLeastWin2kSP4 == -1 )
1860 {
1861 OSVERSIONINFOEX ver;
1862
1863 memset(&ver, 0, sizeof(ver));
1864 ver.dwOSVersionInfoSize = sizeof(ver);
1865 GetVersionEx((OSVERSIONINFO*)&ver);
1866
1867 s_isAtLeastWin2kSP4 =
1868 ((ver.dwMajorVersion > 5) || // Vista+
1869 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
1870 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
1871 ver.wServicePackMajor >= 4)) // 2000 SP4+
1872 ? 1 : 0;
1873 }
1874
1875 return s_isAtLeastWin2kSP4 == 1;
1876 #endif
1877 }
1878
1879 long m_CodePage;
1880 };
1881
1882 #endif // wxHAVE_WIN32_MB2WC
1883
1884 // ============================================================================
1885 // Cocoa conversion classes
1886 // ============================================================================
1887
1888 #if defined(__WXCOCOA__)
1889
// RN: There is no UTF-32 support in either Core Foundation or
// Cocoa. Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1893
1894 #include <CoreFoundation/CFString.h>
1895 #include <CoreFoundation/CFStringEncodingExt.h>
1896
1897 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1898 {
1899 CFStringEncoding enc = kCFStringEncodingInvalidId ;
1900 if ( encoding == wxFONTENCODING_DEFAULT )
1901 {
1902 enc = CFStringGetSystemEncoding();
1903 }
1904 else switch( encoding)
1905 {
1906 case wxFONTENCODING_ISO8859_1 :
1907 enc = kCFStringEncodingISOLatin1 ;
1908 break ;
1909 case wxFONTENCODING_ISO8859_2 :
1910 enc = kCFStringEncodingISOLatin2;
1911 break ;
1912 case wxFONTENCODING_ISO8859_3 :
1913 enc = kCFStringEncodingISOLatin3 ;
1914 break ;
1915 case wxFONTENCODING_ISO8859_4 :
1916 enc = kCFStringEncodingISOLatin4;
1917 break ;
1918 case wxFONTENCODING_ISO8859_5 :
1919 enc = kCFStringEncodingISOLatinCyrillic;
1920 break ;
1921 case wxFONTENCODING_ISO8859_6 :
1922 enc = kCFStringEncodingISOLatinArabic;
1923 break ;
1924 case wxFONTENCODING_ISO8859_7 :
1925 enc = kCFStringEncodingISOLatinGreek;
1926 break ;
1927 case wxFONTENCODING_ISO8859_8 :
1928 enc = kCFStringEncodingISOLatinHebrew;
1929 break ;
1930 case wxFONTENCODING_ISO8859_9 :
1931 enc = kCFStringEncodingISOLatin5;
1932 break ;
1933 case wxFONTENCODING_ISO8859_10 :
1934 enc = kCFStringEncodingISOLatin6;
1935 break ;
1936 case wxFONTENCODING_ISO8859_11 :
1937 enc = kCFStringEncodingISOLatinThai;
1938 break ;
1939 case wxFONTENCODING_ISO8859_13 :
1940 enc = kCFStringEncodingISOLatin7;
1941 break ;
1942 case wxFONTENCODING_ISO8859_14 :
1943 enc = kCFStringEncodingISOLatin8;
1944 break ;
1945 case wxFONTENCODING_ISO8859_15 :
1946 enc = kCFStringEncodingISOLatin9;
1947 break ;
1948
1949 case wxFONTENCODING_KOI8 :
1950 enc = kCFStringEncodingKOI8_R;
1951 break ;
1952 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1953 enc = kCFStringEncodingDOSRussian;
1954 break ;
1955
1956 // case wxFONTENCODING_BULGARIAN :
1957 // enc = ;
1958 // break ;
1959
1960 case wxFONTENCODING_CP437 :
1961 enc =kCFStringEncodingDOSLatinUS ;
1962 break ;
1963 case wxFONTENCODING_CP850 :
1964 enc = kCFStringEncodingDOSLatin1;
1965 break ;
1966 case wxFONTENCODING_CP852 :
1967 enc = kCFStringEncodingDOSLatin2;
1968 break ;
1969 case wxFONTENCODING_CP855 :
1970 enc = kCFStringEncodingDOSCyrillic;
1971 break ;
1972 case wxFONTENCODING_CP866 :
1973 enc =kCFStringEncodingDOSRussian ;
1974 break ;
1975 case wxFONTENCODING_CP874 :
1976 enc = kCFStringEncodingDOSThai;
1977 break ;
1978 case wxFONTENCODING_CP932 :
1979 enc = kCFStringEncodingDOSJapanese;
1980 break ;
1981 case wxFONTENCODING_CP936 :
1982 enc =kCFStringEncodingDOSChineseSimplif ;
1983 break ;
1984 case wxFONTENCODING_CP949 :
1985 enc = kCFStringEncodingDOSKorean;
1986 break ;
1987 case wxFONTENCODING_CP950 :
1988 enc = kCFStringEncodingDOSChineseTrad;
1989 break ;
1990 case wxFONTENCODING_CP1250 :
1991 enc = kCFStringEncodingWindowsLatin2;
1992 break ;
1993 case wxFONTENCODING_CP1251 :
1994 enc =kCFStringEncodingWindowsCyrillic ;
1995 break ;
1996 case wxFONTENCODING_CP1252 :
1997 enc =kCFStringEncodingWindowsLatin1 ;
1998 break ;
1999 case wxFONTENCODING_CP1253 :
2000 enc = kCFStringEncodingWindowsGreek;
2001 break ;
2002 case wxFONTENCODING_CP1254 :
2003 enc = kCFStringEncodingWindowsLatin5;
2004 break ;
2005 case wxFONTENCODING_CP1255 :
2006 enc =kCFStringEncodingWindowsHebrew ;
2007 break ;
2008 case wxFONTENCODING_CP1256 :
2009 enc =kCFStringEncodingWindowsArabic ;
2010 break ;
2011 case wxFONTENCODING_CP1257 :
2012 enc = kCFStringEncodingWindowsBalticRim;
2013 break ;
2014 // This only really encodes to UTF7 (if that) evidently
2015 // case wxFONTENCODING_UTF7 :
2016 // enc = kCFStringEncodingNonLossyASCII ;
2017 // break ;
2018 case wxFONTENCODING_UTF8 :
2019 enc = kCFStringEncodingUTF8 ;
2020 break ;
2021 case wxFONTENCODING_EUC_JP :
2022 enc = kCFStringEncodingEUC_JP;
2023 break ;
2024 case wxFONTENCODING_UTF16 :
2025 enc = kCFStringEncodingUnicode ;
2026 break ;
2027 case wxFONTENCODING_MACROMAN :
2028 enc = kCFStringEncodingMacRoman ;
2029 break ;
2030 case wxFONTENCODING_MACJAPANESE :
2031 enc = kCFStringEncodingMacJapanese ;
2032 break ;
2033 case wxFONTENCODING_MACCHINESETRAD :
2034 enc = kCFStringEncodingMacChineseTrad ;
2035 break ;
2036 case wxFONTENCODING_MACKOREAN :
2037 enc = kCFStringEncodingMacKorean ;
2038 break ;
2039 case wxFONTENCODING_MACARABIC :
2040 enc = kCFStringEncodingMacArabic ;
2041 break ;
2042 case wxFONTENCODING_MACHEBREW :
2043 enc = kCFStringEncodingMacHebrew ;
2044 break ;
2045 case wxFONTENCODING_MACGREEK :
2046 enc = kCFStringEncodingMacGreek ;
2047 break ;
2048 case wxFONTENCODING_MACCYRILLIC :
2049 enc = kCFStringEncodingMacCyrillic ;
2050 break ;
2051 case wxFONTENCODING_MACDEVANAGARI :
2052 enc = kCFStringEncodingMacDevanagari ;
2053 break ;
2054 case wxFONTENCODING_MACGURMUKHI :
2055 enc = kCFStringEncodingMacGurmukhi ;
2056 break ;
2057 case wxFONTENCODING_MACGUJARATI :
2058 enc = kCFStringEncodingMacGujarati ;
2059 break ;
2060 case wxFONTENCODING_MACORIYA :
2061 enc = kCFStringEncodingMacOriya ;
2062 break ;
2063 case wxFONTENCODING_MACBENGALI :
2064 enc = kCFStringEncodingMacBengali ;
2065 break ;
2066 case wxFONTENCODING_MACTAMIL :
2067 enc = kCFStringEncodingMacTamil ;
2068 break ;
2069 case wxFONTENCODING_MACTELUGU :
2070 enc = kCFStringEncodingMacTelugu ;
2071 break ;
2072 case wxFONTENCODING_MACKANNADA :
2073 enc = kCFStringEncodingMacKannada ;
2074 break ;
2075 case wxFONTENCODING_MACMALAJALAM :
2076 enc = kCFStringEncodingMacMalayalam ;
2077 break ;
2078 case wxFONTENCODING_MACSINHALESE :
2079 enc = kCFStringEncodingMacSinhalese ;
2080 break ;
2081 case wxFONTENCODING_MACBURMESE :
2082 enc = kCFStringEncodingMacBurmese ;
2083 break ;
2084 case wxFONTENCODING_MACKHMER :
2085 enc = kCFStringEncodingMacKhmer ;
2086 break ;
2087 case wxFONTENCODING_MACTHAI :
2088 enc = kCFStringEncodingMacThai ;
2089 break ;
2090 case wxFONTENCODING_MACLAOTIAN :
2091 enc = kCFStringEncodingMacLaotian ;
2092 break ;
2093 case wxFONTENCODING_MACGEORGIAN :
2094 enc = kCFStringEncodingMacGeorgian ;
2095 break ;
2096 case wxFONTENCODING_MACARMENIAN :
2097 enc = kCFStringEncodingMacArmenian ;
2098 break ;
2099 case wxFONTENCODING_MACCHINESESIMP :
2100 enc = kCFStringEncodingMacChineseSimp ;
2101 break ;
2102 case wxFONTENCODING_MACTIBETAN :
2103 enc = kCFStringEncodingMacTibetan ;
2104 break ;
2105 case wxFONTENCODING_MACMONGOLIAN :
2106 enc = kCFStringEncodingMacMongolian ;
2107 break ;
2108 case wxFONTENCODING_MACETHIOPIC :
2109 enc = kCFStringEncodingMacEthiopic ;
2110 break ;
2111 case wxFONTENCODING_MACCENTRALEUR :
2112 enc = kCFStringEncodingMacCentralEurRoman ;
2113 break ;
2114 case wxFONTENCODING_MACVIATNAMESE :
2115 enc = kCFStringEncodingMacVietnamese ;
2116 break ;
2117 case wxFONTENCODING_MACARABICEXT :
2118 enc = kCFStringEncodingMacExtArabic ;
2119 break ;
2120 case wxFONTENCODING_MACSYMBOL :
2121 enc = kCFStringEncodingMacSymbol ;
2122 break ;
2123 case wxFONTENCODING_MACDINGBATS :
2124 enc = kCFStringEncodingMacDingbats ;
2125 break ;
2126 case wxFONTENCODING_MACTURKISH :
2127 enc = kCFStringEncodingMacTurkish ;
2128 break ;
2129 case wxFONTENCODING_MACCROATIAN :
2130 enc = kCFStringEncodingMacCroatian ;
2131 break ;
2132 case wxFONTENCODING_MACICELANDIC :
2133 enc = kCFStringEncodingMacIcelandic ;
2134 break ;
2135 case wxFONTENCODING_MACROMANIAN :
2136 enc = kCFStringEncodingMacRomanian ;
2137 break ;
2138 case wxFONTENCODING_MACCELTIC :
2139 enc = kCFStringEncodingMacCeltic ;
2140 break ;
2141 case wxFONTENCODING_MACGAELIC :
2142 enc = kCFStringEncodingMacGaelic ;
2143 break ;
2144 // case wxFONTENCODING_MACKEYBOARD :
2145 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2146 // break ;
2147 default :
2148 // because gcc is picky
2149 break ;
2150 } ;
2151 return enc ;
2152 }
2153
2154 class wxMBConv_cocoa : public wxMBConv
2155 {
2156 public:
2157 wxMBConv_cocoa()
2158 {
2159 Init(CFStringGetSystemEncoding()) ;
2160 }
2161
2162 #if wxUSE_FONTMAP
2163 wxMBConv_cocoa(const wxChar* name)
2164 {
2165 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2166 }
2167 #endif
2168
2169 wxMBConv_cocoa(wxFontEncoding encoding)
2170 {
2171 Init( wxCFStringEncFromFontEnc(encoding) );
2172 }
2173
2174 ~wxMBConv_cocoa()
2175 {
2176 }
2177
2178 void Init( CFStringEncoding encoding)
2179 {
2180 m_encoding = encoding ;
2181 }
2182
2183 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2184 {
2185 wxASSERT(szUnConv);
2186
2187 CFStringRef theString = CFStringCreateWithBytes (
2188 NULL, //the allocator
2189 (const UInt8*)szUnConv,
2190 strlen(szUnConv),
2191 m_encoding,
2192 false //no BOM/external representation
2193 );
2194
2195 wxASSERT(theString);
2196
2197 size_t nOutLength = CFStringGetLength(theString);
2198
2199 if (szOut == NULL)
2200 {
2201 CFRelease(theString);
2202 return nOutLength;
2203 }
2204
2205 CFRange theRange = { 0, nOutSize };
2206
2207 #if SIZEOF_WCHAR_T == 4
2208 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2209 #endif
2210
2211 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2212
2213 CFRelease(theString);
2214
2215 szUniCharBuffer[nOutLength] = '\0' ;
2216
2217 #if SIZEOF_WCHAR_T == 4
2218 wxMBConvUTF16 converter ;
2219 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2220 delete[] szUniCharBuffer;
2221 #endif
2222
2223 return nOutLength;
2224 }
2225
2226 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2227 {
2228 wxASSERT(szUnConv);
2229
2230 size_t nRealOutSize;
2231 size_t nBufSize = wxWcslen(szUnConv);
2232 UniChar* szUniBuffer = (UniChar*) szUnConv;
2233
2234 #if SIZEOF_WCHAR_T == 4
2235 wxMBConvUTF16 converter ;
2236 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2237 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2238 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2239 nBufSize /= sizeof(UniChar);
2240 #endif
2241
2242 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2243 NULL, //allocator
2244 szUniBuffer,
2245 nBufSize,
2246 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2247 );
2248
2249 wxASSERT(theString);
2250
2251 //Note that CER puts a BOM when converting to unicode
2252 //so we check and use getchars instead in that case
2253 if (m_encoding == kCFStringEncodingUnicode)
2254 {
2255 if (szOut != NULL)
2256 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2257
2258 nRealOutSize = CFStringGetLength(theString) + 1;
2259 }
2260 else
2261 {
2262 CFStringGetBytes(
2263 theString,
2264 CFRangeMake(0, CFStringGetLength(theString)),
2265 m_encoding,
2266 0, //what to put in characters that can't be converted -
2267 //0 tells CFString to return NULL if it meets such a character
2268 false, //not an external representation
2269 (UInt8*) szOut,
2270 nOutSize,
2271 (CFIndex*) &nRealOutSize
2272 );
2273 }
2274
2275 CFRelease(theString);
2276
2277 #if SIZEOF_WCHAR_T == 4
2278 delete[] szUniBuffer;
2279 #endif
2280
2281 return nRealOutSize - 1;
2282 }
2283
2284 bool IsOk() const
2285 {
2286 return m_encoding != kCFStringEncodingInvalidId &&
2287 CFStringIsEncodingAvailable(m_encoding);
2288 }
2289
2290 private:
2291 CFStringEncoding m_encoding ;
2292 };
2293
2294 #endif // defined(__WXCOCOA__)
2295
2296 // ============================================================================
2297 // Mac conversion classes
2298 // ============================================================================
2299
2300 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2301
2302 class wxMBConv_mac : public wxMBConv
2303 {
2304 public:
2305 wxMBConv_mac()
2306 {
2307 Init(CFStringGetSystemEncoding()) ;
2308 }
2309
2310 #if wxUSE_FONTMAP
2311 wxMBConv_mac(const wxChar* name)
2312 {
2313 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2314 }
2315 #endif
2316
2317 wxMBConv_mac(wxFontEncoding encoding)
2318 {
2319 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2320 }
2321
2322 ~wxMBConv_mac()
2323 {
2324 OSStatus status = noErr ;
2325 status = TECDisposeConverter(m_MB2WC_converter);
2326 status = TECDisposeConverter(m_WC2MB_converter);
2327 }
2328
2329
2330 void Init( TextEncodingBase encoding)
2331 {
2332 OSStatus status = noErr ;
2333 m_char_encoding = encoding ;
2334 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2335
2336 status = TECCreateConverter(&m_MB2WC_converter,
2337 m_char_encoding,
2338 m_unicode_encoding);
2339 status = TECCreateConverter(&m_WC2MB_converter,
2340 m_unicode_encoding,
2341 m_char_encoding);
2342 }
2343
2344 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2345 {
2346 OSStatus status = noErr ;
2347 ByteCount byteOutLen ;
2348 ByteCount byteInLen = strlen(psz) ;
2349 wchar_t *tbuf = NULL ;
2350 UniChar* ubuf = NULL ;
2351 size_t res = 0 ;
2352
2353 if (buf == NULL)
2354 {
2355 //apple specs say at least 32
2356 n = wxMax( 32 , byteInLen ) ;
2357 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2358 }
2359 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2360 #if SIZEOF_WCHAR_T == 4
2361 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2362 #else
2363 ubuf = (UniChar*) (buf ? buf : tbuf) ;
2364 #endif
2365 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2366 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2367 #if SIZEOF_WCHAR_T == 4
2368 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2369 // is not properly terminated we get random characters at the end
2370 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2371 wxMBConvUTF16 converter ;
2372 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2373 free( ubuf ) ;
2374 #else
2375 res = byteOutLen / sizeof( UniChar ) ;
2376 #endif
2377 if ( buf == NULL )
2378 free(tbuf) ;
2379
2380 if ( buf && res < n)
2381 buf[res] = 0;
2382
2383 return res ;
2384 }
2385
2386 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2387 {
2388 OSStatus status = noErr ;
2389 ByteCount byteOutLen ;
2390 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2391
2392 char *tbuf = NULL ;
2393
2394 if (buf == NULL)
2395 {
2396 //apple specs say at least 32
2397 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2398 tbuf = (char*) malloc( n ) ;
2399 }
2400
2401 ByteCount byteBufferLen = n ;
2402 UniChar* ubuf = NULL ;
2403 #if SIZEOF_WCHAR_T == 4
2404 wxMBConvUTF16 converter ;
2405 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2406 byteInLen = unicharlen ;
2407 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2408 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2409 #else
2410 ubuf = (UniChar*) psz ;
2411 #endif
2412 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2413 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2414 #if SIZEOF_WCHAR_T == 4
2415 free( ubuf ) ;
2416 #endif
2417 if ( buf == NULL )
2418 free(tbuf) ;
2419
2420 size_t res = byteOutLen ;
2421 if ( buf && res < n)
2422 {
2423 buf[res] = 0;
2424
2425 //we need to double-trip to verify it didn't insert any ? in place
2426 //of bogus characters
2427 wxWCharBuffer wcBuf(n);
2428 size_t pszlen = wxWcslen(psz);
2429 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2430 wxWcslen(wcBuf) != pszlen ||
2431 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2432 {
2433 // we didn't obtain the same thing we started from, hence
2434 // the conversion was lossy and we consider that it failed
2435 return (size_t)-1;
2436 }
2437 }
2438
2439 return res ;
2440 }
2441
2442 bool IsOk() const
2443 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2444
2445 private:
2446 TECObjectRef m_MB2WC_converter ;
2447 TECObjectRef m_WC2MB_converter ;
2448
2449 TextEncodingBase m_char_encoding ;
2450 TextEncodingBase m_unicode_encoding ;
2451 };
2452
2453 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2454
2455 // ============================================================================
2456 // wxEncodingConverter based conversion classes
2457 // ============================================================================
2458
2459 #if wxUSE_FONTMAP
2460
2461 class wxMBConv_wxwin : public wxMBConv
2462 {
2463 private:
2464 void Init()
2465 {
2466 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2467 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2468 }
2469
2470 public:
2471 // temporarily just use wxEncodingConverter stuff,
2472 // so that it works while a better implementation is built
2473 wxMBConv_wxwin(const wxChar* name)
2474 {
2475 if (name)
2476 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2477 else
2478 m_enc = wxFONTENCODING_SYSTEM;
2479
2480 Init();
2481 }
2482
2483 wxMBConv_wxwin(wxFontEncoding enc)
2484 {
2485 m_enc = enc;
2486
2487 Init();
2488 }
2489
2490 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2491 {
2492 size_t inbuf = strlen(psz);
2493 if (buf)
2494 {
2495 if (!m2w.Convert(psz,buf))
2496 return (size_t)-1;
2497 }
2498 return inbuf;
2499 }
2500
2501 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2502 {
2503 const size_t inbuf = wxWcslen(psz);
2504 if (buf)
2505 {
2506 if (!w2m.Convert(psz,buf))
2507 return (size_t)-1;
2508 }
2509
2510 return inbuf;
2511 }
2512
2513 bool IsOk() const { return m_ok; }
2514
2515 public:
2516 wxFontEncoding m_enc;
2517 wxEncodingConverter m2w, w2m;
2518
2519 // were we initialized successfully?
2520 bool m_ok;
2521
2522 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2523 };
2524
2525 // make the constructors available for unit testing
2526 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2527 {
2528 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2529 if ( !result->IsOk() )
2530 {
2531 delete result;
2532 return 0;
2533 }
2534 return result;
2535 }
2536
2537 #endif // wxUSE_FONTMAP
2538
2539 // ============================================================================
2540 // wxCSConv implementation
2541 // ============================================================================
2542
2543 void wxCSConv::Init()
2544 {
2545 m_name = NULL;
2546 m_convReal = NULL;
2547 m_deferred = true;
2548 }
2549
2550 wxCSConv::wxCSConv(const wxChar *charset)
2551 {
2552 Init();
2553
2554 if ( charset )
2555 {
2556 SetName(charset);
2557 }
2558
2559 #if wxUSE_FONTMAP
2560 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2561 #else
2562 m_encoding = wxFONTENCODING_SYSTEM;
2563 #endif
2564 }
2565
2566 wxCSConv::wxCSConv(wxFontEncoding encoding)
2567 {
2568 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2569 {
2570 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2571
2572 encoding = wxFONTENCODING_SYSTEM;
2573 }
2574
2575 Init();
2576
2577 m_encoding = encoding;
2578 }
2579
2580 wxCSConv::~wxCSConv()
2581 {
2582 Clear();
2583 }
2584
2585 wxCSConv::wxCSConv(const wxCSConv& conv)
2586 : wxMBConv()
2587 {
2588 Init();
2589
2590 SetName(conv.m_name);
2591 m_encoding = conv.m_encoding;
2592 }
2593
2594 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2595 {
2596 Clear();
2597
2598 SetName(conv.m_name);
2599 m_encoding = conv.m_encoding;
2600
2601 return *this;
2602 }
2603
2604 void wxCSConv::Clear()
2605 {
2606 free(m_name);
2607 delete m_convReal;
2608
2609 m_name = NULL;
2610 m_convReal = NULL;
2611 }
2612
2613 void wxCSConv::SetName(const wxChar *charset)
2614 {
2615 if (charset)
2616 {
2617 m_name = wxStrdup(charset);
2618 m_deferred = true;
2619 }
2620 }
2621
#if wxUSE_FONTMAP

#include "wx/hashmap.h"

// map from the encoding to a charset name: wxCSConv::DoCreate() stores here
// the name which worked with iconv for this encoding or an empty string if
// none of them did
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString,
                     wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

static wxEncodingNameCache gs_nameCache;

#endif // wxUSE_FONTMAP
2630
2631 wxMBConv *wxCSConv::DoCreate() const
2632 {
2633 #if wxUSE_FONTMAP
2634 wxLogTrace(TRACE_STRCONV,
2635 wxT("creating conversion for %s"),
2636 (m_name ? m_name
2637 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2638 #endif // wxUSE_FONTMAP
2639
2640 // check for the special case of ASCII or ISO8859-1 charset: as we have
2641 // special knowledge of it anyhow, we don't need to create a special
2642 // conversion object
2643 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
2644 m_encoding == wxFONTENCODING_DEFAULT )
2645 {
2646 // don't convert at all
2647 return NULL;
2648 }
2649
2650 // we trust OS to do conversion better than we can so try external
2651 // conversion methods first
2652 //
2653 // the full order is:
2654 // 1. OS conversion (iconv() under Unix or Win32 API)
2655 // 2. hard coded conversions for UTF
2656 // 3. wxEncodingConverter as fall back
2657
2658 // step (1)
2659 #ifdef HAVE_ICONV
2660 #if !wxUSE_FONTMAP
2661 if ( m_name )
2662 #endif // !wxUSE_FONTMAP
2663 {
2664 wxString name(m_name);
2665 wxFontEncoding encoding(m_encoding);
2666
2667 if ( !name.empty() )
2668 {
2669 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2670 if ( conv->IsOk() )
2671 return conv;
2672
2673 delete conv;
2674
2675 #if wxUSE_FONTMAP
2676 encoding =
2677 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2678 #endif // wxUSE_FONTMAP
2679 }
2680 #if wxUSE_FONTMAP
2681 {
2682 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2683 if ( it != gs_nameCache.end() )
2684 {
2685 if ( it->second.empty() )
2686 return NULL;
2687
2688 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2689 if ( conv->IsOk() )
2690 return conv;
2691
2692 delete conv;
2693 }
2694
2695 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2696
2697 for ( ; *names; ++names )
2698 {
2699 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2700 if ( conv->IsOk() )
2701 {
2702 gs_nameCache[encoding] = *names;
2703 return conv;
2704 }
2705
2706 delete conv;
2707 }
2708
2709 gs_nameCache[encoding] = _T(""); // cache the failure
2710 }
2711 #endif // wxUSE_FONTMAP
2712 }
2713 #endif // HAVE_ICONV
2714
2715 #ifdef wxHAVE_WIN32_MB2WC
2716 {
2717 #if wxUSE_FONTMAP
2718 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2719 : new wxMBConv_win32(m_encoding);
2720 if ( conv->IsOk() )
2721 return conv;
2722
2723 delete conv;
2724 #else
2725 return NULL;
2726 #endif
2727 }
2728 #endif // wxHAVE_WIN32_MB2WC
2729 #if defined(__WXMAC__)
2730 {
2731 // leave UTF16 and UTF32 to the built-ins of wx
2732 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2733 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2734 {
2735
2736 #if wxUSE_FONTMAP
2737 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2738 : new wxMBConv_mac(m_encoding);
2739 #else
2740 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2741 #endif
2742 if ( conv->IsOk() )
2743 return conv;
2744
2745 delete conv;
2746 }
2747 }
2748 #endif
2749 #if defined(__WXCOCOA__)
2750 {
2751 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2752 {
2753
2754 #if wxUSE_FONTMAP
2755 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2756 : new wxMBConv_cocoa(m_encoding);
2757 #else
2758 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2759 #endif
2760 if ( conv->IsOk() )
2761 return conv;
2762
2763 delete conv;
2764 }
2765 }
2766 #endif
2767 // step (2)
2768 wxFontEncoding enc = m_encoding;
2769 #if wxUSE_FONTMAP
2770 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2771 {
2772 // use "false" to suppress interactive dialogs -- we can be called from
2773 // anywhere and popping up a dialog from here is the last thing we want to
2774 // do
2775 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2776 }
2777 #endif // wxUSE_FONTMAP
2778
2779 switch ( enc )
2780 {
2781 case wxFONTENCODING_UTF7:
2782 return new wxMBConvUTF7;
2783
2784 case wxFONTENCODING_UTF8:
2785 return new wxMBConvUTF8;
2786
2787 case wxFONTENCODING_UTF16BE:
2788 return new wxMBConvUTF16BE;
2789
2790 case wxFONTENCODING_UTF16LE:
2791 return new wxMBConvUTF16LE;
2792
2793 case wxFONTENCODING_UTF32BE:
2794 return new wxMBConvUTF32BE;
2795
2796 case wxFONTENCODING_UTF32LE:
2797 return new wxMBConvUTF32LE;
2798
2799 default:
2800 // nothing to do but put here to suppress gcc warnings
2801 ;
2802 }
2803
2804 // step (3)
2805 #if wxUSE_FONTMAP
2806 {
2807 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2808 : new wxMBConv_wxwin(m_encoding);
2809 if ( conv->IsOk() )
2810 return conv;
2811
2812 delete conv;
2813 }
2814 #endif // wxUSE_FONTMAP
2815
2816 // NB: This is a hack to prevent deadlock. What could otherwise happen
2817 // in Unicode build: wxConvLocal creation ends up being here
2818 // because of some failure and logs the error. But wxLog will try to
2819 // attach timestamp, for which it will need wxConvLocal (to convert
2820 // time to char* and then wchar_t*), but that fails, tries to log
2821 // error, but wxLog has a (already locked) critical section that
2822 // guards static buffer.
2823 static bool alreadyLoggingError = false;
2824 if (!alreadyLoggingError)
2825 {
2826 alreadyLoggingError = true;
2827 wxLogError(_("Cannot convert from the charset '%s'!"),
2828 m_name ? m_name
2829 :
2830 #if wxUSE_FONTMAP
2831 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2832 #else // !wxUSE_FONTMAP
2833 wxString::Format(_("encoding %s"), m_encoding).c_str()
2834 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2835 );
2836 alreadyLoggingError = false;
2837 }
2838
2839 return NULL;
2840 }
2841
2842 void wxCSConv::CreateConvIfNeeded() const
2843 {
2844 if ( m_deferred )
2845 {
2846 wxCSConv *self = (wxCSConv *)this; // const_cast
2847
2848 #if wxUSE_INTL
2849 // if we don't have neither the name nor the encoding, use the default
2850 // encoding for this system
2851 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2852 {
2853 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2854 }
2855 #endif // wxUSE_INTL
2856
2857 self->m_convReal = DoCreate();
2858 self->m_deferred = false;
2859 }
2860 }
2861
2862 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2863 {
2864 CreateConvIfNeeded();
2865
2866 if (m_convReal)
2867 return m_convReal->MB2WC(buf, psz, n);
2868
2869 // latin-1 (direct)
2870 size_t len = strlen(psz);
2871
2872 if (buf)
2873 {
2874 for (size_t c = 0; c <= len; c++)
2875 buf[c] = (unsigned char)(psz[c]);
2876 }
2877
2878 return len;
2879 }
2880
2881 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2882 {
2883 CreateConvIfNeeded();
2884
2885 if (m_convReal)
2886 return m_convReal->WC2MB(buf, psz, n);
2887
2888 // latin-1 (direct)
2889 const size_t len = wxWcslen(psz);
2890 if (buf)
2891 {
2892 for (size_t c = 0; c <= len; c++)
2893 {
2894 if (psz[c] > 0xFF)
2895 return (size_t)-1;
2896 buf[c] = (char)psz[c];
2897 }
2898 }
2899 else
2900 {
2901 for (size_t c = 0; c <= len; c++)
2902 {
2903 if (psz[c] > 0xFF)
2904 return (size_t)-1;
2905 }
2906 }
2907
2908 return len;
2909 }
2910
2911 // ----------------------------------------------------------------------------
2912 // globals
2913 // ----------------------------------------------------------------------------
2914
2915 #ifdef __WINDOWS__
2916 static wxMBConv_win32 wxConvLibcObj;
2917 #elif defined(__WXMAC__) && !defined(__MACH__)
2918 static wxMBConv_mac wxConvLibcObj ;
2919 #else
2920 static wxMBConvLibc wxConvLibcObj;
2921 #endif
2922
2923 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2924 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2925 static wxMBConvUTF7 wxConvUTF7Obj;
2926 static wxMBConvUTF8 wxConvUTF8Obj;
2927
2928 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2929 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2930 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2931 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2932 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2933 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2934 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2935 #ifdef __WXOSX__
2936 wxConvUTF8Obj;
2937 #else
2938 wxConvLibcObj;
2939 #endif
2940
2941
2942 #else // !wxUSE_WCHAR_T
2943
2944 // stand-ins in absence of wchar_t
2945 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2946 wxConvISO8859_1,
2947 wxConvLocal,
2948 wxConvUTF8;
2949
2950 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T