]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
detect some invalid UTF7 strings when decoding them in wxMBConvUTF7
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
6 // Modified by:
7 // Created: 29/01/98
8 // RCS-ID: $Id$
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
14
15 // ============================================================================
16 // declarations
17 // ============================================================================
18
19 // ----------------------------------------------------------------------------
20 // headers
21 // ----------------------------------------------------------------------------
22
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
25
26 #ifdef __BORLANDC__
27 #pragma hdrstop
28 #endif
29
30 #ifndef WX_PRECOMP
31 #include "wx/intl.h"
32 #include "wx/log.h"
33 #endif // WX_PRECOMP
34
35 #include "wx/strconv.h"
36
37 #if wxUSE_WCHAR_T
38
39 #ifdef __WINDOWS__
40 #include "wx/msw/private.h"
41 #include "wx/msw/missing.h"
42 #endif
43
44 #ifndef __WXWINCE__
45 #include <errno.h>
46 #endif
47
48 #include <ctype.h>
49 #include <string.h>
50 #include <stdlib.h>
51
52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54 #endif // __WIN32__ but !__WXMICROWIN__
55
56 #ifdef __SALFORDC__
57 #include <clib.h>
58 #endif
59
60 #ifdef HAVE_ICONV
61 #include <iconv.h>
62 #include "wx/thread.h"
63 #endif
64
65 #include "wx/encconv.h"
66 #include "wx/fontmap.h"
67 #include "wx/utils.h"
68
69 #ifdef __WXMAC__
70 #ifndef __DARWIN__
71 #include <ATSUnicode.h>
72 #include <TextCommon.h>
73 #include <TextEncodingConverter.h>
74 #endif
75
76 #include "wx/mac/private.h" // includes mac headers
77 #endif
78
// trace mask passed to wxLogTrace() by the code in this file
#define TRACE_STRCONV _T("strconv")

// when wchar_t is 16 bits wide, the converters below treat wide strings as
// UTF-16 (i.e. they may produce/consume surrogate pairs)
#if SIZEOF_WCHAR_T == 2
    #define WC_UTF16
#endif
84
85 // ============================================================================
86 // implementation
87 // ============================================================================
88
89 // ----------------------------------------------------------------------------
90 // UTF-16 en/decoding to/from UCS-4
91 // ----------------------------------------------------------------------------
92
93
94 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
95 {
96 if (input<=0xffff)
97 {
98 if (output)
99 *output = (wxUint16) input;
100 return 1;
101 }
102 else if (input>=0x110000)
103 {
104 return (size_t)-1;
105 }
106 else
107 {
108 if (output)
109 {
110 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
111 *output = (wxUint16) ((input&0x3ff)+0xdc00);
112 }
113 return 2;
114 }
115 }
116
117 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
118 {
119 if ((*input<0xd800) || (*input>0xdfff))
120 {
121 output = *input;
122 return 1;
123 }
124 else if ((input[1]<0xdc00) || (input[1]>0xdfff))
125 {
126 output = *input;
127 return (size_t)-1;
128 }
129 else
130 {
131 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
132 return 2;
133 }
134 }
135
136
137 // ----------------------------------------------------------------------------
138 // wxMBConv
139 // ----------------------------------------------------------------------------
140
141 wxMBConv::~wxMBConv()
142 {
143 // nothing to do here (necessary for Darwin linking probably)
144 }
145
146 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
147 {
148 if ( psz )
149 {
150 // calculate the length of the buffer needed first
151 size_t nLen = MB2WC(NULL, psz, 0);
152 if ( nLen != (size_t)-1 )
153 {
154 // now do the actual conversion
155 wxWCharBuffer buf(nLen);
156 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
157 if ( nLen != (size_t)-1 )
158 {
159 return buf;
160 }
161 }
162 }
163
164 wxWCharBuffer buf((wchar_t *)NULL);
165
166 return buf;
167 }
168
169 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
170 {
171 if ( pwz )
172 {
173 size_t nLen = WC2MB(NULL, pwz, 0);
174 if ( nLen != (size_t)-1 )
175 {
176 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
177 nLen = WC2MB(buf.data(), pwz, nLen + 4);
178 if ( nLen != (size_t)-1 )
179 {
180 return buf;
181 }
182 }
183 }
184
185 wxCharBuffer buf((char *)NULL);
186
187 return buf;
188 }
189
190 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
191 {
192 wxASSERT(pOutSize != NULL);
193
194 const char* szEnd = szString + nStringLen + 1;
195 const char* szPos = szString;
196 const char* szStart = szPos;
197
198 size_t nActualLength = 0;
199 size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
200
201 wxWCharBuffer theBuffer(nCurrentSize);
202
203 //Convert the string until the length() is reached, continuing the
204 //loop every time a null character is reached
205 while(szPos != szEnd)
206 {
207 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
208
209 //Get the length of the current (sub)string
210 size_t nLen = MB2WC(NULL, szPos, 0);
211
212 //Invalid conversion?
213 if( nLen == (size_t)-1 )
214 {
215 *pOutSize = 0;
216 theBuffer.data()[0u] = wxT('\0');
217 return theBuffer;
218 }
219
220
221 //Increase the actual length (+1 for current null character)
222 nActualLength += nLen + 1;
223
224 //if buffer too big, realloc the buffer
225 if (nActualLength > (nCurrentSize+1))
226 {
227 wxWCharBuffer theNewBuffer(nCurrentSize << 1);
228 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
229 theBuffer = theNewBuffer;
230 nCurrentSize <<= 1;
231 }
232
233 //Convert the current (sub)string
234 if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
235 {
236 *pOutSize = 0;
237 theBuffer.data()[0u] = wxT('\0');
238 return theBuffer;
239 }
240
241 //Increment to next (sub)string
242 //Note that we have to use strlen instead of nLen here
243 //because XX2XX gives us the size of the output buffer,
244 //which is not necessarily the length of the string
245 szPos += strlen(szPos) + 1;
246 }
247
248 //success - return actual length and the buffer
249 *pOutSize = nActualLength;
250 return theBuffer;
251 }
252
253 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
254 {
255 wxASSERT(pOutSize != NULL);
256
257 const wchar_t* szEnd = szString + nStringLen + 1;
258 const wchar_t* szPos = szString;
259 const wchar_t* szStart = szPos;
260
261 size_t nActualLength = 0;
262 size_t nCurrentSize = nStringLen << 2; //try * 4 first
263
264 wxCharBuffer theBuffer(nCurrentSize);
265
266 //Convert the string until the length() is reached, continuing the
267 //loop every time a null character is reached
268 while(szPos != szEnd)
269 {
270 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
271
272 //Get the length of the current (sub)string
273 size_t nLen = WC2MB(NULL, szPos, 0);
274
275 //Invalid conversion?
276 if( nLen == (size_t)-1 )
277 {
278 *pOutSize = 0;
279 theBuffer.data()[0u] = wxT('\0');
280 return theBuffer;
281 }
282
283 //Increase the actual length (+1 for current null character)
284 nActualLength += nLen + 1;
285
286 //if buffer too big, realloc the buffer
287 if (nActualLength > (nCurrentSize+1))
288 {
289 wxCharBuffer theNewBuffer(nCurrentSize << 1);
290 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
291 theBuffer = theNewBuffer;
292 nCurrentSize <<= 1;
293 }
294
295 //Convert the current (sub)string
296 if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
297 {
298 *pOutSize = 0;
299 theBuffer.data()[0u] = wxT('\0');
300 return theBuffer;
301 }
302
303 //Increment to next (sub)string
304 //Note that we have to use wxWcslen instead of nLen here
305 //because XX2XX gives us the size of the output buffer,
306 //which is not necessarily the length of the string
307 szPos += wxWcslen(szPos) + 1;
308 }
309
310 //success - return actual length and the buffer
311 *pOutSize = nActualLength;
312 return theBuffer;
313 }
314
315 // ----------------------------------------------------------------------------
316 // wxMBConvLibc
317 // ----------------------------------------------------------------------------
318
319 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
320 {
321 return wxMB2WC(buf, psz, n);
322 }
323
324 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
325 {
326 return wxWC2MB(buf, psz, n);
327 }
328
329 #ifdef __UNIX__
330
331 // ----------------------------------------------------------------------------
332 // wxConvBrokenFileNames
333 // ----------------------------------------------------------------------------
334
335 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
336 {
337 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
338 || wxStricmp(charset, _T("UTF8")) == 0 )
339 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
340 else
341 m_conv = new wxCSConv(charset);
342 }
343
344 size_t
345 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
346 const char *psz,
347 size_t outputSize) const
348 {
349 return m_conv->MB2WC( outputBuf, psz, outputSize );
350 }
351
352 size_t
353 wxConvBrokenFileNames::WC2MB(char *outputBuf,
354 const wchar_t *psz,
355 size_t outputSize) const
356 {
357 return m_conv->WC2MB( outputBuf, psz, outputSize );
358 }
359
360 #endif
361
362 // ----------------------------------------------------------------------------
363 // UTF-7
364 // ----------------------------------------------------------------------------
365
366 // Implementation (C) 2004 Fredrik Roubert
367
//
// BASE64 decoding table: indexed by the byte value, gives the 6 bit value of
// the corresponding BASE64 digit or 0xff if the byte is not a BASE64 digit
//
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0'..'9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x5f: 'A'..'Z'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x7f: 'a'..'z'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
406
407 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
408 {
409 size_t len = 0;
410
411 while ( *psz && (!buf || (len < n)) )
412 {
413 unsigned char cc = *psz++;
414 if (cc != '+')
415 {
416 // plain ASCII char
417 if (buf)
418 *buf++ = cc;
419 len++;
420 }
421 else if (*psz == '-')
422 {
423 // encoded plus sign
424 if (buf)
425 *buf++ = cc;
426 len++;
427 psz++;
428 }
429 else // start of BASE64 encoded string
430 {
431 bool lsb, ok;
432 unsigned int d, l;
433 for ( ok = lsb = false, d = 0, l = 0;
434 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
435 psz++ )
436 {
437 d <<= 6;
438 d += cc;
439 for (l += 6; l >= 8; lsb = !lsb)
440 {
441 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
442 if (lsb)
443 {
444 if (buf)
445 *buf++ |= c;
446 len ++;
447 }
448 else
449 {
450 if (buf)
451 *buf = (wchar_t)(c << 8);
452 }
453
454 ok = true;
455 }
456 }
457
458 if ( !ok )
459 {
460 // in valid UTF7 we should have valid characters after '+'
461 return (size_t)-1;
462 }
463
464 if (*psz == '-')
465 psz++;
466 }
467 }
468
469 if ( buf && (len < n) )
470 *buf = '\0';
471
472 return len;
473 }
474
//
// BASE64 encoding table: maps a 6 bit value to the corresponding digit
//
static const unsigned char utf7enb64[] =
{
    // 0 - 25
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 26 - 51
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // 52 - 61
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 62, 63
    '+', '/'
};
489
//
// UTF-7 encoding table, indexed by the ASCII code of the character:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,     // 0x00 - 0x0f
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,     // 0x10 - 0x1f
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,     // 0x20 - 0x2f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,     // 0x30 - 0x3f
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     // 0x40 - 0x4f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,     // 0x50 - 0x5f
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     // 0x60 - 0x6f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3      // 0x70 - 0x7f
};
509
510 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
511 {
512
513
514 size_t len = 0;
515
516 while (*psz && ((!buf) || (len < n)))
517 {
518 wchar_t cc = *psz++;
519 if (cc < 0x80 && utf7encode[cc] < 1)
520 {
521 // plain ASCII char
522 if (buf)
523 *buf++ = (char)cc;
524 len++;
525 }
526 #ifndef WC_UTF16
527 else if (((wxUint32)cc) > 0xffff)
528 {
529 // no surrogate pair generation (yet?)
530 return (size_t)-1;
531 }
532 #endif
533 else
534 {
535 if (buf)
536 *buf++ = '+';
537 len++;
538 if (cc != '+')
539 {
540 // BASE64 encode string
541 unsigned int lsb, d, l;
542 for (d = 0, l = 0; /*nothing*/; psz++)
543 {
544 for (lsb = 0; lsb < 2; lsb ++)
545 {
546 d <<= 8;
547 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
548
549 for (l += 8; l >= 6; )
550 {
551 l -= 6;
552 if (buf)
553 *buf++ = utf7enb64[(d >> l) % 64];
554 len++;
555 }
556 }
557 cc = *psz;
558 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
559 break;
560 }
561 if (l != 0)
562 {
563 if (buf)
564 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
565 len++;
566 }
567 }
568 if (buf)
569 *buf++ = '-';
570 len++;
571 }
572 }
573 if (buf && (len < n))
574 *buf = 0;
575 return len;
576 }
577
578 // ----------------------------------------------------------------------------
579 // UTF-8
580 // ----------------------------------------------------------------------------
581
582 static wxUint32 utf8_max[]=
583 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
584
585 // boundaries of the private use area we use to (temporarily) remap invalid
586 // characters invalid in a UTF-8 encoded string
587 const wxUint32 wxUnicodePUA = 0x100000;
588 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
589
590 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
591 {
592 size_t len = 0;
593
594 while (*psz && ((!buf) || (len < n)))
595 {
596 const char *opsz = psz;
597 bool invalid = false;
598 unsigned char cc = *psz++, fc = cc;
599 unsigned cnt;
600 for (cnt = 0; fc & 0x80; cnt++)
601 fc <<= 1;
602 if (!cnt)
603 {
604 // plain ASCII char
605 if (buf)
606 *buf++ = cc;
607 len++;
608
609 // escape the escape character for octal escapes
610 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
611 && cc == '\\' && (!buf || len < n))
612 {
613 if (buf)
614 *buf++ = cc;
615 len++;
616 }
617 }
618 else
619 {
620 cnt--;
621 if (!cnt)
622 {
623 // invalid UTF-8 sequence
624 invalid = true;
625 }
626 else
627 {
628 unsigned ocnt = cnt - 1;
629 wxUint32 res = cc & (0x3f >> cnt);
630 while (cnt--)
631 {
632 cc = *psz;
633 if ((cc & 0xC0) != 0x80)
634 {
635 // invalid UTF-8 sequence
636 invalid = true;
637 break;
638 }
639 psz++;
640 res = (res << 6) | (cc & 0x3f);
641 }
642 if (invalid || res <= utf8_max[ocnt])
643 {
644 // illegal UTF-8 encoding
645 invalid = true;
646 }
647 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
648 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
649 {
650 // if one of our PUA characters turns up externally
651 // it must also be treated as an illegal sequence
652 // (a bit like you have to escape an escape character)
653 invalid = true;
654 }
655 else
656 {
657 #ifdef WC_UTF16
658 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
659 size_t pa = encode_utf16(res, (wxUint16 *)buf);
660 if (pa == (size_t)-1)
661 {
662 invalid = true;
663 }
664 else
665 {
666 if (buf)
667 buf += pa;
668 len += pa;
669 }
670 #else // !WC_UTF16
671 if (buf)
672 *buf++ = (wchar_t)res;
673 len++;
674 #endif // WC_UTF16/!WC_UTF16
675 }
676 }
677 if (invalid)
678 {
679 if (m_options & MAP_INVALID_UTF8_TO_PUA)
680 {
681 while (opsz < psz && (!buf || len < n))
682 {
683 #ifdef WC_UTF16
684 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
685 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
686 wxASSERT(pa != (size_t)-1);
687 if (buf)
688 buf += pa;
689 opsz++;
690 len += pa;
691 #else
692 if (buf)
693 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
694 opsz++;
695 len++;
696 #endif
697 }
698 }
699 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
700 {
701 while (opsz < psz && (!buf || len < n))
702 {
703 if ( buf && len + 3 < n )
704 {
705 unsigned char on = *opsz;
706 *buf++ = L'\\';
707 *buf++ = (wchar_t)( L'0' + on / 0100 );
708 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
709 *buf++ = (wchar_t)( L'0' + on % 010 );
710 }
711 opsz++;
712 len += 4;
713 }
714 }
715 else // MAP_INVALID_UTF8_NOT
716 {
717 return (size_t)-1;
718 }
719 }
720 }
721 }
722 if (buf && (len < n))
723 *buf = 0;
724 return len;
725 }
726
// Return true if the given wide character is one of the octal digits '0'..'7'.
static inline bool isoctal(wchar_t wch)
{
    if ( wch < L'0' )
        return false;

    return wch <= L'7';
}
731
732 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
733 {
734 size_t len = 0;
735
736 while (*psz && ((!buf) || (len < n)))
737 {
738 wxUint32 cc;
739 #ifdef WC_UTF16
740 // cast is ok for WC_UTF16
741 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
742 psz += (pa == (size_t)-1) ? 1 : pa;
743 #else
744 cc=(*psz++) & 0x7fffffff;
745 #endif
746
747 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
748 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
749 {
750 if (buf)
751 *buf++ = (char)(cc - wxUnicodePUA);
752 len++;
753 }
754 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
755 && cc == L'\\' && psz[0] == L'\\' )
756 {
757 if (buf)
758 *buf++ = (char)cc;
759 psz++;
760 len++;
761 }
762 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
763 cc == L'\\' &&
764 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
765 {
766 if (buf)
767 {
768 *buf++ = (char) ((psz[0] - L'0')*0100 +
769 (psz[1] - L'0')*010 +
770 (psz[2] - L'0'));
771 }
772
773 psz += 3;
774 len++;
775 }
776 else
777 {
778 unsigned cnt;
779 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
780 if (!cnt)
781 {
782 // plain ASCII char
783 if (buf)
784 *buf++ = (char) cc;
785 len++;
786 }
787
788 else
789 {
790 len += cnt + 1;
791 if (buf)
792 {
793 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
794 while (cnt--)
795 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
796 }
797 }
798 }
799 }
800
801 if (buf && (len<n))
802 *buf = 0;
803
804 return len;
805 }
806
807 // ----------------------------------------------------------------------------
808 // UTF-16
809 // ----------------------------------------------------------------------------
810
// the "straight" converter is the one for the native byte order of the
// machine and the "swap" one is the one for the opposite byte order
#ifndef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16LE
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
#else // big endian
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#endif
818
819
820 #ifdef WC_UTF16
821
822 // copy 16bit MB to 16bit String
823 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
824 {
825 size_t len=0;
826
827 while (*(wxUint16*)psz && (!buf || len < n))
828 {
829 if (buf)
830 *buf++ = *(wxUint16*)psz;
831 len++;
832
833 psz += sizeof(wxUint16);
834 }
835 if (buf && len<n) *buf=0;
836
837 return len;
838 }
839
840
841 // copy 16bit String to 16bit MB
842 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
843 {
844 size_t len=0;
845
846 while (*psz && (!buf || len < n))
847 {
848 if (buf)
849 {
850 *(wxUint16*)buf = *psz;
851 buf += sizeof(wxUint16);
852 }
853 len += sizeof(wxUint16);
854 psz++;
855 }
856 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
857
858 return len;
859 }
860
861
862 // swap 16bit MB to 16bit String
863 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
864 {
865 size_t len=0;
866
867 while (*(wxUint16*)psz && (!buf || len < n))
868 {
869 if (buf)
870 {
871 ((char *)buf)[0] = psz[1];
872 ((char *)buf)[1] = psz[0];
873 buf++;
874 }
875 len++;
876 psz += sizeof(wxUint16);
877 }
878 if (buf && len<n) *buf=0;
879
880 return len;
881 }
882
883
884 // swap 16bit MB to 16bit String
885 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
886 {
887 size_t len=0;
888
889 while (*psz && (!buf || len < n))
890 {
891 if (buf)
892 {
893 *buf++ = ((char*)psz)[1];
894 *buf++ = ((char*)psz)[0];
895 }
896 len += sizeof(wxUint16);
897 psz++;
898 }
899 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
900
901 return len;
902 }
903
904
905 #else // WC_UTF16
906
907
908 // copy 16bit MB to 32bit String
909 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
910 {
911 size_t len=0;
912
913 while (*(wxUint16*)psz && (!buf || len < n))
914 {
915 wxUint32 cc;
916 size_t pa=decode_utf16((wxUint16*)psz, cc);
917 if (pa == (size_t)-1)
918 return pa;
919
920 if (buf)
921 *buf++ = (wchar_t)cc;
922 len++;
923 psz += pa * sizeof(wxUint16);
924 }
925 if (buf && len<n) *buf=0;
926
927 return len;
928 }
929
930
931 // copy 32bit String to 16bit MB
932 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
933 {
934 size_t len=0;
935
936 while (*psz && (!buf || len < n))
937 {
938 wxUint16 cc[2];
939 size_t pa=encode_utf16(*psz, cc);
940
941 if (pa == (size_t)-1)
942 return pa;
943
944 if (buf)
945 {
946 *(wxUint16*)buf = cc[0];
947 buf += sizeof(wxUint16);
948 if (pa > 1)
949 {
950 *(wxUint16*)buf = cc[1];
951 buf += sizeof(wxUint16);
952 }
953 }
954
955 len += pa*sizeof(wxUint16);
956 psz++;
957 }
958 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
959
960 return len;
961 }
962
963
964 // swap 16bit MB to 32bit String
965 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
966 {
967 size_t len=0;
968
969 while (*(wxUint16*)psz && (!buf || len < n))
970 {
971 wxUint32 cc;
972 char tmp[4];
973 tmp[0]=psz[1]; tmp[1]=psz[0];
974 tmp[2]=psz[3]; tmp[3]=psz[2];
975
976 size_t pa=decode_utf16((wxUint16*)tmp, cc);
977 if (pa == (size_t)-1)
978 return pa;
979
980 if (buf)
981 *buf++ = (wchar_t)cc;
982
983 len++;
984 psz += pa * sizeof(wxUint16);
985 }
986 if (buf && len<n) *buf=0;
987
988 return len;
989 }
990
991
992 // swap 32bit String to 16bit MB
993 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
994 {
995 size_t len=0;
996
997 while (*psz && (!buf || len < n))
998 {
999 wxUint16 cc[2];
1000 size_t pa=encode_utf16(*psz, cc);
1001
1002 if (pa == (size_t)-1)
1003 return pa;
1004
1005 if (buf)
1006 {
1007 *buf++ = ((char*)cc)[1];
1008 *buf++ = ((char*)cc)[0];
1009 if (pa > 1)
1010 {
1011 *buf++ = ((char*)cc)[3];
1012 *buf++ = ((char*)cc)[2];
1013 }
1014 }
1015
1016 len += pa*sizeof(wxUint16);
1017 psz++;
1018 }
1019 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1020
1021 return len;
1022 }
1023
1024 #endif // WC_UTF16
1025
1026
1027 // ----------------------------------------------------------------------------
1028 // UTF-32
1029 // ----------------------------------------------------------------------------
1030
1031 #ifdef WORDS_BIGENDIAN
1032 #define wxMBConvUTF32straight wxMBConvUTF32BE
1033 #define wxMBConvUTF32swap wxMBConvUTF32LE
1034 #else
1035 #define wxMBConvUTF32swap wxMBConvUTF32BE
1036 #define wxMBConvUTF32straight wxMBConvUTF32LE
1037 #endif
1038
1039
1040 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1041 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1042
1043
1044 #ifdef WC_UTF16
1045
1046 // copy 32bit MB to 16bit String
1047 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1048 {
1049 size_t len=0;
1050
1051 while (*(wxUint32*)psz && (!buf || len < n))
1052 {
1053 wxUint16 cc[2];
1054
1055 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1056 if (pa == (size_t)-1)
1057 return pa;
1058
1059 if (buf)
1060 {
1061 *buf++ = cc[0];
1062 if (pa > 1)
1063 *buf++ = cc[1];
1064 }
1065 len += pa;
1066 psz += sizeof(wxUint32);
1067 }
1068 if (buf && len<n) *buf=0;
1069
1070 return len;
1071 }
1072
1073
1074 // copy 16bit String to 32bit MB
1075 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1076 {
1077 size_t len=0;
1078
1079 while (*psz && (!buf || len < n))
1080 {
1081 wxUint32 cc;
1082
1083 // cast is ok for WC_UTF16
1084 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1085 if (pa == (size_t)-1)
1086 return pa;
1087
1088 if (buf)
1089 {
1090 *(wxUint32*)buf = cc;
1091 buf += sizeof(wxUint32);
1092 }
1093 len += sizeof(wxUint32);
1094 psz += pa;
1095 }
1096
1097 if (buf && len<=n-sizeof(wxUint32))
1098 *(wxUint32*)buf=0;
1099
1100 return len;
1101 }
1102
1103
1104
1105 // swap 32bit MB to 16bit String
1106 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1107 {
1108 size_t len=0;
1109
1110 while (*(wxUint32*)psz && (!buf || len < n))
1111 {
1112 char tmp[4];
1113 tmp[0] = psz[3]; tmp[1] = psz[2];
1114 tmp[2] = psz[1]; tmp[3] = psz[0];
1115
1116
1117 wxUint16 cc[2];
1118
1119 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1120 if (pa == (size_t)-1)
1121 return pa;
1122
1123 if (buf)
1124 {
1125 *buf++ = cc[0];
1126 if (pa > 1)
1127 *buf++ = cc[1];
1128 }
1129 len += pa;
1130 psz += sizeof(wxUint32);
1131 }
1132
1133 if (buf && len<n)
1134 *buf=0;
1135
1136 return len;
1137 }
1138
1139
1140 // swap 16bit String to 32bit MB
1141 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1142 {
1143 size_t len=0;
1144
1145 while (*psz && (!buf || len < n))
1146 {
1147 char cc[4];
1148
1149 // cast is ok for WC_UTF16
1150 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1151 if (pa == (size_t)-1)
1152 return pa;
1153
1154 if (buf)
1155 {
1156 *buf++ = cc[3];
1157 *buf++ = cc[2];
1158 *buf++ = cc[1];
1159 *buf++ = cc[0];
1160 }
1161 len += sizeof(wxUint32);
1162 psz += pa;
1163 }
1164
1165 if (buf && len<=n-sizeof(wxUint32))
1166 *(wxUint32*)buf=0;
1167
1168 return len;
1169 }
1170
1171 #else // WC_UTF16
1172
1173
1174 // copy 32bit MB to 32bit String
1175 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1176 {
1177 size_t len=0;
1178
1179 while (*(wxUint32*)psz && (!buf || len < n))
1180 {
1181 if (buf)
1182 *buf++ = (wchar_t)(*(wxUint32*)psz);
1183 len++;
1184 psz += sizeof(wxUint32);
1185 }
1186
1187 if (buf && len<n)
1188 *buf=0;
1189
1190 return len;
1191 }
1192
1193
1194 // copy 32bit String to 32bit MB
1195 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1196 {
1197 size_t len=0;
1198
1199 while (*psz && (!buf || len < n))
1200 {
1201 if (buf)
1202 {
1203 *(wxUint32*)buf = *psz;
1204 buf += sizeof(wxUint32);
1205 }
1206
1207 len += sizeof(wxUint32);
1208 psz++;
1209 }
1210
1211 if (buf && len<=n-sizeof(wxUint32))
1212 *(wxUint32*)buf=0;
1213
1214 return len;
1215 }
1216
1217
1218 // swap 32bit MB to 32bit String
1219 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1220 {
1221 size_t len=0;
1222
1223 while (*(wxUint32*)psz && (!buf || len < n))
1224 {
1225 if (buf)
1226 {
1227 ((char *)buf)[0] = psz[3];
1228 ((char *)buf)[1] = psz[2];
1229 ((char *)buf)[2] = psz[1];
1230 ((char *)buf)[3] = psz[0];
1231 buf++;
1232 }
1233 len++;
1234 psz += sizeof(wxUint32);
1235 }
1236
1237 if (buf && len<n)
1238 *buf=0;
1239
1240 return len;
1241 }
1242
1243
1244 // swap 32bit String to 32bit MB
1245 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1246 {
1247 size_t len=0;
1248
1249 while (*psz && (!buf || len < n))
1250 {
1251 if (buf)
1252 {
1253 *buf++ = ((char *)psz)[3];
1254 *buf++ = ((char *)psz)[2];
1255 *buf++ = ((char *)psz)[1];
1256 *buf++ = ((char *)psz)[0];
1257 }
1258 len += sizeof(wxUint32);
1259 psz++;
1260 }
1261
1262 if (buf && len<=n-sizeof(wxUint32))
1263 *(wxUint32*)buf=0;
1264
1265 return len;
1266 }
1267
1268
1269 #endif // WC_UTF16
1270
1271
1272 // ============================================================================
1273 // The classes doing conversion using the iconv_xxx() functions
1274 // ============================================================================
1275
1276 #ifdef HAVE_ICONV
1277
1278 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1279 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1280 // (unless there's yet another bug in glibc) the only case when iconv()
1281 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1282 // left in the input buffer -- when _real_ error occurs,
1283 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1284 // iconv() failure.
1285 // [This bug does not appear in glibc 2.2.]
1286 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1287 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1288 (errno != E2BIG || bufLeft != 0))
1289 #else
1290 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1291 #endif
1292
1293 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1294
1295 #define ICONV_T_INVALID ((iconv_t)-1)
1296
1297 #if SIZEOF_WCHAR_T == 4
1298 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1299 #define WC_ENC wxFONTENCODING_UTF32
1300 #elif SIZEOF_WCHAR_T == 2
1301 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1302 #define WC_ENC wxFONTENCODING_UTF16
1303 #else // sizeof(wchar_t) != 2 nor 4
1304 // does this ever happen?
1305 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1306 #endif
1307
1308 // ----------------------------------------------------------------------------
1309 // wxMBConv_iconv: encapsulates an iconv character set
1310 // ----------------------------------------------------------------------------
1311
1312 class wxMBConv_iconv : public wxMBConv
1313 {
1314 public:
1315 wxMBConv_iconv(const wxChar *name);
1316 virtual ~wxMBConv_iconv();
1317
1318 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1319 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1320
1321 bool IsOk() const
1322 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1323
1324 protected:
1325 // the iconv handlers used to translate from multibyte to wide char and in
1326 // the other direction
1327 iconv_t m2w,
1328 w2m;
1329 #if wxUSE_THREADS
1330 // guards access to m2w and w2m objects
1331 wxMutex m_iconvMutex;
1332 #endif
1333
1334 private:
1335 // the name (for iconv_open()) of a wide char charset -- if none is
1336 // available on this machine, it will remain NULL
1337 static wxString ms_wcCharsetName;
1338
1339 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1340 // different endian-ness than the native one
1341 static bool ms_wcNeedsSwap;
1342 };
1343
1344 // make the constructor available for unit testing
1345 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1346 {
1347 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1348 if ( !result->IsOk() )
1349 {
1350 delete result;
1351 return 0;
1352 }
1353 return result;
1354 }
1355
1356 wxString wxMBConv_iconv::ms_wcCharsetName;
1357 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1358
1359 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1360 {
1361 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1362 // names for the charsets
1363 const wxCharBuffer cname(wxString(name).ToAscii());
1364
1365 // check for charset that represents wchar_t:
1366 if ( ms_wcCharsetName.empty() )
1367 {
1368 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1369
1370 #if wxUSE_FONTMAP
1371 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1372 #else // !wxUSE_FONTMAP
1373 static const wxChar *names[] =
1374 {
1375 #if SIZEOF_WCHAR_T == 4
1376 _T("UCS-4"),
1377 #elif SIZEOF_WCHAR_T = 2
1378 _T("UCS-2"),
1379 #endif
1380 NULL
1381 };
1382 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1383
1384 for ( ; *names && ms_wcCharsetName.empty(); ++names )
1385 {
1386 const wxString nameCS(*names);
1387
1388 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1389 wxString nameXE(nameCS);
1390 #ifdef WORDS_BIGENDIAN
1391 nameXE += _T("BE");
1392 #else // little endian
1393 nameXE += _T("LE");
1394 #endif
1395
1396 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1397 nameXE.c_str());
1398
1399 m2w = iconv_open(nameXE.ToAscii(), cname);
1400 if ( m2w == ICONV_T_INVALID )
1401 {
1402 // try charset w/o bytesex info (e.g. "UCS4")
1403 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1404 nameCS.c_str());
1405 m2w = iconv_open(nameCS.ToAscii(), cname);
1406
1407 // and check for bytesex ourselves:
1408 if ( m2w != ICONV_T_INVALID )
1409 {
1410 char buf[2], *bufPtr;
1411 wchar_t wbuf[2], *wbufPtr;
1412 size_t insz, outsz;
1413 size_t res;
1414
1415 buf[0] = 'A';
1416 buf[1] = 0;
1417 wbuf[0] = 0;
1418 insz = 2;
1419 outsz = SIZEOF_WCHAR_T * 2;
1420 wbufPtr = wbuf;
1421 bufPtr = buf;
1422
1423 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1424 (char**)&wbufPtr, &outsz);
1425
1426 if (ICONV_FAILED(res, insz))
1427 {
1428 wxLogLastError(wxT("iconv"));
1429 wxLogError(_("Conversion to charset '%s' doesn't work."),
1430 nameCS.c_str());
1431 }
1432 else // ok, can convert to this encoding, remember it
1433 {
1434 ms_wcCharsetName = nameCS;
1435 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1436 }
1437 }
1438 }
1439 else // use charset not requiring byte swapping
1440 {
1441 ms_wcCharsetName = nameXE;
1442 }
1443 }
1444
1445 wxLogTrace(TRACE_STRCONV,
1446 wxT("iconv wchar_t charset is \"%s\"%s"),
1447 ms_wcCharsetName.empty() ? _T("<none>")
1448 : ms_wcCharsetName.c_str(),
1449 ms_wcNeedsSwap ? _T(" (needs swap)")
1450 : _T(""));
1451 }
1452 else // we already have ms_wcCharsetName
1453 {
1454 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1455 }
1456
1457 if ( ms_wcCharsetName.empty() )
1458 {
1459 w2m = ICONV_T_INVALID;
1460 }
1461 else
1462 {
1463 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1464 if ( w2m == ICONV_T_INVALID )
1465 {
1466 wxLogTrace(TRACE_STRCONV,
1467 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1468 ms_wcCharsetName.c_str(), cname.data());
1469 }
1470 }
1471 }
1472
1473 wxMBConv_iconv::~wxMBConv_iconv()
1474 {
1475 if ( m2w != ICONV_T_INVALID )
1476 iconv_close(m2w);
1477 if ( w2m != ICONV_T_INVALID )
1478 iconv_close(w2m);
1479 }
1480
1481 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1482 {
1483 #if wxUSE_THREADS
1484 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1485 // Unfortunately there is a couple of global wxCSConv objects such as
1486 // wxConvLocal that are used all over wx code, so we have to make sure
1487 // the handle is used by at most one thread at the time. Otherwise
1488 // only a few wx classes would be safe to use from non-main threads
1489 // as MB<->WC conversion would fail "randomly".
1490 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1491 #endif
1492
1493 size_t inbuf = strlen(psz);
1494 size_t outbuf = n * SIZEOF_WCHAR_T;
1495 size_t res, cres;
1496 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1497 wchar_t *bufPtr = buf;
1498 const char *pszPtr = psz;
1499
1500 if (buf)
1501 {
1502 // have destination buffer, convert there
1503 cres = iconv(m2w,
1504 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1505 (char**)&bufPtr, &outbuf);
1506 res = n - (outbuf / SIZEOF_WCHAR_T);
1507
1508 if (ms_wcNeedsSwap)
1509 {
1510 // convert to native endianness
1511 for ( unsigned i = 0; i < res; i++ )
1512 buf[n] = WC_BSWAP(buf[i]);
1513 }
1514
1515 // NB: iconv was given only strlen(psz) characters on input, and so
1516 // it couldn't convert the trailing zero. Let's do it ourselves
1517 // if there's some room left for it in the output buffer.
1518 if (res < n)
1519 buf[res] = 0;
1520 }
1521 else
1522 {
1523 // no destination buffer... convert using temp buffer
1524 // to calculate destination buffer requirement
1525 wchar_t tbuf[8];
1526 res = 0;
1527 do {
1528 bufPtr = tbuf;
1529 outbuf = 8*SIZEOF_WCHAR_T;
1530
1531 cres = iconv(m2w,
1532 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1533 (char**)&bufPtr, &outbuf );
1534
1535 res += 8-(outbuf/SIZEOF_WCHAR_T);
1536 } while ((cres==(size_t)-1) && (errno==E2BIG));
1537 }
1538
1539 if (ICONV_FAILED(cres, inbuf))
1540 {
1541 //VS: it is ok if iconv fails, hence trace only
1542 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1543 return (size_t)-1;
1544 }
1545
1546 return res;
1547 }
1548
1549 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1550 {
1551 #if wxUSE_THREADS
1552 // NB: explained in MB2WC
1553 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1554 #endif
1555
1556 size_t inlen = wxWcslen(psz);
1557 size_t inbuf = inlen * SIZEOF_WCHAR_T;
1558 size_t outbuf = n;
1559 size_t res, cres;
1560
1561 wchar_t *tmpbuf = 0;
1562
1563 if (ms_wcNeedsSwap)
1564 {
1565 // need to copy to temp buffer to switch endianness
1566 // (doing WC_BSWAP twice on the original buffer won't help, as it
1567 // could be in read-only memory, or be accessed in some other thread)
1568 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1569 for ( size_t i = 0; i < inlen; i++ )
1570 tmpbuf[n] = WC_BSWAP(psz[i]);
1571 tmpbuf[inlen] = L'\0';
1572 psz = tmpbuf;
1573 }
1574
1575 if (buf)
1576 {
1577 // have destination buffer, convert there
1578 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1579
1580 res = n-outbuf;
1581
1582 // NB: iconv was given only wcslen(psz) characters on input, and so
1583 // it couldn't convert the trailing zero. Let's do it ourselves
1584 // if there's some room left for it in the output buffer.
1585 if (res < n)
1586 buf[0] = 0;
1587 }
1588 else
1589 {
1590 // no destination buffer... convert using temp buffer
1591 // to calculate destination buffer requirement
1592 char tbuf[16];
1593 res = 0;
1594 do {
1595 buf = tbuf; outbuf = 16;
1596
1597 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1598
1599 res += 16 - outbuf;
1600 } while ((cres==(size_t)-1) && (errno==E2BIG));
1601 }
1602
1603 if (ms_wcNeedsSwap)
1604 {
1605 free(tmpbuf);
1606 }
1607
1608 if (ICONV_FAILED(cres, inbuf))
1609 {
1610 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1611 return (size_t)-1;
1612 }
1613
1614 return res;
1615 }
1616
1617 #endif // HAVE_ICONV
1618
1619
1620 // ============================================================================
1621 // Win32 conversion classes
1622 // ============================================================================
1623
1624 #ifdef wxHAVE_WIN32_MB2WC
1625
1626 // from utils.cpp
1627 #if wxUSE_FONTMAP
1628 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1629 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1630 #endif
1631
1632 class wxMBConv_win32 : public wxMBConv
1633 {
1634 public:
1635 wxMBConv_win32()
1636 {
1637 m_CodePage = CP_ACP;
1638 }
1639
1640 #if wxUSE_FONTMAP
1641 wxMBConv_win32(const wxChar* name)
1642 {
1643 m_CodePage = wxCharsetToCodepage(name);
1644 }
1645
1646 wxMBConv_win32(wxFontEncoding encoding)
1647 {
1648 m_CodePage = wxEncodingToCodepage(encoding);
1649 }
1650 #endif
1651
1652 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1653 {
1654 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1655 // the behaviour is not compatible with the Unix version (using iconv)
1656 // and break the library itself, e.g. wxTextInputStream::NextChar()
1657 // wouldn't work if reading an incomplete MB char didn't result in an
1658 // error
1659 //
1660 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1661 // an error (tested under Windows Server 2003) and apparently it is
1662 // done on purpose, i.e. the function accepts any input in this case
1663 // and although I'd prefer to return error on ill-formed output, our
1664 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1665 // explicitly ill-formed according to RFC 2152) neither so we don't
1666 // even have any fallback here...
1667 //
1668 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1669 // Win XP or newer and if it is specified on older versions, conversion
1670 // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1671 // fails. So we can only use the flag on newer Windows versions.
1672 // Additionally, the flag is not supported by UTF7, symbol and CJK
1673 // encodings. See here:
1674 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1675 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1676 int flags = 0;
1677 if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
1678 m_CodePage < 50000 &&
1679 IsAtLeastWin2kSP4() )
1680 {
1681 flags = MB_ERR_INVALID_CHARS;
1682 }
1683 else if ( m_CodePage == CP_UTF8 )
1684 {
1685 // Avoid round-trip in the special case of UTF-8 by using our
1686 // own UTF-8 conversion code:
1687 return wxMBConvUTF8().MB2WC(buf, psz, n);
1688 }
1689
1690 const size_t len = ::MultiByteToWideChar
1691 (
1692 m_CodePage, // code page
1693 flags, // flags: fall on error
1694 psz, // input string
1695 -1, // its length (NUL-terminated)
1696 buf, // output string
1697 buf ? n : 0 // size of output buffer
1698 );
1699 if ( !len )
1700 {
1701 // function totally failed
1702 return (size_t)-1;
1703 }
1704
1705 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1706 // check if we succeeded, by doing a double trip:
1707 if ( !flags && buf )
1708 {
1709 const size_t mbLen = strlen(psz);
1710 wxCharBuffer mbBuf(mbLen);
1711 if ( ::WideCharToMultiByte
1712 (
1713 m_CodePage,
1714 0,
1715 buf,
1716 -1,
1717 mbBuf.data(),
1718 mbLen + 1, // size in bytes, not length
1719 NULL,
1720 NULL
1721 ) == 0 ||
1722 strcmp(mbBuf, psz) != 0 )
1723 {
1724 // we didn't obtain the same thing we started from, hence
1725 // the conversion was lossy and we consider that it failed
1726 return (size_t)-1;
1727 }
1728 }
1729
1730 // note that it returns count of written chars for buf != NULL and size
1731 // of the needed buffer for buf == NULL so in either case the length of
1732 // the string (which never includes the terminating NUL) is one less
1733 return len - 1;
1734 }
1735
1736 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1737 {
1738 /*
1739 we have a problem here: by default, WideCharToMultiByte() may
1740 replace characters unrepresentable in the target code page with bad
1741 quality approximations such as turning "1/2" symbol (U+00BD) into
1742 "1" for the code pages which don't have it and we, obviously, want
1743 to avoid this at any price
1744
1745 the trouble is that this function does it _silently_, i.e. it won't
1746 even tell us whether it did or not... Win98/2000 and higher provide
1747 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1748 we have to resort to a round trip, i.e. check that converting back
1749 results in the same string -- this is, of course, expensive but
1750 otherwise we simply can't be sure to not garble the data.
1751 */
1752
1753 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1754 // it doesn't work with CJK encodings (which we test for rather roughly
1755 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1756 // supporting it
1757 BOOL usedDef wxDUMMY_INITIALIZE(false);
1758 BOOL *pUsedDef;
1759 int flags;
1760 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1761 {
1762 // it's our lucky day
1763 flags = WC_NO_BEST_FIT_CHARS;
1764 pUsedDef = &usedDef;
1765 }
1766 else // old system or unsupported encoding
1767 {
1768 flags = 0;
1769 pUsedDef = NULL;
1770 }
1771
1772 const size_t len = ::WideCharToMultiByte
1773 (
1774 m_CodePage, // code page
1775 flags, // either none or no best fit
1776 pwz, // input string
1777 -1, // it is (wide) NUL-terminated
1778 buf, // output buffer
1779 buf ? n : 0, // and its size
1780 NULL, // default "replacement" char
1781 pUsedDef // [out] was it used?
1782 );
1783
1784 if ( !len )
1785 {
1786 // function totally failed
1787 return (size_t)-1;
1788 }
1789
1790 // if we were really converting, check if we succeeded
1791 if ( buf )
1792 {
1793 if ( flags )
1794 {
1795 // check if the conversion failed, i.e. if any replacements
1796 // were done
1797 if ( usedDef )
1798 return (size_t)-1;
1799 }
1800 else // we must resort to double tripping...
1801 {
1802 wxWCharBuffer wcBuf(n);
1803 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1804 wcscmp(wcBuf, pwz) != 0 )
1805 {
1806 // we didn't obtain the same thing we started from, hence
1807 // the conversion was lossy and we consider that it failed
1808 return (size_t)-1;
1809 }
1810 }
1811 }
1812
1813 // see the comment above for the reason of "len - 1"
1814 return len - 1;
1815 }
1816
1817 bool IsOk() const { return m_CodePage != -1; }
1818
1819 private:
1820 static bool CanUseNoBestFit()
1821 {
1822 static int s_isWin98Or2k = -1;
1823
1824 if ( s_isWin98Or2k == -1 )
1825 {
1826 int verMaj, verMin;
1827 switch ( wxGetOsVersion(&verMaj, &verMin) )
1828 {
1829 case wxWIN95:
1830 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1831 break;
1832
1833 case wxWINDOWS_NT:
1834 s_isWin98Or2k = verMaj >= 5;
1835 break;
1836
1837 default:
1838 // unknown, be conseravtive by default
1839 s_isWin98Or2k = 0;
1840 }
1841
1842 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1843 }
1844
1845 return s_isWin98Or2k == 1;
1846 }
1847
1848 static bool IsAtLeastWin2kSP4()
1849 {
1850 #ifdef __WXWINCE__
1851 return false;
1852 #else
1853 static int s_isAtLeastWin2kSP4 = -1;
1854
1855 if ( s_isAtLeastWin2kSP4 == -1 )
1856 {
1857 OSVERSIONINFOEX ver;
1858
1859 memset(&ver, 0, sizeof(ver));
1860 ver.dwOSVersionInfoSize = sizeof(ver);
1861 GetVersionEx((OSVERSIONINFO*)&ver);
1862
1863 s_isAtLeastWin2kSP4 =
1864 ((ver.dwMajorVersion > 5) || // Vista+
1865 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
1866 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
1867 ver.wServicePackMajor >= 4)) // 2000 SP4+
1868 ? 1 : 0;
1869 }
1870
1871 return s_isAtLeastWin2kSP4 == 1;
1872 #endif
1873 }
1874
1875 long m_CodePage;
1876 };
1877
1878 #endif // wxHAVE_WIN32_MB2WC
1879
1880 // ============================================================================
1881 // Cocoa conversion classes
1882 // ============================================================================
1883
1884 #if defined(__WXCOCOA__)
1885
// RN: There is no UTF-32 support in either Core Foundation or
// Cocoa. Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1889
1890 #include <CoreFoundation/CFString.h>
1891 #include <CoreFoundation/CFStringEncodingExt.h>
1892
1893 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1894 {
1895 CFStringEncoding enc = kCFStringEncodingInvalidId ;
1896 if ( encoding == wxFONTENCODING_DEFAULT )
1897 {
1898 enc = CFStringGetSystemEncoding();
1899 }
1900 else switch( encoding)
1901 {
1902 case wxFONTENCODING_ISO8859_1 :
1903 enc = kCFStringEncodingISOLatin1 ;
1904 break ;
1905 case wxFONTENCODING_ISO8859_2 :
1906 enc = kCFStringEncodingISOLatin2;
1907 break ;
1908 case wxFONTENCODING_ISO8859_3 :
1909 enc = kCFStringEncodingISOLatin3 ;
1910 break ;
1911 case wxFONTENCODING_ISO8859_4 :
1912 enc = kCFStringEncodingISOLatin4;
1913 break ;
1914 case wxFONTENCODING_ISO8859_5 :
1915 enc = kCFStringEncodingISOLatinCyrillic;
1916 break ;
1917 case wxFONTENCODING_ISO8859_6 :
1918 enc = kCFStringEncodingISOLatinArabic;
1919 break ;
1920 case wxFONTENCODING_ISO8859_7 :
1921 enc = kCFStringEncodingISOLatinGreek;
1922 break ;
1923 case wxFONTENCODING_ISO8859_8 :
1924 enc = kCFStringEncodingISOLatinHebrew;
1925 break ;
1926 case wxFONTENCODING_ISO8859_9 :
1927 enc = kCFStringEncodingISOLatin5;
1928 break ;
1929 case wxFONTENCODING_ISO8859_10 :
1930 enc = kCFStringEncodingISOLatin6;
1931 break ;
1932 case wxFONTENCODING_ISO8859_11 :
1933 enc = kCFStringEncodingISOLatinThai;
1934 break ;
1935 case wxFONTENCODING_ISO8859_13 :
1936 enc = kCFStringEncodingISOLatin7;
1937 break ;
1938 case wxFONTENCODING_ISO8859_14 :
1939 enc = kCFStringEncodingISOLatin8;
1940 break ;
1941 case wxFONTENCODING_ISO8859_15 :
1942 enc = kCFStringEncodingISOLatin9;
1943 break ;
1944
1945 case wxFONTENCODING_KOI8 :
1946 enc = kCFStringEncodingKOI8_R;
1947 break ;
1948 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1949 enc = kCFStringEncodingDOSRussian;
1950 break ;
1951
1952 // case wxFONTENCODING_BULGARIAN :
1953 // enc = ;
1954 // break ;
1955
1956 case wxFONTENCODING_CP437 :
1957 enc =kCFStringEncodingDOSLatinUS ;
1958 break ;
1959 case wxFONTENCODING_CP850 :
1960 enc = kCFStringEncodingDOSLatin1;
1961 break ;
1962 case wxFONTENCODING_CP852 :
1963 enc = kCFStringEncodingDOSLatin2;
1964 break ;
1965 case wxFONTENCODING_CP855 :
1966 enc = kCFStringEncodingDOSCyrillic;
1967 break ;
1968 case wxFONTENCODING_CP866 :
1969 enc =kCFStringEncodingDOSRussian ;
1970 break ;
1971 case wxFONTENCODING_CP874 :
1972 enc = kCFStringEncodingDOSThai;
1973 break ;
1974 case wxFONTENCODING_CP932 :
1975 enc = kCFStringEncodingDOSJapanese;
1976 break ;
1977 case wxFONTENCODING_CP936 :
1978 enc =kCFStringEncodingDOSChineseSimplif ;
1979 break ;
1980 case wxFONTENCODING_CP949 :
1981 enc = kCFStringEncodingDOSKorean;
1982 break ;
1983 case wxFONTENCODING_CP950 :
1984 enc = kCFStringEncodingDOSChineseTrad;
1985 break ;
1986 case wxFONTENCODING_CP1250 :
1987 enc = kCFStringEncodingWindowsLatin2;
1988 break ;
1989 case wxFONTENCODING_CP1251 :
1990 enc =kCFStringEncodingWindowsCyrillic ;
1991 break ;
1992 case wxFONTENCODING_CP1252 :
1993 enc =kCFStringEncodingWindowsLatin1 ;
1994 break ;
1995 case wxFONTENCODING_CP1253 :
1996 enc = kCFStringEncodingWindowsGreek;
1997 break ;
1998 case wxFONTENCODING_CP1254 :
1999 enc = kCFStringEncodingWindowsLatin5;
2000 break ;
2001 case wxFONTENCODING_CP1255 :
2002 enc =kCFStringEncodingWindowsHebrew ;
2003 break ;
2004 case wxFONTENCODING_CP1256 :
2005 enc =kCFStringEncodingWindowsArabic ;
2006 break ;
2007 case wxFONTENCODING_CP1257 :
2008 enc = kCFStringEncodingWindowsBalticRim;
2009 break ;
2010 // This only really encodes to UTF7 (if that) evidently
2011 // case wxFONTENCODING_UTF7 :
2012 // enc = kCFStringEncodingNonLossyASCII ;
2013 // break ;
2014 case wxFONTENCODING_UTF8 :
2015 enc = kCFStringEncodingUTF8 ;
2016 break ;
2017 case wxFONTENCODING_EUC_JP :
2018 enc = kCFStringEncodingEUC_JP;
2019 break ;
2020 case wxFONTENCODING_UTF16 :
2021 enc = kCFStringEncodingUnicode ;
2022 break ;
2023 case wxFONTENCODING_MACROMAN :
2024 enc = kCFStringEncodingMacRoman ;
2025 break ;
2026 case wxFONTENCODING_MACJAPANESE :
2027 enc = kCFStringEncodingMacJapanese ;
2028 break ;
2029 case wxFONTENCODING_MACCHINESETRAD :
2030 enc = kCFStringEncodingMacChineseTrad ;
2031 break ;
2032 case wxFONTENCODING_MACKOREAN :
2033 enc = kCFStringEncodingMacKorean ;
2034 break ;
2035 case wxFONTENCODING_MACARABIC :
2036 enc = kCFStringEncodingMacArabic ;
2037 break ;
2038 case wxFONTENCODING_MACHEBREW :
2039 enc = kCFStringEncodingMacHebrew ;
2040 break ;
2041 case wxFONTENCODING_MACGREEK :
2042 enc = kCFStringEncodingMacGreek ;
2043 break ;
2044 case wxFONTENCODING_MACCYRILLIC :
2045 enc = kCFStringEncodingMacCyrillic ;
2046 break ;
2047 case wxFONTENCODING_MACDEVANAGARI :
2048 enc = kCFStringEncodingMacDevanagari ;
2049 break ;
2050 case wxFONTENCODING_MACGURMUKHI :
2051 enc = kCFStringEncodingMacGurmukhi ;
2052 break ;
2053 case wxFONTENCODING_MACGUJARATI :
2054 enc = kCFStringEncodingMacGujarati ;
2055 break ;
2056 case wxFONTENCODING_MACORIYA :
2057 enc = kCFStringEncodingMacOriya ;
2058 break ;
2059 case wxFONTENCODING_MACBENGALI :
2060 enc = kCFStringEncodingMacBengali ;
2061 break ;
2062 case wxFONTENCODING_MACTAMIL :
2063 enc = kCFStringEncodingMacTamil ;
2064 break ;
2065 case wxFONTENCODING_MACTELUGU :
2066 enc = kCFStringEncodingMacTelugu ;
2067 break ;
2068 case wxFONTENCODING_MACKANNADA :
2069 enc = kCFStringEncodingMacKannada ;
2070 break ;
2071 case wxFONTENCODING_MACMALAJALAM :
2072 enc = kCFStringEncodingMacMalayalam ;
2073 break ;
2074 case wxFONTENCODING_MACSINHALESE :
2075 enc = kCFStringEncodingMacSinhalese ;
2076 break ;
2077 case wxFONTENCODING_MACBURMESE :
2078 enc = kCFStringEncodingMacBurmese ;
2079 break ;
2080 case wxFONTENCODING_MACKHMER :
2081 enc = kCFStringEncodingMacKhmer ;
2082 break ;
2083 case wxFONTENCODING_MACTHAI :
2084 enc = kCFStringEncodingMacThai ;
2085 break ;
2086 case wxFONTENCODING_MACLAOTIAN :
2087 enc = kCFStringEncodingMacLaotian ;
2088 break ;
2089 case wxFONTENCODING_MACGEORGIAN :
2090 enc = kCFStringEncodingMacGeorgian ;
2091 break ;
2092 case wxFONTENCODING_MACARMENIAN :
2093 enc = kCFStringEncodingMacArmenian ;
2094 break ;
2095 case wxFONTENCODING_MACCHINESESIMP :
2096 enc = kCFStringEncodingMacChineseSimp ;
2097 break ;
2098 case wxFONTENCODING_MACTIBETAN :
2099 enc = kCFStringEncodingMacTibetan ;
2100 break ;
2101 case wxFONTENCODING_MACMONGOLIAN :
2102 enc = kCFStringEncodingMacMongolian ;
2103 break ;
2104 case wxFONTENCODING_MACETHIOPIC :
2105 enc = kCFStringEncodingMacEthiopic ;
2106 break ;
2107 case wxFONTENCODING_MACCENTRALEUR :
2108 enc = kCFStringEncodingMacCentralEurRoman ;
2109 break ;
2110 case wxFONTENCODING_MACVIATNAMESE :
2111 enc = kCFStringEncodingMacVietnamese ;
2112 break ;
2113 case wxFONTENCODING_MACARABICEXT :
2114 enc = kCFStringEncodingMacExtArabic ;
2115 break ;
2116 case wxFONTENCODING_MACSYMBOL :
2117 enc = kCFStringEncodingMacSymbol ;
2118 break ;
2119 case wxFONTENCODING_MACDINGBATS :
2120 enc = kCFStringEncodingMacDingbats ;
2121 break ;
2122 case wxFONTENCODING_MACTURKISH :
2123 enc = kCFStringEncodingMacTurkish ;
2124 break ;
2125 case wxFONTENCODING_MACCROATIAN :
2126 enc = kCFStringEncodingMacCroatian ;
2127 break ;
2128 case wxFONTENCODING_MACICELANDIC :
2129 enc = kCFStringEncodingMacIcelandic ;
2130 break ;
2131 case wxFONTENCODING_MACROMANIAN :
2132 enc = kCFStringEncodingMacRomanian ;
2133 break ;
2134 case wxFONTENCODING_MACCELTIC :
2135 enc = kCFStringEncodingMacCeltic ;
2136 break ;
2137 case wxFONTENCODING_MACGAELIC :
2138 enc = kCFStringEncodingMacGaelic ;
2139 break ;
2140 // case wxFONTENCODING_MACKEYBOARD :
2141 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2142 // break ;
2143 default :
2144 // because gcc is picky
2145 break ;
2146 } ;
2147 return enc ;
2148 }
2149
2150 class wxMBConv_cocoa : public wxMBConv
2151 {
2152 public:
2153 wxMBConv_cocoa()
2154 {
2155 Init(CFStringGetSystemEncoding()) ;
2156 }
2157
2158 #if wxUSE_FONTMAP
2159 wxMBConv_cocoa(const wxChar* name)
2160 {
2161 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2162 }
2163 #endif
2164
2165 wxMBConv_cocoa(wxFontEncoding encoding)
2166 {
2167 Init( wxCFStringEncFromFontEnc(encoding) );
2168 }
2169
2170 ~wxMBConv_cocoa()
2171 {
2172 }
2173
2174 void Init( CFStringEncoding encoding)
2175 {
2176 m_encoding = encoding ;
2177 }
2178
2179 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2180 {
2181 wxASSERT(szUnConv);
2182
2183 CFStringRef theString = CFStringCreateWithBytes (
2184 NULL, //the allocator
2185 (const UInt8*)szUnConv,
2186 strlen(szUnConv),
2187 m_encoding,
2188 false //no BOM/external representation
2189 );
2190
2191 wxASSERT(theString);
2192
2193 size_t nOutLength = CFStringGetLength(theString);
2194
2195 if (szOut == NULL)
2196 {
2197 CFRelease(theString);
2198 return nOutLength;
2199 }
2200
2201 CFRange theRange = { 0, nOutSize };
2202
2203 #if SIZEOF_WCHAR_T == 4
2204 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2205 #endif
2206
2207 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2208
2209 CFRelease(theString);
2210
2211 szUniCharBuffer[nOutLength] = '\0' ;
2212
2213 #if SIZEOF_WCHAR_T == 4
2214 wxMBConvUTF16 converter ;
2215 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2216 delete[] szUniCharBuffer;
2217 #endif
2218
2219 return nOutLength;
2220 }
2221
2222 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2223 {
2224 wxASSERT(szUnConv);
2225
2226 size_t nRealOutSize;
2227 size_t nBufSize = wxWcslen(szUnConv);
2228 UniChar* szUniBuffer = (UniChar*) szUnConv;
2229
2230 #if SIZEOF_WCHAR_T == 4
2231 wxMBConvUTF16 converter ;
2232 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2233 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2234 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2235 nBufSize /= sizeof(UniChar);
2236 #endif
2237
2238 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2239 NULL, //allocator
2240 szUniBuffer,
2241 nBufSize,
2242 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2243 );
2244
2245 wxASSERT(theString);
2246
2247 //Note that CER puts a BOM when converting to unicode
2248 //so we check and use getchars instead in that case
2249 if (m_encoding == kCFStringEncodingUnicode)
2250 {
2251 if (szOut != NULL)
2252 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2253
2254 nRealOutSize = CFStringGetLength(theString) + 1;
2255 }
2256 else
2257 {
2258 CFStringGetBytes(
2259 theString,
2260 CFRangeMake(0, CFStringGetLength(theString)),
2261 m_encoding,
2262 0, //what to put in characters that can't be converted -
2263 //0 tells CFString to return NULL if it meets such a character
2264 false, //not an external representation
2265 (UInt8*) szOut,
2266 nOutSize,
2267 (CFIndex*) &nRealOutSize
2268 );
2269 }
2270
2271 CFRelease(theString);
2272
2273 #if SIZEOF_WCHAR_T == 4
2274 delete[] szUniBuffer;
2275 #endif
2276
2277 return nRealOutSize - 1;
2278 }
2279
2280 bool IsOk() const
2281 {
2282 return m_encoding != kCFStringEncodingInvalidId &&
2283 CFStringIsEncodingAvailable(m_encoding);
2284 }
2285
2286 private:
2287 CFStringEncoding m_encoding ;
2288 };
2289
2290 #endif // defined(__WXCOCOA__)
2291
2292 // ============================================================================
2293 // Mac conversion classes
2294 // ============================================================================
2295
2296 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2297
2298 class wxMBConv_mac : public wxMBConv
2299 {
2300 public:
2301 wxMBConv_mac()
2302 {
2303 Init(CFStringGetSystemEncoding()) ;
2304 }
2305
2306 #if wxUSE_FONTMAP
2307 wxMBConv_mac(const wxChar* name)
2308 {
2309 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2310 }
2311 #endif
2312
2313 wxMBConv_mac(wxFontEncoding encoding)
2314 {
2315 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2316 }
2317
2318 ~wxMBConv_mac()
2319 {
2320 OSStatus status = noErr ;
2321 status = TECDisposeConverter(m_MB2WC_converter);
2322 status = TECDisposeConverter(m_WC2MB_converter);
2323 }
2324
2325
2326 void Init( TextEncodingBase encoding)
2327 {
2328 OSStatus status = noErr ;
2329 m_char_encoding = encoding ;
2330 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2331
2332 status = TECCreateConverter(&m_MB2WC_converter,
2333 m_char_encoding,
2334 m_unicode_encoding);
2335 status = TECCreateConverter(&m_WC2MB_converter,
2336 m_unicode_encoding,
2337 m_char_encoding);
2338 }
2339
2340 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2341 {
2342 OSStatus status = noErr ;
2343 ByteCount byteOutLen ;
2344 ByteCount byteInLen = strlen(psz) ;
2345 wchar_t *tbuf = NULL ;
2346 UniChar* ubuf = NULL ;
2347 size_t res = 0 ;
2348
2349 if (buf == NULL)
2350 {
2351 //apple specs say at least 32
2352 n = wxMax( 32 , byteInLen ) ;
2353 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2354 }
2355 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2356 #if SIZEOF_WCHAR_T == 4
2357 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2358 #else
2359 ubuf = (UniChar*) (buf ? buf : tbuf) ;
2360 #endif
2361 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2362 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2363 #if SIZEOF_WCHAR_T == 4
2364 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2365 // is not properly terminated we get random characters at the end
2366 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2367 wxMBConvUTF16 converter ;
2368 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2369 free( ubuf ) ;
2370 #else
2371 res = byteOutLen / sizeof( UniChar ) ;
2372 #endif
2373 if ( buf == NULL )
2374 free(tbuf) ;
2375
2376 if ( buf && res < n)
2377 buf[res] = 0;
2378
2379 return res ;
2380 }
2381
2382 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2383 {
2384 OSStatus status = noErr ;
2385 ByteCount byteOutLen ;
2386 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2387
2388 char *tbuf = NULL ;
2389
2390 if (buf == NULL)
2391 {
2392 //apple specs say at least 32
2393 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2394 tbuf = (char*) malloc( n ) ;
2395 }
2396
2397 ByteCount byteBufferLen = n ;
2398 UniChar* ubuf = NULL ;
2399 #if SIZEOF_WCHAR_T == 4
2400 wxMBConvUTF16 converter ;
2401 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2402 byteInLen = unicharlen ;
2403 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2404 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2405 #else
2406 ubuf = (UniChar*) psz ;
2407 #endif
2408 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2409 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2410 #if SIZEOF_WCHAR_T == 4
2411 free( ubuf ) ;
2412 #endif
2413 if ( buf == NULL )
2414 free(tbuf) ;
2415
2416 size_t res = byteOutLen ;
2417 if ( buf && res < n)
2418 {
2419 buf[res] = 0;
2420
2421 //we need to double-trip to verify it didn't insert any ? in place
2422 //of bogus characters
2423 wxWCharBuffer wcBuf(n);
2424 size_t pszlen = wxWcslen(psz);
2425 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2426 wxWcslen(wcBuf) != pszlen ||
2427 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2428 {
2429 // we didn't obtain the same thing we started from, hence
2430 // the conversion was lossy and we consider that it failed
2431 return (size_t)-1;
2432 }
2433 }
2434
2435 return res ;
2436 }
2437
2438 bool IsOk() const
2439 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2440
2441 private:
2442 TECObjectRef m_MB2WC_converter ;
2443 TECObjectRef m_WC2MB_converter ;
2444
2445 TextEncodingBase m_char_encoding ;
2446 TextEncodingBase m_unicode_encoding ;
2447 };
2448
2449 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2450
2451 // ============================================================================
2452 // wxEncodingConverter based conversion classes
2453 // ============================================================================
2454
2455 #if wxUSE_FONTMAP
2456
2457 class wxMBConv_wxwin : public wxMBConv
2458 {
2459 private:
2460 void Init()
2461 {
2462 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2463 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2464 }
2465
2466 public:
2467 // temporarily just use wxEncodingConverter stuff,
2468 // so that it works while a better implementation is built
2469 wxMBConv_wxwin(const wxChar* name)
2470 {
2471 if (name)
2472 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2473 else
2474 m_enc = wxFONTENCODING_SYSTEM;
2475
2476 Init();
2477 }
2478
2479 wxMBConv_wxwin(wxFontEncoding enc)
2480 {
2481 m_enc = enc;
2482
2483 Init();
2484 }
2485
2486 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2487 {
2488 size_t inbuf = strlen(psz);
2489 if (buf)
2490 {
2491 if (!m2w.Convert(psz,buf))
2492 return (size_t)-1;
2493 }
2494 return inbuf;
2495 }
2496
2497 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2498 {
2499 const size_t inbuf = wxWcslen(psz);
2500 if (buf)
2501 {
2502 if (!w2m.Convert(psz,buf))
2503 return (size_t)-1;
2504 }
2505
2506 return inbuf;
2507 }
2508
2509 bool IsOk() const { return m_ok; }
2510
2511 public:
2512 wxFontEncoding m_enc;
2513 wxEncodingConverter m2w, w2m;
2514
2515 // were we initialized successfully?
2516 bool m_ok;
2517
2518 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2519 };
2520
2521 // make the constructors available for unit testing
2522 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2523 {
2524 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2525 if ( !result->IsOk() )
2526 {
2527 delete result;
2528 return 0;
2529 }
2530 return result;
2531 }
2532
2533 #endif // wxUSE_FONTMAP
2534
2535 // ============================================================================
2536 // wxCSConv implementation
2537 // ============================================================================
2538
2539 void wxCSConv::Init()
2540 {
2541 m_name = NULL;
2542 m_convReal = NULL;
2543 m_deferred = true;
2544 }
2545
2546 wxCSConv::wxCSConv(const wxChar *charset)
2547 {
2548 Init();
2549
2550 if ( charset )
2551 {
2552 SetName(charset);
2553 }
2554
2555 #if wxUSE_FONTMAP
2556 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2557 #else
2558 m_encoding = wxFONTENCODING_SYSTEM;
2559 #endif
2560 }
2561
2562 wxCSConv::wxCSConv(wxFontEncoding encoding)
2563 {
2564 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2565 {
2566 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2567
2568 encoding = wxFONTENCODING_SYSTEM;
2569 }
2570
2571 Init();
2572
2573 m_encoding = encoding;
2574 }
2575
2576 wxCSConv::~wxCSConv()
2577 {
2578 Clear();
2579 }
2580
2581 wxCSConv::wxCSConv(const wxCSConv& conv)
2582 : wxMBConv()
2583 {
2584 Init();
2585
2586 SetName(conv.m_name);
2587 m_encoding = conv.m_encoding;
2588 }
2589
2590 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2591 {
2592 Clear();
2593
2594 SetName(conv.m_name);
2595 m_encoding = conv.m_encoding;
2596
2597 return *this;
2598 }
2599
2600 void wxCSConv::Clear()
2601 {
2602 free(m_name);
2603 delete m_convReal;
2604
2605 m_name = NULL;
2606 m_convReal = NULL;
2607 }
2608
2609 void wxCSConv::SetName(const wxChar *charset)
2610 {
2611 if (charset)
2612 {
2613 m_name = wxStrdup(charset);
2614 m_deferred = true;
2615 }
2616 }
2617
#if wxUSE_FONTMAP
#include "wx/hashmap.h"

// map from a font encoding to a charset name
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// cache used by wxCSConv::DoCreate(): for each encoding, the charset name
// which was accepted by iconv, or an empty string if none of them was
static wxEncodingNameCache gs_nameCache;
#endif // wxUSE_FONTMAP
2626
2627 wxMBConv *wxCSConv::DoCreate() const
2628 {
2629 #if wxUSE_FONTMAP
2630 wxLogTrace(TRACE_STRCONV,
2631 wxT("creating conversion for %s"),
2632 (m_name ? m_name
2633 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2634 #endif // wxUSE_FONTMAP
2635
2636 // check for the special case of ASCII or ISO8859-1 charset: as we have
2637 // special knowledge of it anyhow, we don't need to create a special
2638 // conversion object
2639 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
2640 m_encoding == wxFONTENCODING_DEFAULT )
2641 {
2642 // don't convert at all
2643 return NULL;
2644 }
2645
2646 // we trust OS to do conversion better than we can so try external
2647 // conversion methods first
2648 //
2649 // the full order is:
2650 // 1. OS conversion (iconv() under Unix or Win32 API)
2651 // 2. hard coded conversions for UTF
2652 // 3. wxEncodingConverter as fall back
2653
2654 // step (1)
2655 #ifdef HAVE_ICONV
2656 #if !wxUSE_FONTMAP
2657 if ( m_name )
2658 #endif // !wxUSE_FONTMAP
2659 {
2660 wxString name(m_name);
2661 wxFontEncoding encoding(m_encoding);
2662
2663 if ( !name.empty() )
2664 {
2665 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2666 if ( conv->IsOk() )
2667 return conv;
2668
2669 delete conv;
2670
2671 #if wxUSE_FONTMAP
2672 encoding =
2673 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2674 #endif // wxUSE_FONTMAP
2675 }
2676 #if wxUSE_FONTMAP
2677 {
2678 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2679 if ( it != gs_nameCache.end() )
2680 {
2681 if ( it->second.empty() )
2682 return NULL;
2683
2684 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2685 if ( conv->IsOk() )
2686 return conv;
2687
2688 delete conv;
2689 }
2690
2691 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2692
2693 for ( ; *names; ++names )
2694 {
2695 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2696 if ( conv->IsOk() )
2697 {
2698 gs_nameCache[encoding] = *names;
2699 return conv;
2700 }
2701
2702 delete conv;
2703 }
2704
2705 gs_nameCache[encoding] = _T(""); // cache the failure
2706 }
2707 #endif // wxUSE_FONTMAP
2708 }
2709 #endif // HAVE_ICONV
2710
2711 #ifdef wxHAVE_WIN32_MB2WC
2712 {
2713 #if wxUSE_FONTMAP
2714 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2715 : new wxMBConv_win32(m_encoding);
2716 if ( conv->IsOk() )
2717 return conv;
2718
2719 delete conv;
2720 #else
2721 return NULL;
2722 #endif
2723 }
2724 #endif // wxHAVE_WIN32_MB2WC
2725 #if defined(__WXMAC__)
2726 {
2727 // leave UTF16 and UTF32 to the built-ins of wx
2728 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2729 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2730 {
2731
2732 #if wxUSE_FONTMAP
2733 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2734 : new wxMBConv_mac(m_encoding);
2735 #else
2736 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2737 #endif
2738 if ( conv->IsOk() )
2739 return conv;
2740
2741 delete conv;
2742 }
2743 }
2744 #endif
2745 #if defined(__WXCOCOA__)
2746 {
2747 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2748 {
2749
2750 #if wxUSE_FONTMAP
2751 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2752 : new wxMBConv_cocoa(m_encoding);
2753 #else
2754 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2755 #endif
2756 if ( conv->IsOk() )
2757 return conv;
2758
2759 delete conv;
2760 }
2761 }
2762 #endif
2763 // step (2)
2764 wxFontEncoding enc = m_encoding;
2765 #if wxUSE_FONTMAP
2766 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2767 {
2768 // use "false" to suppress interactive dialogs -- we can be called from
2769 // anywhere and popping up a dialog from here is the last thing we want to
2770 // do
2771 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2772 }
2773 #endif // wxUSE_FONTMAP
2774
2775 switch ( enc )
2776 {
2777 case wxFONTENCODING_UTF7:
2778 return new wxMBConvUTF7;
2779
2780 case wxFONTENCODING_UTF8:
2781 return new wxMBConvUTF8;
2782
2783 case wxFONTENCODING_UTF16BE:
2784 return new wxMBConvUTF16BE;
2785
2786 case wxFONTENCODING_UTF16LE:
2787 return new wxMBConvUTF16LE;
2788
2789 case wxFONTENCODING_UTF32BE:
2790 return new wxMBConvUTF32BE;
2791
2792 case wxFONTENCODING_UTF32LE:
2793 return new wxMBConvUTF32LE;
2794
2795 default:
2796 // nothing to do but put here to suppress gcc warnings
2797 ;
2798 }
2799
2800 // step (3)
2801 #if wxUSE_FONTMAP
2802 {
2803 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2804 : new wxMBConv_wxwin(m_encoding);
2805 if ( conv->IsOk() )
2806 return conv;
2807
2808 delete conv;
2809 }
2810 #endif // wxUSE_FONTMAP
2811
2812 // NB: This is a hack to prevent deadlock. What could otherwise happen
2813 // in Unicode build: wxConvLocal creation ends up being here
2814 // because of some failure and logs the error. But wxLog will try to
2815 // attach timestamp, for which it will need wxConvLocal (to convert
2816 // time to char* and then wchar_t*), but that fails, tries to log
2817 // error, but wxLog has a (already locked) critical section that
2818 // guards static buffer.
2819 static bool alreadyLoggingError = false;
2820 if (!alreadyLoggingError)
2821 {
2822 alreadyLoggingError = true;
2823 wxLogError(_("Cannot convert from the charset '%s'!"),
2824 m_name ? m_name
2825 :
2826 #if wxUSE_FONTMAP
2827 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2828 #else // !wxUSE_FONTMAP
2829 wxString::Format(_("encoding %s"), m_encoding).c_str()
2830 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2831 );
2832 alreadyLoggingError = false;
2833 }
2834
2835 return NULL;
2836 }
2837
2838 void wxCSConv::CreateConvIfNeeded() const
2839 {
2840 if ( m_deferred )
2841 {
2842 wxCSConv *self = (wxCSConv *)this; // const_cast
2843
2844 #if wxUSE_INTL
2845 // if we don't have neither the name nor the encoding, use the default
2846 // encoding for this system
2847 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2848 {
2849 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2850 }
2851 #endif // wxUSE_INTL
2852
2853 self->m_convReal = DoCreate();
2854 self->m_deferred = false;
2855 }
2856 }
2857
2858 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2859 {
2860 CreateConvIfNeeded();
2861
2862 if (m_convReal)
2863 return m_convReal->MB2WC(buf, psz, n);
2864
2865 // latin-1 (direct)
2866 size_t len = strlen(psz);
2867
2868 if (buf)
2869 {
2870 for (size_t c = 0; c <= len; c++)
2871 buf[c] = (unsigned char)(psz[c]);
2872 }
2873
2874 return len;
2875 }
2876
2877 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2878 {
2879 CreateConvIfNeeded();
2880
2881 if (m_convReal)
2882 return m_convReal->WC2MB(buf, psz, n);
2883
2884 // latin-1 (direct)
2885 const size_t len = wxWcslen(psz);
2886 if (buf)
2887 {
2888 for (size_t c = 0; c <= len; c++)
2889 {
2890 if (psz[c] > 0xFF)
2891 return (size_t)-1;
2892 buf[c] = (char)psz[c];
2893 }
2894 }
2895 else
2896 {
2897 for (size_t c = 0; c <= len; c++)
2898 {
2899 if (psz[c] > 0xFF)
2900 return (size_t)-1;
2901 }
2902 }
2903
2904 return len;
2905 }
2906
2907 // ----------------------------------------------------------------------------
2908 // globals
2909 // ----------------------------------------------------------------------------
2910
2911 #ifdef __WINDOWS__
2912 static wxMBConv_win32 wxConvLibcObj;
2913 #elif defined(__WXMAC__) && !defined(__MACH__)
2914 static wxMBConv_mac wxConvLibcObj ;
2915 #else
2916 static wxMBConvLibc wxConvLibcObj;
2917 #endif
2918
2919 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2920 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2921 static wxMBConvUTF7 wxConvUTF7Obj;
2922 static wxMBConvUTF8 wxConvUTF8Obj;
2923
2924 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2925 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2926 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2927 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2928 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2929 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2930 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2931 #ifdef __WXOSX__
2932 wxConvUTF8Obj;
2933 #else
2934 wxConvLibcObj;
2935 #endif
2936
2937
2938 #else // !wxUSE_WCHAR_T
2939
2940 // stand-ins in absence of wchar_t
2941 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2942 wxConvISO8859_1,
2943 wxConvLocal,
2944 wxConvUTF8;
2945
2946 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T