]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
fixed win32 code to not use MB_ERR_INVALID_CHARS on win version where it isn't availa...
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
6 // Modified by:
7 // Created: 29/01/98
8 // RCS-ID: $Id$
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
14
15 // ============================================================================
16 // declarations
17 // ============================================================================
18
19 // ----------------------------------------------------------------------------
20 // headers
21 // ----------------------------------------------------------------------------
22
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
25
26 #ifdef __BORLANDC__
27 #pragma hdrstop
28 #endif
29
30 #ifndef WX_PRECOMP
31 #include "wx/intl.h"
32 #include "wx/log.h"
33 #endif // WX_PRECOMP
34
35 #include "wx/strconv.h"
36
37 #if wxUSE_WCHAR_T
38
39 #ifdef __WINDOWS__
40 #include "wx/msw/private.h"
41 #include "wx/msw/missing.h"
42 #endif
43
44 #ifndef __WXWINCE__
45 #include <errno.h>
46 #endif
47
48 #include <ctype.h>
49 #include <string.h>
50 #include <stdlib.h>
51
52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54 #endif // __WIN32__ but !__WXMICROWIN__
55
56 #ifdef __SALFORDC__
57 #include <clib.h>
58 #endif
59
60 #ifdef HAVE_ICONV
61 #include <iconv.h>
62 #include "wx/thread.h"
63 #endif
64
65 #include "wx/encconv.h"
66 #include "wx/fontmap.h"
67 #include "wx/utils.h"
68
69 #ifdef __WXMAC__
70 #ifndef __DARWIN__
71 #include <ATSUnicode.h>
72 #include <TextCommon.h>
73 #include <TextEncodingConverter.h>
74 #endif
75
76 #include "wx/mac/private.h" // includes mac headers
77 #endif
78
79 #define TRACE_STRCONV _T("strconv")
80
81 #if SIZEOF_WCHAR_T == 2
82 #define WC_UTF16
83 #endif
84
85 // ============================================================================
86 // implementation
87 // ============================================================================
88
89 // ----------------------------------------------------------------------------
90 // UTF-16 en/decoding to/from UCS-4
91 // ----------------------------------------------------------------------------
92
93
94 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
95 {
96 if (input<=0xffff)
97 {
98 if (output)
99 *output = (wxUint16) input;
100 return 1;
101 }
102 else if (input>=0x110000)
103 {
104 return (size_t)-1;
105 }
106 else
107 {
108 if (output)
109 {
110 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
111 *output = (wxUint16) ((input&0x3ff)+0xdc00);
112 }
113 return 2;
114 }
115 }
116
117 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
118 {
119 if ((*input<0xd800) || (*input>0xdfff))
120 {
121 output = *input;
122 return 1;
123 }
124 else if ((input[1]<0xdc00) || (input[1]>0xdfff))
125 {
126 output = *input;
127 return (size_t)-1;
128 }
129 else
130 {
131 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
132 return 2;
133 }
134 }
135
136
137 // ----------------------------------------------------------------------------
138 // wxMBConv
139 // ----------------------------------------------------------------------------
140
141 wxMBConv::~wxMBConv()
142 {
143 // nothing to do here (necessary for Darwin linking probably)
144 }
145
146 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
147 {
148 if ( psz )
149 {
150 // calculate the length of the buffer needed first
151 size_t nLen = MB2WC(NULL, psz, 0);
152 if ( nLen != (size_t)-1 )
153 {
154 // now do the actual conversion
155 wxWCharBuffer buf(nLen);
156 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
157 if ( nLen != (size_t)-1 )
158 {
159 return buf;
160 }
161 }
162 }
163
164 wxWCharBuffer buf((wchar_t *)NULL);
165
166 return buf;
167 }
168
169 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
170 {
171 if ( pwz )
172 {
173 size_t nLen = WC2MB(NULL, pwz, 0);
174 if ( nLen != (size_t)-1 )
175 {
176 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
177 nLen = WC2MB(buf.data(), pwz, nLen + 4);
178 if ( nLen != (size_t)-1 )
179 {
180 return buf;
181 }
182 }
183 }
184
185 wxCharBuffer buf((char *)NULL);
186
187 return buf;
188 }
189
190 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
191 {
192 wxASSERT(pOutSize != NULL);
193
194 const char* szEnd = szString + nStringLen + 1;
195 const char* szPos = szString;
196 const char* szStart = szPos;
197
198 size_t nActualLength = 0;
199 size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
200
201 wxWCharBuffer theBuffer(nCurrentSize);
202
203 //Convert the string until the length() is reached, continuing the
204 //loop every time a null character is reached
205 while(szPos != szEnd)
206 {
207 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
208
209 //Get the length of the current (sub)string
210 size_t nLen = MB2WC(NULL, szPos, 0);
211
212 //Invalid conversion?
213 if( nLen == (size_t)-1 )
214 {
215 *pOutSize = 0;
216 theBuffer.data()[0u] = wxT('\0');
217 return theBuffer;
218 }
219
220
221 //Increase the actual length (+1 for current null character)
222 nActualLength += nLen + 1;
223
224 //if buffer too big, realloc the buffer
225 if (nActualLength > (nCurrentSize+1))
226 {
227 wxWCharBuffer theNewBuffer(nCurrentSize << 1);
228 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
229 theBuffer = theNewBuffer;
230 nCurrentSize <<= 1;
231 }
232
233 //Convert the current (sub)string
234 if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
235 {
236 *pOutSize = 0;
237 theBuffer.data()[0u] = wxT('\0');
238 return theBuffer;
239 }
240
241 //Increment to next (sub)string
242 //Note that we have to use strlen instead of nLen here
243 //because XX2XX gives us the size of the output buffer,
244 //which is not necessarily the length of the string
245 szPos += strlen(szPos) + 1;
246 }
247
248 //success - return actual length and the buffer
249 *pOutSize = nActualLength;
250 return theBuffer;
251 }
252
253 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
254 {
255 wxASSERT(pOutSize != NULL);
256
257 const wchar_t* szEnd = szString + nStringLen + 1;
258 const wchar_t* szPos = szString;
259 const wchar_t* szStart = szPos;
260
261 size_t nActualLength = 0;
262 size_t nCurrentSize = nStringLen << 2; //try * 4 first
263
264 wxCharBuffer theBuffer(nCurrentSize);
265
266 //Convert the string until the length() is reached, continuing the
267 //loop every time a null character is reached
268 while(szPos != szEnd)
269 {
270 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
271
272 //Get the length of the current (sub)string
273 size_t nLen = WC2MB(NULL, szPos, 0);
274
275 //Invalid conversion?
276 if( nLen == (size_t)-1 )
277 {
278 *pOutSize = 0;
279 theBuffer.data()[0u] = wxT('\0');
280 return theBuffer;
281 }
282
283 //Increase the actual length (+1 for current null character)
284 nActualLength += nLen + 1;
285
286 //if buffer too big, realloc the buffer
287 if (nActualLength > (nCurrentSize+1))
288 {
289 wxCharBuffer theNewBuffer(nCurrentSize << 1);
290 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
291 theBuffer = theNewBuffer;
292 nCurrentSize <<= 1;
293 }
294
295 //Convert the current (sub)string
296 if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
297 {
298 *pOutSize = 0;
299 theBuffer.data()[0u] = wxT('\0');
300 return theBuffer;
301 }
302
303 //Increment to next (sub)string
304 //Note that we have to use wxWcslen instead of nLen here
305 //because XX2XX gives us the size of the output buffer,
306 //which is not necessarily the length of the string
307 szPos += wxWcslen(szPos) + 1;
308 }
309
310 //success - return actual length and the buffer
311 *pOutSize = nActualLength;
312 return theBuffer;
313 }
314
315 // ----------------------------------------------------------------------------
316 // wxMBConvLibc
317 // ----------------------------------------------------------------------------
318
319 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
320 {
321 return wxMB2WC(buf, psz, n);
322 }
323
324 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
325 {
326 return wxWC2MB(buf, psz, n);
327 }
328
329 #ifdef __UNIX__
330
331 // ----------------------------------------------------------------------------
332 // wxConvBrokenFileNames
333 // ----------------------------------------------------------------------------
334
335 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
336 {
337 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
338 || wxStricmp(charset, _T("UTF8")) == 0 )
339 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
340 else
341 m_conv = new wxCSConv(charset);
342 }
343
344 size_t
345 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
346 const char *psz,
347 size_t outputSize) const
348 {
349 return m_conv->MB2WC( outputBuf, psz, outputSize );
350 }
351
352 size_t
353 wxConvBrokenFileNames::WC2MB(char *outputBuf,
354 const wchar_t *psz,
355 size_t outputSize) const
356 {
357 return m_conv->WC2MB( outputBuf, psz, outputSize );
358 }
359
360 #endif
361
362 // ----------------------------------------------------------------------------
363 // UTF-7
364 // ----------------------------------------------------------------------------
365
366 // Implementation (C) 2004 Fredrik Roubert
367
//
// BASE64 decoding table
//
// Indexed by the value of the input byte: contains the 6 bit value of the
// BASE64 digits ('A'-'Z', 'a'-'z', '0'-'9', '+' and '/') and 0xff for all
// the other bytes.
//
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0' to '9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x4f: 'A' to 'O'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50 - 0x5f: 'P' to 'Z'
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x6f: 'a' to 'o'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70 - 0x7f: 'p' to 'z'
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
406
407 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
408 {
409 size_t len = 0;
410
411 while (*psz && ((!buf) || (len < n)))
412 {
413 unsigned char cc = *psz++;
414 if (cc != '+')
415 {
416 // plain ASCII char
417 if (buf)
418 *buf++ = cc;
419 len++;
420 }
421 else if (*psz == '-')
422 {
423 // encoded plus sign
424 if (buf)
425 *buf++ = cc;
426 len++;
427 psz++;
428 }
429 else
430 {
431 // BASE64 encoded string
432 bool lsb;
433 unsigned char c;
434 unsigned int d, l;
435 for (lsb = false, d = 0, l = 0;
436 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
437 {
438 d <<= 6;
439 d += cc;
440 for (l += 6; l >= 8; lsb = !lsb)
441 {
442 c = (unsigned char)((d >> (l -= 8)) % 256);
443 if (lsb)
444 {
445 if (buf)
446 *buf++ |= c;
447 len ++;
448 }
449 else
450 if (buf)
451 *buf = (wchar_t)(c << 8);
452 }
453 }
454 if (*psz == '-')
455 psz++;
456 }
457 }
458 if (buf && (len < n))
459 *buf = 0;
460 return len;
461 }
462
//
// BASE64 encoding table
//
// Maps a 6 bit value to the BASE64 digit representing it.
//
static const unsigned char utf7enb64[] =
{
    // 0 - 25
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 26 - 51
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // 52 - 61
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 62 and 63
    '+', '/'
};
477
//
// UTF-7 encoding table
//
// Gives the class of each of the 128 ASCII characters:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    // 0x00 - 0x0f
    3, 3, 3, 3, 3, 3, 3, 3,
    3, 2, 2, 3, 3, 2, 3, 3,
    // 0x10 - 0x1f
    3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3,
    // 0x20 - 0x2f
    2, 1, 1, 1, 1, 1, 1, 0,
    0, 0, 1, 3, 0, 0, 0, 3,
    // 0x30 - 0x3f
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 1, 1, 1, 0,
    // 0x40 - 0x4f
    1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    // 0x50 - 0x5f
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 3, 1, 1, 1,
    // 0x60 - 0x6f
    1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    // 0x70 - 0x7f
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 1, 1, 3, 3
};
497
498 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
499 {
500
501
502 size_t len = 0;
503
504 while (*psz && ((!buf) || (len < n)))
505 {
506 wchar_t cc = *psz++;
507 if (cc < 0x80 && utf7encode[cc] < 1)
508 {
509 // plain ASCII char
510 if (buf)
511 *buf++ = (char)cc;
512 len++;
513 }
514 #ifndef WC_UTF16
515 else if (((wxUint32)cc) > 0xffff)
516 {
517 // no surrogate pair generation (yet?)
518 return (size_t)-1;
519 }
520 #endif
521 else
522 {
523 if (buf)
524 *buf++ = '+';
525 len++;
526 if (cc != '+')
527 {
528 // BASE64 encode string
529 unsigned int lsb, d, l;
530 for (d = 0, l = 0; /*nothing*/; psz++)
531 {
532 for (lsb = 0; lsb < 2; lsb ++)
533 {
534 d <<= 8;
535 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
536
537 for (l += 8; l >= 6; )
538 {
539 l -= 6;
540 if (buf)
541 *buf++ = utf7enb64[(d >> l) % 64];
542 len++;
543 }
544 }
545 cc = *psz;
546 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
547 break;
548 }
549 if (l != 0)
550 {
551 if (buf)
552 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
553 len++;
554 }
555 }
556 if (buf)
557 *buf++ = '-';
558 len++;
559 }
560 }
561 if (buf && (len < n))
562 *buf = 0;
563 return len;
564 }
565
566 // ----------------------------------------------------------------------------
567 // UTF-8
568 // ----------------------------------------------------------------------------
569
570 static wxUint32 utf8_max[]=
571 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
572
573 // boundaries of the private use area we use to (temporarily) remap invalid
574 // characters invalid in a UTF-8 encoded string
575 const wxUint32 wxUnicodePUA = 0x100000;
576 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
577
578 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
579 {
580 size_t len = 0;
581
582 while (*psz && ((!buf) || (len < n)))
583 {
584 const char *opsz = psz;
585 bool invalid = false;
586 unsigned char cc = *psz++, fc = cc;
587 unsigned cnt;
588 for (cnt = 0; fc & 0x80; cnt++)
589 fc <<= 1;
590 if (!cnt)
591 {
592 // plain ASCII char
593 if (buf)
594 *buf++ = cc;
595 len++;
596
597 // escape the escape character for octal escapes
598 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
599 && cc == '\\' && (!buf || len < n))
600 {
601 if (buf)
602 *buf++ = cc;
603 len++;
604 }
605 }
606 else
607 {
608 cnt--;
609 if (!cnt)
610 {
611 // invalid UTF-8 sequence
612 invalid = true;
613 }
614 else
615 {
616 unsigned ocnt = cnt - 1;
617 wxUint32 res = cc & (0x3f >> cnt);
618 while (cnt--)
619 {
620 cc = *psz;
621 if ((cc & 0xC0) != 0x80)
622 {
623 // invalid UTF-8 sequence
624 invalid = true;
625 break;
626 }
627 psz++;
628 res = (res << 6) | (cc & 0x3f);
629 }
630 if (invalid || res <= utf8_max[ocnt])
631 {
632 // illegal UTF-8 encoding
633 invalid = true;
634 }
635 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
636 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
637 {
638 // if one of our PUA characters turns up externally
639 // it must also be treated as an illegal sequence
640 // (a bit like you have to escape an escape character)
641 invalid = true;
642 }
643 else
644 {
645 #ifdef WC_UTF16
646 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
647 size_t pa = encode_utf16(res, (wxUint16 *)buf);
648 if (pa == (size_t)-1)
649 {
650 invalid = true;
651 }
652 else
653 {
654 if (buf)
655 buf += pa;
656 len += pa;
657 }
658 #else // !WC_UTF16
659 if (buf)
660 *buf++ = (wchar_t)res;
661 len++;
662 #endif // WC_UTF16/!WC_UTF16
663 }
664 }
665 if (invalid)
666 {
667 if (m_options & MAP_INVALID_UTF8_TO_PUA)
668 {
669 while (opsz < psz && (!buf || len < n))
670 {
671 #ifdef WC_UTF16
672 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
673 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
674 wxASSERT(pa != (size_t)-1);
675 if (buf)
676 buf += pa;
677 opsz++;
678 len += pa;
679 #else
680 if (buf)
681 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
682 opsz++;
683 len++;
684 #endif
685 }
686 }
687 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
688 {
689 while (opsz < psz && (!buf || len < n))
690 {
691 if ( buf && len + 3 < n )
692 {
693 unsigned char on = *opsz;
694 *buf++ = L'\\';
695 *buf++ = (wchar_t)( L'0' + on / 0100 );
696 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
697 *buf++ = (wchar_t)( L'0' + on % 010 );
698 }
699 opsz++;
700 len += 4;
701 }
702 }
703 else // MAP_INVALID_UTF8_NOT
704 {
705 return (size_t)-1;
706 }
707 }
708 }
709 }
710 if (buf && (len < n))
711 *buf = 0;
712 return len;
713 }
714
// returns true if wch is one of the octal digits, i.e. L'0' to L'7'
static inline bool isoctal(wchar_t wch)
{
    return wch >= L'0' && wch <= L'7';
}
719
720 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
721 {
722 size_t len = 0;
723
724 while (*psz && ((!buf) || (len < n)))
725 {
726 wxUint32 cc;
727 #ifdef WC_UTF16
728 // cast is ok for WC_UTF16
729 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
730 psz += (pa == (size_t)-1) ? 1 : pa;
731 #else
732 cc=(*psz++) & 0x7fffffff;
733 #endif
734
735 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
736 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
737 {
738 if (buf)
739 *buf++ = (char)(cc - wxUnicodePUA);
740 len++;
741 }
742 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
743 && cc == L'\\' && psz[0] == L'\\' )
744 {
745 if (buf)
746 *buf++ = (char)cc;
747 psz++;
748 len++;
749 }
750 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
751 cc == L'\\' &&
752 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
753 {
754 if (buf)
755 {
756 *buf++ = (char) ((psz[0] - L'0')*0100 +
757 (psz[1] - L'0')*010 +
758 (psz[2] - L'0'));
759 }
760
761 psz += 3;
762 len++;
763 }
764 else
765 {
766 unsigned cnt;
767 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
768 if (!cnt)
769 {
770 // plain ASCII char
771 if (buf)
772 *buf++ = (char) cc;
773 len++;
774 }
775
776 else
777 {
778 len += cnt + 1;
779 if (buf)
780 {
781 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
782 while (cnt--)
783 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
784 }
785 }
786 }
787 }
788
789 if (buf && (len<n))
790 *buf = 0;
791
792 return len;
793 }
794
795 // ----------------------------------------------------------------------------
796 // UTF-16
797 // ----------------------------------------------------------------------------
798
799 #ifdef WORDS_BIGENDIAN
800 #define wxMBConvUTF16straight wxMBConvUTF16BE
801 #define wxMBConvUTF16swap wxMBConvUTF16LE
802 #else
803 #define wxMBConvUTF16swap wxMBConvUTF16BE
804 #define wxMBConvUTF16straight wxMBConvUTF16LE
805 #endif
806
807
808 #ifdef WC_UTF16
809
810 // copy 16bit MB to 16bit String
811 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
812 {
813 size_t len=0;
814
815 while (*(wxUint16*)psz && (!buf || len < n))
816 {
817 if (buf)
818 *buf++ = *(wxUint16*)psz;
819 len++;
820
821 psz += sizeof(wxUint16);
822 }
823 if (buf && len<n) *buf=0;
824
825 return len;
826 }
827
828
829 // copy 16bit String to 16bit MB
830 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
831 {
832 size_t len=0;
833
834 while (*psz && (!buf || len < n))
835 {
836 if (buf)
837 {
838 *(wxUint16*)buf = *psz;
839 buf += sizeof(wxUint16);
840 }
841 len += sizeof(wxUint16);
842 psz++;
843 }
844 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
845
846 return len;
847 }
848
849
850 // swap 16bit MB to 16bit String
851 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
852 {
853 size_t len=0;
854
855 while (*(wxUint16*)psz && (!buf || len < n))
856 {
857 if (buf)
858 {
859 ((char *)buf)[0] = psz[1];
860 ((char *)buf)[1] = psz[0];
861 buf++;
862 }
863 len++;
864 psz += sizeof(wxUint16);
865 }
866 if (buf && len<n) *buf=0;
867
868 return len;
869 }
870
871
872 // swap 16bit MB to 16bit String
873 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
874 {
875 size_t len=0;
876
877 while (*psz && (!buf || len < n))
878 {
879 if (buf)
880 {
881 *buf++ = ((char*)psz)[1];
882 *buf++ = ((char*)psz)[0];
883 }
884 len += sizeof(wxUint16);
885 psz++;
886 }
887 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
888
889 return len;
890 }
891
892
893 #else // WC_UTF16
894
895
896 // copy 16bit MB to 32bit String
897 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
898 {
899 size_t len=0;
900
901 while (*(wxUint16*)psz && (!buf || len < n))
902 {
903 wxUint32 cc;
904 size_t pa=decode_utf16((wxUint16*)psz, cc);
905 if (pa == (size_t)-1)
906 return pa;
907
908 if (buf)
909 *buf++ = (wchar_t)cc;
910 len++;
911 psz += pa * sizeof(wxUint16);
912 }
913 if (buf && len<n) *buf=0;
914
915 return len;
916 }
917
918
919 // copy 32bit String to 16bit MB
920 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
921 {
922 size_t len=0;
923
924 while (*psz && (!buf || len < n))
925 {
926 wxUint16 cc[2];
927 size_t pa=encode_utf16(*psz, cc);
928
929 if (pa == (size_t)-1)
930 return pa;
931
932 if (buf)
933 {
934 *(wxUint16*)buf = cc[0];
935 buf += sizeof(wxUint16);
936 if (pa > 1)
937 {
938 *(wxUint16*)buf = cc[1];
939 buf += sizeof(wxUint16);
940 }
941 }
942
943 len += pa*sizeof(wxUint16);
944 psz++;
945 }
946 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
947
948 return len;
949 }
950
951
952 // swap 16bit MB to 32bit String
953 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
954 {
955 size_t len=0;
956
957 while (*(wxUint16*)psz && (!buf || len < n))
958 {
959 wxUint32 cc;
960 char tmp[4];
961 tmp[0]=psz[1]; tmp[1]=psz[0];
962 tmp[2]=psz[3]; tmp[3]=psz[2];
963
964 size_t pa=decode_utf16((wxUint16*)tmp, cc);
965 if (pa == (size_t)-1)
966 return pa;
967
968 if (buf)
969 *buf++ = (wchar_t)cc;
970
971 len++;
972 psz += pa * sizeof(wxUint16);
973 }
974 if (buf && len<n) *buf=0;
975
976 return len;
977 }
978
979
980 // swap 32bit String to 16bit MB
981 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
982 {
983 size_t len=0;
984
985 while (*psz && (!buf || len < n))
986 {
987 wxUint16 cc[2];
988 size_t pa=encode_utf16(*psz, cc);
989
990 if (pa == (size_t)-1)
991 return pa;
992
993 if (buf)
994 {
995 *buf++ = ((char*)cc)[1];
996 *buf++ = ((char*)cc)[0];
997 if (pa > 1)
998 {
999 *buf++ = ((char*)cc)[3];
1000 *buf++ = ((char*)cc)[2];
1001 }
1002 }
1003
1004 len += pa*sizeof(wxUint16);
1005 psz++;
1006 }
1007 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1008
1009 return len;
1010 }
1011
1012 #endif // WC_UTF16
1013
1014
1015 // ----------------------------------------------------------------------------
1016 // UTF-32
1017 // ----------------------------------------------------------------------------
1018
1019 #ifdef WORDS_BIGENDIAN
1020 #define wxMBConvUTF32straight wxMBConvUTF32BE
1021 #define wxMBConvUTF32swap wxMBConvUTF32LE
1022 #else
1023 #define wxMBConvUTF32swap wxMBConvUTF32BE
1024 #define wxMBConvUTF32straight wxMBConvUTF32LE
1025 #endif
1026
1027
1028 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1029 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1030
1031
1032 #ifdef WC_UTF16
1033
1034 // copy 32bit MB to 16bit String
1035 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1036 {
1037 size_t len=0;
1038
1039 while (*(wxUint32*)psz && (!buf || len < n))
1040 {
1041 wxUint16 cc[2];
1042
1043 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1044 if (pa == (size_t)-1)
1045 return pa;
1046
1047 if (buf)
1048 {
1049 *buf++ = cc[0];
1050 if (pa > 1)
1051 *buf++ = cc[1];
1052 }
1053 len += pa;
1054 psz += sizeof(wxUint32);
1055 }
1056 if (buf && len<n) *buf=0;
1057
1058 return len;
1059 }
1060
1061
1062 // copy 16bit String to 32bit MB
1063 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1064 {
1065 size_t len=0;
1066
1067 while (*psz && (!buf || len < n))
1068 {
1069 wxUint32 cc;
1070
1071 // cast is ok for WC_UTF16
1072 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1073 if (pa == (size_t)-1)
1074 return pa;
1075
1076 if (buf)
1077 {
1078 *(wxUint32*)buf = cc;
1079 buf += sizeof(wxUint32);
1080 }
1081 len += sizeof(wxUint32);
1082 psz += pa;
1083 }
1084
1085 if (buf && len<=n-sizeof(wxUint32))
1086 *(wxUint32*)buf=0;
1087
1088 return len;
1089 }
1090
1091
1092
1093 // swap 32bit MB to 16bit String
1094 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1095 {
1096 size_t len=0;
1097
1098 while (*(wxUint32*)psz && (!buf || len < n))
1099 {
1100 char tmp[4];
1101 tmp[0] = psz[3]; tmp[1] = psz[2];
1102 tmp[2] = psz[1]; tmp[3] = psz[0];
1103
1104
1105 wxUint16 cc[2];
1106
1107 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1108 if (pa == (size_t)-1)
1109 return pa;
1110
1111 if (buf)
1112 {
1113 *buf++ = cc[0];
1114 if (pa > 1)
1115 *buf++ = cc[1];
1116 }
1117 len += pa;
1118 psz += sizeof(wxUint32);
1119 }
1120
1121 if (buf && len<n)
1122 *buf=0;
1123
1124 return len;
1125 }
1126
1127
1128 // swap 16bit String to 32bit MB
1129 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1130 {
1131 size_t len=0;
1132
1133 while (*psz && (!buf || len < n))
1134 {
1135 char cc[4];
1136
1137 // cast is ok for WC_UTF16
1138 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1139 if (pa == (size_t)-1)
1140 return pa;
1141
1142 if (buf)
1143 {
1144 *buf++ = cc[3];
1145 *buf++ = cc[2];
1146 *buf++ = cc[1];
1147 *buf++ = cc[0];
1148 }
1149 len += sizeof(wxUint32);
1150 psz += pa;
1151 }
1152
1153 if (buf && len<=n-sizeof(wxUint32))
1154 *(wxUint32*)buf=0;
1155
1156 return len;
1157 }
1158
1159 #else // WC_UTF16
1160
1161
1162 // copy 32bit MB to 32bit String
1163 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1164 {
1165 size_t len=0;
1166
1167 while (*(wxUint32*)psz && (!buf || len < n))
1168 {
1169 if (buf)
1170 *buf++ = (wchar_t)(*(wxUint32*)psz);
1171 len++;
1172 psz += sizeof(wxUint32);
1173 }
1174
1175 if (buf && len<n)
1176 *buf=0;
1177
1178 return len;
1179 }
1180
1181
1182 // copy 32bit String to 32bit MB
1183 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1184 {
1185 size_t len=0;
1186
1187 while (*psz && (!buf || len < n))
1188 {
1189 if (buf)
1190 {
1191 *(wxUint32*)buf = *psz;
1192 buf += sizeof(wxUint32);
1193 }
1194
1195 len += sizeof(wxUint32);
1196 psz++;
1197 }
1198
1199 if (buf && len<=n-sizeof(wxUint32))
1200 *(wxUint32*)buf=0;
1201
1202 return len;
1203 }
1204
1205
1206 // swap 32bit MB to 32bit String
1207 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1208 {
1209 size_t len=0;
1210
1211 while (*(wxUint32*)psz && (!buf || len < n))
1212 {
1213 if (buf)
1214 {
1215 ((char *)buf)[0] = psz[3];
1216 ((char *)buf)[1] = psz[2];
1217 ((char *)buf)[2] = psz[1];
1218 ((char *)buf)[3] = psz[0];
1219 buf++;
1220 }
1221 len++;
1222 psz += sizeof(wxUint32);
1223 }
1224
1225 if (buf && len<n)
1226 *buf=0;
1227
1228 return len;
1229 }
1230
1231
1232 // swap 32bit String to 32bit MB
1233 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1234 {
1235 size_t len=0;
1236
1237 while (*psz && (!buf || len < n))
1238 {
1239 if (buf)
1240 {
1241 *buf++ = ((char *)psz)[3];
1242 *buf++ = ((char *)psz)[2];
1243 *buf++ = ((char *)psz)[1];
1244 *buf++ = ((char *)psz)[0];
1245 }
1246 len += sizeof(wxUint32);
1247 psz++;
1248 }
1249
1250 if (buf && len<=n-sizeof(wxUint32))
1251 *(wxUint32*)buf=0;
1252
1253 return len;
1254 }
1255
1256
1257 #endif // WC_UTF16
1258
1259
1260 // ============================================================================
1261 // The classes doing conversion using the iconv_xxx() functions
1262 // ============================================================================
1263
1264 #ifdef HAVE_ICONV
1265
1266 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1267 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1268 // (unless there's yet another bug in glibc) the only case when iconv()
1269 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1270 // left in the input buffer -- when _real_ error occurs,
1271 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1272 // iconv() failure.
1273 // [This bug does not appear in glibc 2.2.]
1274 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1275 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1276 (errno != E2BIG || bufLeft != 0))
1277 #else
1278 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1279 #endif
1280
1281 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1282
1283 #define ICONV_T_INVALID ((iconv_t)-1)
1284
1285 #if SIZEOF_WCHAR_T == 4
1286 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1287 #define WC_ENC wxFONTENCODING_UTF32
1288 #elif SIZEOF_WCHAR_T == 2
1289 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1290 #define WC_ENC wxFONTENCODING_UTF16
1291 #else // sizeof(wchar_t) != 2 nor 4
1292 // does this ever happen?
1293 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1294 #endif
1295
1296 // ----------------------------------------------------------------------------
1297 // wxMBConv_iconv: encapsulates an iconv character set
1298 // ----------------------------------------------------------------------------
1299
1300 class wxMBConv_iconv : public wxMBConv
1301 {
1302 public:
1303 wxMBConv_iconv(const wxChar *name);
1304 virtual ~wxMBConv_iconv();
1305
1306 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1307 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1308
1309 bool IsOk() const
1310 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1311
1312 protected:
1313 // the iconv handlers used to translate from multibyte to wide char and in
1314 // the other direction
1315 iconv_t m2w,
1316 w2m;
1317 #if wxUSE_THREADS
1318 // guards access to m2w and w2m objects
1319 wxMutex m_iconvMutex;
1320 #endif
1321
1322 private:
1323 // the name (for iconv_open()) of a wide char charset -- if none is
1324 // available on this machine, it will remain NULL
1325 static wxString ms_wcCharsetName;
1326
1327 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1328 // different endian-ness than the native one
1329 static bool ms_wcNeedsSwap;
1330 };
1331
1332 // make the constructor available for unit testing
1333 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1334 {
1335 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1336 if ( !result->IsOk() )
1337 {
1338 delete result;
1339 return 0;
1340 }
1341 return result;
1342 }
1343
1344 wxString wxMBConv_iconv::ms_wcCharsetName;
1345 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1346
1347 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1348 {
1349 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1350 // names for the charsets
1351 const wxCharBuffer cname(wxString(name).ToAscii());
1352
1353 // check for charset that represents wchar_t:
1354 if ( ms_wcCharsetName.empty() )
1355 {
1356 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1357
1358 #if wxUSE_FONTMAP
1359 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1360 #else // !wxUSE_FONTMAP
1361 static const wxChar *names[] =
1362 {
1363 #if SIZEOF_WCHAR_T == 4
1364 _T("UCS-4"),
1365 #elif SIZEOF_WCHAR_T = 2
1366 _T("UCS-2"),
1367 #endif
1368 NULL
1369 };
1370 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1371
1372 for ( ; *names && ms_wcCharsetName.empty(); ++names )
1373 {
1374 const wxString nameCS(*names);
1375
1376 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1377 wxString nameXE(nameCS);
1378 #ifdef WORDS_BIGENDIAN
1379 nameXE += _T("BE");
1380 #else // little endian
1381 nameXE += _T("LE");
1382 #endif
1383
1384 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1385 nameXE.c_str());
1386
1387 m2w = iconv_open(nameXE.ToAscii(), cname);
1388 if ( m2w == ICONV_T_INVALID )
1389 {
1390 // try charset w/o bytesex info (e.g. "UCS4")
1391 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1392 nameCS.c_str());
1393 m2w = iconv_open(nameCS.ToAscii(), cname);
1394
1395 // and check for bytesex ourselves:
1396 if ( m2w != ICONV_T_INVALID )
1397 {
1398 char buf[2], *bufPtr;
1399 wchar_t wbuf[2], *wbufPtr;
1400 size_t insz, outsz;
1401 size_t res;
1402
1403 buf[0] = 'A';
1404 buf[1] = 0;
1405 wbuf[0] = 0;
1406 insz = 2;
1407 outsz = SIZEOF_WCHAR_T * 2;
1408 wbufPtr = wbuf;
1409 bufPtr = buf;
1410
1411 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1412 (char**)&wbufPtr, &outsz);
1413
1414 if (ICONV_FAILED(res, insz))
1415 {
1416 wxLogLastError(wxT("iconv"));
1417 wxLogError(_("Conversion to charset '%s' doesn't work."),
1418 nameCS.c_str());
1419 }
1420 else // ok, can convert to this encoding, remember it
1421 {
1422 ms_wcCharsetName = nameCS;
1423 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1424 }
1425 }
1426 }
1427 else // use charset not requiring byte swapping
1428 {
1429 ms_wcCharsetName = nameXE;
1430 }
1431 }
1432
1433 wxLogTrace(TRACE_STRCONV,
1434 wxT("iconv wchar_t charset is \"%s\"%s"),
1435 ms_wcCharsetName.empty() ? _T("<none>")
1436 : ms_wcCharsetName.c_str(),
1437 ms_wcNeedsSwap ? _T(" (needs swap)")
1438 : _T(""));
1439 }
1440 else // we already have ms_wcCharsetName
1441 {
1442 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1443 }
1444
1445 if ( ms_wcCharsetName.empty() )
1446 {
1447 w2m = ICONV_T_INVALID;
1448 }
1449 else
1450 {
1451 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1452 if ( w2m == ICONV_T_INVALID )
1453 {
1454 wxLogTrace(TRACE_STRCONV,
1455 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1456 ms_wcCharsetName.c_str(), cname.data());
1457 }
1458 }
1459 }
1460
1461 wxMBConv_iconv::~wxMBConv_iconv()
1462 {
1463 if ( m2w != ICONV_T_INVALID )
1464 iconv_close(m2w);
1465 if ( w2m != ICONV_T_INVALID )
1466 iconv_close(w2m);
1467 }
1468
1469 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1470 {
1471 #if wxUSE_THREADS
1472 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1473 // Unfortunately there is a couple of global wxCSConv objects such as
1474 // wxConvLocal that are used all over wx code, so we have to make sure
1475 // the handle is used by at most one thread at the time. Otherwise
1476 // only a few wx classes would be safe to use from non-main threads
1477 // as MB<->WC conversion would fail "randomly".
1478 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1479 #endif
1480
1481 size_t inbuf = strlen(psz);
1482 size_t outbuf = n * SIZEOF_WCHAR_T;
1483 size_t res, cres;
1484 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1485 wchar_t *bufPtr = buf;
1486 const char *pszPtr = psz;
1487
1488 if (buf)
1489 {
1490 // have destination buffer, convert there
1491 cres = iconv(m2w,
1492 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1493 (char**)&bufPtr, &outbuf);
1494 res = n - (outbuf / SIZEOF_WCHAR_T);
1495
1496 if (ms_wcNeedsSwap)
1497 {
1498 // convert to native endianness
1499 for ( unsigned i = 0; i < res; i++ )
1500 buf[n] = WC_BSWAP(buf[i]);
1501 }
1502
1503 // NB: iconv was given only strlen(psz) characters on input, and so
1504 // it couldn't convert the trailing zero. Let's do it ourselves
1505 // if there's some room left for it in the output buffer.
1506 if (res < n)
1507 buf[res] = 0;
1508 }
1509 else
1510 {
1511 // no destination buffer... convert using temp buffer
1512 // to calculate destination buffer requirement
1513 wchar_t tbuf[8];
1514 res = 0;
1515 do {
1516 bufPtr = tbuf;
1517 outbuf = 8*SIZEOF_WCHAR_T;
1518
1519 cres = iconv(m2w,
1520 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1521 (char**)&bufPtr, &outbuf );
1522
1523 res += 8-(outbuf/SIZEOF_WCHAR_T);
1524 } while ((cres==(size_t)-1) && (errno==E2BIG));
1525 }
1526
1527 if (ICONV_FAILED(cres, inbuf))
1528 {
1529 //VS: it is ok if iconv fails, hence trace only
1530 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1531 return (size_t)-1;
1532 }
1533
1534 return res;
1535 }
1536
1537 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1538 {
1539 #if wxUSE_THREADS
1540 // NB: explained in MB2WC
1541 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1542 #endif
1543
1544 size_t inlen = wxWcslen(psz);
1545 size_t inbuf = inlen * SIZEOF_WCHAR_T;
1546 size_t outbuf = n;
1547 size_t res, cres;
1548
1549 wchar_t *tmpbuf = 0;
1550
1551 if (ms_wcNeedsSwap)
1552 {
1553 // need to copy to temp buffer to switch endianness
1554 // (doing WC_BSWAP twice on the original buffer won't help, as it
1555 // could be in read-only memory, or be accessed in some other thread)
1556 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1557 for ( size_t i = 0; i < inlen; i++ )
1558 tmpbuf[n] = WC_BSWAP(psz[i]);
1559 tmpbuf[inlen] = L'\0';
1560 psz = tmpbuf;
1561 }
1562
1563 if (buf)
1564 {
1565 // have destination buffer, convert there
1566 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1567
1568 res = n-outbuf;
1569
1570 // NB: iconv was given only wcslen(psz) characters on input, and so
1571 // it couldn't convert the trailing zero. Let's do it ourselves
1572 // if there's some room left for it in the output buffer.
1573 if (res < n)
1574 buf[0] = 0;
1575 }
1576 else
1577 {
1578 // no destination buffer... convert using temp buffer
1579 // to calculate destination buffer requirement
1580 char tbuf[16];
1581 res = 0;
1582 do {
1583 buf = tbuf; outbuf = 16;
1584
1585 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1586
1587 res += 16 - outbuf;
1588 } while ((cres==(size_t)-1) && (errno==E2BIG));
1589 }
1590
1591 if (ms_wcNeedsSwap)
1592 {
1593 free(tmpbuf);
1594 }
1595
1596 if (ICONV_FAILED(cres, inbuf))
1597 {
1598 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1599 return (size_t)-1;
1600 }
1601
1602 return res;
1603 }
1604
1605 #endif // HAVE_ICONV
1606
1607
1608 // ============================================================================
1609 // Win32 conversion classes
1610 // ============================================================================
1611
1612 #ifdef wxHAVE_WIN32_MB2WC
1613
1614 // from utils.cpp
1615 #if wxUSE_FONTMAP
1616 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1617 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1618 #endif
1619
1620 class wxMBConv_win32 : public wxMBConv
1621 {
1622 public:
1623 wxMBConv_win32()
1624 {
1625 m_CodePage = CP_ACP;
1626 }
1627
1628 #if wxUSE_FONTMAP
1629 wxMBConv_win32(const wxChar* name)
1630 {
1631 m_CodePage = wxCharsetToCodepage(name);
1632 }
1633
1634 wxMBConv_win32(wxFontEncoding encoding)
1635 {
1636 m_CodePage = wxEncodingToCodepage(encoding);
1637 }
1638 #endif
1639
1640 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1641 {
1642 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1643 // the behaviour is not compatible with the Unix version (using iconv)
1644 // and break the library itself, e.g. wxTextInputStream::NextChar()
1645 // wouldn't work if reading an incomplete MB char didn't result in an
1646 // error
1647 //
1648 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1649 // an error (tested under Windows Server 2003) and apparently it is
1650 // done on purpose, i.e. the function accepts any input in this case
1651 // and although I'd prefer to return error on ill-formed output, our
1652 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1653 // explicitly ill-formed according to RFC 2152) neither so we don't
1654 // even have any fallback here...
1655 //
1656 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1657 // Win XP or newer and if it is specified on older versions, conversion
1658 // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1659 // fails. So we can only use the flag on newer Windows versions.
1660 // Additionally, the flag is not supported by UTF7, symbol and CJK
1661 // encodings. See here:
1662 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1663 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1664 int flags = 0;
1665 if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
1666 m_CodePage < 50000 &&
1667 IsAtLeastWin2kSP4() )
1668 {
1669 flags = MB_ERR_INVALID_CHARS;
1670 }
1671 else if ( m_CodePage == CP_UTF8 )
1672 {
1673 // Avoid round-trip in the special case of UTF-8 by using our
1674 // own UTF-8 conversion code:
1675 return wxMBConvUTF8().MB2WC(buf, psz, n);
1676 }
1677
1678 const size_t len = ::MultiByteToWideChar
1679 (
1680 m_CodePage, // code page
1681 flags, // flags: fall on error
1682 psz, // input string
1683 -1, // its length (NUL-terminated)
1684 buf, // output string
1685 buf ? n : 0 // size of output buffer
1686 );
1687 if ( !len )
1688 {
1689 // function totally failed
1690 return (size_t)-1;
1691 }
1692
1693 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1694 // check if we succeeded, by doing a double trip:
1695 if ( !flags && buf )
1696 {
1697 wxCharBuffer mbBuf(n);
1698 if ( ::WideCharToMultiByte
1699 (
1700 m_CodePage,
1701 0,
1702 buf,
1703 -1,
1704 mbBuf.data(),
1705 n,
1706 NULL,
1707 NULL
1708 ) == 0 ||
1709 strcmp(mbBuf, psz) != 0 )
1710 {
1711 // we didn't obtain the same thing we started from, hence
1712 // the conversion was lossy and we consider that it failed
1713 return (size_t)-1;
1714 }
1715 }
1716
1717 // note that it returns count of written chars for buf != NULL and size
1718 // of the needed buffer for buf == NULL so in either case the length of
1719 // the string (which never includes the terminating NUL) is one less
1720 return len - 1;
1721 }
1722
1723 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1724 {
1725 /*
1726 we have a problem here: by default, WideCharToMultiByte() may
1727 replace characters unrepresentable in the target code page with bad
1728 quality approximations such as turning "1/2" symbol (U+00BD) into
1729 "1" for the code pages which don't have it and we, obviously, want
1730 to avoid this at any price
1731
1732 the trouble is that this function does it _silently_, i.e. it won't
1733 even tell us whether it did or not... Win98/2000 and higher provide
1734 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1735 we have to resort to a round trip, i.e. check that converting back
1736 results in the same string -- this is, of course, expensive but
1737 otherwise we simply can't be sure to not garble the data.
1738 */
1739
1740 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1741 // it doesn't work with CJK encodings (which we test for rather roughly
1742 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1743 // supporting it
1744 BOOL usedDef wxDUMMY_INITIALIZE(false);
1745 BOOL *pUsedDef;
1746 int flags;
1747 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1748 {
1749 // it's our lucky day
1750 flags = WC_NO_BEST_FIT_CHARS;
1751 pUsedDef = &usedDef;
1752 }
1753 else // old system or unsupported encoding
1754 {
1755 flags = 0;
1756 pUsedDef = NULL;
1757 }
1758
1759 const size_t len = ::WideCharToMultiByte
1760 (
1761 m_CodePage, // code page
1762 flags, // either none or no best fit
1763 pwz, // input string
1764 -1, // it is (wide) NUL-terminated
1765 buf, // output buffer
1766 buf ? n : 0, // and its size
1767 NULL, // default "replacement" char
1768 pUsedDef // [out] was it used?
1769 );
1770
1771 if ( !len )
1772 {
1773 // function totally failed
1774 return (size_t)-1;
1775 }
1776
1777 // if we were really converting, check if we succeeded
1778 if ( buf )
1779 {
1780 if ( flags )
1781 {
1782 // check if the conversion failed, i.e. if any replacements
1783 // were done
1784 if ( usedDef )
1785 return (size_t)-1;
1786 }
1787 else // we must resort to double tripping...
1788 {
1789 wxWCharBuffer wcBuf(n);
1790 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1791 wcscmp(wcBuf, pwz) != 0 )
1792 {
1793 // we didn't obtain the same thing we started from, hence
1794 // the conversion was lossy and we consider that it failed
1795 return (size_t)-1;
1796 }
1797 }
1798 }
1799
1800 // see the comment above for the reason of "len - 1"
1801 return len - 1;
1802 }
1803
1804 bool IsOk() const { return m_CodePage != -1; }
1805
1806 private:
1807 static bool CanUseNoBestFit()
1808 {
1809 static int s_isWin98Or2k = -1;
1810
1811 if ( s_isWin98Or2k == -1 )
1812 {
1813 int verMaj, verMin;
1814 switch ( wxGetOsVersion(&verMaj, &verMin) )
1815 {
1816 case wxWIN95:
1817 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1818 break;
1819
1820 case wxWINDOWS_NT:
1821 s_isWin98Or2k = verMaj >= 5;
1822 break;
1823
1824 default:
1825 // unknown, be conseravtive by default
1826 s_isWin98Or2k = 0;
1827 }
1828
1829 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1830 }
1831
1832 return s_isWin98Or2k == 1;
1833 }
1834
1835 static bool IsAtLeastWin2kSP4()
1836 {
1837 static int s_isAtLeastWin2kSP4 = -1;
1838
1839 if ( s_isAtLeastWin2kSP4 == -1 )
1840 {
1841 OSVERSIONINFOEX ver;
1842
1843 memset(&ver, 0, sizeof(ver));
1844 ver.dwOSVersionInfoSize = sizeof(ver);
1845 GetVersionEx((OSVERSIONINFO*)&ver);
1846
1847 s_isAtLeastWin2kSP4 =
1848 ((ver.dwMajorVersion > 5) || // Vista+
1849 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
1850 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
1851 ver.wServicePackMajor >= 4)) // 2000 SP4+
1852 ? 1 : 0;
1853 }
1854
1855 return s_isAtLeastWin2kSP4 == 1;
1856 }
1857
1858 long m_CodePage;
1859 };
1860
1861 #endif // wxHAVE_WIN32_MB2WC
1862
1863 // ============================================================================
1864 // Cocoa conversion classes
1865 // ============================================================================
1866
1867 #if defined(__WXCOCOA__)
1868
// RN: There is no UTF-32 support in either Core Foundation or
// Cocoa. Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1872
1873 #include <CoreFoundation/CFString.h>
1874 #include <CoreFoundation/CFStringEncodingExt.h>
1875
1876 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1877 {
1878 CFStringEncoding enc = kCFStringEncodingInvalidId ;
1879 if ( encoding == wxFONTENCODING_DEFAULT )
1880 {
1881 enc = CFStringGetSystemEncoding();
1882 }
1883 else switch( encoding)
1884 {
1885 case wxFONTENCODING_ISO8859_1 :
1886 enc = kCFStringEncodingISOLatin1 ;
1887 break ;
1888 case wxFONTENCODING_ISO8859_2 :
1889 enc = kCFStringEncodingISOLatin2;
1890 break ;
1891 case wxFONTENCODING_ISO8859_3 :
1892 enc = kCFStringEncodingISOLatin3 ;
1893 break ;
1894 case wxFONTENCODING_ISO8859_4 :
1895 enc = kCFStringEncodingISOLatin4;
1896 break ;
1897 case wxFONTENCODING_ISO8859_5 :
1898 enc = kCFStringEncodingISOLatinCyrillic;
1899 break ;
1900 case wxFONTENCODING_ISO8859_6 :
1901 enc = kCFStringEncodingISOLatinArabic;
1902 break ;
1903 case wxFONTENCODING_ISO8859_7 :
1904 enc = kCFStringEncodingISOLatinGreek;
1905 break ;
1906 case wxFONTENCODING_ISO8859_8 :
1907 enc = kCFStringEncodingISOLatinHebrew;
1908 break ;
1909 case wxFONTENCODING_ISO8859_9 :
1910 enc = kCFStringEncodingISOLatin5;
1911 break ;
1912 case wxFONTENCODING_ISO8859_10 :
1913 enc = kCFStringEncodingISOLatin6;
1914 break ;
1915 case wxFONTENCODING_ISO8859_11 :
1916 enc = kCFStringEncodingISOLatinThai;
1917 break ;
1918 case wxFONTENCODING_ISO8859_13 :
1919 enc = kCFStringEncodingISOLatin7;
1920 break ;
1921 case wxFONTENCODING_ISO8859_14 :
1922 enc = kCFStringEncodingISOLatin8;
1923 break ;
1924 case wxFONTENCODING_ISO8859_15 :
1925 enc = kCFStringEncodingISOLatin9;
1926 break ;
1927
1928 case wxFONTENCODING_KOI8 :
1929 enc = kCFStringEncodingKOI8_R;
1930 break ;
1931 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1932 enc = kCFStringEncodingDOSRussian;
1933 break ;
1934
1935 // case wxFONTENCODING_BULGARIAN :
1936 // enc = ;
1937 // break ;
1938
1939 case wxFONTENCODING_CP437 :
1940 enc =kCFStringEncodingDOSLatinUS ;
1941 break ;
1942 case wxFONTENCODING_CP850 :
1943 enc = kCFStringEncodingDOSLatin1;
1944 break ;
1945 case wxFONTENCODING_CP852 :
1946 enc = kCFStringEncodingDOSLatin2;
1947 break ;
1948 case wxFONTENCODING_CP855 :
1949 enc = kCFStringEncodingDOSCyrillic;
1950 break ;
1951 case wxFONTENCODING_CP866 :
1952 enc =kCFStringEncodingDOSRussian ;
1953 break ;
1954 case wxFONTENCODING_CP874 :
1955 enc = kCFStringEncodingDOSThai;
1956 break ;
1957 case wxFONTENCODING_CP932 :
1958 enc = kCFStringEncodingDOSJapanese;
1959 break ;
1960 case wxFONTENCODING_CP936 :
1961 enc =kCFStringEncodingDOSChineseSimplif ;
1962 break ;
1963 case wxFONTENCODING_CP949 :
1964 enc = kCFStringEncodingDOSKorean;
1965 break ;
1966 case wxFONTENCODING_CP950 :
1967 enc = kCFStringEncodingDOSChineseTrad;
1968 break ;
1969 case wxFONTENCODING_CP1250 :
1970 enc = kCFStringEncodingWindowsLatin2;
1971 break ;
1972 case wxFONTENCODING_CP1251 :
1973 enc =kCFStringEncodingWindowsCyrillic ;
1974 break ;
1975 case wxFONTENCODING_CP1252 :
1976 enc =kCFStringEncodingWindowsLatin1 ;
1977 break ;
1978 case wxFONTENCODING_CP1253 :
1979 enc = kCFStringEncodingWindowsGreek;
1980 break ;
1981 case wxFONTENCODING_CP1254 :
1982 enc = kCFStringEncodingWindowsLatin5;
1983 break ;
1984 case wxFONTENCODING_CP1255 :
1985 enc =kCFStringEncodingWindowsHebrew ;
1986 break ;
1987 case wxFONTENCODING_CP1256 :
1988 enc =kCFStringEncodingWindowsArabic ;
1989 break ;
1990 case wxFONTENCODING_CP1257 :
1991 enc = kCFStringEncodingWindowsBalticRim;
1992 break ;
1993 // This only really encodes to UTF7 (if that) evidently
1994 // case wxFONTENCODING_UTF7 :
1995 // enc = kCFStringEncodingNonLossyASCII ;
1996 // break ;
1997 case wxFONTENCODING_UTF8 :
1998 enc = kCFStringEncodingUTF8 ;
1999 break ;
2000 case wxFONTENCODING_EUC_JP :
2001 enc = kCFStringEncodingEUC_JP;
2002 break ;
2003 case wxFONTENCODING_UTF16 :
2004 enc = kCFStringEncodingUnicode ;
2005 break ;
2006 case wxFONTENCODING_MACROMAN :
2007 enc = kCFStringEncodingMacRoman ;
2008 break ;
2009 case wxFONTENCODING_MACJAPANESE :
2010 enc = kCFStringEncodingMacJapanese ;
2011 break ;
2012 case wxFONTENCODING_MACCHINESETRAD :
2013 enc = kCFStringEncodingMacChineseTrad ;
2014 break ;
2015 case wxFONTENCODING_MACKOREAN :
2016 enc = kCFStringEncodingMacKorean ;
2017 break ;
2018 case wxFONTENCODING_MACARABIC :
2019 enc = kCFStringEncodingMacArabic ;
2020 break ;
2021 case wxFONTENCODING_MACHEBREW :
2022 enc = kCFStringEncodingMacHebrew ;
2023 break ;
2024 case wxFONTENCODING_MACGREEK :
2025 enc = kCFStringEncodingMacGreek ;
2026 break ;
2027 case wxFONTENCODING_MACCYRILLIC :
2028 enc = kCFStringEncodingMacCyrillic ;
2029 break ;
2030 case wxFONTENCODING_MACDEVANAGARI :
2031 enc = kCFStringEncodingMacDevanagari ;
2032 break ;
2033 case wxFONTENCODING_MACGURMUKHI :
2034 enc = kCFStringEncodingMacGurmukhi ;
2035 break ;
2036 case wxFONTENCODING_MACGUJARATI :
2037 enc = kCFStringEncodingMacGujarati ;
2038 break ;
2039 case wxFONTENCODING_MACORIYA :
2040 enc = kCFStringEncodingMacOriya ;
2041 break ;
2042 case wxFONTENCODING_MACBENGALI :
2043 enc = kCFStringEncodingMacBengali ;
2044 break ;
2045 case wxFONTENCODING_MACTAMIL :
2046 enc = kCFStringEncodingMacTamil ;
2047 break ;
2048 case wxFONTENCODING_MACTELUGU :
2049 enc = kCFStringEncodingMacTelugu ;
2050 break ;
2051 case wxFONTENCODING_MACKANNADA :
2052 enc = kCFStringEncodingMacKannada ;
2053 break ;
2054 case wxFONTENCODING_MACMALAJALAM :
2055 enc = kCFStringEncodingMacMalayalam ;
2056 break ;
2057 case wxFONTENCODING_MACSINHALESE :
2058 enc = kCFStringEncodingMacSinhalese ;
2059 break ;
2060 case wxFONTENCODING_MACBURMESE :
2061 enc = kCFStringEncodingMacBurmese ;
2062 break ;
2063 case wxFONTENCODING_MACKHMER :
2064 enc = kCFStringEncodingMacKhmer ;
2065 break ;
2066 case wxFONTENCODING_MACTHAI :
2067 enc = kCFStringEncodingMacThai ;
2068 break ;
2069 case wxFONTENCODING_MACLAOTIAN :
2070 enc = kCFStringEncodingMacLaotian ;
2071 break ;
2072 case wxFONTENCODING_MACGEORGIAN :
2073 enc = kCFStringEncodingMacGeorgian ;
2074 break ;
2075 case wxFONTENCODING_MACARMENIAN :
2076 enc = kCFStringEncodingMacArmenian ;
2077 break ;
2078 case wxFONTENCODING_MACCHINESESIMP :
2079 enc = kCFStringEncodingMacChineseSimp ;
2080 break ;
2081 case wxFONTENCODING_MACTIBETAN :
2082 enc = kCFStringEncodingMacTibetan ;
2083 break ;
2084 case wxFONTENCODING_MACMONGOLIAN :
2085 enc = kCFStringEncodingMacMongolian ;
2086 break ;
2087 case wxFONTENCODING_MACETHIOPIC :
2088 enc = kCFStringEncodingMacEthiopic ;
2089 break ;
2090 case wxFONTENCODING_MACCENTRALEUR :
2091 enc = kCFStringEncodingMacCentralEurRoman ;
2092 break ;
2093 case wxFONTENCODING_MACVIATNAMESE :
2094 enc = kCFStringEncodingMacVietnamese ;
2095 break ;
2096 case wxFONTENCODING_MACARABICEXT :
2097 enc = kCFStringEncodingMacExtArabic ;
2098 break ;
2099 case wxFONTENCODING_MACSYMBOL :
2100 enc = kCFStringEncodingMacSymbol ;
2101 break ;
2102 case wxFONTENCODING_MACDINGBATS :
2103 enc = kCFStringEncodingMacDingbats ;
2104 break ;
2105 case wxFONTENCODING_MACTURKISH :
2106 enc = kCFStringEncodingMacTurkish ;
2107 break ;
2108 case wxFONTENCODING_MACCROATIAN :
2109 enc = kCFStringEncodingMacCroatian ;
2110 break ;
2111 case wxFONTENCODING_MACICELANDIC :
2112 enc = kCFStringEncodingMacIcelandic ;
2113 break ;
2114 case wxFONTENCODING_MACROMANIAN :
2115 enc = kCFStringEncodingMacRomanian ;
2116 break ;
2117 case wxFONTENCODING_MACCELTIC :
2118 enc = kCFStringEncodingMacCeltic ;
2119 break ;
2120 case wxFONTENCODING_MACGAELIC :
2121 enc = kCFStringEncodingMacGaelic ;
2122 break ;
2123 // case wxFONTENCODING_MACKEYBOARD :
2124 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2125 // break ;
2126 default :
2127 // because gcc is picky
2128 break ;
2129 } ;
2130 return enc ;
2131 }
2132
2133 class wxMBConv_cocoa : public wxMBConv
2134 {
2135 public:
2136 wxMBConv_cocoa()
2137 {
2138 Init(CFStringGetSystemEncoding()) ;
2139 }
2140
2141 #if wxUSE_FONTMAP
2142 wxMBConv_cocoa(const wxChar* name)
2143 {
2144 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2145 }
2146 #endif
2147
2148 wxMBConv_cocoa(wxFontEncoding encoding)
2149 {
2150 Init( wxCFStringEncFromFontEnc(encoding) );
2151 }
2152
2153 ~wxMBConv_cocoa()
2154 {
2155 }
2156
2157 void Init( CFStringEncoding encoding)
2158 {
2159 m_encoding = encoding ;
2160 }
2161
2162 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2163 {
2164 wxASSERT(szUnConv);
2165
2166 CFStringRef theString = CFStringCreateWithBytes (
2167 NULL, //the allocator
2168 (const UInt8*)szUnConv,
2169 strlen(szUnConv),
2170 m_encoding,
2171 false //no BOM/external representation
2172 );
2173
2174 wxASSERT(theString);
2175
2176 size_t nOutLength = CFStringGetLength(theString);
2177
2178 if (szOut == NULL)
2179 {
2180 CFRelease(theString);
2181 return nOutLength;
2182 }
2183
2184 CFRange theRange = { 0, nOutSize };
2185
2186 #if SIZEOF_WCHAR_T == 4
2187 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2188 #endif
2189
2190 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2191
2192 CFRelease(theString);
2193
2194 szUniCharBuffer[nOutLength] = '\0' ;
2195
2196 #if SIZEOF_WCHAR_T == 4
2197 wxMBConvUTF16 converter ;
2198 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2199 delete[] szUniCharBuffer;
2200 #endif
2201
2202 return nOutLength;
2203 }
2204
2205 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2206 {
2207 wxASSERT(szUnConv);
2208
2209 size_t nRealOutSize;
2210 size_t nBufSize = wxWcslen(szUnConv);
2211 UniChar* szUniBuffer = (UniChar*) szUnConv;
2212
2213 #if SIZEOF_WCHAR_T == 4
2214 wxMBConvUTF16 converter ;
2215 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2216 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2217 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2218 nBufSize /= sizeof(UniChar);
2219 #endif
2220
2221 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2222 NULL, //allocator
2223 szUniBuffer,
2224 nBufSize,
2225 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2226 );
2227
2228 wxASSERT(theString);
2229
2230 //Note that CER puts a BOM when converting to unicode
2231 //so we check and use getchars instead in that case
2232 if (m_encoding == kCFStringEncodingUnicode)
2233 {
2234 if (szOut != NULL)
2235 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2236
2237 nRealOutSize = CFStringGetLength(theString) + 1;
2238 }
2239 else
2240 {
2241 CFStringGetBytes(
2242 theString,
2243 CFRangeMake(0, CFStringGetLength(theString)),
2244 m_encoding,
2245 0, //what to put in characters that can't be converted -
2246 //0 tells CFString to return NULL if it meets such a character
2247 false, //not an external representation
2248 (UInt8*) szOut,
2249 nOutSize,
2250 (CFIndex*) &nRealOutSize
2251 );
2252 }
2253
2254 CFRelease(theString);
2255
2256 #if SIZEOF_WCHAR_T == 4
2257 delete[] szUniBuffer;
2258 #endif
2259
2260 return nRealOutSize - 1;
2261 }
2262
2263 bool IsOk() const
2264 {
2265 return m_encoding != kCFStringEncodingInvalidId &&
2266 CFStringIsEncodingAvailable(m_encoding);
2267 }
2268
2269 private:
2270 CFStringEncoding m_encoding ;
2271 };
2272
2273 #endif // defined(__WXCOCOA__)
2274
2275 // ============================================================================
2276 // Mac conversion classes
2277 // ============================================================================
2278
2279 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2280
2281 class wxMBConv_mac : public wxMBConv
2282 {
2283 public:
2284 wxMBConv_mac()
2285 {
2286 Init(CFStringGetSystemEncoding()) ;
2287 }
2288
2289 #if wxUSE_FONTMAP
2290 wxMBConv_mac(const wxChar* name)
2291 {
2292 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2293 }
2294 #endif
2295
2296 wxMBConv_mac(wxFontEncoding encoding)
2297 {
2298 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2299 }
2300
2301 ~wxMBConv_mac()
2302 {
2303 OSStatus status = noErr ;
2304 status = TECDisposeConverter(m_MB2WC_converter);
2305 status = TECDisposeConverter(m_WC2MB_converter);
2306 }
2307
2308
2309 void Init( TextEncodingBase encoding)
2310 {
2311 OSStatus status = noErr ;
2312 m_char_encoding = encoding ;
2313 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2314
2315 status = TECCreateConverter(&m_MB2WC_converter,
2316 m_char_encoding,
2317 m_unicode_encoding);
2318 status = TECCreateConverter(&m_WC2MB_converter,
2319 m_unicode_encoding,
2320 m_char_encoding);
2321 }
2322
2323 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2324 {
2325 OSStatus status = noErr ;
2326 ByteCount byteOutLen ;
2327 ByteCount byteInLen = strlen(psz) ;
2328 wchar_t *tbuf = NULL ;
2329 UniChar* ubuf = NULL ;
2330 size_t res = 0 ;
2331
2332 if (buf == NULL)
2333 {
2334 //apple specs say at least 32
2335 n = wxMax( 32 , byteInLen ) ;
2336 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2337 }
2338 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2339 #if SIZEOF_WCHAR_T == 4
2340 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2341 #else
2342 ubuf = (UniChar*) (buf ? buf : tbuf) ;
2343 #endif
2344 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2345 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2346 #if SIZEOF_WCHAR_T == 4
2347 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2348 // is not properly terminated we get random characters at the end
2349 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2350 wxMBConvUTF16 converter ;
2351 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2352 free( ubuf ) ;
2353 #else
2354 res = byteOutLen / sizeof( UniChar ) ;
2355 #endif
2356 if ( buf == NULL )
2357 free(tbuf) ;
2358
2359 if ( buf && res < n)
2360 buf[res] = 0;
2361
2362 return res ;
2363 }
2364
2365 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2366 {
2367 OSStatus status = noErr ;
2368 ByteCount byteOutLen ;
2369 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2370
2371 char *tbuf = NULL ;
2372
2373 if (buf == NULL)
2374 {
2375 //apple specs say at least 32
2376 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2377 tbuf = (char*) malloc( n ) ;
2378 }
2379
2380 ByteCount byteBufferLen = n ;
2381 UniChar* ubuf = NULL ;
2382 #if SIZEOF_WCHAR_T == 4
2383 wxMBConvUTF16 converter ;
2384 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2385 byteInLen = unicharlen ;
2386 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2387 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2388 #else
2389 ubuf = (UniChar*) psz ;
2390 #endif
2391 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2392 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2393 #if SIZEOF_WCHAR_T == 4
2394 free( ubuf ) ;
2395 #endif
2396 if ( buf == NULL )
2397 free(tbuf) ;
2398
2399 size_t res = byteOutLen ;
2400 if ( buf && res < n)
2401 {
2402 buf[res] = 0;
2403
2404 //we need to double-trip to verify it didn't insert any ? in place
2405 //of bogus characters
2406 wxWCharBuffer wcBuf(n);
2407 size_t pszlen = wxWcslen(psz);
2408 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2409 wxWcslen(wcBuf) != pszlen ||
2410 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2411 {
2412 // we didn't obtain the same thing we started from, hence
2413 // the conversion was lossy and we consider that it failed
2414 return (size_t)-1;
2415 }
2416 }
2417
2418 return res ;
2419 }
2420
2421 bool IsOk() const
2422 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2423
2424 private:
2425 TECObjectRef m_MB2WC_converter ;
2426 TECObjectRef m_WC2MB_converter ;
2427
2428 TextEncodingBase m_char_encoding ;
2429 TextEncodingBase m_unicode_encoding ;
2430 };
2431
2432 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2433
2434 // ============================================================================
2435 // wxEncodingConverter based conversion classes
2436 // ============================================================================
2437
2438 #if wxUSE_FONTMAP
2439
2440 class wxMBConv_wxwin : public wxMBConv
2441 {
2442 private:
2443 void Init()
2444 {
2445 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2446 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2447 }
2448
2449 public:
2450 // temporarily just use wxEncodingConverter stuff,
2451 // so that it works while a better implementation is built
2452 wxMBConv_wxwin(const wxChar* name)
2453 {
2454 if (name)
2455 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2456 else
2457 m_enc = wxFONTENCODING_SYSTEM;
2458
2459 Init();
2460 }
2461
2462 wxMBConv_wxwin(wxFontEncoding enc)
2463 {
2464 m_enc = enc;
2465
2466 Init();
2467 }
2468
2469 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2470 {
2471 size_t inbuf = strlen(psz);
2472 if (buf)
2473 {
2474 if (!m2w.Convert(psz,buf))
2475 return (size_t)-1;
2476 }
2477 return inbuf;
2478 }
2479
2480 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2481 {
2482 const size_t inbuf = wxWcslen(psz);
2483 if (buf)
2484 {
2485 if (!w2m.Convert(psz,buf))
2486 return (size_t)-1;
2487 }
2488
2489 return inbuf;
2490 }
2491
2492 bool IsOk() const { return m_ok; }
2493
2494 public:
2495 wxFontEncoding m_enc;
2496 wxEncodingConverter m2w, w2m;
2497
2498 // were we initialized successfully?
2499 bool m_ok;
2500
2501 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2502 };
2503
2504 // make the constructors available for unit testing
2505 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2506 {
2507 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2508 if ( !result->IsOk() )
2509 {
2510 delete result;
2511 return 0;
2512 }
2513 return result;
2514 }
2515
2516 #endif // wxUSE_FONTMAP
2517
2518 // ============================================================================
2519 // wxCSConv implementation
2520 // ============================================================================
2521
2522 void wxCSConv::Init()
2523 {
2524 m_name = NULL;
2525 m_convReal = NULL;
2526 m_deferred = true;
2527 }
2528
2529 wxCSConv::wxCSConv(const wxChar *charset)
2530 {
2531 Init();
2532
2533 if ( charset )
2534 {
2535 SetName(charset);
2536 }
2537
2538 #if wxUSE_FONTMAP
2539 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2540 #else
2541 m_encoding = wxFONTENCODING_SYSTEM;
2542 #endif
2543 }
2544
2545 wxCSConv::wxCSConv(wxFontEncoding encoding)
2546 {
2547 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2548 {
2549 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2550
2551 encoding = wxFONTENCODING_SYSTEM;
2552 }
2553
2554 Init();
2555
2556 m_encoding = encoding;
2557 }
2558
2559 wxCSConv::~wxCSConv()
2560 {
2561 Clear();
2562 }
2563
2564 wxCSConv::wxCSConv(const wxCSConv& conv)
2565 : wxMBConv()
2566 {
2567 Init();
2568
2569 SetName(conv.m_name);
2570 m_encoding = conv.m_encoding;
2571 }
2572
2573 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2574 {
2575 Clear();
2576
2577 SetName(conv.m_name);
2578 m_encoding = conv.m_encoding;
2579
2580 return *this;
2581 }
2582
2583 void wxCSConv::Clear()
2584 {
2585 free(m_name);
2586 delete m_convReal;
2587
2588 m_name = NULL;
2589 m_convReal = NULL;
2590 }
2591
2592 void wxCSConv::SetName(const wxChar *charset)
2593 {
2594 if (charset)
2595 {
2596 m_name = wxStrdup(charset);
2597 m_deferred = true;
2598 }
2599 }
2600
#if wxUSE_FONTMAP
#include "wx/hashmap.h"

// hash map type associating a charset name with a font encoding
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// cache used by wxCSConv::DoCreate(): for each encoding already tried it
// contains the name which worked with iconv or an empty string if none did
static wxEncodingNameCache gs_nameCache;
#endif // wxUSE_FONTMAP
2609
2610 wxMBConv *wxCSConv::DoCreate() const
2611 {
2612 #if wxUSE_FONTMAP
2613 wxLogTrace(TRACE_STRCONV,
2614 wxT("creating conversion for %s"),
2615 (m_name ? m_name
2616 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2617 #endif // wxUSE_FONTMAP
2618
2619 // check for the special case of ASCII or ISO8859-1 charset: as we have
2620 // special knowledge of it anyhow, we don't need to create a special
2621 // conversion object
2622 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
2623 m_encoding == wxFONTENCODING_DEFAULT )
2624 {
2625 // don't convert at all
2626 return NULL;
2627 }
2628
2629 // we trust OS to do conversion better than we can so try external
2630 // conversion methods first
2631 //
2632 // the full order is:
2633 // 1. OS conversion (iconv() under Unix or Win32 API)
2634 // 2. hard coded conversions for UTF
2635 // 3. wxEncodingConverter as fall back
2636
2637 // step (1)
2638 #ifdef HAVE_ICONV
2639 #if !wxUSE_FONTMAP
2640 if ( m_name )
2641 #endif // !wxUSE_FONTMAP
2642 {
2643 wxString name(m_name);
2644 wxFontEncoding encoding(m_encoding);
2645
2646 if ( !name.empty() )
2647 {
2648 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2649 if ( conv->IsOk() )
2650 return conv;
2651
2652 delete conv;
2653
2654 #if wxUSE_FONTMAP
2655 encoding =
2656 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2657 #endif // wxUSE_FONTMAP
2658 }
2659 #if wxUSE_FONTMAP
2660 {
2661 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2662 if ( it != gs_nameCache.end() )
2663 {
2664 if ( it->second.empty() )
2665 return NULL;
2666
2667 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2668 if ( conv->IsOk() )
2669 return conv;
2670
2671 delete conv;
2672 }
2673
2674 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2675
2676 for ( ; *names; ++names )
2677 {
2678 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2679 if ( conv->IsOk() )
2680 {
2681 gs_nameCache[encoding] = *names;
2682 return conv;
2683 }
2684
2685 delete conv;
2686 }
2687
2688 gs_nameCache[encoding] = _T(""); // cache the failure
2689 }
2690 #endif // wxUSE_FONTMAP
2691 }
2692 #endif // HAVE_ICONV
2693
2694 #ifdef wxHAVE_WIN32_MB2WC
2695 {
2696 #if wxUSE_FONTMAP
2697 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2698 : new wxMBConv_win32(m_encoding);
2699 if ( conv->IsOk() )
2700 return conv;
2701
2702 delete conv;
2703 #else
2704 return NULL;
2705 #endif
2706 }
2707 #endif // wxHAVE_WIN32_MB2WC
2708 #if defined(__WXMAC__)
2709 {
2710 // leave UTF16 and UTF32 to the built-ins of wx
2711 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2712 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2713 {
2714
2715 #if wxUSE_FONTMAP
2716 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2717 : new wxMBConv_mac(m_encoding);
2718 #else
2719 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2720 #endif
2721 if ( conv->IsOk() )
2722 return conv;
2723
2724 delete conv;
2725 }
2726 }
2727 #endif
2728 #if defined(__WXCOCOA__)
2729 {
2730 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2731 {
2732
2733 #if wxUSE_FONTMAP
2734 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2735 : new wxMBConv_cocoa(m_encoding);
2736 #else
2737 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2738 #endif
2739 if ( conv->IsOk() )
2740 return conv;
2741
2742 delete conv;
2743 }
2744 }
2745 #endif
2746 // step (2)
2747 wxFontEncoding enc = m_encoding;
2748 #if wxUSE_FONTMAP
2749 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2750 {
2751 // use "false" to suppress interactive dialogs -- we can be called from
2752 // anywhere and popping up a dialog from here is the last thing we want to
2753 // do
2754 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2755 }
2756 #endif // wxUSE_FONTMAP
2757
2758 switch ( enc )
2759 {
2760 case wxFONTENCODING_UTF7:
2761 return new wxMBConvUTF7;
2762
2763 case wxFONTENCODING_UTF8:
2764 return new wxMBConvUTF8;
2765
2766 case wxFONTENCODING_UTF16BE:
2767 return new wxMBConvUTF16BE;
2768
2769 case wxFONTENCODING_UTF16LE:
2770 return new wxMBConvUTF16LE;
2771
2772 case wxFONTENCODING_UTF32BE:
2773 return new wxMBConvUTF32BE;
2774
2775 case wxFONTENCODING_UTF32LE:
2776 return new wxMBConvUTF32LE;
2777
2778 default:
2779 // nothing to do but put here to suppress gcc warnings
2780 ;
2781 }
2782
2783 // step (3)
2784 #if wxUSE_FONTMAP
2785 {
2786 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2787 : new wxMBConv_wxwin(m_encoding);
2788 if ( conv->IsOk() )
2789 return conv;
2790
2791 delete conv;
2792 }
2793 #endif // wxUSE_FONTMAP
2794
2795 // NB: This is a hack to prevent deadlock. What could otherwise happen
2796 // in Unicode build: wxConvLocal creation ends up being here
2797 // because of some failure and logs the error. But wxLog will try to
2798 // attach timestamp, for which it will need wxConvLocal (to convert
2799 // time to char* and then wchar_t*), but that fails, tries to log
2800 // error, but wxLog has a (already locked) critical section that
2801 // guards static buffer.
2802 static bool alreadyLoggingError = false;
2803 if (!alreadyLoggingError)
2804 {
2805 alreadyLoggingError = true;
2806 wxLogError(_("Cannot convert from the charset '%s'!"),
2807 m_name ? m_name
2808 :
2809 #if wxUSE_FONTMAP
2810 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2811 #else // !wxUSE_FONTMAP
2812 wxString::Format(_("encoding %s"), m_encoding).c_str()
2813 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2814 );
2815 alreadyLoggingError = false;
2816 }
2817
2818 return NULL;
2819 }
2820
2821 void wxCSConv::CreateConvIfNeeded() const
2822 {
2823 if ( m_deferred )
2824 {
2825 wxCSConv *self = (wxCSConv *)this; // const_cast
2826
2827 #if wxUSE_INTL
2828 // if we don't have neither the name nor the encoding, use the default
2829 // encoding for this system
2830 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2831 {
2832 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2833 }
2834 #endif // wxUSE_INTL
2835
2836 self->m_convReal = DoCreate();
2837 self->m_deferred = false;
2838 }
2839 }
2840
2841 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2842 {
2843 CreateConvIfNeeded();
2844
2845 if (m_convReal)
2846 return m_convReal->MB2WC(buf, psz, n);
2847
2848 // latin-1 (direct)
2849 size_t len = strlen(psz);
2850
2851 if (buf)
2852 {
2853 for (size_t c = 0; c <= len; c++)
2854 buf[c] = (unsigned char)(psz[c]);
2855 }
2856
2857 return len;
2858 }
2859
2860 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2861 {
2862 CreateConvIfNeeded();
2863
2864 if (m_convReal)
2865 return m_convReal->WC2MB(buf, psz, n);
2866
2867 // latin-1 (direct)
2868 const size_t len = wxWcslen(psz);
2869 if (buf)
2870 {
2871 for (size_t c = 0; c <= len; c++)
2872 {
2873 if (psz[c] > 0xFF)
2874 return (size_t)-1;
2875 buf[c] = (char)psz[c];
2876 }
2877 }
2878 else
2879 {
2880 for (size_t c = 0; c <= len; c++)
2881 {
2882 if (psz[c] > 0xFF)
2883 return (size_t)-1;
2884 }
2885 }
2886
2887 return len;
2888 }
2889
2890 // ----------------------------------------------------------------------------
2891 // globals
2892 // ----------------------------------------------------------------------------
2893
2894 #ifdef __WINDOWS__
2895 static wxMBConv_win32 wxConvLibcObj;
2896 #elif defined(__WXMAC__) && !defined(__MACH__)
2897 static wxMBConv_mac wxConvLibcObj ;
2898 #else
2899 static wxMBConvLibc wxConvLibcObj;
2900 #endif
2901
2902 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2903 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2904 static wxMBConvUTF7 wxConvUTF7Obj;
2905 static wxMBConvUTF8 wxConvUTF8Obj;
2906
2907 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2908 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2909 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2910 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2911 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2912 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2913 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2914 #ifdef __WXOSX__
2915 wxConvUTF8Obj;
2916 #else
2917 wxConvLibcObj;
2918 #endif
2919
2920
2921 #else // !wxUSE_WCHAR_T
2922
2923 // stand-ins in absence of wchar_t
2924 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2925 wxConvISO8859_1,
2926 wxConvLocal,
2927 wxConvUTF8;
2928
2929 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T