]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
778d8dbb05174f42f1c4a1c970900ebb8a6e7696
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
6 // Modified by:
7 // Created: 29/01/98
8 // RCS-ID: $Id$
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
14
15 // ============================================================================
16 // declarations
17 // ============================================================================
18
19 // ----------------------------------------------------------------------------
20 // headers
21 // ----------------------------------------------------------------------------
22
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
25
26 #ifdef __BORLANDC__
27 #pragma hdrstop
28 #endif
29
30 #ifndef WX_PRECOMP
31 #include "wx/intl.h"
32 #include "wx/log.h"
33 #endif // WX_PRECOMP
34
35 #include "wx/strconv.h"
36
37 #if wxUSE_WCHAR_T
38
39 #ifdef __WINDOWS__
40 #include "wx/msw/private.h"
41 #include "wx/msw/missing.h"
42 #endif
43
44 #ifndef __WXWINCE__
45 #include <errno.h>
46 #endif
47
48 #include <ctype.h>
49 #include <string.h>
50 #include <stdlib.h>
51
52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54 #endif // __WIN32__ but !__WXMICROWIN__
55
56 #ifdef __SALFORDC__
57 #include <clib.h>
58 #endif
59
60 #ifdef HAVE_ICONV
61 #include <iconv.h>
62 #include "wx/thread.h"
63 #endif
64
65 #include "wx/encconv.h"
66 #include "wx/fontmap.h"
67 #include "wx/utils.h"
68
69 #ifdef __WXMAC__
70 #ifndef __DARWIN__
71 #include <ATSUnicode.h>
72 #include <TextCommon.h>
73 #include <TextEncodingConverter.h>
74 #endif
75
76 #include "wx/mac/private.h" // includes mac headers
77 #endif
78
79 #define TRACE_STRCONV _T("strconv")
80
81 #if SIZEOF_WCHAR_T == 2
82 #define WC_UTF16
83 #endif
84
85 // ============================================================================
86 // implementation
87 // ============================================================================
88
89 // ----------------------------------------------------------------------------
90 // UTF-16 en/decoding to/from UCS-4
91 // ----------------------------------------------------------------------------
92
93
94 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
95 {
96 if (input<=0xffff)
97 {
98 if (output)
99 *output = (wxUint16) input;
100 return 1;
101 }
102 else if (input>=0x110000)
103 {
104 return (size_t)-1;
105 }
106 else
107 {
108 if (output)
109 {
110 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
111 *output = (wxUint16) ((input&0x3ff)+0xdc00);
112 }
113 return 2;
114 }
115 }
116
117 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
118 {
119 if ((*input<0xd800) || (*input>0xdfff))
120 {
121 output = *input;
122 return 1;
123 }
124 else if ((input[1]<0xdc00) || (input[1]>0xdfff))
125 {
126 output = *input;
127 return (size_t)-1;
128 }
129 else
130 {
131 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
132 return 2;
133 }
134 }
135
136
137 // ----------------------------------------------------------------------------
138 // wxMBConv
139 // ----------------------------------------------------------------------------
140
141 wxMBConv::~wxMBConv()
142 {
143 // nothing to do here (necessary for Darwin linking probably)
144 }
145
146 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
147 {
148 if ( psz )
149 {
150 // calculate the length of the buffer needed first
151 size_t nLen = MB2WC(NULL, psz, 0);
152 if ( nLen != (size_t)-1 )
153 {
154 // now do the actual conversion
155 wxWCharBuffer buf(nLen);
156 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
157 if ( nLen != (size_t)-1 )
158 {
159 return buf;
160 }
161 }
162 }
163
164 wxWCharBuffer buf((wchar_t *)NULL);
165
166 return buf;
167 }
168
169 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
170 {
171 if ( pwz )
172 {
173 size_t nLen = WC2MB(NULL, pwz, 0);
174 if ( nLen != (size_t)-1 )
175 {
176 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
177 nLen = WC2MB(buf.data(), pwz, nLen + 4);
178 if ( nLen != (size_t)-1 )
179 {
180 return buf;
181 }
182 }
183 }
184
185 wxCharBuffer buf((char *)NULL);
186
187 return buf;
188 }
189
190 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
191 {
192 wxASSERT(pOutSize != NULL);
193
194 const char* szEnd = szString + nStringLen + 1;
195 const char* szPos = szString;
196 const char* szStart = szPos;
197
198 size_t nActualLength = 0;
199 size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
200
201 wxWCharBuffer theBuffer(nCurrentSize);
202
203 //Convert the string until the length() is reached, continuing the
204 //loop every time a null character is reached
205 while(szPos != szEnd)
206 {
207 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
208
209 //Get the length of the current (sub)string
210 size_t nLen = MB2WC(NULL, szPos, 0);
211
212 //Invalid conversion?
213 if( nLen == (size_t)-1 )
214 {
215 *pOutSize = 0;
216 theBuffer.data()[0u] = wxT('\0');
217 return theBuffer;
218 }
219
220
221 //Increase the actual length (+1 for current null character)
222 nActualLength += nLen + 1;
223
224 //if buffer too big, realloc the buffer
225 if (nActualLength > (nCurrentSize+1))
226 {
227 wxWCharBuffer theNewBuffer(nCurrentSize << 1);
228 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
229 theBuffer = theNewBuffer;
230 nCurrentSize <<= 1;
231 }
232
233 //Convert the current (sub)string
234 if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
235 {
236 *pOutSize = 0;
237 theBuffer.data()[0u] = wxT('\0');
238 return theBuffer;
239 }
240
241 //Increment to next (sub)string
242 //Note that we have to use strlen instead of nLen here
243 //because XX2XX gives us the size of the output buffer,
244 //which is not necessarily the length of the string
245 szPos += strlen(szPos) + 1;
246 }
247
248 //success - return actual length and the buffer
249 *pOutSize = nActualLength;
250 return theBuffer;
251 }
252
253 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
254 {
255 wxASSERT(pOutSize != NULL);
256
257 const wchar_t* szEnd = szString + nStringLen + 1;
258 const wchar_t* szPos = szString;
259 const wchar_t* szStart = szPos;
260
261 size_t nActualLength = 0;
262 size_t nCurrentSize = nStringLen << 2; //try * 4 first
263
264 wxCharBuffer theBuffer(nCurrentSize);
265
266 //Convert the string until the length() is reached, continuing the
267 //loop every time a null character is reached
268 while(szPos != szEnd)
269 {
270 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
271
272 //Get the length of the current (sub)string
273 size_t nLen = WC2MB(NULL, szPos, 0);
274
275 //Invalid conversion?
276 if( nLen == (size_t)-1 )
277 {
278 *pOutSize = 0;
279 theBuffer.data()[0u] = wxT('\0');
280 return theBuffer;
281 }
282
283 //Increase the actual length (+1 for current null character)
284 nActualLength += nLen + 1;
285
286 //if buffer too big, realloc the buffer
287 if (nActualLength > (nCurrentSize+1))
288 {
289 wxCharBuffer theNewBuffer(nCurrentSize << 1);
290 memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
291 theBuffer = theNewBuffer;
292 nCurrentSize <<= 1;
293 }
294
295 //Convert the current (sub)string
296 if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
297 {
298 *pOutSize = 0;
299 theBuffer.data()[0u] = wxT('\0');
300 return theBuffer;
301 }
302
303 //Increment to next (sub)string
304 //Note that we have to use wxWcslen instead of nLen here
305 //because XX2XX gives us the size of the output buffer,
306 //which is not necessarily the length of the string
307 szPos += wxWcslen(szPos) + 1;
308 }
309
310 //success - return actual length and the buffer
311 *pOutSize = nActualLength;
312 return theBuffer;
313 }
314
315 // ----------------------------------------------------------------------------
316 // wxMBConvLibc
317 // ----------------------------------------------------------------------------
318
319 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
320 {
321 return wxMB2WC(buf, psz, n);
322 }
323
324 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
325 {
326 return wxWC2MB(buf, psz, n);
327 }
328
329 #ifdef __UNIX__
330
331 // ----------------------------------------------------------------------------
332 // wxConvBrokenFileNames
333 // ----------------------------------------------------------------------------
334
335 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
336 {
337 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
338 || wxStricmp(charset, _T("UTF8")) == 0 )
339 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
340 else
341 m_conv = new wxCSConv(charset);
342 }
343
344 size_t
345 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
346 const char *psz,
347 size_t outputSize) const
348 {
349 return m_conv->MB2WC( outputBuf, psz, outputSize );
350 }
351
352 size_t
353 wxConvBrokenFileNames::WC2MB(char *outputBuf,
354 const wchar_t *psz,
355 size_t outputSize) const
356 {
357 return m_conv->WC2MB( outputBuf, psz, outputSize );
358 }
359
360 #endif
361
362 // ----------------------------------------------------------------------------
363 // UTF-7
364 // ----------------------------------------------------------------------------
365
366 // Implementation (C) 2004 Fredrik Roubert
367
368 //
369 // BASE64 decoding table
370 //
static const unsigned char utf7unb64[] =
{
    // indexed by the input byte: the 6 bit value of a BASE64 digit or 0xff
    // for all bytes which are not BASE64 digits

    // 0x00 - 0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0' to '9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x4f: 'A' to 'O'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50 - 0x5f: 'P' to 'Z'
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x6f: 'a' to 'o'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70 - 0x7f: 'p' to 'z'
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
406
407 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
408 {
409 size_t len = 0;
410
411 while ( *psz && (!buf || (len < n)) )
412 {
413 unsigned char cc = *psz++;
414 if (cc != '+')
415 {
416 // plain ASCII char
417 if (buf)
418 *buf++ = cc;
419 len++;
420 }
421 else if (*psz == '-')
422 {
423 // encoded plus sign
424 if (buf)
425 *buf++ = cc;
426 len++;
427 psz++;
428 }
429 else // start of BASE64 encoded string
430 {
431 bool lsb, ok;
432 unsigned int d, l;
433 for ( ok = lsb = false, d = 0, l = 0;
434 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
435 psz++ )
436 {
437 d <<= 6;
438 d += cc;
439 for (l += 6; l >= 8; lsb = !lsb)
440 {
441 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
442 if (lsb)
443 {
444 if (buf)
445 *buf++ |= c;
446 len ++;
447 }
448 else
449 {
450 if (buf)
451 *buf = (wchar_t)(c << 8);
452 }
453
454 ok = true;
455 }
456 }
457
458 if ( !ok )
459 {
460 // in valid UTF7 we should have valid characters after '+'
461 return (size_t)-1;
462 }
463
464 if (*psz == '-')
465 psz++;
466 }
467 }
468
469 if ( buf && (len < n) )
470 *buf = '\0';
471
472 return len;
473 }
474
475 //
476 // BASE64 encoding table
477 //
static const unsigned char utf7enb64[] =
{
    // values 0 - 25
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // values 26 - 51
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // values 52 - 61
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // values 62 and 63
    '+', '/'
};
489
490 //
491 // UTF-7 encoding table
492 //
493 // 0 - Set D (directly encoded characters)
494 // 1 - Set O (optional direct characters)
495 // 2 - whitespace characters (optional)
496 // 3 - special characters
497 //
static const unsigned char utf7encode[128] =
{
    // 0x00 - 0x0f: control characters, TAB, LF and CR are whitespace
    3, 3, 3, 3, 3, 3, 3, 3,
    3, 2, 2, 3, 3, 2, 3, 3,
    // 0x10 - 0x1f
    3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3,
    // 0x20 - 0x2f: space and punctuation
    2, 1, 1, 1, 1, 1, 1, 0,
    0, 0, 1, 3, 0, 0, 0, 3,
    // 0x30 - 0x3f: digits and punctuation
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 1, 1, 1, 0,
    // 0x40 - 0x4f: '@' and 'A' to 'O'
    1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    // 0x50 - 0x5f: 'P' to 'Z' and punctuation
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 3, 1, 1, 1,
    // 0x60 - 0x6f: '`' and 'a' to 'o'
    1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    // 0x70 - 0x7f: 'p' to 'z', punctuation and DEL
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 1, 1, 3, 3
};
509
510 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
511 {
512
513
514 size_t len = 0;
515
516 while (*psz && ((!buf) || (len < n)))
517 {
518 wchar_t cc = *psz++;
519 if (cc < 0x80 && utf7encode[cc] < 1)
520 {
521 // plain ASCII char
522 if (buf)
523 *buf++ = (char)cc;
524 len++;
525 }
526 #ifndef WC_UTF16
527 else if (((wxUint32)cc) > 0xffff)
528 {
529 // no surrogate pair generation (yet?)
530 return (size_t)-1;
531 }
532 #endif
533 else
534 {
535 if (buf)
536 *buf++ = '+';
537 len++;
538 if (cc != '+')
539 {
540 // BASE64 encode string
541 unsigned int lsb, d, l;
542 for (d = 0, l = 0; /*nothing*/; psz++)
543 {
544 for (lsb = 0; lsb < 2; lsb ++)
545 {
546 d <<= 8;
547 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
548
549 for (l += 8; l >= 6; )
550 {
551 l -= 6;
552 if (buf)
553 *buf++ = utf7enb64[(d >> l) % 64];
554 len++;
555 }
556 }
557 cc = *psz;
558 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
559 break;
560 }
561 if (l != 0)
562 {
563 if (buf)
564 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
565 len++;
566 }
567 }
568 if (buf)
569 *buf++ = '-';
570 len++;
571 }
572 }
573 if (buf && (len < n))
574 *buf = 0;
575 return len;
576 }
577
578 // ----------------------------------------------------------------------------
579 // UTF-8
580 // ----------------------------------------------------------------------------
581
582 static wxUint32 utf8_max[]=
583 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
584
585 // boundaries of the private use area we use to (temporarily) remap invalid
586 // characters invalid in a UTF-8 encoded string
587 const wxUint32 wxUnicodePUA = 0x100000;
588 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
589
590 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
591 {
592 size_t len = 0;
593
594 while (*psz && ((!buf) || (len < n)))
595 {
596 const char *opsz = psz;
597 bool invalid = false;
598 unsigned char cc = *psz++, fc = cc;
599 unsigned cnt;
600 for (cnt = 0; fc & 0x80; cnt++)
601 fc <<= 1;
602 if (!cnt)
603 {
604 // plain ASCII char
605 if (buf)
606 *buf++ = cc;
607 len++;
608
609 // escape the escape character for octal escapes
610 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
611 && cc == '\\' && (!buf || len < n))
612 {
613 if (buf)
614 *buf++ = cc;
615 len++;
616 }
617 }
618 else
619 {
620 cnt--;
621 if (!cnt)
622 {
623 // invalid UTF-8 sequence
624 invalid = true;
625 }
626 else
627 {
628 unsigned ocnt = cnt - 1;
629 wxUint32 res = cc & (0x3f >> cnt);
630 while (cnt--)
631 {
632 cc = *psz;
633 if ((cc & 0xC0) != 0x80)
634 {
635 // invalid UTF-8 sequence
636 invalid = true;
637 break;
638 }
639 psz++;
640 res = (res << 6) | (cc & 0x3f);
641 }
642 if (invalid || res <= utf8_max[ocnt])
643 {
644 // illegal UTF-8 encoding
645 invalid = true;
646 }
647 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
648 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
649 {
650 // if one of our PUA characters turns up externally
651 // it must also be treated as an illegal sequence
652 // (a bit like you have to escape an escape character)
653 invalid = true;
654 }
655 else
656 {
657 #ifdef WC_UTF16
658 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
659 size_t pa = encode_utf16(res, (wxUint16 *)buf);
660 if (pa == (size_t)-1)
661 {
662 invalid = true;
663 }
664 else
665 {
666 if (buf)
667 buf += pa;
668 len += pa;
669 }
670 #else // !WC_UTF16
671 if (buf)
672 *buf++ = (wchar_t)res;
673 len++;
674 #endif // WC_UTF16/!WC_UTF16
675 }
676 }
677 if (invalid)
678 {
679 if (m_options & MAP_INVALID_UTF8_TO_PUA)
680 {
681 while (opsz < psz && (!buf || len < n))
682 {
683 #ifdef WC_UTF16
684 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
685 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
686 wxASSERT(pa != (size_t)-1);
687 if (buf)
688 buf += pa;
689 opsz++;
690 len += pa;
691 #else
692 if (buf)
693 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
694 opsz++;
695 len++;
696 #endif
697 }
698 }
699 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
700 {
701 while (opsz < psz && (!buf || len < n))
702 {
703 if ( buf && len + 3 < n )
704 {
705 unsigned char on = *opsz;
706 *buf++ = L'\\';
707 *buf++ = (wchar_t)( L'0' + on / 0100 );
708 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
709 *buf++ = (wchar_t)( L'0' + on % 010 );
710 }
711 opsz++;
712 len += 4;
713 }
714 }
715 else // MAP_INVALID_UTF8_NOT
716 {
717 return (size_t)-1;
718 }
719 }
720 }
721 }
722 if (buf && (len < n))
723 *buf = 0;
724 return len;
725 }
726
// Returns true if 'wch' is one of the octal digits '0' to '7'.
static inline bool isoctal(wchar_t wch)
{
    if ( wch < L'0' )
        return false;

    return wch <= L'7';
}
731
732 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
733 {
734 size_t len = 0;
735
736 while (*psz && ((!buf) || (len < n)))
737 {
738 wxUint32 cc;
739 #ifdef WC_UTF16
740 // cast is ok for WC_UTF16
741 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
742 psz += (pa == (size_t)-1) ? 1 : pa;
743 #else
744 cc=(*psz++) & 0x7fffffff;
745 #endif
746
747 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
748 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
749 {
750 if (buf)
751 *buf++ = (char)(cc - wxUnicodePUA);
752 len++;
753 }
754 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
755 && cc == L'\\' && psz[0] == L'\\' )
756 {
757 if (buf)
758 *buf++ = (char)cc;
759 psz++;
760 len++;
761 }
762 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
763 cc == L'\\' &&
764 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
765 {
766 if (buf)
767 {
768 *buf++ = (char) ((psz[0] - L'0')*0100 +
769 (psz[1] - L'0')*010 +
770 (psz[2] - L'0'));
771 }
772
773 psz += 3;
774 len++;
775 }
776 else
777 {
778 unsigned cnt;
779 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
780 if (!cnt)
781 {
782 // plain ASCII char
783 if (buf)
784 *buf++ = (char) cc;
785 len++;
786 }
787
788 else
789 {
790 len += cnt + 1;
791 if (buf)
792 {
793 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
794 while (cnt--)
795 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
796 }
797 }
798 }
799 }
800
801 if (buf && (len<n))
802 *buf = 0;
803
804 return len;
805 }
806
807 // ----------------------------------------------------------------------------
808 // UTF-16
809 // ----------------------------------------------------------------------------
810
811 #ifdef WORDS_BIGENDIAN
812 #define wxMBConvUTF16straight wxMBConvUTF16BE
813 #define wxMBConvUTF16swap wxMBConvUTF16LE
814 #else
815 #define wxMBConvUTF16swap wxMBConvUTF16BE
816 #define wxMBConvUTF16straight wxMBConvUTF16LE
817 #endif
818
819
820 #ifdef WC_UTF16
821
822 // copy 16bit MB to 16bit String
823 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
824 {
825 size_t len=0;
826
827 while (*(wxUint16*)psz && (!buf || len < n))
828 {
829 if (buf)
830 *buf++ = *(wxUint16*)psz;
831 len++;
832
833 psz += sizeof(wxUint16);
834 }
835 if (buf && len<n) *buf=0;
836
837 return len;
838 }
839
840
841 // copy 16bit String to 16bit MB
842 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
843 {
844 size_t len=0;
845
846 while (*psz && (!buf || len < n))
847 {
848 if (buf)
849 {
850 *(wxUint16*)buf = *psz;
851 buf += sizeof(wxUint16);
852 }
853 len += sizeof(wxUint16);
854 psz++;
855 }
856 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
857
858 return len;
859 }
860
861
862 // swap 16bit MB to 16bit String
863 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
864 {
865 size_t len = 0;
866
867 while ( *psz && (!buf || len < n) )
868 {
869 if ( buf )
870 {
871 ((char *)buf)[0] = psz[1];
872 ((char *)buf)[1] = psz[0];
873 buf++;
874 }
875 len++;
876 psz += 2;
877 }
878
879 if ( buf && len < n )
880 *buf = L'\0';
881
882 return len;
883 }
884
885
886 // swap 16bit MB to 16bit String
887 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
888 {
889 size_t len=0;
890
891 while (*psz && (!buf || len < n))
892 {
893 if (buf)
894 {
895 *buf++ = ((char*)psz)[1];
896 *buf++ = ((char*)psz)[0];
897 }
898 len += sizeof(wxUint16);
899 psz++;
900 }
901 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
902
903 return len;
904 }
905
906
907 #else // WC_UTF16
908
909
910 // copy 16bit MB to 32bit String
911 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
912 {
913 size_t len=0;
914
915 while (*(wxUint16*)psz && (!buf || len < n))
916 {
917 wxUint32 cc;
918 size_t pa=decode_utf16((wxUint16*)psz, cc);
919 if (pa == (size_t)-1)
920 return pa;
921
922 if (buf)
923 *buf++ = (wchar_t)cc;
924 len++;
925 psz += pa * sizeof(wxUint16);
926 }
927 if (buf && len<n) *buf=0;
928
929 return len;
930 }
931
932
933 // copy 32bit String to 16bit MB
934 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
935 {
936 size_t len=0;
937
938 while (*psz && (!buf || len < n))
939 {
940 wxUint16 cc[2];
941 size_t pa=encode_utf16(*psz, cc);
942
943 if (pa == (size_t)-1)
944 return pa;
945
946 if (buf)
947 {
948 *(wxUint16*)buf = cc[0];
949 buf += sizeof(wxUint16);
950 if (pa > 1)
951 {
952 *(wxUint16*)buf = cc[1];
953 buf += sizeof(wxUint16);
954 }
955 }
956
957 len += pa*sizeof(wxUint16);
958 psz++;
959 }
960 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
961
962 return len;
963 }
964
965
966 // swap 16bit MB to 32bit String
967 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
968 {
969 size_t len=0;
970
971 while (*(wxUint16*)psz && (!buf || len < n))
972 {
973 wxUint32 cc;
974 char tmp[4];
975 tmp[0]=psz[1]; tmp[1]=psz[0];
976 tmp[2]=psz[3]; tmp[3]=psz[2];
977
978 size_t pa=decode_utf16((wxUint16*)tmp, cc);
979 if (pa == (size_t)-1)
980 return pa;
981
982 if (buf)
983 *buf++ = (wchar_t)cc;
984
985 len++;
986 psz += pa * sizeof(wxUint16);
987 }
988 if (buf && len<n) *buf=0;
989
990 return len;
991 }
992
993
994 // swap 32bit String to 16bit MB
995 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
996 {
997 size_t len=0;
998
999 while (*psz && (!buf || len < n))
1000 {
1001 wxUint16 cc[2];
1002 size_t pa=encode_utf16(*psz, cc);
1003
1004 if (pa == (size_t)-1)
1005 return pa;
1006
1007 if (buf)
1008 {
1009 *buf++ = ((char*)cc)[1];
1010 *buf++ = ((char*)cc)[0];
1011 if (pa > 1)
1012 {
1013 *buf++ = ((char*)cc)[3];
1014 *buf++ = ((char*)cc)[2];
1015 }
1016 }
1017
1018 len += pa*sizeof(wxUint16);
1019 psz++;
1020 }
1021 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1022
1023 return len;
1024 }
1025
1026 #endif // WC_UTF16
1027
1028
1029 // ----------------------------------------------------------------------------
1030 // UTF-32
1031 // ----------------------------------------------------------------------------
1032
1033 #ifdef WORDS_BIGENDIAN
1034 #define wxMBConvUTF32straight wxMBConvUTF32BE
1035 #define wxMBConvUTF32swap wxMBConvUTF32LE
1036 #else
1037 #define wxMBConvUTF32swap wxMBConvUTF32BE
1038 #define wxMBConvUTF32straight wxMBConvUTF32LE
1039 #endif
1040
1041
1042 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1043 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1044
1045
1046 #ifdef WC_UTF16
1047
1048 // copy 32bit MB to 16bit String
1049 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1050 {
1051 size_t len=0;
1052
1053 while (*(wxUint32*)psz && (!buf || len < n))
1054 {
1055 wxUint16 cc[2];
1056
1057 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1058 if (pa == (size_t)-1)
1059 return pa;
1060
1061 if (buf)
1062 {
1063 *buf++ = cc[0];
1064 if (pa > 1)
1065 *buf++ = cc[1];
1066 }
1067 len += pa;
1068 psz += sizeof(wxUint32);
1069 }
1070 if (buf && len<n) *buf=0;
1071
1072 return len;
1073 }
1074
1075
1076 // copy 16bit String to 32bit MB
1077 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1078 {
1079 size_t len=0;
1080
1081 while (*psz && (!buf || len < n))
1082 {
1083 wxUint32 cc;
1084
1085 // cast is ok for WC_UTF16
1086 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1087 if (pa == (size_t)-1)
1088 return pa;
1089
1090 if (buf)
1091 {
1092 *(wxUint32*)buf = cc;
1093 buf += sizeof(wxUint32);
1094 }
1095 len += sizeof(wxUint32);
1096 psz += pa;
1097 }
1098
1099 if (buf && len<=n-sizeof(wxUint32))
1100 *(wxUint32*)buf=0;
1101
1102 return len;
1103 }
1104
1105
1106
1107 // swap 32bit MB to 16bit String
1108 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1109 {
1110 size_t len=0;
1111
1112 while (*(wxUint32*)psz && (!buf || len < n))
1113 {
1114 char tmp[4];
1115 tmp[0] = psz[3]; tmp[1] = psz[2];
1116 tmp[2] = psz[1]; tmp[3] = psz[0];
1117
1118
1119 wxUint16 cc[2];
1120
1121 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1122 if (pa == (size_t)-1)
1123 return pa;
1124
1125 if (buf)
1126 {
1127 *buf++ = cc[0];
1128 if (pa > 1)
1129 *buf++ = cc[1];
1130 }
1131 len += pa;
1132 psz += sizeof(wxUint32);
1133 }
1134
1135 if (buf && len<n)
1136 *buf=0;
1137
1138 return len;
1139 }
1140
1141
1142 // swap 16bit String to 32bit MB
1143 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1144 {
1145 size_t len=0;
1146
1147 while (*psz && (!buf || len < n))
1148 {
1149 char cc[4];
1150
1151 // cast is ok for WC_UTF16
1152 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1153 if (pa == (size_t)-1)
1154 return pa;
1155
1156 if (buf)
1157 {
1158 *buf++ = cc[3];
1159 *buf++ = cc[2];
1160 *buf++ = cc[1];
1161 *buf++ = cc[0];
1162 }
1163 len += sizeof(wxUint32);
1164 psz += pa;
1165 }
1166
1167 if (buf && len<=n-sizeof(wxUint32))
1168 *(wxUint32*)buf=0;
1169
1170 return len;
1171 }
1172
1173 #else // WC_UTF16
1174
1175
1176 // copy 32bit MB to 32bit String
1177 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1178 {
1179 size_t len=0;
1180
1181 while (*(wxUint32*)psz && (!buf || len < n))
1182 {
1183 if (buf)
1184 *buf++ = (wchar_t)(*(wxUint32*)psz);
1185 len++;
1186 psz += sizeof(wxUint32);
1187 }
1188
1189 if (buf && len<n)
1190 *buf=0;
1191
1192 return len;
1193 }
1194
1195
1196 // copy 32bit String to 32bit MB
1197 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1198 {
1199 size_t len=0;
1200
1201 while (*psz && (!buf || len < n))
1202 {
1203 if (buf)
1204 {
1205 *(wxUint32*)buf = *psz;
1206 buf += sizeof(wxUint32);
1207 }
1208
1209 len += sizeof(wxUint32);
1210 psz++;
1211 }
1212
1213 if (buf && len<=n-sizeof(wxUint32))
1214 *(wxUint32*)buf=0;
1215
1216 return len;
1217 }
1218
1219
1220 // swap 32bit MB to 32bit String
1221 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1222 {
1223 size_t len=0;
1224
1225 while (*(wxUint32*)psz && (!buf || len < n))
1226 {
1227 if (buf)
1228 {
1229 ((char *)buf)[0] = psz[3];
1230 ((char *)buf)[1] = psz[2];
1231 ((char *)buf)[2] = psz[1];
1232 ((char *)buf)[3] = psz[0];
1233 buf++;
1234 }
1235 len++;
1236 psz += sizeof(wxUint32);
1237 }
1238
1239 if (buf && len<n)
1240 *buf=0;
1241
1242 return len;
1243 }
1244
1245
1246 // swap 32bit String to 32bit MB
1247 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1248 {
1249 size_t len=0;
1250
1251 while (*psz && (!buf || len < n))
1252 {
1253 if (buf)
1254 {
1255 *buf++ = ((char *)psz)[3];
1256 *buf++ = ((char *)psz)[2];
1257 *buf++ = ((char *)psz)[1];
1258 *buf++ = ((char *)psz)[0];
1259 }
1260 len += sizeof(wxUint32);
1261 psz++;
1262 }
1263
1264 if (buf && len<=n-sizeof(wxUint32))
1265 *(wxUint32*)buf=0;
1266
1267 return len;
1268 }
1269
1270
1271 #endif // WC_UTF16
1272
1273
1274 // ============================================================================
1275 // The classes doing conversion using the iconv_xxx() functions
1276 // ============================================================================
1277
1278 #ifdef HAVE_ICONV
1279
1280 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1281 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1282 // (unless there's yet another bug in glibc) the only case when iconv()
1283 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1284 // left in the input buffer -- when _real_ error occurs,
1285 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1286 // iconv() failure.
1287 // [This bug does not appear in glibc 2.2.]
// The macro arguments are parenthesised so that an expression can be passed
// for either of them without changing the meaning of the comparison.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
    #define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                         (errno != E2BIG || (bufLeft) != 0))
#else
    #define ICONV_FAILED(cres, bufLeft) ((cres) == (size_t)-1)
#endif
1294
1295 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1296
1297 #define ICONV_T_INVALID ((iconv_t)-1)
1298
1299 #if SIZEOF_WCHAR_T == 4
1300 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1301 #define WC_ENC wxFONTENCODING_UTF32
1302 #elif SIZEOF_WCHAR_T == 2
1303 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1304 #define WC_ENC wxFONTENCODING_UTF16
1305 #else // sizeof(wchar_t) != 2 nor 4
1306 // does this ever happen?
1307 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1308 #endif
1309
1310 // ----------------------------------------------------------------------------
1311 // wxMBConv_iconv: encapsulates an iconv character set
1312 // ----------------------------------------------------------------------------
1313
1314 class wxMBConv_iconv : public wxMBConv
1315 {
1316 public:
1317 wxMBConv_iconv(const wxChar *name);
1318 virtual ~wxMBConv_iconv();
1319
1320 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1321 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1322
1323 bool IsOk() const
1324 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1325
1326 protected:
1327 // the iconv handlers used to translate from multibyte to wide char and in
1328 // the other direction
1329 iconv_t m2w,
1330 w2m;
1331 #if wxUSE_THREADS
1332 // guards access to m2w and w2m objects
1333 wxMutex m_iconvMutex;
1334 #endif
1335
1336 private:
1337 // the name (for iconv_open()) of a wide char charset -- if none is
1338 // available on this machine, it will remain NULL
1339 static wxString ms_wcCharsetName;
1340
1341 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1342 // different endian-ness than the native one
1343 static bool ms_wcNeedsSwap;
1344 };
1345
1346 // make the constructor available for unit testing
1347 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1348 {
1349 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1350 if ( !result->IsOk() )
1351 {
1352 delete result;
1353 return 0;
1354 }
1355 return result;
1356 }
1357
1358 wxString wxMBConv_iconv::ms_wcCharsetName;
1359 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1360
1361 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1362 {
1363 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1364 // names for the charsets
1365 const wxCharBuffer cname(wxString(name).ToAscii());
1366
1367 // check for charset that represents wchar_t:
1368 if ( ms_wcCharsetName.empty() )
1369 {
1370 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1371
1372 #if wxUSE_FONTMAP
1373 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1374 #else // !wxUSE_FONTMAP
1375 static const wxChar *names[] =
1376 {
1377 #if SIZEOF_WCHAR_T == 4
1378 _T("UCS-4"),
1379 #elif SIZEOF_WCHAR_T = 2
1380 _T("UCS-2"),
1381 #endif
1382 NULL
1383 };
1384 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1385
1386 for ( ; *names && ms_wcCharsetName.empty(); ++names )
1387 {
1388 const wxString nameCS(*names);
1389
1390 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1391 wxString nameXE(nameCS);
1392 #ifdef WORDS_BIGENDIAN
1393 nameXE += _T("BE");
1394 #else // little endian
1395 nameXE += _T("LE");
1396 #endif
1397
1398 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1399 nameXE.c_str());
1400
1401 m2w = iconv_open(nameXE.ToAscii(), cname);
1402 if ( m2w == ICONV_T_INVALID )
1403 {
1404 // try charset w/o bytesex info (e.g. "UCS4")
1405 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1406 nameCS.c_str());
1407 m2w = iconv_open(nameCS.ToAscii(), cname);
1408
1409 // and check for bytesex ourselves:
1410 if ( m2w != ICONV_T_INVALID )
1411 {
1412 char buf[2], *bufPtr;
1413 wchar_t wbuf[2], *wbufPtr;
1414 size_t insz, outsz;
1415 size_t res;
1416
1417 buf[0] = 'A';
1418 buf[1] = 0;
1419 wbuf[0] = 0;
1420 insz = 2;
1421 outsz = SIZEOF_WCHAR_T * 2;
1422 wbufPtr = wbuf;
1423 bufPtr = buf;
1424
1425 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1426 (char**)&wbufPtr, &outsz);
1427
1428 if (ICONV_FAILED(res, insz))
1429 {
1430 wxLogLastError(wxT("iconv"));
1431 wxLogError(_("Conversion to charset '%s' doesn't work."),
1432 nameCS.c_str());
1433 }
1434 else // ok, can convert to this encoding, remember it
1435 {
1436 ms_wcCharsetName = nameCS;
1437 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1438 }
1439 }
1440 }
1441 else // use charset not requiring byte swapping
1442 {
1443 ms_wcCharsetName = nameXE;
1444 }
1445 }
1446
1447 wxLogTrace(TRACE_STRCONV,
1448 wxT("iconv wchar_t charset is \"%s\"%s"),
1449 ms_wcCharsetName.empty() ? _T("<none>")
1450 : ms_wcCharsetName.c_str(),
1451 ms_wcNeedsSwap ? _T(" (needs swap)")
1452 : _T(""));
1453 }
1454 else // we already have ms_wcCharsetName
1455 {
1456 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1457 }
1458
1459 if ( ms_wcCharsetName.empty() )
1460 {
1461 w2m = ICONV_T_INVALID;
1462 }
1463 else
1464 {
1465 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1466 if ( w2m == ICONV_T_INVALID )
1467 {
1468 wxLogTrace(TRACE_STRCONV,
1469 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1470 ms_wcCharsetName.c_str(), cname.data());
1471 }
1472 }
1473 }
1474
1475 wxMBConv_iconv::~wxMBConv_iconv()
1476 {
1477 if ( m2w != ICONV_T_INVALID )
1478 iconv_close(m2w);
1479 if ( w2m != ICONV_T_INVALID )
1480 iconv_close(w2m);
1481 }
1482
1483 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1484 {
1485 #if wxUSE_THREADS
1486 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1487 // Unfortunately there is a couple of global wxCSConv objects such as
1488 // wxConvLocal that are used all over wx code, so we have to make sure
1489 // the handle is used by at most one thread at the time. Otherwise
1490 // only a few wx classes would be safe to use from non-main threads
1491 // as MB<->WC conversion would fail "randomly".
1492 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1493 #endif
1494
1495 size_t inbuf = strlen(psz);
1496 size_t outbuf = n * SIZEOF_WCHAR_T;
1497 size_t res, cres;
1498 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1499 wchar_t *bufPtr = buf;
1500 const char *pszPtr = psz;
1501
1502 if (buf)
1503 {
1504 // have destination buffer, convert there
1505 cres = iconv(m2w,
1506 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1507 (char**)&bufPtr, &outbuf);
1508 res = n - (outbuf / SIZEOF_WCHAR_T);
1509
1510 if (ms_wcNeedsSwap)
1511 {
1512 // convert to native endianness
1513 for ( unsigned i = 0; i < res; i++ )
1514 buf[n] = WC_BSWAP(buf[i]);
1515 }
1516
1517 // NB: iconv was given only strlen(psz) characters on input, and so
1518 // it couldn't convert the trailing zero. Let's do it ourselves
1519 // if there's some room left for it in the output buffer.
1520 if (res < n)
1521 buf[res] = 0;
1522 }
1523 else
1524 {
1525 // no destination buffer... convert using temp buffer
1526 // to calculate destination buffer requirement
1527 wchar_t tbuf[8];
1528 res = 0;
1529 do {
1530 bufPtr = tbuf;
1531 outbuf = 8*SIZEOF_WCHAR_T;
1532
1533 cres = iconv(m2w,
1534 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1535 (char**)&bufPtr, &outbuf );
1536
1537 res += 8-(outbuf/SIZEOF_WCHAR_T);
1538 } while ((cres==(size_t)-1) && (errno==E2BIG));
1539 }
1540
1541 if (ICONV_FAILED(cres, inbuf))
1542 {
1543 //VS: it is ok if iconv fails, hence trace only
1544 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1545 return (size_t)-1;
1546 }
1547
1548 return res;
1549 }
1550
1551 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1552 {
1553 #if wxUSE_THREADS
1554 // NB: explained in MB2WC
1555 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1556 #endif
1557
1558 size_t inlen = wxWcslen(psz);
1559 size_t inbuf = inlen * SIZEOF_WCHAR_T;
1560 size_t outbuf = n;
1561 size_t res, cres;
1562
1563 wchar_t *tmpbuf = 0;
1564
1565 if (ms_wcNeedsSwap)
1566 {
1567 // need to copy to temp buffer to switch endianness
1568 // (doing WC_BSWAP twice on the original buffer won't help, as it
1569 // could be in read-only memory, or be accessed in some other thread)
1570 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1571 for ( size_t i = 0; i < inlen; i++ )
1572 tmpbuf[n] = WC_BSWAP(psz[i]);
1573 tmpbuf[inlen] = L'\0';
1574 psz = tmpbuf;
1575 }
1576
1577 if (buf)
1578 {
1579 // have destination buffer, convert there
1580 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1581
1582 res = n-outbuf;
1583
1584 // NB: iconv was given only wcslen(psz) characters on input, and so
1585 // it couldn't convert the trailing zero. Let's do it ourselves
1586 // if there's some room left for it in the output buffer.
1587 if (res < n)
1588 buf[0] = 0;
1589 }
1590 else
1591 {
1592 // no destination buffer... convert using temp buffer
1593 // to calculate destination buffer requirement
1594 char tbuf[16];
1595 res = 0;
1596 do {
1597 buf = tbuf; outbuf = 16;
1598
1599 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1600
1601 res += 16 - outbuf;
1602 } while ((cres==(size_t)-1) && (errno==E2BIG));
1603 }
1604
1605 if (ms_wcNeedsSwap)
1606 {
1607 free(tmpbuf);
1608 }
1609
1610 if (ICONV_FAILED(cres, inbuf))
1611 {
1612 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1613 return (size_t)-1;
1614 }
1615
1616 return res;
1617 }
1618
1619 #endif // HAVE_ICONV
1620
1621
1622 // ============================================================================
1623 // Win32 conversion classes
1624 // ============================================================================
1625
1626 #ifdef wxHAVE_WIN32_MB2WC
1627
1628 // from utils.cpp
1629 #if wxUSE_FONTMAP
1630 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1631 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1632 #endif
1633
1634 class wxMBConv_win32 : public wxMBConv
1635 {
1636 public:
1637 wxMBConv_win32()
1638 {
1639 m_CodePage = CP_ACP;
1640 }
1641
1642 #if wxUSE_FONTMAP
1643 wxMBConv_win32(const wxChar* name)
1644 {
1645 m_CodePage = wxCharsetToCodepage(name);
1646 }
1647
1648 wxMBConv_win32(wxFontEncoding encoding)
1649 {
1650 m_CodePage = wxEncodingToCodepage(encoding);
1651 }
1652 #endif
1653
1654 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1655 {
1656 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1657 // the behaviour is not compatible with the Unix version (using iconv)
1658 // and break the library itself, e.g. wxTextInputStream::NextChar()
1659 // wouldn't work if reading an incomplete MB char didn't result in an
1660 // error
1661 //
1662 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1663 // an error (tested under Windows Server 2003) and apparently it is
1664 // done on purpose, i.e. the function accepts any input in this case
1665 // and although I'd prefer to return error on ill-formed output, our
1666 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1667 // explicitly ill-formed according to RFC 2152) neither so we don't
1668 // even have any fallback here...
1669 //
1670 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1671 // Win XP or newer and if it is specified on older versions, conversion
1672 // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1673 // fails. So we can only use the flag on newer Windows versions.
1674 // Additionally, the flag is not supported by UTF7, symbol and CJK
1675 // encodings. See here:
1676 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1677 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1678 int flags = 0;
1679 if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
1680 m_CodePage < 50000 &&
1681 IsAtLeastWin2kSP4() )
1682 {
1683 flags = MB_ERR_INVALID_CHARS;
1684 }
1685 else if ( m_CodePage == CP_UTF8 )
1686 {
1687 // Avoid round-trip in the special case of UTF-8 by using our
1688 // own UTF-8 conversion code:
1689 return wxMBConvUTF8().MB2WC(buf, psz, n);
1690 }
1691
1692 const size_t len = ::MultiByteToWideChar
1693 (
1694 m_CodePage, // code page
1695 flags, // flags: fall on error
1696 psz, // input string
1697 -1, // its length (NUL-terminated)
1698 buf, // output string
1699 buf ? n : 0 // size of output buffer
1700 );
1701 if ( !len )
1702 {
1703 // function totally failed
1704 return (size_t)-1;
1705 }
1706
1707 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1708 // check if we succeeded, by doing a double trip:
1709 if ( !flags && buf )
1710 {
1711 const size_t mbLen = strlen(psz);
1712 wxCharBuffer mbBuf(mbLen);
1713 if ( ::WideCharToMultiByte
1714 (
1715 m_CodePage,
1716 0,
1717 buf,
1718 -1,
1719 mbBuf.data(),
1720 mbLen + 1, // size in bytes, not length
1721 NULL,
1722 NULL
1723 ) == 0 ||
1724 strcmp(mbBuf, psz) != 0 )
1725 {
1726 // we didn't obtain the same thing we started from, hence
1727 // the conversion was lossy and we consider that it failed
1728 return (size_t)-1;
1729 }
1730 }
1731
1732 // note that it returns count of written chars for buf != NULL and size
1733 // of the needed buffer for buf == NULL so in either case the length of
1734 // the string (which never includes the terminating NUL) is one less
1735 return len - 1;
1736 }
1737
1738 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1739 {
1740 /*
1741 we have a problem here: by default, WideCharToMultiByte() may
1742 replace characters unrepresentable in the target code page with bad
1743 quality approximations such as turning "1/2" symbol (U+00BD) into
1744 "1" for the code pages which don't have it and we, obviously, want
1745 to avoid this at any price
1746
1747 the trouble is that this function does it _silently_, i.e. it won't
1748 even tell us whether it did or not... Win98/2000 and higher provide
1749 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1750 we have to resort to a round trip, i.e. check that converting back
1751 results in the same string -- this is, of course, expensive but
1752 otherwise we simply can't be sure to not garble the data.
1753 */
1754
1755 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1756 // it doesn't work with CJK encodings (which we test for rather roughly
1757 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1758 // supporting it
1759 BOOL usedDef wxDUMMY_INITIALIZE(false);
1760 BOOL *pUsedDef;
1761 int flags;
1762 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1763 {
1764 // it's our lucky day
1765 flags = WC_NO_BEST_FIT_CHARS;
1766 pUsedDef = &usedDef;
1767 }
1768 else // old system or unsupported encoding
1769 {
1770 flags = 0;
1771 pUsedDef = NULL;
1772 }
1773
1774 const size_t len = ::WideCharToMultiByte
1775 (
1776 m_CodePage, // code page
1777 flags, // either none or no best fit
1778 pwz, // input string
1779 -1, // it is (wide) NUL-terminated
1780 buf, // output buffer
1781 buf ? n : 0, // and its size
1782 NULL, // default "replacement" char
1783 pUsedDef // [out] was it used?
1784 );
1785
1786 if ( !len )
1787 {
1788 // function totally failed
1789 return (size_t)-1;
1790 }
1791
1792 // if we were really converting, check if we succeeded
1793 if ( buf )
1794 {
1795 if ( flags )
1796 {
1797 // check if the conversion failed, i.e. if any replacements
1798 // were done
1799 if ( usedDef )
1800 return (size_t)-1;
1801 }
1802 else // we must resort to double tripping...
1803 {
1804 wxWCharBuffer wcBuf(n);
1805 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1806 wcscmp(wcBuf, pwz) != 0 )
1807 {
1808 // we didn't obtain the same thing we started from, hence
1809 // the conversion was lossy and we consider that it failed
1810 return (size_t)-1;
1811 }
1812 }
1813 }
1814
1815 // see the comment above for the reason of "len - 1"
1816 return len - 1;
1817 }
1818
1819 bool IsOk() const { return m_CodePage != -1; }
1820
1821 private:
1822 static bool CanUseNoBestFit()
1823 {
1824 static int s_isWin98Or2k = -1;
1825
1826 if ( s_isWin98Or2k == -1 )
1827 {
1828 int verMaj, verMin;
1829 switch ( wxGetOsVersion(&verMaj, &verMin) )
1830 {
1831 case wxWIN95:
1832 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1833 break;
1834
1835 case wxWINDOWS_NT:
1836 s_isWin98Or2k = verMaj >= 5;
1837 break;
1838
1839 default:
1840 // unknown, be conseravtive by default
1841 s_isWin98Or2k = 0;
1842 }
1843
1844 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1845 }
1846
1847 return s_isWin98Or2k == 1;
1848 }
1849
1850 static bool IsAtLeastWin2kSP4()
1851 {
1852 #ifdef __WXWINCE__
1853 return false;
1854 #else
1855 static int s_isAtLeastWin2kSP4 = -1;
1856
1857 if ( s_isAtLeastWin2kSP4 == -1 )
1858 {
1859 OSVERSIONINFOEX ver;
1860
1861 memset(&ver, 0, sizeof(ver));
1862 ver.dwOSVersionInfoSize = sizeof(ver);
1863 GetVersionEx((OSVERSIONINFO*)&ver);
1864
1865 s_isAtLeastWin2kSP4 =
1866 ((ver.dwMajorVersion > 5) || // Vista+
1867 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
1868 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
1869 ver.wServicePackMajor >= 4)) // 2000 SP4+
1870 ? 1 : 0;
1871 }
1872
1873 return s_isAtLeastWin2kSP4 == 1;
1874 #endif
1875 }
1876
1877 long m_CodePage;
1878 };
1879
1880 #endif // wxHAVE_WIN32_MB2WC
1881
1882 // ============================================================================
1883 // Cocoa conversion classes
1884 // ============================================================================
1885
1886 #if defined(__WXCOCOA__)
1887
// RN:  There is no UTF-32 support in either Core Foundation or
//      Cocoa.  Strangely enough, Core Foundation uses UTF-32
//      internally quite a bit - it's just not public (yet).
1891
1892 #include <CoreFoundation/CFString.h>
1893 #include <CoreFoundation/CFStringEncodingExt.h>
1894
1895 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1896 {
1897 CFStringEncoding enc = kCFStringEncodingInvalidId ;
1898 if ( encoding == wxFONTENCODING_DEFAULT )
1899 {
1900 enc = CFStringGetSystemEncoding();
1901 }
1902 else switch( encoding)
1903 {
1904 case wxFONTENCODING_ISO8859_1 :
1905 enc = kCFStringEncodingISOLatin1 ;
1906 break ;
1907 case wxFONTENCODING_ISO8859_2 :
1908 enc = kCFStringEncodingISOLatin2;
1909 break ;
1910 case wxFONTENCODING_ISO8859_3 :
1911 enc = kCFStringEncodingISOLatin3 ;
1912 break ;
1913 case wxFONTENCODING_ISO8859_4 :
1914 enc = kCFStringEncodingISOLatin4;
1915 break ;
1916 case wxFONTENCODING_ISO8859_5 :
1917 enc = kCFStringEncodingISOLatinCyrillic;
1918 break ;
1919 case wxFONTENCODING_ISO8859_6 :
1920 enc = kCFStringEncodingISOLatinArabic;
1921 break ;
1922 case wxFONTENCODING_ISO8859_7 :
1923 enc = kCFStringEncodingISOLatinGreek;
1924 break ;
1925 case wxFONTENCODING_ISO8859_8 :
1926 enc = kCFStringEncodingISOLatinHebrew;
1927 break ;
1928 case wxFONTENCODING_ISO8859_9 :
1929 enc = kCFStringEncodingISOLatin5;
1930 break ;
1931 case wxFONTENCODING_ISO8859_10 :
1932 enc = kCFStringEncodingISOLatin6;
1933 break ;
1934 case wxFONTENCODING_ISO8859_11 :
1935 enc = kCFStringEncodingISOLatinThai;
1936 break ;
1937 case wxFONTENCODING_ISO8859_13 :
1938 enc = kCFStringEncodingISOLatin7;
1939 break ;
1940 case wxFONTENCODING_ISO8859_14 :
1941 enc = kCFStringEncodingISOLatin8;
1942 break ;
1943 case wxFONTENCODING_ISO8859_15 :
1944 enc = kCFStringEncodingISOLatin9;
1945 break ;
1946
1947 case wxFONTENCODING_KOI8 :
1948 enc = kCFStringEncodingKOI8_R;
1949 break ;
1950 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1951 enc = kCFStringEncodingDOSRussian;
1952 break ;
1953
1954 // case wxFONTENCODING_BULGARIAN :
1955 // enc = ;
1956 // break ;
1957
1958 case wxFONTENCODING_CP437 :
1959 enc =kCFStringEncodingDOSLatinUS ;
1960 break ;
1961 case wxFONTENCODING_CP850 :
1962 enc = kCFStringEncodingDOSLatin1;
1963 break ;
1964 case wxFONTENCODING_CP852 :
1965 enc = kCFStringEncodingDOSLatin2;
1966 break ;
1967 case wxFONTENCODING_CP855 :
1968 enc = kCFStringEncodingDOSCyrillic;
1969 break ;
1970 case wxFONTENCODING_CP866 :
1971 enc =kCFStringEncodingDOSRussian ;
1972 break ;
1973 case wxFONTENCODING_CP874 :
1974 enc = kCFStringEncodingDOSThai;
1975 break ;
1976 case wxFONTENCODING_CP932 :
1977 enc = kCFStringEncodingDOSJapanese;
1978 break ;
1979 case wxFONTENCODING_CP936 :
1980 enc =kCFStringEncodingDOSChineseSimplif ;
1981 break ;
1982 case wxFONTENCODING_CP949 :
1983 enc = kCFStringEncodingDOSKorean;
1984 break ;
1985 case wxFONTENCODING_CP950 :
1986 enc = kCFStringEncodingDOSChineseTrad;
1987 break ;
1988 case wxFONTENCODING_CP1250 :
1989 enc = kCFStringEncodingWindowsLatin2;
1990 break ;
1991 case wxFONTENCODING_CP1251 :
1992 enc =kCFStringEncodingWindowsCyrillic ;
1993 break ;
1994 case wxFONTENCODING_CP1252 :
1995 enc =kCFStringEncodingWindowsLatin1 ;
1996 break ;
1997 case wxFONTENCODING_CP1253 :
1998 enc = kCFStringEncodingWindowsGreek;
1999 break ;
2000 case wxFONTENCODING_CP1254 :
2001 enc = kCFStringEncodingWindowsLatin5;
2002 break ;
2003 case wxFONTENCODING_CP1255 :
2004 enc =kCFStringEncodingWindowsHebrew ;
2005 break ;
2006 case wxFONTENCODING_CP1256 :
2007 enc =kCFStringEncodingWindowsArabic ;
2008 break ;
2009 case wxFONTENCODING_CP1257 :
2010 enc = kCFStringEncodingWindowsBalticRim;
2011 break ;
2012 // This only really encodes to UTF7 (if that) evidently
2013 // case wxFONTENCODING_UTF7 :
2014 // enc = kCFStringEncodingNonLossyASCII ;
2015 // break ;
2016 case wxFONTENCODING_UTF8 :
2017 enc = kCFStringEncodingUTF8 ;
2018 break ;
2019 case wxFONTENCODING_EUC_JP :
2020 enc = kCFStringEncodingEUC_JP;
2021 break ;
2022 case wxFONTENCODING_UTF16 :
2023 enc = kCFStringEncodingUnicode ;
2024 break ;
2025 case wxFONTENCODING_MACROMAN :
2026 enc = kCFStringEncodingMacRoman ;
2027 break ;
2028 case wxFONTENCODING_MACJAPANESE :
2029 enc = kCFStringEncodingMacJapanese ;
2030 break ;
2031 case wxFONTENCODING_MACCHINESETRAD :
2032 enc = kCFStringEncodingMacChineseTrad ;
2033 break ;
2034 case wxFONTENCODING_MACKOREAN :
2035 enc = kCFStringEncodingMacKorean ;
2036 break ;
2037 case wxFONTENCODING_MACARABIC :
2038 enc = kCFStringEncodingMacArabic ;
2039 break ;
2040 case wxFONTENCODING_MACHEBREW :
2041 enc = kCFStringEncodingMacHebrew ;
2042 break ;
2043 case wxFONTENCODING_MACGREEK :
2044 enc = kCFStringEncodingMacGreek ;
2045 break ;
2046 case wxFONTENCODING_MACCYRILLIC :
2047 enc = kCFStringEncodingMacCyrillic ;
2048 break ;
2049 case wxFONTENCODING_MACDEVANAGARI :
2050 enc = kCFStringEncodingMacDevanagari ;
2051 break ;
2052 case wxFONTENCODING_MACGURMUKHI :
2053 enc = kCFStringEncodingMacGurmukhi ;
2054 break ;
2055 case wxFONTENCODING_MACGUJARATI :
2056 enc = kCFStringEncodingMacGujarati ;
2057 break ;
2058 case wxFONTENCODING_MACORIYA :
2059 enc = kCFStringEncodingMacOriya ;
2060 break ;
2061 case wxFONTENCODING_MACBENGALI :
2062 enc = kCFStringEncodingMacBengali ;
2063 break ;
2064 case wxFONTENCODING_MACTAMIL :
2065 enc = kCFStringEncodingMacTamil ;
2066 break ;
2067 case wxFONTENCODING_MACTELUGU :
2068 enc = kCFStringEncodingMacTelugu ;
2069 break ;
2070 case wxFONTENCODING_MACKANNADA :
2071 enc = kCFStringEncodingMacKannada ;
2072 break ;
2073 case wxFONTENCODING_MACMALAJALAM :
2074 enc = kCFStringEncodingMacMalayalam ;
2075 break ;
2076 case wxFONTENCODING_MACSINHALESE :
2077 enc = kCFStringEncodingMacSinhalese ;
2078 break ;
2079 case wxFONTENCODING_MACBURMESE :
2080 enc = kCFStringEncodingMacBurmese ;
2081 break ;
2082 case wxFONTENCODING_MACKHMER :
2083 enc = kCFStringEncodingMacKhmer ;
2084 break ;
2085 case wxFONTENCODING_MACTHAI :
2086 enc = kCFStringEncodingMacThai ;
2087 break ;
2088 case wxFONTENCODING_MACLAOTIAN :
2089 enc = kCFStringEncodingMacLaotian ;
2090 break ;
2091 case wxFONTENCODING_MACGEORGIAN :
2092 enc = kCFStringEncodingMacGeorgian ;
2093 break ;
2094 case wxFONTENCODING_MACARMENIAN :
2095 enc = kCFStringEncodingMacArmenian ;
2096 break ;
2097 case wxFONTENCODING_MACCHINESESIMP :
2098 enc = kCFStringEncodingMacChineseSimp ;
2099 break ;
2100 case wxFONTENCODING_MACTIBETAN :
2101 enc = kCFStringEncodingMacTibetan ;
2102 break ;
2103 case wxFONTENCODING_MACMONGOLIAN :
2104 enc = kCFStringEncodingMacMongolian ;
2105 break ;
2106 case wxFONTENCODING_MACETHIOPIC :
2107 enc = kCFStringEncodingMacEthiopic ;
2108 break ;
2109 case wxFONTENCODING_MACCENTRALEUR :
2110 enc = kCFStringEncodingMacCentralEurRoman ;
2111 break ;
2112 case wxFONTENCODING_MACVIATNAMESE :
2113 enc = kCFStringEncodingMacVietnamese ;
2114 break ;
2115 case wxFONTENCODING_MACARABICEXT :
2116 enc = kCFStringEncodingMacExtArabic ;
2117 break ;
2118 case wxFONTENCODING_MACSYMBOL :
2119 enc = kCFStringEncodingMacSymbol ;
2120 break ;
2121 case wxFONTENCODING_MACDINGBATS :
2122 enc = kCFStringEncodingMacDingbats ;
2123 break ;
2124 case wxFONTENCODING_MACTURKISH :
2125 enc = kCFStringEncodingMacTurkish ;
2126 break ;
2127 case wxFONTENCODING_MACCROATIAN :
2128 enc = kCFStringEncodingMacCroatian ;
2129 break ;
2130 case wxFONTENCODING_MACICELANDIC :
2131 enc = kCFStringEncodingMacIcelandic ;
2132 break ;
2133 case wxFONTENCODING_MACROMANIAN :
2134 enc = kCFStringEncodingMacRomanian ;
2135 break ;
2136 case wxFONTENCODING_MACCELTIC :
2137 enc = kCFStringEncodingMacCeltic ;
2138 break ;
2139 case wxFONTENCODING_MACGAELIC :
2140 enc = kCFStringEncodingMacGaelic ;
2141 break ;
2142 // case wxFONTENCODING_MACKEYBOARD :
2143 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2144 // break ;
2145 default :
2146 // because gcc is picky
2147 break ;
2148 } ;
2149 return enc ;
2150 }
2151
2152 class wxMBConv_cocoa : public wxMBConv
2153 {
2154 public:
2155 wxMBConv_cocoa()
2156 {
2157 Init(CFStringGetSystemEncoding()) ;
2158 }
2159
2160 #if wxUSE_FONTMAP
2161 wxMBConv_cocoa(const wxChar* name)
2162 {
2163 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2164 }
2165 #endif
2166
2167 wxMBConv_cocoa(wxFontEncoding encoding)
2168 {
2169 Init( wxCFStringEncFromFontEnc(encoding) );
2170 }
2171
2172 ~wxMBConv_cocoa()
2173 {
2174 }
2175
2176 void Init( CFStringEncoding encoding)
2177 {
2178 m_encoding = encoding ;
2179 }
2180
2181 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2182 {
2183 wxASSERT(szUnConv);
2184
2185 CFStringRef theString = CFStringCreateWithBytes (
2186 NULL, //the allocator
2187 (const UInt8*)szUnConv,
2188 strlen(szUnConv),
2189 m_encoding,
2190 false //no BOM/external representation
2191 );
2192
2193 wxASSERT(theString);
2194
2195 size_t nOutLength = CFStringGetLength(theString);
2196
2197 if (szOut == NULL)
2198 {
2199 CFRelease(theString);
2200 return nOutLength;
2201 }
2202
2203 CFRange theRange = { 0, nOutSize };
2204
2205 #if SIZEOF_WCHAR_T == 4
2206 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2207 #endif
2208
2209 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2210
2211 CFRelease(theString);
2212
2213 szUniCharBuffer[nOutLength] = '\0' ;
2214
2215 #if SIZEOF_WCHAR_T == 4
2216 wxMBConvUTF16 converter ;
2217 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2218 delete[] szUniCharBuffer;
2219 #endif
2220
2221 return nOutLength;
2222 }
2223
2224 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2225 {
2226 wxASSERT(szUnConv);
2227
2228 size_t nRealOutSize;
2229 size_t nBufSize = wxWcslen(szUnConv);
2230 UniChar* szUniBuffer = (UniChar*) szUnConv;
2231
2232 #if SIZEOF_WCHAR_T == 4
2233 wxMBConvUTF16 converter ;
2234 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2235 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2236 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2237 nBufSize /= sizeof(UniChar);
2238 #endif
2239
2240 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2241 NULL, //allocator
2242 szUniBuffer,
2243 nBufSize,
2244 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2245 );
2246
2247 wxASSERT(theString);
2248
2249 //Note that CER puts a BOM when converting to unicode
2250 //so we check and use getchars instead in that case
2251 if (m_encoding == kCFStringEncodingUnicode)
2252 {
2253 if (szOut != NULL)
2254 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2255
2256 nRealOutSize = CFStringGetLength(theString) + 1;
2257 }
2258 else
2259 {
2260 CFStringGetBytes(
2261 theString,
2262 CFRangeMake(0, CFStringGetLength(theString)),
2263 m_encoding,
2264 0, //what to put in characters that can't be converted -
2265 //0 tells CFString to return NULL if it meets such a character
2266 false, //not an external representation
2267 (UInt8*) szOut,
2268 nOutSize,
2269 (CFIndex*) &nRealOutSize
2270 );
2271 }
2272
2273 CFRelease(theString);
2274
2275 #if SIZEOF_WCHAR_T == 4
2276 delete[] szUniBuffer;
2277 #endif
2278
2279 return nRealOutSize - 1;
2280 }
2281
2282 bool IsOk() const
2283 {
2284 return m_encoding != kCFStringEncodingInvalidId &&
2285 CFStringIsEncodingAvailable(m_encoding);
2286 }
2287
2288 private:
2289 CFStringEncoding m_encoding ;
2290 };
2291
2292 #endif // defined(__WXCOCOA__)
2293
2294 // ============================================================================
2295 // Mac conversion classes
2296 // ============================================================================
2297
2298 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2299
2300 class wxMBConv_mac : public wxMBConv
2301 {
2302 public:
2303 wxMBConv_mac()
2304 {
2305 Init(CFStringGetSystemEncoding()) ;
2306 }
2307
2308 #if wxUSE_FONTMAP
2309 wxMBConv_mac(const wxChar* name)
2310 {
2311 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2312 }
2313 #endif
2314
2315 wxMBConv_mac(wxFontEncoding encoding)
2316 {
2317 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2318 }
2319
2320 ~wxMBConv_mac()
2321 {
2322 OSStatus status = noErr ;
2323 status = TECDisposeConverter(m_MB2WC_converter);
2324 status = TECDisposeConverter(m_WC2MB_converter);
2325 }
2326
2327
2328 void Init( TextEncodingBase encoding)
2329 {
2330 OSStatus status = noErr ;
2331 m_char_encoding = encoding ;
2332 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2333
2334 status = TECCreateConverter(&m_MB2WC_converter,
2335 m_char_encoding,
2336 m_unicode_encoding);
2337 status = TECCreateConverter(&m_WC2MB_converter,
2338 m_unicode_encoding,
2339 m_char_encoding);
2340 }
2341
2342 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2343 {
2344 OSStatus status = noErr ;
2345 ByteCount byteOutLen ;
2346 ByteCount byteInLen = strlen(psz) ;
2347 wchar_t *tbuf = NULL ;
2348 UniChar* ubuf = NULL ;
2349 size_t res = 0 ;
2350
2351 if (buf == NULL)
2352 {
2353 //apple specs say at least 32
2354 n = wxMax( 32 , byteInLen ) ;
2355 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2356 }
2357 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2358 #if SIZEOF_WCHAR_T == 4
2359 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2360 #else
2361 ubuf = (UniChar*) (buf ? buf : tbuf) ;
2362 #endif
2363 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2364 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2365 #if SIZEOF_WCHAR_T == 4
2366 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2367 // is not properly terminated we get random characters at the end
2368 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2369 wxMBConvUTF16 converter ;
2370 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2371 free( ubuf ) ;
2372 #else
2373 res = byteOutLen / sizeof( UniChar ) ;
2374 #endif
2375 if ( buf == NULL )
2376 free(tbuf) ;
2377
2378 if ( buf && res < n)
2379 buf[res] = 0;
2380
2381 return res ;
2382 }
2383
2384 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2385 {
2386 OSStatus status = noErr ;
2387 ByteCount byteOutLen ;
2388 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2389
2390 char *tbuf = NULL ;
2391
2392 if (buf == NULL)
2393 {
2394 //apple specs say at least 32
2395 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2396 tbuf = (char*) malloc( n ) ;
2397 }
2398
2399 ByteCount byteBufferLen = n ;
2400 UniChar* ubuf = NULL ;
2401 #if SIZEOF_WCHAR_T == 4
2402 wxMBConvUTF16 converter ;
2403 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2404 byteInLen = unicharlen ;
2405 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2406 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2407 #else
2408 ubuf = (UniChar*) psz ;
2409 #endif
2410 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2411 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2412 #if SIZEOF_WCHAR_T == 4
2413 free( ubuf ) ;
2414 #endif
2415 if ( buf == NULL )
2416 free(tbuf) ;
2417
2418 size_t res = byteOutLen ;
2419 if ( buf && res < n)
2420 {
2421 buf[res] = 0;
2422
2423 //we need to double-trip to verify it didn't insert any ? in place
2424 //of bogus characters
2425 wxWCharBuffer wcBuf(n);
2426 size_t pszlen = wxWcslen(psz);
2427 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2428 wxWcslen(wcBuf) != pszlen ||
2429 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2430 {
2431 // we didn't obtain the same thing we started from, hence
2432 // the conversion was lossy and we consider that it failed
2433 return (size_t)-1;
2434 }
2435 }
2436
2437 return res ;
2438 }
2439
2440 bool IsOk() const
2441 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2442
2443 private:
2444 TECObjectRef m_MB2WC_converter ;
2445 TECObjectRef m_WC2MB_converter ;
2446
2447 TextEncodingBase m_char_encoding ;
2448 TextEncodingBase m_unicode_encoding ;
2449 };
2450
2451 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2452
2453 // ============================================================================
2454 // wxEncodingConverter based conversion classes
2455 // ============================================================================
2456
2457 #if wxUSE_FONTMAP
2458
2459 class wxMBConv_wxwin : public wxMBConv
2460 {
2461 private:
2462 void Init()
2463 {
2464 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2465 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2466 }
2467
2468 public:
2469 // temporarily just use wxEncodingConverter stuff,
2470 // so that it works while a better implementation is built
2471 wxMBConv_wxwin(const wxChar* name)
2472 {
2473 if (name)
2474 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2475 else
2476 m_enc = wxFONTENCODING_SYSTEM;
2477
2478 Init();
2479 }
2480
2481 wxMBConv_wxwin(wxFontEncoding enc)
2482 {
2483 m_enc = enc;
2484
2485 Init();
2486 }
2487
2488 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2489 {
2490 size_t inbuf = strlen(psz);
2491 if (buf)
2492 {
2493 if (!m2w.Convert(psz,buf))
2494 return (size_t)-1;
2495 }
2496 return inbuf;
2497 }
2498
2499 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2500 {
2501 const size_t inbuf = wxWcslen(psz);
2502 if (buf)
2503 {
2504 if (!w2m.Convert(psz,buf))
2505 return (size_t)-1;
2506 }
2507
2508 return inbuf;
2509 }
2510
2511 bool IsOk() const { return m_ok; }
2512
2513 public:
2514 wxFontEncoding m_enc;
2515 wxEncodingConverter m2w, w2m;
2516
2517 // were we initialized successfully?
2518 bool m_ok;
2519
2520 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2521 };
2522
2523 // make the constructors available for unit testing
2524 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2525 {
2526 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2527 if ( !result->IsOk() )
2528 {
2529 delete result;
2530 return 0;
2531 }
2532 return result;
2533 }
2534
2535 #endif // wxUSE_FONTMAP
2536
2537 // ============================================================================
2538 // wxCSConv implementation
2539 // ============================================================================
2540
2541 void wxCSConv::Init()
2542 {
2543 m_name = NULL;
2544 m_convReal = NULL;
2545 m_deferred = true;
2546 }
2547
2548 wxCSConv::wxCSConv(const wxChar *charset)
2549 {
2550 Init();
2551
2552 if ( charset )
2553 {
2554 SetName(charset);
2555 }
2556
2557 #if wxUSE_FONTMAP
2558 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2559 #else
2560 m_encoding = wxFONTENCODING_SYSTEM;
2561 #endif
2562 }
2563
2564 wxCSConv::wxCSConv(wxFontEncoding encoding)
2565 {
2566 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2567 {
2568 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2569
2570 encoding = wxFONTENCODING_SYSTEM;
2571 }
2572
2573 Init();
2574
2575 m_encoding = encoding;
2576 }
2577
2578 wxCSConv::~wxCSConv()
2579 {
2580 Clear();
2581 }
2582
2583 wxCSConv::wxCSConv(const wxCSConv& conv)
2584 : wxMBConv()
2585 {
2586 Init();
2587
2588 SetName(conv.m_name);
2589 m_encoding = conv.m_encoding;
2590 }
2591
2592 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2593 {
2594 Clear();
2595
2596 SetName(conv.m_name);
2597 m_encoding = conv.m_encoding;
2598
2599 return *this;
2600 }
2601
2602 void wxCSConv::Clear()
2603 {
2604 free(m_name);
2605 delete m_convReal;
2606
2607 m_name = NULL;
2608 m_convReal = NULL;
2609 }
2610
2611 void wxCSConv::SetName(const wxChar *charset)
2612 {
2613 if (charset)
2614 {
2615 m_name = wxStrdup(charset);
2616 m_deferred = true;
2617 }
2618 }
2619
2620 #if wxUSE_FONTMAP
2621 #include "wx/hashmap.h"
2622
2623 WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
2624 wxEncodingNameCache );
2625
2626 static wxEncodingNameCache gs_nameCache;
2627 #endif
2628
2629 wxMBConv *wxCSConv::DoCreate() const
2630 {
2631 #if wxUSE_FONTMAP
2632 wxLogTrace(TRACE_STRCONV,
2633 wxT("creating conversion for %s"),
2634 (m_name ? m_name
2635 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2636 #endif // wxUSE_FONTMAP
2637
2638 // check for the special case of ASCII or ISO8859-1 charset: as we have
2639 // special knowledge of it anyhow, we don't need to create a special
2640 // conversion object
2641 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
2642 m_encoding == wxFONTENCODING_DEFAULT )
2643 {
2644 // don't convert at all
2645 return NULL;
2646 }
2647
2648 // we trust OS to do conversion better than we can so try external
2649 // conversion methods first
2650 //
2651 // the full order is:
2652 // 1. OS conversion (iconv() under Unix or Win32 API)
2653 // 2. hard coded conversions for UTF
2654 // 3. wxEncodingConverter as fall back
2655
2656 // step (1)
2657 #ifdef HAVE_ICONV
2658 #if !wxUSE_FONTMAP
2659 if ( m_name )
2660 #endif // !wxUSE_FONTMAP
2661 {
2662 wxString name(m_name);
2663 wxFontEncoding encoding(m_encoding);
2664
2665 if ( !name.empty() )
2666 {
2667 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2668 if ( conv->IsOk() )
2669 return conv;
2670
2671 delete conv;
2672
2673 #if wxUSE_FONTMAP
2674 encoding =
2675 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2676 #endif // wxUSE_FONTMAP
2677 }
2678 #if wxUSE_FONTMAP
2679 {
2680 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2681 if ( it != gs_nameCache.end() )
2682 {
2683 if ( it->second.empty() )
2684 return NULL;
2685
2686 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2687 if ( conv->IsOk() )
2688 return conv;
2689
2690 delete conv;
2691 }
2692
2693 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2694
2695 for ( ; *names; ++names )
2696 {
2697 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2698 if ( conv->IsOk() )
2699 {
2700 gs_nameCache[encoding] = *names;
2701 return conv;
2702 }
2703
2704 delete conv;
2705 }
2706
2707 gs_nameCache[encoding] = _T(""); // cache the failure
2708 }
2709 #endif // wxUSE_FONTMAP
2710 }
2711 #endif // HAVE_ICONV
2712
2713 #ifdef wxHAVE_WIN32_MB2WC
2714 {
2715 #if wxUSE_FONTMAP
2716 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2717 : new wxMBConv_win32(m_encoding);
2718 if ( conv->IsOk() )
2719 return conv;
2720
2721 delete conv;
2722 #else
2723 return NULL;
2724 #endif
2725 }
2726 #endif // wxHAVE_WIN32_MB2WC
2727 #if defined(__WXMAC__)
2728 {
2729 // leave UTF16 and UTF32 to the built-ins of wx
2730 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2731 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2732 {
2733
2734 #if wxUSE_FONTMAP
2735 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2736 : new wxMBConv_mac(m_encoding);
2737 #else
2738 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2739 #endif
2740 if ( conv->IsOk() )
2741 return conv;
2742
2743 delete conv;
2744 }
2745 }
2746 #endif
2747 #if defined(__WXCOCOA__)
2748 {
2749 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2750 {
2751
2752 #if wxUSE_FONTMAP
2753 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2754 : new wxMBConv_cocoa(m_encoding);
2755 #else
2756 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2757 #endif
2758 if ( conv->IsOk() )
2759 return conv;
2760
2761 delete conv;
2762 }
2763 }
2764 #endif
2765 // step (2)
2766 wxFontEncoding enc = m_encoding;
2767 #if wxUSE_FONTMAP
2768 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2769 {
2770 // use "false" to suppress interactive dialogs -- we can be called from
2771 // anywhere and popping up a dialog from here is the last thing we want to
2772 // do
2773 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2774 }
2775 #endif // wxUSE_FONTMAP
2776
2777 switch ( enc )
2778 {
2779 case wxFONTENCODING_UTF7:
2780 return new wxMBConvUTF7;
2781
2782 case wxFONTENCODING_UTF8:
2783 return new wxMBConvUTF8;
2784
2785 case wxFONTENCODING_UTF16BE:
2786 return new wxMBConvUTF16BE;
2787
2788 case wxFONTENCODING_UTF16LE:
2789 return new wxMBConvUTF16LE;
2790
2791 case wxFONTENCODING_UTF32BE:
2792 return new wxMBConvUTF32BE;
2793
2794 case wxFONTENCODING_UTF32LE:
2795 return new wxMBConvUTF32LE;
2796
2797 default:
2798 // nothing to do but put here to suppress gcc warnings
2799 ;
2800 }
2801
2802 // step (3)
2803 #if wxUSE_FONTMAP
2804 {
2805 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2806 : new wxMBConv_wxwin(m_encoding);
2807 if ( conv->IsOk() )
2808 return conv;
2809
2810 delete conv;
2811 }
2812 #endif // wxUSE_FONTMAP
2813
2814 // NB: This is a hack to prevent deadlock. What could otherwise happen
2815 // in Unicode build: wxConvLocal creation ends up being here
2816 // because of some failure and logs the error. But wxLog will try to
2817 // attach timestamp, for which it will need wxConvLocal (to convert
2818 // time to char* and then wchar_t*), but that fails, tries to log
2819 // error, but wxLog has a (already locked) critical section that
2820 // guards static buffer.
2821 static bool alreadyLoggingError = false;
2822 if (!alreadyLoggingError)
2823 {
2824 alreadyLoggingError = true;
2825 wxLogError(_("Cannot convert from the charset '%s'!"),
2826 m_name ? m_name
2827 :
2828 #if wxUSE_FONTMAP
2829 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2830 #else // !wxUSE_FONTMAP
2831 wxString::Format(_("encoding %s"), m_encoding).c_str()
2832 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2833 );
2834 alreadyLoggingError = false;
2835 }
2836
2837 return NULL;
2838 }
2839
2840 void wxCSConv::CreateConvIfNeeded() const
2841 {
2842 if ( m_deferred )
2843 {
2844 wxCSConv *self = (wxCSConv *)this; // const_cast
2845
2846 #if wxUSE_INTL
2847 // if we don't have neither the name nor the encoding, use the default
2848 // encoding for this system
2849 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2850 {
2851 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2852 }
2853 #endif // wxUSE_INTL
2854
2855 self->m_convReal = DoCreate();
2856 self->m_deferred = false;
2857 }
2858 }
2859
2860 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2861 {
2862 CreateConvIfNeeded();
2863
2864 if (m_convReal)
2865 return m_convReal->MB2WC(buf, psz, n);
2866
2867 // latin-1 (direct)
2868 size_t len = strlen(psz);
2869
2870 if (buf)
2871 {
2872 for (size_t c = 0; c <= len; c++)
2873 buf[c] = (unsigned char)(psz[c]);
2874 }
2875
2876 return len;
2877 }
2878
2879 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2880 {
2881 CreateConvIfNeeded();
2882
2883 if (m_convReal)
2884 return m_convReal->WC2MB(buf, psz, n);
2885
2886 // latin-1 (direct)
2887 const size_t len = wxWcslen(psz);
2888 if (buf)
2889 {
2890 for (size_t c = 0; c <= len; c++)
2891 {
2892 if (psz[c] > 0xFF)
2893 return (size_t)-1;
2894 buf[c] = (char)psz[c];
2895 }
2896 }
2897 else
2898 {
2899 for (size_t c = 0; c <= len; c++)
2900 {
2901 if (psz[c] > 0xFF)
2902 return (size_t)-1;
2903 }
2904 }
2905
2906 return len;
2907 }
2908
2909 // ----------------------------------------------------------------------------
2910 // globals
2911 // ----------------------------------------------------------------------------
2912
2913 #ifdef __WINDOWS__
2914 static wxMBConv_win32 wxConvLibcObj;
2915 #elif defined(__WXMAC__) && !defined(__MACH__)
2916 static wxMBConv_mac wxConvLibcObj ;
2917 #else
2918 static wxMBConvLibc wxConvLibcObj;
2919 #endif
2920
2921 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2922 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2923 static wxMBConvUTF7 wxConvUTF7Obj;
2924 static wxMBConvUTF8 wxConvUTF8Obj;
2925
2926 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2927 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2928 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2929 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2930 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2931 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2932 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2933 #ifdef __WXOSX__
2934 wxConvUTF8Obj;
2935 #else
2936 wxConvLibcObj;
2937 #endif
2938
2939
2940 #else // !wxUSE_WCHAR_T
2941
2942 // stand-ins in absence of wchar_t
2943 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2944 wxConvISO8859_1,
2945 wxConvLocal,
2946 wxConvUTF8;
2947
2948 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T