]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
f52144b27533b7219ce1cf11ebefdac8a436392a
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
6 // Modified by:
7 // Created: 29/01/98
8 // RCS-ID: $Id$
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
14
15 // ============================================================================
16 // declarations
17 // ============================================================================
18
19 // ----------------------------------------------------------------------------
20 // headers
21 // ----------------------------------------------------------------------------
22
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
25
26 #ifdef __BORLANDC__
27 #pragma hdrstop
28 #endif
29
30 #ifndef WX_PRECOMP
31 #include "wx/intl.h"
32 #include "wx/log.h"
33 #endif // WX_PRECOMP
34
35 #include "wx/strconv.h"
36
37 #if wxUSE_WCHAR_T
38
39 #ifdef __WINDOWS__
40 #include "wx/msw/private.h"
41 #include "wx/msw/missing.h"
42 #endif
43
44 #ifndef __WXWINCE__
45 #include <errno.h>
46 #endif
47
48 #include <ctype.h>
49 #include <string.h>
50 #include <stdlib.h>
51
52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54 #endif // __WIN32__ but !__WXMICROWIN__
55
56 #ifdef __SALFORDC__
57 #include <clib.h>
58 #endif
59
60 #ifdef HAVE_ICONV
61 #include <iconv.h>
62 #include "wx/thread.h"
63 #endif
64
65 #include "wx/encconv.h"
66 #include "wx/fontmap.h"
67 #include "wx/utils.h"
68
69 #ifdef __WXMAC__
70 #ifndef __DARWIN__
71 #include <ATSUnicode.h>
72 #include <TextCommon.h>
73 #include <TextEncodingConverter.h>
74 #endif
75
76 #include "wx/mac/private.h" // includes mac headers
77 #endif
78
79 #define TRACE_STRCONV _T("strconv")
80
81 #if SIZEOF_WCHAR_T == 2
82 #define WC_UTF16
83 #endif
84
85 // ============================================================================
86 // implementation
87 // ============================================================================
88
89 // ----------------------------------------------------------------------------
90 // UTF-16 en/decoding to/from UCS-4
91 // ----------------------------------------------------------------------------
92
93
94 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
95 {
96 if (input<=0xffff)
97 {
98 if (output)
99 *output = (wxUint16) input;
100 return 1;
101 }
102 else if (input>=0x110000)
103 {
104 return (size_t)-1;
105 }
106 else
107 {
108 if (output)
109 {
110 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
111 *output = (wxUint16) ((input&0x3ff)+0xdc00);
112 }
113 return 2;
114 }
115 }
116
117 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
118 {
119 if ((*input<0xd800) || (*input>0xdfff))
120 {
121 output = *input;
122 return 1;
123 }
124 else if ((input[1]<0xdc00) || (input[1]>0xdfff))
125 {
126 output = *input;
127 return (size_t)-1;
128 }
129 else
130 {
131 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
132 return 2;
133 }
134 }
135
136
137 // ----------------------------------------------------------------------------
138 // wxMBConv
139 // ----------------------------------------------------------------------------
140
141 wxMBConv::~wxMBConv()
142 {
143 // nothing to do here (necessary for Darwin linking probably)
144 }
145
146 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
147 {
148 if ( psz )
149 {
150 // calculate the length of the buffer needed first
151 size_t nLen = MB2WC(NULL, psz, 0);
152 if ( nLen != (size_t)-1 )
153 {
154 // now do the actual conversion
155 wxWCharBuffer buf(nLen);
156 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
157 if ( nLen != (size_t)-1 )
158 {
159 return buf;
160 }
161 }
162 }
163
164 wxWCharBuffer buf((wchar_t *)NULL);
165
166 return buf;
167 }
168
169 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
170 {
171 if ( pwz )
172 {
173 size_t nLen = WC2MB(NULL, pwz, 0);
174 if ( nLen != (size_t)-1 )
175 {
176 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
177 nLen = WC2MB(buf.data(), pwz, nLen + 4);
178 if ( nLen != (size_t)-1 )
179 {
180 return buf;
181 }
182 }
183 }
184
185 wxCharBuffer buf((char *)NULL);
186
187 return buf;
188 }
189
190 const wxWCharBuffer
191 wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
192 {
193 // the currently accumulated wide characters
194 wxWCharBuffer wbuf;
195
196 // the current length of wbuf
197 size_t lenBuf = 0;
198
199 // we need to know the representation of L'\0' for this conversion
200 size_t nulLen;
201 const char * const nul = GetMBNul(&nulLen);
202 if ( nulLen == (size_t)-1 || nulLen == 0 )
203 return wxWCharBuffer();
204
205 // make a copy of the input string unless it is already properly
206 // NUL-terminated
207 wxCharBuffer bufTmp;
208
209 // now we can compute the input size if we were not given it: notice that
210 // in this case the string must be properly NUL-terminated, of course, as
211 // otherwise we have no way of knowing how long it is
212 if ( inLen == (size_t)-1 )
213 {
214 // not the most efficient algorithm but it shouldn't matter as normally
215 // there are not many NULs in the string and so normally memcmp()
216 // should stop on the first character
217 const char *p = in;
218 while ( memcmp(p, nul, nulLen) != 0 )
219 p++;
220
221 inLen = p - in + nulLen;
222 }
223 else // we already have the size
224 {
225 // check if it's not already NUL-terminated too to avoid the copy
226 if ( inLen < nulLen || memcmp(in + inLen - nulLen, nul, nulLen) != 0 )
227 {
228 // make a copy in order to properly NUL-terminate the string
229 bufTmp = wxCharBuffer(inLen + nulLen - 1 /* 1 will be added */);
230 memcpy(bufTmp.data(), in, inLen);
231 memcpy(bufTmp.data() + inLen, nul, nulLen);
232 }
233 }
234
235 if ( bufTmp )
236 in = bufTmp;
237
238 for ( const char * const inEnd = in + inLen;; )
239 {
240 // try to convert the current chunk if anything left
241 size_t lenChunk = in < inEnd ? MB2WC(NULL, in, 0) : 0;
242 if ( lenChunk == 0 )
243 {
244 // nothing left in the input string, conversion succeeded
245 if ( outLen )
246 {
247 // we shouldn't include the last NUL in the result length
248 *outLen = lenBuf ? lenBuf - 1 : 0;
249 }
250
251 return wbuf;
252 }
253
254 if ( lenChunk == (size_t)-1 )
255 break;
256
257 const size_t lenBufNew = lenBuf + lenChunk;
258 if ( !wbuf.extend(lenBufNew) )
259 break;
260
261 lenChunk = MB2WC(wbuf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
262 if ( lenChunk == (size_t)-1 )
263 break;
264
265 // +! for the embedded NUL (if something follows)
266 lenBuf = lenBufNew + 1;
267
268 // advance the input pointer past the end of this chunk
269 while ( memcmp(in, nul, nulLen) != 0 )
270 in++;
271
272 in += nulLen; // skipping over its terminator as well
273 }
274
275 // conversion failed
276 if ( outLen )
277 *outLen = 0;
278
279 return wxWCharBuffer();
280 }
281
282 const wxCharBuffer
283 wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
284 {
285 // the currently accumulated multibyte characters
286 wxCharBuffer buf;
287
288 // the current length of buf
289 size_t lenBuf = 0;
290
291 // make a copy of the input string unless it is already properly
292 // NUL-terminated
293 //
294 // if we don't know its length we have no choice but to assume that it is,
295 // indeed, properly terminated
296 wxWCharBuffer bufTmp;
297 if ( inLen == (size_t)-1 )
298 {
299 inLen = wxWcslen(in) + 1;
300 }
301 else if ( inLen != 0 && in[inLen - 1] != L'\0' )
302 {
303 // make a copy in order to properly NUL-terminate the string
304 bufTmp = wxWCharBuffer(inLen);
305 memcpy(bufTmp.data(), in, inLen*sizeof(wchar_t));
306 }
307
308 if ( bufTmp )
309 in = bufTmp;
310
311 for ( const wchar_t * const inEnd = in + inLen;; )
312 {
313 // try to convert the current chunk, if anything left
314 size_t lenChunk = in < inEnd ? WC2MB(NULL, in, 0) : 0;
315 if ( lenChunk == 0 )
316 {
317 // nothing left in the input string, conversion succeeded
318 if ( outLen )
319 *outLen = lenBuf ? lenBuf - 1 : lenBuf;
320
321 return buf;
322 }
323
324 if ( lenChunk == (size_t)-1 )
325 break;
326
327 const size_t lenBufNew = lenBuf + lenChunk;
328 if ( !buf.extend(lenBufNew) )
329 break;
330
331 lenChunk = WC2MB(buf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
332 if ( lenChunk == (size_t)-1 )
333 break;
334
335 // chunk successfully converted, go to the next one
336 in += wxWcslen(in) + 1 /* skip NUL too */;
337 lenBuf = lenBufNew + 1;
338 }
339
340 // conversion failed
341 if ( outLen )
342 *outLen = 0;
343
344 return wxCharBuffer();
345 }
346
347 // ----------------------------------------------------------------------------
348 // wxMBConvLibc
349 // ----------------------------------------------------------------------------
350
351 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
352 {
353 return wxMB2WC(buf, psz, n);
354 }
355
356 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
357 {
358 return wxWC2MB(buf, psz, n);
359 }
360
361 // ----------------------------------------------------------------------------
362 // wxConvBrokenFileNames
363 // ----------------------------------------------------------------------------
364
365 #ifdef __UNIX__
366
367 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
368 {
369 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
370 || wxStricmp(charset, _T("UTF8")) == 0 )
371 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
372 else
373 m_conv = new wxCSConv(charset);
374 }
375
376 #endif // __UNIX__
377
378 // ----------------------------------------------------------------------------
379 // UTF-7
380 // ----------------------------------------------------------------------------
381
382 // Implementation (C) 2004 Fredrik Roubert
383
//
// BASE64 decoding table
//
// Indexed by the byte value: contains the 6 bit value encoded by the
// characters 'A'..'Z', 'a'..'z', '0'..'9', '+' and '/' and 0xff for all the
// other bytes.
//
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0'..'9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x5f: 'A'..'Z'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x7f: 'a'..'z'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
422
423 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
424 {
425 size_t len = 0;
426
427 while ( *psz && (!buf || (len < n)) )
428 {
429 unsigned char cc = *psz++;
430 if (cc != '+')
431 {
432 // plain ASCII char
433 if (buf)
434 *buf++ = cc;
435 len++;
436 }
437 else if (*psz == '-')
438 {
439 // encoded plus sign
440 if (buf)
441 *buf++ = cc;
442 len++;
443 psz++;
444 }
445 else // start of BASE64 encoded string
446 {
447 bool lsb, ok;
448 unsigned int d, l;
449 for ( ok = lsb = false, d = 0, l = 0;
450 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
451 psz++ )
452 {
453 d <<= 6;
454 d += cc;
455 for (l += 6; l >= 8; lsb = !lsb)
456 {
457 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
458 if (lsb)
459 {
460 if (buf)
461 *buf++ |= c;
462 len ++;
463 }
464 else
465 {
466 if (buf)
467 *buf = (wchar_t)(c << 8);
468 }
469
470 ok = true;
471 }
472 }
473
474 if ( !ok )
475 {
476 // in valid UTF7 we should have valid characters after '+'
477 return (size_t)-1;
478 }
479
480 if (*psz == '-')
481 psz++;
482 }
483 }
484
485 if ( buf && (len < n) )
486 *buf = '\0';
487
488 return len;
489 }
490
//
// BASE64 encoding table
//
// Maps each 6 bit value (0..63) to the character representing it.
//
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
};
505
//
// UTF-7 encoding table
//
// Gives the class of each of the 128 ASCII characters:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    /* 0x00 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    /* 0x10 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x20 */ 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    /* 0x40 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    /* 0x60 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
525
526 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
527 {
528 size_t len = 0;
529
530 while (*psz && ((!buf) || (len < n)))
531 {
532 wchar_t cc = *psz++;
533 if (cc < 0x80 && utf7encode[cc] < 1)
534 {
535 // plain ASCII char
536 if (buf)
537 *buf++ = (char)cc;
538 len++;
539 }
540 #ifndef WC_UTF16
541 else if (((wxUint32)cc) > 0xffff)
542 {
543 // no surrogate pair generation (yet?)
544 return (size_t)-1;
545 }
546 #endif
547 else
548 {
549 if (buf)
550 *buf++ = '+';
551 len++;
552 if (cc != '+')
553 {
554 // BASE64 encode string
555 unsigned int lsb, d, l;
556 for (d = 0, l = 0; /*nothing*/; psz++)
557 {
558 for (lsb = 0; lsb < 2; lsb ++)
559 {
560 d <<= 8;
561 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
562
563 for (l += 8; l >= 6; )
564 {
565 l -= 6;
566 if (buf)
567 *buf++ = utf7enb64[(d >> l) % 64];
568 len++;
569 }
570 }
571 cc = *psz;
572 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
573 break;
574 }
575 if (l != 0)
576 {
577 if (buf)
578 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
579 len++;
580 }
581 }
582 if (buf)
583 *buf++ = '-';
584 len++;
585 }
586 }
587 if (buf && (len < n))
588 *buf = 0;
589 return len;
590 }
591
592 // ----------------------------------------------------------------------------
593 // UTF-8
594 // ----------------------------------------------------------------------------
595
596 static wxUint32 utf8_max[]=
597 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
598
599 // boundaries of the private use area we use to (temporarily) remap invalid
600 // characters invalid in a UTF-8 encoded string
601 const wxUint32 wxUnicodePUA = 0x100000;
602 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
603
604 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
605 {
606 size_t len = 0;
607
608 while (*psz && ((!buf) || (len < n)))
609 {
610 const char *opsz = psz;
611 bool invalid = false;
612 unsigned char cc = *psz++, fc = cc;
613 unsigned cnt;
614 for (cnt = 0; fc & 0x80; cnt++)
615 fc <<= 1;
616 if (!cnt)
617 {
618 // plain ASCII char
619 if (buf)
620 *buf++ = cc;
621 len++;
622
623 // escape the escape character for octal escapes
624 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
625 && cc == '\\' && (!buf || len < n))
626 {
627 if (buf)
628 *buf++ = cc;
629 len++;
630 }
631 }
632 else
633 {
634 cnt--;
635 if (!cnt)
636 {
637 // invalid UTF-8 sequence
638 invalid = true;
639 }
640 else
641 {
642 unsigned ocnt = cnt - 1;
643 wxUint32 res = cc & (0x3f >> cnt);
644 while (cnt--)
645 {
646 cc = *psz;
647 if ((cc & 0xC0) != 0x80)
648 {
649 // invalid UTF-8 sequence
650 invalid = true;
651 break;
652 }
653 psz++;
654 res = (res << 6) | (cc & 0x3f);
655 }
656 if (invalid || res <= utf8_max[ocnt])
657 {
658 // illegal UTF-8 encoding
659 invalid = true;
660 }
661 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
662 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
663 {
664 // if one of our PUA characters turns up externally
665 // it must also be treated as an illegal sequence
666 // (a bit like you have to escape an escape character)
667 invalid = true;
668 }
669 else
670 {
671 #ifdef WC_UTF16
672 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
673 size_t pa = encode_utf16(res, (wxUint16 *)buf);
674 if (pa == (size_t)-1)
675 {
676 invalid = true;
677 }
678 else
679 {
680 if (buf)
681 buf += pa;
682 len += pa;
683 }
684 #else // !WC_UTF16
685 if (buf)
686 *buf++ = (wchar_t)res;
687 len++;
688 #endif // WC_UTF16/!WC_UTF16
689 }
690 }
691 if (invalid)
692 {
693 if (m_options & MAP_INVALID_UTF8_TO_PUA)
694 {
695 while (opsz < psz && (!buf || len < n))
696 {
697 #ifdef WC_UTF16
698 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
699 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
700 wxASSERT(pa != (size_t)-1);
701 if (buf)
702 buf += pa;
703 opsz++;
704 len += pa;
705 #else
706 if (buf)
707 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
708 opsz++;
709 len++;
710 #endif
711 }
712 }
713 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
714 {
715 while (opsz < psz && (!buf || len < n))
716 {
717 if ( buf && len + 3 < n )
718 {
719 unsigned char on = *opsz;
720 *buf++ = L'\\';
721 *buf++ = (wchar_t)( L'0' + on / 0100 );
722 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
723 *buf++ = (wchar_t)( L'0' + on % 010 );
724 }
725 opsz++;
726 len += 4;
727 }
728 }
729 else // MAP_INVALID_UTF8_NOT
730 {
731 return (size_t)-1;
732 }
733 }
734 }
735 }
736 if (buf && (len < n))
737 *buf = 0;
738 return len;
739 }
740
// Return true if wch is one of the octal digits, i.e. L'0' to L'7' inclusive.
static inline bool isoctal(wchar_t wch)
{
    if ( wch < L'0' )
        return false;

    return wch <= L'7';
}
745
746 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
747 {
748 size_t len = 0;
749
750 while (*psz && ((!buf) || (len < n)))
751 {
752 wxUint32 cc;
753 #ifdef WC_UTF16
754 // cast is ok for WC_UTF16
755 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
756 psz += (pa == (size_t)-1) ? 1 : pa;
757 #else
758 cc=(*psz++) & 0x7fffffff;
759 #endif
760
761 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
762 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
763 {
764 if (buf)
765 *buf++ = (char)(cc - wxUnicodePUA);
766 len++;
767 }
768 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
769 && cc == L'\\' && psz[0] == L'\\' )
770 {
771 if (buf)
772 *buf++ = (char)cc;
773 psz++;
774 len++;
775 }
776 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
777 cc == L'\\' &&
778 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
779 {
780 if (buf)
781 {
782 *buf++ = (char) ((psz[0] - L'0')*0100 +
783 (psz[1] - L'0')*010 +
784 (psz[2] - L'0'));
785 }
786
787 psz += 3;
788 len++;
789 }
790 else
791 {
792 unsigned cnt;
793 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
794 if (!cnt)
795 {
796 // plain ASCII char
797 if (buf)
798 *buf++ = (char) cc;
799 len++;
800 }
801
802 else
803 {
804 len += cnt + 1;
805 if (buf)
806 {
807 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
808 while (cnt--)
809 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
810 }
811 }
812 }
813 }
814
815 if (buf && (len<n))
816 *buf = 0;
817
818 return len;
819 }
820
821 // ----------------------------------------------------------------------------
822 // UTF-16
823 // ----------------------------------------------------------------------------
824
825 #ifdef WORDS_BIGENDIAN
826 #define wxMBConvUTF16straight wxMBConvUTF16BE
827 #define wxMBConvUTF16swap wxMBConvUTF16LE
828 #else
829 #define wxMBConvUTF16swap wxMBConvUTF16BE
830 #define wxMBConvUTF16straight wxMBConvUTF16LE
831 #endif
832
833
834 #ifdef WC_UTF16
835
836 // copy 16bit MB to 16bit String
837 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
838 {
839 size_t len=0;
840
841 while (*(wxUint16*)psz && (!buf || len < n))
842 {
843 if (buf)
844 *buf++ = *(wxUint16*)psz;
845 len++;
846
847 psz += sizeof(wxUint16);
848 }
849 if (buf && len<n) *buf=0;
850
851 return len;
852 }
853
854
855 // copy 16bit String to 16bit MB
856 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
857 {
858 size_t len=0;
859
860 while (*psz && (!buf || len < n))
861 {
862 if (buf)
863 {
864 *(wxUint16*)buf = *psz;
865 buf += sizeof(wxUint16);
866 }
867 len += sizeof(wxUint16);
868 psz++;
869 }
870 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
871
872 return len;
873 }
874
875
876 // swap 16bit MB to 16bit String
877 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
878 {
879 size_t len = 0;
880
881 // UTF16 string must be terminated by 2 NULs as single NULs may occur
882 // inside the string
883 while ( (psz[0] || psz[1]) && (!buf || len < n) )
884 {
885 if ( buf )
886 {
887 ((char *)buf)[0] = psz[1];
888 ((char *)buf)[1] = psz[0];
889 buf++;
890 }
891 len++;
892 psz += 2;
893 }
894
895 if ( buf && len < n )
896 *buf = L'\0';
897
898 return len;
899 }
900
901
902 // swap 16bit MB to 16bit String
903 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
904 {
905 size_t len = 0;
906
907 while ( *psz && (!buf || len < n) )
908 {
909 if ( buf )
910 {
911 *buf++ = ((char*)psz)[1];
912 *buf++ = ((char*)psz)[0];
913 }
914 len += 2;
915 psz++;
916 }
917
918 if ( buf && len < n )
919 *buf = '\0';
920
921 return len;
922 }
923
924
925 #else // WC_UTF16
926
927
928 // copy 16bit MB to 32bit String
929 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
930 {
931 size_t len=0;
932
933 while (*(wxUint16*)psz && (!buf || len < n))
934 {
935 wxUint32 cc;
936 size_t pa=decode_utf16((wxUint16*)psz, cc);
937 if (pa == (size_t)-1)
938 return pa;
939
940 if (buf)
941 *buf++ = (wchar_t)cc;
942 len++;
943 psz += pa * sizeof(wxUint16);
944 }
945 if (buf && len<n) *buf=0;
946
947 return len;
948 }
949
950
951 // copy 32bit String to 16bit MB
952 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
953 {
954 size_t len=0;
955
956 while (*psz && (!buf || len < n))
957 {
958 wxUint16 cc[2];
959 size_t pa=encode_utf16(*psz, cc);
960
961 if (pa == (size_t)-1)
962 return pa;
963
964 if (buf)
965 {
966 *(wxUint16*)buf = cc[0];
967 buf += sizeof(wxUint16);
968 if (pa > 1)
969 {
970 *(wxUint16*)buf = cc[1];
971 buf += sizeof(wxUint16);
972 }
973 }
974
975 len += pa*sizeof(wxUint16);
976 psz++;
977 }
978 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
979
980 return len;
981 }
982
983
984 // swap 16bit MB to 32bit String
985 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
986 {
987 size_t len=0;
988
989 while (*(wxUint16*)psz && (!buf || len < n))
990 {
991 wxUint32 cc;
992 char tmp[4];
993 tmp[0]=psz[1]; tmp[1]=psz[0];
994 tmp[2]=psz[3]; tmp[3]=psz[2];
995
996 size_t pa=decode_utf16((wxUint16*)tmp, cc);
997 if (pa == (size_t)-1)
998 return pa;
999
1000 if (buf)
1001 *buf++ = (wchar_t)cc;
1002
1003 len++;
1004 psz += pa * sizeof(wxUint16);
1005 }
1006 if (buf && len<n) *buf=0;
1007
1008 return len;
1009 }
1010
1011
1012 // swap 32bit String to 16bit MB
1013 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1014 {
1015 size_t len=0;
1016
1017 while (*psz && (!buf || len < n))
1018 {
1019 wxUint16 cc[2];
1020 size_t pa=encode_utf16(*psz, cc);
1021
1022 if (pa == (size_t)-1)
1023 return pa;
1024
1025 if (buf)
1026 {
1027 *buf++ = ((char*)cc)[1];
1028 *buf++ = ((char*)cc)[0];
1029 if (pa > 1)
1030 {
1031 *buf++ = ((char*)cc)[3];
1032 *buf++ = ((char*)cc)[2];
1033 }
1034 }
1035
1036 len += pa*sizeof(wxUint16);
1037 psz++;
1038 }
1039 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1040
1041 return len;
1042 }
1043
1044 #endif // WC_UTF16
1045
1046
1047 // ----------------------------------------------------------------------------
1048 // UTF-32
1049 // ----------------------------------------------------------------------------
1050
1051 #ifdef WORDS_BIGENDIAN
1052 #define wxMBConvUTF32straight wxMBConvUTF32BE
1053 #define wxMBConvUTF32swap wxMBConvUTF32LE
1054 #else
1055 #define wxMBConvUTF32swap wxMBConvUTF32BE
1056 #define wxMBConvUTF32straight wxMBConvUTF32LE
1057 #endif
1058
1059
1060 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1061 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1062
1063
1064 #ifdef WC_UTF16
1065
1066 // copy 32bit MB to 16bit String
1067 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1068 {
1069 size_t len=0;
1070
1071 while (*(wxUint32*)psz && (!buf || len < n))
1072 {
1073 wxUint16 cc[2];
1074
1075 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1076 if (pa == (size_t)-1)
1077 return pa;
1078
1079 if (buf)
1080 {
1081 *buf++ = cc[0];
1082 if (pa > 1)
1083 *buf++ = cc[1];
1084 }
1085 len += pa;
1086 psz += sizeof(wxUint32);
1087 }
1088 if (buf && len<n) *buf=0;
1089
1090 return len;
1091 }
1092
1093
1094 // copy 16bit String to 32bit MB
1095 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1096 {
1097 size_t len=0;
1098
1099 while (*psz && (!buf || len < n))
1100 {
1101 wxUint32 cc;
1102
1103 // cast is ok for WC_UTF16
1104 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1105 if (pa == (size_t)-1)
1106 return pa;
1107
1108 if (buf)
1109 {
1110 *(wxUint32*)buf = cc;
1111 buf += sizeof(wxUint32);
1112 }
1113 len += sizeof(wxUint32);
1114 psz += pa;
1115 }
1116
1117 if (buf && len<=n-sizeof(wxUint32))
1118 *(wxUint32*)buf=0;
1119
1120 return len;
1121 }
1122
1123
1124
1125 // swap 32bit MB to 16bit String
1126 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1127 {
1128 size_t len=0;
1129
1130 while (*(wxUint32*)psz && (!buf || len < n))
1131 {
1132 char tmp[4];
1133 tmp[0] = psz[3]; tmp[1] = psz[2];
1134 tmp[2] = psz[1]; tmp[3] = psz[0];
1135
1136
1137 wxUint16 cc[2];
1138
1139 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1140 if (pa == (size_t)-1)
1141 return pa;
1142
1143 if (buf)
1144 {
1145 *buf++ = cc[0];
1146 if (pa > 1)
1147 *buf++ = cc[1];
1148 }
1149 len += pa;
1150 psz += sizeof(wxUint32);
1151 }
1152
1153 if (buf && len<n)
1154 *buf=0;
1155
1156 return len;
1157 }
1158
1159
1160 // swap 16bit String to 32bit MB
1161 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1162 {
1163 size_t len=0;
1164
1165 while (*psz && (!buf || len < n))
1166 {
1167 char cc[4];
1168
1169 // cast is ok for WC_UTF16
1170 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1171 if (pa == (size_t)-1)
1172 return pa;
1173
1174 if (buf)
1175 {
1176 *buf++ = cc[3];
1177 *buf++ = cc[2];
1178 *buf++ = cc[1];
1179 *buf++ = cc[0];
1180 }
1181 len += sizeof(wxUint32);
1182 psz += pa;
1183 }
1184
1185 if (buf && len<=n-sizeof(wxUint32))
1186 *(wxUint32*)buf=0;
1187
1188 return len;
1189 }
1190
1191 #else // WC_UTF16
1192
1193
1194 // copy 32bit MB to 32bit String
1195 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1196 {
1197 size_t len=0;
1198
1199 while (*(wxUint32*)psz && (!buf || len < n))
1200 {
1201 if (buf)
1202 *buf++ = (wchar_t)(*(wxUint32*)psz);
1203 len++;
1204 psz += sizeof(wxUint32);
1205 }
1206
1207 if (buf && len<n)
1208 *buf=0;
1209
1210 return len;
1211 }
1212
1213
1214 // copy 32bit String to 32bit MB
1215 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1216 {
1217 size_t len=0;
1218
1219 while (*psz && (!buf || len < n))
1220 {
1221 if (buf)
1222 {
1223 *(wxUint32*)buf = *psz;
1224 buf += sizeof(wxUint32);
1225 }
1226
1227 len += sizeof(wxUint32);
1228 psz++;
1229 }
1230
1231 if (buf && len<=n-sizeof(wxUint32))
1232 *(wxUint32*)buf=0;
1233
1234 return len;
1235 }
1236
1237
1238 // swap 32bit MB to 32bit String
1239 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1240 {
1241 size_t len=0;
1242
1243 while (*(wxUint32*)psz && (!buf || len < n))
1244 {
1245 if (buf)
1246 {
1247 ((char *)buf)[0] = psz[3];
1248 ((char *)buf)[1] = psz[2];
1249 ((char *)buf)[2] = psz[1];
1250 ((char *)buf)[3] = psz[0];
1251 buf++;
1252 }
1253 len++;
1254 psz += sizeof(wxUint32);
1255 }
1256
1257 if (buf && len<n)
1258 *buf=0;
1259
1260 return len;
1261 }
1262
1263
1264 // swap 32bit String to 32bit MB
1265 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1266 {
1267 size_t len=0;
1268
1269 while (*psz && (!buf || len < n))
1270 {
1271 if (buf)
1272 {
1273 *buf++ = ((char *)psz)[3];
1274 *buf++ = ((char *)psz)[2];
1275 *buf++ = ((char *)psz)[1];
1276 *buf++ = ((char *)psz)[0];
1277 }
1278 len += sizeof(wxUint32);
1279 psz++;
1280 }
1281
1282 if (buf && len<=n-sizeof(wxUint32))
1283 *(wxUint32*)buf=0;
1284
1285 return len;
1286 }
1287
1288
1289 #endif // WC_UTF16
1290
1291
1292 // ============================================================================
1293 // The classes doing conversion using the iconv_xxx() functions
1294 // ============================================================================
1295
1296 #ifdef HAVE_ICONV
1297
1298 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1299 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1300 // (unless there's yet another bug in glibc) the only case when iconv()
1301 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1302 // left in the input buffer -- when _real_ error occurs,
1303 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1304 // iconv() failure.
1305 // [This bug does not appear in glibc 2.2.]
1306 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1307 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1308 (errno != E2BIG || bufLeft != 0))
1309 #else
1310 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1311 #endif
1312
1313 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1314
1315 #define ICONV_T_INVALID ((iconv_t)-1)
1316
1317 #if SIZEOF_WCHAR_T == 4
1318 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1319 #define WC_ENC wxFONTENCODING_UTF32
1320 #elif SIZEOF_WCHAR_T == 2
1321 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1322 #define WC_ENC wxFONTENCODING_UTF16
1323 #else // sizeof(wchar_t) != 2 nor 4
1324 // does this ever happen?
1325 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1326 #endif
1327
1328 // ----------------------------------------------------------------------------
1329 // wxMBConv_iconv: encapsulates an iconv character set
1330 // ----------------------------------------------------------------------------
1331
1332 class wxMBConv_iconv : public wxMBConv
1333 {
1334 public:
1335 wxMBConv_iconv(const wxChar *name);
1336 virtual ~wxMBConv_iconv();
1337
1338 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1339 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1340
1341 bool IsOk() const
1342 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1343
1344 protected:
1345 // the iconv handlers used to translate from multibyte to wide char and in
1346 // the other direction
1347 iconv_t m2w,
1348 w2m;
1349 #if wxUSE_THREADS
1350 // guards access to m2w and w2m objects
1351 wxMutex m_iconvMutex;
1352 #endif
1353
1354 private:
1355 virtual const char *GetMBNul(size_t *nulLen) const;
1356
1357 // the name (for iconv_open()) of a wide char charset -- if none is
1358 // available on this machine, it will remain NULL
1359 static wxString ms_wcCharsetName;
1360
1361 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1362 // different endian-ness than the native one
1363 static bool ms_wcNeedsSwap;
1364
1365 // NUL representation
1366 size_t m_nulLen;
1367 char m_nulBuf[8];
1368 };
1369
1370 // make the constructor available for unit testing
1371 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1372 {
1373 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1374 if ( !result->IsOk() )
1375 {
1376 delete result;
1377 return 0;
1378 }
1379 return result;
1380 }
1381
1382 wxString wxMBConv_iconv::ms_wcCharsetName;
1383 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1384
1385 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1386 {
1387 m_nulLen = (size_t)-2;
1388
1389 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1390 // names for the charsets
1391 const wxCharBuffer cname(wxString(name).ToAscii());
1392
1393 // check for charset that represents wchar_t:
1394 if ( ms_wcCharsetName.empty() )
1395 {
1396 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1397
1398 #if wxUSE_FONTMAP
1399 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1400 #else // !wxUSE_FONTMAP
1401 static const wxChar *names[] =
1402 {
1403 #if SIZEOF_WCHAR_T == 4
1404 _T("UCS-4"),
1405 #elif SIZEOF_WCHAR_T = 2
1406 _T("UCS-2"),
1407 #endif
1408 NULL
1409 };
1410 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1411
1412 for ( ; *names && ms_wcCharsetName.empty(); ++names )
1413 {
1414 const wxString nameCS(*names);
1415
1416 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1417 wxString nameXE(nameCS);
1418 #ifdef WORDS_BIGENDIAN
1419 nameXE += _T("BE");
1420 #else // little endian
1421 nameXE += _T("LE");
1422 #endif
1423
1424 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1425 nameXE.c_str());
1426
1427 m2w = iconv_open(nameXE.ToAscii(), cname);
1428 if ( m2w == ICONV_T_INVALID )
1429 {
1430 // try charset w/o bytesex info (e.g. "UCS4")
1431 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1432 nameCS.c_str());
1433 m2w = iconv_open(nameCS.ToAscii(), cname);
1434
1435 // and check for bytesex ourselves:
1436 if ( m2w != ICONV_T_INVALID )
1437 {
1438 char buf[2], *bufPtr;
1439 wchar_t wbuf[2], *wbufPtr;
1440 size_t insz, outsz;
1441 size_t res;
1442
1443 buf[0] = 'A';
1444 buf[1] = 0;
1445 wbuf[0] = 0;
1446 insz = 2;
1447 outsz = SIZEOF_WCHAR_T * 2;
1448 wbufPtr = wbuf;
1449 bufPtr = buf;
1450
1451 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1452 (char**)&wbufPtr, &outsz);
1453
1454 if (ICONV_FAILED(res, insz))
1455 {
1456 wxLogLastError(wxT("iconv"));
1457 wxLogError(_("Conversion to charset '%s' doesn't work."),
1458 nameCS.c_str());
1459 }
1460 else // ok, can convert to this encoding, remember it
1461 {
1462 ms_wcCharsetName = nameCS;
1463 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1464 }
1465 }
1466 }
1467 else // use charset not requiring byte swapping
1468 {
1469 ms_wcCharsetName = nameXE;
1470 }
1471 }
1472
1473 wxLogTrace(TRACE_STRCONV,
1474 wxT("iconv wchar_t charset is \"%s\"%s"),
1475 ms_wcCharsetName.empty() ? _T("<none>")
1476 : ms_wcCharsetName.c_str(),
1477 ms_wcNeedsSwap ? _T(" (needs swap)")
1478 : _T(""));
1479 }
1480 else // we already have ms_wcCharsetName
1481 {
1482 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1483 }
1484
1485 if ( ms_wcCharsetName.empty() )
1486 {
1487 w2m = ICONV_T_INVALID;
1488 }
1489 else
1490 {
1491 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1492 if ( w2m == ICONV_T_INVALID )
1493 {
1494 wxLogTrace(TRACE_STRCONV,
1495 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1496 ms_wcCharsetName.c_str(), cname.data());
1497 }
1498 }
1499 }
1500
1501 wxMBConv_iconv::~wxMBConv_iconv()
1502 {
1503 if ( m2w != ICONV_T_INVALID )
1504 iconv_close(m2w);
1505 if ( w2m != ICONV_T_INVALID )
1506 iconv_close(w2m);
1507 }
1508
1509 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1510 {
1511 #if wxUSE_THREADS
1512 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1513 // Unfortunately there is a couple of global wxCSConv objects such as
1514 // wxConvLocal that are used all over wx code, so we have to make sure
1515 // the handle is used by at most one thread at the time. Otherwise
1516 // only a few wx classes would be safe to use from non-main threads
1517 // as MB<->WC conversion would fail "randomly".
1518 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1519 #endif
1520
1521 size_t inbuf = strlen(psz);
1522 size_t outbuf = n * SIZEOF_WCHAR_T;
1523 size_t res, cres;
1524 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1525 wchar_t *bufPtr = buf;
1526 const char *pszPtr = psz;
1527
1528 if (buf)
1529 {
1530 // have destination buffer, convert there
1531 cres = iconv(m2w,
1532 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1533 (char**)&bufPtr, &outbuf);
1534 res = n - (outbuf / SIZEOF_WCHAR_T);
1535
1536 if (ms_wcNeedsSwap)
1537 {
1538 // convert to native endianness
1539 for ( unsigned i = 0; i < res; i++ )
1540 buf[n] = WC_BSWAP(buf[i]);
1541 }
1542
1543 // NB: iconv was given only strlen(psz) characters on input, and so
1544 // it couldn't convert the trailing zero. Let's do it ourselves
1545 // if there's some room left for it in the output buffer.
1546 if (res < n)
1547 buf[res] = 0;
1548 }
1549 else
1550 {
1551 // no destination buffer... convert using temp buffer
1552 // to calculate destination buffer requirement
1553 wchar_t tbuf[8];
1554 res = 0;
1555 do {
1556 bufPtr = tbuf;
1557 outbuf = 8*SIZEOF_WCHAR_T;
1558
1559 cres = iconv(m2w,
1560 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1561 (char**)&bufPtr, &outbuf );
1562
1563 res += 8-(outbuf/SIZEOF_WCHAR_T);
1564 } while ((cres==(size_t)-1) && (errno==E2BIG));
1565 }
1566
1567 if (ICONV_FAILED(cres, inbuf))
1568 {
1569 //VS: it is ok if iconv fails, hence trace only
1570 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1571 return (size_t)-1;
1572 }
1573
1574 return res;
1575 }
1576
1577 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1578 {
1579 #if wxUSE_THREADS
1580 // NB: explained in MB2WC
1581 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1582 #endif
1583
1584 size_t inlen = wxWcslen(psz);
1585 size_t inbuf = inlen * SIZEOF_WCHAR_T;
1586 size_t outbuf = n;
1587 size_t res, cres;
1588
1589 wchar_t *tmpbuf = 0;
1590
1591 if (ms_wcNeedsSwap)
1592 {
1593 // need to copy to temp buffer to switch endianness
1594 // (doing WC_BSWAP twice on the original buffer won't help, as it
1595 // could be in read-only memory, or be accessed in some other thread)
1596 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1597 for ( size_t i = 0; i < inlen; i++ )
1598 tmpbuf[n] = WC_BSWAP(psz[i]);
1599 tmpbuf[inlen] = L'\0';
1600 psz = tmpbuf;
1601 }
1602
1603 if (buf)
1604 {
1605 // have destination buffer, convert there
1606 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1607
1608 res = n-outbuf;
1609
1610 // NB: iconv was given only wcslen(psz) characters on input, and so
1611 // it couldn't convert the trailing zero. Let's do it ourselves
1612 // if there's some room left for it in the output buffer.
1613 if (res < n)
1614 buf[0] = 0;
1615 }
1616 else
1617 {
1618 // no destination buffer... convert using temp buffer
1619 // to calculate destination buffer requirement
1620 char tbuf[16];
1621 res = 0;
1622 do {
1623 buf = tbuf; outbuf = 16;
1624
1625 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1626
1627 res += 16 - outbuf;
1628 } while ((cres==(size_t)-1) && (errno==E2BIG));
1629 }
1630
1631 if (ms_wcNeedsSwap)
1632 {
1633 free(tmpbuf);
1634 }
1635
1636 if (ICONV_FAILED(cres, inbuf))
1637 {
1638 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1639 return (size_t)-1;
1640 }
1641
1642 return res;
1643 }
1644
1645 const char *wxMBConv_iconv::GetMBNul(size_t *nulLen) const
1646 {
1647 if ( m_nulLen == (size_t)-2 )
1648 {
1649 wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1650
1651 #if wxUSE_THREADS
1652 // NB: explained in MB2WC
1653 wxMutexLocker lock(self->m_iconvMutex);
1654 #endif
1655
1656 size_t inLen = 1,
1657 outLen = WXSIZEOF(m_nulBuf);
1658 self->m_nulLen = iconv(w2m, ICONV_CHAR_CAST(L""), &inLen,
1659 (char **)&self->m_nulBuf, &outLen);
1660 }
1661
1662 *nulLen = m_nulLen;
1663 return m_nulBuf;
1664 }
1665
1666 #endif // HAVE_ICONV
1667
1668
1669 // ============================================================================
1670 // Win32 conversion classes
1671 // ============================================================================
1672
1673 #ifdef wxHAVE_WIN32_MB2WC
1674
1675 // from utils.cpp
1676 #if wxUSE_FONTMAP
1677 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1678 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1679 #endif
1680
1681 class wxMBConv_win32 : public wxMBConv
1682 {
1683 public:
1684 wxMBConv_win32()
1685 {
1686 m_CodePage = CP_ACP;
1687 m_nulLen = (size_t)-2;
1688 }
1689
1690 #if wxUSE_FONTMAP
1691 wxMBConv_win32(const wxChar* name)
1692 {
1693 m_CodePage = wxCharsetToCodepage(name);
1694 m_nulLen = (size_t)-2;
1695 }
1696
1697 wxMBConv_win32(wxFontEncoding encoding)
1698 {
1699 m_CodePage = wxEncodingToCodepage(encoding);
1700 m_nulLen = (size_t)-2;
1701 }
1702 #endif // wxUSE_FONTMAP
1703
1704 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1705 {
1706 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1707 // the behaviour is not compatible with the Unix version (using iconv)
1708 // and break the library itself, e.g. wxTextInputStream::NextChar()
1709 // wouldn't work if reading an incomplete MB char didn't result in an
1710 // error
1711 //
1712 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1713 // an error (tested under Windows Server 2003) and apparently it is
1714 // done on purpose, i.e. the function accepts any input in this case
1715 // and although I'd prefer to return error on ill-formed output, our
1716 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1717 // explicitly ill-formed according to RFC 2152) neither so we don't
1718 // even have any fallback here...
1719 //
1720 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1721 // Win XP or newer and if it is specified on older versions, conversion
1722 // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1723 // fails. So we can only use the flag on newer Windows versions.
1724 // Additionally, the flag is not supported by UTF7, symbol and CJK
1725 // encodings. See here:
1726 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1727 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1728 int flags = 0;
1729 if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
1730 m_CodePage < 50000 &&
1731 IsAtLeastWin2kSP4() )
1732 {
1733 flags = MB_ERR_INVALID_CHARS;
1734 }
1735 else if ( m_CodePage == CP_UTF8 )
1736 {
1737 // Avoid round-trip in the special case of UTF-8 by using our
1738 // own UTF-8 conversion code:
1739 return wxMBConvUTF8().MB2WC(buf, psz, n);
1740 }
1741
1742 const size_t len = ::MultiByteToWideChar
1743 (
1744 m_CodePage, // code page
1745 flags, // flags: fall on error
1746 psz, // input string
1747 -1, // its length (NUL-terminated)
1748 buf, // output string
1749 buf ? n : 0 // size of output buffer
1750 );
1751 if ( !len )
1752 {
1753 // function totally failed
1754 return (size_t)-1;
1755 }
1756
1757 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1758 // check if we succeeded, by doing a double trip:
1759 if ( !flags && buf )
1760 {
1761 const size_t mbLen = strlen(psz);
1762 wxCharBuffer mbBuf(mbLen);
1763 if ( ::WideCharToMultiByte
1764 (
1765 m_CodePage,
1766 0,
1767 buf,
1768 -1,
1769 mbBuf.data(),
1770 mbLen + 1, // size in bytes, not length
1771 NULL,
1772 NULL
1773 ) == 0 ||
1774 strcmp(mbBuf, psz) != 0 )
1775 {
1776 // we didn't obtain the same thing we started from, hence
1777 // the conversion was lossy and we consider that it failed
1778 return (size_t)-1;
1779 }
1780 }
1781
1782 // note that it returns count of written chars for buf != NULL and size
1783 // of the needed buffer for buf == NULL so in either case the length of
1784 // the string (which never includes the terminating NUL) is one less
1785 return len - 1;
1786 }
1787
1788 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1789 {
1790 /*
1791 we have a problem here: by default, WideCharToMultiByte() may
1792 replace characters unrepresentable in the target code page with bad
1793 quality approximations such as turning "1/2" symbol (U+00BD) into
1794 "1" for the code pages which don't have it and we, obviously, want
1795 to avoid this at any price
1796
1797 the trouble is that this function does it _silently_, i.e. it won't
1798 even tell us whether it did or not... Win98/2000 and higher provide
1799 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1800 we have to resort to a round trip, i.e. check that converting back
1801 results in the same string -- this is, of course, expensive but
1802 otherwise we simply can't be sure to not garble the data.
1803 */
1804
1805 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1806 // it doesn't work with CJK encodings (which we test for rather roughly
1807 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1808 // supporting it
1809 BOOL usedDef wxDUMMY_INITIALIZE(false);
1810 BOOL *pUsedDef;
1811 int flags;
1812 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1813 {
1814 // it's our lucky day
1815 flags = WC_NO_BEST_FIT_CHARS;
1816 pUsedDef = &usedDef;
1817 }
1818 else // old system or unsupported encoding
1819 {
1820 flags = 0;
1821 pUsedDef = NULL;
1822 }
1823
1824 const size_t len = ::WideCharToMultiByte
1825 (
1826 m_CodePage, // code page
1827 flags, // either none or no best fit
1828 pwz, // input string
1829 -1, // it is (wide) NUL-terminated
1830 buf, // output buffer
1831 buf ? n : 0, // and its size
1832 NULL, // default "replacement" char
1833 pUsedDef // [out] was it used?
1834 );
1835
1836 if ( !len )
1837 {
1838 // function totally failed
1839 return (size_t)-1;
1840 }
1841
1842 // if we were really converting, check if we succeeded
1843 if ( buf )
1844 {
1845 if ( flags )
1846 {
1847 // check if the conversion failed, i.e. if any replacements
1848 // were done
1849 if ( usedDef )
1850 return (size_t)-1;
1851 }
1852 else // we must resort to double tripping...
1853 {
1854 wxWCharBuffer wcBuf(n);
1855 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1856 wcscmp(wcBuf, pwz) != 0 )
1857 {
1858 // we didn't obtain the same thing we started from, hence
1859 // the conversion was lossy and we consider that it failed
1860 return (size_t)-1;
1861 }
1862 }
1863 }
1864
1865 // see the comment above for the reason of "len - 1"
1866 return len - 1;
1867 }
1868
1869 bool IsOk() const { return m_CodePage != -1; }
1870
1871 private:
1872 static bool CanUseNoBestFit()
1873 {
1874 static int s_isWin98Or2k = -1;
1875
1876 if ( s_isWin98Or2k == -1 )
1877 {
1878 int verMaj, verMin;
1879 switch ( wxGetOsVersion(&verMaj, &verMin) )
1880 {
1881 case wxWIN95:
1882 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1883 break;
1884
1885 case wxWINDOWS_NT:
1886 s_isWin98Or2k = verMaj >= 5;
1887 break;
1888
1889 default:
1890 // unknown, be conseravtive by default
1891 s_isWin98Or2k = 0;
1892 }
1893
1894 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1895 }
1896
1897 return s_isWin98Or2k == 1;
1898 }
1899
1900 static bool IsAtLeastWin2kSP4()
1901 {
1902 #ifdef __WXWINCE__
1903 return false;
1904 #else
1905 static int s_isAtLeastWin2kSP4 = -1;
1906
1907 if ( s_isAtLeastWin2kSP4 == -1 )
1908 {
1909 OSVERSIONINFOEX ver;
1910
1911 memset(&ver, 0, sizeof(ver));
1912 ver.dwOSVersionInfoSize = sizeof(ver);
1913 GetVersionEx((OSVERSIONINFO*)&ver);
1914
1915 s_isAtLeastWin2kSP4 =
1916 ((ver.dwMajorVersion > 5) || // Vista+
1917 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
1918 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
1919 ver.wServicePackMajor >= 4)) // 2000 SP4+
1920 ? 1 : 0;
1921 }
1922
1923 return s_isAtLeastWin2kSP4 == 1;
1924 #endif
1925 }
1926
1927 virtual const char *GetMBNul(size_t *nulLen) const
1928 {
1929 if ( m_nulLen == (size_t)-2 )
1930 {
1931 wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
1932
1933 self->m_nulLen = ::WideCharToMultiByte
1934 (
1935 m_CodePage, // code page
1936 0, // no flags
1937 L"", // input string
1938 1, // translate just NUL
1939 self->m_nulBuf, // output buffer
1940 WXSIZEOF(m_nulBuf), // and its size
1941 NULL, // "replacement" char
1942 NULL // [out] was it used?
1943 );
1944
1945 if ( m_nulLen == 0 )
1946 self->m_nulLen = (size_t)-1;
1947 }
1948
1949 *nulLen = m_nulLen;
1950 return m_nulBuf;
1951 }
1952
1953 long m_CodePage;
1954 size_t m_nulLen;
1955 char m_nulBuf[8];
1956 };
1957
1958 #endif // wxHAVE_WIN32_MB2WC
1959
1960 // ============================================================================
1961 // Cocoa conversion classes
1962 // ============================================================================
1963
1964 #if defined(__WXCOCOA__)
1965
// RN: There is no UTF-32 support in either Core Foundation or
//     Cocoa. Strangely enough, Core Foundation uses UTF-32 internally
//     quite a bit - it's just not public (yet).
1969
1970 #include <CoreFoundation/CFString.h>
1971 #include <CoreFoundation/CFStringEncodingExt.h>
1972
1973 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1974 {
1975 CFStringEncoding enc = kCFStringEncodingInvalidId ;
1976 if ( encoding == wxFONTENCODING_DEFAULT )
1977 {
1978 enc = CFStringGetSystemEncoding();
1979 }
1980 else switch( encoding)
1981 {
1982 case wxFONTENCODING_ISO8859_1 :
1983 enc = kCFStringEncodingISOLatin1 ;
1984 break ;
1985 case wxFONTENCODING_ISO8859_2 :
1986 enc = kCFStringEncodingISOLatin2;
1987 break ;
1988 case wxFONTENCODING_ISO8859_3 :
1989 enc = kCFStringEncodingISOLatin3 ;
1990 break ;
1991 case wxFONTENCODING_ISO8859_4 :
1992 enc = kCFStringEncodingISOLatin4;
1993 break ;
1994 case wxFONTENCODING_ISO8859_5 :
1995 enc = kCFStringEncodingISOLatinCyrillic;
1996 break ;
1997 case wxFONTENCODING_ISO8859_6 :
1998 enc = kCFStringEncodingISOLatinArabic;
1999 break ;
2000 case wxFONTENCODING_ISO8859_7 :
2001 enc = kCFStringEncodingISOLatinGreek;
2002 break ;
2003 case wxFONTENCODING_ISO8859_8 :
2004 enc = kCFStringEncodingISOLatinHebrew;
2005 break ;
2006 case wxFONTENCODING_ISO8859_9 :
2007 enc = kCFStringEncodingISOLatin5;
2008 break ;
2009 case wxFONTENCODING_ISO8859_10 :
2010 enc = kCFStringEncodingISOLatin6;
2011 break ;
2012 case wxFONTENCODING_ISO8859_11 :
2013 enc = kCFStringEncodingISOLatinThai;
2014 break ;
2015 case wxFONTENCODING_ISO8859_13 :
2016 enc = kCFStringEncodingISOLatin7;
2017 break ;
2018 case wxFONTENCODING_ISO8859_14 :
2019 enc = kCFStringEncodingISOLatin8;
2020 break ;
2021 case wxFONTENCODING_ISO8859_15 :
2022 enc = kCFStringEncodingISOLatin9;
2023 break ;
2024
2025 case wxFONTENCODING_KOI8 :
2026 enc = kCFStringEncodingKOI8_R;
2027 break ;
2028 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2029 enc = kCFStringEncodingDOSRussian;
2030 break ;
2031
2032 // case wxFONTENCODING_BULGARIAN :
2033 // enc = ;
2034 // break ;
2035
2036 case wxFONTENCODING_CP437 :
2037 enc =kCFStringEncodingDOSLatinUS ;
2038 break ;
2039 case wxFONTENCODING_CP850 :
2040 enc = kCFStringEncodingDOSLatin1;
2041 break ;
2042 case wxFONTENCODING_CP852 :
2043 enc = kCFStringEncodingDOSLatin2;
2044 break ;
2045 case wxFONTENCODING_CP855 :
2046 enc = kCFStringEncodingDOSCyrillic;
2047 break ;
2048 case wxFONTENCODING_CP866 :
2049 enc =kCFStringEncodingDOSRussian ;
2050 break ;
2051 case wxFONTENCODING_CP874 :
2052 enc = kCFStringEncodingDOSThai;
2053 break ;
2054 case wxFONTENCODING_CP932 :
2055 enc = kCFStringEncodingDOSJapanese;
2056 break ;
2057 case wxFONTENCODING_CP936 :
2058 enc =kCFStringEncodingDOSChineseSimplif ;
2059 break ;
2060 case wxFONTENCODING_CP949 :
2061 enc = kCFStringEncodingDOSKorean;
2062 break ;
2063 case wxFONTENCODING_CP950 :
2064 enc = kCFStringEncodingDOSChineseTrad;
2065 break ;
2066 case wxFONTENCODING_CP1250 :
2067 enc = kCFStringEncodingWindowsLatin2;
2068 break ;
2069 case wxFONTENCODING_CP1251 :
2070 enc =kCFStringEncodingWindowsCyrillic ;
2071 break ;
2072 case wxFONTENCODING_CP1252 :
2073 enc =kCFStringEncodingWindowsLatin1 ;
2074 break ;
2075 case wxFONTENCODING_CP1253 :
2076 enc = kCFStringEncodingWindowsGreek;
2077 break ;
2078 case wxFONTENCODING_CP1254 :
2079 enc = kCFStringEncodingWindowsLatin5;
2080 break ;
2081 case wxFONTENCODING_CP1255 :
2082 enc =kCFStringEncodingWindowsHebrew ;
2083 break ;
2084 case wxFONTENCODING_CP1256 :
2085 enc =kCFStringEncodingWindowsArabic ;
2086 break ;
2087 case wxFONTENCODING_CP1257 :
2088 enc = kCFStringEncodingWindowsBalticRim;
2089 break ;
2090 // This only really encodes to UTF7 (if that) evidently
2091 // case wxFONTENCODING_UTF7 :
2092 // enc = kCFStringEncodingNonLossyASCII ;
2093 // break ;
2094 case wxFONTENCODING_UTF8 :
2095 enc = kCFStringEncodingUTF8 ;
2096 break ;
2097 case wxFONTENCODING_EUC_JP :
2098 enc = kCFStringEncodingEUC_JP;
2099 break ;
2100 case wxFONTENCODING_UTF16 :
2101 enc = kCFStringEncodingUnicode ;
2102 break ;
2103 case wxFONTENCODING_MACROMAN :
2104 enc = kCFStringEncodingMacRoman ;
2105 break ;
2106 case wxFONTENCODING_MACJAPANESE :
2107 enc = kCFStringEncodingMacJapanese ;
2108 break ;
2109 case wxFONTENCODING_MACCHINESETRAD :
2110 enc = kCFStringEncodingMacChineseTrad ;
2111 break ;
2112 case wxFONTENCODING_MACKOREAN :
2113 enc = kCFStringEncodingMacKorean ;
2114 break ;
2115 case wxFONTENCODING_MACARABIC :
2116 enc = kCFStringEncodingMacArabic ;
2117 break ;
2118 case wxFONTENCODING_MACHEBREW :
2119 enc = kCFStringEncodingMacHebrew ;
2120 break ;
2121 case wxFONTENCODING_MACGREEK :
2122 enc = kCFStringEncodingMacGreek ;
2123 break ;
2124 case wxFONTENCODING_MACCYRILLIC :
2125 enc = kCFStringEncodingMacCyrillic ;
2126 break ;
2127 case wxFONTENCODING_MACDEVANAGARI :
2128 enc = kCFStringEncodingMacDevanagari ;
2129 break ;
2130 case wxFONTENCODING_MACGURMUKHI :
2131 enc = kCFStringEncodingMacGurmukhi ;
2132 break ;
2133 case wxFONTENCODING_MACGUJARATI :
2134 enc = kCFStringEncodingMacGujarati ;
2135 break ;
2136 case wxFONTENCODING_MACORIYA :
2137 enc = kCFStringEncodingMacOriya ;
2138 break ;
2139 case wxFONTENCODING_MACBENGALI :
2140 enc = kCFStringEncodingMacBengali ;
2141 break ;
2142 case wxFONTENCODING_MACTAMIL :
2143 enc = kCFStringEncodingMacTamil ;
2144 break ;
2145 case wxFONTENCODING_MACTELUGU :
2146 enc = kCFStringEncodingMacTelugu ;
2147 break ;
2148 case wxFONTENCODING_MACKANNADA :
2149 enc = kCFStringEncodingMacKannada ;
2150 break ;
2151 case wxFONTENCODING_MACMALAJALAM :
2152 enc = kCFStringEncodingMacMalayalam ;
2153 break ;
2154 case wxFONTENCODING_MACSINHALESE :
2155 enc = kCFStringEncodingMacSinhalese ;
2156 break ;
2157 case wxFONTENCODING_MACBURMESE :
2158 enc = kCFStringEncodingMacBurmese ;
2159 break ;
2160 case wxFONTENCODING_MACKHMER :
2161 enc = kCFStringEncodingMacKhmer ;
2162 break ;
2163 case wxFONTENCODING_MACTHAI :
2164 enc = kCFStringEncodingMacThai ;
2165 break ;
2166 case wxFONTENCODING_MACLAOTIAN :
2167 enc = kCFStringEncodingMacLaotian ;
2168 break ;
2169 case wxFONTENCODING_MACGEORGIAN :
2170 enc = kCFStringEncodingMacGeorgian ;
2171 break ;
2172 case wxFONTENCODING_MACARMENIAN :
2173 enc = kCFStringEncodingMacArmenian ;
2174 break ;
2175 case wxFONTENCODING_MACCHINESESIMP :
2176 enc = kCFStringEncodingMacChineseSimp ;
2177 break ;
2178 case wxFONTENCODING_MACTIBETAN :
2179 enc = kCFStringEncodingMacTibetan ;
2180 break ;
2181 case wxFONTENCODING_MACMONGOLIAN :
2182 enc = kCFStringEncodingMacMongolian ;
2183 break ;
2184 case wxFONTENCODING_MACETHIOPIC :
2185 enc = kCFStringEncodingMacEthiopic ;
2186 break ;
2187 case wxFONTENCODING_MACCENTRALEUR :
2188 enc = kCFStringEncodingMacCentralEurRoman ;
2189 break ;
2190 case wxFONTENCODING_MACVIATNAMESE :
2191 enc = kCFStringEncodingMacVietnamese ;
2192 break ;
2193 case wxFONTENCODING_MACARABICEXT :
2194 enc = kCFStringEncodingMacExtArabic ;
2195 break ;
2196 case wxFONTENCODING_MACSYMBOL :
2197 enc = kCFStringEncodingMacSymbol ;
2198 break ;
2199 case wxFONTENCODING_MACDINGBATS :
2200 enc = kCFStringEncodingMacDingbats ;
2201 break ;
2202 case wxFONTENCODING_MACTURKISH :
2203 enc = kCFStringEncodingMacTurkish ;
2204 break ;
2205 case wxFONTENCODING_MACCROATIAN :
2206 enc = kCFStringEncodingMacCroatian ;
2207 break ;
2208 case wxFONTENCODING_MACICELANDIC :
2209 enc = kCFStringEncodingMacIcelandic ;
2210 break ;
2211 case wxFONTENCODING_MACROMANIAN :
2212 enc = kCFStringEncodingMacRomanian ;
2213 break ;
2214 case wxFONTENCODING_MACCELTIC :
2215 enc = kCFStringEncodingMacCeltic ;
2216 break ;
2217 case wxFONTENCODING_MACGAELIC :
2218 enc = kCFStringEncodingMacGaelic ;
2219 break ;
2220 // case wxFONTENCODING_MACKEYBOARD :
2221 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2222 // break ;
2223 default :
2224 // because gcc is picky
2225 break ;
2226 } ;
2227 return enc ;
2228 }
2229
2230 class wxMBConv_cocoa : public wxMBConv
2231 {
2232 public:
2233 wxMBConv_cocoa()
2234 {
2235 Init(CFStringGetSystemEncoding()) ;
2236 }
2237
2238 #if wxUSE_FONTMAP
2239 wxMBConv_cocoa(const wxChar* name)
2240 {
2241 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2242 }
2243 #endif
2244
2245 wxMBConv_cocoa(wxFontEncoding encoding)
2246 {
2247 Init( wxCFStringEncFromFontEnc(encoding) );
2248 }
2249
2250 ~wxMBConv_cocoa()
2251 {
2252 }
2253
2254 void Init( CFStringEncoding encoding)
2255 {
2256 m_encoding = encoding ;
2257 }
2258
2259 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2260 {
2261 wxASSERT(szUnConv);
2262
2263 CFStringRef theString = CFStringCreateWithBytes (
2264 NULL, //the allocator
2265 (const UInt8*)szUnConv,
2266 strlen(szUnConv),
2267 m_encoding,
2268 false //no BOM/external representation
2269 );
2270
2271 wxASSERT(theString);
2272
2273 size_t nOutLength = CFStringGetLength(theString);
2274
2275 if (szOut == NULL)
2276 {
2277 CFRelease(theString);
2278 return nOutLength;
2279 }
2280
2281 CFRange theRange = { 0, nOutSize };
2282
2283 #if SIZEOF_WCHAR_T == 4
2284 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2285 #endif
2286
2287 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2288
2289 CFRelease(theString);
2290
2291 szUniCharBuffer[nOutLength] = '\0' ;
2292
2293 #if SIZEOF_WCHAR_T == 4
2294 wxMBConvUTF16 converter ;
2295 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2296 delete[] szUniCharBuffer;
2297 #endif
2298
2299 return nOutLength;
2300 }
2301
2302 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2303 {
2304 wxASSERT(szUnConv);
2305
2306 size_t nRealOutSize;
2307 size_t nBufSize = wxWcslen(szUnConv);
2308 UniChar* szUniBuffer = (UniChar*) szUnConv;
2309
2310 #if SIZEOF_WCHAR_T == 4
2311 wxMBConvUTF16 converter ;
2312 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2313 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2314 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2315 nBufSize /= sizeof(UniChar);
2316 #endif
2317
2318 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2319 NULL, //allocator
2320 szUniBuffer,
2321 nBufSize,
2322 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2323 );
2324
2325 wxASSERT(theString);
2326
2327 //Note that CER puts a BOM when converting to unicode
2328 //so we check and use getchars instead in that case
2329 if (m_encoding == kCFStringEncodingUnicode)
2330 {
2331 if (szOut != NULL)
2332 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2333
2334 nRealOutSize = CFStringGetLength(theString) + 1;
2335 }
2336 else
2337 {
2338 CFStringGetBytes(
2339 theString,
2340 CFRangeMake(0, CFStringGetLength(theString)),
2341 m_encoding,
2342 0, //what to put in characters that can't be converted -
2343 //0 tells CFString to return NULL if it meets such a character
2344 false, //not an external representation
2345 (UInt8*) szOut,
2346 nOutSize,
2347 (CFIndex*) &nRealOutSize
2348 );
2349 }
2350
2351 CFRelease(theString);
2352
2353 #if SIZEOF_WCHAR_T == 4
2354 delete[] szUniBuffer;
2355 #endif
2356
2357 return nRealOutSize - 1;
2358 }
2359
2360 bool IsOk() const
2361 {
2362 return m_encoding != kCFStringEncodingInvalidId &&
2363 CFStringIsEncodingAvailable(m_encoding);
2364 }
2365
2366 private:
2367 CFStringEncoding m_encoding ;
2368 };
2369
2370 #endif // defined(__WXCOCOA__)
2371
2372 // ============================================================================
2373 // Mac conversion classes
2374 // ============================================================================
2375
2376 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2377
2378 class wxMBConv_mac : public wxMBConv
2379 {
2380 public:
2381 wxMBConv_mac()
2382 {
2383 Init(CFStringGetSystemEncoding()) ;
2384 }
2385
2386 #if wxUSE_FONTMAP
2387 wxMBConv_mac(const wxChar* name)
2388 {
2389 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2390 }
2391 #endif
2392
2393 wxMBConv_mac(wxFontEncoding encoding)
2394 {
2395 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2396 }
2397
2398 ~wxMBConv_mac()
2399 {
2400 OSStatus status = noErr ;
2401 status = TECDisposeConverter(m_MB2WC_converter);
2402 status = TECDisposeConverter(m_WC2MB_converter);
2403 }
2404
2405
2406 void Init( TextEncodingBase encoding)
2407 {
2408 OSStatus status = noErr ;
2409 m_char_encoding = encoding ;
2410 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2411
2412 status = TECCreateConverter(&m_MB2WC_converter,
2413 m_char_encoding,
2414 m_unicode_encoding);
2415 status = TECCreateConverter(&m_WC2MB_converter,
2416 m_unicode_encoding,
2417 m_char_encoding);
2418 }
2419
2420 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2421 {
2422 OSStatus status = noErr ;
2423 ByteCount byteOutLen ;
2424 ByteCount byteInLen = strlen(psz) ;
2425 wchar_t *tbuf = NULL ;
2426 UniChar* ubuf = NULL ;
2427 size_t res = 0 ;
2428
2429 if (buf == NULL)
2430 {
2431 //apple specs say at least 32
2432 n = wxMax( 32 , byteInLen ) ;
2433 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2434 }
2435 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2436 #if SIZEOF_WCHAR_T == 4
2437 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2438 #else
2439 ubuf = (UniChar*) (buf ? buf : tbuf) ;
2440 #endif
2441 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2442 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2443 #if SIZEOF_WCHAR_T == 4
2444 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2445 // is not properly terminated we get random characters at the end
2446 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2447 wxMBConvUTF16 converter ;
2448 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2449 free( ubuf ) ;
2450 #else
2451 res = byteOutLen / sizeof( UniChar ) ;
2452 #endif
2453 if ( buf == NULL )
2454 free(tbuf) ;
2455
2456 if ( buf && res < n)
2457 buf[res] = 0;
2458
2459 return res ;
2460 }
2461
2462 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2463 {
2464 OSStatus status = noErr ;
2465 ByteCount byteOutLen ;
2466 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2467
2468 char *tbuf = NULL ;
2469
2470 if (buf == NULL)
2471 {
2472 //apple specs say at least 32
2473 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2474 tbuf = (char*) malloc( n ) ;
2475 }
2476
2477 ByteCount byteBufferLen = n ;
2478 UniChar* ubuf = NULL ;
2479 #if SIZEOF_WCHAR_T == 4
2480 wxMBConvUTF16 converter ;
2481 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2482 byteInLen = unicharlen ;
2483 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2484 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2485 #else
2486 ubuf = (UniChar*) psz ;
2487 #endif
2488 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2489 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2490 #if SIZEOF_WCHAR_T == 4
2491 free( ubuf ) ;
2492 #endif
2493 if ( buf == NULL )
2494 free(tbuf) ;
2495
2496 size_t res = byteOutLen ;
2497 if ( buf && res < n)
2498 {
2499 buf[res] = 0;
2500
2501 //we need to double-trip to verify it didn't insert any ? in place
2502 //of bogus characters
2503 wxWCharBuffer wcBuf(n);
2504 size_t pszlen = wxWcslen(psz);
2505 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2506 wxWcslen(wcBuf) != pszlen ||
2507 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2508 {
2509 // we didn't obtain the same thing we started from, hence
2510 // the conversion was lossy and we consider that it failed
2511 return (size_t)-1;
2512 }
2513 }
2514
2515 return res ;
2516 }
2517
2518 bool IsOk() const
2519 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2520
2521 private:
2522 TECObjectRef m_MB2WC_converter ;
2523 TECObjectRef m_WC2MB_converter ;
2524
2525 TextEncodingBase m_char_encoding ;
2526 TextEncodingBase m_unicode_encoding ;
2527 };
2528
2529 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2530
2531 // ============================================================================
2532 // wxEncodingConverter based conversion classes
2533 // ============================================================================
2534
2535 #if wxUSE_FONTMAP
2536
2537 class wxMBConv_wxwin : public wxMBConv
2538 {
2539 private:
2540 void Init()
2541 {
2542 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2543 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2544 }
2545
2546 public:
2547 // temporarily just use wxEncodingConverter stuff,
2548 // so that it works while a better implementation is built
2549 wxMBConv_wxwin(const wxChar* name)
2550 {
2551 if (name)
2552 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2553 else
2554 m_enc = wxFONTENCODING_SYSTEM;
2555
2556 Init();
2557 }
2558
2559 wxMBConv_wxwin(wxFontEncoding enc)
2560 {
2561 m_enc = enc;
2562
2563 Init();
2564 }
2565
2566 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2567 {
2568 size_t inbuf = strlen(psz);
2569 if (buf)
2570 {
2571 if (!m2w.Convert(psz,buf))
2572 return (size_t)-1;
2573 }
2574 return inbuf;
2575 }
2576
2577 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2578 {
2579 const size_t inbuf = wxWcslen(psz);
2580 if (buf)
2581 {
2582 if (!w2m.Convert(psz,buf))
2583 return (size_t)-1;
2584 }
2585
2586 return inbuf;
2587 }
2588
2589 bool IsOk() const { return m_ok; }
2590
2591 public:
2592 wxFontEncoding m_enc;
2593 wxEncodingConverter m2w, w2m;
2594
2595 private:
2596 virtual const char *GetMBNul(size_t *nulLen) const
2597 {
2598 switch ( m_enc )
2599 {
2600 case wxFONTENCODING_UTF16BE:
2601 case wxFONTENCODING_UTF16LE:
2602 *nulLen = 2;
2603 return "\0";
2604
2605 case wxFONTENCODING_UTF32BE:
2606 case wxFONTENCODING_UTF32LE:
2607 *nulLen = 4;
2608 return "\0\0\0";
2609
2610 default:
2611 *nulLen = 1;
2612 return "";
2613 }
2614 }
2615
2616 // were we initialized successfully?
2617 bool m_ok;
2618
2619 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2620 };
2621
2622 // make the constructors available for unit testing
2623 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2624 {
2625 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2626 if ( !result->IsOk() )
2627 {
2628 delete result;
2629 return 0;
2630 }
2631 return result;
2632 }
2633
2634 #endif // wxUSE_FONTMAP
2635
2636 // ============================================================================
2637 // wxCSConv implementation
2638 // ============================================================================
2639
2640 void wxCSConv::Init()
2641 {
2642 m_name = NULL;
2643 m_convReal = NULL;
2644 m_deferred = true;
2645 }
2646
2647 wxCSConv::wxCSConv(const wxChar *charset)
2648 {
2649 Init();
2650
2651 if ( charset )
2652 {
2653 SetName(charset);
2654 }
2655
2656 #if wxUSE_FONTMAP
2657 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2658 #else
2659 m_encoding = wxFONTENCODING_SYSTEM;
2660 #endif
2661 }
2662
2663 wxCSConv::wxCSConv(wxFontEncoding encoding)
2664 {
2665 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2666 {
2667 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2668
2669 encoding = wxFONTENCODING_SYSTEM;
2670 }
2671
2672 Init();
2673
2674 m_encoding = encoding;
2675 }
2676
2677 wxCSConv::~wxCSConv()
2678 {
2679 Clear();
2680 }
2681
2682 wxCSConv::wxCSConv(const wxCSConv& conv)
2683 : wxMBConv()
2684 {
2685 Init();
2686
2687 SetName(conv.m_name);
2688 m_encoding = conv.m_encoding;
2689 }
2690
2691 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2692 {
2693 Clear();
2694
2695 SetName(conv.m_name);
2696 m_encoding = conv.m_encoding;
2697
2698 return *this;
2699 }
2700
2701 void wxCSConv::Clear()
2702 {
2703 free(m_name);
2704 delete m_convReal;
2705
2706 m_name = NULL;
2707 m_convReal = NULL;
2708 }
2709
2710 void wxCSConv::SetName(const wxChar *charset)
2711 {
2712 if (charset)
2713 {
2714 m_name = wxStrdup(charset);
2715 m_deferred = true;
2716 }
2717 }
2718
2719 #if wxUSE_FONTMAP
2720 #include "wx/hashmap.h"
2721
2722 WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
2723 wxEncodingNameCache );
2724
2725 static wxEncodingNameCache gs_nameCache;
2726 #endif
2727
2728 wxMBConv *wxCSConv::DoCreate() const
2729 {
2730 #if wxUSE_FONTMAP
2731 wxLogTrace(TRACE_STRCONV,
2732 wxT("creating conversion for %s"),
2733 (m_name ? m_name
2734 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2735 #endif // wxUSE_FONTMAP
2736
2737 // check for the special case of ASCII or ISO8859-1 charset: as we have
2738 // special knowledge of it anyhow, we don't need to create a special
2739 // conversion object
2740 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
2741 m_encoding == wxFONTENCODING_DEFAULT )
2742 {
2743 // don't convert at all
2744 return NULL;
2745 }
2746
2747 // we trust OS to do conversion better than we can so try external
2748 // conversion methods first
2749 //
2750 // the full order is:
2751 // 1. OS conversion (iconv() under Unix or Win32 API)
2752 // 2. hard coded conversions for UTF
2753 // 3. wxEncodingConverter as fall back
2754
2755 // step (1)
2756 #ifdef HAVE_ICONV
2757 #if !wxUSE_FONTMAP
2758 if ( m_name )
2759 #endif // !wxUSE_FONTMAP
2760 {
2761 wxString name(m_name);
2762 wxFontEncoding encoding(m_encoding);
2763
2764 if ( !name.empty() )
2765 {
2766 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2767 if ( conv->IsOk() )
2768 return conv;
2769
2770 delete conv;
2771
2772 #if wxUSE_FONTMAP
2773 encoding =
2774 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2775 #endif // wxUSE_FONTMAP
2776 }
2777 #if wxUSE_FONTMAP
2778 {
2779 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2780 if ( it != gs_nameCache.end() )
2781 {
2782 if ( it->second.empty() )
2783 return NULL;
2784
2785 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2786 if ( conv->IsOk() )
2787 return conv;
2788
2789 delete conv;
2790 }
2791
2792 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2793
2794 for ( ; *names; ++names )
2795 {
2796 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2797 if ( conv->IsOk() )
2798 {
2799 gs_nameCache[encoding] = *names;
2800 return conv;
2801 }
2802
2803 delete conv;
2804 }
2805
2806 gs_nameCache[encoding] = _T(""); // cache the failure
2807 }
2808 #endif // wxUSE_FONTMAP
2809 }
2810 #endif // HAVE_ICONV
2811
2812 #ifdef wxHAVE_WIN32_MB2WC
2813 {
2814 #if wxUSE_FONTMAP
2815 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2816 : new wxMBConv_win32(m_encoding);
2817 if ( conv->IsOk() )
2818 return conv;
2819
2820 delete conv;
2821 #else
2822 return NULL;
2823 #endif
2824 }
2825 #endif // wxHAVE_WIN32_MB2WC
2826 #if defined(__WXMAC__)
2827 {
2828 // leave UTF16 and UTF32 to the built-ins of wx
2829 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2830 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2831 {
2832
2833 #if wxUSE_FONTMAP
2834 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2835 : new wxMBConv_mac(m_encoding);
2836 #else
2837 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2838 #endif
2839 if ( conv->IsOk() )
2840 return conv;
2841
2842 delete conv;
2843 }
2844 }
2845 #endif
2846 #if defined(__WXCOCOA__)
2847 {
2848 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2849 {
2850
2851 #if wxUSE_FONTMAP
2852 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2853 : new wxMBConv_cocoa(m_encoding);
2854 #else
2855 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2856 #endif
2857 if ( conv->IsOk() )
2858 return conv;
2859
2860 delete conv;
2861 }
2862 }
2863 #endif
2864 // step (2)
2865 wxFontEncoding enc = m_encoding;
2866 #if wxUSE_FONTMAP
2867 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2868 {
2869 // use "false" to suppress interactive dialogs -- we can be called from
2870 // anywhere and popping up a dialog from here is the last thing we want to
2871 // do
2872 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2873 }
2874 #endif // wxUSE_FONTMAP
2875
2876 switch ( enc )
2877 {
2878 case wxFONTENCODING_UTF7:
2879 return new wxMBConvUTF7;
2880
2881 case wxFONTENCODING_UTF8:
2882 return new wxMBConvUTF8;
2883
2884 case wxFONTENCODING_UTF16BE:
2885 return new wxMBConvUTF16BE;
2886
2887 case wxFONTENCODING_UTF16LE:
2888 return new wxMBConvUTF16LE;
2889
2890 case wxFONTENCODING_UTF32BE:
2891 return new wxMBConvUTF32BE;
2892
2893 case wxFONTENCODING_UTF32LE:
2894 return new wxMBConvUTF32LE;
2895
2896 default:
2897 // nothing to do but put here to suppress gcc warnings
2898 ;
2899 }
2900
2901 // step (3)
2902 #if wxUSE_FONTMAP
2903 {
2904 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2905 : new wxMBConv_wxwin(m_encoding);
2906 if ( conv->IsOk() )
2907 return conv;
2908
2909 delete conv;
2910 }
2911 #endif // wxUSE_FONTMAP
2912
2913 // NB: This is a hack to prevent deadlock. What could otherwise happen
2914 // in Unicode build: wxConvLocal creation ends up being here
2915 // because of some failure and logs the error. But wxLog will try to
2916 // attach timestamp, for which it will need wxConvLocal (to convert
2917 // time to char* and then wchar_t*), but that fails, tries to log
2918 // error, but wxLog has a (already locked) critical section that
2919 // guards static buffer.
2920 static bool alreadyLoggingError = false;
2921 if (!alreadyLoggingError)
2922 {
2923 alreadyLoggingError = true;
2924 wxLogError(_("Cannot convert from the charset '%s'!"),
2925 m_name ? m_name
2926 :
2927 #if wxUSE_FONTMAP
2928 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2929 #else // !wxUSE_FONTMAP
2930 wxString::Format(_("encoding %s"), m_encoding).c_str()
2931 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2932 );
2933 alreadyLoggingError = false;
2934 }
2935
2936 return NULL;
2937 }
2938
2939 void wxCSConv::CreateConvIfNeeded() const
2940 {
2941 if ( m_deferred )
2942 {
2943 wxCSConv *self = (wxCSConv *)this; // const_cast
2944
2945 #if wxUSE_INTL
2946 // if we don't have neither the name nor the encoding, use the default
2947 // encoding for this system
2948 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2949 {
2950 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2951 }
2952 #endif // wxUSE_INTL
2953
2954 self->m_convReal = DoCreate();
2955 self->m_deferred = false;
2956 }
2957 }
2958
2959 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2960 {
2961 CreateConvIfNeeded();
2962
2963 if (m_convReal)
2964 return m_convReal->MB2WC(buf, psz, n);
2965
2966 // latin-1 (direct)
2967 size_t len = strlen(psz);
2968
2969 if (buf)
2970 {
2971 for (size_t c = 0; c <= len; c++)
2972 buf[c] = (unsigned char)(psz[c]);
2973 }
2974
2975 return len;
2976 }
2977
2978 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2979 {
2980 CreateConvIfNeeded();
2981
2982 if (m_convReal)
2983 return m_convReal->WC2MB(buf, psz, n);
2984
2985 // latin-1 (direct)
2986 const size_t len = wxWcslen(psz);
2987 if (buf)
2988 {
2989 for (size_t c = 0; c <= len; c++)
2990 {
2991 if (psz[c] > 0xFF)
2992 return (size_t)-1;
2993 buf[c] = (char)psz[c];
2994 }
2995 }
2996 else
2997 {
2998 for (size_t c = 0; c <= len; c++)
2999 {
3000 if (psz[c] > 0xFF)
3001 return (size_t)-1;
3002 }
3003 }
3004
3005 return len;
3006 }
3007
3008 const char *wxCSConv::GetMBNul(size_t *nulLen) const
3009 {
3010 CreateConvIfNeeded();
3011
3012 if ( m_convReal )
3013 {
3014 // cast needed just to call private function of m_convReal
3015 return ((wxCSConv *)m_convReal)->GetMBNul(nulLen);
3016 }
3017
3018 *nulLen = 1;
3019 return "";
3020 }
3021
3022 // ----------------------------------------------------------------------------
3023 // globals
3024 // ----------------------------------------------------------------------------
3025
3026 #ifdef __WINDOWS__
3027 static wxMBConv_win32 wxConvLibcObj;
3028 #elif defined(__WXMAC__) && !defined(__MACH__)
3029 static wxMBConv_mac wxConvLibcObj ;
3030 #else
3031 static wxMBConvLibc wxConvLibcObj;
3032 #endif
3033
3034 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
3035 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
3036 static wxMBConvUTF7 wxConvUTF7Obj;
3037 static wxMBConvUTF8 wxConvUTF8Obj;
3038
3039 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
3040 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
3041 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
3042 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
3043 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
3044 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
3045 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
3046 #ifdef __WXOSX__
3047 wxConvUTF8Obj;
3048 #else
3049 wxConvLibcObj;
3050 #endif
3051
3052
3053 #else // !wxUSE_WCHAR_T
3054
3055 // stand-ins in absence of wchar_t
3056 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
3057 wxConvISO8859_1,
3058 wxConvLocal,
3059 wxConvUTF8;
3060
3061 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T