]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
Described in the comments and documented the semantics of the parameters and
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
6 // Modified by:
7 // Created: 29/01/98
8 // RCS-ID: $Id$
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
14
15 // ============================================================================
16 // declarations
17 // ============================================================================
18
19 // ----------------------------------------------------------------------------
20 // headers
21 // ----------------------------------------------------------------------------
22
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
25
26 #ifdef __BORLANDC__
27 #pragma hdrstop
28 #endif
29
30 #ifndef WX_PRECOMP
31 #include "wx/intl.h"
32 #include "wx/log.h"
33 #endif // WX_PRECOMP
34
35 #include "wx/strconv.h"
36
37 #if wxUSE_WCHAR_T
38
39 #ifdef __WINDOWS__
40 #include "wx/msw/private.h"
41 #include "wx/msw/missing.h"
42 #endif
43
44 #ifndef __WXWINCE__
45 #include <errno.h>
46 #endif
47
48 #include <ctype.h>
49 #include <string.h>
50 #include <stdlib.h>
51
52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54 #endif // __WIN32__ but !__WXMICROWIN__
55
56 #ifdef __SALFORDC__
57 #include <clib.h>
58 #endif
59
60 #ifdef HAVE_ICONV
61 #include <iconv.h>
62 #include "wx/thread.h"
63 #endif
64
65 #include "wx/encconv.h"
66 #include "wx/fontmap.h"
67 #include "wx/utils.h"
68
69 #ifdef __WXMAC__
70 #ifndef __DARWIN__
71 #include <ATSUnicode.h>
72 #include <TextCommon.h>
73 #include <TextEncodingConverter.h>
74 #endif
75
76 #include "wx/mac/private.h" // includes mac headers
77 #endif
78
79 #define TRACE_STRCONV _T("strconv")
80
81 #if SIZEOF_WCHAR_T == 2
82 #define WC_UTF16
83 #endif
84
85 // ============================================================================
86 // implementation
87 // ============================================================================
88
89 // ----------------------------------------------------------------------------
90 // UTF-16 en/decoding to/from UCS-4
91 // ----------------------------------------------------------------------------
92
93
94 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
95 {
96 if (input<=0xffff)
97 {
98 if (output)
99 *output = (wxUint16) input;
100 return 1;
101 }
102 else if (input>=0x110000)
103 {
104 return (size_t)-1;
105 }
106 else
107 {
108 if (output)
109 {
110 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
111 *output = (wxUint16) ((input&0x3ff)+0xdc00);
112 }
113 return 2;
114 }
115 }
116
117 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
118 {
119 if ((*input<0xd800) || (*input>0xdfff))
120 {
121 output = *input;
122 return 1;
123 }
124 else if ((input[1]<0xdc00) || (input[1]>0xdfff))
125 {
126 output = *input;
127 return (size_t)-1;
128 }
129 else
130 {
131 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
132 return 2;
133 }
134 }
135
136
137 // ----------------------------------------------------------------------------
138 // wxMBConv
139 // ----------------------------------------------------------------------------
140
141 wxMBConv::~wxMBConv()
142 {
143 // nothing to do here (necessary for Darwin linking probably)
144 }
145
146 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
147 {
148 if ( psz )
149 {
150 // calculate the length of the buffer needed first
151 size_t nLen = MB2WC(NULL, psz, 0);
152 if ( nLen != (size_t)-1 )
153 {
154 // now do the actual conversion
155 wxWCharBuffer buf(nLen);
156 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
157 if ( nLen != (size_t)-1 )
158 {
159 return buf;
160 }
161 }
162 }
163
164 wxWCharBuffer buf((wchar_t *)NULL);
165
166 return buf;
167 }
168
169 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
170 {
171 if ( pwz )
172 {
173 size_t nLen = WC2MB(NULL, pwz, 0);
174 if ( nLen != (size_t)-1 )
175 {
176 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
177 nLen = WC2MB(buf.data(), pwz, nLen + 4);
178 if ( nLen != (size_t)-1 )
179 {
180 return buf;
181 }
182 }
183 }
184
185 wxCharBuffer buf((char *)NULL);
186
187 return buf;
188 }
189
190 const wxWCharBuffer
191 wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
192 {
193 // the currently accumulated wide characters
194 wxWCharBuffer wbuf;
195
196 // the current length of wbuf
197 size_t lenBuf = 0;
198
199 // we need to know the representation of L'\0' for this conversion
200 size_t nulLen;
201 const char * const nul = GetMBNul(&nulLen);
202 if ( nulLen == (size_t)-1 || nulLen == 0 )
203 return wxWCharBuffer();
204
205 // make a copy of the input string unless it is already properly
206 // NUL-terminated
207 wxCharBuffer bufTmp;
208
209 // now we can compute the input size if we were not given it: notice that
210 // in this case the string must be properly NUL-terminated, of course, as
211 // otherwise we have no way of knowing how long it is
212 if ( inLen == (size_t)-1 )
213 {
214 // not the most efficient algorithm but it shouldn't matter as normally
215 // there are not many NULs in the string and so normally memcmp()
216 // should stop on the first character
217 for ( const char *p = in; ; p++ )
218 {
219 if ( memcmp(p, nul, nulLen) == 0 )
220 break;
221 }
222
223 inLen = p - in + nulLen;
224 }
225 else // we already have the size
226 {
227 // check if it's not already NUL-terminated too to avoid the copy
228 if ( inLen < nulLen || memcmp(in + inLen - nulLen, nul, nulLen) != 0 )
229 {
230 // make a copy in order to properly NUL-terminate the string
231 bufTmp = wxCharBuffer(inLen + nulLen - 1 /* 1 will be added */);
232 memcpy(bufTmp.data(), in, inLen);
233 memcpy(bufTmp.data() + inLen, nul, nulLen);
234 }
235 }
236
237 if ( bufTmp )
238 in = bufTmp;
239
240 for ( const char * const inEnd = in + inLen;; )
241 {
242 // try to convert the current chunk if anything left
243 size_t lenChunk = in < inEnd ? MB2WC(NULL, in, 0) : 0;
244 if ( lenChunk == 0 )
245 {
246 // nothing left in the input string, conversion succeeded
247 if ( outLen )
248 {
249 // we shouldn't include the last NUL in the result length
250 *outLen = lenBuf ? lenBuf - 1 : 0;
251 }
252
253 return wbuf;
254 }
255
256 if ( lenChunk == (size_t)-1 )
257 break;
258
259 const size_t lenBufNew = lenBuf + lenChunk;
260 if ( !wbuf.extend(lenBufNew) )
261 break;
262
263 lenChunk = MB2WC(wbuf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
264 if ( lenChunk == (size_t)-1 )
265 break;
266
267 // +! for the embedded NUL (if something follows)
268 lenBuf = lenBufNew + 1;
269
270 // advance the input pointer past the end of this chunk
271 while ( memcmp(in, nul, nulLen) != 0 )
272 in++;
273
274 in += nulLen; // skipping over its terminator as well
275 }
276
277 // conversion failed
278 if ( outLen )
279 *outLen = 0;
280
281 return wxWCharBuffer();
282 }
283
284 const wxCharBuffer
285 wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
286 {
287 // the currently accumulated multibyte characters
288 wxCharBuffer buf;
289
290 // the current length of buf
291 size_t lenBuf = 0;
292
293 // make a copy of the input string unless it is already properly
294 // NUL-terminated
295 //
296 // if we don't know its length we have no choice but to assume that it is,
297 // indeed, properly terminated
298 wxWCharBuffer bufTmp;
299 if ( inLen == (size_t)-1 )
300 {
301 inLen = wxWcslen(in) + 1;
302 }
303 else if ( inLen != 0 && in[inLen - 1] != L'\0' )
304 {
305 // make a copy in order to properly NUL-terminate the string
306 bufTmp = wxWCharBuffer(inLen);
307 memcpy(bufTmp.data(), in, inLen*sizeof(wchar_t));
308 }
309
310 if ( bufTmp )
311 in = bufTmp;
312
313 for ( const wchar_t * const inEnd = in + inLen;; )
314 {
315 // try to convert the current chunk, if anything left
316 size_t lenChunk = in < inEnd ? WC2MB(NULL, in, 0) : 0;
317 if ( lenChunk == 0 )
318 {
319 // nothing left in the input string, conversion succeeded
320 if ( outLen )
321 *outLen = lenBuf ? lenBuf - 1 : lenBuf;
322
323 return buf;
324 }
325
326 if ( lenChunk == (size_t)-1 )
327 break;
328
329 const size_t lenBufNew = lenBuf + lenChunk;
330 if ( !buf.extend(lenBufNew) )
331 break;
332
333 lenChunk = WC2MB(buf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
334 if ( lenChunk == (size_t)-1 )
335 break;
336
337 // chunk successfully converted, go to the next one
338 in += wxWcslen(in) + 1 /* skip NUL too */;
339 lenBuf = lenBufNew + 1;
340 }
341
342 // conversion failed
343 if ( outLen )
344 *outLen = 0;
345
346 return wxCharBuffer();
347 }
348
349 // ----------------------------------------------------------------------------
350 // wxMBConvLibc
351 // ----------------------------------------------------------------------------
352
353 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
354 {
355 return wxMB2WC(buf, psz, n);
356 }
357
358 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
359 {
360 return wxWC2MB(buf, psz, n);
361 }
362
363 // ----------------------------------------------------------------------------
364 // wxConvBrokenFileNames
365 // ----------------------------------------------------------------------------
366
367 #ifdef __UNIX__
368
369 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
370 {
371 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
372 || wxStricmp(charset, _T("UTF8")) == 0 )
373 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
374 else
375 m_conv = new wxCSConv(charset);
376 }
377
378 #endif // __UNIX__
379
380 // ----------------------------------------------------------------------------
381 // UTF-7
382 // ----------------------------------------------------------------------------
383
384 // Implementation (C) 2004 Fredrik Roubert
385
//
// BASE64 decoding table: maps a byte to its 6 bit value or to 0xff if the
// byte is not a BASE64 character (one row per 16 byte values)
//
static const unsigned char utf7unb64[] =
{
    // 0x00
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30: '0'..'9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40: 'A'..'O'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50: 'P'..'Z'
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60: 'a'..'o'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70: 'p'..'z'
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x90
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0xa0
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0xb0
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0xc0
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0xd0
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0xe0
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0xf0
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
424
425 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
426 {
427 size_t len = 0;
428
429 while ( *psz && (!buf || (len < n)) )
430 {
431 unsigned char cc = *psz++;
432 if (cc != '+')
433 {
434 // plain ASCII char
435 if (buf)
436 *buf++ = cc;
437 len++;
438 }
439 else if (*psz == '-')
440 {
441 // encoded plus sign
442 if (buf)
443 *buf++ = cc;
444 len++;
445 psz++;
446 }
447 else // start of BASE64 encoded string
448 {
449 bool lsb, ok;
450 unsigned int d, l;
451 for ( ok = lsb = false, d = 0, l = 0;
452 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
453 psz++ )
454 {
455 d <<= 6;
456 d += cc;
457 for (l += 6; l >= 8; lsb = !lsb)
458 {
459 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
460 if (lsb)
461 {
462 if (buf)
463 *buf++ |= c;
464 len ++;
465 }
466 else
467 {
468 if (buf)
469 *buf = (wchar_t)(c << 8);
470 }
471
472 ok = true;
473 }
474 }
475
476 if ( !ok )
477 {
478 // in valid UTF7 we should have valid characters after '+'
479 return (size_t)-1;
480 }
481
482 if (*psz == '-')
483 psz++;
484 }
485 }
486
487 if ( buf && (len < n) )
488 *buf = '\0';
489
490 return len;
491 }
492
//
// BASE64 encoding table: maps a 6 bit value to the corresponding character
//
static const unsigned char utf7enb64[] =
{
    // 0..25
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 26..51
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // 52..61
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 62 and 63
    '+', '/'
};
507
//
// UTF-7 encoding table giving the class of each of the 128 ASCII characters
// (one row per 8 characters):
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3,     // 0x00
    3, 2, 2, 3, 3, 2, 3, 3,     // 0x08
    3, 3, 3, 3, 3, 3, 3, 3,     // 0x10
    3, 3, 3, 3, 3, 3, 3, 3,     // 0x18
    2, 1, 1, 1, 1, 1, 1, 0,     // 0x20
    0, 0, 1, 3, 0, 0, 0, 3,     // 0x28
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x30
    0, 0, 0, 1, 1, 1, 1, 0,     // 0x38
    1, 0, 0, 0, 0, 0, 0, 0,     // 0x40
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x48
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x50
    0, 0, 0, 1, 3, 1, 1, 1,     // 0x58
    1, 0, 0, 0, 0, 0, 0, 0,     // 0x60
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x68
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x70
    0, 0, 0, 1, 1, 1, 3, 3      // 0x78
};
527
528 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
529 {
530 size_t len = 0;
531
532 while (*psz && ((!buf) || (len < n)))
533 {
534 wchar_t cc = *psz++;
535 if (cc < 0x80 && utf7encode[cc] < 1)
536 {
537 // plain ASCII char
538 if (buf)
539 *buf++ = (char)cc;
540 len++;
541 }
542 #ifndef WC_UTF16
543 else if (((wxUint32)cc) > 0xffff)
544 {
545 // no surrogate pair generation (yet?)
546 return (size_t)-1;
547 }
548 #endif
549 else
550 {
551 if (buf)
552 *buf++ = '+';
553 len++;
554 if (cc != '+')
555 {
556 // BASE64 encode string
557 unsigned int lsb, d, l;
558 for (d = 0, l = 0; /*nothing*/; psz++)
559 {
560 for (lsb = 0; lsb < 2; lsb ++)
561 {
562 d <<= 8;
563 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
564
565 for (l += 8; l >= 6; )
566 {
567 l -= 6;
568 if (buf)
569 *buf++ = utf7enb64[(d >> l) % 64];
570 len++;
571 }
572 }
573 cc = *psz;
574 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
575 break;
576 }
577 if (l != 0)
578 {
579 if (buf)
580 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
581 len++;
582 }
583 }
584 if (buf)
585 *buf++ = '-';
586 len++;
587 }
588 }
589 if (buf && (len < n))
590 *buf = 0;
591 return len;
592 }
593
594 // ----------------------------------------------------------------------------
595 // UTF-8
596 // ----------------------------------------------------------------------------
597
598 static wxUint32 utf8_max[]=
599 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
600
601 // boundaries of the private use area we use to (temporarily) remap invalid
602 // characters invalid in a UTF-8 encoded string
603 const wxUint32 wxUnicodePUA = 0x100000;
604 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
605
606 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
607 {
608 size_t len = 0;
609
610 while (*psz && ((!buf) || (len < n)))
611 {
612 const char *opsz = psz;
613 bool invalid = false;
614 unsigned char cc = *psz++, fc = cc;
615 unsigned cnt;
616 for (cnt = 0; fc & 0x80; cnt++)
617 fc <<= 1;
618 if (!cnt)
619 {
620 // plain ASCII char
621 if (buf)
622 *buf++ = cc;
623 len++;
624
625 // escape the escape character for octal escapes
626 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
627 && cc == '\\' && (!buf || len < n))
628 {
629 if (buf)
630 *buf++ = cc;
631 len++;
632 }
633 }
634 else
635 {
636 cnt--;
637 if (!cnt)
638 {
639 // invalid UTF-8 sequence
640 invalid = true;
641 }
642 else
643 {
644 unsigned ocnt = cnt - 1;
645 wxUint32 res = cc & (0x3f >> cnt);
646 while (cnt--)
647 {
648 cc = *psz;
649 if ((cc & 0xC0) != 0x80)
650 {
651 // invalid UTF-8 sequence
652 invalid = true;
653 break;
654 }
655 psz++;
656 res = (res << 6) | (cc & 0x3f);
657 }
658 if (invalid || res <= utf8_max[ocnt])
659 {
660 // illegal UTF-8 encoding
661 invalid = true;
662 }
663 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
664 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
665 {
666 // if one of our PUA characters turns up externally
667 // it must also be treated as an illegal sequence
668 // (a bit like you have to escape an escape character)
669 invalid = true;
670 }
671 else
672 {
673 #ifdef WC_UTF16
674 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
675 size_t pa = encode_utf16(res, (wxUint16 *)buf);
676 if (pa == (size_t)-1)
677 {
678 invalid = true;
679 }
680 else
681 {
682 if (buf)
683 buf += pa;
684 len += pa;
685 }
686 #else // !WC_UTF16
687 if (buf)
688 *buf++ = (wchar_t)res;
689 len++;
690 #endif // WC_UTF16/!WC_UTF16
691 }
692 }
693 if (invalid)
694 {
695 if (m_options & MAP_INVALID_UTF8_TO_PUA)
696 {
697 while (opsz < psz && (!buf || len < n))
698 {
699 #ifdef WC_UTF16
700 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
701 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
702 wxASSERT(pa != (size_t)-1);
703 if (buf)
704 buf += pa;
705 opsz++;
706 len += pa;
707 #else
708 if (buf)
709 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
710 opsz++;
711 len++;
712 #endif
713 }
714 }
715 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
716 {
717 while (opsz < psz && (!buf || len < n))
718 {
719 if ( buf && len + 3 < n )
720 {
721 unsigned char on = *opsz;
722 *buf++ = L'\\';
723 *buf++ = (wchar_t)( L'0' + on / 0100 );
724 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
725 *buf++ = (wchar_t)( L'0' + on % 010 );
726 }
727 opsz++;
728 len += 4;
729 }
730 }
731 else // MAP_INVALID_UTF8_NOT
732 {
733 return (size_t)-1;
734 }
735 }
736 }
737 }
738 if (buf && (len < n))
739 *buf = 0;
740 return len;
741 }
742
// Returns true if wch is one of the octal digits L'0'..L'7'.
static inline bool isoctal(wchar_t wch)
{
    if ( wch < L'0' )
        return false;

    return wch <= L'7';
}
747
748 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
749 {
750 size_t len = 0;
751
752 while (*psz && ((!buf) || (len < n)))
753 {
754 wxUint32 cc;
755 #ifdef WC_UTF16
756 // cast is ok for WC_UTF16
757 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
758 psz += (pa == (size_t)-1) ? 1 : pa;
759 #else
760 cc=(*psz++) & 0x7fffffff;
761 #endif
762
763 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
764 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
765 {
766 if (buf)
767 *buf++ = (char)(cc - wxUnicodePUA);
768 len++;
769 }
770 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
771 && cc == L'\\' && psz[0] == L'\\' )
772 {
773 if (buf)
774 *buf++ = (char)cc;
775 psz++;
776 len++;
777 }
778 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
779 cc == L'\\' &&
780 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
781 {
782 if (buf)
783 {
784 *buf++ = (char) ((psz[0] - L'0')*0100 +
785 (psz[1] - L'0')*010 +
786 (psz[2] - L'0'));
787 }
788
789 psz += 3;
790 len++;
791 }
792 else
793 {
794 unsigned cnt;
795 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
796 if (!cnt)
797 {
798 // plain ASCII char
799 if (buf)
800 *buf++ = (char) cc;
801 len++;
802 }
803
804 else
805 {
806 len += cnt + 1;
807 if (buf)
808 {
809 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
810 while (cnt--)
811 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
812 }
813 }
814 }
815 }
816
817 if (buf && (len<n))
818 *buf = 0;
819
820 return len;
821 }
822
823 // ----------------------------------------------------------------------------
824 // UTF-16
825 // ----------------------------------------------------------------------------
826
// the functions below are written in terms of "straight" and "swap"
// conversions, map them to the LE and BE classes depending on whether
// WORDS_BIGENDIAN is defined
#ifndef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16LE
    #define wxMBConvUTF16swap wxMBConvUTF16BE
#else // WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap wxMBConvUTF16LE
#endif // !WORDS_BIGENDIAN/WORDS_BIGENDIAN
834
835
836 #ifdef WC_UTF16
837
838 // copy 16bit MB to 16bit String
839 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
840 {
841 size_t len=0;
842
843 while (*(wxUint16*)psz && (!buf || len < n))
844 {
845 if (buf)
846 *buf++ = *(wxUint16*)psz;
847 len++;
848
849 psz += sizeof(wxUint16);
850 }
851 if (buf && len<n) *buf=0;
852
853 return len;
854 }
855
856
857 // copy 16bit String to 16bit MB
858 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
859 {
860 size_t len=0;
861
862 while (*psz && (!buf || len < n))
863 {
864 if (buf)
865 {
866 *(wxUint16*)buf = *psz;
867 buf += sizeof(wxUint16);
868 }
869 len += sizeof(wxUint16);
870 psz++;
871 }
872 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
873
874 return len;
875 }
876
877
878 // swap 16bit MB to 16bit String
879 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
880 {
881 size_t len = 0;
882
883 // UTF16 string must be terminated by 2 NULs as single NULs may occur
884 // inside the string
885 while ( (psz[0] || psz[1]) && (!buf || len < n) )
886 {
887 if ( buf )
888 {
889 ((char *)buf)[0] = psz[1];
890 ((char *)buf)[1] = psz[0];
891 buf++;
892 }
893 len++;
894 psz += 2;
895 }
896
897 if ( buf && len < n )
898 *buf = L'\0';
899
900 return len;
901 }
902
903
904 // swap 16bit MB to 16bit String
905 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
906 {
907 size_t len = 0;
908
909 while ( *psz && (!buf || len < n) )
910 {
911 if ( buf )
912 {
913 *buf++ = ((char*)psz)[1];
914 *buf++ = ((char*)psz)[0];
915 }
916 len += 2;
917 psz++;
918 }
919
920 if ( buf && len < n )
921 *buf = '\0';
922
923 return len;
924 }
925
926
927 #else // WC_UTF16
928
929
930 // copy 16bit MB to 32bit String
931 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
932 {
933 size_t len=0;
934
935 while (*(wxUint16*)psz && (!buf || len < n))
936 {
937 wxUint32 cc;
938 size_t pa=decode_utf16((wxUint16*)psz, cc);
939 if (pa == (size_t)-1)
940 return pa;
941
942 if (buf)
943 *buf++ = (wchar_t)cc;
944 len++;
945 psz += pa * sizeof(wxUint16);
946 }
947 if (buf && len<n) *buf=0;
948
949 return len;
950 }
951
952
953 // copy 32bit String to 16bit MB
954 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
955 {
956 size_t len=0;
957
958 while (*psz && (!buf || len < n))
959 {
960 wxUint16 cc[2];
961 size_t pa=encode_utf16(*psz, cc);
962
963 if (pa == (size_t)-1)
964 return pa;
965
966 if (buf)
967 {
968 *(wxUint16*)buf = cc[0];
969 buf += sizeof(wxUint16);
970 if (pa > 1)
971 {
972 *(wxUint16*)buf = cc[1];
973 buf += sizeof(wxUint16);
974 }
975 }
976
977 len += pa*sizeof(wxUint16);
978 psz++;
979 }
980 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
981
982 return len;
983 }
984
985
986 // swap 16bit MB to 32bit String
987 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
988 {
989 size_t len=0;
990
991 while (*(wxUint16*)psz && (!buf || len < n))
992 {
993 wxUint32 cc;
994 char tmp[4];
995 tmp[0]=psz[1]; tmp[1]=psz[0];
996 tmp[2]=psz[3]; tmp[3]=psz[2];
997
998 size_t pa=decode_utf16((wxUint16*)tmp, cc);
999 if (pa == (size_t)-1)
1000 return pa;
1001
1002 if (buf)
1003 *buf++ = (wchar_t)cc;
1004
1005 len++;
1006 psz += pa * sizeof(wxUint16);
1007 }
1008 if (buf && len<n) *buf=0;
1009
1010 return len;
1011 }
1012
1013
1014 // swap 32bit String to 16bit MB
1015 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1016 {
1017 size_t len=0;
1018
1019 while (*psz && (!buf || len < n))
1020 {
1021 wxUint16 cc[2];
1022 size_t pa=encode_utf16(*psz, cc);
1023
1024 if (pa == (size_t)-1)
1025 return pa;
1026
1027 if (buf)
1028 {
1029 *buf++ = ((char*)cc)[1];
1030 *buf++ = ((char*)cc)[0];
1031 if (pa > 1)
1032 {
1033 *buf++ = ((char*)cc)[3];
1034 *buf++ = ((char*)cc)[2];
1035 }
1036 }
1037
1038 len += pa*sizeof(wxUint16);
1039 psz++;
1040 }
1041 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1042
1043 return len;
1044 }
1045
1046 #endif // WC_UTF16
1047
1048
1049 // ----------------------------------------------------------------------------
1050 // UTF-32
1051 // ----------------------------------------------------------------------------
1052
1053 #ifdef WORDS_BIGENDIAN
1054 #define wxMBConvUTF32straight wxMBConvUTF32BE
1055 #define wxMBConvUTF32swap wxMBConvUTF32LE
1056 #else
1057 #define wxMBConvUTF32swap wxMBConvUTF32BE
1058 #define wxMBConvUTF32straight wxMBConvUTF32LE
1059 #endif
1060
1061
1062 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1063 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1064
1065
1066 #ifdef WC_UTF16
1067
1068 // copy 32bit MB to 16bit String
1069 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1070 {
1071 size_t len=0;
1072
1073 while (*(wxUint32*)psz && (!buf || len < n))
1074 {
1075 wxUint16 cc[2];
1076
1077 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1078 if (pa == (size_t)-1)
1079 return pa;
1080
1081 if (buf)
1082 {
1083 *buf++ = cc[0];
1084 if (pa > 1)
1085 *buf++ = cc[1];
1086 }
1087 len += pa;
1088 psz += sizeof(wxUint32);
1089 }
1090 if (buf && len<n) *buf=0;
1091
1092 return len;
1093 }
1094
1095
1096 // copy 16bit String to 32bit MB
1097 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1098 {
1099 size_t len=0;
1100
1101 while (*psz && (!buf || len < n))
1102 {
1103 wxUint32 cc;
1104
1105 // cast is ok for WC_UTF16
1106 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1107 if (pa == (size_t)-1)
1108 return pa;
1109
1110 if (buf)
1111 {
1112 *(wxUint32*)buf = cc;
1113 buf += sizeof(wxUint32);
1114 }
1115 len += sizeof(wxUint32);
1116 psz += pa;
1117 }
1118
1119 if (buf && len<=n-sizeof(wxUint32))
1120 *(wxUint32*)buf=0;
1121
1122 return len;
1123 }
1124
1125
1126
1127 // swap 32bit MB to 16bit String
1128 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1129 {
1130 size_t len=0;
1131
1132 while (*(wxUint32*)psz && (!buf || len < n))
1133 {
1134 char tmp[4];
1135 tmp[0] = psz[3]; tmp[1] = psz[2];
1136 tmp[2] = psz[1]; tmp[3] = psz[0];
1137
1138
1139 wxUint16 cc[2];
1140
1141 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1142 if (pa == (size_t)-1)
1143 return pa;
1144
1145 if (buf)
1146 {
1147 *buf++ = cc[0];
1148 if (pa > 1)
1149 *buf++ = cc[1];
1150 }
1151 len += pa;
1152 psz += sizeof(wxUint32);
1153 }
1154
1155 if (buf && len<n)
1156 *buf=0;
1157
1158 return len;
1159 }
1160
1161
1162 // swap 16bit String to 32bit MB
1163 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1164 {
1165 size_t len=0;
1166
1167 while (*psz && (!buf || len < n))
1168 {
1169 char cc[4];
1170
1171 // cast is ok for WC_UTF16
1172 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1173 if (pa == (size_t)-1)
1174 return pa;
1175
1176 if (buf)
1177 {
1178 *buf++ = cc[3];
1179 *buf++ = cc[2];
1180 *buf++ = cc[1];
1181 *buf++ = cc[0];
1182 }
1183 len += sizeof(wxUint32);
1184 psz += pa;
1185 }
1186
1187 if (buf && len<=n-sizeof(wxUint32))
1188 *(wxUint32*)buf=0;
1189
1190 return len;
1191 }
1192
1193 #else // WC_UTF16
1194
1195
1196 // copy 32bit MB to 32bit String
1197 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1198 {
1199 size_t len=0;
1200
1201 while (*(wxUint32*)psz && (!buf || len < n))
1202 {
1203 if (buf)
1204 *buf++ = (wchar_t)(*(wxUint32*)psz);
1205 len++;
1206 psz += sizeof(wxUint32);
1207 }
1208
1209 if (buf && len<n)
1210 *buf=0;
1211
1212 return len;
1213 }
1214
1215
1216 // copy 32bit String to 32bit MB
1217 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1218 {
1219 size_t len=0;
1220
1221 while (*psz && (!buf || len < n))
1222 {
1223 if (buf)
1224 {
1225 *(wxUint32*)buf = *psz;
1226 buf += sizeof(wxUint32);
1227 }
1228
1229 len += sizeof(wxUint32);
1230 psz++;
1231 }
1232
1233 if (buf && len<=n-sizeof(wxUint32))
1234 *(wxUint32*)buf=0;
1235
1236 return len;
1237 }
1238
1239
1240 // swap 32bit MB to 32bit String
1241 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1242 {
1243 size_t len=0;
1244
1245 while (*(wxUint32*)psz && (!buf || len < n))
1246 {
1247 if (buf)
1248 {
1249 ((char *)buf)[0] = psz[3];
1250 ((char *)buf)[1] = psz[2];
1251 ((char *)buf)[2] = psz[1];
1252 ((char *)buf)[3] = psz[0];
1253 buf++;
1254 }
1255 len++;
1256 psz += sizeof(wxUint32);
1257 }
1258
1259 if (buf && len<n)
1260 *buf=0;
1261
1262 return len;
1263 }
1264
1265
1266 // swap 32bit String to 32bit MB
1267 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1268 {
1269 size_t len=0;
1270
1271 while (*psz && (!buf || len < n))
1272 {
1273 if (buf)
1274 {
1275 *buf++ = ((char *)psz)[3];
1276 *buf++ = ((char *)psz)[2];
1277 *buf++ = ((char *)psz)[1];
1278 *buf++ = ((char *)psz)[0];
1279 }
1280 len += sizeof(wxUint32);
1281 psz++;
1282 }
1283
1284 if (buf && len<=n-sizeof(wxUint32))
1285 *(wxUint32*)buf=0;
1286
1287 return len;
1288 }
1289
1290
1291 #endif // WC_UTF16
1292
1293
1294 // ============================================================================
1295 // The classes doing conversion using the iconv_xxx() functions
1296 // ============================================================================
1297
1298 #ifdef HAVE_ICONV
1299
1300 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1301 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1302 // (unless there's yet another bug in glibc) the only case when iconv()
1303 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1304 // left in the input buffer -- when _real_ error occurs,
1305 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1306 // iconv() failure.
1307 // [This bug does not appear in glibc 2.2.]
1308 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1309 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1310 (errno != E2BIG || bufLeft != 0))
1311 #else
1312 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1313 #endif
1314
1315 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1316
1317 #define ICONV_T_INVALID ((iconv_t)-1)
1318
1319 #if SIZEOF_WCHAR_T == 4
1320 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1321 #define WC_ENC wxFONTENCODING_UTF32
1322 #elif SIZEOF_WCHAR_T == 2
1323 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1324 #define WC_ENC wxFONTENCODING_UTF16
1325 #else // sizeof(wchar_t) != 2 nor 4
1326 // does this ever happen?
1327 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1328 #endif
1329
1330 // ----------------------------------------------------------------------------
1331 // wxMBConv_iconv: encapsulates an iconv character set
1332 // ----------------------------------------------------------------------------
1333
1334 class wxMBConv_iconv : public wxMBConv
1335 {
1336 public:
1337 wxMBConv_iconv(const wxChar *name);
1338 virtual ~wxMBConv_iconv();
1339
1340 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1341 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1342
1343 bool IsOk() const
1344 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1345
1346 protected:
1347 // the iconv handlers used to translate from multibyte to wide char and in
1348 // the other direction
1349 iconv_t m2w,
1350 w2m;
1351 #if wxUSE_THREADS
1352 // guards access to m2w and w2m objects
1353 wxMutex m_iconvMutex;
1354 #endif
1355
1356 private:
1357 virtual const char *GetMBNul(size_t *nulLen) const;
1358
1359 // the name (for iconv_open()) of a wide char charset -- if none is
1360 // available on this machine, it will remain NULL
1361 static wxString ms_wcCharsetName;
1362
1363 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1364 // different endian-ness than the native one
1365 static bool ms_wcNeedsSwap;
1366
1367 // NUL representation
1368 size_t m_nulLen;
1369 char m_nulBuf[8];
1370 };
1371
1372 // make the constructor available for unit testing
1373 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1374 {
1375 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1376 if ( !result->IsOk() )
1377 {
1378 delete result;
1379 return 0;
1380 }
1381 return result;
1382 }
1383
1384 wxString wxMBConv_iconv::ms_wcCharsetName;
1385 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1386
1387 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1388 {
1389 m_nulLen = (size_t)-2;
1390
1391 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1392 // names for the charsets
1393 const wxCharBuffer cname(wxString(name).ToAscii());
1394
1395 // check for charset that represents wchar_t:
1396 if ( ms_wcCharsetName.empty() )
1397 {
1398 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1399
1400 #if wxUSE_FONTMAP
1401 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1402 #else // !wxUSE_FONTMAP
1403 static const wxChar *names[] =
1404 {
1405 #if SIZEOF_WCHAR_T == 4
1406 _T("UCS-4"),
1407 #elif SIZEOF_WCHAR_T = 2
1408 _T("UCS-2"),
1409 #endif
1410 NULL
1411 };
1412 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1413
1414 for ( ; *names && ms_wcCharsetName.empty(); ++names )
1415 {
1416 const wxString nameCS(*names);
1417
1418 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1419 wxString nameXE(nameCS);
1420 #ifdef WORDS_BIGENDIAN
1421 nameXE += _T("BE");
1422 #else // little endian
1423 nameXE += _T("LE");
1424 #endif
1425
1426 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1427 nameXE.c_str());
1428
1429 m2w = iconv_open(nameXE.ToAscii(), cname);
1430 if ( m2w == ICONV_T_INVALID )
1431 {
1432 // try charset w/o bytesex info (e.g. "UCS4")
1433 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1434 nameCS.c_str());
1435 m2w = iconv_open(nameCS.ToAscii(), cname);
1436
1437 // and check for bytesex ourselves:
1438 if ( m2w != ICONV_T_INVALID )
1439 {
1440 char buf[2], *bufPtr;
1441 wchar_t wbuf[2], *wbufPtr;
1442 size_t insz, outsz;
1443 size_t res;
1444
1445 buf[0] = 'A';
1446 buf[1] = 0;
1447 wbuf[0] = 0;
1448 insz = 2;
1449 outsz = SIZEOF_WCHAR_T * 2;
1450 wbufPtr = wbuf;
1451 bufPtr = buf;
1452
1453 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1454 (char**)&wbufPtr, &outsz);
1455
1456 if (ICONV_FAILED(res, insz))
1457 {
1458 wxLogLastError(wxT("iconv"));
1459 wxLogError(_("Conversion to charset '%s' doesn't work."),
1460 nameCS.c_str());
1461 }
1462 else // ok, can convert to this encoding, remember it
1463 {
1464 ms_wcCharsetName = nameCS;
1465 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1466 }
1467 }
1468 }
1469 else // use charset not requiring byte swapping
1470 {
1471 ms_wcCharsetName = nameXE;
1472 }
1473 }
1474
1475 wxLogTrace(TRACE_STRCONV,
1476 wxT("iconv wchar_t charset is \"%s\"%s"),
1477 ms_wcCharsetName.empty() ? _T("<none>")
1478 : ms_wcCharsetName.c_str(),
1479 ms_wcNeedsSwap ? _T(" (needs swap)")
1480 : _T(""));
1481 }
1482 else // we already have ms_wcCharsetName
1483 {
1484 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1485 }
1486
1487 if ( ms_wcCharsetName.empty() )
1488 {
1489 w2m = ICONV_T_INVALID;
1490 }
1491 else
1492 {
1493 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1494 if ( w2m == ICONV_T_INVALID )
1495 {
1496 wxLogTrace(TRACE_STRCONV,
1497 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1498 ms_wcCharsetName.c_str(), cname.data());
1499 }
1500 }
1501 }
1502
1503 wxMBConv_iconv::~wxMBConv_iconv()
1504 {
1505 if ( m2w != ICONV_T_INVALID )
1506 iconv_close(m2w);
1507 if ( w2m != ICONV_T_INVALID )
1508 iconv_close(w2m);
1509 }
1510
1511 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1512 {
1513 #if wxUSE_THREADS
1514 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1515 // Unfortunately there is a couple of global wxCSConv objects such as
1516 // wxConvLocal that are used all over wx code, so we have to make sure
1517 // the handle is used by at most one thread at the time. Otherwise
1518 // only a few wx classes would be safe to use from non-main threads
1519 // as MB<->WC conversion would fail "randomly".
1520 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1521 #endif
1522
1523 size_t inbuf = strlen(psz);
1524 size_t outbuf = n * SIZEOF_WCHAR_T;
1525 size_t res, cres;
1526 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1527 wchar_t *bufPtr = buf;
1528 const char *pszPtr = psz;
1529
1530 if (buf)
1531 {
1532 // have destination buffer, convert there
1533 cres = iconv(m2w,
1534 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1535 (char**)&bufPtr, &outbuf);
1536 res = n - (outbuf / SIZEOF_WCHAR_T);
1537
1538 if (ms_wcNeedsSwap)
1539 {
1540 // convert to native endianness
1541 for ( unsigned i = 0; i < res; i++ )
1542 buf[n] = WC_BSWAP(buf[i]);
1543 }
1544
1545 // NB: iconv was given only strlen(psz) characters on input, and so
1546 // it couldn't convert the trailing zero. Let's do it ourselves
1547 // if there's some room left for it in the output buffer.
1548 if (res < n)
1549 buf[res] = 0;
1550 }
1551 else
1552 {
1553 // no destination buffer... convert using temp buffer
1554 // to calculate destination buffer requirement
1555 wchar_t tbuf[8];
1556 res = 0;
1557 do {
1558 bufPtr = tbuf;
1559 outbuf = 8*SIZEOF_WCHAR_T;
1560
1561 cres = iconv(m2w,
1562 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1563 (char**)&bufPtr, &outbuf );
1564
1565 res += 8-(outbuf/SIZEOF_WCHAR_T);
1566 } while ((cres==(size_t)-1) && (errno==E2BIG));
1567 }
1568
1569 if (ICONV_FAILED(cres, inbuf))
1570 {
1571 //VS: it is ok if iconv fails, hence trace only
1572 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1573 return (size_t)-1;
1574 }
1575
1576 return res;
1577 }
1578
1579 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1580 {
1581 #if wxUSE_THREADS
1582 // NB: explained in MB2WC
1583 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1584 #endif
1585
1586 size_t inlen = wxWcslen(psz);
1587 size_t inbuf = inlen * SIZEOF_WCHAR_T;
1588 size_t outbuf = n;
1589 size_t res, cres;
1590
1591 wchar_t *tmpbuf = 0;
1592
1593 if (ms_wcNeedsSwap)
1594 {
1595 // need to copy to temp buffer to switch endianness
1596 // (doing WC_BSWAP twice on the original buffer won't help, as it
1597 // could be in read-only memory, or be accessed in some other thread)
1598 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1599 for ( size_t i = 0; i < inlen; i++ )
1600 tmpbuf[n] = WC_BSWAP(psz[i]);
1601 tmpbuf[inlen] = L'\0';
1602 psz = tmpbuf;
1603 }
1604
1605 if (buf)
1606 {
1607 // have destination buffer, convert there
1608 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1609
1610 res = n-outbuf;
1611
1612 // NB: iconv was given only wcslen(psz) characters on input, and so
1613 // it couldn't convert the trailing zero. Let's do it ourselves
1614 // if there's some room left for it in the output buffer.
1615 if (res < n)
1616 buf[0] = 0;
1617 }
1618 else
1619 {
1620 // no destination buffer... convert using temp buffer
1621 // to calculate destination buffer requirement
1622 char tbuf[16];
1623 res = 0;
1624 do {
1625 buf = tbuf; outbuf = 16;
1626
1627 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1628
1629 res += 16 - outbuf;
1630 } while ((cres==(size_t)-1) && (errno==E2BIG));
1631 }
1632
1633 if (ms_wcNeedsSwap)
1634 {
1635 free(tmpbuf);
1636 }
1637
1638 if (ICONV_FAILED(cres, inbuf))
1639 {
1640 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1641 return (size_t)-1;
1642 }
1643
1644 return res;
1645 }
1646
1647 const char *wxMBConv_iconv::GetMBNul(size_t *nulLen) const
1648 {
1649 if ( m_nulLen == (size_t)-2 )
1650 {
1651 wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1652
1653 #if wxUSE_THREADS
1654 // NB: explained in MB2WC
1655 wxMutexLocker lock(self->m_iconvMutex);
1656 #endif
1657
1658 size_t inLen = 1,
1659 outLen = WXSIZEOF(m_nulBuf);
1660 self->m_nulLen = iconv(w2m, ICONV_CHAR_CAST(L""), &inLen,
1661 &self->m_nulBuf, &outLen);
1662 }
1663
1664 *nulLen = m_nulLen;
1665 return m_nulBuf;
1666 }
1667
1668 #endif // HAVE_ICONV
1669
1670
1671 // ============================================================================
1672 // Win32 conversion classes
1673 // ============================================================================
1674
1675 #ifdef wxHAVE_WIN32_MB2WC
1676
1677 // from utils.cpp
1678 #if wxUSE_FONTMAP
1679 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1680 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1681 #endif
1682
1683 class wxMBConv_win32 : public wxMBConv
1684 {
1685 public:
1686 wxMBConv_win32()
1687 {
1688 m_CodePage = CP_ACP;
1689 m_nulLen = (size_t)-2;
1690 }
1691
1692 #if wxUSE_FONTMAP
1693 wxMBConv_win32(const wxChar* name)
1694 {
1695 m_CodePage = wxCharsetToCodepage(name);
1696 m_nulLen = (size_t)-2;
1697 }
1698
1699 wxMBConv_win32(wxFontEncoding encoding)
1700 {
1701 m_CodePage = wxEncodingToCodepage(encoding);
1702 m_nulLen = (size_t)-2;
1703 }
1704 #endif // wxUSE_FONTMAP
1705
1706 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1707 {
1708 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1709 // the behaviour is not compatible with the Unix version (using iconv)
1710 // and break the library itself, e.g. wxTextInputStream::NextChar()
1711 // wouldn't work if reading an incomplete MB char didn't result in an
1712 // error
1713 //
1714 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1715 // an error (tested under Windows Server 2003) and apparently it is
1716 // done on purpose, i.e. the function accepts any input in this case
1717 // and although I'd prefer to return error on ill-formed output, our
1718 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1719 // explicitly ill-formed according to RFC 2152) neither so we don't
1720 // even have any fallback here...
1721 //
1722 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1723 // Win XP or newer and if it is specified on older versions, conversion
1724 // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1725 // fails. So we can only use the flag on newer Windows versions.
1726 // Additionally, the flag is not supported by UTF7, symbol and CJK
1727 // encodings. See here:
1728 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1729 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1730 int flags = 0;
1731 if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
1732 m_CodePage < 50000 &&
1733 IsAtLeastWin2kSP4() )
1734 {
1735 flags = MB_ERR_INVALID_CHARS;
1736 }
1737 else if ( m_CodePage == CP_UTF8 )
1738 {
1739 // Avoid round-trip in the special case of UTF-8 by using our
1740 // own UTF-8 conversion code:
1741 return wxMBConvUTF8().MB2WC(buf, psz, n);
1742 }
1743
1744 const size_t len = ::MultiByteToWideChar
1745 (
1746 m_CodePage, // code page
1747 flags, // flags: fall on error
1748 psz, // input string
1749 -1, // its length (NUL-terminated)
1750 buf, // output string
1751 buf ? n : 0 // size of output buffer
1752 );
1753 if ( !len )
1754 {
1755 // function totally failed
1756 return (size_t)-1;
1757 }
1758
1759 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1760 // check if we succeeded, by doing a double trip:
1761 if ( !flags && buf )
1762 {
1763 const size_t mbLen = strlen(psz);
1764 wxCharBuffer mbBuf(mbLen);
1765 if ( ::WideCharToMultiByte
1766 (
1767 m_CodePage,
1768 0,
1769 buf,
1770 -1,
1771 mbBuf.data(),
1772 mbLen + 1, // size in bytes, not length
1773 NULL,
1774 NULL
1775 ) == 0 ||
1776 strcmp(mbBuf, psz) != 0 )
1777 {
1778 // we didn't obtain the same thing we started from, hence
1779 // the conversion was lossy and we consider that it failed
1780 return (size_t)-1;
1781 }
1782 }
1783
1784 // note that it returns count of written chars for buf != NULL and size
1785 // of the needed buffer for buf == NULL so in either case the length of
1786 // the string (which never includes the terminating NUL) is one less
1787 return len - 1;
1788 }
1789
1790 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1791 {
1792 /*
1793 we have a problem here: by default, WideCharToMultiByte() may
1794 replace characters unrepresentable in the target code page with bad
1795 quality approximations such as turning "1/2" symbol (U+00BD) into
1796 "1" for the code pages which don't have it and we, obviously, want
1797 to avoid this at any price
1798
1799 the trouble is that this function does it _silently_, i.e. it won't
1800 even tell us whether it did or not... Win98/2000 and higher provide
1801 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1802 we have to resort to a round trip, i.e. check that converting back
1803 results in the same string -- this is, of course, expensive but
1804 otherwise we simply can't be sure to not garble the data.
1805 */
1806
1807 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1808 // it doesn't work with CJK encodings (which we test for rather roughly
1809 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1810 // supporting it
1811 BOOL usedDef wxDUMMY_INITIALIZE(false);
1812 BOOL *pUsedDef;
1813 int flags;
1814 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1815 {
1816 // it's our lucky day
1817 flags = WC_NO_BEST_FIT_CHARS;
1818 pUsedDef = &usedDef;
1819 }
1820 else // old system or unsupported encoding
1821 {
1822 flags = 0;
1823 pUsedDef = NULL;
1824 }
1825
1826 const size_t len = ::WideCharToMultiByte
1827 (
1828 m_CodePage, // code page
1829 flags, // either none or no best fit
1830 pwz, // input string
1831 -1, // it is (wide) NUL-terminated
1832 buf, // output buffer
1833 buf ? n : 0, // and its size
1834 NULL, // default "replacement" char
1835 pUsedDef // [out] was it used?
1836 );
1837
1838 if ( !len )
1839 {
1840 // function totally failed
1841 return (size_t)-1;
1842 }
1843
1844 // if we were really converting, check if we succeeded
1845 if ( buf )
1846 {
1847 if ( flags )
1848 {
1849 // check if the conversion failed, i.e. if any replacements
1850 // were done
1851 if ( usedDef )
1852 return (size_t)-1;
1853 }
1854 else // we must resort to double tripping...
1855 {
1856 wxWCharBuffer wcBuf(n);
1857 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1858 wcscmp(wcBuf, pwz) != 0 )
1859 {
1860 // we didn't obtain the same thing we started from, hence
1861 // the conversion was lossy and we consider that it failed
1862 return (size_t)-1;
1863 }
1864 }
1865 }
1866
1867 // see the comment above for the reason of "len - 1"
1868 return len - 1;
1869 }
1870
1871 bool IsOk() const { return m_CodePage != -1; }
1872
1873 private:
1874 static bool CanUseNoBestFit()
1875 {
1876 static int s_isWin98Or2k = -1;
1877
1878 if ( s_isWin98Or2k == -1 )
1879 {
1880 int verMaj, verMin;
1881 switch ( wxGetOsVersion(&verMaj, &verMin) )
1882 {
1883 case wxWIN95:
1884 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1885 break;
1886
1887 case wxWINDOWS_NT:
1888 s_isWin98Or2k = verMaj >= 5;
1889 break;
1890
1891 default:
1892 // unknown, be conseravtive by default
1893 s_isWin98Or2k = 0;
1894 }
1895
1896 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1897 }
1898
1899 return s_isWin98Or2k == 1;
1900 }
1901
1902 static bool IsAtLeastWin2kSP4()
1903 {
1904 #ifdef __WXWINCE__
1905 return false;
1906 #else
1907 static int s_isAtLeastWin2kSP4 = -1;
1908
1909 if ( s_isAtLeastWin2kSP4 == -1 )
1910 {
1911 OSVERSIONINFOEX ver;
1912
1913 memset(&ver, 0, sizeof(ver));
1914 ver.dwOSVersionInfoSize = sizeof(ver);
1915 GetVersionEx((OSVERSIONINFO*)&ver);
1916
1917 s_isAtLeastWin2kSP4 =
1918 ((ver.dwMajorVersion > 5) || // Vista+
1919 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
1920 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
1921 ver.wServicePackMajor >= 4)) // 2000 SP4+
1922 ? 1 : 0;
1923 }
1924
1925 return s_isAtLeastWin2kSP4 == 1;
1926 #endif
1927 }
1928
1929 virtual const char *GetMBNul(size_t *nulLen) const
1930 {
1931 if ( m_nulLen == (size_t)-2 )
1932 {
1933 wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
1934
1935 self->m_nulLen = ::WideCharToMultiByte
1936 (
1937 m_CodePage, // code page
1938 0, // no flags
1939 L"", // input string
1940 1, // translate just NUL
1941 self->m_nulBuf, // output buffer
1942 WXSIZEOF(m_nulBuf), // and its size
1943 NULL, // "replacement" char
1944 NULL // [out] was it used?
1945 );
1946
1947 if ( m_nulLen == 0 )
1948 self->m_nulLen = (size_t)-1;
1949 }
1950
1951 *nulLen = m_nulLen;
1952 return m_nulBuf;
1953 }
1954
1955 long m_CodePage;
1956 size_t m_nulLen;
1957 char m_nulBuf[8];
1958 };
1959
1960 #endif // wxHAVE_WIN32_MB2WC
1961
1962 // ============================================================================
1963 // Cocoa conversion classes
1964 // ============================================================================
1965
1966 #if defined(__WXCOCOA__)
1967
// RN: There is no UTF-32 support in either Core Foundation or
// Cocoa. Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1971
1972 #include <CoreFoundation/CFString.h>
1973 #include <CoreFoundation/CFStringEncodingExt.h>
1974
1975 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1976 {
1977 CFStringEncoding enc = kCFStringEncodingInvalidId ;
1978 if ( encoding == wxFONTENCODING_DEFAULT )
1979 {
1980 enc = CFStringGetSystemEncoding();
1981 }
1982 else switch( encoding)
1983 {
1984 case wxFONTENCODING_ISO8859_1 :
1985 enc = kCFStringEncodingISOLatin1 ;
1986 break ;
1987 case wxFONTENCODING_ISO8859_2 :
1988 enc = kCFStringEncodingISOLatin2;
1989 break ;
1990 case wxFONTENCODING_ISO8859_3 :
1991 enc = kCFStringEncodingISOLatin3 ;
1992 break ;
1993 case wxFONTENCODING_ISO8859_4 :
1994 enc = kCFStringEncodingISOLatin4;
1995 break ;
1996 case wxFONTENCODING_ISO8859_5 :
1997 enc = kCFStringEncodingISOLatinCyrillic;
1998 break ;
1999 case wxFONTENCODING_ISO8859_6 :
2000 enc = kCFStringEncodingISOLatinArabic;
2001 break ;
2002 case wxFONTENCODING_ISO8859_7 :
2003 enc = kCFStringEncodingISOLatinGreek;
2004 break ;
2005 case wxFONTENCODING_ISO8859_8 :
2006 enc = kCFStringEncodingISOLatinHebrew;
2007 break ;
2008 case wxFONTENCODING_ISO8859_9 :
2009 enc = kCFStringEncodingISOLatin5;
2010 break ;
2011 case wxFONTENCODING_ISO8859_10 :
2012 enc = kCFStringEncodingISOLatin6;
2013 break ;
2014 case wxFONTENCODING_ISO8859_11 :
2015 enc = kCFStringEncodingISOLatinThai;
2016 break ;
2017 case wxFONTENCODING_ISO8859_13 :
2018 enc = kCFStringEncodingISOLatin7;
2019 break ;
2020 case wxFONTENCODING_ISO8859_14 :
2021 enc = kCFStringEncodingISOLatin8;
2022 break ;
2023 case wxFONTENCODING_ISO8859_15 :
2024 enc = kCFStringEncodingISOLatin9;
2025 break ;
2026
2027 case wxFONTENCODING_KOI8 :
2028 enc = kCFStringEncodingKOI8_R;
2029 break ;
2030 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2031 enc = kCFStringEncodingDOSRussian;
2032 break ;
2033
2034 // case wxFONTENCODING_BULGARIAN :
2035 // enc = ;
2036 // break ;
2037
2038 case wxFONTENCODING_CP437 :
2039 enc =kCFStringEncodingDOSLatinUS ;
2040 break ;
2041 case wxFONTENCODING_CP850 :
2042 enc = kCFStringEncodingDOSLatin1;
2043 break ;
2044 case wxFONTENCODING_CP852 :
2045 enc = kCFStringEncodingDOSLatin2;
2046 break ;
2047 case wxFONTENCODING_CP855 :
2048 enc = kCFStringEncodingDOSCyrillic;
2049 break ;
2050 case wxFONTENCODING_CP866 :
2051 enc =kCFStringEncodingDOSRussian ;
2052 break ;
2053 case wxFONTENCODING_CP874 :
2054 enc = kCFStringEncodingDOSThai;
2055 break ;
2056 case wxFONTENCODING_CP932 :
2057 enc = kCFStringEncodingDOSJapanese;
2058 break ;
2059 case wxFONTENCODING_CP936 :
2060 enc =kCFStringEncodingDOSChineseSimplif ;
2061 break ;
2062 case wxFONTENCODING_CP949 :
2063 enc = kCFStringEncodingDOSKorean;
2064 break ;
2065 case wxFONTENCODING_CP950 :
2066 enc = kCFStringEncodingDOSChineseTrad;
2067 break ;
2068 case wxFONTENCODING_CP1250 :
2069 enc = kCFStringEncodingWindowsLatin2;
2070 break ;
2071 case wxFONTENCODING_CP1251 :
2072 enc =kCFStringEncodingWindowsCyrillic ;
2073 break ;
2074 case wxFONTENCODING_CP1252 :
2075 enc =kCFStringEncodingWindowsLatin1 ;
2076 break ;
2077 case wxFONTENCODING_CP1253 :
2078 enc = kCFStringEncodingWindowsGreek;
2079 break ;
2080 case wxFONTENCODING_CP1254 :
2081 enc = kCFStringEncodingWindowsLatin5;
2082 break ;
2083 case wxFONTENCODING_CP1255 :
2084 enc =kCFStringEncodingWindowsHebrew ;
2085 break ;
2086 case wxFONTENCODING_CP1256 :
2087 enc =kCFStringEncodingWindowsArabic ;
2088 break ;
2089 case wxFONTENCODING_CP1257 :
2090 enc = kCFStringEncodingWindowsBalticRim;
2091 break ;
2092 // This only really encodes to UTF7 (if that) evidently
2093 // case wxFONTENCODING_UTF7 :
2094 // enc = kCFStringEncodingNonLossyASCII ;
2095 // break ;
2096 case wxFONTENCODING_UTF8 :
2097 enc = kCFStringEncodingUTF8 ;
2098 break ;
2099 case wxFONTENCODING_EUC_JP :
2100 enc = kCFStringEncodingEUC_JP;
2101 break ;
2102 case wxFONTENCODING_UTF16 :
2103 enc = kCFStringEncodingUnicode ;
2104 break ;
2105 case wxFONTENCODING_MACROMAN :
2106 enc = kCFStringEncodingMacRoman ;
2107 break ;
2108 case wxFONTENCODING_MACJAPANESE :
2109 enc = kCFStringEncodingMacJapanese ;
2110 break ;
2111 case wxFONTENCODING_MACCHINESETRAD :
2112 enc = kCFStringEncodingMacChineseTrad ;
2113 break ;
2114 case wxFONTENCODING_MACKOREAN :
2115 enc = kCFStringEncodingMacKorean ;
2116 break ;
2117 case wxFONTENCODING_MACARABIC :
2118 enc = kCFStringEncodingMacArabic ;
2119 break ;
2120 case wxFONTENCODING_MACHEBREW :
2121 enc = kCFStringEncodingMacHebrew ;
2122 break ;
2123 case wxFONTENCODING_MACGREEK :
2124 enc = kCFStringEncodingMacGreek ;
2125 break ;
2126 case wxFONTENCODING_MACCYRILLIC :
2127 enc = kCFStringEncodingMacCyrillic ;
2128 break ;
2129 case wxFONTENCODING_MACDEVANAGARI :
2130 enc = kCFStringEncodingMacDevanagari ;
2131 break ;
2132 case wxFONTENCODING_MACGURMUKHI :
2133 enc = kCFStringEncodingMacGurmukhi ;
2134 break ;
2135 case wxFONTENCODING_MACGUJARATI :
2136 enc = kCFStringEncodingMacGujarati ;
2137 break ;
2138 case wxFONTENCODING_MACORIYA :
2139 enc = kCFStringEncodingMacOriya ;
2140 break ;
2141 case wxFONTENCODING_MACBENGALI :
2142 enc = kCFStringEncodingMacBengali ;
2143 break ;
2144 case wxFONTENCODING_MACTAMIL :
2145 enc = kCFStringEncodingMacTamil ;
2146 break ;
2147 case wxFONTENCODING_MACTELUGU :
2148 enc = kCFStringEncodingMacTelugu ;
2149 break ;
2150 case wxFONTENCODING_MACKANNADA :
2151 enc = kCFStringEncodingMacKannada ;
2152 break ;
2153 case wxFONTENCODING_MACMALAJALAM :
2154 enc = kCFStringEncodingMacMalayalam ;
2155 break ;
2156 case wxFONTENCODING_MACSINHALESE :
2157 enc = kCFStringEncodingMacSinhalese ;
2158 break ;
2159 case wxFONTENCODING_MACBURMESE :
2160 enc = kCFStringEncodingMacBurmese ;
2161 break ;
2162 case wxFONTENCODING_MACKHMER :
2163 enc = kCFStringEncodingMacKhmer ;
2164 break ;
2165 case wxFONTENCODING_MACTHAI :
2166 enc = kCFStringEncodingMacThai ;
2167 break ;
2168 case wxFONTENCODING_MACLAOTIAN :
2169 enc = kCFStringEncodingMacLaotian ;
2170 break ;
2171 case wxFONTENCODING_MACGEORGIAN :
2172 enc = kCFStringEncodingMacGeorgian ;
2173 break ;
2174 case wxFONTENCODING_MACARMENIAN :
2175 enc = kCFStringEncodingMacArmenian ;
2176 break ;
2177 case wxFONTENCODING_MACCHINESESIMP :
2178 enc = kCFStringEncodingMacChineseSimp ;
2179 break ;
2180 case wxFONTENCODING_MACTIBETAN :
2181 enc = kCFStringEncodingMacTibetan ;
2182 break ;
2183 case wxFONTENCODING_MACMONGOLIAN :
2184 enc = kCFStringEncodingMacMongolian ;
2185 break ;
2186 case wxFONTENCODING_MACETHIOPIC :
2187 enc = kCFStringEncodingMacEthiopic ;
2188 break ;
2189 case wxFONTENCODING_MACCENTRALEUR :
2190 enc = kCFStringEncodingMacCentralEurRoman ;
2191 break ;
2192 case wxFONTENCODING_MACVIATNAMESE :
2193 enc = kCFStringEncodingMacVietnamese ;
2194 break ;
2195 case wxFONTENCODING_MACARABICEXT :
2196 enc = kCFStringEncodingMacExtArabic ;
2197 break ;
2198 case wxFONTENCODING_MACSYMBOL :
2199 enc = kCFStringEncodingMacSymbol ;
2200 break ;
2201 case wxFONTENCODING_MACDINGBATS :
2202 enc = kCFStringEncodingMacDingbats ;
2203 break ;
2204 case wxFONTENCODING_MACTURKISH :
2205 enc = kCFStringEncodingMacTurkish ;
2206 break ;
2207 case wxFONTENCODING_MACCROATIAN :
2208 enc = kCFStringEncodingMacCroatian ;
2209 break ;
2210 case wxFONTENCODING_MACICELANDIC :
2211 enc = kCFStringEncodingMacIcelandic ;
2212 break ;
2213 case wxFONTENCODING_MACROMANIAN :
2214 enc = kCFStringEncodingMacRomanian ;
2215 break ;
2216 case wxFONTENCODING_MACCELTIC :
2217 enc = kCFStringEncodingMacCeltic ;
2218 break ;
2219 case wxFONTENCODING_MACGAELIC :
2220 enc = kCFStringEncodingMacGaelic ;
2221 break ;
2222 // case wxFONTENCODING_MACKEYBOARD :
2223 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2224 // break ;
2225 default :
2226 // because gcc is picky
2227 break ;
2228 } ;
2229 return enc ;
2230 }
2231
2232 class wxMBConv_cocoa : public wxMBConv
2233 {
2234 public:
2235 wxMBConv_cocoa()
2236 {
2237 Init(CFStringGetSystemEncoding()) ;
2238 }
2239
2240 #if wxUSE_FONTMAP
2241 wxMBConv_cocoa(const wxChar* name)
2242 {
2243 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2244 }
2245 #endif
2246
2247 wxMBConv_cocoa(wxFontEncoding encoding)
2248 {
2249 Init( wxCFStringEncFromFontEnc(encoding) );
2250 }
2251
2252 ~wxMBConv_cocoa()
2253 {
2254 }
2255
2256 void Init( CFStringEncoding encoding)
2257 {
2258 m_encoding = encoding ;
2259 }
2260
2261 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2262 {
2263 wxASSERT(szUnConv);
2264
2265 CFStringRef theString = CFStringCreateWithBytes (
2266 NULL, //the allocator
2267 (const UInt8*)szUnConv,
2268 strlen(szUnConv),
2269 m_encoding,
2270 false //no BOM/external representation
2271 );
2272
2273 wxASSERT(theString);
2274
2275 size_t nOutLength = CFStringGetLength(theString);
2276
2277 if (szOut == NULL)
2278 {
2279 CFRelease(theString);
2280 return nOutLength;
2281 }
2282
2283 CFRange theRange = { 0, nOutSize };
2284
2285 #if SIZEOF_WCHAR_T == 4
2286 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2287 #endif
2288
2289 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2290
2291 CFRelease(theString);
2292
2293 szUniCharBuffer[nOutLength] = '\0' ;
2294
2295 #if SIZEOF_WCHAR_T == 4
2296 wxMBConvUTF16 converter ;
2297 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2298 delete[] szUniCharBuffer;
2299 #endif
2300
2301 return nOutLength;
2302 }
2303
2304 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2305 {
2306 wxASSERT(szUnConv);
2307
2308 size_t nRealOutSize;
2309 size_t nBufSize = wxWcslen(szUnConv);
2310 UniChar* szUniBuffer = (UniChar*) szUnConv;
2311
2312 #if SIZEOF_WCHAR_T == 4
2313 wxMBConvUTF16 converter ;
2314 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2315 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2316 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2317 nBufSize /= sizeof(UniChar);
2318 #endif
2319
2320 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2321 NULL, //allocator
2322 szUniBuffer,
2323 nBufSize,
2324 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2325 );
2326
2327 wxASSERT(theString);
2328
2329 //Note that CER puts a BOM when converting to unicode
2330 //so we check and use getchars instead in that case
2331 if (m_encoding == kCFStringEncodingUnicode)
2332 {
2333 if (szOut != NULL)
2334 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2335
2336 nRealOutSize = CFStringGetLength(theString) + 1;
2337 }
2338 else
2339 {
2340 CFStringGetBytes(
2341 theString,
2342 CFRangeMake(0, CFStringGetLength(theString)),
2343 m_encoding,
2344 0, //what to put in characters that can't be converted -
2345 //0 tells CFString to return NULL if it meets such a character
2346 false, //not an external representation
2347 (UInt8*) szOut,
2348 nOutSize,
2349 (CFIndex*) &nRealOutSize
2350 );
2351 }
2352
2353 CFRelease(theString);
2354
2355 #if SIZEOF_WCHAR_T == 4
2356 delete[] szUniBuffer;
2357 #endif
2358
2359 return nRealOutSize - 1;
2360 }
2361
2362 bool IsOk() const
2363 {
2364 return m_encoding != kCFStringEncodingInvalidId &&
2365 CFStringIsEncodingAvailable(m_encoding);
2366 }
2367
2368 private:
2369 CFStringEncoding m_encoding ;
2370 };
2371
2372 #endif // defined(__WXCOCOA__)
2373
2374 // ============================================================================
2375 // Mac conversion classes
2376 // ============================================================================
2377
2378 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2379
2380 class wxMBConv_mac : public wxMBConv
2381 {
2382 public:
2383 wxMBConv_mac()
2384 {
2385 Init(CFStringGetSystemEncoding()) ;
2386 }
2387
2388 #if wxUSE_FONTMAP
2389 wxMBConv_mac(const wxChar* name)
2390 {
2391 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2392 }
2393 #endif
2394
2395 wxMBConv_mac(wxFontEncoding encoding)
2396 {
2397 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2398 }
2399
2400 ~wxMBConv_mac()
2401 {
2402 OSStatus status = noErr ;
2403 status = TECDisposeConverter(m_MB2WC_converter);
2404 status = TECDisposeConverter(m_WC2MB_converter);
2405 }
2406
2407
2408 void Init( TextEncodingBase encoding)
2409 {
2410 OSStatus status = noErr ;
2411 m_char_encoding = encoding ;
2412 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2413
2414 status = TECCreateConverter(&m_MB2WC_converter,
2415 m_char_encoding,
2416 m_unicode_encoding);
2417 status = TECCreateConverter(&m_WC2MB_converter,
2418 m_unicode_encoding,
2419 m_char_encoding);
2420 }
2421
2422 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2423 {
2424 OSStatus status = noErr ;
2425 ByteCount byteOutLen ;
2426 ByteCount byteInLen = strlen(psz) ;
2427 wchar_t *tbuf = NULL ;
2428 UniChar* ubuf = NULL ;
2429 size_t res = 0 ;
2430
2431 if (buf == NULL)
2432 {
2433 //apple specs say at least 32
2434 n = wxMax( 32 , byteInLen ) ;
2435 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2436 }
2437 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2438 #if SIZEOF_WCHAR_T == 4
2439 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2440 #else
2441 ubuf = (UniChar*) (buf ? buf : tbuf) ;
2442 #endif
2443 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2444 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2445 #if SIZEOF_WCHAR_T == 4
2446 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2447 // is not properly terminated we get random characters at the end
2448 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2449 wxMBConvUTF16 converter ;
2450 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2451 free( ubuf ) ;
2452 #else
2453 res = byteOutLen / sizeof( UniChar ) ;
2454 #endif
2455 if ( buf == NULL )
2456 free(tbuf) ;
2457
2458 if ( buf && res < n)
2459 buf[res] = 0;
2460
2461 return res ;
2462 }
2463
2464 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2465 {
2466 OSStatus status = noErr ;
2467 ByteCount byteOutLen ;
2468 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2469
2470 char *tbuf = NULL ;
2471
2472 if (buf == NULL)
2473 {
2474 //apple specs say at least 32
2475 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2476 tbuf = (char*) malloc( n ) ;
2477 }
2478
2479 ByteCount byteBufferLen = n ;
2480 UniChar* ubuf = NULL ;
2481 #if SIZEOF_WCHAR_T == 4
2482 wxMBConvUTF16 converter ;
2483 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2484 byteInLen = unicharlen ;
2485 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2486 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2487 #else
2488 ubuf = (UniChar*) psz ;
2489 #endif
2490 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2491 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2492 #if SIZEOF_WCHAR_T == 4
2493 free( ubuf ) ;
2494 #endif
2495 if ( buf == NULL )
2496 free(tbuf) ;
2497
2498 size_t res = byteOutLen ;
2499 if ( buf && res < n)
2500 {
2501 buf[res] = 0;
2502
2503 //we need to double-trip to verify it didn't insert any ? in place
2504 //of bogus characters
2505 wxWCharBuffer wcBuf(n);
2506 size_t pszlen = wxWcslen(psz);
2507 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2508 wxWcslen(wcBuf) != pszlen ||
2509 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2510 {
2511 // we didn't obtain the same thing we started from, hence
2512 // the conversion was lossy and we consider that it failed
2513 return (size_t)-1;
2514 }
2515 }
2516
2517 return res ;
2518 }
2519
2520 bool IsOk() const
2521 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2522
2523 private:
2524 TECObjectRef m_MB2WC_converter ;
2525 TECObjectRef m_WC2MB_converter ;
2526
2527 TextEncodingBase m_char_encoding ;
2528 TextEncodingBase m_unicode_encoding ;
2529 };
2530
2531 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2532
2533 // ============================================================================
2534 // wxEncodingConverter based conversion classes
2535 // ============================================================================
2536
2537 #if wxUSE_FONTMAP
2538
2539 class wxMBConv_wxwin : public wxMBConv
2540 {
2541 private:
2542 void Init()
2543 {
2544 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2545 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2546 }
2547
2548 public:
2549 // temporarily just use wxEncodingConverter stuff,
2550 // so that it works while a better implementation is built
2551 wxMBConv_wxwin(const wxChar* name)
2552 {
2553 if (name)
2554 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2555 else
2556 m_enc = wxFONTENCODING_SYSTEM;
2557
2558 Init();
2559 }
2560
2561 wxMBConv_wxwin(wxFontEncoding enc)
2562 {
2563 m_enc = enc;
2564
2565 Init();
2566 }
2567
2568 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2569 {
2570 size_t inbuf = strlen(psz);
2571 if (buf)
2572 {
2573 if (!m2w.Convert(psz,buf))
2574 return (size_t)-1;
2575 }
2576 return inbuf;
2577 }
2578
2579 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2580 {
2581 const size_t inbuf = wxWcslen(psz);
2582 if (buf)
2583 {
2584 if (!w2m.Convert(psz,buf))
2585 return (size_t)-1;
2586 }
2587
2588 return inbuf;
2589 }
2590
2591 bool IsOk() const { return m_ok; }
2592
2593 public:
2594 wxFontEncoding m_enc;
2595 wxEncodingConverter m2w, w2m;
2596
2597 private:
2598 virtual const char *GetMBNul(size_t *nulLen) const
2599 {
2600 switch ( m_enc )
2601 {
2602 case wxFONTENCODING_UTF16BE:
2603 case wxFONTENCODING_UTF16LE:
2604 *nulLen = 2;
2605 return "\0";
2606
2607 case wxFONTENCODING_UTF32BE:
2608 case wxFONTENCODING_UTF32LE:
2609 *nulLen = 4;
2610 return "\0\0\0";
2611
2612 default:
2613 *nulLen = 1;
2614 return "";
2615 }
2616 }
2617
2618 // were we initialized successfully?
2619 bool m_ok;
2620
2621 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2622 };
2623
2624 // make the constructors available for unit testing
2625 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2626 {
2627 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2628 if ( !result->IsOk() )
2629 {
2630 delete result;
2631 return 0;
2632 }
2633 return result;
2634 }
2635
2636 #endif // wxUSE_FONTMAP
2637
2638 // ============================================================================
2639 // wxCSConv implementation
2640 // ============================================================================
2641
2642 void wxCSConv::Init()
2643 {
2644 m_name = NULL;
2645 m_convReal = NULL;
2646 m_deferred = true;
2647 }
2648
2649 wxCSConv::wxCSConv(const wxChar *charset)
2650 {
2651 Init();
2652
2653 if ( charset )
2654 {
2655 SetName(charset);
2656 }
2657
2658 #if wxUSE_FONTMAP
2659 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2660 #else
2661 m_encoding = wxFONTENCODING_SYSTEM;
2662 #endif
2663 }
2664
2665 wxCSConv::wxCSConv(wxFontEncoding encoding)
2666 {
2667 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2668 {
2669 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2670
2671 encoding = wxFONTENCODING_SYSTEM;
2672 }
2673
2674 Init();
2675
2676 m_encoding = encoding;
2677 }
2678
2679 wxCSConv::~wxCSConv()
2680 {
2681 Clear();
2682 }
2683
2684 wxCSConv::wxCSConv(const wxCSConv& conv)
2685 : wxMBConv()
2686 {
2687 Init();
2688
2689 SetName(conv.m_name);
2690 m_encoding = conv.m_encoding;
2691 }
2692
2693 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2694 {
2695 Clear();
2696
2697 SetName(conv.m_name);
2698 m_encoding = conv.m_encoding;
2699
2700 return *this;
2701 }
2702
2703 void wxCSConv::Clear()
2704 {
2705 free(m_name);
2706 delete m_convReal;
2707
2708 m_name = NULL;
2709 m_convReal = NULL;
2710 }
2711
2712 void wxCSConv::SetName(const wxChar *charset)
2713 {
2714 if (charset)
2715 {
2716 m_name = wxStrdup(charset);
2717 m_deferred = true;
2718 }
2719 }
2720
#if wxUSE_FONTMAP

#include "wx/hashmap.h"

// hash map type associating a charset name (wxString) with a wxFontEncoding
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString,
                     wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// cache of the charset names used by wxCSConv::DoCreate(); an empty string
// is stored for the encodings for which no usable name was found
static wxEncodingNameCache gs_nameCache;

#endif // wxUSE_FONTMAP
2729
2730 wxMBConv *wxCSConv::DoCreate() const
2731 {
2732 #if wxUSE_FONTMAP
2733 wxLogTrace(TRACE_STRCONV,
2734 wxT("creating conversion for %s"),
2735 (m_name ? m_name
2736 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2737 #endif // wxUSE_FONTMAP
2738
2739 // check for the special case of ASCII or ISO8859-1 charset: as we have
2740 // special knowledge of it anyhow, we don't need to create a special
2741 // conversion object
2742 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
2743 m_encoding == wxFONTENCODING_DEFAULT )
2744 {
2745 // don't convert at all
2746 return NULL;
2747 }
2748
2749 // we trust OS to do conversion better than we can so try external
2750 // conversion methods first
2751 //
2752 // the full order is:
2753 // 1. OS conversion (iconv() under Unix or Win32 API)
2754 // 2. hard coded conversions for UTF
2755 // 3. wxEncodingConverter as fall back
2756
2757 // step (1)
2758 #ifdef HAVE_ICONV
2759 #if !wxUSE_FONTMAP
2760 if ( m_name )
2761 #endif // !wxUSE_FONTMAP
2762 {
2763 wxString name(m_name);
2764 wxFontEncoding encoding(m_encoding);
2765
2766 if ( !name.empty() )
2767 {
2768 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2769 if ( conv->IsOk() )
2770 return conv;
2771
2772 delete conv;
2773
2774 #if wxUSE_FONTMAP
2775 encoding =
2776 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2777 #endif // wxUSE_FONTMAP
2778 }
2779 #if wxUSE_FONTMAP
2780 {
2781 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2782 if ( it != gs_nameCache.end() )
2783 {
2784 if ( it->second.empty() )
2785 return NULL;
2786
2787 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2788 if ( conv->IsOk() )
2789 return conv;
2790
2791 delete conv;
2792 }
2793
2794 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2795
2796 for ( ; *names; ++names )
2797 {
2798 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2799 if ( conv->IsOk() )
2800 {
2801 gs_nameCache[encoding] = *names;
2802 return conv;
2803 }
2804
2805 delete conv;
2806 }
2807
2808 gs_nameCache[encoding] = _T(""); // cache the failure
2809 }
2810 #endif // wxUSE_FONTMAP
2811 }
2812 #endif // HAVE_ICONV
2813
2814 #ifdef wxHAVE_WIN32_MB2WC
2815 {
2816 #if wxUSE_FONTMAP
2817 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2818 : new wxMBConv_win32(m_encoding);
2819 if ( conv->IsOk() )
2820 return conv;
2821
2822 delete conv;
2823 #else
2824 return NULL;
2825 #endif
2826 }
2827 #endif // wxHAVE_WIN32_MB2WC
2828 #if defined(__WXMAC__)
2829 {
2830 // leave UTF16 and UTF32 to the built-ins of wx
2831 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2832 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2833 {
2834
2835 #if wxUSE_FONTMAP
2836 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2837 : new wxMBConv_mac(m_encoding);
2838 #else
2839 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2840 #endif
2841 if ( conv->IsOk() )
2842 return conv;
2843
2844 delete conv;
2845 }
2846 }
2847 #endif
2848 #if defined(__WXCOCOA__)
2849 {
2850 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2851 {
2852
2853 #if wxUSE_FONTMAP
2854 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2855 : new wxMBConv_cocoa(m_encoding);
2856 #else
2857 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2858 #endif
2859 if ( conv->IsOk() )
2860 return conv;
2861
2862 delete conv;
2863 }
2864 }
2865 #endif
2866 // step (2)
2867 wxFontEncoding enc = m_encoding;
2868 #if wxUSE_FONTMAP
2869 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2870 {
2871 // use "false" to suppress interactive dialogs -- we can be called from
2872 // anywhere and popping up a dialog from here is the last thing we want to
2873 // do
2874 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2875 }
2876 #endif // wxUSE_FONTMAP
2877
2878 switch ( enc )
2879 {
2880 case wxFONTENCODING_UTF7:
2881 return new wxMBConvUTF7;
2882
2883 case wxFONTENCODING_UTF8:
2884 return new wxMBConvUTF8;
2885
2886 case wxFONTENCODING_UTF16BE:
2887 return new wxMBConvUTF16BE;
2888
2889 case wxFONTENCODING_UTF16LE:
2890 return new wxMBConvUTF16LE;
2891
2892 case wxFONTENCODING_UTF32BE:
2893 return new wxMBConvUTF32BE;
2894
2895 case wxFONTENCODING_UTF32LE:
2896 return new wxMBConvUTF32LE;
2897
2898 default:
2899 // nothing to do but put here to suppress gcc warnings
2900 ;
2901 }
2902
2903 // step (3)
2904 #if wxUSE_FONTMAP
2905 {
2906 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2907 : new wxMBConv_wxwin(m_encoding);
2908 if ( conv->IsOk() )
2909 return conv;
2910
2911 delete conv;
2912 }
2913 #endif // wxUSE_FONTMAP
2914
2915 // NB: This is a hack to prevent deadlock. What could otherwise happen
2916 // in Unicode build: wxConvLocal creation ends up being here
2917 // because of some failure and logs the error. But wxLog will try to
2918 // attach timestamp, for which it will need wxConvLocal (to convert
2919 // time to char* and then wchar_t*), but that fails, tries to log
2920 // error, but wxLog has a (already locked) critical section that
2921 // guards static buffer.
2922 static bool alreadyLoggingError = false;
2923 if (!alreadyLoggingError)
2924 {
2925 alreadyLoggingError = true;
2926 wxLogError(_("Cannot convert from the charset '%s'!"),
2927 m_name ? m_name
2928 :
2929 #if wxUSE_FONTMAP
2930 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2931 #else // !wxUSE_FONTMAP
2932 wxString::Format(_("encoding %s"), m_encoding).c_str()
2933 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2934 );
2935 alreadyLoggingError = false;
2936 }
2937
2938 return NULL;
2939 }
2940
2941 void wxCSConv::CreateConvIfNeeded() const
2942 {
2943 if ( m_deferred )
2944 {
2945 wxCSConv *self = (wxCSConv *)this; // const_cast
2946
2947 #if wxUSE_INTL
2948 // if we don't have neither the name nor the encoding, use the default
2949 // encoding for this system
2950 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2951 {
2952 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2953 }
2954 #endif // wxUSE_INTL
2955
2956 self->m_convReal = DoCreate();
2957 self->m_deferred = false;
2958 }
2959 }
2960
2961 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2962 {
2963 CreateConvIfNeeded();
2964
2965 if (m_convReal)
2966 return m_convReal->MB2WC(buf, psz, n);
2967
2968 // latin-1 (direct)
2969 size_t len = strlen(psz);
2970
2971 if (buf)
2972 {
2973 for (size_t c = 0; c <= len; c++)
2974 buf[c] = (unsigned char)(psz[c]);
2975 }
2976
2977 return len;
2978 }
2979
2980 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2981 {
2982 CreateConvIfNeeded();
2983
2984 if (m_convReal)
2985 return m_convReal->WC2MB(buf, psz, n);
2986
2987 // latin-1 (direct)
2988 const size_t len = wxWcslen(psz);
2989 if (buf)
2990 {
2991 for (size_t c = 0; c <= len; c++)
2992 {
2993 if (psz[c] > 0xFF)
2994 return (size_t)-1;
2995 buf[c] = (char)psz[c];
2996 }
2997 }
2998 else
2999 {
3000 for (size_t c = 0; c <= len; c++)
3001 {
3002 if (psz[c] > 0xFF)
3003 return (size_t)-1;
3004 }
3005 }
3006
3007 return len;
3008 }
3009
3010 const char *wxCSConv::GetMBNul(size_t *nulLen) const
3011 {
3012 CreateConvIfNeeded();
3013
3014 if ( m_convReal )
3015 {
3016 // cast needed just to call private function of m_convReal
3017 return ((wxCSConv *)m_convReal)->GetMBNul(nulLen);
3018 }
3019
3020 *nulLen = 1;
3021 return "";
3022 }
3023
3024 // ----------------------------------------------------------------------------
3025 // globals
3026 // ----------------------------------------------------------------------------
3027
3028 #ifdef __WINDOWS__
3029 static wxMBConv_win32 wxConvLibcObj;
3030 #elif defined(__WXMAC__) && !defined(__MACH__)
3031 static wxMBConv_mac wxConvLibcObj ;
3032 #else
3033 static wxMBConvLibc wxConvLibcObj;
3034 #endif
3035
3036 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
3037 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
3038 static wxMBConvUTF7 wxConvUTF7Obj;
3039 static wxMBConvUTF8 wxConvUTF8Obj;
3040
3041 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
3042 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
3043 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
3044 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
3045 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
3046 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
3047 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
3048 #ifdef __WXOSX__
3049 wxConvUTF8Obj;
3050 #else
3051 wxConvLibcObj;
3052 #endif
3053
3054
3055 #else // !wxUSE_WCHAR_T
3056
3057 // stand-ins in absence of wchar_t
3058 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
3059 wxConvISO8859_1,
3060 wxConvLocal,
3061 wxConvUTF8;
3062
3063 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T