]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
fix compilation problem in prior commit
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
6 // Modified by:
7 // Created: 29/01/98
8 // RCS-ID: $Id$
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
14
15 // ============================================================================
16 // declarations
17 // ============================================================================
18
19 // ----------------------------------------------------------------------------
20 // headers
21 // ----------------------------------------------------------------------------
22
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
25
26 #ifdef __BORLANDC__
27 #pragma hdrstop
28 #endif
29
30 #ifndef WX_PRECOMP
31 #include "wx/intl.h"
32 #include "wx/log.h"
33 #endif // WX_PRECOMP
34
35 #include "wx/strconv.h"
36
37 #if wxUSE_WCHAR_T
38
39 #ifdef __WINDOWS__
40 #include "wx/msw/private.h"
41 #include "wx/msw/missing.h"
42 #endif
43
44 #ifndef __WXWINCE__
45 #include <errno.h>
46 #endif
47
48 #include <ctype.h>
49 #include <string.h>
50 #include <stdlib.h>
51
52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54 #endif // __WIN32__ but !__WXMICROWIN__
55
56 #ifdef __SALFORDC__
57 #include <clib.h>
58 #endif
59
60 #ifdef HAVE_ICONV
61 #include <iconv.h>
62 #include "wx/thread.h"
63 #endif
64
65 #include "wx/encconv.h"
66 #include "wx/fontmap.h"
67 #include "wx/utils.h"
68
69 #ifdef __WXMAC__
70 #ifndef __DARWIN__
71 #include <ATSUnicode.h>
72 #include <TextCommon.h>
73 #include <TextEncodingConverter.h>
74 #endif
75
76 #include "wx/mac/private.h" // includes mac headers
77 #endif
78
79 #define TRACE_STRCONV _T("strconv")
80
81 #if SIZEOF_WCHAR_T == 2
82 #define WC_UTF16
83 #endif
84
85 // ============================================================================
86 // implementation
87 // ============================================================================
88
89 // ----------------------------------------------------------------------------
90 // UTF-16 en/decoding to/from UCS-4
91 // ----------------------------------------------------------------------------
92
93
// Encode one UCS-4 value as UTF-16.
//
// Returns the number of 16 bit units needed (1 or 2) or (size_t)-1 if input
// is 0x110000 or greater. If output is not NULL the units are stored in it,
// otherwise only the count is computed.
static size_t encode_utf16(wxUint32 input, wxUint16 *output)
{
    if ( input >= 0x110000 )
        return (size_t)-1;

    if ( input <= 0xffff )
    {
        // fits in a single unit, stored as is
        if ( output )
            output[0] = (wxUint16)input;

        return 1;
    }

    // two units needed: 0xd7c0 is 0xd800 - (0x10000 >> 10)
    if ( output )
    {
        output[0] = (wxUint16)((input >> 10) + 0xd7c0);
        output[1] = (wxUint16)((input & 0x3ff) + 0xdc00);
    }

    return 2;
}
116
// Decode one character from the UTF-16 units starting at input.
//
// Returns the number of units consumed (1 or 2) and stores the decoded value
// in output. If input starts with a surrogate which is not the first half of
// a valid pair, (size_t)-1 is returned and output is set to that lone unit.
//
// input[1] is only read when input[0] is a high surrogate, so the input must
// be terminated (e.g. by a 0 unit).
static size_t decode_utf16(const wxUint16* input, wxUint32& output)
{
    const wxUint16 lead = input[0];

    if ( lead < 0xd800 || lead > 0xdfff )
    {
        // not a surrogate at all: the unit is the character
        output = lead;
        return 1;
    }

    // a pair must start with a high surrogate (0xd800..0xdbff) and continue
    // with a low one (0xdc00..0xdfff): a leading low surrogate used to be
    // accepted here and decoded to a value beyond 0x10ffff
    if ( lead > 0xdbff || input[1] < 0xdc00 || input[1] > 0xdfff )
    {
        output = lead;
        return (size_t)-1;
    }

    output = ((lead - 0xd7c0) << 10) + (input[1] - 0xdc00);
    return 2;
}
135
136
137 // ----------------------------------------------------------------------------
138 // wxMBConv
139 // ----------------------------------------------------------------------------
140
141 wxMBConv::~wxMBConv()
142 {
143 // nothing to do here (necessary for Darwin linking probably)
144 }
145
146 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
147 {
148 if ( psz )
149 {
150 // calculate the length of the buffer needed first
151 size_t nLen = MB2WC(NULL, psz, 0);
152 if ( nLen != (size_t)-1 )
153 {
154 // now do the actual conversion
155 wxWCharBuffer buf(nLen);
156 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
157 if ( nLen != (size_t)-1 )
158 {
159 return buf;
160 }
161 }
162 }
163
164 wxWCharBuffer buf((wchar_t *)NULL);
165
166 return buf;
167 }
168
169 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
170 {
171 if ( pwz )
172 {
173 size_t nLen = WC2MB(NULL, pwz, 0);
174 if ( nLen != (size_t)-1 )
175 {
176 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
177 nLen = WC2MB(buf.data(), pwz, nLen + 4);
178 if ( nLen != (size_t)-1 )
179 {
180 return buf;
181 }
182 }
183 }
184
185 wxCharBuffer buf((char *)NULL);
186
187 return buf;
188 }
189
190 const wxWCharBuffer
191 wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
192 {
193 // the currently accumulated wide characters
194 wxWCharBuffer wbuf;
195
196 // the current length of wbuf
197 size_t lenBuf = 0;
198
199 // we need to know the representation of L'\0' for this conversion
200 size_t nulLen;
201 const char * const nul = GetMBNul(&nulLen);
202 if ( nulLen == (size_t)-1 || nulLen == 0 )
203 return wxWCharBuffer();
204
205 // make a copy of the input string unless it is already properly
206 // NUL-terminated
207 wxCharBuffer bufTmp;
208
209 // now we can compute the input size if we were not given it: notice that
210 // in this case the string must be properly NUL-terminated, of course, as
211 // otherwise we have no way of knowing how long it is
212 if ( inLen == (size_t)-1 )
213 {
214 // not the most efficient algorithm but it shouldn't matter as normally
215 // there are not many NULs in the string and so normally memcmp()
216 // should stop on the first character
217 const char *p = in;
218 while ( memcmp(p, nul, nulLen) != 0 )
219 p++;
220
221 inLen = p - in + nulLen;
222 }
223 else // we already have the size
224 {
225 // check if it's not already NUL-terminated too to avoid the copy
226 if ( inLen < nulLen || memcmp(in + inLen - nulLen, nul, nulLen) != 0 )
227 {
228 // make a copy in order to properly NUL-terminate the string
229 bufTmp = wxCharBuffer(inLen + nulLen - 1 /* 1 will be added */);
230 memcpy(bufTmp.data(), in, inLen);
231 memcpy(bufTmp.data() + inLen, nul, nulLen);
232 }
233 }
234
235 if ( bufTmp )
236 in = bufTmp;
237
238 for ( const char * const inEnd = in + inLen;; )
239 {
240 // try to convert the current chunk if anything left
241 size_t lenChunk = in < inEnd ? MB2WC(NULL, in, 0) : 0;
242 if ( lenChunk == 0 )
243 {
244 // nothing left in the input string, conversion succeeded
245 if ( outLen )
246 {
247 // we shouldn't include the last NUL in the result length
248 *outLen = lenBuf ? lenBuf - 1 : 0;
249 }
250
251 return wbuf;
252 }
253
254 if ( lenChunk == (size_t)-1 )
255 break;
256
257 const size_t lenBufNew = lenBuf + lenChunk;
258 if ( !wbuf.extend(lenBufNew) )
259 break;
260
261 lenChunk = MB2WC(wbuf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
262 if ( lenChunk == (size_t)-1 )
263 break;
264
265 // +! for the embedded NUL (if something follows)
266 lenBuf = lenBufNew + 1;
267
268 // advance the input pointer past the end of this chunk
269 while ( memcmp(in, nul, nulLen) != 0 )
270 in++;
271
272 in += nulLen; // skipping over its terminator as well
273 }
274
275 // conversion failed
276 if ( outLen )
277 *outLen = 0;
278
279 return wxWCharBuffer();
280 }
281
282 const wxCharBuffer
283 wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
284 {
285 // the currently accumulated multibyte characters
286 wxCharBuffer buf;
287
288 // the current length of buf
289 size_t lenBuf = 0;
290
291 // make a copy of the input string unless it is already properly
292 // NUL-terminated
293 //
294 // if we don't know its length we have no choice but to assume that it is,
295 // indeed, properly terminated
296 wxWCharBuffer bufTmp;
297 if ( inLen == (size_t)-1 )
298 {
299 inLen = wxWcslen(in) + 1;
300 }
301 else if ( inLen != 0 && in[inLen - 1] != L'\0' )
302 {
303 // make a copy in order to properly NUL-terminate the string
304 bufTmp = wxWCharBuffer(inLen);
305 memcpy(bufTmp.data(), in, inLen*sizeof(wchar_t));
306 }
307
308 if ( bufTmp )
309 in = bufTmp;
310
311 for ( const wchar_t * const inEnd = in + inLen;; )
312 {
313 // try to convert the current chunk, if anything left
314 size_t lenChunk = in < inEnd ? WC2MB(NULL, in, 0) : 0;
315 if ( lenChunk == 0 )
316 {
317 // nothing left in the input string, conversion succeeded
318 if ( outLen )
319 *outLen = lenBuf ? lenBuf - 1 : lenBuf;
320
321 return buf;
322 }
323
324 if ( lenChunk == (size_t)-1 )
325 break;
326
327 const size_t lenBufNew = lenBuf + lenChunk;
328 if ( !buf.extend(lenBufNew) )
329 break;
330
331 lenChunk = WC2MB(buf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
332 if ( lenChunk == (size_t)-1 )
333 break;
334
335 // chunk successfully converted, go to the next one
336 in += wxWcslen(in) + 1 /* skip NUL too */;
337 lenBuf = lenBufNew + 1;
338 }
339
340 // conversion failed
341 if ( outLen )
342 *outLen = 0;
343
344 return wxCharBuffer();
345 }
346
347 // ----------------------------------------------------------------------------
348 // wxMBConvLibc
349 // ----------------------------------------------------------------------------
350
351 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
352 {
353 return wxMB2WC(buf, psz, n);
354 }
355
356 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
357 {
358 return wxWC2MB(buf, psz, n);
359 }
360
361 // ----------------------------------------------------------------------------
362 // wxConvBrokenFileNames
363 // ----------------------------------------------------------------------------
364
365 #ifdef __UNIX__
366
367 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
368 {
369 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
370 || wxStricmp(charset, _T("UTF8")) == 0 )
371 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
372 else
373 m_conv = new wxCSConv(charset);
374 }
375
376 #endif // __UNIX__
377
378 // ----------------------------------------------------------------------------
379 // UTF-7
380 // ----------------------------------------------------------------------------
381
382 // Implementation (C) 2004 Fredrik Roubert
383
384 //
385 // BASE64 decoding table
386 //
// Maps a byte to its 6 bit BASE64 value: 'A'-'Z' give 0x00-0x19, 'a'-'z'
// 0x1a-0x33, '0'-'9' 0x34-0x3d, '+' 0x3e and '/' 0x3f. All other bytes map
// to 0xff, which marks them as not being BASE64 characters.
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0' - '9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x5f: 'A' - 'Z'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x7f: 'a' - 'z'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
422
423 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
424 {
425 size_t len = 0;
426
427 while ( *psz && (!buf || (len < n)) )
428 {
429 unsigned char cc = *psz++;
430 if (cc != '+')
431 {
432 // plain ASCII char
433 if (buf)
434 *buf++ = cc;
435 len++;
436 }
437 else if (*psz == '-')
438 {
439 // encoded plus sign
440 if (buf)
441 *buf++ = cc;
442 len++;
443 psz++;
444 }
445 else // start of BASE64 encoded string
446 {
447 bool lsb, ok;
448 unsigned int d, l;
449 for ( ok = lsb = false, d = 0, l = 0;
450 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
451 psz++ )
452 {
453 d <<= 6;
454 d += cc;
455 for (l += 6; l >= 8; lsb = !lsb)
456 {
457 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
458 if (lsb)
459 {
460 if (buf)
461 *buf++ |= c;
462 len ++;
463 }
464 else
465 {
466 if (buf)
467 *buf = (wchar_t)(c << 8);
468 }
469
470 ok = true;
471 }
472 }
473
474 if ( !ok )
475 {
476 // in valid UTF7 we should have valid characters after '+'
477 return (size_t)-1;
478 }
479
480 if (*psz == '-')
481 psz++;
482 }
483 }
484
485 if ( buf && (len < n) )
486 *buf = '\0';
487
488 return len;
489 }
490
491 //
492 // BASE64 encoding table
493 //
// The 64 characters of the BASE64 alphabet indexed by their 6 bit value
static const unsigned char utf7enb64[] =
{
    // 0 - 25
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 26 - 51
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // 52 - 61
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 62 and 63
    '+', '/'
};
505
506 //
507 // UTF-7 encoding table
508 //
509 // 0 - Set D (directly encoded characters)
510 // 1 - Set O (optional direct characters)
511 // 2 - whitespace characters (optional)
512 // 3 - special characters
513 //
514 static const unsigned char utf7encode[128] =
515 {
516 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
517 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
518 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
519 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
520 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
522 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
524 };
525
526 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
527 {
528 size_t len = 0;
529
530 while (*psz && ((!buf) || (len < n)))
531 {
532 wchar_t cc = *psz++;
533 if (cc < 0x80 && utf7encode[cc] < 1)
534 {
535 // plain ASCII char
536 if (buf)
537 *buf++ = (char)cc;
538 len++;
539 }
540 #ifndef WC_UTF16
541 else if (((wxUint32)cc) > 0xffff)
542 {
543 // no surrogate pair generation (yet?)
544 return (size_t)-1;
545 }
546 #endif
547 else
548 {
549 if (buf)
550 *buf++ = '+';
551 len++;
552 if (cc != '+')
553 {
554 // BASE64 encode string
555 unsigned int lsb, d, l;
556 for (d = 0, l = 0; /*nothing*/; psz++)
557 {
558 for (lsb = 0; lsb < 2; lsb ++)
559 {
560 d <<= 8;
561 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
562
563 for (l += 8; l >= 6; )
564 {
565 l -= 6;
566 if (buf)
567 *buf++ = utf7enb64[(d >> l) % 64];
568 len++;
569 }
570 }
571 cc = *psz;
572 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
573 break;
574 }
575 if (l != 0)
576 {
577 if (buf)
578 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
579 len++;
580 }
581 }
582 if (buf)
583 *buf++ = '-';
584 len++;
585 }
586 }
587 if (buf && (len < n))
588 *buf = 0;
589 return len;
590 }
591
592 // ----------------------------------------------------------------------------
593 // UTF-8
594 // ----------------------------------------------------------------------------
595
// utf8_max[i] is the upper bound associated with i + 1 byte sequences by
// the UTF-8 code below; the last entry, 0xffffffff, ends the table
static wxUint32 utf8_max[] =
{
    0x7f,
    0x7ff,
    0xffff,
    0x1fffff,
    0x3ffffff,
    0x7fffffff,
    0xffffffff
};

// boundaries of the private use area we use to (temporarily) remap the
// bytes which are invalid in a UTF-8 encoded string: it has room for all
// 256 byte values
const wxUint32 wxUnicodePUA = 0x100000;
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
603
604 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
605 {
606 size_t len = 0;
607
608 while (*psz && ((!buf) || (len < n)))
609 {
610 const char *opsz = psz;
611 bool invalid = false;
612 unsigned char cc = *psz++, fc = cc;
613 unsigned cnt;
614 for (cnt = 0; fc & 0x80; cnt++)
615 fc <<= 1;
616 if (!cnt)
617 {
618 // plain ASCII char
619 if (buf)
620 *buf++ = cc;
621 len++;
622
623 // escape the escape character for octal escapes
624 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
625 && cc == '\\' && (!buf || len < n))
626 {
627 if (buf)
628 *buf++ = cc;
629 len++;
630 }
631 }
632 else
633 {
634 cnt--;
635 if (!cnt)
636 {
637 // invalid UTF-8 sequence
638 invalid = true;
639 }
640 else
641 {
642 unsigned ocnt = cnt - 1;
643 wxUint32 res = cc & (0x3f >> cnt);
644 while (cnt--)
645 {
646 cc = *psz;
647 if ((cc & 0xC0) != 0x80)
648 {
649 // invalid UTF-8 sequence
650 invalid = true;
651 break;
652 }
653 psz++;
654 res = (res << 6) | (cc & 0x3f);
655 }
656 if (invalid || res <= utf8_max[ocnt])
657 {
658 // illegal UTF-8 encoding
659 invalid = true;
660 }
661 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
662 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
663 {
664 // if one of our PUA characters turns up externally
665 // it must also be treated as an illegal sequence
666 // (a bit like you have to escape an escape character)
667 invalid = true;
668 }
669 else
670 {
671 #ifdef WC_UTF16
672 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
673 size_t pa = encode_utf16(res, (wxUint16 *)buf);
674 if (pa == (size_t)-1)
675 {
676 invalid = true;
677 }
678 else
679 {
680 if (buf)
681 buf += pa;
682 len += pa;
683 }
684 #else // !WC_UTF16
685 if (buf)
686 *buf++ = (wchar_t)res;
687 len++;
688 #endif // WC_UTF16/!WC_UTF16
689 }
690 }
691 if (invalid)
692 {
693 if (m_options & MAP_INVALID_UTF8_TO_PUA)
694 {
695 while (opsz < psz && (!buf || len < n))
696 {
697 #ifdef WC_UTF16
698 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
699 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
700 wxASSERT(pa != (size_t)-1);
701 if (buf)
702 buf += pa;
703 opsz++;
704 len += pa;
705 #else
706 if (buf)
707 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
708 opsz++;
709 len++;
710 #endif
711 }
712 }
713 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
714 {
715 while (opsz < psz && (!buf || len < n))
716 {
717 if ( buf && len + 3 < n )
718 {
719 unsigned char on = *opsz;
720 *buf++ = L'\\';
721 *buf++ = (wchar_t)( L'0' + on / 0100 );
722 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
723 *buf++ = (wchar_t)( L'0' + on % 010 );
724 }
725 opsz++;
726 len += 4;
727 }
728 }
729 else // MAP_INVALID_UTF8_NOT
730 {
731 return (size_t)-1;
732 }
733 }
734 }
735 }
736 if (buf && (len < n))
737 *buf = 0;
738 return len;
739 }
740
// Return true if wch is one of the octal digits L'0' to L'7'
static inline bool isoctal(wchar_t wch)
{
    if ( wch < L'0' )
        return false;

    return wch <= L'7';
}
745
746 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
747 {
748 size_t len = 0;
749
750 while (*psz && ((!buf) || (len < n)))
751 {
752 wxUint32 cc;
753 #ifdef WC_UTF16
754 // cast is ok for WC_UTF16
755 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
756 psz += (pa == (size_t)-1) ? 1 : pa;
757 #else
758 cc=(*psz++) & 0x7fffffff;
759 #endif
760
761 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
762 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
763 {
764 if (buf)
765 *buf++ = (char)(cc - wxUnicodePUA);
766 len++;
767 }
768 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
769 && cc == L'\\' && psz[0] == L'\\' )
770 {
771 if (buf)
772 *buf++ = (char)cc;
773 psz++;
774 len++;
775 }
776 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
777 cc == L'\\' &&
778 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
779 {
780 if (buf)
781 {
782 *buf++ = (char) ((psz[0] - L'0')*0100 +
783 (psz[1] - L'0')*010 +
784 (psz[2] - L'0'));
785 }
786
787 psz += 3;
788 len++;
789 }
790 else
791 {
792 unsigned cnt;
793 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
794 if (!cnt)
795 {
796 // plain ASCII char
797 if (buf)
798 *buf++ = (char) cc;
799 len++;
800 }
801
802 else
803 {
804 len += cnt + 1;
805 if (buf)
806 {
807 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
808 while (cnt--)
809 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
810 }
811 }
812 }
813 }
814
815 if (buf && (len<n))
816 *buf = 0;
817
818 return len;
819 }
820
821 // ----------------------------------------------------------------------------
822 // UTF-16
823 // ----------------------------------------------------------------------------
824
// "straight" is the UTF-16 conversion class whose byte order is the same as
// the native one and "swap" is the one with the opposite byte order
#if !defined(WORDS_BIGENDIAN)
    #define wxMBConvUTF16straight wxMBConvUTF16LE
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
#else // WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#endif
832
833
834 #ifdef WC_UTF16
835
836 // copy 16bit MB to 16bit String
837 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
838 {
839 size_t len=0;
840
841 while (*(wxUint16*)psz && (!buf || len < n))
842 {
843 if (buf)
844 *buf++ = *(wxUint16*)psz;
845 len++;
846
847 psz += sizeof(wxUint16);
848 }
849 if (buf && len<n) *buf=0;
850
851 return len;
852 }
853
854
855 // copy 16bit String to 16bit MB
856 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
857 {
858 size_t len=0;
859
860 while (*psz && (!buf || len < n))
861 {
862 if (buf)
863 {
864 *(wxUint16*)buf = *psz;
865 buf += sizeof(wxUint16);
866 }
867 len += sizeof(wxUint16);
868 psz++;
869 }
870 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
871
872 return len;
873 }
874
875
876 // swap 16bit MB to 16bit String
877 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
878 {
879 size_t len = 0;
880
881 // UTF16 string must be terminated by 2 NULs as single NULs may occur
882 // inside the string
883 while ( (psz[0] || psz[1]) && (!buf || len < n) )
884 {
885 if ( buf )
886 {
887 ((char *)buf)[0] = psz[1];
888 ((char *)buf)[1] = psz[0];
889 buf++;
890 }
891 len++;
892 psz += 2;
893 }
894
895 if ( buf && len < n )
896 *buf = L'\0';
897
898 return len;
899 }
900
901
902 // swap 16bit MB to 16bit String
903 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
904 {
905 size_t len = 0;
906
907 while ( *psz && (!buf || len < n) )
908 {
909 if ( buf )
910 {
911 *buf++ = ((char*)psz)[1];
912 *buf++ = ((char*)psz)[0];
913 }
914 len += 2;
915 psz++;
916 }
917
918 if ( buf && len < n )
919 *buf = '\0';
920
921 return len;
922 }
923
924
925 #else // WC_UTF16
926
927
928 // copy 16bit MB to 32bit String
929 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
930 {
931 size_t len=0;
932
933 while (*(wxUint16*)psz && (!buf || len < n))
934 {
935 wxUint32 cc;
936 size_t pa=decode_utf16((wxUint16*)psz, cc);
937 if (pa == (size_t)-1)
938 return pa;
939
940 if (buf)
941 *buf++ = (wchar_t)cc;
942 len++;
943 psz += pa * sizeof(wxUint16);
944 }
945 if (buf && len<n) *buf=0;
946
947 return len;
948 }
949
950
951 // copy 32bit String to 16bit MB
952 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
953 {
954 size_t len=0;
955
956 while (*psz && (!buf || len < n))
957 {
958 wxUint16 cc[2];
959 size_t pa=encode_utf16(*psz, cc);
960
961 if (pa == (size_t)-1)
962 return pa;
963
964 if (buf)
965 {
966 *(wxUint16*)buf = cc[0];
967 buf += sizeof(wxUint16);
968 if (pa > 1)
969 {
970 *(wxUint16*)buf = cc[1];
971 buf += sizeof(wxUint16);
972 }
973 }
974
975 len += pa*sizeof(wxUint16);
976 psz++;
977 }
978 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
979
980 return len;
981 }
982
983
984 // swap 16bit MB to 32bit String
985 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
986 {
987 size_t len=0;
988
989 while (*(wxUint16*)psz && (!buf || len < n))
990 {
991 wxUint32 cc;
992 char tmp[4];
993 tmp[0]=psz[1]; tmp[1]=psz[0];
994 tmp[2]=psz[3]; tmp[3]=psz[2];
995
996 size_t pa=decode_utf16((wxUint16*)tmp, cc);
997 if (pa == (size_t)-1)
998 return pa;
999
1000 if (buf)
1001 *buf++ = (wchar_t)cc;
1002
1003 len++;
1004 psz += pa * sizeof(wxUint16);
1005 }
1006 if (buf && len<n) *buf=0;
1007
1008 return len;
1009 }
1010
1011
1012 // swap 32bit String to 16bit MB
1013 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1014 {
1015 size_t len=0;
1016
1017 while (*psz && (!buf || len < n))
1018 {
1019 wxUint16 cc[2];
1020 size_t pa=encode_utf16(*psz, cc);
1021
1022 if (pa == (size_t)-1)
1023 return pa;
1024
1025 if (buf)
1026 {
1027 *buf++ = ((char*)cc)[1];
1028 *buf++ = ((char*)cc)[0];
1029 if (pa > 1)
1030 {
1031 *buf++ = ((char*)cc)[3];
1032 *buf++ = ((char*)cc)[2];
1033 }
1034 }
1035
1036 len += pa*sizeof(wxUint16);
1037 psz++;
1038 }
1039 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1040
1041 return len;
1042 }
1043
1044 #endif // WC_UTF16
1045
1046
1047 // ----------------------------------------------------------------------------
1048 // UTF-32
1049 // ----------------------------------------------------------------------------
1050
1051 #ifdef WORDS_BIGENDIAN
1052 #define wxMBConvUTF32straight wxMBConvUTF32BE
1053 #define wxMBConvUTF32swap wxMBConvUTF32LE
1054 #else
1055 #define wxMBConvUTF32swap wxMBConvUTF32BE
1056 #define wxMBConvUTF32straight wxMBConvUTF32LE
1057 #endif
1058
1059
1060 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1061 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1062
1063
1064 #ifdef WC_UTF16
1065
1066 // copy 32bit MB to 16bit String
1067 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1068 {
1069 size_t len=0;
1070
1071 while (*(wxUint32*)psz && (!buf || len < n))
1072 {
1073 wxUint16 cc[2];
1074
1075 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1076 if (pa == (size_t)-1)
1077 return pa;
1078
1079 if (buf)
1080 {
1081 *buf++ = cc[0];
1082 if (pa > 1)
1083 *buf++ = cc[1];
1084 }
1085 len += pa;
1086 psz += sizeof(wxUint32);
1087 }
1088 if (buf && len<n) *buf=0;
1089
1090 return len;
1091 }
1092
1093
1094 // copy 16bit String to 32bit MB
1095 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1096 {
1097 size_t len=0;
1098
1099 while (*psz && (!buf || len < n))
1100 {
1101 wxUint32 cc;
1102
1103 // cast is ok for WC_UTF16
1104 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1105 if (pa == (size_t)-1)
1106 return pa;
1107
1108 if (buf)
1109 {
1110 *(wxUint32*)buf = cc;
1111 buf += sizeof(wxUint32);
1112 }
1113 len += sizeof(wxUint32);
1114 psz += pa;
1115 }
1116
1117 if (buf && len<=n-sizeof(wxUint32))
1118 *(wxUint32*)buf=0;
1119
1120 return len;
1121 }
1122
1123
1124
1125 // swap 32bit MB to 16bit String
1126 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1127 {
1128 size_t len=0;
1129
1130 while (*(wxUint32*)psz && (!buf || len < n))
1131 {
1132 char tmp[4];
1133 tmp[0] = psz[3]; tmp[1] = psz[2];
1134 tmp[2] = psz[1]; tmp[3] = psz[0];
1135
1136
1137 wxUint16 cc[2];
1138
1139 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1140 if (pa == (size_t)-1)
1141 return pa;
1142
1143 if (buf)
1144 {
1145 *buf++ = cc[0];
1146 if (pa > 1)
1147 *buf++ = cc[1];
1148 }
1149 len += pa;
1150 psz += sizeof(wxUint32);
1151 }
1152
1153 if (buf && len<n)
1154 *buf=0;
1155
1156 return len;
1157 }
1158
1159
1160 // swap 16bit String to 32bit MB
1161 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1162 {
1163 size_t len=0;
1164
1165 while (*psz && (!buf || len < n))
1166 {
1167 char cc[4];
1168
1169 // cast is ok for WC_UTF16
1170 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1171 if (pa == (size_t)-1)
1172 return pa;
1173
1174 if (buf)
1175 {
1176 *buf++ = cc[3];
1177 *buf++ = cc[2];
1178 *buf++ = cc[1];
1179 *buf++ = cc[0];
1180 }
1181 len += sizeof(wxUint32);
1182 psz += pa;
1183 }
1184
1185 if (buf && len<=n-sizeof(wxUint32))
1186 *(wxUint32*)buf=0;
1187
1188 return len;
1189 }
1190
1191 #else // WC_UTF16
1192
1193
1194 // copy 32bit MB to 32bit String
1195 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1196 {
1197 size_t len=0;
1198
1199 while (*(wxUint32*)psz && (!buf || len < n))
1200 {
1201 if (buf)
1202 *buf++ = (wchar_t)(*(wxUint32*)psz);
1203 len++;
1204 psz += sizeof(wxUint32);
1205 }
1206
1207 if (buf && len<n)
1208 *buf=0;
1209
1210 return len;
1211 }
1212
1213
1214 // copy 32bit String to 32bit MB
1215 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1216 {
1217 size_t len=0;
1218
1219 while (*psz && (!buf || len < n))
1220 {
1221 if (buf)
1222 {
1223 *(wxUint32*)buf = *psz;
1224 buf += sizeof(wxUint32);
1225 }
1226
1227 len += sizeof(wxUint32);
1228 psz++;
1229 }
1230
1231 if (buf && len<=n-sizeof(wxUint32))
1232 *(wxUint32*)buf=0;
1233
1234 return len;
1235 }
1236
1237
1238 // swap 32bit MB to 32bit String
1239 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1240 {
1241 size_t len=0;
1242
1243 while (*(wxUint32*)psz && (!buf || len < n))
1244 {
1245 if (buf)
1246 {
1247 ((char *)buf)[0] = psz[3];
1248 ((char *)buf)[1] = psz[2];
1249 ((char *)buf)[2] = psz[1];
1250 ((char *)buf)[3] = psz[0];
1251 buf++;
1252 }
1253 len++;
1254 psz += sizeof(wxUint32);
1255 }
1256
1257 if (buf && len<n)
1258 *buf=0;
1259
1260 return len;
1261 }
1262
1263
1264 // swap 32bit String to 32bit MB
1265 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1266 {
1267 size_t len=0;
1268
1269 while (*psz && (!buf || len < n))
1270 {
1271 if (buf)
1272 {
1273 *buf++ = ((char *)psz)[3];
1274 *buf++ = ((char *)psz)[2];
1275 *buf++ = ((char *)psz)[1];
1276 *buf++ = ((char *)psz)[0];
1277 }
1278 len += sizeof(wxUint32);
1279 psz++;
1280 }
1281
1282 if (buf && len<=n-sizeof(wxUint32))
1283 *(wxUint32*)buf=0;
1284
1285 return len;
1286 }
1287
1288
1289 #endif // WC_UTF16
1290
1291
1292 // ============================================================================
1293 // The classes doing conversion using the iconv_xxx() functions
1294 // ============================================================================
1295
1296 #ifdef HAVE_ICONV
1297
1298 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1299 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1300 // (unless there's yet another bug in glibc) the only case when iconv()
1301 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1302 // left in the input buffer -- when _real_ error occurs,
1303 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1304 // iconv() failure.
1305 // [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft) is true if the iconv() call which returned
// cres failed. For glibc 2.0 and 2.1 a return value of (size_t)-1 with
// errno equal to E2BIG and bufLeft equal to 0 is not counted as a failure
// (see the comment above).
//
// The arguments are parenthesized so that expressions containing operators
// of lower precedence than == or != can be passed safely.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
    #define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                         (errno != E2BIG || (bufLeft) != 0))
#else
    #define ICONV_FAILED(cres, bufLeft) ((cres) == (size_t)-1)
#endif
1312
1313 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1314
1315 #define ICONV_T_INVALID ((iconv_t)-1)
1316
1317 #if SIZEOF_WCHAR_T == 4
1318 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1319 #define WC_ENC wxFONTENCODING_UTF32
1320 #elif SIZEOF_WCHAR_T == 2
1321 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1322 #define WC_ENC wxFONTENCODING_UTF16
1323 #else // sizeof(wchar_t) != 2 nor 4
1324 // does this ever happen?
1325 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1326 #endif
1327
1328 // ----------------------------------------------------------------------------
1329 // wxMBConv_iconv: encapsulates an iconv character set
1330 // ----------------------------------------------------------------------------
1331
1332 class wxMBConv_iconv : public wxMBConv
1333 {
1334 public:
1335 wxMBConv_iconv(const wxChar *name);
1336 virtual ~wxMBConv_iconv();
1337
1338 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1339 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1340
1341 bool IsOk() const
1342 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1343
1344 protected:
1345 // the iconv handlers used to translate from multibyte to wide char and in
1346 // the other direction
1347 iconv_t m2w,
1348 w2m;
1349 #if wxUSE_THREADS
1350 // guards access to m2w and w2m objects
1351 wxMutex m_iconvMutex;
1352 #endif
1353
1354 private:
1355 virtual const char *GetMBNul(size_t *nulLen) const;
1356
1357 // the name (for iconv_open()) of a wide char charset -- if none is
1358 // available on this machine, it will remain NULL
1359 static wxString ms_wcCharsetName;
1360
1361 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1362 // different endian-ness than the native one
1363 static bool ms_wcNeedsSwap;
1364
1365 // NUL representation
1366 size_t m_nulLen;
1367 char m_nulBuf[8];
1368 };
1369
1370 // make the constructor available for unit testing
1371 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1372 {
1373 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1374 if ( !result->IsOk() )
1375 {
1376 delete result;
1377 return 0;
1378 }
1379 return result;
1380 }
1381
1382 wxString wxMBConv_iconv::ms_wcCharsetName;
1383 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1384
1385 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1386 {
1387 m_nulLen = (size_t)-2;
1388
1389 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1390 // names for the charsets
1391 const wxCharBuffer cname(wxString(name).ToAscii());
1392
1393 // check for charset that represents wchar_t:
1394 if ( ms_wcCharsetName.empty() )
1395 {
1396 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1397
1398 #if wxUSE_FONTMAP
1399 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1400 #else // !wxUSE_FONTMAP
1401 static const wxChar *names[] =
1402 {
1403 #if SIZEOF_WCHAR_T == 4
1404 _T("UCS-4"),
1405 #elif SIZEOF_WCHAR_T = 2
1406 _T("UCS-2"),
1407 #endif
1408 NULL
1409 };
1410 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1411
1412 for ( ; *names && ms_wcCharsetName.empty(); ++names )
1413 {
1414 const wxString nameCS(*names);
1415
1416 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1417 wxString nameXE(nameCS);
1418 #ifdef WORDS_BIGENDIAN
1419 nameXE += _T("BE");
1420 #else // little endian
1421 nameXE += _T("LE");
1422 #endif
1423
1424 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1425 nameXE.c_str());
1426
1427 m2w = iconv_open(nameXE.ToAscii(), cname);
1428 if ( m2w == ICONV_T_INVALID )
1429 {
1430 // try charset w/o bytesex info (e.g. "UCS4")
1431 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1432 nameCS.c_str());
1433 m2w = iconv_open(nameCS.ToAscii(), cname);
1434
1435 // and check for bytesex ourselves:
1436 if ( m2w != ICONV_T_INVALID )
1437 {
1438 char buf[2], *bufPtr;
1439 wchar_t wbuf[2], *wbufPtr;
1440 size_t insz, outsz;
1441 size_t res;
1442
1443 buf[0] = 'A';
1444 buf[1] = 0;
1445 wbuf[0] = 0;
1446 insz = 2;
1447 outsz = SIZEOF_WCHAR_T * 2;
1448 wbufPtr = wbuf;
1449 bufPtr = buf;
1450
1451 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1452 (char**)&wbufPtr, &outsz);
1453
1454 if (ICONV_FAILED(res, insz))
1455 {
1456 wxLogLastError(wxT("iconv"));
1457 wxLogError(_("Conversion to charset '%s' doesn't work."),
1458 nameCS.c_str());
1459 }
1460 else // ok, can convert to this encoding, remember it
1461 {
1462 ms_wcCharsetName = nameCS;
1463 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1464 }
1465 }
1466 }
1467 else // use charset not requiring byte swapping
1468 {
1469 ms_wcCharsetName = nameXE;
1470 }
1471 }
1472
1473 wxLogTrace(TRACE_STRCONV,
1474 wxT("iconv wchar_t charset is \"%s\"%s"),
1475 ms_wcCharsetName.empty() ? _T("<none>")
1476 : ms_wcCharsetName.c_str(),
1477 ms_wcNeedsSwap ? _T(" (needs swap)")
1478 : _T(""));
1479 }
1480 else // we already have ms_wcCharsetName
1481 {
1482 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1483 }
1484
1485 if ( ms_wcCharsetName.empty() )
1486 {
1487 w2m = ICONV_T_INVALID;
1488 }
1489 else
1490 {
1491 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1492 if ( w2m == ICONV_T_INVALID )
1493 {
1494 wxLogTrace(TRACE_STRCONV,
1495 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1496 ms_wcCharsetName.c_str(), cname.data());
1497 }
1498 }
1499 }
1500
1501 wxMBConv_iconv::~wxMBConv_iconv()
1502 {
1503 if ( m2w != ICONV_T_INVALID )
1504 iconv_close(m2w);
1505 if ( w2m != ICONV_T_INVALID )
1506 iconv_close(w2m);
1507 }
1508
1509 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1510 {
1511 #if wxUSE_THREADS
1512 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1513 // Unfortunately there is a couple of global wxCSConv objects such as
1514 // wxConvLocal that are used all over wx code, so we have to make sure
1515 // the handle is used by at most one thread at the time. Otherwise
1516 // only a few wx classes would be safe to use from non-main threads
1517 // as MB<->WC conversion would fail "randomly".
1518 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1519 #endif
1520
1521 size_t inbuf = strlen(psz);
1522 size_t outbuf = n * SIZEOF_WCHAR_T;
1523 size_t res, cres;
1524 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1525 wchar_t *bufPtr = buf;
1526 const char *pszPtr = psz;
1527
1528 if (buf)
1529 {
1530 // have destination buffer, convert there
1531 cres = iconv(m2w,
1532 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1533 (char**)&bufPtr, &outbuf);
1534 res = n - (outbuf / SIZEOF_WCHAR_T);
1535
1536 if (ms_wcNeedsSwap)
1537 {
1538 // convert to native endianness
1539 for ( unsigned i = 0; i < res; i++ )
1540 buf[n] = WC_BSWAP(buf[i]);
1541 }
1542
1543 // NB: iconv was given only strlen(psz) characters on input, and so
1544 // it couldn't convert the trailing zero. Let's do it ourselves
1545 // if there's some room left for it in the output buffer.
1546 if (res < n)
1547 buf[res] = 0;
1548 }
1549 else
1550 {
1551 // no destination buffer... convert using temp buffer
1552 // to calculate destination buffer requirement
1553 wchar_t tbuf[8];
1554 res = 0;
1555 do {
1556 bufPtr = tbuf;
1557 outbuf = 8*SIZEOF_WCHAR_T;
1558
1559 cres = iconv(m2w,
1560 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1561 (char**)&bufPtr, &outbuf );
1562
1563 res += 8-(outbuf/SIZEOF_WCHAR_T);
1564 } while ((cres==(size_t)-1) && (errno==E2BIG));
1565 }
1566
1567 if (ICONV_FAILED(cres, inbuf))
1568 {
1569 //VS: it is ok if iconv fails, hence trace only
1570 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1571 return (size_t)-1;
1572 }
1573
1574 return res;
1575 }
1576
1577 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1578 {
1579 #if wxUSE_THREADS
1580 // NB: explained in MB2WC
1581 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1582 #endif
1583
1584 size_t inlen = wxWcslen(psz);
1585 size_t inbuf = inlen * SIZEOF_WCHAR_T;
1586 size_t outbuf = n;
1587 size_t res, cres;
1588
1589 wchar_t *tmpbuf = 0;
1590
1591 if (ms_wcNeedsSwap)
1592 {
1593 // need to copy to temp buffer to switch endianness
1594 // (doing WC_BSWAP twice on the original buffer won't help, as it
1595 // could be in read-only memory, or be accessed in some other thread)
1596 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1597 for ( size_t i = 0; i < inlen; i++ )
1598 tmpbuf[n] = WC_BSWAP(psz[i]);
1599 tmpbuf[inlen] = L'\0';
1600 psz = tmpbuf;
1601 }
1602
1603 if (buf)
1604 {
1605 // have destination buffer, convert there
1606 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1607
1608 res = n-outbuf;
1609
1610 // NB: iconv was given only wcslen(psz) characters on input, and so
1611 // it couldn't convert the trailing zero. Let's do it ourselves
1612 // if there's some room left for it in the output buffer.
1613 if (res < n)
1614 buf[0] = 0;
1615 }
1616 else
1617 {
1618 // no destination buffer... convert using temp buffer
1619 // to calculate destination buffer requirement
1620 char tbuf[16];
1621 res = 0;
1622 do {
1623 buf = tbuf; outbuf = 16;
1624
1625 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1626
1627 res += 16 - outbuf;
1628 } while ((cres==(size_t)-1) && (errno==E2BIG));
1629 }
1630
1631 if (ms_wcNeedsSwap)
1632 {
1633 free(tmpbuf);
1634 }
1635
1636 if (ICONV_FAILED(cres, inbuf))
1637 {
1638 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1639 return (size_t)-1;
1640 }
1641
1642 return res;
1643 }
1644
1645 const char *wxMBConv_iconv::GetMBNul(size_t *nulLen) const
1646 {
1647 if ( m_nulLen == (size_t)-2 )
1648 {
1649 wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1650
1651 #if wxUSE_THREADS
1652 // NB: explained in MB2WC
1653 wxMutexLocker lock(self->m_iconvMutex);
1654 #endif
1655
1656 wchar_t *wnul = L"";
1657 size_t inLen = sizeof(wchar_t),
1658 outLen = WXSIZEOF(m_nulBuf);
1659 const char *in = (char *)wnul;
1660 char *out = self->m_nulBuf;
1661 if ( iconv(w2m, &in, &inLen, &out, &outLen) == (size_t)-1 )
1662 {
1663 self->m_nulLen = (size_t)-1;
1664 }
1665 else // ok
1666 {
1667 self->m_nulLen = out - m_nulBuf;
1668 }
1669 }
1670
1671 *nulLen = m_nulLen;
1672 return m_nulBuf;
1673 }
1674
1675 #endif // HAVE_ICONV
1676
1677
1678 // ============================================================================
1679 // Win32 conversion classes
1680 // ============================================================================
1681
1682 #ifdef wxHAVE_WIN32_MB2WC
1683
1684 // from utils.cpp
1685 #if wxUSE_FONTMAP
1686 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1687 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1688 #endif
1689
1690 class wxMBConv_win32 : public wxMBConv
1691 {
1692 public:
1693 wxMBConv_win32()
1694 {
1695 m_CodePage = CP_ACP;
1696 m_nulLen = (size_t)-2;
1697 }
1698
1699 #if wxUSE_FONTMAP
1700 wxMBConv_win32(const wxChar* name)
1701 {
1702 m_CodePage = wxCharsetToCodepage(name);
1703 m_nulLen = (size_t)-2;
1704 }
1705
1706 wxMBConv_win32(wxFontEncoding encoding)
1707 {
1708 m_CodePage = wxEncodingToCodepage(encoding);
1709 m_nulLen = (size_t)-2;
1710 }
1711 #endif // wxUSE_FONTMAP
1712
1713 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1714 {
1715 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1716 // the behaviour is not compatible with the Unix version (using iconv)
1717 // and break the library itself, e.g. wxTextInputStream::NextChar()
1718 // wouldn't work if reading an incomplete MB char didn't result in an
1719 // error
1720 //
1721 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1722 // an error (tested under Windows Server 2003) and apparently it is
1723 // done on purpose, i.e. the function accepts any input in this case
1724 // and although I'd prefer to return error on ill-formed output, our
1725 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1726 // explicitly ill-formed according to RFC 2152) neither so we don't
1727 // even have any fallback here...
1728 //
1729 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1730 // Win XP or newer and if it is specified on older versions, conversion
1731 // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1732 // fails. So we can only use the flag on newer Windows versions.
1733 // Additionally, the flag is not supported by UTF7, symbol and CJK
1734 // encodings. See here:
1735 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1736 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1737 int flags = 0;
1738 if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
1739 m_CodePage < 50000 &&
1740 IsAtLeastWin2kSP4() )
1741 {
1742 flags = MB_ERR_INVALID_CHARS;
1743 }
1744 else if ( m_CodePage == CP_UTF8 )
1745 {
1746 // Avoid round-trip in the special case of UTF-8 by using our
1747 // own UTF-8 conversion code:
1748 return wxMBConvUTF8().MB2WC(buf, psz, n);
1749 }
1750
1751 const size_t len = ::MultiByteToWideChar
1752 (
1753 m_CodePage, // code page
1754 flags, // flags: fall on error
1755 psz, // input string
1756 -1, // its length (NUL-terminated)
1757 buf, // output string
1758 buf ? n : 0 // size of output buffer
1759 );
1760 if ( !len )
1761 {
1762 // function totally failed
1763 return (size_t)-1;
1764 }
1765
1766 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1767 // check if we succeeded, by doing a double trip:
1768 if ( !flags && buf )
1769 {
1770 const size_t mbLen = strlen(psz);
1771 wxCharBuffer mbBuf(mbLen);
1772 if ( ::WideCharToMultiByte
1773 (
1774 m_CodePage,
1775 0,
1776 buf,
1777 -1,
1778 mbBuf.data(),
1779 mbLen + 1, // size in bytes, not length
1780 NULL,
1781 NULL
1782 ) == 0 ||
1783 strcmp(mbBuf, psz) != 0 )
1784 {
1785 // we didn't obtain the same thing we started from, hence
1786 // the conversion was lossy and we consider that it failed
1787 return (size_t)-1;
1788 }
1789 }
1790
1791 // note that it returns count of written chars for buf != NULL and size
1792 // of the needed buffer for buf == NULL so in either case the length of
1793 // the string (which never includes the terminating NUL) is one less
1794 return len - 1;
1795 }
1796
1797 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1798 {
1799 /*
1800 we have a problem here: by default, WideCharToMultiByte() may
1801 replace characters unrepresentable in the target code page with bad
1802 quality approximations such as turning "1/2" symbol (U+00BD) into
1803 "1" for the code pages which don't have it and we, obviously, want
1804 to avoid this at any price
1805
1806 the trouble is that this function does it _silently_, i.e. it won't
1807 even tell us whether it did or not... Win98/2000 and higher provide
1808 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1809 we have to resort to a round trip, i.e. check that converting back
1810 results in the same string -- this is, of course, expensive but
1811 otherwise we simply can't be sure to not garble the data.
1812 */
1813
1814 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1815 // it doesn't work with CJK encodings (which we test for rather roughly
1816 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1817 // supporting it
1818 BOOL usedDef wxDUMMY_INITIALIZE(false);
1819 BOOL *pUsedDef;
1820 int flags;
1821 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1822 {
1823 // it's our lucky day
1824 flags = WC_NO_BEST_FIT_CHARS;
1825 pUsedDef = &usedDef;
1826 }
1827 else // old system or unsupported encoding
1828 {
1829 flags = 0;
1830 pUsedDef = NULL;
1831 }
1832
1833 const size_t len = ::WideCharToMultiByte
1834 (
1835 m_CodePage, // code page
1836 flags, // either none or no best fit
1837 pwz, // input string
1838 -1, // it is (wide) NUL-terminated
1839 buf, // output buffer
1840 buf ? n : 0, // and its size
1841 NULL, // default "replacement" char
1842 pUsedDef // [out] was it used?
1843 );
1844
1845 if ( !len )
1846 {
1847 // function totally failed
1848 return (size_t)-1;
1849 }
1850
1851 // if we were really converting, check if we succeeded
1852 if ( buf )
1853 {
1854 if ( flags )
1855 {
1856 // check if the conversion failed, i.e. if any replacements
1857 // were done
1858 if ( usedDef )
1859 return (size_t)-1;
1860 }
1861 else // we must resort to double tripping...
1862 {
1863 wxWCharBuffer wcBuf(n);
1864 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1865 wcscmp(wcBuf, pwz) != 0 )
1866 {
1867 // we didn't obtain the same thing we started from, hence
1868 // the conversion was lossy and we consider that it failed
1869 return (size_t)-1;
1870 }
1871 }
1872 }
1873
1874 // see the comment above for the reason of "len - 1"
1875 return len - 1;
1876 }
1877
1878 bool IsOk() const { return m_CodePage != -1; }
1879
1880 private:
1881 static bool CanUseNoBestFit()
1882 {
1883 static int s_isWin98Or2k = -1;
1884
1885 if ( s_isWin98Or2k == -1 )
1886 {
1887 int verMaj, verMin;
1888 switch ( wxGetOsVersion(&verMaj, &verMin) )
1889 {
1890 case wxWIN95:
1891 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1892 break;
1893
1894 case wxWINDOWS_NT:
1895 s_isWin98Or2k = verMaj >= 5;
1896 break;
1897
1898 default:
1899 // unknown, be conseravtive by default
1900 s_isWin98Or2k = 0;
1901 }
1902
1903 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1904 }
1905
1906 return s_isWin98Or2k == 1;
1907 }
1908
1909 static bool IsAtLeastWin2kSP4()
1910 {
1911 #ifdef __WXWINCE__
1912 return false;
1913 #else
1914 static int s_isAtLeastWin2kSP4 = -1;
1915
1916 if ( s_isAtLeastWin2kSP4 == -1 )
1917 {
1918 OSVERSIONINFOEX ver;
1919
1920 memset(&ver, 0, sizeof(ver));
1921 ver.dwOSVersionInfoSize = sizeof(ver);
1922 GetVersionEx((OSVERSIONINFO*)&ver);
1923
1924 s_isAtLeastWin2kSP4 =
1925 ((ver.dwMajorVersion > 5) || // Vista+
1926 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
1927 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
1928 ver.wServicePackMajor >= 4)) // 2000 SP4+
1929 ? 1 : 0;
1930 }
1931
1932 return s_isAtLeastWin2kSP4 == 1;
1933 #endif
1934 }
1935
1936 virtual const char *GetMBNul(size_t *nulLen) const
1937 {
1938 if ( m_nulLen == (size_t)-2 )
1939 {
1940 wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
1941
1942 self->m_nulLen = ::WideCharToMultiByte
1943 (
1944 m_CodePage, // code page
1945 0, // no flags
1946 L"", // input string
1947 1, // translate just NUL
1948 self->m_nulBuf, // output buffer
1949 WXSIZEOF(m_nulBuf), // and its size
1950 NULL, // "replacement" char
1951 NULL // [out] was it used?
1952 );
1953
1954 if ( m_nulLen == 0 )
1955 self->m_nulLen = (size_t)-1;
1956 }
1957
1958 *nulLen = m_nulLen;
1959 return m_nulBuf;
1960 }
1961
1962 long m_CodePage;
1963 size_t m_nulLen;
1964 char m_nulBuf[8];
1965 };
1966
1967 #endif // wxHAVE_WIN32_MB2WC
1968
1969 // ============================================================================
1970 // Cocoa conversion classes
1971 // ============================================================================
1972
1973 #if defined(__WXCOCOA__)
1974
// RN: There is no UTF-32 support in either Core Foundation or
// Cocoa. Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
1978
1979 #include <CoreFoundation/CFString.h>
1980 #include <CoreFoundation/CFStringEncodingExt.h>
1981
1982 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1983 {
1984 CFStringEncoding enc = kCFStringEncodingInvalidId ;
1985 if ( encoding == wxFONTENCODING_DEFAULT )
1986 {
1987 enc = CFStringGetSystemEncoding();
1988 }
1989 else switch( encoding)
1990 {
1991 case wxFONTENCODING_ISO8859_1 :
1992 enc = kCFStringEncodingISOLatin1 ;
1993 break ;
1994 case wxFONTENCODING_ISO8859_2 :
1995 enc = kCFStringEncodingISOLatin2;
1996 break ;
1997 case wxFONTENCODING_ISO8859_3 :
1998 enc = kCFStringEncodingISOLatin3 ;
1999 break ;
2000 case wxFONTENCODING_ISO8859_4 :
2001 enc = kCFStringEncodingISOLatin4;
2002 break ;
2003 case wxFONTENCODING_ISO8859_5 :
2004 enc = kCFStringEncodingISOLatinCyrillic;
2005 break ;
2006 case wxFONTENCODING_ISO8859_6 :
2007 enc = kCFStringEncodingISOLatinArabic;
2008 break ;
2009 case wxFONTENCODING_ISO8859_7 :
2010 enc = kCFStringEncodingISOLatinGreek;
2011 break ;
2012 case wxFONTENCODING_ISO8859_8 :
2013 enc = kCFStringEncodingISOLatinHebrew;
2014 break ;
2015 case wxFONTENCODING_ISO8859_9 :
2016 enc = kCFStringEncodingISOLatin5;
2017 break ;
2018 case wxFONTENCODING_ISO8859_10 :
2019 enc = kCFStringEncodingISOLatin6;
2020 break ;
2021 case wxFONTENCODING_ISO8859_11 :
2022 enc = kCFStringEncodingISOLatinThai;
2023 break ;
2024 case wxFONTENCODING_ISO8859_13 :
2025 enc = kCFStringEncodingISOLatin7;
2026 break ;
2027 case wxFONTENCODING_ISO8859_14 :
2028 enc = kCFStringEncodingISOLatin8;
2029 break ;
2030 case wxFONTENCODING_ISO8859_15 :
2031 enc = kCFStringEncodingISOLatin9;
2032 break ;
2033
2034 case wxFONTENCODING_KOI8 :
2035 enc = kCFStringEncodingKOI8_R;
2036 break ;
2037 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2038 enc = kCFStringEncodingDOSRussian;
2039 break ;
2040
2041 // case wxFONTENCODING_BULGARIAN :
2042 // enc = ;
2043 // break ;
2044
2045 case wxFONTENCODING_CP437 :
2046 enc =kCFStringEncodingDOSLatinUS ;
2047 break ;
2048 case wxFONTENCODING_CP850 :
2049 enc = kCFStringEncodingDOSLatin1;
2050 break ;
2051 case wxFONTENCODING_CP852 :
2052 enc = kCFStringEncodingDOSLatin2;
2053 break ;
2054 case wxFONTENCODING_CP855 :
2055 enc = kCFStringEncodingDOSCyrillic;
2056 break ;
2057 case wxFONTENCODING_CP866 :
2058 enc =kCFStringEncodingDOSRussian ;
2059 break ;
2060 case wxFONTENCODING_CP874 :
2061 enc = kCFStringEncodingDOSThai;
2062 break ;
2063 case wxFONTENCODING_CP932 :
2064 enc = kCFStringEncodingDOSJapanese;
2065 break ;
2066 case wxFONTENCODING_CP936 :
2067 enc =kCFStringEncodingDOSChineseSimplif ;
2068 break ;
2069 case wxFONTENCODING_CP949 :
2070 enc = kCFStringEncodingDOSKorean;
2071 break ;
2072 case wxFONTENCODING_CP950 :
2073 enc = kCFStringEncodingDOSChineseTrad;
2074 break ;
2075 case wxFONTENCODING_CP1250 :
2076 enc = kCFStringEncodingWindowsLatin2;
2077 break ;
2078 case wxFONTENCODING_CP1251 :
2079 enc =kCFStringEncodingWindowsCyrillic ;
2080 break ;
2081 case wxFONTENCODING_CP1252 :
2082 enc =kCFStringEncodingWindowsLatin1 ;
2083 break ;
2084 case wxFONTENCODING_CP1253 :
2085 enc = kCFStringEncodingWindowsGreek;
2086 break ;
2087 case wxFONTENCODING_CP1254 :
2088 enc = kCFStringEncodingWindowsLatin5;
2089 break ;
2090 case wxFONTENCODING_CP1255 :
2091 enc =kCFStringEncodingWindowsHebrew ;
2092 break ;
2093 case wxFONTENCODING_CP1256 :
2094 enc =kCFStringEncodingWindowsArabic ;
2095 break ;
2096 case wxFONTENCODING_CP1257 :
2097 enc = kCFStringEncodingWindowsBalticRim;
2098 break ;
2099 // This only really encodes to UTF7 (if that) evidently
2100 // case wxFONTENCODING_UTF7 :
2101 // enc = kCFStringEncodingNonLossyASCII ;
2102 // break ;
2103 case wxFONTENCODING_UTF8 :
2104 enc = kCFStringEncodingUTF8 ;
2105 break ;
2106 case wxFONTENCODING_EUC_JP :
2107 enc = kCFStringEncodingEUC_JP;
2108 break ;
2109 case wxFONTENCODING_UTF16 :
2110 enc = kCFStringEncodingUnicode ;
2111 break ;
2112 case wxFONTENCODING_MACROMAN :
2113 enc = kCFStringEncodingMacRoman ;
2114 break ;
2115 case wxFONTENCODING_MACJAPANESE :
2116 enc = kCFStringEncodingMacJapanese ;
2117 break ;
2118 case wxFONTENCODING_MACCHINESETRAD :
2119 enc = kCFStringEncodingMacChineseTrad ;
2120 break ;
2121 case wxFONTENCODING_MACKOREAN :
2122 enc = kCFStringEncodingMacKorean ;
2123 break ;
2124 case wxFONTENCODING_MACARABIC :
2125 enc = kCFStringEncodingMacArabic ;
2126 break ;
2127 case wxFONTENCODING_MACHEBREW :
2128 enc = kCFStringEncodingMacHebrew ;
2129 break ;
2130 case wxFONTENCODING_MACGREEK :
2131 enc = kCFStringEncodingMacGreek ;
2132 break ;
2133 case wxFONTENCODING_MACCYRILLIC :
2134 enc = kCFStringEncodingMacCyrillic ;
2135 break ;
2136 case wxFONTENCODING_MACDEVANAGARI :
2137 enc = kCFStringEncodingMacDevanagari ;
2138 break ;
2139 case wxFONTENCODING_MACGURMUKHI :
2140 enc = kCFStringEncodingMacGurmukhi ;
2141 break ;
2142 case wxFONTENCODING_MACGUJARATI :
2143 enc = kCFStringEncodingMacGujarati ;
2144 break ;
2145 case wxFONTENCODING_MACORIYA :
2146 enc = kCFStringEncodingMacOriya ;
2147 break ;
2148 case wxFONTENCODING_MACBENGALI :
2149 enc = kCFStringEncodingMacBengali ;
2150 break ;
2151 case wxFONTENCODING_MACTAMIL :
2152 enc = kCFStringEncodingMacTamil ;
2153 break ;
2154 case wxFONTENCODING_MACTELUGU :
2155 enc = kCFStringEncodingMacTelugu ;
2156 break ;
2157 case wxFONTENCODING_MACKANNADA :
2158 enc = kCFStringEncodingMacKannada ;
2159 break ;
2160 case wxFONTENCODING_MACMALAJALAM :
2161 enc = kCFStringEncodingMacMalayalam ;
2162 break ;
2163 case wxFONTENCODING_MACSINHALESE :
2164 enc = kCFStringEncodingMacSinhalese ;
2165 break ;
2166 case wxFONTENCODING_MACBURMESE :
2167 enc = kCFStringEncodingMacBurmese ;
2168 break ;
2169 case wxFONTENCODING_MACKHMER :
2170 enc = kCFStringEncodingMacKhmer ;
2171 break ;
2172 case wxFONTENCODING_MACTHAI :
2173 enc = kCFStringEncodingMacThai ;
2174 break ;
2175 case wxFONTENCODING_MACLAOTIAN :
2176 enc = kCFStringEncodingMacLaotian ;
2177 break ;
2178 case wxFONTENCODING_MACGEORGIAN :
2179 enc = kCFStringEncodingMacGeorgian ;
2180 break ;
2181 case wxFONTENCODING_MACARMENIAN :
2182 enc = kCFStringEncodingMacArmenian ;
2183 break ;
2184 case wxFONTENCODING_MACCHINESESIMP :
2185 enc = kCFStringEncodingMacChineseSimp ;
2186 break ;
2187 case wxFONTENCODING_MACTIBETAN :
2188 enc = kCFStringEncodingMacTibetan ;
2189 break ;
2190 case wxFONTENCODING_MACMONGOLIAN :
2191 enc = kCFStringEncodingMacMongolian ;
2192 break ;
2193 case wxFONTENCODING_MACETHIOPIC :
2194 enc = kCFStringEncodingMacEthiopic ;
2195 break ;
2196 case wxFONTENCODING_MACCENTRALEUR :
2197 enc = kCFStringEncodingMacCentralEurRoman ;
2198 break ;
2199 case wxFONTENCODING_MACVIATNAMESE :
2200 enc = kCFStringEncodingMacVietnamese ;
2201 break ;
2202 case wxFONTENCODING_MACARABICEXT :
2203 enc = kCFStringEncodingMacExtArabic ;
2204 break ;
2205 case wxFONTENCODING_MACSYMBOL :
2206 enc = kCFStringEncodingMacSymbol ;
2207 break ;
2208 case wxFONTENCODING_MACDINGBATS :
2209 enc = kCFStringEncodingMacDingbats ;
2210 break ;
2211 case wxFONTENCODING_MACTURKISH :
2212 enc = kCFStringEncodingMacTurkish ;
2213 break ;
2214 case wxFONTENCODING_MACCROATIAN :
2215 enc = kCFStringEncodingMacCroatian ;
2216 break ;
2217 case wxFONTENCODING_MACICELANDIC :
2218 enc = kCFStringEncodingMacIcelandic ;
2219 break ;
2220 case wxFONTENCODING_MACROMANIAN :
2221 enc = kCFStringEncodingMacRomanian ;
2222 break ;
2223 case wxFONTENCODING_MACCELTIC :
2224 enc = kCFStringEncodingMacCeltic ;
2225 break ;
2226 case wxFONTENCODING_MACGAELIC :
2227 enc = kCFStringEncodingMacGaelic ;
2228 break ;
2229 // case wxFONTENCODING_MACKEYBOARD :
2230 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2231 // break ;
2232 default :
2233 // because gcc is picky
2234 break ;
2235 } ;
2236 return enc ;
2237 }
2238
2239 class wxMBConv_cocoa : public wxMBConv
2240 {
2241 public:
2242 wxMBConv_cocoa()
2243 {
2244 Init(CFStringGetSystemEncoding()) ;
2245 }
2246
2247 #if wxUSE_FONTMAP
2248 wxMBConv_cocoa(const wxChar* name)
2249 {
2250 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2251 }
2252 #endif
2253
2254 wxMBConv_cocoa(wxFontEncoding encoding)
2255 {
2256 Init( wxCFStringEncFromFontEnc(encoding) );
2257 }
2258
2259 ~wxMBConv_cocoa()
2260 {
2261 }
2262
2263 void Init( CFStringEncoding encoding)
2264 {
2265 m_encoding = encoding ;
2266 }
2267
2268 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2269 {
2270 wxASSERT(szUnConv);
2271
2272 CFStringRef theString = CFStringCreateWithBytes (
2273 NULL, //the allocator
2274 (const UInt8*)szUnConv,
2275 strlen(szUnConv),
2276 m_encoding,
2277 false //no BOM/external representation
2278 );
2279
2280 wxASSERT(theString);
2281
2282 size_t nOutLength = CFStringGetLength(theString);
2283
2284 if (szOut == NULL)
2285 {
2286 CFRelease(theString);
2287 return nOutLength;
2288 }
2289
2290 CFRange theRange = { 0, nOutSize };
2291
2292 #if SIZEOF_WCHAR_T == 4
2293 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2294 #endif
2295
2296 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2297
2298 CFRelease(theString);
2299
2300 szUniCharBuffer[nOutLength] = '\0' ;
2301
2302 #if SIZEOF_WCHAR_T == 4
2303 wxMBConvUTF16 converter ;
2304 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2305 delete[] szUniCharBuffer;
2306 #endif
2307
2308 return nOutLength;
2309 }
2310
2311 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2312 {
2313 wxASSERT(szUnConv);
2314
2315 size_t nRealOutSize;
2316 size_t nBufSize = wxWcslen(szUnConv);
2317 UniChar* szUniBuffer = (UniChar*) szUnConv;
2318
2319 #if SIZEOF_WCHAR_T == 4
2320 wxMBConvUTF16 converter ;
2321 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2322 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2323 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2324 nBufSize /= sizeof(UniChar);
2325 #endif
2326
2327 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2328 NULL, //allocator
2329 szUniBuffer,
2330 nBufSize,
2331 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2332 );
2333
2334 wxASSERT(theString);
2335
2336 //Note that CER puts a BOM when converting to unicode
2337 //so we check and use getchars instead in that case
2338 if (m_encoding == kCFStringEncodingUnicode)
2339 {
2340 if (szOut != NULL)
2341 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2342
2343 nRealOutSize = CFStringGetLength(theString) + 1;
2344 }
2345 else
2346 {
2347 CFStringGetBytes(
2348 theString,
2349 CFRangeMake(0, CFStringGetLength(theString)),
2350 m_encoding,
2351 0, //what to put in characters that can't be converted -
2352 //0 tells CFString to return NULL if it meets such a character
2353 false, //not an external representation
2354 (UInt8*) szOut,
2355 nOutSize,
2356 (CFIndex*) &nRealOutSize
2357 );
2358 }
2359
2360 CFRelease(theString);
2361
2362 #if SIZEOF_WCHAR_T == 4
2363 delete[] szUniBuffer;
2364 #endif
2365
2366 return nRealOutSize - 1;
2367 }
2368
2369 bool IsOk() const
2370 {
2371 return m_encoding != kCFStringEncodingInvalidId &&
2372 CFStringIsEncodingAvailable(m_encoding);
2373 }
2374
2375 private:
2376 CFStringEncoding m_encoding ;
2377 };
2378
2379 #endif // defined(__WXCOCOA__)
2380
2381 // ============================================================================
2382 // Mac conversion classes
2383 // ============================================================================
2384
2385 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2386
2387 class wxMBConv_mac : public wxMBConv
2388 {
2389 public:
2390 wxMBConv_mac()
2391 {
2392 Init(CFStringGetSystemEncoding()) ;
2393 }
2394
2395 #if wxUSE_FONTMAP
2396 wxMBConv_mac(const wxChar* name)
2397 {
2398 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2399 }
2400 #endif
2401
2402 wxMBConv_mac(wxFontEncoding encoding)
2403 {
2404 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2405 }
2406
2407 ~wxMBConv_mac()
2408 {
2409 OSStatus status = noErr ;
2410 status = TECDisposeConverter(m_MB2WC_converter);
2411 status = TECDisposeConverter(m_WC2MB_converter);
2412 }
2413
2414
2415 void Init( TextEncodingBase encoding)
2416 {
2417 OSStatus status = noErr ;
2418 m_char_encoding = encoding ;
2419 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2420
2421 status = TECCreateConverter(&m_MB2WC_converter,
2422 m_char_encoding,
2423 m_unicode_encoding);
2424 status = TECCreateConverter(&m_WC2MB_converter,
2425 m_unicode_encoding,
2426 m_char_encoding);
2427 }
2428
2429 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2430 {
2431 OSStatus status = noErr ;
2432 ByteCount byteOutLen ;
2433 ByteCount byteInLen = strlen(psz) ;
2434 wchar_t *tbuf = NULL ;
2435 UniChar* ubuf = NULL ;
2436 size_t res = 0 ;
2437
2438 if (buf == NULL)
2439 {
2440 //apple specs say at least 32
2441 n = wxMax( 32 , byteInLen ) ;
2442 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2443 }
2444 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2445 #if SIZEOF_WCHAR_T == 4
2446 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2447 #else
2448 ubuf = (UniChar*) (buf ? buf : tbuf) ;
2449 #endif
2450 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2451 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2452 #if SIZEOF_WCHAR_T == 4
2453 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2454 // is not properly terminated we get random characters at the end
2455 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2456 wxMBConvUTF16 converter ;
2457 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2458 free( ubuf ) ;
2459 #else
2460 res = byteOutLen / sizeof( UniChar ) ;
2461 #endif
2462 if ( buf == NULL )
2463 free(tbuf) ;
2464
2465 if ( buf && res < n)
2466 buf[res] = 0;
2467
2468 return res ;
2469 }
2470
2471 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2472 {
2473 OSStatus status = noErr ;
2474 ByteCount byteOutLen ;
2475 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2476
2477 char *tbuf = NULL ;
2478
2479 if (buf == NULL)
2480 {
2481 //apple specs say at least 32
2482 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2483 tbuf = (char*) malloc( n ) ;
2484 }
2485
2486 ByteCount byteBufferLen = n ;
2487 UniChar* ubuf = NULL ;
2488 #if SIZEOF_WCHAR_T == 4
2489 wxMBConvUTF16 converter ;
2490 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2491 byteInLen = unicharlen ;
2492 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2493 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2494 #else
2495 ubuf = (UniChar*) psz ;
2496 #endif
2497 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2498 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2499 #if SIZEOF_WCHAR_T == 4
2500 free( ubuf ) ;
2501 #endif
2502 if ( buf == NULL )
2503 free(tbuf) ;
2504
2505 size_t res = byteOutLen ;
2506 if ( buf && res < n)
2507 {
2508 buf[res] = 0;
2509
2510 //we need to double-trip to verify it didn't insert any ? in place
2511 //of bogus characters
2512 wxWCharBuffer wcBuf(n);
2513 size_t pszlen = wxWcslen(psz);
2514 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2515 wxWcslen(wcBuf) != pszlen ||
2516 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2517 {
2518 // we didn't obtain the same thing we started from, hence
2519 // the conversion was lossy and we consider that it failed
2520 return (size_t)-1;
2521 }
2522 }
2523
2524 return res ;
2525 }
2526
2527 bool IsOk() const
2528 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2529
2530 private:
2531 TECObjectRef m_MB2WC_converter ;
2532 TECObjectRef m_WC2MB_converter ;
2533
2534 TextEncodingBase m_char_encoding ;
2535 TextEncodingBase m_unicode_encoding ;
2536 };
2537
2538 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2539
2540 // ============================================================================
2541 // wxEncodingConverter based conversion classes
2542 // ============================================================================
2543
2544 #if wxUSE_FONTMAP
2545
2546 class wxMBConv_wxwin : public wxMBConv
2547 {
2548 private:
2549 void Init()
2550 {
2551 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2552 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2553 }
2554
2555 public:
2556 // temporarily just use wxEncodingConverter stuff,
2557 // so that it works while a better implementation is built
2558 wxMBConv_wxwin(const wxChar* name)
2559 {
2560 if (name)
2561 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2562 else
2563 m_enc = wxFONTENCODING_SYSTEM;
2564
2565 Init();
2566 }
2567
2568 wxMBConv_wxwin(wxFontEncoding enc)
2569 {
2570 m_enc = enc;
2571
2572 Init();
2573 }
2574
2575 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2576 {
2577 size_t inbuf = strlen(psz);
2578 if (buf)
2579 {
2580 if (!m2w.Convert(psz,buf))
2581 return (size_t)-1;
2582 }
2583 return inbuf;
2584 }
2585
2586 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2587 {
2588 const size_t inbuf = wxWcslen(psz);
2589 if (buf)
2590 {
2591 if (!w2m.Convert(psz,buf))
2592 return (size_t)-1;
2593 }
2594
2595 return inbuf;
2596 }
2597
2598 bool IsOk() const { return m_ok; }
2599
2600 public:
2601 wxFontEncoding m_enc;
2602 wxEncodingConverter m2w, w2m;
2603
2604 private:
2605 virtual const char *GetMBNul(size_t *nulLen) const
2606 {
2607 switch ( m_enc )
2608 {
2609 case wxFONTENCODING_UTF16BE:
2610 case wxFONTENCODING_UTF16LE:
2611 *nulLen = 2;
2612 return "\0";
2613
2614 case wxFONTENCODING_UTF32BE:
2615 case wxFONTENCODING_UTF32LE:
2616 *nulLen = 4;
2617 return "\0\0\0";
2618
2619 default:
2620 *nulLen = 1;
2621 return "";
2622 }
2623 }
2624
2625 // were we initialized successfully?
2626 bool m_ok;
2627
2628 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2629 };
2630
2631 // make the constructors available for unit testing
2632 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2633 {
2634 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2635 if ( !result->IsOk() )
2636 {
2637 delete result;
2638 return 0;
2639 }
2640 return result;
2641 }
2642
2643 #endif // wxUSE_FONTMAP
2644
2645 // ============================================================================
2646 // wxCSConv implementation
2647 // ============================================================================
2648
2649 void wxCSConv::Init()
2650 {
2651 m_name = NULL;
2652 m_convReal = NULL;
2653 m_deferred = true;
2654 }
2655
2656 wxCSConv::wxCSConv(const wxChar *charset)
2657 {
2658 Init();
2659
2660 if ( charset )
2661 {
2662 SetName(charset);
2663 }
2664
2665 #if wxUSE_FONTMAP
2666 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2667 #else
2668 m_encoding = wxFONTENCODING_SYSTEM;
2669 #endif
2670 }
2671
2672 wxCSConv::wxCSConv(wxFontEncoding encoding)
2673 {
2674 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2675 {
2676 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2677
2678 encoding = wxFONTENCODING_SYSTEM;
2679 }
2680
2681 Init();
2682
2683 m_encoding = encoding;
2684 }
2685
// Destructor: Clear() frees the charset name and deletes the real
// conversion object, if any.
wxCSConv::~wxCSConv()
{
    Clear();
}
2690
2691 wxCSConv::wxCSConv(const wxCSConv& conv)
2692 : wxMBConv()
2693 {
2694 Init();
2695
2696 SetName(conv.m_name);
2697 m_encoding = conv.m_encoding;
2698 }
2699
2700 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2701 {
2702 Clear();
2703
2704 SetName(conv.m_name);
2705 m_encoding = conv.m_encoding;
2706
2707 return *this;
2708 }
2709
2710 void wxCSConv::Clear()
2711 {
2712 free(m_name);
2713 delete m_convReal;
2714
2715 m_name = NULL;
2716 m_convReal = NULL;
2717 }
2718
2719 void wxCSConv::SetName(const wxChar *charset)
2720 {
2721 if (charset)
2722 {
2723 m_name = wxStrdup(charset);
2724 m_deferred = true;
2725 }
2726 }
2727
#if wxUSE_FONTMAP
#include "wx/hashmap.h"

// map from a font encoding to a charset name
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// cache used by wxCSConv::DoCreate() when iconv is available: it stores, for
// each encoding, the name with which an iconv converter could be created or
// an empty string if none of the known names worked
static wxEncodingNameCache gs_nameCache;
#endif
2736
2737 wxMBConv *wxCSConv::DoCreate() const
2738 {
2739 #if wxUSE_FONTMAP
2740 wxLogTrace(TRACE_STRCONV,
2741 wxT("creating conversion for %s"),
2742 (m_name ? m_name
2743 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2744 #endif // wxUSE_FONTMAP
2745
2746 // check for the special case of ASCII or ISO8859-1 charset: as we have
2747 // special knowledge of it anyhow, we don't need to create a special
2748 // conversion object
2749 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
2750 m_encoding == wxFONTENCODING_DEFAULT )
2751 {
2752 // don't convert at all
2753 return NULL;
2754 }
2755
2756 // we trust OS to do conversion better than we can so try external
2757 // conversion methods first
2758 //
2759 // the full order is:
2760 // 1. OS conversion (iconv() under Unix or Win32 API)
2761 // 2. hard coded conversions for UTF
2762 // 3. wxEncodingConverter as fall back
2763
2764 // step (1)
2765 #ifdef HAVE_ICONV
2766 #if !wxUSE_FONTMAP
2767 if ( m_name )
2768 #endif // !wxUSE_FONTMAP
2769 {
2770 wxString name(m_name);
2771 wxFontEncoding encoding(m_encoding);
2772
2773 if ( !name.empty() )
2774 {
2775 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2776 if ( conv->IsOk() )
2777 return conv;
2778
2779 delete conv;
2780
2781 #if wxUSE_FONTMAP
2782 encoding =
2783 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2784 #endif // wxUSE_FONTMAP
2785 }
2786 #if wxUSE_FONTMAP
2787 {
2788 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2789 if ( it != gs_nameCache.end() )
2790 {
2791 if ( it->second.empty() )
2792 return NULL;
2793
2794 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2795 if ( conv->IsOk() )
2796 return conv;
2797
2798 delete conv;
2799 }
2800
2801 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2802
2803 for ( ; *names; ++names )
2804 {
2805 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2806 if ( conv->IsOk() )
2807 {
2808 gs_nameCache[encoding] = *names;
2809 return conv;
2810 }
2811
2812 delete conv;
2813 }
2814
2815 gs_nameCache[encoding] = _T(""); // cache the failure
2816 }
2817 #endif // wxUSE_FONTMAP
2818 }
2819 #endif // HAVE_ICONV
2820
2821 #ifdef wxHAVE_WIN32_MB2WC
2822 {
2823 #if wxUSE_FONTMAP
2824 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2825 : new wxMBConv_win32(m_encoding);
2826 if ( conv->IsOk() )
2827 return conv;
2828
2829 delete conv;
2830 #else
2831 return NULL;
2832 #endif
2833 }
2834 #endif // wxHAVE_WIN32_MB2WC
2835 #if defined(__WXMAC__)
2836 {
2837 // leave UTF16 and UTF32 to the built-ins of wx
2838 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2839 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2840 {
2841
2842 #if wxUSE_FONTMAP
2843 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2844 : new wxMBConv_mac(m_encoding);
2845 #else
2846 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2847 #endif
2848 if ( conv->IsOk() )
2849 return conv;
2850
2851 delete conv;
2852 }
2853 }
2854 #endif
2855 #if defined(__WXCOCOA__)
2856 {
2857 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2858 {
2859
2860 #if wxUSE_FONTMAP
2861 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2862 : new wxMBConv_cocoa(m_encoding);
2863 #else
2864 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2865 #endif
2866 if ( conv->IsOk() )
2867 return conv;
2868
2869 delete conv;
2870 }
2871 }
2872 #endif
2873 // step (2)
2874 wxFontEncoding enc = m_encoding;
2875 #if wxUSE_FONTMAP
2876 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2877 {
2878 // use "false" to suppress interactive dialogs -- we can be called from
2879 // anywhere and popping up a dialog from here is the last thing we want to
2880 // do
2881 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2882 }
2883 #endif // wxUSE_FONTMAP
2884
2885 switch ( enc )
2886 {
2887 case wxFONTENCODING_UTF7:
2888 return new wxMBConvUTF7;
2889
2890 case wxFONTENCODING_UTF8:
2891 return new wxMBConvUTF8;
2892
2893 case wxFONTENCODING_UTF16BE:
2894 return new wxMBConvUTF16BE;
2895
2896 case wxFONTENCODING_UTF16LE:
2897 return new wxMBConvUTF16LE;
2898
2899 case wxFONTENCODING_UTF32BE:
2900 return new wxMBConvUTF32BE;
2901
2902 case wxFONTENCODING_UTF32LE:
2903 return new wxMBConvUTF32LE;
2904
2905 default:
2906 // nothing to do but put here to suppress gcc warnings
2907 ;
2908 }
2909
2910 // step (3)
2911 #if wxUSE_FONTMAP
2912 {
2913 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2914 : new wxMBConv_wxwin(m_encoding);
2915 if ( conv->IsOk() )
2916 return conv;
2917
2918 delete conv;
2919 }
2920 #endif // wxUSE_FONTMAP
2921
2922 // NB: This is a hack to prevent deadlock. What could otherwise happen
2923 // in Unicode build: wxConvLocal creation ends up being here
2924 // because of some failure and logs the error. But wxLog will try to
2925 // attach timestamp, for which it will need wxConvLocal (to convert
2926 // time to char* and then wchar_t*), but that fails, tries to log
2927 // error, but wxLog has a (already locked) critical section that
2928 // guards static buffer.
2929 static bool alreadyLoggingError = false;
2930 if (!alreadyLoggingError)
2931 {
2932 alreadyLoggingError = true;
2933 wxLogError(_("Cannot convert from the charset '%s'!"),
2934 m_name ? m_name
2935 :
2936 #if wxUSE_FONTMAP
2937 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2938 #else // !wxUSE_FONTMAP
2939 wxString::Format(_("encoding %s"), m_encoding).c_str()
2940 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2941 );
2942 alreadyLoggingError = false;
2943 }
2944
2945 return NULL;
2946 }
2947
2948 void wxCSConv::CreateConvIfNeeded() const
2949 {
2950 if ( m_deferred )
2951 {
2952 wxCSConv *self = (wxCSConv *)this; // const_cast
2953
2954 #if wxUSE_INTL
2955 // if we don't have neither the name nor the encoding, use the default
2956 // encoding for this system
2957 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2958 {
2959 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2960 }
2961 #endif // wxUSE_INTL
2962
2963 self->m_convReal = DoCreate();
2964 self->m_deferred = false;
2965 }
2966 }
2967
2968 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2969 {
2970 CreateConvIfNeeded();
2971
2972 if (m_convReal)
2973 return m_convReal->MB2WC(buf, psz, n);
2974
2975 // latin-1 (direct)
2976 size_t len = strlen(psz);
2977
2978 if (buf)
2979 {
2980 for (size_t c = 0; c <= len; c++)
2981 buf[c] = (unsigned char)(psz[c]);
2982 }
2983
2984 return len;
2985 }
2986
2987 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2988 {
2989 CreateConvIfNeeded();
2990
2991 if (m_convReal)
2992 return m_convReal->WC2MB(buf, psz, n);
2993
2994 // latin-1 (direct)
2995 const size_t len = wxWcslen(psz);
2996 if (buf)
2997 {
2998 for (size_t c = 0; c <= len; c++)
2999 {
3000 if (psz[c] > 0xFF)
3001 return (size_t)-1;
3002 buf[c] = (char)psz[c];
3003 }
3004 }
3005 else
3006 {
3007 for (size_t c = 0; c <= len; c++)
3008 {
3009 if (psz[c] > 0xFF)
3010 return (size_t)-1;
3011 }
3012 }
3013
3014 return len;
3015 }
3016
3017 const char *wxCSConv::GetMBNul(size_t *nulLen) const
3018 {
3019 CreateConvIfNeeded();
3020
3021 if ( m_convReal )
3022 {
3023 // cast needed just to call private function of m_convReal
3024 return ((wxCSConv *)m_convReal)->GetMBNul(nulLen);
3025 }
3026
3027 *nulLen = 1;
3028 return "";
3029 }
3030
3031 // ----------------------------------------------------------------------------
3032 // globals
3033 // ----------------------------------------------------------------------------
3034
// the object behind wxConvLibc: its class is chosen per platform
#ifdef __WINDOWS__
    static wxMBConv_win32 wxConvLibcObj;
#elif defined(__WXMAC__) && !defined(__MACH__)
    static wxMBConv_mac wxConvLibcObj ;
#else
    static wxMBConvLibc wxConvLibcObj;
#endif

// the objects behind the other predefined conversions
static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
static wxMBConvUTF7 wxConvUTF7Obj;
static wxMBConvUTF8 wxConvUTF8Obj;

// the exported globals are references (or pointers) to the static objects
// defined above
WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
// wxConvFileName points to the UTF-8 object when __WXOSX__ is defined and to
// the libc one otherwise
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
#ifdef __WXOSX__
                                    wxConvUTF8Obj;
#else
                                    wxConvLibcObj;
#endif
3060
3061
3062 #else // !wxUSE_WCHAR_T
3063
// stand-ins in absence of wchar_t: here the predefined conversions are
// plain wxMBConv objects
WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
                                wxConvISO8859_1,
                                wxConvLocal,
                                wxConvUTF8;
3069
3070 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T