]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
00ab2fc8f51348f0d442a13e651583455fc91037
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
6 // Modified by:
7 // Created: 29/01/98
8 // RCS-ID: $Id$
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
14
15 // ============================================================================
16 // declarations
17 // ============================================================================
18
19 // ----------------------------------------------------------------------------
20 // headers
21 // ----------------------------------------------------------------------------
22
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
25
26 #ifdef __BORLANDC__
27 #pragma hdrstop
28 #endif
29
30 #ifndef WX_PRECOMP
31 #include "wx/intl.h"
32 #include "wx/log.h"
33 #endif // WX_PRECOMP
34
35 #include "wx/strconv.h"
36
37 #if wxUSE_WCHAR_T
38
39 #ifdef __WINDOWS__
40 #include "wx/msw/private.h"
41 #include "wx/msw/missing.h"
42 #endif
43
44 #ifndef __WXWINCE__
45 #include <errno.h>
46 #endif
47
48 #include <ctype.h>
49 #include <string.h>
50 #include <stdlib.h>
51
52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54 #endif // __WIN32__ but !__WXMICROWIN__
55
56 #ifdef __SALFORDC__
57 #include <clib.h>
58 #endif
59
60 #ifdef HAVE_ICONV
61 #include <iconv.h>
62 #include "wx/thread.h"
63 #endif
64
65 #include "wx/encconv.h"
66 #include "wx/fontmap.h"
67 #include "wx/utils.h"
68
69 #ifdef __WXMAC__
70 #ifndef __DARWIN__
71 #include <ATSUnicode.h>
72 #include <TextCommon.h>
73 #include <TextEncodingConverter.h>
74 #endif
75
76 #include "wx/mac/private.h" // includes mac headers
77 #endif
78
79 #define TRACE_STRCONV _T("strconv")
80
81 #if SIZEOF_WCHAR_T == 2
82 #define WC_UTF16
83 #endif
84
85 // ============================================================================
86 // implementation
87 // ============================================================================
88
89 // ----------------------------------------------------------------------------
90 // UTF-16 en/decoding to/from UCS-4
91 // ----------------------------------------------------------------------------
92
93
94 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
95 {
96 if (input<=0xffff)
97 {
98 if (output)
99 *output = (wxUint16) input;
100 return 1;
101 }
102 else if (input>=0x110000)
103 {
104 return (size_t)-1;
105 }
106 else
107 {
108 if (output)
109 {
110 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
111 *output = (wxUint16) ((input&0x3ff)+0xdc00);
112 }
113 return 2;
114 }
115 }
116
117 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
118 {
119 if ((*input<0xd800) || (*input>0xdfff))
120 {
121 output = *input;
122 return 1;
123 }
124 else if ((input[1]<0xdc00) || (input[1]>0xdfff))
125 {
126 output = *input;
127 return (size_t)-1;
128 }
129 else
130 {
131 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
132 return 2;
133 }
134 }
135
136
137 // ----------------------------------------------------------------------------
138 // wxMBConv
139 // ----------------------------------------------------------------------------
140
141 wxMBConv::~wxMBConv()
142 {
143 // nothing to do here (necessary for Darwin linking probably)
144 }
145
146 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
147 {
148 if ( psz )
149 {
150 // calculate the length of the buffer needed first
151 size_t nLen = MB2WC(NULL, psz, 0);
152 if ( nLen != (size_t)-1 )
153 {
154 // now do the actual conversion
155 wxWCharBuffer buf(nLen);
156 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
157 if ( nLen != (size_t)-1 )
158 {
159 return buf;
160 }
161 }
162 }
163
164 wxWCharBuffer buf((wchar_t *)NULL);
165
166 return buf;
167 }
168
169 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
170 {
171 if ( pwz )
172 {
173 size_t nLen = WC2MB(NULL, pwz, 0);
174 if ( nLen != (size_t)-1 )
175 {
176 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
177 nLen = WC2MB(buf.data(), pwz, nLen + 4);
178 if ( nLen != (size_t)-1 )
179 {
180 return buf;
181 }
182 }
183 }
184
185 wxCharBuffer buf((char *)NULL);
186
187 return buf;
188 }
189
// helper of cMB2WC(): returns true if at least one of the n bytes starting
// at p is not NUL and false if all of them are (or if n is 0)
static bool NotAllNULs(const char *p, size_t n)
{
    for ( ; n != 0; n--, p++ )
    {
        if ( *p != '\0' )
            return true;
    }

    return false;
}
198
199 const wxWCharBuffer
200 wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
201 {
202 // the currently accumulated wide characters
203 wxWCharBuffer wbuf;
204
205 // the current length of wbuf
206 size_t lenBuf = 0;
207
208 // the number of NULs terminating this string
209 size_t nulLen wxDUMMY_INITIALIZE(0);
210
211 // make a copy of the input string unless it is already properly
212 // NUL-terminated
213 wxCharBuffer bufTmp;
214
215 // if we were not given the input size we just have to assume that the
216 // string is properly terminated as we have no way of knowing how long it
217 // is anyhow, but if we do have the size check whether there are enough
218 // NULs at the end
219 if ( inLen != (size_t)-1 )
220 {
221 // we need to know how to find the end of this string
222 nulLen = GetMinMBCharWidth();
223 if ( nulLen == (size_t)-1 )
224 return wbuf;
225
226 // if there are enough NULs we can avoid the copy
227 if ( inLen < nulLen || NotAllNULs(in + inLen - nulLen, nulLen) )
228 {
229 // make a copy in order to properly NUL-terminate the string
230 bufTmp = wxCharBuffer(inLen + nulLen - 1 /* 1 will be added */);
231 char * const p = bufTmp.data();
232 memcpy(p, in, inLen);
233 for ( char *s = p + inLen; s < p + inLen + nulLen; s++ )
234 *s = '\0';
235 }
236 }
237
238 if ( bufTmp )
239 in = bufTmp;
240
241 size_t lenChunk;
242 for ( const char * const inEnd = in + inLen;; )
243 {
244 // try to convert the current chunk
245 lenChunk = MB2WC(NULL, in, 0);
246 if ( lenChunk == 0 )
247 {
248 // nothing left in the input string, conversion succeeded
249 break;
250 }
251
252 if ( lenChunk == (size_t)-1 )
253 break;
254
255 // if we already have a previous chunk, leave the NUL separating it
256 // from this one
257 if ( lenBuf )
258 lenBuf++;
259
260 const size_t lenBufNew = lenBuf + lenChunk;
261 if ( !wbuf.extend(lenBufNew) )
262 {
263 lenChunk = (size_t)-1;
264 break;
265 }
266
267 lenChunk = MB2WC(wbuf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
268 if ( lenChunk == (size_t)-1 )
269 break;
270
271 lenBuf = lenBufNew;
272
273 if ( inLen == (size_t)-1 )
274 {
275 // convert only one chunk in this case, as we suppose that the
276 // string is NUL-terminated and so inEnd is not used at all
277 break;
278 }
279
280 // advance the input pointer past the end of this chunk
281 while ( NotAllNULs(in, nulLen) )
282 {
283 // notice that we must skip over multiple bytes here as we suppose
284 // that if NUL takes 2 or 4 bytes, then all the other characters do
285 // too and so if advanced by a single byte we might erroneously
286 // detect sequences of NUL bytes in the middle of the input
287 in += nulLen;
288 }
289
290 in += nulLen; // skipping over its terminator as well
291
292 // note that ">=" (and not just "==") is needed here as the terminator
293 // we skipped just above could be inside or just after the buffer
294 // delimited by inEnd
295 if ( in >= inEnd )
296 break;
297 }
298
299 if ( lenChunk == (size_t)-1 )
300 {
301 // conversion failed
302 lenBuf = 0;
303 wbuf.reset();
304 }
305
306 if ( outLen )
307 *outLen = lenBuf;
308
309 return wbuf;
310 }
311
312 const wxCharBuffer
313 wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
314 {
315 // the currently accumulated multibyte characters
316 wxCharBuffer buf;
317
318 // the current length of buf
319 size_t lenBuf = 0;
320
321 // make a copy of the input string unless it is already properly
322 // NUL-terminated
323 //
324 // if we don't know its length we have no choice but to assume that it is,
325 // indeed, properly terminated
326 wxWCharBuffer bufTmp;
327 if ( inLen == (size_t)-1 )
328 {
329 inLen = wxWcslen(in) + 1;
330 }
331 else if ( inLen != 0 && in[inLen - 1] != L'\0' )
332 {
333 // make a copy in order to properly NUL-terminate the string
334 bufTmp = wxWCharBuffer(inLen);
335 memcpy(bufTmp.data(), in, inLen*sizeof(wchar_t));
336 }
337
338 if ( bufTmp )
339 in = bufTmp;
340
341 for ( const wchar_t * const inEnd = in + inLen;; )
342 {
343 // try to convert the current chunk, if anything left
344 size_t lenChunk = in < inEnd ? WC2MB(NULL, in, 0) : 0;
345 if ( lenChunk == 0 )
346 {
347 // nothing left in the input string, conversion succeeded
348 if ( outLen )
349 *outLen = lenBuf ? lenBuf - 1 : lenBuf;
350
351 return buf;
352 }
353
354 if ( lenChunk == (size_t)-1 )
355 break;
356
357 const size_t lenBufNew = lenBuf + lenChunk;
358 if ( !buf.extend(lenBufNew) )
359 break;
360
361 lenChunk = WC2MB(buf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
362 if ( lenChunk == (size_t)-1 )
363 break;
364
365 // chunk successfully converted, go to the next one
366 in += wxWcslen(in) + 1 /* skip NUL too */;
367 lenBuf = lenBufNew + 1;
368 }
369
370 // conversion failed
371 if ( outLen )
372 *outLen = 0;
373
374 return wxCharBuffer();
375 }
376
377 // ----------------------------------------------------------------------------
378 // wxMBConvLibc
379 // ----------------------------------------------------------------------------
380
381 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
382 {
383 return wxMB2WC(buf, psz, n);
384 }
385
386 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
387 {
388 return wxWC2MB(buf, psz, n);
389 }
390
391 // ----------------------------------------------------------------------------
392 // wxConvBrokenFileNames
393 // ----------------------------------------------------------------------------
394
395 #ifdef __UNIX__
396
397 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
398 {
399 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
400 || wxStricmp(charset, _T("UTF8")) == 0 )
401 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
402 else
403 m_conv = new wxCSConv(charset);
404 }
405
406 #endif // __UNIX__
407
408 // ----------------------------------------------------------------------------
409 // UTF-7
410 // ----------------------------------------------------------------------------
411
412 // Implementation (C) 2004 Fredrik Roubert
413
//
// BASE64 decoding table: indexed by the (unsigned) character, it contains
// the corresponding 6 bit value or 0xff for the characters which are not
// part of the BASE64 alphabet
//
static const unsigned char utf7unb64[] =
{
    // 0x00..0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10..0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20..0x2f: only '+' (0x2b) and '/' (0x2f) are valid
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30..0x3f: digits '0'..'9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40..0x4f: 'A'..'O'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50..0x5f: 'P'..'Z'
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60..0x6f: 'a'..'o'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70..0x7f: 'p'..'z'
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80..0xff: nothing valid here
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
452
453 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
454 {
455 size_t len = 0;
456
457 while ( *psz && (!buf || (len < n)) )
458 {
459 unsigned char cc = *psz++;
460 if (cc != '+')
461 {
462 // plain ASCII char
463 if (buf)
464 *buf++ = cc;
465 len++;
466 }
467 else if (*psz == '-')
468 {
469 // encoded plus sign
470 if (buf)
471 *buf++ = cc;
472 len++;
473 psz++;
474 }
475 else // start of BASE64 encoded string
476 {
477 bool lsb, ok;
478 unsigned int d, l;
479 for ( ok = lsb = false, d = 0, l = 0;
480 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
481 psz++ )
482 {
483 d <<= 6;
484 d += cc;
485 for (l += 6; l >= 8; lsb = !lsb)
486 {
487 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
488 if (lsb)
489 {
490 if (buf)
491 *buf++ |= c;
492 len ++;
493 }
494 else
495 {
496 if (buf)
497 *buf = (wchar_t)(c << 8);
498 }
499
500 ok = true;
501 }
502 }
503
504 if ( !ok )
505 {
506 // in valid UTF7 we should have valid characters after '+'
507 return (size_t)-1;
508 }
509
510 if (*psz == '-')
511 psz++;
512 }
513 }
514
515 if ( buf && (len < n) )
516 *buf = '\0';
517
518 return len;
519 }
520
//
// BASE64 encoding table: maps each 6 bit value to the character used to
// represent it
//
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
};
535
//
// UTF-7 encoding table, indexed by 7 bit character code, classifying the
// characters as follows:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3,     // 0x00..0x07
    3, 2, 2, 3, 3, 2, 3, 3,     // 0x08..0x0f: TAB, LF and CR are whitespace
    3, 3, 3, 3, 3, 3, 3, 3,     // 0x10..0x17
    3, 3, 3, 3, 3, 3, 3, 3,     // 0x18..0x1f
    2, 1, 1, 1, 1, 1, 1, 0,     // 0x20..0x27: from space to apostrophe
    0, 0, 1, 3, 0, 0, 0, 3,     // 0x28..0x2f: '+' and '/' are special
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x30..0x37: '0'..'7'
    0, 0, 0, 1, 1, 1, 1, 0,     // 0x38..0x3f: '8', '9', ':' up to '?'
    1, 0, 0, 0, 0, 0, 0, 0,     // 0x40..0x47: '@', 'A'..'G'
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x48..0x4f: 'H'..'O'
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x50..0x57: 'P'..'W'
    0, 0, 0, 1, 3, 1, 1, 1,     // 0x58..0x5f: the backslash is special
    1, 0, 0, 0, 0, 0, 0, 0,     // 0x60..0x67: '`', 'a'..'g'
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x68..0x6f: 'h'..'o'
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x70..0x77: 'p'..'w'
    0, 0, 0, 1, 1, 1, 3, 3      // 0x78..0x7f: '~' and DEL are special
};
555
556 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
557 {
558 size_t len = 0;
559
560 while (*psz && ((!buf) || (len < n)))
561 {
562 wchar_t cc = *psz++;
563 if (cc < 0x80 && utf7encode[cc] < 1)
564 {
565 // plain ASCII char
566 if (buf)
567 *buf++ = (char)cc;
568 len++;
569 }
570 #ifndef WC_UTF16
571 else if (((wxUint32)cc) > 0xffff)
572 {
573 // no surrogate pair generation (yet?)
574 return (size_t)-1;
575 }
576 #endif
577 else
578 {
579 if (buf)
580 *buf++ = '+';
581 len++;
582 if (cc != '+')
583 {
584 // BASE64 encode string
585 unsigned int lsb, d, l;
586 for (d = 0, l = 0; /*nothing*/; psz++)
587 {
588 for (lsb = 0; lsb < 2; lsb ++)
589 {
590 d <<= 8;
591 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
592
593 for (l += 8; l >= 6; )
594 {
595 l -= 6;
596 if (buf)
597 *buf++ = utf7enb64[(d >> l) % 64];
598 len++;
599 }
600 }
601 cc = *psz;
602 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
603 break;
604 }
605 if (l != 0)
606 {
607 if (buf)
608 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
609 len++;
610 }
611 }
612 if (buf)
613 *buf++ = '-';
614 len++;
615 }
616 }
617 if (buf && (len < n))
618 *buf = 0;
619 return len;
620 }
621
622 // ----------------------------------------------------------------------------
623 // UTF-8
624 // ----------------------------------------------------------------------------
625
626 static wxUint32 utf8_max[]=
627 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
628
629 // boundaries of the private use area we use to (temporarily) remap invalid
630 // characters invalid in a UTF-8 encoded string
631 const wxUint32 wxUnicodePUA = 0x100000;
632 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
633
634 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
635 {
636 size_t len = 0;
637
638 while (*psz && ((!buf) || (len < n)))
639 {
640 const char *opsz = psz;
641 bool invalid = false;
642 unsigned char cc = *psz++, fc = cc;
643 unsigned cnt;
644 for (cnt = 0; fc & 0x80; cnt++)
645 fc <<= 1;
646 if (!cnt)
647 {
648 // plain ASCII char
649 if (buf)
650 *buf++ = cc;
651 len++;
652
653 // escape the escape character for octal escapes
654 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
655 && cc == '\\' && (!buf || len < n))
656 {
657 if (buf)
658 *buf++ = cc;
659 len++;
660 }
661 }
662 else
663 {
664 cnt--;
665 if (!cnt)
666 {
667 // invalid UTF-8 sequence
668 invalid = true;
669 }
670 else
671 {
672 unsigned ocnt = cnt - 1;
673 wxUint32 res = cc & (0x3f >> cnt);
674 while (cnt--)
675 {
676 cc = *psz;
677 if ((cc & 0xC0) != 0x80)
678 {
679 // invalid UTF-8 sequence
680 invalid = true;
681 break;
682 }
683 psz++;
684 res = (res << 6) | (cc & 0x3f);
685 }
686 if (invalid || res <= utf8_max[ocnt])
687 {
688 // illegal UTF-8 encoding
689 invalid = true;
690 }
691 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
692 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
693 {
694 // if one of our PUA characters turns up externally
695 // it must also be treated as an illegal sequence
696 // (a bit like you have to escape an escape character)
697 invalid = true;
698 }
699 else
700 {
701 #ifdef WC_UTF16
702 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
703 size_t pa = encode_utf16(res, (wxUint16 *)buf);
704 if (pa == (size_t)-1)
705 {
706 invalid = true;
707 }
708 else
709 {
710 if (buf)
711 buf += pa;
712 len += pa;
713 }
714 #else // !WC_UTF16
715 if (buf)
716 *buf++ = (wchar_t)res;
717 len++;
718 #endif // WC_UTF16/!WC_UTF16
719 }
720 }
721 if (invalid)
722 {
723 if (m_options & MAP_INVALID_UTF8_TO_PUA)
724 {
725 while (opsz < psz && (!buf || len < n))
726 {
727 #ifdef WC_UTF16
728 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
729 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
730 wxASSERT(pa != (size_t)-1);
731 if (buf)
732 buf += pa;
733 opsz++;
734 len += pa;
735 #else
736 if (buf)
737 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
738 opsz++;
739 len++;
740 #endif
741 }
742 }
743 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
744 {
745 while (opsz < psz && (!buf || len < n))
746 {
747 if ( buf && len + 3 < n )
748 {
749 unsigned char on = *opsz;
750 *buf++ = L'\\';
751 *buf++ = (wchar_t)( L'0' + on / 0100 );
752 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
753 *buf++ = (wchar_t)( L'0' + on % 010 );
754 }
755 opsz++;
756 len += 4;
757 }
758 }
759 else // MAP_INVALID_UTF8_NOT
760 {
761 return (size_t)-1;
762 }
763 }
764 }
765 }
766 if (buf && (len < n))
767 *buf = 0;
768 return len;
769 }
770
// returns true if the given wide character is an octal digit, i.e. one of
// the characters from '0' to '7' inclusive
static inline bool isoctal(wchar_t wch)
{
    return wch >= L'0' && wch <= L'7';
}
775
776 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
777 {
778 size_t len = 0;
779
780 while (*psz && ((!buf) || (len < n)))
781 {
782 wxUint32 cc;
783 #ifdef WC_UTF16
784 // cast is ok for WC_UTF16
785 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
786 psz += (pa == (size_t)-1) ? 1 : pa;
787 #else
788 cc=(*psz++) & 0x7fffffff;
789 #endif
790
791 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
792 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
793 {
794 if (buf)
795 *buf++ = (char)(cc - wxUnicodePUA);
796 len++;
797 }
798 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
799 && cc == L'\\' && psz[0] == L'\\' )
800 {
801 if (buf)
802 *buf++ = (char)cc;
803 psz++;
804 len++;
805 }
806 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
807 cc == L'\\' &&
808 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
809 {
810 if (buf)
811 {
812 *buf++ = (char) ((psz[0] - L'0')*0100 +
813 (psz[1] - L'0')*010 +
814 (psz[2] - L'0'));
815 }
816
817 psz += 3;
818 len++;
819 }
820 else
821 {
822 unsigned cnt;
823 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
824 if (!cnt)
825 {
826 // plain ASCII char
827 if (buf)
828 *buf++ = (char) cc;
829 len++;
830 }
831
832 else
833 {
834 len += cnt + 1;
835 if (buf)
836 {
837 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
838 while (cnt--)
839 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
840 }
841 }
842 }
843 }
844
845 if (buf && (len<n))
846 *buf = 0;
847
848 return len;
849 }
850
851 // ----------------------------------------------------------------------------
852 // UTF-16
853 // ----------------------------------------------------------------------------
854
// "straight" is the UTF-16 converter for the byte order selected by
// WORDS_BIGENDIAN and "swap" the one for the opposite byte order
#if defined(WORDS_BIGENDIAN)
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else // !WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16LE
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
#endif // WORDS_BIGENDIAN/!WORDS_BIGENDIAN
862
863
864 #ifdef WC_UTF16
865
866 // copy 16bit MB to 16bit String
867 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
868 {
869 size_t len=0;
870
871 while (*(wxUint16*)psz && (!buf || len < n))
872 {
873 if (buf)
874 *buf++ = *(wxUint16*)psz;
875 len++;
876
877 psz += sizeof(wxUint16);
878 }
879 if (buf && len<n) *buf=0;
880
881 return len;
882 }
883
884
885 // copy 16bit String to 16bit MB
886 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
887 {
888 size_t len=0;
889
890 while (*psz && (!buf || len < n))
891 {
892 if (buf)
893 {
894 *(wxUint16*)buf = *psz;
895 buf += sizeof(wxUint16);
896 }
897 len += sizeof(wxUint16);
898 psz++;
899 }
900 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
901
902 return len;
903 }
904
905
906 // swap 16bit MB to 16bit String
907 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
908 {
909 size_t len = 0;
910
911 // UTF16 string must be terminated by 2 NULs as single NULs may occur
912 // inside the string
913 while ( (psz[0] || psz[1]) && (!buf || len < n) )
914 {
915 if ( buf )
916 {
917 ((char *)buf)[0] = psz[1];
918 ((char *)buf)[1] = psz[0];
919 buf++;
920 }
921 len++;
922 psz += 2;
923 }
924
925 if ( buf && len < n )
926 *buf = L'\0';
927
928 return len;
929 }
930
931
932 // swap 16bit MB to 16bit String
933 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
934 {
935 size_t len = 0;
936
937 while ( *psz && (!buf || len < n) )
938 {
939 if ( buf )
940 {
941 *buf++ = ((char*)psz)[1];
942 *buf++ = ((char*)psz)[0];
943 }
944 len += 2;
945 psz++;
946 }
947
948 if ( buf && len < n )
949 *buf = '\0';
950
951 return len;
952 }
953
954
955 #else // WC_UTF16
956
957
958 // copy 16bit MB to 32bit String
959 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
960 {
961 size_t len=0;
962
963 while (*(wxUint16*)psz && (!buf || len < n))
964 {
965 wxUint32 cc;
966 size_t pa=decode_utf16((wxUint16*)psz, cc);
967 if (pa == (size_t)-1)
968 return pa;
969
970 if (buf)
971 *buf++ = (wchar_t)cc;
972 len++;
973 psz += pa * sizeof(wxUint16);
974 }
975 if (buf && len<n) *buf=0;
976
977 return len;
978 }
979
980
981 // copy 32bit String to 16bit MB
982 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
983 {
984 size_t len=0;
985
986 while (*psz && (!buf || len < n))
987 {
988 wxUint16 cc[2];
989 size_t pa=encode_utf16(*psz, cc);
990
991 if (pa == (size_t)-1)
992 return pa;
993
994 if (buf)
995 {
996 *(wxUint16*)buf = cc[0];
997 buf += sizeof(wxUint16);
998 if (pa > 1)
999 {
1000 *(wxUint16*)buf = cc[1];
1001 buf += sizeof(wxUint16);
1002 }
1003 }
1004
1005 len += pa*sizeof(wxUint16);
1006 psz++;
1007 }
1008 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1009
1010 return len;
1011 }
1012
1013
1014 // swap 16bit MB to 32bit String
1015 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1016 {
1017 size_t len=0;
1018
1019 while (*(wxUint16*)psz && (!buf || len < n))
1020 {
1021 wxUint32 cc;
1022 char tmp[4];
1023 tmp[0]=psz[1]; tmp[1]=psz[0];
1024 tmp[2]=psz[3]; tmp[3]=psz[2];
1025
1026 size_t pa=decode_utf16((wxUint16*)tmp, cc);
1027 if (pa == (size_t)-1)
1028 return pa;
1029
1030 if (buf)
1031 *buf++ = (wchar_t)cc;
1032
1033 len++;
1034 psz += pa * sizeof(wxUint16);
1035 }
1036 if (buf && len<n) *buf=0;
1037
1038 return len;
1039 }
1040
1041
1042 // swap 32bit String to 16bit MB
1043 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1044 {
1045 size_t len=0;
1046
1047 while (*psz && (!buf || len < n))
1048 {
1049 wxUint16 cc[2];
1050 size_t pa=encode_utf16(*psz, cc);
1051
1052 if (pa == (size_t)-1)
1053 return pa;
1054
1055 if (buf)
1056 {
1057 *buf++ = ((char*)cc)[1];
1058 *buf++ = ((char*)cc)[0];
1059 if (pa > 1)
1060 {
1061 *buf++ = ((char*)cc)[3];
1062 *buf++ = ((char*)cc)[2];
1063 }
1064 }
1065
1066 len += pa*sizeof(wxUint16);
1067 psz++;
1068 }
1069 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1070
1071 return len;
1072 }
1073
1074 #endif // WC_UTF16
1075
1076
1077 // ----------------------------------------------------------------------------
1078 // UTF-32
1079 // ----------------------------------------------------------------------------
1080
// "straight" is the UTF-32 converter for the byte order selected by
// WORDS_BIGENDIAN and "swap" the one for the opposite byte order
#if defined(WORDS_BIGENDIAN)
    #define wxMBConvUTF32straight wxMBConvUTF32BE
    #define wxMBConvUTF32swap     wxMBConvUTF32LE
#else // !WORDS_BIGENDIAN
    #define wxMBConvUTF32straight wxMBConvUTF32LE
    #define wxMBConvUTF32swap     wxMBConvUTF32BE
#endif // WORDS_BIGENDIAN/!WORDS_BIGENDIAN
1088
1089
1090 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1091 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1092
1093
1094 #ifdef WC_UTF16
1095
1096 // copy 32bit MB to 16bit String
1097 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1098 {
1099 size_t len=0;
1100
1101 while (*(wxUint32*)psz && (!buf || len < n))
1102 {
1103 wxUint16 cc[2];
1104
1105 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1106 if (pa == (size_t)-1)
1107 return pa;
1108
1109 if (buf)
1110 {
1111 *buf++ = cc[0];
1112 if (pa > 1)
1113 *buf++ = cc[1];
1114 }
1115 len += pa;
1116 psz += sizeof(wxUint32);
1117 }
1118 if (buf && len<n) *buf=0;
1119
1120 return len;
1121 }
1122
1123
1124 // copy 16bit String to 32bit MB
1125 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1126 {
1127 size_t len=0;
1128
1129 while (*psz && (!buf || len < n))
1130 {
1131 wxUint32 cc;
1132
1133 // cast is ok for WC_UTF16
1134 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1135 if (pa == (size_t)-1)
1136 return pa;
1137
1138 if (buf)
1139 {
1140 *(wxUint32*)buf = cc;
1141 buf += sizeof(wxUint32);
1142 }
1143 len += sizeof(wxUint32);
1144 psz += pa;
1145 }
1146
1147 if (buf && len<=n-sizeof(wxUint32))
1148 *(wxUint32*)buf=0;
1149
1150 return len;
1151 }
1152
1153
1154
1155 // swap 32bit MB to 16bit String
1156 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1157 {
1158 size_t len=0;
1159
1160 while (*(wxUint32*)psz && (!buf || len < n))
1161 {
1162 char tmp[4];
1163 tmp[0] = psz[3]; tmp[1] = psz[2];
1164 tmp[2] = psz[1]; tmp[3] = psz[0];
1165
1166
1167 wxUint16 cc[2];
1168
1169 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1170 if (pa == (size_t)-1)
1171 return pa;
1172
1173 if (buf)
1174 {
1175 *buf++ = cc[0];
1176 if (pa > 1)
1177 *buf++ = cc[1];
1178 }
1179 len += pa;
1180 psz += sizeof(wxUint32);
1181 }
1182
1183 if (buf && len<n)
1184 *buf=0;
1185
1186 return len;
1187 }
1188
1189
1190 // swap 16bit String to 32bit MB
1191 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1192 {
1193 size_t len=0;
1194
1195 while (*psz && (!buf || len < n))
1196 {
1197 char cc[4];
1198
1199 // cast is ok for WC_UTF16
1200 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1201 if (pa == (size_t)-1)
1202 return pa;
1203
1204 if (buf)
1205 {
1206 *buf++ = cc[3];
1207 *buf++ = cc[2];
1208 *buf++ = cc[1];
1209 *buf++ = cc[0];
1210 }
1211 len += sizeof(wxUint32);
1212 psz += pa;
1213 }
1214
1215 if (buf && len<=n-sizeof(wxUint32))
1216 *(wxUint32*)buf=0;
1217
1218 return len;
1219 }
1220
1221 #else // WC_UTF16
1222
1223
1224 // copy 32bit MB to 32bit String
1225 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1226 {
1227 size_t len=0;
1228
1229 while (*(wxUint32*)psz && (!buf || len < n))
1230 {
1231 if (buf)
1232 *buf++ = (wchar_t)(*(wxUint32*)psz);
1233 len++;
1234 psz += sizeof(wxUint32);
1235 }
1236
1237 if (buf && len<n)
1238 *buf=0;
1239
1240 return len;
1241 }
1242
1243
1244 // copy 32bit String to 32bit MB
1245 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1246 {
1247 size_t len=0;
1248
1249 while (*psz && (!buf || len < n))
1250 {
1251 if (buf)
1252 {
1253 *(wxUint32*)buf = *psz;
1254 buf += sizeof(wxUint32);
1255 }
1256
1257 len += sizeof(wxUint32);
1258 psz++;
1259 }
1260
1261 if (buf && len<=n-sizeof(wxUint32))
1262 *(wxUint32*)buf=0;
1263
1264 return len;
1265 }
1266
1267
1268 // swap 32bit MB to 32bit String
1269 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1270 {
1271 size_t len=0;
1272
1273 while (*(wxUint32*)psz && (!buf || len < n))
1274 {
1275 if (buf)
1276 {
1277 ((char *)buf)[0] = psz[3];
1278 ((char *)buf)[1] = psz[2];
1279 ((char *)buf)[2] = psz[1];
1280 ((char *)buf)[3] = psz[0];
1281 buf++;
1282 }
1283 len++;
1284 psz += sizeof(wxUint32);
1285 }
1286
1287 if (buf && len<n)
1288 *buf=0;
1289
1290 return len;
1291 }
1292
1293
1294 // swap 32bit String to 32bit MB
1295 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1296 {
1297 size_t len=0;
1298
1299 while (*psz && (!buf || len < n))
1300 {
1301 if (buf)
1302 {
1303 *buf++ = ((char *)psz)[3];
1304 *buf++ = ((char *)psz)[2];
1305 *buf++ = ((char *)psz)[1];
1306 *buf++ = ((char *)psz)[0];
1307 }
1308 len += sizeof(wxUint32);
1309 psz++;
1310 }
1311
1312 if (buf && len<=n-sizeof(wxUint32))
1313 *(wxUint32*)buf=0;
1314
1315 return len;
1316 }
1317
1318
1319 #endif // WC_UTF16
1320
1321
1322 // ============================================================================
1323 // The classes doing conversion using the iconv_xxx() functions
1324 // ============================================================================
1325
1326 #ifdef HAVE_ICONV
1327
1328 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1329 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1330 // (unless there's yet another bug in glibc) the only case when iconv()
1331 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1332 // left in the input buffer -- when _real_ error occurs,
1333 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1334 // iconv() failure.
1335 // [This bug does not appear in glibc 2.2.]
1336 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1337 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1338 (errno != E2BIG || bufLeft != 0))
1339 #else
1340 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1341 #endif
1342
1343 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1344
1345 #define ICONV_T_INVALID ((iconv_t)-1)
1346
1347 #if SIZEOF_WCHAR_T == 4
1348 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1349 #define WC_ENC wxFONTENCODING_UTF32
1350 #elif SIZEOF_WCHAR_T == 2
1351 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1352 #define WC_ENC wxFONTENCODING_UTF16
1353 #else // sizeof(wchar_t) != 2 nor 4
1354 // does this ever happen?
1355 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1356 #endif
1357
1358 // ----------------------------------------------------------------------------
1359 // wxMBConv_iconv: encapsulates an iconv character set
1360 // ----------------------------------------------------------------------------
1361
1362 class wxMBConv_iconv : public wxMBConv
1363 {
1364 public:
1365 wxMBConv_iconv(const wxChar *name);
1366 virtual ~wxMBConv_iconv();
1367
1368 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1369 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1370
1371 bool IsOk() const
1372 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1373
1374 protected:
1375 // the iconv handlers used to translate from multibyte to wide char and in
1376 // the other direction
1377 iconv_t m2w,
1378 w2m;
1379 #if wxUSE_THREADS
1380 // guards access to m2w and w2m objects
1381 wxMutex m_iconvMutex;
1382 #endif
1383
1384 private:
1385 // classify this encoding as explained in wxMBConv::GetMinMBCharWidth()
1386 // comment
1387 virtual size_t GetMinMBCharWidth() const;
1388
1389 // the name (for iconv_open()) of a wide char charset -- if none is
1390 // available on this machine, it will remain NULL
1391 static wxString ms_wcCharsetName;
1392
1393 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1394 // different endian-ness than the native one
1395 static bool ms_wcNeedsSwap;
1396
1397 // cached result of GetMinMBCharWidth(); set to 0 meaning "unknown"
1398 // initially
1399 size_t m_minMBCharWidth;
1400 };
1401
1402 // make the constructor available for unit testing
1403 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1404 {
1405 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1406 if ( !result->IsOk() )
1407 {
1408 delete result;
1409 return 0;
1410 }
1411 return result;
1412 }
1413
1414 wxString wxMBConv_iconv::ms_wcCharsetName;
1415 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1416
1417 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1418 {
1419 m_minMBCharWidth = 0;
1420
1421 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1422 // names for the charsets
1423 const wxCharBuffer cname(wxString(name).ToAscii());
1424
1425 // check for charset that represents wchar_t:
1426 if ( ms_wcCharsetName.empty() )
1427 {
1428 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1429
1430 #if wxUSE_FONTMAP
1431 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1432 #else // !wxUSE_FONTMAP
1433 static const wxChar *names[] =
1434 {
1435 #if SIZEOF_WCHAR_T == 4
1436 _T("UCS-4"),
1437 #elif SIZEOF_WCHAR_T = 2
1438 _T("UCS-2"),
1439 #endif
1440 NULL
1441 };
1442 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1443
1444 for ( ; *names && ms_wcCharsetName.empty(); ++names )
1445 {
1446 const wxString nameCS(*names);
1447
1448 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1449 wxString nameXE(nameCS);
1450 #ifdef WORDS_BIGENDIAN
1451 nameXE += _T("BE");
1452 #else // little endian
1453 nameXE += _T("LE");
1454 #endif
1455
1456 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1457 nameXE.c_str());
1458
1459 m2w = iconv_open(nameXE.ToAscii(), cname);
1460 if ( m2w == ICONV_T_INVALID )
1461 {
1462 // try charset w/o bytesex info (e.g. "UCS4")
1463 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1464 nameCS.c_str());
1465 m2w = iconv_open(nameCS.ToAscii(), cname);
1466
1467 // and check for bytesex ourselves:
1468 if ( m2w != ICONV_T_INVALID )
1469 {
1470 char buf[2], *bufPtr;
1471 wchar_t wbuf[2], *wbufPtr;
1472 size_t insz, outsz;
1473 size_t res;
1474
1475 buf[0] = 'A';
1476 buf[1] = 0;
1477 wbuf[0] = 0;
1478 insz = 2;
1479 outsz = SIZEOF_WCHAR_T * 2;
1480 wbufPtr = wbuf;
1481 bufPtr = buf;
1482
1483 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1484 (char**)&wbufPtr, &outsz);
1485
1486 if (ICONV_FAILED(res, insz))
1487 {
1488 wxLogLastError(wxT("iconv"));
1489 wxLogError(_("Conversion to charset '%s' doesn't work."),
1490 nameCS.c_str());
1491 }
1492 else // ok, can convert to this encoding, remember it
1493 {
1494 ms_wcCharsetName = nameCS;
1495 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1496 }
1497 }
1498 }
1499 else // use charset not requiring byte swapping
1500 {
1501 ms_wcCharsetName = nameXE;
1502 }
1503 }
1504
1505 wxLogTrace(TRACE_STRCONV,
1506 wxT("iconv wchar_t charset is \"%s\"%s"),
1507 ms_wcCharsetName.empty() ? _T("<none>")
1508 : ms_wcCharsetName.c_str(),
1509 ms_wcNeedsSwap ? _T(" (needs swap)")
1510 : _T(""));
1511 }
1512 else // we already have ms_wcCharsetName
1513 {
1514 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1515 }
1516
1517 if ( ms_wcCharsetName.empty() )
1518 {
1519 w2m = ICONV_T_INVALID;
1520 }
1521 else
1522 {
1523 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1524 if ( w2m == ICONV_T_INVALID )
1525 {
1526 wxLogTrace(TRACE_STRCONV,
1527 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1528 ms_wcCharsetName.c_str(), cname.data());
1529 }
1530 }
1531 }
1532
1533 wxMBConv_iconv::~wxMBConv_iconv()
1534 {
1535 if ( m2w != ICONV_T_INVALID )
1536 iconv_close(m2w);
1537 if ( w2m != ICONV_T_INVALID )
1538 iconv_close(w2m);
1539 }
1540
1541 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1542 {
1543 #if wxUSE_THREADS
1544 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1545 // Unfortunately there is a couple of global wxCSConv objects such as
1546 // wxConvLocal that are used all over wx code, so we have to make sure
1547 // the handle is used by at most one thread at the time. Otherwise
1548 // only a few wx classes would be safe to use from non-main threads
1549 // as MB<->WC conversion would fail "randomly".
1550 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1551 #endif
1552
1553 size_t inbuf = strlen(psz);
1554 size_t outbuf = n * SIZEOF_WCHAR_T;
1555 size_t res, cres;
1556 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1557 wchar_t *bufPtr = buf;
1558 const char *pszPtr = psz;
1559
1560 if (buf)
1561 {
1562 // have destination buffer, convert there
1563 cres = iconv(m2w,
1564 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1565 (char**)&bufPtr, &outbuf);
1566 res = n - (outbuf / SIZEOF_WCHAR_T);
1567
1568 if (ms_wcNeedsSwap)
1569 {
1570 // convert to native endianness
1571 for ( unsigned i = 0; i < res; i++ )
1572 buf[n] = WC_BSWAP(buf[i]);
1573 }
1574
1575 // NB: iconv was given only strlen(psz) characters on input, and so
1576 // it couldn't convert the trailing zero. Let's do it ourselves
1577 // if there's some room left for it in the output buffer.
1578 if (res < n)
1579 buf[res] = 0;
1580 }
1581 else
1582 {
1583 // no destination buffer... convert using temp buffer
1584 // to calculate destination buffer requirement
1585 wchar_t tbuf[8];
1586 res = 0;
1587 do {
1588 bufPtr = tbuf;
1589 outbuf = 8*SIZEOF_WCHAR_T;
1590
1591 cres = iconv(m2w,
1592 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1593 (char**)&bufPtr, &outbuf );
1594
1595 res += 8-(outbuf/SIZEOF_WCHAR_T);
1596 } while ((cres==(size_t)-1) && (errno==E2BIG));
1597 }
1598
1599 if (ICONV_FAILED(cres, inbuf))
1600 {
1601 //VS: it is ok if iconv fails, hence trace only
1602 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1603 return (size_t)-1;
1604 }
1605
1606 return res;
1607 }
1608
1609 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1610 {
1611 #if wxUSE_THREADS
1612 // NB: explained in MB2WC
1613 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1614 #endif
1615
1616 size_t inlen = wxWcslen(psz);
1617 size_t inbuf = inlen * SIZEOF_WCHAR_T;
1618 size_t outbuf = n;
1619 size_t res, cres;
1620
1621 wchar_t *tmpbuf = 0;
1622
1623 if (ms_wcNeedsSwap)
1624 {
1625 // need to copy to temp buffer to switch endianness
1626 // (doing WC_BSWAP twice on the original buffer won't help, as it
1627 // could be in read-only memory, or be accessed in some other thread)
1628 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1629 for ( size_t i = 0; i < inlen; i++ )
1630 tmpbuf[n] = WC_BSWAP(psz[i]);
1631 tmpbuf[inlen] = L'\0';
1632 psz = tmpbuf;
1633 }
1634
1635 if (buf)
1636 {
1637 // have destination buffer, convert there
1638 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1639
1640 res = n-outbuf;
1641
1642 // NB: iconv was given only wcslen(psz) characters on input, and so
1643 // it couldn't convert the trailing zero. Let's do it ourselves
1644 // if there's some room left for it in the output buffer.
1645 if (res < n)
1646 buf[0] = 0;
1647 }
1648 else
1649 {
1650 // no destination buffer... convert using temp buffer
1651 // to calculate destination buffer requirement
1652 char tbuf[16];
1653 res = 0;
1654 do {
1655 buf = tbuf; outbuf = 16;
1656
1657 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1658
1659 res += 16 - outbuf;
1660 } while ((cres==(size_t)-1) && (errno==E2BIG));
1661 }
1662
1663 if (ms_wcNeedsSwap)
1664 {
1665 free(tmpbuf);
1666 }
1667
1668 if (ICONV_FAILED(cres, inbuf))
1669 {
1670 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1671 return (size_t)-1;
1672 }
1673
1674 return res;
1675 }
1676
1677 size_t wxMBConv_iconv::GetMinMBCharWidth() const
1678 {
1679 if ( m_minMBCharWidth == 0 )
1680 {
1681 wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1682
1683 #if wxUSE_THREADS
1684 // NB: explained in MB2WC
1685 wxMutexLocker lock(self->m_iconvMutex);
1686 #endif
1687
1688 wchar_t *wnul = L"";
1689 char buf[8]; // should be enough for NUL in any encoding
1690 size_t inLen = sizeof(wchar_t),
1691 outLen = WXSIZEOF(buf);
1692 char *in = (char *)wnul;
1693 char *out = buf;
1694 if ( iconv(w2m, ICONV_CHAR_CAST(&in), &inLen, &out, &outLen) == (size_t)-1 )
1695 {
1696 self->m_minMBCharWidth = (size_t)-1;
1697 }
1698 else // ok
1699 {
1700 self->m_minMBCharWidth = out - buf;
1701 }
1702 }
1703
1704 return m_minMBCharWidth;
1705 }
1706
1707 #endif // HAVE_ICONV
1708
1709
1710 // ============================================================================
1711 // Win32 conversion classes
1712 // ============================================================================
1713
1714 #ifdef wxHAVE_WIN32_MB2WC
1715
1716 // from utils.cpp
1717 #if wxUSE_FONTMAP
1718 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1719 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1720 #endif
1721
1722 class wxMBConv_win32 : public wxMBConv
1723 {
1724 public:
1725 wxMBConv_win32()
1726 {
1727 m_CodePage = CP_ACP;
1728 m_minMBCharWidth = 0;
1729 }
1730
1731 #if wxUSE_FONTMAP
1732 wxMBConv_win32(const wxChar* name)
1733 {
1734 m_CodePage = wxCharsetToCodepage(name);
1735 m_minMBCharWidth = 0;
1736 }
1737
1738 wxMBConv_win32(wxFontEncoding encoding)
1739 {
1740 m_CodePage = wxEncodingToCodepage(encoding);
1741 m_minMBCharWidth = 0;
1742 }
1743 #endif // wxUSE_FONTMAP
1744
1745 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1746 {
1747 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1748 // the behaviour is not compatible with the Unix version (using iconv)
1749 // and break the library itself, e.g. wxTextInputStream::NextChar()
1750 // wouldn't work if reading an incomplete MB char didn't result in an
1751 // error
1752 //
1753 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1754 // an error (tested under Windows Server 2003) and apparently it is
1755 // done on purpose, i.e. the function accepts any input in this case
1756 // and although I'd prefer to return error on ill-formed output, our
1757 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1758 // explicitly ill-formed according to RFC 2152) neither so we don't
1759 // even have any fallback here...
1760 //
1761 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1762 // Win XP or newer and if it is specified on older versions, conversion
1763 // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1764 // fails. So we can only use the flag on newer Windows versions.
1765 // Additionally, the flag is not supported by UTF7, symbol and CJK
1766 // encodings. See here:
1767 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1768 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1769 int flags = 0;
1770 if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
1771 m_CodePage < 50000 &&
1772 IsAtLeastWin2kSP4() )
1773 {
1774 flags = MB_ERR_INVALID_CHARS;
1775 }
1776 else if ( m_CodePage == CP_UTF8 )
1777 {
1778 // Avoid round-trip in the special case of UTF-8 by using our
1779 // own UTF-8 conversion code:
1780 return wxMBConvUTF8().MB2WC(buf, psz, n);
1781 }
1782
1783 const size_t len = ::MultiByteToWideChar
1784 (
1785 m_CodePage, // code page
1786 flags, // flags: fall on error
1787 psz, // input string
1788 -1, // its length (NUL-terminated)
1789 buf, // output string
1790 buf ? n : 0 // size of output buffer
1791 );
1792 if ( !len )
1793 {
1794 // function totally failed
1795 return (size_t)-1;
1796 }
1797
1798 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1799 // check if we succeeded, by doing a double trip:
1800 if ( !flags && buf )
1801 {
1802 const size_t mbLen = strlen(psz);
1803 wxCharBuffer mbBuf(mbLen);
1804 if ( ::WideCharToMultiByte
1805 (
1806 m_CodePage,
1807 0,
1808 buf,
1809 -1,
1810 mbBuf.data(),
1811 mbLen + 1, // size in bytes, not length
1812 NULL,
1813 NULL
1814 ) == 0 ||
1815 strcmp(mbBuf, psz) != 0 )
1816 {
1817 // we didn't obtain the same thing we started from, hence
1818 // the conversion was lossy and we consider that it failed
1819 return (size_t)-1;
1820 }
1821 }
1822
1823 // note that it returns count of written chars for buf != NULL and size
1824 // of the needed buffer for buf == NULL so in either case the length of
1825 // the string (which never includes the terminating NUL) is one less
1826 return len - 1;
1827 }
1828
1829 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1830 {
1831 /*
1832 we have a problem here: by default, WideCharToMultiByte() may
1833 replace characters unrepresentable in the target code page with bad
1834 quality approximations such as turning "1/2" symbol (U+00BD) into
1835 "1" for the code pages which don't have it and we, obviously, want
1836 to avoid this at any price
1837
1838 the trouble is that this function does it _silently_, i.e. it won't
1839 even tell us whether it did or not... Win98/2000 and higher provide
1840 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1841 we have to resort to a round trip, i.e. check that converting back
1842 results in the same string -- this is, of course, expensive but
1843 otherwise we simply can't be sure to not garble the data.
1844 */
1845
1846 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1847 // it doesn't work with CJK encodings (which we test for rather roughly
1848 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1849 // supporting it
1850 BOOL usedDef wxDUMMY_INITIALIZE(false);
1851 BOOL *pUsedDef;
1852 int flags;
1853 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1854 {
1855 // it's our lucky day
1856 flags = WC_NO_BEST_FIT_CHARS;
1857 pUsedDef = &usedDef;
1858 }
1859 else // old system or unsupported encoding
1860 {
1861 flags = 0;
1862 pUsedDef = NULL;
1863 }
1864
1865 const size_t len = ::WideCharToMultiByte
1866 (
1867 m_CodePage, // code page
1868 flags, // either none or no best fit
1869 pwz, // input string
1870 -1, // it is (wide) NUL-terminated
1871 buf, // output buffer
1872 buf ? n : 0, // and its size
1873 NULL, // default "replacement" char
1874 pUsedDef // [out] was it used?
1875 );
1876
1877 if ( !len )
1878 {
1879 // function totally failed
1880 return (size_t)-1;
1881 }
1882
1883 // if we were really converting, check if we succeeded
1884 if ( buf )
1885 {
1886 if ( flags )
1887 {
1888 // check if the conversion failed, i.e. if any replacements
1889 // were done
1890 if ( usedDef )
1891 return (size_t)-1;
1892 }
1893 else // we must resort to double tripping...
1894 {
1895 wxWCharBuffer wcBuf(n);
1896 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1897 wcscmp(wcBuf, pwz) != 0 )
1898 {
1899 // we didn't obtain the same thing we started from, hence
1900 // the conversion was lossy and we consider that it failed
1901 return (size_t)-1;
1902 }
1903 }
1904 }
1905
1906 // see the comment above for the reason of "len - 1"
1907 return len - 1;
1908 }
1909
1910 bool IsOk() const { return m_CodePage != -1; }
1911
1912 private:
1913 static bool CanUseNoBestFit()
1914 {
1915 static int s_isWin98Or2k = -1;
1916
1917 if ( s_isWin98Or2k == -1 )
1918 {
1919 int verMaj, verMin;
1920 switch ( wxGetOsVersion(&verMaj, &verMin) )
1921 {
1922 case wxWIN95:
1923 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1924 break;
1925
1926 case wxWINDOWS_NT:
1927 s_isWin98Or2k = verMaj >= 5;
1928 break;
1929
1930 default:
1931 // unknown, be conseravtive by default
1932 s_isWin98Or2k = 0;
1933 }
1934
1935 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1936 }
1937
1938 return s_isWin98Or2k == 1;
1939 }
1940
1941 static bool IsAtLeastWin2kSP4()
1942 {
1943 #ifdef __WXWINCE__
1944 return false;
1945 #else
1946 static int s_isAtLeastWin2kSP4 = -1;
1947
1948 if ( s_isAtLeastWin2kSP4 == -1 )
1949 {
1950 OSVERSIONINFOEX ver;
1951
1952 memset(&ver, 0, sizeof(ver));
1953 ver.dwOSVersionInfoSize = sizeof(ver);
1954 GetVersionEx((OSVERSIONINFO*)&ver);
1955
1956 s_isAtLeastWin2kSP4 =
1957 ((ver.dwMajorVersion > 5) || // Vista+
1958 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
1959 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
1960 ver.wServicePackMajor >= 4)) // 2000 SP4+
1961 ? 1 : 0;
1962 }
1963
1964 return s_isAtLeastWin2kSP4 == 1;
1965 #endif
1966 }
1967
1968 virtual size_t GetMinMBCharWidth() const
1969 {
1970 if ( m_minMBCharWidth == 0 )
1971 {
1972 int len = ::WideCharToMultiByte
1973 (
1974 m_CodePage, // code page
1975 0, // no flags
1976 L"", // input string
1977 1, // translate just the NUL
1978 NULL, // output buffer
1979 0, // and its size
1980 NULL, // no replacement char
1981 NULL // [out] don't care if it was used
1982 );
1983
1984 wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
1985 switch ( len )
1986 {
1987 default:
1988 wxLogDebug(_T("Unexpected NUL length %d"), len);
1989 // fall through
1990
1991 case 0:
1992 self->m_minMBCharWidth = (size_t)-1;
1993 break;
1994
1995 case 1:
1996 case 2:
1997 case 4:
1998 self->m_minMBCharWidth = len;
1999 break;
2000 }
2001 }
2002
2003 return m_minMBCharWidth;
2004 }
2005
2006 // the code page we're working with
2007 long m_CodePage;
2008
2009 // cached result of GetMinMBCharWidth(), set to 0 initially meaning
2010 // "unknown"
2011 size_t m_minMBCharWidth;
2012 };
2013
2014 #endif // wxHAVE_WIN32_MB2WC
2015
2016 // ============================================================================
2017 // Cocoa conversion classes
2018 // ============================================================================
2019
2020 #if defined(__WXCOCOA__)
2021
// RN: There is no UTF-32 support in either Core Foundation or
// Cocoa. Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
2025
2026 #include <CoreFoundation/CFString.h>
2027 #include <CoreFoundation/CFStringEncodingExt.h>
2028
2029 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
2030 {
2031 CFStringEncoding enc = kCFStringEncodingInvalidId ;
2032 if ( encoding == wxFONTENCODING_DEFAULT )
2033 {
2034 enc = CFStringGetSystemEncoding();
2035 }
2036 else switch( encoding)
2037 {
2038 case wxFONTENCODING_ISO8859_1 :
2039 enc = kCFStringEncodingISOLatin1 ;
2040 break ;
2041 case wxFONTENCODING_ISO8859_2 :
2042 enc = kCFStringEncodingISOLatin2;
2043 break ;
2044 case wxFONTENCODING_ISO8859_3 :
2045 enc = kCFStringEncodingISOLatin3 ;
2046 break ;
2047 case wxFONTENCODING_ISO8859_4 :
2048 enc = kCFStringEncodingISOLatin4;
2049 break ;
2050 case wxFONTENCODING_ISO8859_5 :
2051 enc = kCFStringEncodingISOLatinCyrillic;
2052 break ;
2053 case wxFONTENCODING_ISO8859_6 :
2054 enc = kCFStringEncodingISOLatinArabic;
2055 break ;
2056 case wxFONTENCODING_ISO8859_7 :
2057 enc = kCFStringEncodingISOLatinGreek;
2058 break ;
2059 case wxFONTENCODING_ISO8859_8 :
2060 enc = kCFStringEncodingISOLatinHebrew;
2061 break ;
2062 case wxFONTENCODING_ISO8859_9 :
2063 enc = kCFStringEncodingISOLatin5;
2064 break ;
2065 case wxFONTENCODING_ISO8859_10 :
2066 enc = kCFStringEncodingISOLatin6;
2067 break ;
2068 case wxFONTENCODING_ISO8859_11 :
2069 enc = kCFStringEncodingISOLatinThai;
2070 break ;
2071 case wxFONTENCODING_ISO8859_13 :
2072 enc = kCFStringEncodingISOLatin7;
2073 break ;
2074 case wxFONTENCODING_ISO8859_14 :
2075 enc = kCFStringEncodingISOLatin8;
2076 break ;
2077 case wxFONTENCODING_ISO8859_15 :
2078 enc = kCFStringEncodingISOLatin9;
2079 break ;
2080
2081 case wxFONTENCODING_KOI8 :
2082 enc = kCFStringEncodingKOI8_R;
2083 break ;
2084 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2085 enc = kCFStringEncodingDOSRussian;
2086 break ;
2087
2088 // case wxFONTENCODING_BULGARIAN :
2089 // enc = ;
2090 // break ;
2091
2092 case wxFONTENCODING_CP437 :
2093 enc =kCFStringEncodingDOSLatinUS ;
2094 break ;
2095 case wxFONTENCODING_CP850 :
2096 enc = kCFStringEncodingDOSLatin1;
2097 break ;
2098 case wxFONTENCODING_CP852 :
2099 enc = kCFStringEncodingDOSLatin2;
2100 break ;
2101 case wxFONTENCODING_CP855 :
2102 enc = kCFStringEncodingDOSCyrillic;
2103 break ;
2104 case wxFONTENCODING_CP866 :
2105 enc =kCFStringEncodingDOSRussian ;
2106 break ;
2107 case wxFONTENCODING_CP874 :
2108 enc = kCFStringEncodingDOSThai;
2109 break ;
2110 case wxFONTENCODING_CP932 :
2111 enc = kCFStringEncodingDOSJapanese;
2112 break ;
2113 case wxFONTENCODING_CP936 :
2114 enc =kCFStringEncodingDOSChineseSimplif ;
2115 break ;
2116 case wxFONTENCODING_CP949 :
2117 enc = kCFStringEncodingDOSKorean;
2118 break ;
2119 case wxFONTENCODING_CP950 :
2120 enc = kCFStringEncodingDOSChineseTrad;
2121 break ;
2122 case wxFONTENCODING_CP1250 :
2123 enc = kCFStringEncodingWindowsLatin2;
2124 break ;
2125 case wxFONTENCODING_CP1251 :
2126 enc =kCFStringEncodingWindowsCyrillic ;
2127 break ;
2128 case wxFONTENCODING_CP1252 :
2129 enc =kCFStringEncodingWindowsLatin1 ;
2130 break ;
2131 case wxFONTENCODING_CP1253 :
2132 enc = kCFStringEncodingWindowsGreek;
2133 break ;
2134 case wxFONTENCODING_CP1254 :
2135 enc = kCFStringEncodingWindowsLatin5;
2136 break ;
2137 case wxFONTENCODING_CP1255 :
2138 enc =kCFStringEncodingWindowsHebrew ;
2139 break ;
2140 case wxFONTENCODING_CP1256 :
2141 enc =kCFStringEncodingWindowsArabic ;
2142 break ;
2143 case wxFONTENCODING_CP1257 :
2144 enc = kCFStringEncodingWindowsBalticRim;
2145 break ;
2146 // This only really encodes to UTF7 (if that) evidently
2147 // case wxFONTENCODING_UTF7 :
2148 // enc = kCFStringEncodingNonLossyASCII ;
2149 // break ;
2150 case wxFONTENCODING_UTF8 :
2151 enc = kCFStringEncodingUTF8 ;
2152 break ;
2153 case wxFONTENCODING_EUC_JP :
2154 enc = kCFStringEncodingEUC_JP;
2155 break ;
2156 case wxFONTENCODING_UTF16 :
2157 enc = kCFStringEncodingUnicode ;
2158 break ;
2159 case wxFONTENCODING_MACROMAN :
2160 enc = kCFStringEncodingMacRoman ;
2161 break ;
2162 case wxFONTENCODING_MACJAPANESE :
2163 enc = kCFStringEncodingMacJapanese ;
2164 break ;
2165 case wxFONTENCODING_MACCHINESETRAD :
2166 enc = kCFStringEncodingMacChineseTrad ;
2167 break ;
2168 case wxFONTENCODING_MACKOREAN :
2169 enc = kCFStringEncodingMacKorean ;
2170 break ;
2171 case wxFONTENCODING_MACARABIC :
2172 enc = kCFStringEncodingMacArabic ;
2173 break ;
2174 case wxFONTENCODING_MACHEBREW :
2175 enc = kCFStringEncodingMacHebrew ;
2176 break ;
2177 case wxFONTENCODING_MACGREEK :
2178 enc = kCFStringEncodingMacGreek ;
2179 break ;
2180 case wxFONTENCODING_MACCYRILLIC :
2181 enc = kCFStringEncodingMacCyrillic ;
2182 break ;
2183 case wxFONTENCODING_MACDEVANAGARI :
2184 enc = kCFStringEncodingMacDevanagari ;
2185 break ;
2186 case wxFONTENCODING_MACGURMUKHI :
2187 enc = kCFStringEncodingMacGurmukhi ;
2188 break ;
2189 case wxFONTENCODING_MACGUJARATI :
2190 enc = kCFStringEncodingMacGujarati ;
2191 break ;
2192 case wxFONTENCODING_MACORIYA :
2193 enc = kCFStringEncodingMacOriya ;
2194 break ;
2195 case wxFONTENCODING_MACBENGALI :
2196 enc = kCFStringEncodingMacBengali ;
2197 break ;
2198 case wxFONTENCODING_MACTAMIL :
2199 enc = kCFStringEncodingMacTamil ;
2200 break ;
2201 case wxFONTENCODING_MACTELUGU :
2202 enc = kCFStringEncodingMacTelugu ;
2203 break ;
2204 case wxFONTENCODING_MACKANNADA :
2205 enc = kCFStringEncodingMacKannada ;
2206 break ;
2207 case wxFONTENCODING_MACMALAJALAM :
2208 enc = kCFStringEncodingMacMalayalam ;
2209 break ;
2210 case wxFONTENCODING_MACSINHALESE :
2211 enc = kCFStringEncodingMacSinhalese ;
2212 break ;
2213 case wxFONTENCODING_MACBURMESE :
2214 enc = kCFStringEncodingMacBurmese ;
2215 break ;
2216 case wxFONTENCODING_MACKHMER :
2217 enc = kCFStringEncodingMacKhmer ;
2218 break ;
2219 case wxFONTENCODING_MACTHAI :
2220 enc = kCFStringEncodingMacThai ;
2221 break ;
2222 case wxFONTENCODING_MACLAOTIAN :
2223 enc = kCFStringEncodingMacLaotian ;
2224 break ;
2225 case wxFONTENCODING_MACGEORGIAN :
2226 enc = kCFStringEncodingMacGeorgian ;
2227 break ;
2228 case wxFONTENCODING_MACARMENIAN :
2229 enc = kCFStringEncodingMacArmenian ;
2230 break ;
2231 case wxFONTENCODING_MACCHINESESIMP :
2232 enc = kCFStringEncodingMacChineseSimp ;
2233 break ;
2234 case wxFONTENCODING_MACTIBETAN :
2235 enc = kCFStringEncodingMacTibetan ;
2236 break ;
2237 case wxFONTENCODING_MACMONGOLIAN :
2238 enc = kCFStringEncodingMacMongolian ;
2239 break ;
2240 case wxFONTENCODING_MACETHIOPIC :
2241 enc = kCFStringEncodingMacEthiopic ;
2242 break ;
2243 case wxFONTENCODING_MACCENTRALEUR :
2244 enc = kCFStringEncodingMacCentralEurRoman ;
2245 break ;
2246 case wxFONTENCODING_MACVIATNAMESE :
2247 enc = kCFStringEncodingMacVietnamese ;
2248 break ;
2249 case wxFONTENCODING_MACARABICEXT :
2250 enc = kCFStringEncodingMacExtArabic ;
2251 break ;
2252 case wxFONTENCODING_MACSYMBOL :
2253 enc = kCFStringEncodingMacSymbol ;
2254 break ;
2255 case wxFONTENCODING_MACDINGBATS :
2256 enc = kCFStringEncodingMacDingbats ;
2257 break ;
2258 case wxFONTENCODING_MACTURKISH :
2259 enc = kCFStringEncodingMacTurkish ;
2260 break ;
2261 case wxFONTENCODING_MACCROATIAN :
2262 enc = kCFStringEncodingMacCroatian ;
2263 break ;
2264 case wxFONTENCODING_MACICELANDIC :
2265 enc = kCFStringEncodingMacIcelandic ;
2266 break ;
2267 case wxFONTENCODING_MACROMANIAN :
2268 enc = kCFStringEncodingMacRomanian ;
2269 break ;
2270 case wxFONTENCODING_MACCELTIC :
2271 enc = kCFStringEncodingMacCeltic ;
2272 break ;
2273 case wxFONTENCODING_MACGAELIC :
2274 enc = kCFStringEncodingMacGaelic ;
2275 break ;
2276 // case wxFONTENCODING_MACKEYBOARD :
2277 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2278 // break ;
2279 default :
2280 // because gcc is picky
2281 break ;
2282 } ;
2283 return enc ;
2284 }
2285
2286 class wxMBConv_cocoa : public wxMBConv
2287 {
2288 public:
2289 wxMBConv_cocoa()
2290 {
2291 Init(CFStringGetSystemEncoding()) ;
2292 }
2293
2294 #if wxUSE_FONTMAP
2295 wxMBConv_cocoa(const wxChar* name)
2296 {
2297 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2298 }
2299 #endif
2300
2301 wxMBConv_cocoa(wxFontEncoding encoding)
2302 {
2303 Init( wxCFStringEncFromFontEnc(encoding) );
2304 }
2305
2306 ~wxMBConv_cocoa()
2307 {
2308 }
2309
2310 void Init( CFStringEncoding encoding)
2311 {
2312 m_encoding = encoding ;
2313 }
2314
2315 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2316 {
2317 wxASSERT(szUnConv);
2318
2319 CFStringRef theString = CFStringCreateWithBytes (
2320 NULL, //the allocator
2321 (const UInt8*)szUnConv,
2322 strlen(szUnConv),
2323 m_encoding,
2324 false //no BOM/external representation
2325 );
2326
2327 wxASSERT(theString);
2328
2329 size_t nOutLength = CFStringGetLength(theString);
2330
2331 if (szOut == NULL)
2332 {
2333 CFRelease(theString);
2334 return nOutLength;
2335 }
2336
2337 CFRange theRange = { 0, nOutSize };
2338
2339 #if SIZEOF_WCHAR_T == 4
2340 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2341 #endif
2342
2343 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2344
2345 CFRelease(theString);
2346
2347 szUniCharBuffer[nOutLength] = '\0' ;
2348
2349 #if SIZEOF_WCHAR_T == 4
2350 wxMBConvUTF16 converter ;
2351 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2352 delete[] szUniCharBuffer;
2353 #endif
2354
2355 return nOutLength;
2356 }
2357
2358 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2359 {
2360 wxASSERT(szUnConv);
2361
2362 size_t nRealOutSize;
2363 size_t nBufSize = wxWcslen(szUnConv);
2364 UniChar* szUniBuffer = (UniChar*) szUnConv;
2365
2366 #if SIZEOF_WCHAR_T == 4
2367 wxMBConvUTF16 converter ;
2368 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2369 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2370 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2371 nBufSize /= sizeof(UniChar);
2372 #endif
2373
2374 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2375 NULL, //allocator
2376 szUniBuffer,
2377 nBufSize,
2378 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2379 );
2380
2381 wxASSERT(theString);
2382
2383 //Note that CER puts a BOM when converting to unicode
2384 //so we check and use getchars instead in that case
2385 if (m_encoding == kCFStringEncodingUnicode)
2386 {
2387 if (szOut != NULL)
2388 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2389
2390 nRealOutSize = CFStringGetLength(theString) + 1;
2391 }
2392 else
2393 {
2394 CFStringGetBytes(
2395 theString,
2396 CFRangeMake(0, CFStringGetLength(theString)),
2397 m_encoding,
2398 0, //what to put in characters that can't be converted -
2399 //0 tells CFString to return NULL if it meets such a character
2400 false, //not an external representation
2401 (UInt8*) szOut,
2402 nOutSize,
2403 (CFIndex*) &nRealOutSize
2404 );
2405 }
2406
2407 CFRelease(theString);
2408
2409 #if SIZEOF_WCHAR_T == 4
2410 delete[] szUniBuffer;
2411 #endif
2412
2413 return nRealOutSize - 1;
2414 }
2415
2416 bool IsOk() const
2417 {
2418 return m_encoding != kCFStringEncodingInvalidId &&
2419 CFStringIsEncodingAvailable(m_encoding);
2420 }
2421
2422 private:
2423 CFStringEncoding m_encoding ;
2424 };
2425
2426 #endif // defined(__WXCOCOA__)
2427
2428 // ============================================================================
2429 // Mac conversion classes
2430 // ============================================================================
2431
2432 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2433
2434 class wxMBConv_mac : public wxMBConv
2435 {
2436 public:
2437 wxMBConv_mac()
2438 {
2439 Init(CFStringGetSystemEncoding()) ;
2440 }
2441
2442 #if wxUSE_FONTMAP
2443 wxMBConv_mac(const wxChar* name)
2444 {
2445 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2446 }
2447 #endif
2448
2449 wxMBConv_mac(wxFontEncoding encoding)
2450 {
2451 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2452 }
2453
2454 ~wxMBConv_mac()
2455 {
2456 OSStatus status = noErr ;
2457 status = TECDisposeConverter(m_MB2WC_converter);
2458 status = TECDisposeConverter(m_WC2MB_converter);
2459 }
2460
2461
2462 void Init( TextEncodingBase encoding)
2463 {
2464 OSStatus status = noErr ;
2465 m_char_encoding = encoding ;
2466 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2467
2468 status = TECCreateConverter(&m_MB2WC_converter,
2469 m_char_encoding,
2470 m_unicode_encoding);
2471 status = TECCreateConverter(&m_WC2MB_converter,
2472 m_unicode_encoding,
2473 m_char_encoding);
2474 }
2475
2476 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2477 {
2478 OSStatus status = noErr ;
2479 ByteCount byteOutLen ;
2480 ByteCount byteInLen = strlen(psz) ;
2481 wchar_t *tbuf = NULL ;
2482 UniChar* ubuf = NULL ;
2483 size_t res = 0 ;
2484
2485 if (buf == NULL)
2486 {
2487 //apple specs say at least 32
2488 n = wxMax( 32 , byteInLen ) ;
2489 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2490 }
2491 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2492 #if SIZEOF_WCHAR_T == 4
2493 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2494 #else
2495 ubuf = (UniChar*) (buf ? buf : tbuf) ;
2496 #endif
2497 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2498 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2499 #if SIZEOF_WCHAR_T == 4
2500 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2501 // is not properly terminated we get random characters at the end
2502 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2503 wxMBConvUTF16 converter ;
2504 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2505 free( ubuf ) ;
2506 #else
2507 res = byteOutLen / sizeof( UniChar ) ;
2508 #endif
2509 if ( buf == NULL )
2510 free(tbuf) ;
2511
2512 if ( buf && res < n)
2513 buf[res] = 0;
2514
2515 return res ;
2516 }
2517
2518 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2519 {
2520 OSStatus status = noErr ;
2521 ByteCount byteOutLen ;
2522 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2523
2524 char *tbuf = NULL ;
2525
2526 if (buf == NULL)
2527 {
2528 //apple specs say at least 32
2529 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2530 tbuf = (char*) malloc( n ) ;
2531 }
2532
2533 ByteCount byteBufferLen = n ;
2534 UniChar* ubuf = NULL ;
2535 #if SIZEOF_WCHAR_T == 4
2536 wxMBConvUTF16 converter ;
2537 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2538 byteInLen = unicharlen ;
2539 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2540 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2541 #else
2542 ubuf = (UniChar*) psz ;
2543 #endif
2544 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2545 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2546 #if SIZEOF_WCHAR_T == 4
2547 free( ubuf ) ;
2548 #endif
2549 if ( buf == NULL )
2550 free(tbuf) ;
2551
2552 size_t res = byteOutLen ;
2553 if ( buf && res < n)
2554 {
2555 buf[res] = 0;
2556
2557 //we need to double-trip to verify it didn't insert any ? in place
2558 //of bogus characters
2559 wxWCharBuffer wcBuf(n);
2560 size_t pszlen = wxWcslen(psz);
2561 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2562 wxWcslen(wcBuf) != pszlen ||
2563 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2564 {
2565 // we didn't obtain the same thing we started from, hence
2566 // the conversion was lossy and we consider that it failed
2567 return (size_t)-1;
2568 }
2569 }
2570
2571 return res ;
2572 }
2573
2574 bool IsOk() const
2575 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2576
2577 private:
2578 TECObjectRef m_MB2WC_converter ;
2579 TECObjectRef m_WC2MB_converter ;
2580
2581 TextEncodingBase m_char_encoding ;
2582 TextEncodingBase m_unicode_encoding ;
2583 };
2584
2585 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2586
2587 // ============================================================================
2588 // wxEncodingConverter based conversion classes
2589 // ============================================================================
2590
2591 #if wxUSE_FONTMAP
2592
2593 class wxMBConv_wxwin : public wxMBConv
2594 {
2595 private:
2596 void Init()
2597 {
2598 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2599 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2600 }
2601
2602 public:
2603 // temporarily just use wxEncodingConverter stuff,
2604 // so that it works while a better implementation is built
2605 wxMBConv_wxwin(const wxChar* name)
2606 {
2607 if (name)
2608 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2609 else
2610 m_enc = wxFONTENCODING_SYSTEM;
2611
2612 Init();
2613 }
2614
2615 wxMBConv_wxwin(wxFontEncoding enc)
2616 {
2617 m_enc = enc;
2618
2619 Init();
2620 }
2621
2622 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2623 {
2624 size_t inbuf = strlen(psz);
2625 if (buf)
2626 {
2627 if (!m2w.Convert(psz,buf))
2628 return (size_t)-1;
2629 }
2630 return inbuf;
2631 }
2632
2633 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2634 {
2635 const size_t inbuf = wxWcslen(psz);
2636 if (buf)
2637 {
2638 if (!w2m.Convert(psz,buf))
2639 return (size_t)-1;
2640 }
2641
2642 return inbuf;
2643 }
2644
2645 bool IsOk() const { return m_ok; }
2646
2647 public:
2648 wxFontEncoding m_enc;
2649 wxEncodingConverter m2w, w2m;
2650
2651 private:
2652 virtual size_t GetMinMBCharWidth() const
2653 {
2654 switch ( m_enc )
2655 {
2656 case wxFONTENCODING_UTF16BE:
2657 case wxFONTENCODING_UTF16LE:
2658 return 2;
2659
2660 case wxFONTENCODING_UTF32BE:
2661 case wxFONTENCODING_UTF32LE:
2662 return 4;
2663
2664 default:
2665 return 1;
2666 }
2667 }
2668
2669 // were we initialized successfully?
2670 bool m_ok;
2671
2672 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2673 };
2674
2675 // make the constructors available for unit testing
2676 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2677 {
2678 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2679 if ( !result->IsOk() )
2680 {
2681 delete result;
2682 return 0;
2683 }
2684 return result;
2685 }
2686
2687 #endif // wxUSE_FONTMAP
2688
2689 // ============================================================================
2690 // wxCSConv implementation
2691 // ============================================================================
2692
2693 void wxCSConv::Init()
2694 {
2695 m_name = NULL;
2696 m_convReal = NULL;
2697 m_deferred = true;
2698 }
2699
2700 wxCSConv::wxCSConv(const wxChar *charset)
2701 {
2702 Init();
2703
2704 if ( charset )
2705 {
2706 SetName(charset);
2707 }
2708
2709 #if wxUSE_FONTMAP
2710 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2711 #else
2712 m_encoding = wxFONTENCODING_SYSTEM;
2713 #endif
2714 }
2715
2716 wxCSConv::wxCSConv(wxFontEncoding encoding)
2717 {
2718 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2719 {
2720 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2721
2722 encoding = wxFONTENCODING_SYSTEM;
2723 }
2724
2725 Init();
2726
2727 m_encoding = encoding;
2728 }
2729
2730 wxCSConv::~wxCSConv()
2731 {
2732 Clear();
2733 }
2734
2735 wxCSConv::wxCSConv(const wxCSConv& conv)
2736 : wxMBConv()
2737 {
2738 Init();
2739
2740 SetName(conv.m_name);
2741 m_encoding = conv.m_encoding;
2742 }
2743
2744 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2745 {
2746 Clear();
2747
2748 SetName(conv.m_name);
2749 m_encoding = conv.m_encoding;
2750
2751 return *this;
2752 }
2753
2754 void wxCSConv::Clear()
2755 {
2756 free(m_name);
2757 delete m_convReal;
2758
2759 m_name = NULL;
2760 m_convReal = NULL;
2761 }
2762
2763 void wxCSConv::SetName(const wxChar *charset)
2764 {
2765 if (charset)
2766 {
2767 m_name = wxStrdup(charset);
2768 m_deferred = true;
2769 }
2770 }
2771
2772 #if wxUSE_FONTMAP
2773 #include "wx/hashmap.h"
2774
2775 WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
2776 wxEncodingNameCache );
2777
2778 static wxEncodingNameCache gs_nameCache;
2779 #endif
2780
2781 wxMBConv *wxCSConv::DoCreate() const
2782 {
2783 #if wxUSE_FONTMAP
2784 wxLogTrace(TRACE_STRCONV,
2785 wxT("creating conversion for %s"),
2786 (m_name ? m_name
2787 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2788 #endif // wxUSE_FONTMAP
2789
2790 // check for the special case of ASCII or ISO8859-1 charset: as we have
2791 // special knowledge of it anyhow, we don't need to create a special
2792 // conversion object
2793 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
2794 m_encoding == wxFONTENCODING_DEFAULT )
2795 {
2796 // don't convert at all
2797 return NULL;
2798 }
2799
2800 // we trust OS to do conversion better than we can so try external
2801 // conversion methods first
2802 //
2803 // the full order is:
2804 // 1. OS conversion (iconv() under Unix or Win32 API)
2805 // 2. hard coded conversions for UTF
2806 // 3. wxEncodingConverter as fall back
2807
2808 // step (1)
2809 #ifdef HAVE_ICONV
2810 #if !wxUSE_FONTMAP
2811 if ( m_name )
2812 #endif // !wxUSE_FONTMAP
2813 {
2814 wxString name(m_name);
2815 wxFontEncoding encoding(m_encoding);
2816
2817 if ( !name.empty() )
2818 {
2819 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2820 if ( conv->IsOk() )
2821 return conv;
2822
2823 delete conv;
2824
2825 #if wxUSE_FONTMAP
2826 encoding =
2827 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2828 #endif // wxUSE_FONTMAP
2829 }
2830 #if wxUSE_FONTMAP
2831 {
2832 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2833 if ( it != gs_nameCache.end() )
2834 {
2835 if ( it->second.empty() )
2836 return NULL;
2837
2838 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2839 if ( conv->IsOk() )
2840 return conv;
2841
2842 delete conv;
2843 }
2844
2845 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2846
2847 for ( ; *names; ++names )
2848 {
2849 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2850 if ( conv->IsOk() )
2851 {
2852 gs_nameCache[encoding] = *names;
2853 return conv;
2854 }
2855
2856 delete conv;
2857 }
2858
2859 gs_nameCache[encoding] = _T(""); // cache the failure
2860 }
2861 #endif // wxUSE_FONTMAP
2862 }
2863 #endif // HAVE_ICONV
2864
2865 #ifdef wxHAVE_WIN32_MB2WC
2866 {
2867 #if wxUSE_FONTMAP
2868 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2869 : new wxMBConv_win32(m_encoding);
2870 if ( conv->IsOk() )
2871 return conv;
2872
2873 delete conv;
2874 #else
2875 return NULL;
2876 #endif
2877 }
2878 #endif // wxHAVE_WIN32_MB2WC
2879 #if defined(__WXMAC__)
2880 {
2881 // leave UTF16 and UTF32 to the built-ins of wx
2882 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2883 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2884 {
2885
2886 #if wxUSE_FONTMAP
2887 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2888 : new wxMBConv_mac(m_encoding);
2889 #else
2890 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2891 #endif
2892 if ( conv->IsOk() )
2893 return conv;
2894
2895 delete conv;
2896 }
2897 }
2898 #endif
2899 #if defined(__WXCOCOA__)
2900 {
2901 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2902 {
2903
2904 #if wxUSE_FONTMAP
2905 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2906 : new wxMBConv_cocoa(m_encoding);
2907 #else
2908 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2909 #endif
2910 if ( conv->IsOk() )
2911 return conv;
2912
2913 delete conv;
2914 }
2915 }
2916 #endif
2917 // step (2)
2918 wxFontEncoding enc = m_encoding;
2919 #if wxUSE_FONTMAP
2920 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2921 {
2922 // use "false" to suppress interactive dialogs -- we can be called from
2923 // anywhere and popping up a dialog from here is the last thing we want to
2924 // do
2925 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2926 }
2927 #endif // wxUSE_FONTMAP
2928
2929 switch ( enc )
2930 {
2931 case wxFONTENCODING_UTF7:
2932 return new wxMBConvUTF7;
2933
2934 case wxFONTENCODING_UTF8:
2935 return new wxMBConvUTF8;
2936
2937 case wxFONTENCODING_UTF16BE:
2938 return new wxMBConvUTF16BE;
2939
2940 case wxFONTENCODING_UTF16LE:
2941 return new wxMBConvUTF16LE;
2942
2943 case wxFONTENCODING_UTF32BE:
2944 return new wxMBConvUTF32BE;
2945
2946 case wxFONTENCODING_UTF32LE:
2947 return new wxMBConvUTF32LE;
2948
2949 default:
2950 // nothing to do but put here to suppress gcc warnings
2951 ;
2952 }
2953
2954 // step (3)
2955 #if wxUSE_FONTMAP
2956 {
2957 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2958 : new wxMBConv_wxwin(m_encoding);
2959 if ( conv->IsOk() )
2960 return conv;
2961
2962 delete conv;
2963 }
2964 #endif // wxUSE_FONTMAP
2965
2966 // NB: This is a hack to prevent deadlock. What could otherwise happen
2967 // in Unicode build: wxConvLocal creation ends up being here
2968 // because of some failure and logs the error. But wxLog will try to
2969 // attach timestamp, for which it will need wxConvLocal (to convert
2970 // time to char* and then wchar_t*), but that fails, tries to log
2971 // error, but wxLog has a (already locked) critical section that
2972 // guards static buffer.
2973 static bool alreadyLoggingError = false;
2974 if (!alreadyLoggingError)
2975 {
2976 alreadyLoggingError = true;
2977 wxLogError(_("Cannot convert from the charset '%s'!"),
2978 m_name ? m_name
2979 :
2980 #if wxUSE_FONTMAP
2981 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2982 #else // !wxUSE_FONTMAP
2983 wxString::Format(_("encoding %s"), m_encoding).c_str()
2984 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2985 );
2986 alreadyLoggingError = false;
2987 }
2988
2989 return NULL;
2990 }
2991
2992 void wxCSConv::CreateConvIfNeeded() const
2993 {
2994 if ( m_deferred )
2995 {
2996 wxCSConv *self = (wxCSConv *)this; // const_cast
2997
2998 #if wxUSE_INTL
2999 // if we don't have neither the name nor the encoding, use the default
3000 // encoding for this system
3001 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3002 {
3003 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
3004 }
3005 #endif // wxUSE_INTL
3006
3007 self->m_convReal = DoCreate();
3008 self->m_deferred = false;
3009 }
3010 }
3011
3012 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
3013 {
3014 CreateConvIfNeeded();
3015
3016 if (m_convReal)
3017 return m_convReal->MB2WC(buf, psz, n);
3018
3019 // latin-1 (direct)
3020 size_t len = strlen(psz);
3021
3022 if (buf)
3023 {
3024 for (size_t c = 0; c <= len; c++)
3025 buf[c] = (unsigned char)(psz[c]);
3026 }
3027
3028 return len;
3029 }
3030
3031 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
3032 {
3033 CreateConvIfNeeded();
3034
3035 if (m_convReal)
3036 return m_convReal->WC2MB(buf, psz, n);
3037
3038 // latin-1 (direct)
3039 const size_t len = wxWcslen(psz);
3040 if (buf)
3041 {
3042 for (size_t c = 0; c <= len; c++)
3043 {
3044 if (psz[c] > 0xFF)
3045 return (size_t)-1;
3046 buf[c] = (char)psz[c];
3047 }
3048 }
3049 else
3050 {
3051 for (size_t c = 0; c <= len; c++)
3052 {
3053 if (psz[c] > 0xFF)
3054 return (size_t)-1;
3055 }
3056 }
3057
3058 return len;
3059 }
3060
3061 size_t wxCSConv::GetMinMBCharWidth() const
3062 {
3063 CreateConvIfNeeded();
3064
3065 if ( m_convReal )
3066 {
3067 // cast needed just to call private function of m_convReal
3068 return ((wxCSConv *)m_convReal)->GetMinMBCharWidth();
3069 }
3070
3071 return 1;
3072 }
3073
3074 // ----------------------------------------------------------------------------
3075 // globals
3076 // ----------------------------------------------------------------------------
3077
3078 #ifdef __WINDOWS__
3079 static wxMBConv_win32 wxConvLibcObj;
3080 #elif defined(__WXMAC__) && !defined(__MACH__)
3081 static wxMBConv_mac wxConvLibcObj ;
3082 #else
3083 static wxMBConvLibc wxConvLibcObj;
3084 #endif
3085
3086 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
3087 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
3088 static wxMBConvUTF7 wxConvUTF7Obj;
3089 static wxMBConvUTF8 wxConvUTF8Obj;
3090
3091 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
3092 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
3093 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
3094 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
3095 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
3096 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
3097 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
3098 #ifdef __WXOSX__
3099 wxConvUTF8Obj;
3100 #else
3101 wxConvLibcObj;
3102 #endif
3103
3104
3105 #else // !wxUSE_WCHAR_T
3106
3107 // stand-ins in absence of wchar_t
3108 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
3109 wxConvISO8859_1,
3110 wxConvLocal,
3111 wxConvUTF8;
3112
3113 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T