]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
renamed GetMinMBCharWidth() to GetMBNulLen(), made it public and documented it
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
6 // Modified by:
7 // Created: 29/01/98
8 // RCS-ID: $Id$
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
14
15 // ============================================================================
16 // declarations
17 // ============================================================================
18
19 // ----------------------------------------------------------------------------
20 // headers
21 // ----------------------------------------------------------------------------
22
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
25
26 #ifdef __BORLANDC__
27 #pragma hdrstop
28 #endif
29
30 #ifndef WX_PRECOMP
31 #include "wx/intl.h"
32 #include "wx/log.h"
33 #endif // WX_PRECOMP
34
35 #include "wx/strconv.h"
36
37 #if wxUSE_WCHAR_T
38
39 #ifdef __WINDOWS__
40 #include "wx/msw/private.h"
41 #include "wx/msw/missing.h"
42 #endif
43
44 #ifndef __WXWINCE__
45 #include <errno.h>
46 #endif
47
48 #include <ctype.h>
49 #include <string.h>
50 #include <stdlib.h>
51
52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54 #endif // __WIN32__ but !__WXMICROWIN__
55
56 #ifdef __SALFORDC__
57 #include <clib.h>
58 #endif
59
60 #ifdef HAVE_ICONV
61 #include <iconv.h>
62 #include "wx/thread.h"
63 #endif
64
65 #include "wx/encconv.h"
66 #include "wx/fontmap.h"
67 #include "wx/utils.h"
68
69 #ifdef __WXMAC__
70 #ifndef __DARWIN__
71 #include <ATSUnicode.h>
72 #include <TextCommon.h>
73 #include <TextEncodingConverter.h>
74 #endif
75
76 #include "wx/mac/private.h" // includes mac headers
77 #endif
78
79 #define TRACE_STRCONV _T("strconv")
80
81 #if SIZEOF_WCHAR_T == 2
82 #define WC_UTF16
83 #endif
84
85 // ============================================================================
86 // implementation
87 // ============================================================================
88
// Helper of cMB2WC(): returns true if at least one of the n bytes starting at
// p is not NUL and false if all of them are NUL (in particular if n is 0).
static bool NotAllNULs(const char *p, size_t n)
{
    for ( size_t i = 0; i < n; i++ )
    {
        if ( p[i] != '\0' )
            return true;
    }

    return false;
}
97
98 // ----------------------------------------------------------------------------
99 // UTF-16 en/decoding to/from UCS-4
100 // ----------------------------------------------------------------------------
101
102
103 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
104 {
105 if (input<=0xffff)
106 {
107 if (output)
108 *output = (wxUint16) input;
109 return 1;
110 }
111 else if (input>=0x110000)
112 {
113 return (size_t)-1;
114 }
115 else
116 {
117 if (output)
118 {
119 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
120 *output = (wxUint16) ((input&0x3ff)+0xdc00);
121 }
122 return 2;
123 }
124 }
125
126 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
127 {
128 if ((*input<0xd800) || (*input>0xdfff))
129 {
130 output = *input;
131 return 1;
132 }
133 else if ((input[1]<0xdc00) || (input[1]>0xdfff))
134 {
135 output = *input;
136 return (size_t)-1;
137 }
138 else
139 {
140 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
141 return 2;
142 }
143 }
144
145
146 // ----------------------------------------------------------------------------
147 // wxMBConv
148 // ----------------------------------------------------------------------------
149
150 wxMBConv::~wxMBConv()
151 {
152 // nothing to do here (necessary for Darwin linking probably)
153 }
154
155 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
156 {
157 if ( psz )
158 {
159 // calculate the length of the buffer needed first
160 size_t nLen = MB2WC(NULL, psz, 0);
161 if ( nLen != (size_t)-1 )
162 {
163 // now do the actual conversion
164 wxWCharBuffer buf(nLen);
165 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
166 if ( nLen != (size_t)-1 )
167 {
168 return buf;
169 }
170 }
171 }
172
173 wxWCharBuffer buf((wchar_t *)NULL);
174
175 return buf;
176 }
177
178 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
179 {
180 if ( pwz )
181 {
182 size_t nLen = WC2MB(NULL, pwz, 0);
183 if ( nLen != (size_t)-1 )
184 {
185 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
186 nLen = WC2MB(buf.data(), pwz, nLen + 4);
187 if ( nLen != (size_t)-1 )
188 {
189 return buf;
190 }
191 }
192 }
193
194 wxCharBuffer buf((char *)NULL);
195
196 return buf;
197 }
198
199 const wxWCharBuffer
200 wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
201 {
202 // the currently accumulated wide characters
203 wxWCharBuffer wbuf;
204
205 // the current length of wbuf
206 size_t lenBuf = 0;
207
208 // the number of NULs terminating this string
209 size_t nulLen wxDUMMY_INITIALIZE(0);
210
211 // make a copy of the input string unless it is already properly
212 // NUL-terminated
213 wxCharBuffer bufTmp;
214
215 // if we were not given the input size we just have to assume that the
216 // string is properly terminated as we have no way of knowing how long it
217 // is anyhow, but if we do have the size check whether there are enough
218 // NULs at the end
219 if ( inLen != (size_t)-1 )
220 {
221 // we need to know how to find the end of this string
222 nulLen = GetMBNulLen();
223 if ( nulLen == (size_t)-1 )
224 return wbuf;
225
226 // if there are enough NULs we can avoid the copy
227 if ( inLen < nulLen || NotAllNULs(in + inLen - nulLen, nulLen) )
228 {
229 // make a copy in order to properly NUL-terminate the string
230 bufTmp = wxCharBuffer(inLen + nulLen - 1 /* 1 will be added */);
231 char * const p = bufTmp.data();
232 memcpy(p, in, inLen);
233 for ( char *s = p + inLen; s < p + inLen + nulLen; s++ )
234 *s = '\0';
235 }
236 }
237
238 if ( bufTmp )
239 in = bufTmp;
240
241 size_t lenChunk;
242 for ( const char * const inEnd = in + inLen;; )
243 {
244 // try to convert the current chunk
245 lenChunk = MB2WC(NULL, in, 0);
246 if ( lenChunk == 0 )
247 {
248 // nothing left in the input string, conversion succeeded
249 break;
250 }
251
252 if ( lenChunk == (size_t)-1 )
253 break;
254
255 // if we already have a previous chunk, leave the NUL separating it
256 // from this one
257 if ( lenBuf )
258 lenBuf++;
259
260 const size_t lenBufNew = lenBuf + lenChunk;
261 if ( !wbuf.extend(lenBufNew) )
262 {
263 lenChunk = (size_t)-1;
264 break;
265 }
266
267 lenChunk = MB2WC(wbuf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
268 if ( lenChunk == (size_t)-1 )
269 break;
270
271 lenBuf = lenBufNew;
272
273 if ( inLen == (size_t)-1 )
274 {
275 // convert only one chunk in this case, as we suppose that the
276 // string is NUL-terminated and so inEnd is not used at all
277 break;
278 }
279
280 // advance the input pointer past the end of this chunk
281 while ( NotAllNULs(in, nulLen) )
282 {
283 // notice that we must skip over multiple bytes here as we suppose
284 // that if NUL takes 2 or 4 bytes, then all the other characters do
285 // too and so if advanced by a single byte we might erroneously
286 // detect sequences of NUL bytes in the middle of the input
287 in += nulLen;
288 }
289
290 in += nulLen; // skipping over its terminator as well
291
292 // note that ">=" (and not just "==") is needed here as the terminator
293 // we skipped just above could be inside or just after the buffer
294 // delimited by inEnd
295 if ( in >= inEnd )
296 break;
297 }
298
299 if ( lenChunk == (size_t)-1 )
300 {
301 // conversion failed
302 lenBuf = 0;
303 wbuf.reset();
304 }
305
306 if ( outLen )
307 *outLen = lenBuf;
308
309 return wbuf;
310 }
311
312 const wxCharBuffer
313 wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
314 {
315 // the currently accumulated multibyte characters
316 wxCharBuffer buf;
317
318 // the current length of buf
319 size_t lenBuf = 0;
320
321 // make a copy of the input string unless it is already properly
322 // NUL-terminated
323 //
324 // if we don't know its length we have no choice but to assume that it is,
325 // indeed, properly terminated
326 wxWCharBuffer bufTmp;
327 if ( inLen == (size_t)-1 )
328 {
329 inLen = wxWcslen(in) + 1;
330 }
331 else if ( inLen != 0 && in[inLen - 1] != L'\0' )
332 {
333 // make a copy in order to properly NUL-terminate the string
334 bufTmp = wxWCharBuffer(inLen);
335 memcpy(bufTmp.data(), in, inLen*sizeof(wchar_t));
336 }
337
338 if ( bufTmp )
339 in = bufTmp;
340
341 for ( const wchar_t * const inEnd = in + inLen;; )
342 {
343 // try to convert the current chunk, if anything left
344 size_t lenChunk = in < inEnd ? WC2MB(NULL, in, 0) : 0;
345 if ( lenChunk == 0 )
346 {
347 // nothing left in the input string, conversion succeeded
348 if ( outLen )
349 *outLen = lenBuf ? lenBuf - 1 : lenBuf;
350
351 return buf;
352 }
353
354 if ( lenChunk == (size_t)-1 )
355 break;
356
357 const size_t lenBufNew = lenBuf + lenChunk;
358 if ( !buf.extend(lenBufNew) )
359 break;
360
361 lenChunk = WC2MB(buf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
362 if ( lenChunk == (size_t)-1 )
363 break;
364
365 // chunk successfully converted, go to the next one
366 in += wxWcslen(in) + 1 /* skip NUL too */;
367 lenBuf = lenBufNew + 1;
368 }
369
370 // conversion failed
371 if ( outLen )
372 *outLen = 0;
373
374 return wxCharBuffer();
375 }
376
377 // ----------------------------------------------------------------------------
378 // wxMBConvLibc
379 // ----------------------------------------------------------------------------
380
381 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
382 {
383 return wxMB2WC(buf, psz, n);
384 }
385
386 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
387 {
388 return wxWC2MB(buf, psz, n);
389 }
390
391 // ----------------------------------------------------------------------------
392 // wxConvBrokenFileNames
393 // ----------------------------------------------------------------------------
394
395 #ifdef __UNIX__
396
397 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
398 {
399 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
400 || wxStricmp(charset, _T("UTF8")) == 0 )
401 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
402 else
403 m_conv = new wxCSConv(charset);
404 }
405
406 #endif // __UNIX__
407
408 // ----------------------------------------------------------------------------
409 // UTF-7
410 // ----------------------------------------------------------------------------
411
412 // Implementation (C) 2004 Fredrik Roubert
413
//
// BASE64 decoding table: maps a byte to the 6 bit value it stands for in
// BASE64 or to 0xff if the byte is not a BASE64 character
//
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: only '+' (0x2b) and '/' (0x2f) are valid
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: digits '0' to '9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x4f: letters 'A' to 'O'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50 - 0x5f: letters 'P' to 'Z'
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x6f: letters 'a' to 'o'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70 - 0x7f: letters 'p' to 'z'
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff: nothing valid here
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
452
453 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
454 {
455 size_t len = 0;
456
457 while ( *psz && (!buf || (len < n)) )
458 {
459 unsigned char cc = *psz++;
460 if (cc != '+')
461 {
462 // plain ASCII char
463 if (buf)
464 *buf++ = cc;
465 len++;
466 }
467 else if (*psz == '-')
468 {
469 // encoded plus sign
470 if (buf)
471 *buf++ = cc;
472 len++;
473 psz++;
474 }
475 else // start of BASE64 encoded string
476 {
477 bool lsb, ok;
478 unsigned int d, l;
479 for ( ok = lsb = false, d = 0, l = 0;
480 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
481 psz++ )
482 {
483 d <<= 6;
484 d += cc;
485 for (l += 6; l >= 8; lsb = !lsb)
486 {
487 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
488 if (lsb)
489 {
490 if (buf)
491 *buf++ |= c;
492 len ++;
493 }
494 else
495 {
496 if (buf)
497 *buf = (wchar_t)(c << 8);
498 }
499
500 ok = true;
501 }
502 }
503
504 if ( !ok )
505 {
506 // in valid UTF7 we should have valid characters after '+'
507 return (size_t)-1;
508 }
509
510 if (*psz == '-')
511 psz++;
512 }
513 }
514
515 if ( buf && (len < n) )
516 *buf = '\0';
517
518 return len;
519 }
520
//
// BASE64 encoding table: maps a 6 bit value to the character encoding it
//
static const unsigned char utf7enb64[] =
{
    // 0 - 25: upper case letters
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 26 - 51: lower case letters
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // 52 - 61: digits
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 62 and 63
    '+', '/'
};
535
//
// UTF-7 encoding table giving the class of each of the ASCII characters:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    // 0x00 - 0x0f
    3, 3, 3, 3, 3, 3, 3, 3,
    3, 2, 2, 3, 3, 2, 3, 3,
    // 0x10 - 0x1f
    3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3,
    // 0x20 - 0x2f
    2, 1, 1, 1, 1, 1, 1, 0,
    0, 0, 1, 3, 0, 0, 0, 3,
    // 0x30 - 0x3f
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 1, 1, 1, 0,
    // 0x40 - 0x4f
    1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    // 0x50 - 0x5f
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 3, 1, 1, 1,
    // 0x60 - 0x6f
    1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    // 0x70 - 0x7f
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 1, 1, 3, 3
};
555
556 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
557 {
558 size_t len = 0;
559
560 while (*psz && ((!buf) || (len < n)))
561 {
562 wchar_t cc = *psz++;
563 if (cc < 0x80 && utf7encode[cc] < 1)
564 {
565 // plain ASCII char
566 if (buf)
567 *buf++ = (char)cc;
568 len++;
569 }
570 #ifndef WC_UTF16
571 else if (((wxUint32)cc) > 0xffff)
572 {
573 // no surrogate pair generation (yet?)
574 return (size_t)-1;
575 }
576 #endif
577 else
578 {
579 if (buf)
580 *buf++ = '+';
581 len++;
582 if (cc != '+')
583 {
584 // BASE64 encode string
585 unsigned int lsb, d, l;
586 for (d = 0, l = 0; /*nothing*/; psz++)
587 {
588 for (lsb = 0; lsb < 2; lsb ++)
589 {
590 d <<= 8;
591 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
592
593 for (l += 8; l >= 6; )
594 {
595 l -= 6;
596 if (buf)
597 *buf++ = utf7enb64[(d >> l) % 64];
598 len++;
599 }
600 }
601 cc = *psz;
602 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
603 break;
604 }
605 if (l != 0)
606 {
607 if (buf)
608 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
609 len++;
610 }
611 }
612 if (buf)
613 *buf++ = '-';
614 len++;
615 }
616 }
617 if (buf && (len < n))
618 *buf = 0;
619 return len;
620 }
621
622 // ----------------------------------------------------------------------------
623 // UTF-8
624 // ----------------------------------------------------------------------------
625
626 static wxUint32 utf8_max[]=
627 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
628
629 // boundaries of the private use area we use to (temporarily) remap invalid
630 // characters invalid in a UTF-8 encoded string
631 const wxUint32 wxUnicodePUA = 0x100000;
632 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
633
634 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
635 {
636 size_t len = 0;
637
638 while (*psz && ((!buf) || (len < n)))
639 {
640 const char *opsz = psz;
641 bool invalid = false;
642 unsigned char cc = *psz++, fc = cc;
643 unsigned cnt;
644 for (cnt = 0; fc & 0x80; cnt++)
645 fc <<= 1;
646 if (!cnt)
647 {
648 // plain ASCII char
649 if (buf)
650 *buf++ = cc;
651 len++;
652
653 // escape the escape character for octal escapes
654 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
655 && cc == '\\' && (!buf || len < n))
656 {
657 if (buf)
658 *buf++ = cc;
659 len++;
660 }
661 }
662 else
663 {
664 cnt--;
665 if (!cnt)
666 {
667 // invalid UTF-8 sequence
668 invalid = true;
669 }
670 else
671 {
672 unsigned ocnt = cnt - 1;
673 wxUint32 res = cc & (0x3f >> cnt);
674 while (cnt--)
675 {
676 cc = *psz;
677 if ((cc & 0xC0) != 0x80)
678 {
679 // invalid UTF-8 sequence
680 invalid = true;
681 break;
682 }
683 psz++;
684 res = (res << 6) | (cc & 0x3f);
685 }
686 if (invalid || res <= utf8_max[ocnt])
687 {
688 // illegal UTF-8 encoding
689 invalid = true;
690 }
691 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
692 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
693 {
694 // if one of our PUA characters turns up externally
695 // it must also be treated as an illegal sequence
696 // (a bit like you have to escape an escape character)
697 invalid = true;
698 }
699 else
700 {
701 #ifdef WC_UTF16
702 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
703 size_t pa = encode_utf16(res, (wxUint16 *)buf);
704 if (pa == (size_t)-1)
705 {
706 invalid = true;
707 }
708 else
709 {
710 if (buf)
711 buf += pa;
712 len += pa;
713 }
714 #else // !WC_UTF16
715 if (buf)
716 *buf++ = (wchar_t)res;
717 len++;
718 #endif // WC_UTF16/!WC_UTF16
719 }
720 }
721 if (invalid)
722 {
723 if (m_options & MAP_INVALID_UTF8_TO_PUA)
724 {
725 while (opsz < psz && (!buf || len < n))
726 {
727 #ifdef WC_UTF16
728 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
729 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
730 wxASSERT(pa != (size_t)-1);
731 if (buf)
732 buf += pa;
733 opsz++;
734 len += pa;
735 #else
736 if (buf)
737 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
738 opsz++;
739 len++;
740 #endif
741 }
742 }
743 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
744 {
745 while (opsz < psz && (!buf || len < n))
746 {
747 if ( buf && len + 3 < n )
748 {
749 unsigned char on = *opsz;
750 *buf++ = L'\\';
751 *buf++ = (wchar_t)( L'0' + on / 0100 );
752 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
753 *buf++ = (wchar_t)( L'0' + on % 010 );
754 }
755 opsz++;
756 len += 4;
757 }
758 }
759 else // MAP_INVALID_UTF8_NOT
760 {
761 return (size_t)-1;
762 }
763 }
764 }
765 }
766 if (buf && (len < n))
767 *buf = 0;
768 return len;
769 }
770
// returns true if wch is one of the octal digits, i.e. in '0'..'7' range
static inline bool isoctal(wchar_t wch)
{
    return wch >= L'0' && wch <= L'7';
}
775
776 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
777 {
778 size_t len = 0;
779
780 while (*psz && ((!buf) || (len < n)))
781 {
782 wxUint32 cc;
783 #ifdef WC_UTF16
784 // cast is ok for WC_UTF16
785 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
786 psz += (pa == (size_t)-1) ? 1 : pa;
787 #else
788 cc=(*psz++) & 0x7fffffff;
789 #endif
790
791 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
792 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
793 {
794 if (buf)
795 *buf++ = (char)(cc - wxUnicodePUA);
796 len++;
797 }
798 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
799 && cc == L'\\' && psz[0] == L'\\' )
800 {
801 if (buf)
802 *buf++ = (char)cc;
803 psz++;
804 len++;
805 }
806 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
807 cc == L'\\' &&
808 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
809 {
810 if (buf)
811 {
812 *buf++ = (char) ((psz[0] - L'0')*0100 +
813 (psz[1] - L'0')*010 +
814 (psz[2] - L'0'));
815 }
816
817 psz += 3;
818 len++;
819 }
820 else
821 {
822 unsigned cnt;
823 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
824 if (!cnt)
825 {
826 // plain ASCII char
827 if (buf)
828 *buf++ = (char) cc;
829 len++;
830 }
831
832 else
833 {
834 len += cnt + 1;
835 if (buf)
836 {
837 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
838 while (cnt--)
839 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
840 }
841 }
842 }
843 }
844
845 if (buf && (len<n))
846 *buf = 0;
847
848 return len;
849 }
850
851 // ----------------------------------------------------------------------------
852 // UTF-16
853 // ----------------------------------------------------------------------------
854
855 #ifdef WORDS_BIGENDIAN
856 #define wxMBConvUTF16straight wxMBConvUTF16BE
857 #define wxMBConvUTF16swap wxMBConvUTF16LE
858 #else
859 #define wxMBConvUTF16swap wxMBConvUTF16BE
860 #define wxMBConvUTF16straight wxMBConvUTF16LE
861 #endif
862
863
864 #ifdef WC_UTF16
865
866 // copy 16bit MB to 16bit String
867 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
868 {
869 size_t len=0;
870
871 while (*(wxUint16*)psz && (!buf || len < n))
872 {
873 if (buf)
874 *buf++ = *(wxUint16*)psz;
875 len++;
876
877 psz += sizeof(wxUint16);
878 }
879 if (buf && len<n) *buf=0;
880
881 return len;
882 }
883
884
885 // copy 16bit String to 16bit MB
886 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
887 {
888 size_t len=0;
889
890 while (*psz && (!buf || len < n))
891 {
892 if (buf)
893 {
894 *(wxUint16*)buf = *psz;
895 buf += sizeof(wxUint16);
896 }
897 len += sizeof(wxUint16);
898 psz++;
899 }
900 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
901
902 return len;
903 }
904
905
906 // swap 16bit MB to 16bit String
907 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
908 {
909 size_t len = 0;
910
911 // UTF16 string must be terminated by 2 NULs as single NULs may occur
912 // inside the string
913 while ( (psz[0] || psz[1]) && (!buf || len < n) )
914 {
915 if ( buf )
916 {
917 ((char *)buf)[0] = psz[1];
918 ((char *)buf)[1] = psz[0];
919 buf++;
920 }
921 len++;
922 psz += 2;
923 }
924
925 if ( buf && len < n )
926 *buf = L'\0';
927
928 return len;
929 }
930
931
932 // swap 16bit MB to 16bit String
933 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
934 {
935 size_t len = 0;
936
937 while ( *psz && (!buf || len < n) )
938 {
939 if ( buf )
940 {
941 *buf++ = ((char*)psz)[1];
942 *buf++ = ((char*)psz)[0];
943 }
944 len += 2;
945 psz++;
946 }
947
948 if ( buf && len < n )
949 *buf = '\0';
950
951 return len;
952 }
953
954
955 #else // WC_UTF16
956
957
958 // copy 16bit MB to 32bit String
959 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
960 {
961 size_t len=0;
962
963 while (*(wxUint16*)psz && (!buf || len < n))
964 {
965 wxUint32 cc;
966 size_t pa=decode_utf16((wxUint16*)psz, cc);
967 if (pa == (size_t)-1)
968 return pa;
969
970 if (buf)
971 *buf++ = (wchar_t)cc;
972 len++;
973 psz += pa * sizeof(wxUint16);
974 }
975 if (buf && len<n) *buf=0;
976
977 return len;
978 }
979
980
981 // copy 32bit String to 16bit MB
982 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
983 {
984 size_t len=0;
985
986 while (*psz && (!buf || len < n))
987 {
988 wxUint16 cc[2];
989 size_t pa=encode_utf16(*psz, cc);
990
991 if (pa == (size_t)-1)
992 return pa;
993
994 if (buf)
995 {
996 *(wxUint16*)buf = cc[0];
997 buf += sizeof(wxUint16);
998 if (pa > 1)
999 {
1000 *(wxUint16*)buf = cc[1];
1001 buf += sizeof(wxUint16);
1002 }
1003 }
1004
1005 len += pa*sizeof(wxUint16);
1006 psz++;
1007 }
1008 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1009
1010 return len;
1011 }
1012
1013
1014 // swap 16bit MB to 32bit String
1015 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1016 {
1017 size_t len=0;
1018
1019 while (*(wxUint16*)psz && (!buf || len < n))
1020 {
1021 wxUint32 cc;
1022 char tmp[4];
1023 tmp[0]=psz[1]; tmp[1]=psz[0];
1024 tmp[2]=psz[3]; tmp[3]=psz[2];
1025
1026 size_t pa=decode_utf16((wxUint16*)tmp, cc);
1027 if (pa == (size_t)-1)
1028 return pa;
1029
1030 if (buf)
1031 *buf++ = (wchar_t)cc;
1032
1033 len++;
1034 psz += pa * sizeof(wxUint16);
1035 }
1036 if (buf && len<n) *buf=0;
1037
1038 return len;
1039 }
1040
1041
1042 // swap 32bit String to 16bit MB
1043 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1044 {
1045 size_t len=0;
1046
1047 while (*psz && (!buf || len < n))
1048 {
1049 wxUint16 cc[2];
1050 size_t pa=encode_utf16(*psz, cc);
1051
1052 if (pa == (size_t)-1)
1053 return pa;
1054
1055 if (buf)
1056 {
1057 *buf++ = ((char*)cc)[1];
1058 *buf++ = ((char*)cc)[0];
1059 if (pa > 1)
1060 {
1061 *buf++ = ((char*)cc)[3];
1062 *buf++ = ((char*)cc)[2];
1063 }
1064 }
1065
1066 len += pa*sizeof(wxUint16);
1067 psz++;
1068 }
1069 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1070
1071 return len;
1072 }
1073
1074 #endif // WC_UTF16
1075
1076
1077 // ----------------------------------------------------------------------------
1078 // UTF-32
1079 // ----------------------------------------------------------------------------
1080
1081 #ifdef WORDS_BIGENDIAN
1082 #define wxMBConvUTF32straight wxMBConvUTF32BE
1083 #define wxMBConvUTF32swap wxMBConvUTF32LE
1084 #else
1085 #define wxMBConvUTF32swap wxMBConvUTF32BE
1086 #define wxMBConvUTF32straight wxMBConvUTF32LE
1087 #endif
1088
1089
1090 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1091 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1092
1093
1094 #ifdef WC_UTF16
1095
1096 // copy 32bit MB to 16bit String
1097 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1098 {
1099 size_t len=0;
1100
1101 while (*(wxUint32*)psz && (!buf || len < n))
1102 {
1103 wxUint16 cc[2];
1104
1105 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1106 if (pa == (size_t)-1)
1107 return pa;
1108
1109 if (buf)
1110 {
1111 *buf++ = cc[0];
1112 if (pa > 1)
1113 *buf++ = cc[1];
1114 }
1115 len += pa;
1116 psz += sizeof(wxUint32);
1117 }
1118 if (buf && len<n) *buf=0;
1119
1120 return len;
1121 }
1122
1123
1124 // copy 16bit String to 32bit MB
1125 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1126 {
1127 size_t len=0;
1128
1129 while (*psz && (!buf || len < n))
1130 {
1131 wxUint32 cc;
1132
1133 // cast is ok for WC_UTF16
1134 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1135 if (pa == (size_t)-1)
1136 return pa;
1137
1138 if (buf)
1139 {
1140 *(wxUint32*)buf = cc;
1141 buf += sizeof(wxUint32);
1142 }
1143 len += sizeof(wxUint32);
1144 psz += pa;
1145 }
1146
1147 if (buf && len<=n-sizeof(wxUint32))
1148 *(wxUint32*)buf=0;
1149
1150 return len;
1151 }
1152
1153
1154
1155 // swap 32bit MB to 16bit String
1156 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1157 {
1158 size_t len=0;
1159
1160 while (*(wxUint32*)psz && (!buf || len < n))
1161 {
1162 char tmp[4];
1163 tmp[0] = psz[3]; tmp[1] = psz[2];
1164 tmp[2] = psz[1]; tmp[3] = psz[0];
1165
1166
1167 wxUint16 cc[2];
1168
1169 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1170 if (pa == (size_t)-1)
1171 return pa;
1172
1173 if (buf)
1174 {
1175 *buf++ = cc[0];
1176 if (pa > 1)
1177 *buf++ = cc[1];
1178 }
1179 len += pa;
1180 psz += sizeof(wxUint32);
1181 }
1182
1183 if (buf && len<n)
1184 *buf=0;
1185
1186 return len;
1187 }
1188
1189
1190 // swap 16bit String to 32bit MB
1191 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1192 {
1193 size_t len=0;
1194
1195 while (*psz && (!buf || len < n))
1196 {
1197 char cc[4];
1198
1199 // cast is ok for WC_UTF16
1200 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1201 if (pa == (size_t)-1)
1202 return pa;
1203
1204 if (buf)
1205 {
1206 *buf++ = cc[3];
1207 *buf++ = cc[2];
1208 *buf++ = cc[1];
1209 *buf++ = cc[0];
1210 }
1211 len += sizeof(wxUint32);
1212 psz += pa;
1213 }
1214
1215 if (buf && len<=n-sizeof(wxUint32))
1216 *(wxUint32*)buf=0;
1217
1218 return len;
1219 }
1220
1221 #else // WC_UTF16
1222
1223
1224 // copy 32bit MB to 32bit String
1225 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1226 {
1227 size_t len=0;
1228
1229 while (*(wxUint32*)psz && (!buf || len < n))
1230 {
1231 if (buf)
1232 *buf++ = (wchar_t)(*(wxUint32*)psz);
1233 len++;
1234 psz += sizeof(wxUint32);
1235 }
1236
1237 if (buf && len<n)
1238 *buf=0;
1239
1240 return len;
1241 }
1242
1243
1244 // copy 32bit String to 32bit MB
1245 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1246 {
1247 size_t len=0;
1248
1249 while (*psz && (!buf || len < n))
1250 {
1251 if (buf)
1252 {
1253 *(wxUint32*)buf = *psz;
1254 buf += sizeof(wxUint32);
1255 }
1256
1257 len += sizeof(wxUint32);
1258 psz++;
1259 }
1260
1261 if (buf && len<=n-sizeof(wxUint32))
1262 *(wxUint32*)buf=0;
1263
1264 return len;
1265 }
1266
1267
1268 // swap 32bit MB to 32bit String
1269 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1270 {
1271 size_t len=0;
1272
1273 while (*(wxUint32*)psz && (!buf || len < n))
1274 {
1275 if (buf)
1276 {
1277 ((char *)buf)[0] = psz[3];
1278 ((char *)buf)[1] = psz[2];
1279 ((char *)buf)[2] = psz[1];
1280 ((char *)buf)[3] = psz[0];
1281 buf++;
1282 }
1283 len++;
1284 psz += sizeof(wxUint32);
1285 }
1286
1287 if (buf && len<n)
1288 *buf=0;
1289
1290 return len;
1291 }
1292
1293
1294 // swap 32bit String to 32bit MB
1295 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1296 {
1297 size_t len=0;
1298
1299 while (*psz && (!buf || len < n))
1300 {
1301 if (buf)
1302 {
1303 *buf++ = ((char *)psz)[3];
1304 *buf++ = ((char *)psz)[2];
1305 *buf++ = ((char *)psz)[1];
1306 *buf++ = ((char *)psz)[0];
1307 }
1308 len += sizeof(wxUint32);
1309 psz++;
1310 }
1311
1312 if (buf && len<=n-sizeof(wxUint32))
1313 *(wxUint32*)buf=0;
1314
1315 return len;
1316 }
1317
1318
1319 #endif // WC_UTF16
1320
1321
1322 // ============================================================================
1323 // The classes doing conversion using the iconv_xxx() functions
1324 // ============================================================================
1325
1326 #ifdef HAVE_ICONV
1327
1328 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1329 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1330 // (unless there's yet another bug in glibc) the only case when iconv()
1331 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1332 // left in the input buffer -- when _real_ error occurs,
1333 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1334 // iconv() failure.
1335 // [This bug does not appear in glibc 2.2.]
1336 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1337 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1338 (errno != E2BIG || bufLeft != 0))
1339 #else
1340 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1341 #endif
1342
1343 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1344
1345 #define ICONV_T_INVALID ((iconv_t)-1)
1346
1347 #if SIZEOF_WCHAR_T == 4
1348 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1349 #define WC_ENC wxFONTENCODING_UTF32
1350 #elif SIZEOF_WCHAR_T == 2
1351 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1352 #define WC_ENC wxFONTENCODING_UTF16
1353 #else // sizeof(wchar_t) != 2 nor 4
1354 // does this ever happen?
1355 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1356 #endif
1357
1358 // ----------------------------------------------------------------------------
1359 // wxMBConv_iconv: encapsulates an iconv character set
1360 // ----------------------------------------------------------------------------
1361
1362 class wxMBConv_iconv : public wxMBConv
1363 {
1364 public:
1365 wxMBConv_iconv(const wxChar *name);
1366 virtual ~wxMBConv_iconv();
1367
1368 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1369 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1370
1371 // classify this encoding as explained in wxMBConv::GetMBNulLen()
1372 // comment
1373 virtual size_t GetMBNulLen() const;
1374
1375 bool IsOk() const
1376 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1377
1378 protected:
1379 // the iconv handlers used to translate from multibyte to wide char and in
1380 // the other direction
1381 iconv_t m2w,
1382 w2m;
1383 #if wxUSE_THREADS
1384 // guards access to m2w and w2m objects
1385 wxMutex m_iconvMutex;
1386 #endif
1387
1388 private:
1389 // the name (for iconv_open()) of a wide char charset -- if none is
1390 // available on this machine, it will remain NULL
1391 static wxString ms_wcCharsetName;
1392
1393 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1394 // different endian-ness than the native one
1395 static bool ms_wcNeedsSwap;
1396
1397 // cached result of GetMBNulLen(); set to 0 meaning "unknown"
1398 // initially
1399 size_t m_minMBCharWidth;
1400 };
1401
1402 // make the constructor available for unit testing
1403 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1404 {
1405 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1406 if ( !result->IsOk() )
1407 {
1408 delete result;
1409 return 0;
1410 }
1411 return result;
1412 }
1413
1414 wxString wxMBConv_iconv::ms_wcCharsetName;
1415 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1416
1417 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1418 {
1419 m_minMBCharWidth = 0;
1420
1421 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1422 // names for the charsets
1423 const wxCharBuffer cname(wxString(name).ToAscii());
1424
1425 // check for charset that represents wchar_t:
1426 if ( ms_wcCharsetName.empty() )
1427 {
1428 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1429
1430 #if wxUSE_FONTMAP
1431 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1432 #else // !wxUSE_FONTMAP
1433 static const wxChar *names[] =
1434 {
1435 #if SIZEOF_WCHAR_T == 4
1436 _T("UCS-4"),
1437 #elif SIZEOF_WCHAR_T = 2
1438 _T("UCS-2"),
1439 #endif
1440 NULL
1441 };
1442 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1443
1444 for ( ; *names && ms_wcCharsetName.empty(); ++names )
1445 {
1446 const wxString nameCS(*names);
1447
1448 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1449 wxString nameXE(nameCS);
1450 #ifdef WORDS_BIGENDIAN
1451 nameXE += _T("BE");
1452 #else // little endian
1453 nameXE += _T("LE");
1454 #endif
1455
1456 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1457 nameXE.c_str());
1458
1459 m2w = iconv_open(nameXE.ToAscii(), cname);
1460 if ( m2w == ICONV_T_INVALID )
1461 {
1462 // try charset w/o bytesex info (e.g. "UCS4")
1463 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1464 nameCS.c_str());
1465 m2w = iconv_open(nameCS.ToAscii(), cname);
1466
1467 // and check for bytesex ourselves:
1468 if ( m2w != ICONV_T_INVALID )
1469 {
1470 char buf[2], *bufPtr;
1471 wchar_t wbuf[2], *wbufPtr;
1472 size_t insz, outsz;
1473 size_t res;
1474
1475 buf[0] = 'A';
1476 buf[1] = 0;
1477 wbuf[0] = 0;
1478 insz = 2;
1479 outsz = SIZEOF_WCHAR_T * 2;
1480 wbufPtr = wbuf;
1481 bufPtr = buf;
1482
1483 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1484 (char**)&wbufPtr, &outsz);
1485
1486 if (ICONV_FAILED(res, insz))
1487 {
1488 wxLogLastError(wxT("iconv"));
1489 wxLogError(_("Conversion to charset '%s' doesn't work."),
1490 nameCS.c_str());
1491 }
1492 else // ok, can convert to this encoding, remember it
1493 {
1494 ms_wcCharsetName = nameCS;
1495 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1496 }
1497 }
1498 }
1499 else // use charset not requiring byte swapping
1500 {
1501 ms_wcCharsetName = nameXE;
1502 }
1503 }
1504
1505 wxLogTrace(TRACE_STRCONV,
1506 wxT("iconv wchar_t charset is \"%s\"%s"),
1507 ms_wcCharsetName.empty() ? _T("<none>")
1508 : ms_wcCharsetName.c_str(),
1509 ms_wcNeedsSwap ? _T(" (needs swap)")
1510 : _T(""));
1511 }
1512 else // we already have ms_wcCharsetName
1513 {
1514 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1515 }
1516
1517 if ( ms_wcCharsetName.empty() )
1518 {
1519 w2m = ICONV_T_INVALID;
1520 }
1521 else
1522 {
1523 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1524 if ( w2m == ICONV_T_INVALID )
1525 {
1526 wxLogTrace(TRACE_STRCONV,
1527 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1528 ms_wcCharsetName.c_str(), cname.data());
1529 }
1530 }
1531 }
1532
1533 wxMBConv_iconv::~wxMBConv_iconv()
1534 {
1535 if ( m2w != ICONV_T_INVALID )
1536 iconv_close(m2w);
1537 if ( w2m != ICONV_T_INVALID )
1538 iconv_close(w2m);
1539 }
1540
1541 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1542 {
1543 // find the string length: notice that must be done differently for
1544 // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1545 size_t inbuf;
1546 const size_t nulLen = GetMBNulLen();
1547 switch ( nulLen )
1548 {
1549 default:
1550 return (size_t)-1;
1551
1552 case 1:
1553 inbuf = strlen(psz); // arguably more optimized than our version
1554 break;
1555
1556 case 2:
1557 case 4:
1558 // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1559 // they also have to start at character boundary and not span two
1560 // adjacent characters
1561 const char *p;
1562 for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
1563 ;
1564 inbuf = p - psz;
1565 break;
1566 }
1567
1568 #if wxUSE_THREADS
1569 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1570 // Unfortunately there is a couple of global wxCSConv objects such as
1571 // wxConvLocal that are used all over wx code, so we have to make sure
1572 // the handle is used by at most one thread at the time. Otherwise
1573 // only a few wx classes would be safe to use from non-main threads
1574 // as MB<->WC conversion would fail "randomly".
1575 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1576 #endif // wxUSE_THREADS
1577
1578
1579 size_t outbuf = n * SIZEOF_WCHAR_T;
1580 size_t res, cres;
1581 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1582 wchar_t *bufPtr = buf;
1583 const char *pszPtr = psz;
1584
1585 if (buf)
1586 {
1587 // have destination buffer, convert there
1588 cres = iconv(m2w,
1589 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1590 (char**)&bufPtr, &outbuf);
1591 res = n - (outbuf / SIZEOF_WCHAR_T);
1592
1593 if (ms_wcNeedsSwap)
1594 {
1595 // convert to native endianness
1596 for ( unsigned i = 0; i < res; i++ )
1597 buf[n] = WC_BSWAP(buf[i]);
1598 }
1599
1600 // NUL-terminate the string if there is any space left
1601 if (res < n)
1602 buf[res] = 0;
1603 }
1604 else
1605 {
1606 // no destination buffer... convert using temp buffer
1607 // to calculate destination buffer requirement
1608 wchar_t tbuf[8];
1609 res = 0;
1610 do {
1611 bufPtr = tbuf;
1612 outbuf = 8*SIZEOF_WCHAR_T;
1613
1614 cres = iconv(m2w,
1615 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1616 (char**)&bufPtr, &outbuf );
1617
1618 res += 8-(outbuf/SIZEOF_WCHAR_T);
1619 } while ((cres==(size_t)-1) && (errno==E2BIG));
1620 }
1621
1622 if (ICONV_FAILED(cres, inbuf))
1623 {
1624 //VS: it is ok if iconv fails, hence trace only
1625 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1626 return (size_t)-1;
1627 }
1628
1629 return res;
1630 }
1631
1632 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1633 {
1634 #if wxUSE_THREADS
1635 // NB: explained in MB2WC
1636 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1637 #endif
1638
1639 size_t inlen = wxWcslen(psz);
1640 size_t inbuf = inlen * SIZEOF_WCHAR_T;
1641 size_t outbuf = n;
1642 size_t res, cres;
1643
1644 wchar_t *tmpbuf = 0;
1645
1646 if (ms_wcNeedsSwap)
1647 {
1648 // need to copy to temp buffer to switch endianness
1649 // (doing WC_BSWAP twice on the original buffer won't help, as it
1650 // could be in read-only memory, or be accessed in some other thread)
1651 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1652 for ( size_t i = 0; i < inlen; i++ )
1653 tmpbuf[n] = WC_BSWAP(psz[i]);
1654 tmpbuf[inlen] = L'\0';
1655 psz = tmpbuf;
1656 }
1657
1658 if (buf)
1659 {
1660 // have destination buffer, convert there
1661 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1662
1663 res = n-outbuf;
1664
1665 // NB: iconv was given only wcslen(psz) characters on input, and so
1666 // it couldn't convert the trailing zero. Let's do it ourselves
1667 // if there's some room left for it in the output buffer.
1668 if (res < n)
1669 buf[0] = 0;
1670 }
1671 else
1672 {
1673 // no destination buffer... convert using temp buffer
1674 // to calculate destination buffer requirement
1675 char tbuf[16];
1676 res = 0;
1677 do {
1678 buf = tbuf; outbuf = 16;
1679
1680 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1681
1682 res += 16 - outbuf;
1683 } while ((cres==(size_t)-1) && (errno==E2BIG));
1684 }
1685
1686 if (ms_wcNeedsSwap)
1687 {
1688 free(tmpbuf);
1689 }
1690
1691 if (ICONV_FAILED(cres, inbuf))
1692 {
1693 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1694 return (size_t)-1;
1695 }
1696
1697 return res;
1698 }
1699
1700 size_t wxMBConv_iconv::GetMBNulLen() const
1701 {
1702 if ( m_minMBCharWidth == 0 )
1703 {
1704 wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1705
1706 #if wxUSE_THREADS
1707 // NB: explained in MB2WC
1708 wxMutexLocker lock(self->m_iconvMutex);
1709 #endif
1710
1711 wchar_t *wnul = L"";
1712 char buf[8]; // should be enough for NUL in any encoding
1713 size_t inLen = sizeof(wchar_t),
1714 outLen = WXSIZEOF(buf);
1715 char *in = (char *)wnul;
1716 char *out = buf;
1717 if ( iconv(w2m, ICONV_CHAR_CAST(&in), &inLen, &out, &outLen) == (size_t)-1 )
1718 {
1719 self->m_minMBCharWidth = (size_t)-1;
1720 }
1721 else // ok
1722 {
1723 self->m_minMBCharWidth = out - buf;
1724 }
1725 }
1726
1727 return m_minMBCharWidth;
1728 }
1729
1730 #endif // HAVE_ICONV
1731
1732
1733 // ============================================================================
1734 // Win32 conversion classes
1735 // ============================================================================
1736
1737 #ifdef wxHAVE_WIN32_MB2WC
1738
1739 // from utils.cpp
1740 #if wxUSE_FONTMAP
1741 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1742 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1743 #endif
1744
1745 class wxMBConv_win32 : public wxMBConv
1746 {
1747 public:
1748 wxMBConv_win32()
1749 {
1750 m_CodePage = CP_ACP;
1751 m_minMBCharWidth = 0;
1752 }
1753
1754 #if wxUSE_FONTMAP
1755 wxMBConv_win32(const wxChar* name)
1756 {
1757 m_CodePage = wxCharsetToCodepage(name);
1758 m_minMBCharWidth = 0;
1759 }
1760
1761 wxMBConv_win32(wxFontEncoding encoding)
1762 {
1763 m_CodePage = wxEncodingToCodepage(encoding);
1764 m_minMBCharWidth = 0;
1765 }
1766 #endif // wxUSE_FONTMAP
1767
1768 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1769 {
1770 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1771 // the behaviour is not compatible with the Unix version (using iconv)
1772 // and break the library itself, e.g. wxTextInputStream::NextChar()
1773 // wouldn't work if reading an incomplete MB char didn't result in an
1774 // error
1775 //
1776 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1777 // an error (tested under Windows Server 2003) and apparently it is
1778 // done on purpose, i.e. the function accepts any input in this case
1779 // and although I'd prefer to return error on ill-formed output, our
1780 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1781 // explicitly ill-formed according to RFC 2152) neither so we don't
1782 // even have any fallback here...
1783 //
1784 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1785 // Win XP or newer and if it is specified on older versions, conversion
1786 // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1787 // fails. So we can only use the flag on newer Windows versions.
1788 // Additionally, the flag is not supported by UTF7, symbol and CJK
1789 // encodings. See here:
1790 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1791 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1792 int flags = 0;
1793 if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
1794 m_CodePage < 50000 &&
1795 IsAtLeastWin2kSP4() )
1796 {
1797 flags = MB_ERR_INVALID_CHARS;
1798 }
1799 else if ( m_CodePage == CP_UTF8 )
1800 {
1801 // Avoid round-trip in the special case of UTF-8 by using our
1802 // own UTF-8 conversion code:
1803 return wxMBConvUTF8().MB2WC(buf, psz, n);
1804 }
1805
1806 const size_t len = ::MultiByteToWideChar
1807 (
1808 m_CodePage, // code page
1809 flags, // flags: fall on error
1810 psz, // input string
1811 -1, // its length (NUL-terminated)
1812 buf, // output string
1813 buf ? n : 0 // size of output buffer
1814 );
1815 if ( !len )
1816 {
1817 // function totally failed
1818 return (size_t)-1;
1819 }
1820
1821 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1822 // check if we succeeded, by doing a double trip:
1823 if ( !flags && buf )
1824 {
1825 const size_t mbLen = strlen(psz);
1826 wxCharBuffer mbBuf(mbLen);
1827 if ( ::WideCharToMultiByte
1828 (
1829 m_CodePage,
1830 0,
1831 buf,
1832 -1,
1833 mbBuf.data(),
1834 mbLen + 1, // size in bytes, not length
1835 NULL,
1836 NULL
1837 ) == 0 ||
1838 strcmp(mbBuf, psz) != 0 )
1839 {
1840 // we didn't obtain the same thing we started from, hence
1841 // the conversion was lossy and we consider that it failed
1842 return (size_t)-1;
1843 }
1844 }
1845
1846 // note that it returns count of written chars for buf != NULL and size
1847 // of the needed buffer for buf == NULL so in either case the length of
1848 // the string (which never includes the terminating NUL) is one less
1849 return len - 1;
1850 }
1851
1852 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1853 {
1854 /*
1855 we have a problem here: by default, WideCharToMultiByte() may
1856 replace characters unrepresentable in the target code page with bad
1857 quality approximations such as turning "1/2" symbol (U+00BD) into
1858 "1" for the code pages which don't have it and we, obviously, want
1859 to avoid this at any price
1860
1861 the trouble is that this function does it _silently_, i.e. it won't
1862 even tell us whether it did or not... Win98/2000 and higher provide
1863 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1864 we have to resort to a round trip, i.e. check that converting back
1865 results in the same string -- this is, of course, expensive but
1866 otherwise we simply can't be sure to not garble the data.
1867 */
1868
1869 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1870 // it doesn't work with CJK encodings (which we test for rather roughly
1871 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1872 // supporting it
1873 BOOL usedDef wxDUMMY_INITIALIZE(false);
1874 BOOL *pUsedDef;
1875 int flags;
1876 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1877 {
1878 // it's our lucky day
1879 flags = WC_NO_BEST_FIT_CHARS;
1880 pUsedDef = &usedDef;
1881 }
1882 else // old system or unsupported encoding
1883 {
1884 flags = 0;
1885 pUsedDef = NULL;
1886 }
1887
1888 const size_t len = ::WideCharToMultiByte
1889 (
1890 m_CodePage, // code page
1891 flags, // either none or no best fit
1892 pwz, // input string
1893 -1, // it is (wide) NUL-terminated
1894 buf, // output buffer
1895 buf ? n : 0, // and its size
1896 NULL, // default "replacement" char
1897 pUsedDef // [out] was it used?
1898 );
1899
1900 if ( !len )
1901 {
1902 // function totally failed
1903 return (size_t)-1;
1904 }
1905
1906 // if we were really converting, check if we succeeded
1907 if ( buf )
1908 {
1909 if ( flags )
1910 {
1911 // check if the conversion failed, i.e. if any replacements
1912 // were done
1913 if ( usedDef )
1914 return (size_t)-1;
1915 }
1916 else // we must resort to double tripping...
1917 {
1918 wxWCharBuffer wcBuf(n);
1919 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1920 wcscmp(wcBuf, pwz) != 0 )
1921 {
1922 // we didn't obtain the same thing we started from, hence
1923 // the conversion was lossy and we consider that it failed
1924 return (size_t)-1;
1925 }
1926 }
1927 }
1928
1929 // see the comment above for the reason of "len - 1"
1930 return len - 1;
1931 }
1932
1933 virtual size_t GetMBNulLen() const
1934 {
1935 if ( m_minMBCharWidth == 0 )
1936 {
1937 int len = ::WideCharToMultiByte
1938 (
1939 m_CodePage, // code page
1940 0, // no flags
1941 L"", // input string
1942 1, // translate just the NUL
1943 NULL, // output buffer
1944 0, // and its size
1945 NULL, // no replacement char
1946 NULL // [out] don't care if it was used
1947 );
1948
1949 wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
1950 switch ( len )
1951 {
1952 default:
1953 wxLogDebug(_T("Unexpected NUL length %d"), len);
1954 // fall through
1955
1956 case 0:
1957 self->m_minMBCharWidth = (size_t)-1;
1958 break;
1959
1960 case 1:
1961 case 2:
1962 case 4:
1963 self->m_minMBCharWidth = len;
1964 break;
1965 }
1966 }
1967
1968 return m_minMBCharWidth;
1969 }
1970
1971 bool IsOk() const { return m_CodePage != -1; }
1972
1973 private:
1974 static bool CanUseNoBestFit()
1975 {
1976 static int s_isWin98Or2k = -1;
1977
1978 if ( s_isWin98Or2k == -1 )
1979 {
1980 int verMaj, verMin;
1981 switch ( wxGetOsVersion(&verMaj, &verMin) )
1982 {
1983 case wxWIN95:
1984 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1985 break;
1986
1987 case wxWINDOWS_NT:
1988 s_isWin98Or2k = verMaj >= 5;
1989 break;
1990
1991 default:
1992 // unknown, be conseravtive by default
1993 s_isWin98Or2k = 0;
1994 }
1995
1996 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1997 }
1998
1999 return s_isWin98Or2k == 1;
2000 }
2001
2002 static bool IsAtLeastWin2kSP4()
2003 {
2004 #ifdef __WXWINCE__
2005 return false;
2006 #else
2007 static int s_isAtLeastWin2kSP4 = -1;
2008
2009 if ( s_isAtLeastWin2kSP4 == -1 )
2010 {
2011 OSVERSIONINFOEX ver;
2012
2013 memset(&ver, 0, sizeof(ver));
2014 ver.dwOSVersionInfoSize = sizeof(ver);
2015 GetVersionEx((OSVERSIONINFO*)&ver);
2016
2017 s_isAtLeastWin2kSP4 =
2018 ((ver.dwMajorVersion > 5) || // Vista+
2019 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2020 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2021 ver.wServicePackMajor >= 4)) // 2000 SP4+
2022 ? 1 : 0;
2023 }
2024
2025 return s_isAtLeastWin2kSP4 == 1;
2026 #endif
2027 }
2028
2029
2030 // the code page we're working with
2031 long m_CodePage;
2032
2033 // cached result of GetMBNulLen(), set to 0 initially meaning
2034 // "unknown"
2035 size_t m_minMBCharWidth;
2036 };
2037
2038 #endif // wxHAVE_WIN32_MB2WC
2039
2040 // ============================================================================
2041 // Cocoa conversion classes
2042 // ============================================================================
2043
2044 #if defined(__WXCOCOA__)
2045
// RN: There is no UTF-32 support in either Core Foundation or Cocoa.
// Strangely enough, Core Foundation uses UTF-32 internally quite a
// bit - it's just not public (yet).
2049
2050 #include <CoreFoundation/CFString.h>
2051 #include <CoreFoundation/CFStringEncodingExt.h>
2052
2053 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
2054 {
2055 CFStringEncoding enc = kCFStringEncodingInvalidId ;
2056 if ( encoding == wxFONTENCODING_DEFAULT )
2057 {
2058 enc = CFStringGetSystemEncoding();
2059 }
2060 else switch( encoding)
2061 {
2062 case wxFONTENCODING_ISO8859_1 :
2063 enc = kCFStringEncodingISOLatin1 ;
2064 break ;
2065 case wxFONTENCODING_ISO8859_2 :
2066 enc = kCFStringEncodingISOLatin2;
2067 break ;
2068 case wxFONTENCODING_ISO8859_3 :
2069 enc = kCFStringEncodingISOLatin3 ;
2070 break ;
2071 case wxFONTENCODING_ISO8859_4 :
2072 enc = kCFStringEncodingISOLatin4;
2073 break ;
2074 case wxFONTENCODING_ISO8859_5 :
2075 enc = kCFStringEncodingISOLatinCyrillic;
2076 break ;
2077 case wxFONTENCODING_ISO8859_6 :
2078 enc = kCFStringEncodingISOLatinArabic;
2079 break ;
2080 case wxFONTENCODING_ISO8859_7 :
2081 enc = kCFStringEncodingISOLatinGreek;
2082 break ;
2083 case wxFONTENCODING_ISO8859_8 :
2084 enc = kCFStringEncodingISOLatinHebrew;
2085 break ;
2086 case wxFONTENCODING_ISO8859_9 :
2087 enc = kCFStringEncodingISOLatin5;
2088 break ;
2089 case wxFONTENCODING_ISO8859_10 :
2090 enc = kCFStringEncodingISOLatin6;
2091 break ;
2092 case wxFONTENCODING_ISO8859_11 :
2093 enc = kCFStringEncodingISOLatinThai;
2094 break ;
2095 case wxFONTENCODING_ISO8859_13 :
2096 enc = kCFStringEncodingISOLatin7;
2097 break ;
2098 case wxFONTENCODING_ISO8859_14 :
2099 enc = kCFStringEncodingISOLatin8;
2100 break ;
2101 case wxFONTENCODING_ISO8859_15 :
2102 enc = kCFStringEncodingISOLatin9;
2103 break ;
2104
2105 case wxFONTENCODING_KOI8 :
2106 enc = kCFStringEncodingKOI8_R;
2107 break ;
2108 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2109 enc = kCFStringEncodingDOSRussian;
2110 break ;
2111
2112 // case wxFONTENCODING_BULGARIAN :
2113 // enc = ;
2114 // break ;
2115
2116 case wxFONTENCODING_CP437 :
2117 enc =kCFStringEncodingDOSLatinUS ;
2118 break ;
2119 case wxFONTENCODING_CP850 :
2120 enc = kCFStringEncodingDOSLatin1;
2121 break ;
2122 case wxFONTENCODING_CP852 :
2123 enc = kCFStringEncodingDOSLatin2;
2124 break ;
2125 case wxFONTENCODING_CP855 :
2126 enc = kCFStringEncodingDOSCyrillic;
2127 break ;
2128 case wxFONTENCODING_CP866 :
2129 enc =kCFStringEncodingDOSRussian ;
2130 break ;
2131 case wxFONTENCODING_CP874 :
2132 enc = kCFStringEncodingDOSThai;
2133 break ;
2134 case wxFONTENCODING_CP932 :
2135 enc = kCFStringEncodingDOSJapanese;
2136 break ;
2137 case wxFONTENCODING_CP936 :
2138 enc =kCFStringEncodingDOSChineseSimplif ;
2139 break ;
2140 case wxFONTENCODING_CP949 :
2141 enc = kCFStringEncodingDOSKorean;
2142 break ;
2143 case wxFONTENCODING_CP950 :
2144 enc = kCFStringEncodingDOSChineseTrad;
2145 break ;
2146 case wxFONTENCODING_CP1250 :
2147 enc = kCFStringEncodingWindowsLatin2;
2148 break ;
2149 case wxFONTENCODING_CP1251 :
2150 enc =kCFStringEncodingWindowsCyrillic ;
2151 break ;
2152 case wxFONTENCODING_CP1252 :
2153 enc =kCFStringEncodingWindowsLatin1 ;
2154 break ;
2155 case wxFONTENCODING_CP1253 :
2156 enc = kCFStringEncodingWindowsGreek;
2157 break ;
2158 case wxFONTENCODING_CP1254 :
2159 enc = kCFStringEncodingWindowsLatin5;
2160 break ;
2161 case wxFONTENCODING_CP1255 :
2162 enc =kCFStringEncodingWindowsHebrew ;
2163 break ;
2164 case wxFONTENCODING_CP1256 :
2165 enc =kCFStringEncodingWindowsArabic ;
2166 break ;
2167 case wxFONTENCODING_CP1257 :
2168 enc = kCFStringEncodingWindowsBalticRim;
2169 break ;
2170 // This only really encodes to UTF7 (if that) evidently
2171 // case wxFONTENCODING_UTF7 :
2172 // enc = kCFStringEncodingNonLossyASCII ;
2173 // break ;
2174 case wxFONTENCODING_UTF8 :
2175 enc = kCFStringEncodingUTF8 ;
2176 break ;
2177 case wxFONTENCODING_EUC_JP :
2178 enc = kCFStringEncodingEUC_JP;
2179 break ;
2180 case wxFONTENCODING_UTF16 :
2181 enc = kCFStringEncodingUnicode ;
2182 break ;
2183 case wxFONTENCODING_MACROMAN :
2184 enc = kCFStringEncodingMacRoman ;
2185 break ;
2186 case wxFONTENCODING_MACJAPANESE :
2187 enc = kCFStringEncodingMacJapanese ;
2188 break ;
2189 case wxFONTENCODING_MACCHINESETRAD :
2190 enc = kCFStringEncodingMacChineseTrad ;
2191 break ;
2192 case wxFONTENCODING_MACKOREAN :
2193 enc = kCFStringEncodingMacKorean ;
2194 break ;
2195 case wxFONTENCODING_MACARABIC :
2196 enc = kCFStringEncodingMacArabic ;
2197 break ;
2198 case wxFONTENCODING_MACHEBREW :
2199 enc = kCFStringEncodingMacHebrew ;
2200 break ;
2201 case wxFONTENCODING_MACGREEK :
2202 enc = kCFStringEncodingMacGreek ;
2203 break ;
2204 case wxFONTENCODING_MACCYRILLIC :
2205 enc = kCFStringEncodingMacCyrillic ;
2206 break ;
2207 case wxFONTENCODING_MACDEVANAGARI :
2208 enc = kCFStringEncodingMacDevanagari ;
2209 break ;
2210 case wxFONTENCODING_MACGURMUKHI :
2211 enc = kCFStringEncodingMacGurmukhi ;
2212 break ;
2213 case wxFONTENCODING_MACGUJARATI :
2214 enc = kCFStringEncodingMacGujarati ;
2215 break ;
2216 case wxFONTENCODING_MACORIYA :
2217 enc = kCFStringEncodingMacOriya ;
2218 break ;
2219 case wxFONTENCODING_MACBENGALI :
2220 enc = kCFStringEncodingMacBengali ;
2221 break ;
2222 case wxFONTENCODING_MACTAMIL :
2223 enc = kCFStringEncodingMacTamil ;
2224 break ;
2225 case wxFONTENCODING_MACTELUGU :
2226 enc = kCFStringEncodingMacTelugu ;
2227 break ;
2228 case wxFONTENCODING_MACKANNADA :
2229 enc = kCFStringEncodingMacKannada ;
2230 break ;
2231 case wxFONTENCODING_MACMALAJALAM :
2232 enc = kCFStringEncodingMacMalayalam ;
2233 break ;
2234 case wxFONTENCODING_MACSINHALESE :
2235 enc = kCFStringEncodingMacSinhalese ;
2236 break ;
2237 case wxFONTENCODING_MACBURMESE :
2238 enc = kCFStringEncodingMacBurmese ;
2239 break ;
2240 case wxFONTENCODING_MACKHMER :
2241 enc = kCFStringEncodingMacKhmer ;
2242 break ;
2243 case wxFONTENCODING_MACTHAI :
2244 enc = kCFStringEncodingMacThai ;
2245 break ;
2246 case wxFONTENCODING_MACLAOTIAN :
2247 enc = kCFStringEncodingMacLaotian ;
2248 break ;
2249 case wxFONTENCODING_MACGEORGIAN :
2250 enc = kCFStringEncodingMacGeorgian ;
2251 break ;
2252 case wxFONTENCODING_MACARMENIAN :
2253 enc = kCFStringEncodingMacArmenian ;
2254 break ;
2255 case wxFONTENCODING_MACCHINESESIMP :
2256 enc = kCFStringEncodingMacChineseSimp ;
2257 break ;
2258 case wxFONTENCODING_MACTIBETAN :
2259 enc = kCFStringEncodingMacTibetan ;
2260 break ;
2261 case wxFONTENCODING_MACMONGOLIAN :
2262 enc = kCFStringEncodingMacMongolian ;
2263 break ;
2264 case wxFONTENCODING_MACETHIOPIC :
2265 enc = kCFStringEncodingMacEthiopic ;
2266 break ;
2267 case wxFONTENCODING_MACCENTRALEUR :
2268 enc = kCFStringEncodingMacCentralEurRoman ;
2269 break ;
2270 case wxFONTENCODING_MACVIATNAMESE :
2271 enc = kCFStringEncodingMacVietnamese ;
2272 break ;
2273 case wxFONTENCODING_MACARABICEXT :
2274 enc = kCFStringEncodingMacExtArabic ;
2275 break ;
2276 case wxFONTENCODING_MACSYMBOL :
2277 enc = kCFStringEncodingMacSymbol ;
2278 break ;
2279 case wxFONTENCODING_MACDINGBATS :
2280 enc = kCFStringEncodingMacDingbats ;
2281 break ;
2282 case wxFONTENCODING_MACTURKISH :
2283 enc = kCFStringEncodingMacTurkish ;
2284 break ;
2285 case wxFONTENCODING_MACCROATIAN :
2286 enc = kCFStringEncodingMacCroatian ;
2287 break ;
2288 case wxFONTENCODING_MACICELANDIC :
2289 enc = kCFStringEncodingMacIcelandic ;
2290 break ;
2291 case wxFONTENCODING_MACROMANIAN :
2292 enc = kCFStringEncodingMacRomanian ;
2293 break ;
2294 case wxFONTENCODING_MACCELTIC :
2295 enc = kCFStringEncodingMacCeltic ;
2296 break ;
2297 case wxFONTENCODING_MACGAELIC :
2298 enc = kCFStringEncodingMacGaelic ;
2299 break ;
2300 // case wxFONTENCODING_MACKEYBOARD :
2301 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2302 // break ;
2303 default :
2304 // because gcc is picky
2305 break ;
2306 } ;
2307 return enc ;
2308 }
2309
2310 class wxMBConv_cocoa : public wxMBConv
2311 {
2312 public:
2313 wxMBConv_cocoa()
2314 {
2315 Init(CFStringGetSystemEncoding()) ;
2316 }
2317
2318 #if wxUSE_FONTMAP
2319 wxMBConv_cocoa(const wxChar* name)
2320 {
2321 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2322 }
2323 #endif
2324
2325 wxMBConv_cocoa(wxFontEncoding encoding)
2326 {
2327 Init( wxCFStringEncFromFontEnc(encoding) );
2328 }
2329
2330 ~wxMBConv_cocoa()
2331 {
2332 }
2333
2334 void Init( CFStringEncoding encoding)
2335 {
2336 m_encoding = encoding ;
2337 }
2338
2339 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2340 {
2341 wxASSERT(szUnConv);
2342
2343 CFStringRef theString = CFStringCreateWithBytes (
2344 NULL, //the allocator
2345 (const UInt8*)szUnConv,
2346 strlen(szUnConv),
2347 m_encoding,
2348 false //no BOM/external representation
2349 );
2350
2351 wxASSERT(theString);
2352
2353 size_t nOutLength = CFStringGetLength(theString);
2354
2355 if (szOut == NULL)
2356 {
2357 CFRelease(theString);
2358 return nOutLength;
2359 }
2360
2361 CFRange theRange = { 0, nOutSize };
2362
2363 #if SIZEOF_WCHAR_T == 4
2364 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2365 #endif
2366
2367 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2368
2369 CFRelease(theString);
2370
2371 szUniCharBuffer[nOutLength] = '\0' ;
2372
2373 #if SIZEOF_WCHAR_T == 4
2374 wxMBConvUTF16 converter ;
2375 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2376 delete[] szUniCharBuffer;
2377 #endif
2378
2379 return nOutLength;
2380 }
2381
2382 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2383 {
2384 wxASSERT(szUnConv);
2385
2386 size_t nRealOutSize;
2387 size_t nBufSize = wxWcslen(szUnConv);
2388 UniChar* szUniBuffer = (UniChar*) szUnConv;
2389
2390 #if SIZEOF_WCHAR_T == 4
2391 wxMBConvUTF16 converter ;
2392 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2393 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2394 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2395 nBufSize /= sizeof(UniChar);
2396 #endif
2397
2398 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2399 NULL, //allocator
2400 szUniBuffer,
2401 nBufSize,
2402 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2403 );
2404
2405 wxASSERT(theString);
2406
2407 //Note that CER puts a BOM when converting to unicode
2408 //so we check and use getchars instead in that case
2409 if (m_encoding == kCFStringEncodingUnicode)
2410 {
2411 if (szOut != NULL)
2412 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2413
2414 nRealOutSize = CFStringGetLength(theString) + 1;
2415 }
2416 else
2417 {
2418 CFStringGetBytes(
2419 theString,
2420 CFRangeMake(0, CFStringGetLength(theString)),
2421 m_encoding,
2422 0, //what to put in characters that can't be converted -
2423 //0 tells CFString to return NULL if it meets such a character
2424 false, //not an external representation
2425 (UInt8*) szOut,
2426 nOutSize,
2427 (CFIndex*) &nRealOutSize
2428 );
2429 }
2430
2431 CFRelease(theString);
2432
2433 #if SIZEOF_WCHAR_T == 4
2434 delete[] szUniBuffer;
2435 #endif
2436
2437 return nRealOutSize - 1;
2438 }
2439
2440 bool IsOk() const
2441 {
2442 return m_encoding != kCFStringEncodingInvalidId &&
2443 CFStringIsEncodingAvailable(m_encoding);
2444 }
2445
2446 private:
2447 CFStringEncoding m_encoding ;
2448 };
2449
2450 #endif // defined(__WXCOCOA__)
2451
2452 // ============================================================================
2453 // Mac conversion classes
2454 // ============================================================================
2455
2456 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2457
2458 class wxMBConv_mac : public wxMBConv
2459 {
2460 public:
2461 wxMBConv_mac()
2462 {
2463 Init(CFStringGetSystemEncoding()) ;
2464 }
2465
2466 #if wxUSE_FONTMAP
2467 wxMBConv_mac(const wxChar* name)
2468 {
2469 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2470 }
2471 #endif
2472
2473 wxMBConv_mac(wxFontEncoding encoding)
2474 {
2475 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2476 }
2477
2478 ~wxMBConv_mac()
2479 {
2480 OSStatus status = noErr ;
2481 status = TECDisposeConverter(m_MB2WC_converter);
2482 status = TECDisposeConverter(m_WC2MB_converter);
2483 }
2484
2485
2486 void Init( TextEncodingBase encoding)
2487 {
2488 OSStatus status = noErr ;
2489 m_char_encoding = encoding ;
2490 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2491
2492 status = TECCreateConverter(&m_MB2WC_converter,
2493 m_char_encoding,
2494 m_unicode_encoding);
2495 status = TECCreateConverter(&m_WC2MB_converter,
2496 m_unicode_encoding,
2497 m_char_encoding);
2498 }
2499
2500 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2501 {
2502 OSStatus status = noErr ;
2503 ByteCount byteOutLen ;
2504 ByteCount byteInLen = strlen(psz) ;
2505 wchar_t *tbuf = NULL ;
2506 UniChar* ubuf = NULL ;
2507 size_t res = 0 ;
2508
2509 if (buf == NULL)
2510 {
2511 //apple specs say at least 32
2512 n = wxMax( 32 , byteInLen ) ;
2513 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2514 }
2515 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2516 #if SIZEOF_WCHAR_T == 4
2517 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2518 #else
2519 ubuf = (UniChar*) (buf ? buf : tbuf) ;
2520 #endif
2521 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2522 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2523 #if SIZEOF_WCHAR_T == 4
2524 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2525 // is not properly terminated we get random characters at the end
2526 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2527 wxMBConvUTF16 converter ;
2528 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2529 free( ubuf ) ;
2530 #else
2531 res = byteOutLen / sizeof( UniChar ) ;
2532 #endif
2533 if ( buf == NULL )
2534 free(tbuf) ;
2535
2536 if ( buf && res < n)
2537 buf[res] = 0;
2538
2539 return res ;
2540 }
2541
2542 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2543 {
2544 OSStatus status = noErr ;
2545 ByteCount byteOutLen ;
2546 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2547
2548 char *tbuf = NULL ;
2549
2550 if (buf == NULL)
2551 {
2552 //apple specs say at least 32
2553 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2554 tbuf = (char*) malloc( n ) ;
2555 }
2556
2557 ByteCount byteBufferLen = n ;
2558 UniChar* ubuf = NULL ;
2559 #if SIZEOF_WCHAR_T == 4
2560 wxMBConvUTF16 converter ;
2561 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2562 byteInLen = unicharlen ;
2563 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2564 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2565 #else
2566 ubuf = (UniChar*) psz ;
2567 #endif
2568 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2569 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2570 #if SIZEOF_WCHAR_T == 4
2571 free( ubuf ) ;
2572 #endif
2573 if ( buf == NULL )
2574 free(tbuf) ;
2575
2576 size_t res = byteOutLen ;
2577 if ( buf && res < n)
2578 {
2579 buf[res] = 0;
2580
2581 //we need to double-trip to verify it didn't insert any ? in place
2582 //of bogus characters
2583 wxWCharBuffer wcBuf(n);
2584 size_t pszlen = wxWcslen(psz);
2585 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2586 wxWcslen(wcBuf) != pszlen ||
2587 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2588 {
2589 // we didn't obtain the same thing we started from, hence
2590 // the conversion was lossy and we consider that it failed
2591 return (size_t)-1;
2592 }
2593 }
2594
2595 return res ;
2596 }
2597
2598 bool IsOk() const
2599 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2600
2601 private:
2602 TECObjectRef m_MB2WC_converter ;
2603 TECObjectRef m_WC2MB_converter ;
2604
2605 TextEncodingBase m_char_encoding ;
2606 TextEncodingBase m_unicode_encoding ;
2607 };
2608
2609 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2610
2611 // ============================================================================
2612 // wxEncodingConverter based conversion classes
2613 // ============================================================================
2614
2615 #if wxUSE_FONTMAP
2616
2617 class wxMBConv_wxwin : public wxMBConv
2618 {
2619 private:
2620 void Init()
2621 {
2622 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2623 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2624 }
2625
2626 public:
2627 // temporarily just use wxEncodingConverter stuff,
2628 // so that it works while a better implementation is built
2629 wxMBConv_wxwin(const wxChar* name)
2630 {
2631 if (name)
2632 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2633 else
2634 m_enc = wxFONTENCODING_SYSTEM;
2635
2636 Init();
2637 }
2638
2639 wxMBConv_wxwin(wxFontEncoding enc)
2640 {
2641 m_enc = enc;
2642
2643 Init();
2644 }
2645
2646 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2647 {
2648 size_t inbuf = strlen(psz);
2649 if (buf)
2650 {
2651 if (!m2w.Convert(psz,buf))
2652 return (size_t)-1;
2653 }
2654 return inbuf;
2655 }
2656
2657 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2658 {
2659 const size_t inbuf = wxWcslen(psz);
2660 if (buf)
2661 {
2662 if (!w2m.Convert(psz,buf))
2663 return (size_t)-1;
2664 }
2665
2666 return inbuf;
2667 }
2668
2669 virtual size_t GetMBNulLen() const
2670 {
2671 switch ( m_enc )
2672 {
2673 case wxFONTENCODING_UTF16BE:
2674 case wxFONTENCODING_UTF16LE:
2675 return 2;
2676
2677 case wxFONTENCODING_UTF32BE:
2678 case wxFONTENCODING_UTF32LE:
2679 return 4;
2680
2681 default:
2682 return 1;
2683 }
2684 }
2685
2686 bool IsOk() const { return m_ok; }
2687
2688 public:
2689 wxFontEncoding m_enc;
2690 wxEncodingConverter m2w, w2m;
2691
2692 private:
2693 // were we initialized successfully?
2694 bool m_ok;
2695
2696 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2697 };
2698
2699 // make the constructors available for unit testing
2700 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2701 {
2702 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2703 if ( !result->IsOk() )
2704 {
2705 delete result;
2706 return 0;
2707 }
2708 return result;
2709 }
2710
2711 #endif // wxUSE_FONTMAP
2712
2713 // ============================================================================
2714 // wxCSConv implementation
2715 // ============================================================================
2716
2717 void wxCSConv::Init()
2718 {
2719 m_name = NULL;
2720 m_convReal = NULL;
2721 m_deferred = true;
2722 }
2723
2724 wxCSConv::wxCSConv(const wxChar *charset)
2725 {
2726 Init();
2727
2728 if ( charset )
2729 {
2730 SetName(charset);
2731 }
2732
2733 #if wxUSE_FONTMAP
2734 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2735 #else
2736 m_encoding = wxFONTENCODING_SYSTEM;
2737 #endif
2738 }
2739
2740 wxCSConv::wxCSConv(wxFontEncoding encoding)
2741 {
2742 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2743 {
2744 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2745
2746 encoding = wxFONTENCODING_SYSTEM;
2747 }
2748
2749 Init();
2750
2751 m_encoding = encoding;
2752 }
2753
2754 wxCSConv::~wxCSConv()
2755 {
2756 Clear();
2757 }
2758
2759 wxCSConv::wxCSConv(const wxCSConv& conv)
2760 : wxMBConv()
2761 {
2762 Init();
2763
2764 SetName(conv.m_name);
2765 m_encoding = conv.m_encoding;
2766 }
2767
2768 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2769 {
2770 Clear();
2771
2772 SetName(conv.m_name);
2773 m_encoding = conv.m_encoding;
2774
2775 return *this;
2776 }
2777
2778 void wxCSConv::Clear()
2779 {
2780 free(m_name);
2781 delete m_convReal;
2782
2783 m_name = NULL;
2784 m_convReal = NULL;
2785 }
2786
2787 void wxCSConv::SetName(const wxChar *charset)
2788 {
2789 if (charset)
2790 {
2791 m_name = wxStrdup(charset);
2792 m_deferred = true;
2793 }
2794 }
2795
2796 #if wxUSE_FONTMAP
2797 #include "wx/hashmap.h"
2798
2799 WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
2800 wxEncodingNameCache );
2801
2802 static wxEncodingNameCache gs_nameCache;
2803 #endif
2804
2805 wxMBConv *wxCSConv::DoCreate() const
2806 {
2807 #if wxUSE_FONTMAP
2808 wxLogTrace(TRACE_STRCONV,
2809 wxT("creating conversion for %s"),
2810 (m_name ? m_name
2811 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2812 #endif // wxUSE_FONTMAP
2813
2814 // check for the special case of ASCII or ISO8859-1 charset: as we have
2815 // special knowledge of it anyhow, we don't need to create a special
2816 // conversion object
2817 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
2818 m_encoding == wxFONTENCODING_DEFAULT )
2819 {
2820 // don't convert at all
2821 return NULL;
2822 }
2823
2824 // we trust OS to do conversion better than we can so try external
2825 // conversion methods first
2826 //
2827 // the full order is:
2828 // 1. OS conversion (iconv() under Unix or Win32 API)
2829 // 2. hard coded conversions for UTF
2830 // 3. wxEncodingConverter as fall back
2831
2832 // step (1)
2833 #ifdef HAVE_ICONV
2834 #if !wxUSE_FONTMAP
2835 if ( m_name )
2836 #endif // !wxUSE_FONTMAP
2837 {
2838 wxString name(m_name);
2839 wxFontEncoding encoding(m_encoding);
2840
2841 if ( !name.empty() )
2842 {
2843 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2844 if ( conv->IsOk() )
2845 return conv;
2846
2847 delete conv;
2848
2849 #if wxUSE_FONTMAP
2850 encoding =
2851 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2852 #endif // wxUSE_FONTMAP
2853 }
2854 #if wxUSE_FONTMAP
2855 {
2856 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2857 if ( it != gs_nameCache.end() )
2858 {
2859 if ( it->second.empty() )
2860 return NULL;
2861
2862 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2863 if ( conv->IsOk() )
2864 return conv;
2865
2866 delete conv;
2867 }
2868
2869 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2870
2871 for ( ; *names; ++names )
2872 {
2873 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2874 if ( conv->IsOk() )
2875 {
2876 gs_nameCache[encoding] = *names;
2877 return conv;
2878 }
2879
2880 delete conv;
2881 }
2882
2883 gs_nameCache[encoding] = _T(""); // cache the failure
2884 }
2885 #endif // wxUSE_FONTMAP
2886 }
2887 #endif // HAVE_ICONV
2888
2889 #ifdef wxHAVE_WIN32_MB2WC
2890 {
2891 #if wxUSE_FONTMAP
2892 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2893 : new wxMBConv_win32(m_encoding);
2894 if ( conv->IsOk() )
2895 return conv;
2896
2897 delete conv;
2898 #else
2899 return NULL;
2900 #endif
2901 }
2902 #endif // wxHAVE_WIN32_MB2WC
2903 #if defined(__WXMAC__)
2904 {
2905 // leave UTF16 and UTF32 to the built-ins of wx
2906 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2907 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2908 {
2909
2910 #if wxUSE_FONTMAP
2911 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2912 : new wxMBConv_mac(m_encoding);
2913 #else
2914 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2915 #endif
2916 if ( conv->IsOk() )
2917 return conv;
2918
2919 delete conv;
2920 }
2921 }
2922 #endif
2923 #if defined(__WXCOCOA__)
2924 {
2925 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2926 {
2927
2928 #if wxUSE_FONTMAP
2929 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2930 : new wxMBConv_cocoa(m_encoding);
2931 #else
2932 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2933 #endif
2934 if ( conv->IsOk() )
2935 return conv;
2936
2937 delete conv;
2938 }
2939 }
2940 #endif
2941 // step (2)
2942 wxFontEncoding enc = m_encoding;
2943 #if wxUSE_FONTMAP
2944 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2945 {
2946 // use "false" to suppress interactive dialogs -- we can be called from
2947 // anywhere and popping up a dialog from here is the last thing we want to
2948 // do
2949 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2950 }
2951 #endif // wxUSE_FONTMAP
2952
2953 switch ( enc )
2954 {
2955 case wxFONTENCODING_UTF7:
2956 return new wxMBConvUTF7;
2957
2958 case wxFONTENCODING_UTF8:
2959 return new wxMBConvUTF8;
2960
2961 case wxFONTENCODING_UTF16BE:
2962 return new wxMBConvUTF16BE;
2963
2964 case wxFONTENCODING_UTF16LE:
2965 return new wxMBConvUTF16LE;
2966
2967 case wxFONTENCODING_UTF32BE:
2968 return new wxMBConvUTF32BE;
2969
2970 case wxFONTENCODING_UTF32LE:
2971 return new wxMBConvUTF32LE;
2972
2973 default:
2974 // nothing to do but put here to suppress gcc warnings
2975 ;
2976 }
2977
2978 // step (3)
2979 #if wxUSE_FONTMAP
2980 {
2981 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2982 : new wxMBConv_wxwin(m_encoding);
2983 if ( conv->IsOk() )
2984 return conv;
2985
2986 delete conv;
2987 }
2988 #endif // wxUSE_FONTMAP
2989
2990 // NB: This is a hack to prevent deadlock. What could otherwise happen
2991 // in Unicode build: wxConvLocal creation ends up being here
2992 // because of some failure and logs the error. But wxLog will try to
2993 // attach timestamp, for which it will need wxConvLocal (to convert
2994 // time to char* and then wchar_t*), but that fails, tries to log
2995 // error, but wxLog has a (already locked) critical section that
2996 // guards static buffer.
2997 static bool alreadyLoggingError = false;
2998 if (!alreadyLoggingError)
2999 {
3000 alreadyLoggingError = true;
3001 wxLogError(_("Cannot convert from the charset '%s'!"),
3002 m_name ? m_name
3003 :
3004 #if wxUSE_FONTMAP
3005 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
3006 #else // !wxUSE_FONTMAP
3007 wxString::Format(_("encoding %s"), m_encoding).c_str()
3008 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
3009 );
3010 alreadyLoggingError = false;
3011 }
3012
3013 return NULL;
3014 }
3015
3016 void wxCSConv::CreateConvIfNeeded() const
3017 {
3018 if ( m_deferred )
3019 {
3020 wxCSConv *self = (wxCSConv *)this; // const_cast
3021
3022 #if wxUSE_INTL
3023 // if we don't have neither the name nor the encoding, use the default
3024 // encoding for this system
3025 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3026 {
3027 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
3028 }
3029 #endif // wxUSE_INTL
3030
3031 self->m_convReal = DoCreate();
3032 self->m_deferred = false;
3033 }
3034 }
3035
3036 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
3037 {
3038 CreateConvIfNeeded();
3039
3040 if (m_convReal)
3041 return m_convReal->MB2WC(buf, psz, n);
3042
3043 // latin-1 (direct)
3044 size_t len = strlen(psz);
3045
3046 if (buf)
3047 {
3048 for (size_t c = 0; c <= len; c++)
3049 buf[c] = (unsigned char)(psz[c]);
3050 }
3051
3052 return len;
3053 }
3054
3055 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
3056 {
3057 CreateConvIfNeeded();
3058
3059 if (m_convReal)
3060 return m_convReal->WC2MB(buf, psz, n);
3061
3062 // latin-1 (direct)
3063 const size_t len = wxWcslen(psz);
3064 if (buf)
3065 {
3066 for (size_t c = 0; c <= len; c++)
3067 {
3068 if (psz[c] > 0xFF)
3069 return (size_t)-1;
3070 buf[c] = (char)psz[c];
3071 }
3072 }
3073 else
3074 {
3075 for (size_t c = 0; c <= len; c++)
3076 {
3077 if (psz[c] > 0xFF)
3078 return (size_t)-1;
3079 }
3080 }
3081
3082 return len;
3083 }
3084
3085 size_t wxCSConv::GetMBNulLen() const
3086 {
3087 CreateConvIfNeeded();
3088
3089 if ( m_convReal )
3090 {
3091 return m_convReal->GetMBNulLen();
3092 }
3093
3094 return 1;
3095 }
3096
3097 // ----------------------------------------------------------------------------
3098 // globals
3099 // ----------------------------------------------------------------------------
3100
3101 #ifdef __WINDOWS__
3102 static wxMBConv_win32 wxConvLibcObj;
3103 #elif defined(__WXMAC__) && !defined(__MACH__)
3104 static wxMBConv_mac wxConvLibcObj ;
3105 #else
3106 static wxMBConvLibc wxConvLibcObj;
3107 #endif
3108
3109 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
3110 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
3111 static wxMBConvUTF7 wxConvUTF7Obj;
3112 static wxMBConvUTF8 wxConvUTF8Obj;
3113
3114 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
3115 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
3116 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
3117 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
3118 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
3119 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
3120 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
3121 #ifdef __WXOSX__
3122 wxConvUTF8Obj;
3123 #else
3124 wxConvLibcObj;
3125 #endif
3126
3127
3128 #else // !wxUSE_WCHAR_T
3129
3130 // stand-ins in absence of wchar_t
3131 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
3132 wxConvISO8859_1,
3133 wxConvLocal,
3134 wxConvUTF8;
3135
3136 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T