]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
Warning fix.
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
6 // Modified by:
7 // Created: 29/01/98
8 // RCS-ID: $Id$
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
14
15 // ============================================================================
16 // declarations
17 // ============================================================================
18
19 // ----------------------------------------------------------------------------
20 // headers
21 // ----------------------------------------------------------------------------
22
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
25
26 #ifdef __BORLANDC__
27 #pragma hdrstop
28 #endif
29
30 #ifndef WX_PRECOMP
31 #include "wx/intl.h"
32 #include "wx/log.h"
33 #endif // WX_PRECOMP
34
35 #include "wx/strconv.h"
36
37 #if wxUSE_WCHAR_T
38
39 #ifdef __WINDOWS__
40 #include "wx/msw/private.h"
41 #include "wx/msw/missing.h"
42 #endif
43
44 #ifndef __WXWINCE__
45 #include <errno.h>
46 #endif
47
48 #include <ctype.h>
49 #include <string.h>
50 #include <stdlib.h>
51
52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
53 #define wxHAVE_WIN32_MB2WC
54 #endif // __WIN32__ but !__WXMICROWIN__
55
56 #ifdef __SALFORDC__
57 #include <clib.h>
58 #endif
59
60 #ifdef HAVE_ICONV
61 #include <iconv.h>
62 #include "wx/thread.h"
63 #endif
64
65 #include "wx/encconv.h"
66 #include "wx/fontmap.h"
67 #include "wx/utils.h"
68
69 #ifdef __WXMAC__
70 #ifndef __DARWIN__
71 #include <ATSUnicode.h>
72 #include <TextCommon.h>
73 #include <TextEncodingConverter.h>
74 #endif
75
76 #include "wx/mac/private.h" // includes mac headers
77 #endif
78
79 #define TRACE_STRCONV _T("strconv")
80
81 #if SIZEOF_WCHAR_T == 2
82 #define WC_UTF16
83 #endif
84
85 // ============================================================================
86 // implementation
87 // ============================================================================
88
// helper function of cMB2WC(): returns true if at least one of the n bytes
// starting at p is not NUL and false if all of them are (or if n is 0)
static bool NotAllNULs(const char *p, size_t n)
{
    for ( size_t i = 0; i < n; i++ )
    {
        if ( p[i] != '\0' )
            return true;
    }

    return false;
}
97
98 // ----------------------------------------------------------------------------
99 // UTF-16 en/decoding to/from UCS-4
100 // ----------------------------------------------------------------------------
101
102
103 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
104 {
105 if (input<=0xffff)
106 {
107 if (output)
108 *output = (wxUint16) input;
109 return 1;
110 }
111 else if (input>=0x110000)
112 {
113 return (size_t)-1;
114 }
115 else
116 {
117 if (output)
118 {
119 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
120 *output = (wxUint16) ((input&0x3ff)+0xdc00);
121 }
122 return 2;
123 }
124 }
125
126 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
127 {
128 if ((*input<0xd800) || (*input>0xdfff))
129 {
130 output = *input;
131 return 1;
132 }
133 else if ((input[1]<0xdc00) || (input[1]>0xdfff))
134 {
135 output = *input;
136 return (size_t)-1;
137 }
138 else
139 {
140 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
141 return 2;
142 }
143 }
144
145
146 // ----------------------------------------------------------------------------
147 // wxMBConv
148 // ----------------------------------------------------------------------------
149
150 wxMBConv::~wxMBConv()
151 {
152 // nothing to do here (necessary for Darwin linking probably)
153 }
154
155 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
156 {
157 if ( psz )
158 {
159 // calculate the length of the buffer needed first
160 size_t nLen = MB2WC(NULL, psz, 0);
161 if ( nLen != (size_t)-1 )
162 {
163 // now do the actual conversion
164 wxWCharBuffer buf(nLen);
165 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
166 if ( nLen != (size_t)-1 )
167 {
168 return buf;
169 }
170 }
171 }
172
173 wxWCharBuffer buf((wchar_t *)NULL);
174
175 return buf;
176 }
177
178 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
179 {
180 if ( pwz )
181 {
182 size_t nLen = WC2MB(NULL, pwz, 0);
183 if ( nLen != (size_t)-1 )
184 {
185 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
186 nLen = WC2MB(buf.data(), pwz, nLen + 4);
187 if ( nLen != (size_t)-1 )
188 {
189 return buf;
190 }
191 }
192 }
193
194 wxCharBuffer buf((char *)NULL);
195
196 return buf;
197 }
198
199 const wxWCharBuffer
200 wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
201 {
202 // the currently accumulated wide characters
203 wxWCharBuffer wbuf;
204
205 // the current length of wbuf
206 size_t lenBuf = 0;
207
208 // the number of NULs terminating this string
209 size_t nulLen wxDUMMY_INITIALIZE(0);
210
211 // make a copy of the input string unless it is already properly
212 // NUL-terminated
213 wxCharBuffer bufTmp;
214
215 // if we were not given the input size we just have to assume that the
216 // string is properly terminated as we have no way of knowing how long it
217 // is anyhow, but if we do have the size check whether there are enough
218 // NULs at the end
219 if ( inLen != (size_t)-1 )
220 {
221 // we need to know how to find the end of this string
222 nulLen = GetMinMBCharWidth();
223 if ( nulLen == (size_t)-1 )
224 return wbuf;
225
226 // if there are enough NULs we can avoid the copy
227 if ( inLen < nulLen || NotAllNULs(in + inLen - nulLen, nulLen) )
228 {
229 // make a copy in order to properly NUL-terminate the string
230 bufTmp = wxCharBuffer(inLen + nulLen - 1 /* 1 will be added */);
231 char * const p = bufTmp.data();
232 memcpy(p, in, inLen);
233 for ( char *s = p + inLen; s < p + inLen + nulLen; s++ )
234 *s = '\0';
235 }
236 }
237
238 if ( bufTmp )
239 in = bufTmp;
240
241 size_t lenChunk;
242 for ( const char * const inEnd = in + inLen;; )
243 {
244 // try to convert the current chunk
245 lenChunk = MB2WC(NULL, in, 0);
246 if ( lenChunk == 0 )
247 {
248 // nothing left in the input string, conversion succeeded
249 break;
250 }
251
252 if ( lenChunk == (size_t)-1 )
253 break;
254
255 // if we already have a previous chunk, leave the NUL separating it
256 // from this one
257 if ( lenBuf )
258 lenBuf++;
259
260 const size_t lenBufNew = lenBuf + lenChunk;
261 if ( !wbuf.extend(lenBufNew) )
262 {
263 lenChunk = (size_t)-1;
264 break;
265 }
266
267 lenChunk = MB2WC(wbuf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
268 if ( lenChunk == (size_t)-1 )
269 break;
270
271 lenBuf = lenBufNew;
272
273 if ( inLen == (size_t)-1 )
274 {
275 // convert only one chunk in this case, as we suppose that the
276 // string is NUL-terminated and so inEnd is not used at all
277 break;
278 }
279
280 // advance the input pointer past the end of this chunk
281 while ( NotAllNULs(in, nulLen) )
282 {
283 // notice that we must skip over multiple bytes here as we suppose
284 // that if NUL takes 2 or 4 bytes, then all the other characters do
285 // too and so if advanced by a single byte we might erroneously
286 // detect sequences of NUL bytes in the middle of the input
287 in += nulLen;
288 }
289
290 in += nulLen; // skipping over its terminator as well
291
292 // note that ">=" (and not just "==") is needed here as the terminator
293 // we skipped just above could be inside or just after the buffer
294 // delimited by inEnd
295 if ( in >= inEnd )
296 break;
297 }
298
299 if ( lenChunk == (size_t)-1 )
300 {
301 // conversion failed
302 lenBuf = 0;
303 wbuf.reset();
304 }
305
306 if ( outLen )
307 *outLen = lenBuf;
308
309 return wbuf;
310 }
311
312 const wxCharBuffer
313 wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
314 {
315 // the currently accumulated multibyte characters
316 wxCharBuffer buf;
317
318 // the current length of buf
319 size_t lenBuf = 0;
320
321 // make a copy of the input string unless it is already properly
322 // NUL-terminated
323 //
324 // if we don't know its length we have no choice but to assume that it is,
325 // indeed, properly terminated
326 wxWCharBuffer bufTmp;
327 if ( inLen == (size_t)-1 )
328 {
329 inLen = wxWcslen(in) + 1;
330 }
331 else if ( inLen != 0 && in[inLen - 1] != L'\0' )
332 {
333 // make a copy in order to properly NUL-terminate the string
334 bufTmp = wxWCharBuffer(inLen);
335 memcpy(bufTmp.data(), in, inLen*sizeof(wchar_t));
336 }
337
338 if ( bufTmp )
339 in = bufTmp;
340
341 for ( const wchar_t * const inEnd = in + inLen;; )
342 {
343 // try to convert the current chunk, if anything left
344 size_t lenChunk = in < inEnd ? WC2MB(NULL, in, 0) : 0;
345 if ( lenChunk == 0 )
346 {
347 // nothing left in the input string, conversion succeeded
348 if ( outLen )
349 *outLen = lenBuf ? lenBuf - 1 : lenBuf;
350
351 return buf;
352 }
353
354 if ( lenChunk == (size_t)-1 )
355 break;
356
357 const size_t lenBufNew = lenBuf + lenChunk;
358 if ( !buf.extend(lenBufNew) )
359 break;
360
361 lenChunk = WC2MB(buf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
362 if ( lenChunk == (size_t)-1 )
363 break;
364
365 // chunk successfully converted, go to the next one
366 in += wxWcslen(in) + 1 /* skip NUL too */;
367 lenBuf = lenBufNew + 1;
368 }
369
370 // conversion failed
371 if ( outLen )
372 *outLen = 0;
373
374 return wxCharBuffer();
375 }
376
377 // ----------------------------------------------------------------------------
378 // wxMBConvLibc
379 // ----------------------------------------------------------------------------
380
381 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
382 {
383 return wxMB2WC(buf, psz, n);
384 }
385
386 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
387 {
388 return wxWC2MB(buf, psz, n);
389 }
390
391 // ----------------------------------------------------------------------------
392 // wxConvBrokenFileNames
393 // ----------------------------------------------------------------------------
394
395 #ifdef __UNIX__
396
397 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
398 {
399 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
400 || wxStricmp(charset, _T("UTF8")) == 0 )
401 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
402 else
403 m_conv = new wxCSConv(charset);
404 }
405
406 #endif // __UNIX__
407
408 // ----------------------------------------------------------------------------
409 // UTF-7
410 // ----------------------------------------------------------------------------
411
412 // Implementation (C) 2004 Fredrik Roubert
413
414 //
415 // BASE64 decoding table
416 //
// maps each possible byte value to the 6 bit value it stands for in BASE64
// or to 0xff if it is not one of the BASE64 characters
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: only '+' and '/' are valid here
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: the digits
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x4f: 'A' to 'O'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50 - 0x5f: 'P' to 'Z'
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x6f: 'a' to 'o'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70 - 0x7f: 'p' to 'z'
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff: nothing is valid here
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
452
453 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
454 {
455 size_t len = 0;
456
457 while ( *psz && (!buf || (len < n)) )
458 {
459 unsigned char cc = *psz++;
460 if (cc != '+')
461 {
462 // plain ASCII char
463 if (buf)
464 *buf++ = cc;
465 len++;
466 }
467 else if (*psz == '-')
468 {
469 // encoded plus sign
470 if (buf)
471 *buf++ = cc;
472 len++;
473 psz++;
474 }
475 else // start of BASE64 encoded string
476 {
477 bool lsb, ok;
478 unsigned int d, l;
479 for ( ok = lsb = false, d = 0, l = 0;
480 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
481 psz++ )
482 {
483 d <<= 6;
484 d += cc;
485 for (l += 6; l >= 8; lsb = !lsb)
486 {
487 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
488 if (lsb)
489 {
490 if (buf)
491 *buf++ |= c;
492 len ++;
493 }
494 else
495 {
496 if (buf)
497 *buf = (wchar_t)(c << 8);
498 }
499
500 ok = true;
501 }
502 }
503
504 if ( !ok )
505 {
506 // in valid UTF7 we should have valid characters after '+'
507 return (size_t)-1;
508 }
509
510 if (*psz == '-')
511 psz++;
512 }
513 }
514
515 if ( buf && (len < n) )
516 *buf = '\0';
517
518 return len;
519 }
520
521 //
522 // BASE64 encoding table
523 //
// the character used to represent each of the 64 possible 6 bit values
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    '+', '/'
};
535
536 //
537 // UTF-7 encoding table
538 //
539 // 0 - Set D (directly encoded characters)
540 // 1 - Set O (optional direct characters)
541 // 2 - whitespace characters (optional)
542 // 3 - special characters
543 //
// the class (0 to 3, see above) of each of the 128 ASCII characters
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3,     // 0x00 - 0x07
    3, 2, 2, 3, 3, 2, 3, 3,     // 0x08 - 0x0f
    3, 3, 3, 3, 3, 3, 3, 3,     // 0x10 - 0x17
    3, 3, 3, 3, 3, 3, 3, 3,     // 0x18 - 0x1f
    2, 1, 1, 1, 1, 1, 1, 0,     // 0x20 - 0x27
    0, 0, 1, 3, 0, 0, 0, 3,     // 0x28 - 0x2f
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x30 - 0x37
    0, 0, 0, 1, 1, 1, 1, 0,     // 0x38 - 0x3f
    1, 0, 0, 0, 0, 0, 0, 0,     // 0x40 - 0x47
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x48 - 0x4f
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x50 - 0x57
    0, 0, 0, 1, 3, 1, 1, 1,     // 0x58 - 0x5f
    1, 0, 0, 0, 0, 0, 0, 0,     // 0x60 - 0x67
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x68 - 0x6f
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x70 - 0x77
    0, 0, 0, 1, 1, 1, 3, 3      // 0x78 - 0x7f
};
555
556 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
557 {
558 size_t len = 0;
559
560 while (*psz && ((!buf) || (len < n)))
561 {
562 wchar_t cc = *psz++;
563 if (cc < 0x80 && utf7encode[cc] < 1)
564 {
565 // plain ASCII char
566 if (buf)
567 *buf++ = (char)cc;
568 len++;
569 }
570 #ifndef WC_UTF16
571 else if (((wxUint32)cc) > 0xffff)
572 {
573 // no surrogate pair generation (yet?)
574 return (size_t)-1;
575 }
576 #endif
577 else
578 {
579 if (buf)
580 *buf++ = '+';
581 len++;
582 if (cc != '+')
583 {
584 // BASE64 encode string
585 unsigned int lsb, d, l;
586 for (d = 0, l = 0; /*nothing*/; psz++)
587 {
588 for (lsb = 0; lsb < 2; lsb ++)
589 {
590 d <<= 8;
591 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
592
593 for (l += 8; l >= 6; )
594 {
595 l -= 6;
596 if (buf)
597 *buf++ = utf7enb64[(d >> l) % 64];
598 len++;
599 }
600 }
601 cc = *psz;
602 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
603 break;
604 }
605 if (l != 0)
606 {
607 if (buf)
608 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
609 len++;
610 }
611 }
612 if (buf)
613 *buf++ = '-';
614 len++;
615 }
616 }
617 if (buf && (len < n))
618 *buf = 0;
619 return len;
620 }
621
622 // ----------------------------------------------------------------------------
623 // UTF-8
624 // ----------------------------------------------------------------------------
625
626 static wxUint32 utf8_max[]=
627 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
628
629 // boundaries of the private use area we use to (temporarily) remap invalid
630 // characters invalid in a UTF-8 encoded string
631 const wxUint32 wxUnicodePUA = 0x100000;
632 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
633
634 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
635 {
636 size_t len = 0;
637
638 while (*psz && ((!buf) || (len < n)))
639 {
640 const char *opsz = psz;
641 bool invalid = false;
642 unsigned char cc = *psz++, fc = cc;
643 unsigned cnt;
644 for (cnt = 0; fc & 0x80; cnt++)
645 fc <<= 1;
646 if (!cnt)
647 {
648 // plain ASCII char
649 if (buf)
650 *buf++ = cc;
651 len++;
652
653 // escape the escape character for octal escapes
654 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
655 && cc == '\\' && (!buf || len < n))
656 {
657 if (buf)
658 *buf++ = cc;
659 len++;
660 }
661 }
662 else
663 {
664 cnt--;
665 if (!cnt)
666 {
667 // invalid UTF-8 sequence
668 invalid = true;
669 }
670 else
671 {
672 unsigned ocnt = cnt - 1;
673 wxUint32 res = cc & (0x3f >> cnt);
674 while (cnt--)
675 {
676 cc = *psz;
677 if ((cc & 0xC0) != 0x80)
678 {
679 // invalid UTF-8 sequence
680 invalid = true;
681 break;
682 }
683 psz++;
684 res = (res << 6) | (cc & 0x3f);
685 }
686 if (invalid || res <= utf8_max[ocnt])
687 {
688 // illegal UTF-8 encoding
689 invalid = true;
690 }
691 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
692 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
693 {
694 // if one of our PUA characters turns up externally
695 // it must also be treated as an illegal sequence
696 // (a bit like you have to escape an escape character)
697 invalid = true;
698 }
699 else
700 {
701 #ifdef WC_UTF16
702 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
703 size_t pa = encode_utf16(res, (wxUint16 *)buf);
704 if (pa == (size_t)-1)
705 {
706 invalid = true;
707 }
708 else
709 {
710 if (buf)
711 buf += pa;
712 len += pa;
713 }
714 #else // !WC_UTF16
715 if (buf)
716 *buf++ = (wchar_t)res;
717 len++;
718 #endif // WC_UTF16/!WC_UTF16
719 }
720 }
721 if (invalid)
722 {
723 if (m_options & MAP_INVALID_UTF8_TO_PUA)
724 {
725 while (opsz < psz && (!buf || len < n))
726 {
727 #ifdef WC_UTF16
728 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
729 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
730 wxASSERT(pa != (size_t)-1);
731 if (buf)
732 buf += pa;
733 opsz++;
734 len += pa;
735 #else
736 if (buf)
737 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
738 opsz++;
739 len++;
740 #endif
741 }
742 }
743 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
744 {
745 while (opsz < psz && (!buf || len < n))
746 {
747 if ( buf && len + 3 < n )
748 {
749 unsigned char on = *opsz;
750 *buf++ = L'\\';
751 *buf++ = (wchar_t)( L'0' + on / 0100 );
752 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
753 *buf++ = (wchar_t)( L'0' + on % 010 );
754 }
755 opsz++;
756 len += 4;
757 }
758 }
759 else // MAP_INVALID_UTF8_NOT
760 {
761 return (size_t)-1;
762 }
763 }
764 }
765 }
766 if (buf && (len < n))
767 *buf = 0;
768 return len;
769 }
770
// returns true if wch is one of the octal digits, i.e. L'0' to L'7'
static inline bool isoctal(wchar_t wch)
{
    if ( wch < L'0' || wch > L'7' )
        return false;

    return true;
}
775
776 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
777 {
778 size_t len = 0;
779
780 while (*psz && ((!buf) || (len < n)))
781 {
782 wxUint32 cc;
783 #ifdef WC_UTF16
784 // cast is ok for WC_UTF16
785 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
786 psz += (pa == (size_t)-1) ? 1 : pa;
787 #else
788 cc=(*psz++) & 0x7fffffff;
789 #endif
790
791 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
792 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
793 {
794 if (buf)
795 *buf++ = (char)(cc - wxUnicodePUA);
796 len++;
797 }
798 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
799 && cc == L'\\' && psz[0] == L'\\' )
800 {
801 if (buf)
802 *buf++ = (char)cc;
803 psz++;
804 len++;
805 }
806 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
807 cc == L'\\' &&
808 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
809 {
810 if (buf)
811 {
812 *buf++ = (char) ((psz[0] - L'0')*0100 +
813 (psz[1] - L'0')*010 +
814 (psz[2] - L'0'));
815 }
816
817 psz += 3;
818 len++;
819 }
820 else
821 {
822 unsigned cnt;
823 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
824 if (!cnt)
825 {
826 // plain ASCII char
827 if (buf)
828 *buf++ = (char) cc;
829 len++;
830 }
831
832 else
833 {
834 len += cnt + 1;
835 if (buf)
836 {
837 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
838 while (cnt--)
839 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
840 }
841 }
842 }
843 }
844
845 if (buf && (len<n))
846 *buf = 0;
847
848 return len;
849 }
850
851 // ----------------------------------------------------------------------------
852 // UTF-16
853 // ----------------------------------------------------------------------------
854
// the "straight" class is the one for UTF-16 in the byte order selected by
// WORDS_BIGENDIAN and the "swap" one is the one for the other byte order
#ifndef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16LE
    #define wxMBConvUTF16swap wxMBConvUTF16BE
#else // WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap wxMBConvUTF16LE
#endif // !WORDS_BIGENDIAN/WORDS_BIGENDIAN
862
863
864 #ifdef WC_UTF16
865
866 // copy 16bit MB to 16bit String
867 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
868 {
869 size_t len=0;
870
871 while (*(wxUint16*)psz && (!buf || len < n))
872 {
873 if (buf)
874 *buf++ = *(wxUint16*)psz;
875 len++;
876
877 psz += sizeof(wxUint16);
878 }
879 if (buf && len<n) *buf=0;
880
881 return len;
882 }
883
884
885 // copy 16bit String to 16bit MB
886 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
887 {
888 size_t len=0;
889
890 while (*psz && (!buf || len < n))
891 {
892 if (buf)
893 {
894 *(wxUint16*)buf = *psz;
895 buf += sizeof(wxUint16);
896 }
897 len += sizeof(wxUint16);
898 psz++;
899 }
900 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
901
902 return len;
903 }
904
905
906 // swap 16bit MB to 16bit String
907 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
908 {
909 size_t len = 0;
910
911 // UTF16 string must be terminated by 2 NULs as single NULs may occur
912 // inside the string
913 while ( (psz[0] || psz[1]) && (!buf || len < n) )
914 {
915 if ( buf )
916 {
917 ((char *)buf)[0] = psz[1];
918 ((char *)buf)[1] = psz[0];
919 buf++;
920 }
921 len++;
922 psz += 2;
923 }
924
925 if ( buf && len < n )
926 *buf = L'\0';
927
928 return len;
929 }
930
931
932 // swap 16bit MB to 16bit String
933 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
934 {
935 size_t len = 0;
936
937 while ( *psz && (!buf || len < n) )
938 {
939 if ( buf )
940 {
941 *buf++ = ((char*)psz)[1];
942 *buf++ = ((char*)psz)[0];
943 }
944 len += 2;
945 psz++;
946 }
947
948 if ( buf && len < n )
949 *buf = '\0';
950
951 return len;
952 }
953
954
955 #else // WC_UTF16
956
957
958 // copy 16bit MB to 32bit String
959 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
960 {
961 size_t len=0;
962
963 while (*(wxUint16*)psz && (!buf || len < n))
964 {
965 wxUint32 cc;
966 size_t pa=decode_utf16((wxUint16*)psz, cc);
967 if (pa == (size_t)-1)
968 return pa;
969
970 if (buf)
971 *buf++ = (wchar_t)cc;
972 len++;
973 psz += pa * sizeof(wxUint16);
974 }
975 if (buf && len<n) *buf=0;
976
977 return len;
978 }
979
980
981 // copy 32bit String to 16bit MB
982 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
983 {
984 size_t len=0;
985
986 while (*psz && (!buf || len < n))
987 {
988 wxUint16 cc[2];
989 size_t pa=encode_utf16(*psz, cc);
990
991 if (pa == (size_t)-1)
992 return pa;
993
994 if (buf)
995 {
996 *(wxUint16*)buf = cc[0];
997 buf += sizeof(wxUint16);
998 if (pa > 1)
999 {
1000 *(wxUint16*)buf = cc[1];
1001 buf += sizeof(wxUint16);
1002 }
1003 }
1004
1005 len += pa*sizeof(wxUint16);
1006 psz++;
1007 }
1008 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1009
1010 return len;
1011 }
1012
1013
1014 // swap 16bit MB to 32bit String
1015 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1016 {
1017 size_t len=0;
1018
1019 while (*(wxUint16*)psz && (!buf || len < n))
1020 {
1021 wxUint32 cc;
1022 char tmp[4];
1023 tmp[0]=psz[1]; tmp[1]=psz[0];
1024 tmp[2]=psz[3]; tmp[3]=psz[2];
1025
1026 size_t pa=decode_utf16((wxUint16*)tmp, cc);
1027 if (pa == (size_t)-1)
1028 return pa;
1029
1030 if (buf)
1031 *buf++ = (wchar_t)cc;
1032
1033 len++;
1034 psz += pa * sizeof(wxUint16);
1035 }
1036 if (buf && len<n) *buf=0;
1037
1038 return len;
1039 }
1040
1041
1042 // swap 32bit String to 16bit MB
1043 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1044 {
1045 size_t len=0;
1046
1047 while (*psz && (!buf || len < n))
1048 {
1049 wxUint16 cc[2];
1050 size_t pa=encode_utf16(*psz, cc);
1051
1052 if (pa == (size_t)-1)
1053 return pa;
1054
1055 if (buf)
1056 {
1057 *buf++ = ((char*)cc)[1];
1058 *buf++ = ((char*)cc)[0];
1059 if (pa > 1)
1060 {
1061 *buf++ = ((char*)cc)[3];
1062 *buf++ = ((char*)cc)[2];
1063 }
1064 }
1065
1066 len += pa*sizeof(wxUint16);
1067 psz++;
1068 }
1069 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
1070
1071 return len;
1072 }
1073
1074 #endif // WC_UTF16
1075
1076
1077 // ----------------------------------------------------------------------------
1078 // UTF-32
1079 // ----------------------------------------------------------------------------
1080
1081 #ifdef WORDS_BIGENDIAN
1082 #define wxMBConvUTF32straight wxMBConvUTF32BE
1083 #define wxMBConvUTF32swap wxMBConvUTF32LE
1084 #else
1085 #define wxMBConvUTF32swap wxMBConvUTF32BE
1086 #define wxMBConvUTF32straight wxMBConvUTF32LE
1087 #endif
1088
1089
1090 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1091 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1092
1093
1094 #ifdef WC_UTF16
1095
1096 // copy 32bit MB to 16bit String
1097 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1098 {
1099 size_t len=0;
1100
1101 while (*(wxUint32*)psz && (!buf || len < n))
1102 {
1103 wxUint16 cc[2];
1104
1105 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1106 if (pa == (size_t)-1)
1107 return pa;
1108
1109 if (buf)
1110 {
1111 *buf++ = cc[0];
1112 if (pa > 1)
1113 *buf++ = cc[1];
1114 }
1115 len += pa;
1116 psz += sizeof(wxUint32);
1117 }
1118 if (buf && len<n) *buf=0;
1119
1120 return len;
1121 }
1122
1123
1124 // copy 16bit String to 32bit MB
1125 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1126 {
1127 size_t len=0;
1128
1129 while (*psz && (!buf || len < n))
1130 {
1131 wxUint32 cc;
1132
1133 // cast is ok for WC_UTF16
1134 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1135 if (pa == (size_t)-1)
1136 return pa;
1137
1138 if (buf)
1139 {
1140 *(wxUint32*)buf = cc;
1141 buf += sizeof(wxUint32);
1142 }
1143 len += sizeof(wxUint32);
1144 psz += pa;
1145 }
1146
1147 if (buf && len<=n-sizeof(wxUint32))
1148 *(wxUint32*)buf=0;
1149
1150 return len;
1151 }
1152
1153
1154
1155 // swap 32bit MB to 16bit String
1156 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1157 {
1158 size_t len=0;
1159
1160 while (*(wxUint32*)psz && (!buf || len < n))
1161 {
1162 char tmp[4];
1163 tmp[0] = psz[3]; tmp[1] = psz[2];
1164 tmp[2] = psz[1]; tmp[3] = psz[0];
1165
1166
1167 wxUint16 cc[2];
1168
1169 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1170 if (pa == (size_t)-1)
1171 return pa;
1172
1173 if (buf)
1174 {
1175 *buf++ = cc[0];
1176 if (pa > 1)
1177 *buf++ = cc[1];
1178 }
1179 len += pa;
1180 psz += sizeof(wxUint32);
1181 }
1182
1183 if (buf && len<n)
1184 *buf=0;
1185
1186 return len;
1187 }
1188
1189
1190 // swap 16bit String to 32bit MB
1191 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1192 {
1193 size_t len=0;
1194
1195 while (*psz && (!buf || len < n))
1196 {
1197 char cc[4];
1198
1199 // cast is ok for WC_UTF16
1200 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1201 if (pa == (size_t)-1)
1202 return pa;
1203
1204 if (buf)
1205 {
1206 *buf++ = cc[3];
1207 *buf++ = cc[2];
1208 *buf++ = cc[1];
1209 *buf++ = cc[0];
1210 }
1211 len += sizeof(wxUint32);
1212 psz += pa;
1213 }
1214
1215 if (buf && len<=n-sizeof(wxUint32))
1216 *(wxUint32*)buf=0;
1217
1218 return len;
1219 }
1220
1221 #else // WC_UTF16
1222
1223
1224 // copy 32bit MB to 32bit String
1225 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1226 {
1227 size_t len=0;
1228
1229 while (*(wxUint32*)psz && (!buf || len < n))
1230 {
1231 if (buf)
1232 *buf++ = (wchar_t)(*(wxUint32*)psz);
1233 len++;
1234 psz += sizeof(wxUint32);
1235 }
1236
1237 if (buf && len<n)
1238 *buf=0;
1239
1240 return len;
1241 }
1242
1243
1244 // copy 32bit String to 32bit MB
1245 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1246 {
1247 size_t len=0;
1248
1249 while (*psz && (!buf || len < n))
1250 {
1251 if (buf)
1252 {
1253 *(wxUint32*)buf = *psz;
1254 buf += sizeof(wxUint32);
1255 }
1256
1257 len += sizeof(wxUint32);
1258 psz++;
1259 }
1260
1261 if (buf && len<=n-sizeof(wxUint32))
1262 *(wxUint32*)buf=0;
1263
1264 return len;
1265 }
1266
1267
1268 // swap 32bit MB to 32bit String
1269 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1270 {
1271 size_t len=0;
1272
1273 while (*(wxUint32*)psz && (!buf || len < n))
1274 {
1275 if (buf)
1276 {
1277 ((char *)buf)[0] = psz[3];
1278 ((char *)buf)[1] = psz[2];
1279 ((char *)buf)[2] = psz[1];
1280 ((char *)buf)[3] = psz[0];
1281 buf++;
1282 }
1283 len++;
1284 psz += sizeof(wxUint32);
1285 }
1286
1287 if (buf && len<n)
1288 *buf=0;
1289
1290 return len;
1291 }
1292
1293
1294 // swap 32bit String to 32bit MB
1295 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1296 {
1297 size_t len=0;
1298
1299 while (*psz && (!buf || len < n))
1300 {
1301 if (buf)
1302 {
1303 *buf++ = ((char *)psz)[3];
1304 *buf++ = ((char *)psz)[2];
1305 *buf++ = ((char *)psz)[1];
1306 *buf++ = ((char *)psz)[0];
1307 }
1308 len += sizeof(wxUint32);
1309 psz++;
1310 }
1311
1312 if (buf && len<=n-sizeof(wxUint32))
1313 *(wxUint32*)buf=0;
1314
1315 return len;
1316 }
1317
1318
1319 #endif // WC_UTF16
1320
1321
1322 // ============================================================================
1323 // The classes doing conversion using the iconv_xxx() functions
1324 // ============================================================================
1325
1326 #ifdef HAVE_ICONV
1327
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if output buffer is _exactly_ as big as needed. Such case is
//     (unless there's yet another bug in glibc) the only case when iconv()
//     returns with (size_t)-1 (which means error) and says there are 0 bytes
//     left in the input buffer -- when _real_ error occurs,
//     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
//     iconv() failure.
//     [This bug does not appear in glibc 2.2.]
//
// ICONV_FAILED(cres, bufLeft) tests whether the iconv() call which returned
// cres and left bufLeft bytes in its input buffer failed. The macro
// parameters are parenthesised so that any expression may be passed for them.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
    #define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                         (errno != E2BIG || (bufLeft) != 0))
#else
    #define ICONV_FAILED(cres, bufLeft) ((cres) == (size_t)-1)
#endif

// cast x to the type of the input buffer parameter of iconv()
#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))

// the value compared against the conversion descriptors to detect invalid ones
#define ICONV_T_INVALID ((iconv_t)-1)
1346
1347 #if SIZEOF_WCHAR_T == 4
1348 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1349 #define WC_ENC wxFONTENCODING_UTF32
1350 #elif SIZEOF_WCHAR_T == 2
1351 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1352 #define WC_ENC wxFONTENCODING_UTF16
1353 #else // sizeof(wchar_t) != 2 nor 4
1354 // does this ever happen?
1355 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1356 #endif
1357
1358 // ----------------------------------------------------------------------------
1359 // wxMBConv_iconv: encapsulates an iconv character set
1360 // ----------------------------------------------------------------------------
1361
1362 class wxMBConv_iconv : public wxMBConv
1363 {
1364 public:
1365 wxMBConv_iconv(const wxChar *name);
1366 virtual ~wxMBConv_iconv();
1367
1368 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1369 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1370
1371 bool IsOk() const
1372 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1373
1374 protected:
1375 // the iconv handlers used to translate from multibyte to wide char and in
1376 // the other direction
1377 iconv_t m2w,
1378 w2m;
1379 #if wxUSE_THREADS
1380 // guards access to m2w and w2m objects
1381 wxMutex m_iconvMutex;
1382 #endif
1383
1384 private:
1385 // classify this encoding as explained in wxMBConv::GetMinMBCharWidth()
1386 // comment
1387 virtual size_t GetMinMBCharWidth() const;
1388
1389 // the name (for iconv_open()) of a wide char charset -- if none is
1390 // available on this machine, it will remain NULL
1391 static wxString ms_wcCharsetName;
1392
1393 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1394 // different endian-ness than the native one
1395 static bool ms_wcNeedsSwap;
1396
1397 // cached result of GetMinMBCharWidth(); set to 0 meaning "unknown"
1398 // initially
1399 size_t m_minMBCharWidth;
1400 };
1401
1402 // make the constructor available for unit testing
1403 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1404 {
1405 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1406 if ( !result->IsOk() )
1407 {
1408 delete result;
1409 return 0;
1410 }
1411 return result;
1412 }
1413
1414 wxString wxMBConv_iconv::ms_wcCharsetName;
1415 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1416
1417 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1418 {
1419 m_minMBCharWidth = 0;
1420
1421 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1422 // names for the charsets
1423 const wxCharBuffer cname(wxString(name).ToAscii());
1424
1425 // check for charset that represents wchar_t:
1426 if ( ms_wcCharsetName.empty() )
1427 {
1428 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1429
1430 #if wxUSE_FONTMAP
1431 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1432 #else // !wxUSE_FONTMAP
1433 static const wxChar *names[] =
1434 {
1435 #if SIZEOF_WCHAR_T == 4
1436 _T("UCS-4"),
1437 #elif SIZEOF_WCHAR_T = 2
1438 _T("UCS-2"),
1439 #endif
1440 NULL
1441 };
1442 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1443
1444 for ( ; *names && ms_wcCharsetName.empty(); ++names )
1445 {
1446 const wxString nameCS(*names);
1447
1448 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1449 wxString nameXE(nameCS);
1450 #ifdef WORDS_BIGENDIAN
1451 nameXE += _T("BE");
1452 #else // little endian
1453 nameXE += _T("LE");
1454 #endif
1455
1456 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1457 nameXE.c_str());
1458
1459 m2w = iconv_open(nameXE.ToAscii(), cname);
1460 if ( m2w == ICONV_T_INVALID )
1461 {
1462 // try charset w/o bytesex info (e.g. "UCS4")
1463 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1464 nameCS.c_str());
1465 m2w = iconv_open(nameCS.ToAscii(), cname);
1466
1467 // and check for bytesex ourselves:
1468 if ( m2w != ICONV_T_INVALID )
1469 {
1470 char buf[2], *bufPtr;
1471 wchar_t wbuf[2], *wbufPtr;
1472 size_t insz, outsz;
1473 size_t res;
1474
1475 buf[0] = 'A';
1476 buf[1] = 0;
1477 wbuf[0] = 0;
1478 insz = 2;
1479 outsz = SIZEOF_WCHAR_T * 2;
1480 wbufPtr = wbuf;
1481 bufPtr = buf;
1482
1483 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1484 (char**)&wbufPtr, &outsz);
1485
1486 if (ICONV_FAILED(res, insz))
1487 {
1488 wxLogLastError(wxT("iconv"));
1489 wxLogError(_("Conversion to charset '%s' doesn't work."),
1490 nameCS.c_str());
1491 }
1492 else // ok, can convert to this encoding, remember it
1493 {
1494 ms_wcCharsetName = nameCS;
1495 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1496 }
1497 }
1498 }
1499 else // use charset not requiring byte swapping
1500 {
1501 ms_wcCharsetName = nameXE;
1502 }
1503 }
1504
1505 wxLogTrace(TRACE_STRCONV,
1506 wxT("iconv wchar_t charset is \"%s\"%s"),
1507 ms_wcCharsetName.empty() ? _T("<none>")
1508 : ms_wcCharsetName.c_str(),
1509 ms_wcNeedsSwap ? _T(" (needs swap)")
1510 : _T(""));
1511 }
1512 else // we already have ms_wcCharsetName
1513 {
1514 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1515 }
1516
1517 if ( ms_wcCharsetName.empty() )
1518 {
1519 w2m = ICONV_T_INVALID;
1520 }
1521 else
1522 {
1523 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1524 if ( w2m == ICONV_T_INVALID )
1525 {
1526 wxLogTrace(TRACE_STRCONV,
1527 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1528 ms_wcCharsetName.c_str(), cname.data());
1529 }
1530 }
1531 }
1532
1533 wxMBConv_iconv::~wxMBConv_iconv()
1534 {
1535 if ( m2w != ICONV_T_INVALID )
1536 iconv_close(m2w);
1537 if ( w2m != ICONV_T_INVALID )
1538 iconv_close(w2m);
1539 }
1540
1541 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1542 {
1543 // find the string length: notice that must be done differently for
1544 // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1545 size_t inbuf;
1546 const size_t nulLen = GetMinMBCharWidth();
1547 switch ( nulLen )
1548 {
1549 default:
1550 return (size_t)-1;
1551
1552 case 1:
1553 inbuf = strlen(psz); // arguably more optimized than our version
1554 break;
1555
1556 case 2:
1557 case 4:
1558 // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1559 // they also have to start at character boundary and not span two
1560 // adjacent characters
1561 const char *p;
1562 for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
1563 ;
1564 inbuf = p - psz;
1565 break;
1566 }
1567
1568 #if wxUSE_THREADS
1569 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1570 // Unfortunately there is a couple of global wxCSConv objects such as
1571 // wxConvLocal that are used all over wx code, so we have to make sure
1572 // the handle is used by at most one thread at the time. Otherwise
1573 // only a few wx classes would be safe to use from non-main threads
1574 // as MB<->WC conversion would fail "randomly".
1575 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1576 #endif // wxUSE_THREADS
1577
1578
1579 size_t outbuf = n * SIZEOF_WCHAR_T;
1580 size_t res, cres;
1581 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1582 wchar_t *bufPtr = buf;
1583 const char *pszPtr = psz;
1584
1585 if (buf)
1586 {
1587 // have destination buffer, convert there
1588 cres = iconv(m2w,
1589 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1590 (char**)&bufPtr, &outbuf);
1591 res = n - (outbuf / SIZEOF_WCHAR_T);
1592
1593 if (ms_wcNeedsSwap)
1594 {
1595 // convert to native endianness
1596 for ( unsigned i = 0; i < res; i++ )
1597 buf[n] = WC_BSWAP(buf[i]);
1598 }
1599
1600 // NUL-terminate the string if there is any space left
1601 if (res < n)
1602 buf[res] = 0;
1603 }
1604 else
1605 {
1606 // no destination buffer... convert using temp buffer
1607 // to calculate destination buffer requirement
1608 wchar_t tbuf[8];
1609 res = 0;
1610 do {
1611 bufPtr = tbuf;
1612 outbuf = 8*SIZEOF_WCHAR_T;
1613
1614 cres = iconv(m2w,
1615 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1616 (char**)&bufPtr, &outbuf );
1617
1618 res += 8-(outbuf/SIZEOF_WCHAR_T);
1619 } while ((cres==(size_t)-1) && (errno==E2BIG));
1620 }
1621
1622 if (ICONV_FAILED(cres, inbuf))
1623 {
1624 //VS: it is ok if iconv fails, hence trace only
1625 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1626 return (size_t)-1;
1627 }
1628
1629 return res;
1630 }
1631
1632 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1633 {
1634 #if wxUSE_THREADS
1635 // NB: explained in MB2WC
1636 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1637 #endif
1638
1639 size_t inlen = wxWcslen(psz);
1640 size_t inbuf = inlen * SIZEOF_WCHAR_T;
1641 size_t outbuf = n;
1642 size_t res, cres;
1643
1644 wchar_t *tmpbuf = 0;
1645
1646 if (ms_wcNeedsSwap)
1647 {
1648 // need to copy to temp buffer to switch endianness
1649 // (doing WC_BSWAP twice on the original buffer won't help, as it
1650 // could be in read-only memory, or be accessed in some other thread)
1651 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1652 for ( size_t i = 0; i < inlen; i++ )
1653 tmpbuf[n] = WC_BSWAP(psz[i]);
1654 tmpbuf[inlen] = L'\0';
1655 psz = tmpbuf;
1656 }
1657
1658 if (buf)
1659 {
1660 // have destination buffer, convert there
1661 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1662
1663 res = n-outbuf;
1664
1665 // NB: iconv was given only wcslen(psz) characters on input, and so
1666 // it couldn't convert the trailing zero. Let's do it ourselves
1667 // if there's some room left for it in the output buffer.
1668 if (res < n)
1669 buf[0] = 0;
1670 }
1671 else
1672 {
1673 // no destination buffer... convert using temp buffer
1674 // to calculate destination buffer requirement
1675 char tbuf[16];
1676 res = 0;
1677 do {
1678 buf = tbuf; outbuf = 16;
1679
1680 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1681
1682 res += 16 - outbuf;
1683 } while ((cres==(size_t)-1) && (errno==E2BIG));
1684 }
1685
1686 if (ms_wcNeedsSwap)
1687 {
1688 free(tmpbuf);
1689 }
1690
1691 if (ICONV_FAILED(cres, inbuf))
1692 {
1693 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1694 return (size_t)-1;
1695 }
1696
1697 return res;
1698 }
1699
1700 size_t wxMBConv_iconv::GetMinMBCharWidth() const
1701 {
1702 if ( m_minMBCharWidth == 0 )
1703 {
1704 wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1705
1706 #if wxUSE_THREADS
1707 // NB: explained in MB2WC
1708 wxMutexLocker lock(self->m_iconvMutex);
1709 #endif
1710
1711 wchar_t *wnul = L"";
1712 char buf[8]; // should be enough for NUL in any encoding
1713 size_t inLen = sizeof(wchar_t),
1714 outLen = WXSIZEOF(buf);
1715 char *in = (char *)wnul;
1716 char *out = buf;
1717 if ( iconv(w2m, ICONV_CHAR_CAST(&in), &inLen, &out, &outLen) == (size_t)-1 )
1718 {
1719 self->m_minMBCharWidth = (size_t)-1;
1720 }
1721 else // ok
1722 {
1723 self->m_minMBCharWidth = out - buf;
1724 }
1725 }
1726
1727 return m_minMBCharWidth;
1728 }
1729
1730 #endif // HAVE_ICONV
1731
1732
1733 // ============================================================================
1734 // Win32 conversion classes
1735 // ============================================================================
1736
1737 #ifdef wxHAVE_WIN32_MB2WC
1738
1739 // from utils.cpp
1740 #if wxUSE_FONTMAP
1741 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1742 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1743 #endif
1744
1745 class wxMBConv_win32 : public wxMBConv
1746 {
1747 public:
1748 wxMBConv_win32()
1749 {
1750 m_CodePage = CP_ACP;
1751 m_minMBCharWidth = 0;
1752 }
1753
1754 #if wxUSE_FONTMAP
1755 wxMBConv_win32(const wxChar* name)
1756 {
1757 m_CodePage = wxCharsetToCodepage(name);
1758 m_minMBCharWidth = 0;
1759 }
1760
1761 wxMBConv_win32(wxFontEncoding encoding)
1762 {
1763 m_CodePage = wxEncodingToCodepage(encoding);
1764 m_minMBCharWidth = 0;
1765 }
1766 #endif // wxUSE_FONTMAP
1767
1768 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1769 {
1770 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1771 // the behaviour is not compatible with the Unix version (using iconv)
1772 // and break the library itself, e.g. wxTextInputStream::NextChar()
1773 // wouldn't work if reading an incomplete MB char didn't result in an
1774 // error
1775 //
1776 // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1777 // an error (tested under Windows Server 2003) and apparently it is
1778 // done on purpose, i.e. the function accepts any input in this case
1779 // and although I'd prefer to return error on ill-formed output, our
1780 // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1781 // explicitly ill-formed according to RFC 2152) neither so we don't
1782 // even have any fallback here...
1783 //
1784 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1785 // Win XP or newer and if it is specified on older versions, conversion
1786 // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1787 // fails. So we can only use the flag on newer Windows versions.
1788 // Additionally, the flag is not supported by UTF7, symbol and CJK
1789 // encodings. See here:
1790 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1791 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1792 int flags = 0;
1793 if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
1794 m_CodePage < 50000 &&
1795 IsAtLeastWin2kSP4() )
1796 {
1797 flags = MB_ERR_INVALID_CHARS;
1798 }
1799 else if ( m_CodePage == CP_UTF8 )
1800 {
1801 // Avoid round-trip in the special case of UTF-8 by using our
1802 // own UTF-8 conversion code:
1803 return wxMBConvUTF8().MB2WC(buf, psz, n);
1804 }
1805
1806 const size_t len = ::MultiByteToWideChar
1807 (
1808 m_CodePage, // code page
1809 flags, // flags: fall on error
1810 psz, // input string
1811 -1, // its length (NUL-terminated)
1812 buf, // output string
1813 buf ? n : 0 // size of output buffer
1814 );
1815 if ( !len )
1816 {
1817 // function totally failed
1818 return (size_t)-1;
1819 }
1820
1821 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1822 // check if we succeeded, by doing a double trip:
1823 if ( !flags && buf )
1824 {
1825 const size_t mbLen = strlen(psz);
1826 wxCharBuffer mbBuf(mbLen);
1827 if ( ::WideCharToMultiByte
1828 (
1829 m_CodePage,
1830 0,
1831 buf,
1832 -1,
1833 mbBuf.data(),
1834 mbLen + 1, // size in bytes, not length
1835 NULL,
1836 NULL
1837 ) == 0 ||
1838 strcmp(mbBuf, psz) != 0 )
1839 {
1840 // we didn't obtain the same thing we started from, hence
1841 // the conversion was lossy and we consider that it failed
1842 return (size_t)-1;
1843 }
1844 }
1845
1846 // note that it returns count of written chars for buf != NULL and size
1847 // of the needed buffer for buf == NULL so in either case the length of
1848 // the string (which never includes the terminating NUL) is one less
1849 return len - 1;
1850 }
1851
1852 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1853 {
1854 /*
1855 we have a problem here: by default, WideCharToMultiByte() may
1856 replace characters unrepresentable in the target code page with bad
1857 quality approximations such as turning "1/2" symbol (U+00BD) into
1858 "1" for the code pages which don't have it and we, obviously, want
1859 to avoid this at any price
1860
1861 the trouble is that this function does it _silently_, i.e. it won't
1862 even tell us whether it did or not... Win98/2000 and higher provide
1863 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1864 we have to resort to a round trip, i.e. check that converting back
1865 results in the same string -- this is, of course, expensive but
1866 otherwise we simply can't be sure to not garble the data.
1867 */
1868
1869 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1870 // it doesn't work with CJK encodings (which we test for rather roughly
1871 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1872 // supporting it
1873 BOOL usedDef wxDUMMY_INITIALIZE(false);
1874 BOOL *pUsedDef;
1875 int flags;
1876 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1877 {
1878 // it's our lucky day
1879 flags = WC_NO_BEST_FIT_CHARS;
1880 pUsedDef = &usedDef;
1881 }
1882 else // old system or unsupported encoding
1883 {
1884 flags = 0;
1885 pUsedDef = NULL;
1886 }
1887
1888 const size_t len = ::WideCharToMultiByte
1889 (
1890 m_CodePage, // code page
1891 flags, // either none or no best fit
1892 pwz, // input string
1893 -1, // it is (wide) NUL-terminated
1894 buf, // output buffer
1895 buf ? n : 0, // and its size
1896 NULL, // default "replacement" char
1897 pUsedDef // [out] was it used?
1898 );
1899
1900 if ( !len )
1901 {
1902 // function totally failed
1903 return (size_t)-1;
1904 }
1905
1906 // if we were really converting, check if we succeeded
1907 if ( buf )
1908 {
1909 if ( flags )
1910 {
1911 // check if the conversion failed, i.e. if any replacements
1912 // were done
1913 if ( usedDef )
1914 return (size_t)-1;
1915 }
1916 else // we must resort to double tripping...
1917 {
1918 wxWCharBuffer wcBuf(n);
1919 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1920 wcscmp(wcBuf, pwz) != 0 )
1921 {
1922 // we didn't obtain the same thing we started from, hence
1923 // the conversion was lossy and we consider that it failed
1924 return (size_t)-1;
1925 }
1926 }
1927 }
1928
1929 // see the comment above for the reason of "len - 1"
1930 return len - 1;
1931 }
1932
1933 bool IsOk() const { return m_CodePage != -1; }
1934
1935 private:
1936 static bool CanUseNoBestFit()
1937 {
1938 static int s_isWin98Or2k = -1;
1939
1940 if ( s_isWin98Or2k == -1 )
1941 {
1942 int verMaj, verMin;
1943 switch ( wxGetOsVersion(&verMaj, &verMin) )
1944 {
1945 case wxWIN95:
1946 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1947 break;
1948
1949 case wxWINDOWS_NT:
1950 s_isWin98Or2k = verMaj >= 5;
1951 break;
1952
1953 default:
1954 // unknown, be conseravtive by default
1955 s_isWin98Or2k = 0;
1956 }
1957
1958 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1959 }
1960
1961 return s_isWin98Or2k == 1;
1962 }
1963
1964 static bool IsAtLeastWin2kSP4()
1965 {
1966 #ifdef __WXWINCE__
1967 return false;
1968 #else
1969 static int s_isAtLeastWin2kSP4 = -1;
1970
1971 if ( s_isAtLeastWin2kSP4 == -1 )
1972 {
1973 OSVERSIONINFOEX ver;
1974
1975 memset(&ver, 0, sizeof(ver));
1976 ver.dwOSVersionInfoSize = sizeof(ver);
1977 GetVersionEx((OSVERSIONINFO*)&ver);
1978
1979 s_isAtLeastWin2kSP4 =
1980 ((ver.dwMajorVersion > 5) || // Vista+
1981 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
1982 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
1983 ver.wServicePackMajor >= 4)) // 2000 SP4+
1984 ? 1 : 0;
1985 }
1986
1987 return s_isAtLeastWin2kSP4 == 1;
1988 #endif
1989 }
1990
1991 virtual size_t GetMinMBCharWidth() const
1992 {
1993 if ( m_minMBCharWidth == 0 )
1994 {
1995 int len = ::WideCharToMultiByte
1996 (
1997 m_CodePage, // code page
1998 0, // no flags
1999 L"", // input string
2000 1, // translate just the NUL
2001 NULL, // output buffer
2002 0, // and its size
2003 NULL, // no replacement char
2004 NULL // [out] don't care if it was used
2005 );
2006
2007 wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2008 switch ( len )
2009 {
2010 default:
2011 wxLogDebug(_T("Unexpected NUL length %d"), len);
2012 // fall through
2013
2014 case 0:
2015 self->m_minMBCharWidth = (size_t)-1;
2016 break;
2017
2018 case 1:
2019 case 2:
2020 case 4:
2021 self->m_minMBCharWidth = len;
2022 break;
2023 }
2024 }
2025
2026 return m_minMBCharWidth;
2027 }
2028
2029 // the code page we're working with
2030 long m_CodePage;
2031
2032 // cached result of GetMinMBCharWidth(), set to 0 initially meaning
2033 // "unknown"
2034 size_t m_minMBCharWidth;
2035 };
2036
2037 #endif // wxHAVE_WIN32_MB2WC
2038
2039 // ============================================================================
2040 // Cocoa conversion classes
2041 // ============================================================================
2042
2043 #if defined(__WXCOCOA__)
2044
2045 // RN: There is no UTF-32 support in either Core Foundation or
2046 // Cocoa. Strangely enough, internally Core Foundation uses
2047 // UTF 32 internally quite a bit - its just not public (yet).
2048
2049 #include <CoreFoundation/CFString.h>
2050 #include <CoreFoundation/CFStringEncodingExt.h>
2051
2052 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
2053 {
2054 CFStringEncoding enc = kCFStringEncodingInvalidId ;
2055 if ( encoding == wxFONTENCODING_DEFAULT )
2056 {
2057 enc = CFStringGetSystemEncoding();
2058 }
2059 else switch( encoding)
2060 {
2061 case wxFONTENCODING_ISO8859_1 :
2062 enc = kCFStringEncodingISOLatin1 ;
2063 break ;
2064 case wxFONTENCODING_ISO8859_2 :
2065 enc = kCFStringEncodingISOLatin2;
2066 break ;
2067 case wxFONTENCODING_ISO8859_3 :
2068 enc = kCFStringEncodingISOLatin3 ;
2069 break ;
2070 case wxFONTENCODING_ISO8859_4 :
2071 enc = kCFStringEncodingISOLatin4;
2072 break ;
2073 case wxFONTENCODING_ISO8859_5 :
2074 enc = kCFStringEncodingISOLatinCyrillic;
2075 break ;
2076 case wxFONTENCODING_ISO8859_6 :
2077 enc = kCFStringEncodingISOLatinArabic;
2078 break ;
2079 case wxFONTENCODING_ISO8859_7 :
2080 enc = kCFStringEncodingISOLatinGreek;
2081 break ;
2082 case wxFONTENCODING_ISO8859_8 :
2083 enc = kCFStringEncodingISOLatinHebrew;
2084 break ;
2085 case wxFONTENCODING_ISO8859_9 :
2086 enc = kCFStringEncodingISOLatin5;
2087 break ;
2088 case wxFONTENCODING_ISO8859_10 :
2089 enc = kCFStringEncodingISOLatin6;
2090 break ;
2091 case wxFONTENCODING_ISO8859_11 :
2092 enc = kCFStringEncodingISOLatinThai;
2093 break ;
2094 case wxFONTENCODING_ISO8859_13 :
2095 enc = kCFStringEncodingISOLatin7;
2096 break ;
2097 case wxFONTENCODING_ISO8859_14 :
2098 enc = kCFStringEncodingISOLatin8;
2099 break ;
2100 case wxFONTENCODING_ISO8859_15 :
2101 enc = kCFStringEncodingISOLatin9;
2102 break ;
2103
2104 case wxFONTENCODING_KOI8 :
2105 enc = kCFStringEncodingKOI8_R;
2106 break ;
2107 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2108 enc = kCFStringEncodingDOSRussian;
2109 break ;
2110
2111 // case wxFONTENCODING_BULGARIAN :
2112 // enc = ;
2113 // break ;
2114
2115 case wxFONTENCODING_CP437 :
2116 enc =kCFStringEncodingDOSLatinUS ;
2117 break ;
2118 case wxFONTENCODING_CP850 :
2119 enc = kCFStringEncodingDOSLatin1;
2120 break ;
2121 case wxFONTENCODING_CP852 :
2122 enc = kCFStringEncodingDOSLatin2;
2123 break ;
2124 case wxFONTENCODING_CP855 :
2125 enc = kCFStringEncodingDOSCyrillic;
2126 break ;
2127 case wxFONTENCODING_CP866 :
2128 enc =kCFStringEncodingDOSRussian ;
2129 break ;
2130 case wxFONTENCODING_CP874 :
2131 enc = kCFStringEncodingDOSThai;
2132 break ;
2133 case wxFONTENCODING_CP932 :
2134 enc = kCFStringEncodingDOSJapanese;
2135 break ;
2136 case wxFONTENCODING_CP936 :
2137 enc =kCFStringEncodingDOSChineseSimplif ;
2138 break ;
2139 case wxFONTENCODING_CP949 :
2140 enc = kCFStringEncodingDOSKorean;
2141 break ;
2142 case wxFONTENCODING_CP950 :
2143 enc = kCFStringEncodingDOSChineseTrad;
2144 break ;
2145 case wxFONTENCODING_CP1250 :
2146 enc = kCFStringEncodingWindowsLatin2;
2147 break ;
2148 case wxFONTENCODING_CP1251 :
2149 enc =kCFStringEncodingWindowsCyrillic ;
2150 break ;
2151 case wxFONTENCODING_CP1252 :
2152 enc =kCFStringEncodingWindowsLatin1 ;
2153 break ;
2154 case wxFONTENCODING_CP1253 :
2155 enc = kCFStringEncodingWindowsGreek;
2156 break ;
2157 case wxFONTENCODING_CP1254 :
2158 enc = kCFStringEncodingWindowsLatin5;
2159 break ;
2160 case wxFONTENCODING_CP1255 :
2161 enc =kCFStringEncodingWindowsHebrew ;
2162 break ;
2163 case wxFONTENCODING_CP1256 :
2164 enc =kCFStringEncodingWindowsArabic ;
2165 break ;
2166 case wxFONTENCODING_CP1257 :
2167 enc = kCFStringEncodingWindowsBalticRim;
2168 break ;
2169 // This only really encodes to UTF7 (if that) evidently
2170 // case wxFONTENCODING_UTF7 :
2171 // enc = kCFStringEncodingNonLossyASCII ;
2172 // break ;
2173 case wxFONTENCODING_UTF8 :
2174 enc = kCFStringEncodingUTF8 ;
2175 break ;
2176 case wxFONTENCODING_EUC_JP :
2177 enc = kCFStringEncodingEUC_JP;
2178 break ;
2179 case wxFONTENCODING_UTF16 :
2180 enc = kCFStringEncodingUnicode ;
2181 break ;
2182 case wxFONTENCODING_MACROMAN :
2183 enc = kCFStringEncodingMacRoman ;
2184 break ;
2185 case wxFONTENCODING_MACJAPANESE :
2186 enc = kCFStringEncodingMacJapanese ;
2187 break ;
2188 case wxFONTENCODING_MACCHINESETRAD :
2189 enc = kCFStringEncodingMacChineseTrad ;
2190 break ;
2191 case wxFONTENCODING_MACKOREAN :
2192 enc = kCFStringEncodingMacKorean ;
2193 break ;
2194 case wxFONTENCODING_MACARABIC :
2195 enc = kCFStringEncodingMacArabic ;
2196 break ;
2197 case wxFONTENCODING_MACHEBREW :
2198 enc = kCFStringEncodingMacHebrew ;
2199 break ;
2200 case wxFONTENCODING_MACGREEK :
2201 enc = kCFStringEncodingMacGreek ;
2202 break ;
2203 case wxFONTENCODING_MACCYRILLIC :
2204 enc = kCFStringEncodingMacCyrillic ;
2205 break ;
2206 case wxFONTENCODING_MACDEVANAGARI :
2207 enc = kCFStringEncodingMacDevanagari ;
2208 break ;
2209 case wxFONTENCODING_MACGURMUKHI :
2210 enc = kCFStringEncodingMacGurmukhi ;
2211 break ;
2212 case wxFONTENCODING_MACGUJARATI :
2213 enc = kCFStringEncodingMacGujarati ;
2214 break ;
2215 case wxFONTENCODING_MACORIYA :
2216 enc = kCFStringEncodingMacOriya ;
2217 break ;
2218 case wxFONTENCODING_MACBENGALI :
2219 enc = kCFStringEncodingMacBengali ;
2220 break ;
2221 case wxFONTENCODING_MACTAMIL :
2222 enc = kCFStringEncodingMacTamil ;
2223 break ;
2224 case wxFONTENCODING_MACTELUGU :
2225 enc = kCFStringEncodingMacTelugu ;
2226 break ;
2227 case wxFONTENCODING_MACKANNADA :
2228 enc = kCFStringEncodingMacKannada ;
2229 break ;
2230 case wxFONTENCODING_MACMALAJALAM :
2231 enc = kCFStringEncodingMacMalayalam ;
2232 break ;
2233 case wxFONTENCODING_MACSINHALESE :
2234 enc = kCFStringEncodingMacSinhalese ;
2235 break ;
2236 case wxFONTENCODING_MACBURMESE :
2237 enc = kCFStringEncodingMacBurmese ;
2238 break ;
2239 case wxFONTENCODING_MACKHMER :
2240 enc = kCFStringEncodingMacKhmer ;
2241 break ;
2242 case wxFONTENCODING_MACTHAI :
2243 enc = kCFStringEncodingMacThai ;
2244 break ;
2245 case wxFONTENCODING_MACLAOTIAN :
2246 enc = kCFStringEncodingMacLaotian ;
2247 break ;
2248 case wxFONTENCODING_MACGEORGIAN :
2249 enc = kCFStringEncodingMacGeorgian ;
2250 break ;
2251 case wxFONTENCODING_MACARMENIAN :
2252 enc = kCFStringEncodingMacArmenian ;
2253 break ;
2254 case wxFONTENCODING_MACCHINESESIMP :
2255 enc = kCFStringEncodingMacChineseSimp ;
2256 break ;
2257 case wxFONTENCODING_MACTIBETAN :
2258 enc = kCFStringEncodingMacTibetan ;
2259 break ;
2260 case wxFONTENCODING_MACMONGOLIAN :
2261 enc = kCFStringEncodingMacMongolian ;
2262 break ;
2263 case wxFONTENCODING_MACETHIOPIC :
2264 enc = kCFStringEncodingMacEthiopic ;
2265 break ;
2266 case wxFONTENCODING_MACCENTRALEUR :
2267 enc = kCFStringEncodingMacCentralEurRoman ;
2268 break ;
2269 case wxFONTENCODING_MACVIATNAMESE :
2270 enc = kCFStringEncodingMacVietnamese ;
2271 break ;
2272 case wxFONTENCODING_MACARABICEXT :
2273 enc = kCFStringEncodingMacExtArabic ;
2274 break ;
2275 case wxFONTENCODING_MACSYMBOL :
2276 enc = kCFStringEncodingMacSymbol ;
2277 break ;
2278 case wxFONTENCODING_MACDINGBATS :
2279 enc = kCFStringEncodingMacDingbats ;
2280 break ;
2281 case wxFONTENCODING_MACTURKISH :
2282 enc = kCFStringEncodingMacTurkish ;
2283 break ;
2284 case wxFONTENCODING_MACCROATIAN :
2285 enc = kCFStringEncodingMacCroatian ;
2286 break ;
2287 case wxFONTENCODING_MACICELANDIC :
2288 enc = kCFStringEncodingMacIcelandic ;
2289 break ;
2290 case wxFONTENCODING_MACROMANIAN :
2291 enc = kCFStringEncodingMacRomanian ;
2292 break ;
2293 case wxFONTENCODING_MACCELTIC :
2294 enc = kCFStringEncodingMacCeltic ;
2295 break ;
2296 case wxFONTENCODING_MACGAELIC :
2297 enc = kCFStringEncodingMacGaelic ;
2298 break ;
2299 // case wxFONTENCODING_MACKEYBOARD :
2300 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2301 // break ;
2302 default :
2303 // because gcc is picky
2304 break ;
2305 } ;
2306 return enc ;
2307 }
2308
2309 class wxMBConv_cocoa : public wxMBConv
2310 {
2311 public:
2312 wxMBConv_cocoa()
2313 {
2314 Init(CFStringGetSystemEncoding()) ;
2315 }
2316
2317 #if wxUSE_FONTMAP
2318 wxMBConv_cocoa(const wxChar* name)
2319 {
2320 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2321 }
2322 #endif
2323
2324 wxMBConv_cocoa(wxFontEncoding encoding)
2325 {
2326 Init( wxCFStringEncFromFontEnc(encoding) );
2327 }
2328
2329 ~wxMBConv_cocoa()
2330 {
2331 }
2332
2333 void Init( CFStringEncoding encoding)
2334 {
2335 m_encoding = encoding ;
2336 }
2337
2338 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2339 {
2340 wxASSERT(szUnConv);
2341
2342 CFStringRef theString = CFStringCreateWithBytes (
2343 NULL, //the allocator
2344 (const UInt8*)szUnConv,
2345 strlen(szUnConv),
2346 m_encoding,
2347 false //no BOM/external representation
2348 );
2349
2350 wxASSERT(theString);
2351
2352 size_t nOutLength = CFStringGetLength(theString);
2353
2354 if (szOut == NULL)
2355 {
2356 CFRelease(theString);
2357 return nOutLength;
2358 }
2359
2360 CFRange theRange = { 0, nOutSize };
2361
2362 #if SIZEOF_WCHAR_T == 4
2363 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2364 #endif
2365
2366 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2367
2368 CFRelease(theString);
2369
2370 szUniCharBuffer[nOutLength] = '\0' ;
2371
2372 #if SIZEOF_WCHAR_T == 4
2373 wxMBConvUTF16 converter ;
2374 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2375 delete[] szUniCharBuffer;
2376 #endif
2377
2378 return nOutLength;
2379 }
2380
2381 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2382 {
2383 wxASSERT(szUnConv);
2384
2385 size_t nRealOutSize;
2386 size_t nBufSize = wxWcslen(szUnConv);
2387 UniChar* szUniBuffer = (UniChar*) szUnConv;
2388
2389 #if SIZEOF_WCHAR_T == 4
2390 wxMBConvUTF16 converter ;
2391 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2392 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2393 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2394 nBufSize /= sizeof(UniChar);
2395 #endif
2396
2397 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2398 NULL, //allocator
2399 szUniBuffer,
2400 nBufSize,
2401 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2402 );
2403
2404 wxASSERT(theString);
2405
2406 //Note that CER puts a BOM when converting to unicode
2407 //so we check and use getchars instead in that case
2408 if (m_encoding == kCFStringEncodingUnicode)
2409 {
2410 if (szOut != NULL)
2411 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2412
2413 nRealOutSize = CFStringGetLength(theString) + 1;
2414 }
2415 else
2416 {
2417 CFStringGetBytes(
2418 theString,
2419 CFRangeMake(0, CFStringGetLength(theString)),
2420 m_encoding,
2421 0, //what to put in characters that can't be converted -
2422 //0 tells CFString to return NULL if it meets such a character
2423 false, //not an external representation
2424 (UInt8*) szOut,
2425 nOutSize,
2426 (CFIndex*) &nRealOutSize
2427 );
2428 }
2429
2430 CFRelease(theString);
2431
2432 #if SIZEOF_WCHAR_T == 4
2433 delete[] szUniBuffer;
2434 #endif
2435
2436 return nRealOutSize - 1;
2437 }
2438
2439 bool IsOk() const
2440 {
2441 return m_encoding != kCFStringEncodingInvalidId &&
2442 CFStringIsEncodingAvailable(m_encoding);
2443 }
2444
2445 private:
2446 CFStringEncoding m_encoding ;
2447 };
2448
2449 #endif // defined(__WXCOCOA__)
2450
2451 // ============================================================================
2452 // Mac conversion classes
2453 // ============================================================================
2454
2455 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2456
2457 class wxMBConv_mac : public wxMBConv
2458 {
2459 public:
2460 wxMBConv_mac()
2461 {
2462 Init(CFStringGetSystemEncoding()) ;
2463 }
2464
2465 #if wxUSE_FONTMAP
2466 wxMBConv_mac(const wxChar* name)
2467 {
2468 Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2469 }
2470 #endif
2471
2472 wxMBConv_mac(wxFontEncoding encoding)
2473 {
2474 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2475 }
2476
2477 ~wxMBConv_mac()
2478 {
2479 OSStatus status = noErr ;
2480 status = TECDisposeConverter(m_MB2WC_converter);
2481 status = TECDisposeConverter(m_WC2MB_converter);
2482 }
2483
2484
2485 void Init( TextEncodingBase encoding)
2486 {
2487 OSStatus status = noErr ;
2488 m_char_encoding = encoding ;
2489 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2490
2491 status = TECCreateConverter(&m_MB2WC_converter,
2492 m_char_encoding,
2493 m_unicode_encoding);
2494 status = TECCreateConverter(&m_WC2MB_converter,
2495 m_unicode_encoding,
2496 m_char_encoding);
2497 }
2498
2499 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2500 {
2501 OSStatus status = noErr ;
2502 ByteCount byteOutLen ;
2503 ByteCount byteInLen = strlen(psz) ;
2504 wchar_t *tbuf = NULL ;
2505 UniChar* ubuf = NULL ;
2506 size_t res = 0 ;
2507
2508 if (buf == NULL)
2509 {
2510 //apple specs say at least 32
2511 n = wxMax( 32 , byteInLen ) ;
2512 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2513 }
2514 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2515 #if SIZEOF_WCHAR_T == 4
2516 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2517 #else
2518 ubuf = (UniChar*) (buf ? buf : tbuf) ;
2519 #endif
2520 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2521 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2522 #if SIZEOF_WCHAR_T == 4
2523 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2524 // is not properly terminated we get random characters at the end
2525 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2526 wxMBConvUTF16 converter ;
2527 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2528 free( ubuf ) ;
2529 #else
2530 res = byteOutLen / sizeof( UniChar ) ;
2531 #endif
2532 if ( buf == NULL )
2533 free(tbuf) ;
2534
2535 if ( buf && res < n)
2536 buf[res] = 0;
2537
2538 return res ;
2539 }
2540
2541 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2542 {
2543 OSStatus status = noErr ;
2544 ByteCount byteOutLen ;
2545 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2546
2547 char *tbuf = NULL ;
2548
2549 if (buf == NULL)
2550 {
2551 //apple specs say at least 32
2552 n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2553 tbuf = (char*) malloc( n ) ;
2554 }
2555
2556 ByteCount byteBufferLen = n ;
2557 UniChar* ubuf = NULL ;
2558 #if SIZEOF_WCHAR_T == 4
2559 wxMBConvUTF16 converter ;
2560 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2561 byteInLen = unicharlen ;
2562 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2563 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2564 #else
2565 ubuf = (UniChar*) psz ;
2566 #endif
2567 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2568 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2569 #if SIZEOF_WCHAR_T == 4
2570 free( ubuf ) ;
2571 #endif
2572 if ( buf == NULL )
2573 free(tbuf) ;
2574
2575 size_t res = byteOutLen ;
2576 if ( buf && res < n)
2577 {
2578 buf[res] = 0;
2579
2580 //we need to double-trip to verify it didn't insert any ? in place
2581 //of bogus characters
2582 wxWCharBuffer wcBuf(n);
2583 size_t pszlen = wxWcslen(psz);
2584 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2585 wxWcslen(wcBuf) != pszlen ||
2586 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2587 {
2588 // we didn't obtain the same thing we started from, hence
2589 // the conversion was lossy and we consider that it failed
2590 return (size_t)-1;
2591 }
2592 }
2593
2594 return res ;
2595 }
2596
2597 bool IsOk() const
2598 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2599
2600 private:
2601 TECObjectRef m_MB2WC_converter ;
2602 TECObjectRef m_WC2MB_converter ;
2603
2604 TextEncodingBase m_char_encoding ;
2605 TextEncodingBase m_unicode_encoding ;
2606 };
2607
2608 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2609
2610 // ============================================================================
2611 // wxEncodingConverter based conversion classes
2612 // ============================================================================
2613
2614 #if wxUSE_FONTMAP
2615
2616 class wxMBConv_wxwin : public wxMBConv
2617 {
2618 private:
2619 void Init()
2620 {
2621 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2622 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2623 }
2624
2625 public:
2626 // temporarily just use wxEncodingConverter stuff,
2627 // so that it works while a better implementation is built
2628 wxMBConv_wxwin(const wxChar* name)
2629 {
2630 if (name)
2631 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2632 else
2633 m_enc = wxFONTENCODING_SYSTEM;
2634
2635 Init();
2636 }
2637
2638 wxMBConv_wxwin(wxFontEncoding enc)
2639 {
2640 m_enc = enc;
2641
2642 Init();
2643 }
2644
2645 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2646 {
2647 size_t inbuf = strlen(psz);
2648 if (buf)
2649 {
2650 if (!m2w.Convert(psz,buf))
2651 return (size_t)-1;
2652 }
2653 return inbuf;
2654 }
2655
2656 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2657 {
2658 const size_t inbuf = wxWcslen(psz);
2659 if (buf)
2660 {
2661 if (!w2m.Convert(psz,buf))
2662 return (size_t)-1;
2663 }
2664
2665 return inbuf;
2666 }
2667
2668 bool IsOk() const { return m_ok; }
2669
2670 public:
2671 wxFontEncoding m_enc;
2672 wxEncodingConverter m2w, w2m;
2673
2674 private:
2675 virtual size_t GetMinMBCharWidth() const
2676 {
2677 switch ( m_enc )
2678 {
2679 case wxFONTENCODING_UTF16BE:
2680 case wxFONTENCODING_UTF16LE:
2681 return 2;
2682
2683 case wxFONTENCODING_UTF32BE:
2684 case wxFONTENCODING_UTF32LE:
2685 return 4;
2686
2687 default:
2688 return 1;
2689 }
2690 }
2691
2692 // were we initialized successfully?
2693 bool m_ok;
2694
2695 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2696 };
2697
2698 // make the constructors available for unit testing
2699 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2700 {
2701 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2702 if ( !result->IsOk() )
2703 {
2704 delete result;
2705 return 0;
2706 }
2707 return result;
2708 }
2709
2710 #endif // wxUSE_FONTMAP
2711
2712 // ============================================================================
2713 // wxCSConv implementation
2714 // ============================================================================
2715
2716 void wxCSConv::Init()
2717 {
2718 m_name = NULL;
2719 m_convReal = NULL;
2720 m_deferred = true;
2721 }
2722
2723 wxCSConv::wxCSConv(const wxChar *charset)
2724 {
2725 Init();
2726
2727 if ( charset )
2728 {
2729 SetName(charset);
2730 }
2731
2732 #if wxUSE_FONTMAP
2733 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2734 #else
2735 m_encoding = wxFONTENCODING_SYSTEM;
2736 #endif
2737 }
2738
2739 wxCSConv::wxCSConv(wxFontEncoding encoding)
2740 {
2741 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2742 {
2743 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2744
2745 encoding = wxFONTENCODING_SYSTEM;
2746 }
2747
2748 Init();
2749
2750 m_encoding = encoding;
2751 }
2752
2753 wxCSConv::~wxCSConv()
2754 {
2755 Clear();
2756 }
2757
2758 wxCSConv::wxCSConv(const wxCSConv& conv)
2759 : wxMBConv()
2760 {
2761 Init();
2762
2763 SetName(conv.m_name);
2764 m_encoding = conv.m_encoding;
2765 }
2766
2767 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2768 {
2769 Clear();
2770
2771 SetName(conv.m_name);
2772 m_encoding = conv.m_encoding;
2773
2774 return *this;
2775 }
2776
2777 void wxCSConv::Clear()
2778 {
2779 free(m_name);
2780 delete m_convReal;
2781
2782 m_name = NULL;
2783 m_convReal = NULL;
2784 }
2785
2786 void wxCSConv::SetName(const wxChar *charset)
2787 {
2788 if (charset)
2789 {
2790 m_name = wxStrdup(charset);
2791 m_deferred = true;
2792 }
2793 }
2794
#if wxUSE_FONTMAP
#include "wx/hashmap.h"

// hash map type associating a string with a font encoding
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// cache of the charset names which were tried for the given encodings
static wxEncodingNameCache gs_nameCache;
#endif
2803
2804 wxMBConv *wxCSConv::DoCreate() const
2805 {
2806 #if wxUSE_FONTMAP
2807 wxLogTrace(TRACE_STRCONV,
2808 wxT("creating conversion for %s"),
2809 (m_name ? m_name
2810 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2811 #endif // wxUSE_FONTMAP
2812
2813 // check for the special case of ASCII or ISO8859-1 charset: as we have
2814 // special knowledge of it anyhow, we don't need to create a special
2815 // conversion object
2816 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
2817 m_encoding == wxFONTENCODING_DEFAULT )
2818 {
2819 // don't convert at all
2820 return NULL;
2821 }
2822
2823 // we trust OS to do conversion better than we can so try external
2824 // conversion methods first
2825 //
2826 // the full order is:
2827 // 1. OS conversion (iconv() under Unix or Win32 API)
2828 // 2. hard coded conversions for UTF
2829 // 3. wxEncodingConverter as fall back
2830
2831 // step (1)
2832 #ifdef HAVE_ICONV
2833 #if !wxUSE_FONTMAP
2834 if ( m_name )
2835 #endif // !wxUSE_FONTMAP
2836 {
2837 wxString name(m_name);
2838 wxFontEncoding encoding(m_encoding);
2839
2840 if ( !name.empty() )
2841 {
2842 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2843 if ( conv->IsOk() )
2844 return conv;
2845
2846 delete conv;
2847
2848 #if wxUSE_FONTMAP
2849 encoding =
2850 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2851 #endif // wxUSE_FONTMAP
2852 }
2853 #if wxUSE_FONTMAP
2854 {
2855 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2856 if ( it != gs_nameCache.end() )
2857 {
2858 if ( it->second.empty() )
2859 return NULL;
2860
2861 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2862 if ( conv->IsOk() )
2863 return conv;
2864
2865 delete conv;
2866 }
2867
2868 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2869
2870 for ( ; *names; ++names )
2871 {
2872 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2873 if ( conv->IsOk() )
2874 {
2875 gs_nameCache[encoding] = *names;
2876 return conv;
2877 }
2878
2879 delete conv;
2880 }
2881
2882 gs_nameCache[encoding] = _T(""); // cache the failure
2883 }
2884 #endif // wxUSE_FONTMAP
2885 }
2886 #endif // HAVE_ICONV
2887
2888 #ifdef wxHAVE_WIN32_MB2WC
2889 {
2890 #if wxUSE_FONTMAP
2891 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2892 : new wxMBConv_win32(m_encoding);
2893 if ( conv->IsOk() )
2894 return conv;
2895
2896 delete conv;
2897 #else
2898 return NULL;
2899 #endif
2900 }
2901 #endif // wxHAVE_WIN32_MB2WC
2902 #if defined(__WXMAC__)
2903 {
2904 // leave UTF16 and UTF32 to the built-ins of wx
2905 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2906 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2907 {
2908
2909 #if wxUSE_FONTMAP
2910 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2911 : new wxMBConv_mac(m_encoding);
2912 #else
2913 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2914 #endif
2915 if ( conv->IsOk() )
2916 return conv;
2917
2918 delete conv;
2919 }
2920 }
2921 #endif
2922 #if defined(__WXCOCOA__)
2923 {
2924 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2925 {
2926
2927 #if wxUSE_FONTMAP
2928 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2929 : new wxMBConv_cocoa(m_encoding);
2930 #else
2931 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2932 #endif
2933 if ( conv->IsOk() )
2934 return conv;
2935
2936 delete conv;
2937 }
2938 }
2939 #endif
2940 // step (2)
2941 wxFontEncoding enc = m_encoding;
2942 #if wxUSE_FONTMAP
2943 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2944 {
2945 // use "false" to suppress interactive dialogs -- we can be called from
2946 // anywhere and popping up a dialog from here is the last thing we want to
2947 // do
2948 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2949 }
2950 #endif // wxUSE_FONTMAP
2951
2952 switch ( enc )
2953 {
2954 case wxFONTENCODING_UTF7:
2955 return new wxMBConvUTF7;
2956
2957 case wxFONTENCODING_UTF8:
2958 return new wxMBConvUTF8;
2959
2960 case wxFONTENCODING_UTF16BE:
2961 return new wxMBConvUTF16BE;
2962
2963 case wxFONTENCODING_UTF16LE:
2964 return new wxMBConvUTF16LE;
2965
2966 case wxFONTENCODING_UTF32BE:
2967 return new wxMBConvUTF32BE;
2968
2969 case wxFONTENCODING_UTF32LE:
2970 return new wxMBConvUTF32LE;
2971
2972 default:
2973 // nothing to do but put here to suppress gcc warnings
2974 ;
2975 }
2976
2977 // step (3)
2978 #if wxUSE_FONTMAP
2979 {
2980 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2981 : new wxMBConv_wxwin(m_encoding);
2982 if ( conv->IsOk() )
2983 return conv;
2984
2985 delete conv;
2986 }
2987 #endif // wxUSE_FONTMAP
2988
2989 // NB: This is a hack to prevent deadlock. What could otherwise happen
2990 // in Unicode build: wxConvLocal creation ends up being here
2991 // because of some failure and logs the error. But wxLog will try to
2992 // attach timestamp, for which it will need wxConvLocal (to convert
2993 // time to char* and then wchar_t*), but that fails, tries to log
2994 // error, but wxLog has a (already locked) critical section that
2995 // guards static buffer.
2996 static bool alreadyLoggingError = false;
2997 if (!alreadyLoggingError)
2998 {
2999 alreadyLoggingError = true;
3000 wxLogError(_("Cannot convert from the charset '%s'!"),
3001 m_name ? m_name
3002 :
3003 #if wxUSE_FONTMAP
3004 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
3005 #else // !wxUSE_FONTMAP
3006 wxString::Format(_("encoding %s"), m_encoding).c_str()
3007 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
3008 );
3009 alreadyLoggingError = false;
3010 }
3011
3012 return NULL;
3013 }
3014
3015 void wxCSConv::CreateConvIfNeeded() const
3016 {
3017 if ( m_deferred )
3018 {
3019 wxCSConv *self = (wxCSConv *)this; // const_cast
3020
3021 #if wxUSE_INTL
3022 // if we don't have neither the name nor the encoding, use the default
3023 // encoding for this system
3024 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3025 {
3026 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
3027 }
3028 #endif // wxUSE_INTL
3029
3030 self->m_convReal = DoCreate();
3031 self->m_deferred = false;
3032 }
3033 }
3034
3035 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
3036 {
3037 CreateConvIfNeeded();
3038
3039 if (m_convReal)
3040 return m_convReal->MB2WC(buf, psz, n);
3041
3042 // latin-1 (direct)
3043 size_t len = strlen(psz);
3044
3045 if (buf)
3046 {
3047 for (size_t c = 0; c <= len; c++)
3048 buf[c] = (unsigned char)(psz[c]);
3049 }
3050
3051 return len;
3052 }
3053
3054 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
3055 {
3056 CreateConvIfNeeded();
3057
3058 if (m_convReal)
3059 return m_convReal->WC2MB(buf, psz, n);
3060
3061 // latin-1 (direct)
3062 const size_t len = wxWcslen(psz);
3063 if (buf)
3064 {
3065 for (size_t c = 0; c <= len; c++)
3066 {
3067 if (psz[c] > 0xFF)
3068 return (size_t)-1;
3069 buf[c] = (char)psz[c];
3070 }
3071 }
3072 else
3073 {
3074 for (size_t c = 0; c <= len; c++)
3075 {
3076 if (psz[c] > 0xFF)
3077 return (size_t)-1;
3078 }
3079 }
3080
3081 return len;
3082 }
3083
3084 size_t wxCSConv::GetMinMBCharWidth() const
3085 {
3086 CreateConvIfNeeded();
3087
3088 if ( m_convReal )
3089 {
3090 // cast needed just to call private function of m_convReal
3091 return ((wxCSConv *)m_convReal)->GetMinMBCharWidth();
3092 }
3093
3094 return 1;
3095 }
3096
3097 // ----------------------------------------------------------------------------
3098 // globals
3099 // ----------------------------------------------------------------------------
3100
// the object behind wxConvLibc: its type depends on the platform
#ifdef __WINDOWS__
    static wxMBConv_win32 wxConvLibcObj;
#elif defined(__WXMAC__) && !defined(__MACH__)
    static wxMBConv_mac wxConvLibcObj ;
#else
    static wxMBConvLibc wxConvLibcObj;
#endif

// the objects behind the other predefined converters
static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
static wxMBConvUTF7 wxConvUTF7Obj;
static wxMBConvUTF8 wxConvUTF8Obj;

// the exported converters are references (or pointers) to the static
// objects defined above
WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
// wxConvFileName points to the UTF-8 converter when __WXOSX__ is defined
// and to the libc one otherwise
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
#ifdef __WXOSX__
                                    wxConvUTF8Obj;
#else
                                    wxConvLibcObj;
#endif
3126
3127
3128 #else // !wxUSE_WCHAR_T
3129
// stand-ins in absence of wchar_t: the predefined converters are all plain
// wxMBConv objects in this case
WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
                                wxConvISO8859_1,
                                wxConvLocal,
                                wxConvUTF8;
3135
3136 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T