]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
[ 1054664 ] Implementation of wxMBConvUTF7 (Heavily modified in places), utf7 unit...
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
6 // Modified by:
7 // Created: 29/01/98
8 // RCS-ID: $Id$
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
14
15 // ============================================================================
16 // declarations
17 // ============================================================================
18
19 // ----------------------------------------------------------------------------
20 // headers
21 // ----------------------------------------------------------------------------
22
23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
24 #pragma implementation "strconv.h"
25 #endif
26
27 // For compilers that support precompilation, includes "wx.h".
28 #include "wx/wxprec.h"
29
30 #ifdef __BORLANDC__
31 #pragma hdrstop
32 #endif
33
34 #ifndef WX_PRECOMP
35 #include "wx/intl.h"
36 #include "wx/log.h"
37 #endif // WX_PRECOMP
38
39 #include "wx/strconv.h"
40
41 #if wxUSE_WCHAR_T
42
43 #ifdef __WXMSW__
44 #include "wx/msw/private.h"
45 #endif
46
47 #ifdef __WINDOWS__
48 #include "wx/msw/missing.h"
49 #endif
50
51 #ifndef __WXWINCE__
52 #include <errno.h>
53 #endif
54
55 #include <ctype.h>
56 #include <string.h>
57 #include <stdlib.h>
58
59 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
60 #define wxHAVE_WIN32_MB2WC
61 #endif // __WIN32__ but !__WXMICROWIN__
62
63 // ----------------------------------------------------------------------------
64 // headers
65 // ----------------------------------------------------------------------------
66
67 #ifdef __SALFORDC__
68 #include <clib.h>
69 #endif
70
71 #ifdef HAVE_ICONV
72 #include <iconv.h>
73 #endif
74
75 #include "wx/encconv.h"
76 #include "wx/fontmap.h"
77 #include "wx/utils.h"
78
79 #ifdef __WXMAC__
80 #include <ATSUnicode.h>
81 #include <TextCommon.h>
82 #include <TextEncodingConverter.h>
83
84 #include "wx/mac/private.h" // includes mac headers
85 #endif
86 // ----------------------------------------------------------------------------
87 // macros
88 // ----------------------------------------------------------------------------
89
// In-place element swappers: apply wxUINT32_SWAP_ALWAYS / wxUINT16_SWAP_ALWAYS
// to each of the first `len` elements of `str`.  Both expand to a bare
// { ... } block, so they are used in this file without a trailing semicolon.
#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }

// Pick, from the size of wchar_t:
//  - WC_NAME:      charset name for wchar_t without byte order information
//  - WC_NAME_BEST: charset name for wchar_t with the host byte order spelled out
//  - WC_BSWAP:     the swapper macro matching the wchar_t width
//  - WC_UTF16:     defined only when wchar_t is 2 bytes wide
#if SIZEOF_WCHAR_T == 4
#define WC_NAME "UCS4"
#define WC_BSWAP BSWAP_UCS4
#ifdef WORDS_BIGENDIAN
#define WC_NAME_BEST "UCS-4BE"
#else
#define WC_NAME_BEST "UCS-4LE"
#endif
#elif SIZEOF_WCHAR_T == 2
#define WC_NAME "UTF16"
#define WC_BSWAP BSWAP_UTF16
#define WC_UTF16
#ifdef WORDS_BIGENDIAN
#define WC_NAME_BEST "UTF-16BE"
#else
#define WC_NAME_BEST "UTF-16LE"
#endif
#else // sizeof(wchar_t) != 2 nor 4
// does this ever happen?
#error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
#endif
114
115 // ============================================================================
116 // implementation
117 // ============================================================================
118
119 // ----------------------------------------------------------------------------
120 // UTF-16 en/decoding to/from UCS-4
121 // ----------------------------------------------------------------------------
122
123
124 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
125 {
126 if (input<=0xffff)
127 {
128 if (output)
129 *output = (wxUint16) input;
130 return 1;
131 }
132 else if (input>=0x110000)
133 {
134 return (size_t)-1;
135 }
136 else
137 {
138 if (output)
139 {
140 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
141 *output = (wxUint16) ((input&0x3ff)+0xdc00);
142 }
143 return 2;
144 }
145 }
146
147 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
148 {
149 if ((*input<0xd800) || (*input>0xdfff))
150 {
151 output = *input;
152 return 1;
153 }
154 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
155 {
156 output = *input;
157 return (size_t)-1;
158 }
159 else
160 {
161 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
162 return 2;
163 }
164 }
165
166
167 // ----------------------------------------------------------------------------
168 // wxMBConv
169 // ----------------------------------------------------------------------------
170
171 wxMBConv::~wxMBConv()
172 {
173 // nothing to do here (necessary for Darwin linking probably)
174 }
175
176 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
177 {
178 if ( psz )
179 {
180 // calculate the length of the buffer needed first
181 size_t nLen = MB2WC(NULL, psz, 0);
182 if ( nLen != (size_t)-1 )
183 {
184 // now do the actual conversion
185 wxWCharBuffer buf(nLen);
186 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
187 if ( nLen != (size_t)-1 )
188 {
189 return buf;
190 }
191 }
192 }
193
194 wxWCharBuffer buf((wchar_t *)NULL);
195
196 return buf;
197 }
198
199 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
200 {
201 if ( pwz )
202 {
203 size_t nLen = WC2MB(NULL, pwz, 0);
204 if ( nLen != (size_t)-1 )
205 {
206 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
207 nLen = WC2MB(buf.data(), pwz, nLen + 4);
208 if ( nLen != (size_t)-1 )
209 {
210 return buf;
211 }
212 }
213 }
214
215 wxCharBuffer buf((char *)NULL);
216
217 return buf;
218 }
219
220 // ----------------------------------------------------------------------------
221 // wxMBConvLibc
222 // ----------------------------------------------------------------------------
223
224 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
225 {
226 return wxMB2WC(buf, psz, n);
227 }
228
229 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
230 {
231 return wxWC2MB(buf, psz, n);
232 }
233 // ----------------------------------------------------------------------------
234 // UTF-7
235 // ----------------------------------------------------------------------------
236
237 // Implementation (C) 2004 Fredrik Roubert
238
239 //
240 // BASE64 decoding table
241 //
// Indexed by input byte value: the 6-bit value of a BASE64 digit, or 0xff
// for every byte that is not a BASE64 digit.
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0' - '9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x4f: 'A' - 'O'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50 - 0x5f: 'P' - 'Z'
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x6f: 'a' - 'o'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70 - 0x7f: 'p' - 'z'
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff: never BASE64 digits
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
277
278 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
279 {
280
281 size_t len = 0;
282
283 while (*psz && ((!buf) || (len < n)))
284 {
285 unsigned char cc = *psz++;
286 if (cc != '+')
287 {
288 // plain ASCII char
289 if (buf)
290 *buf++ = cc;
291 len++;
292 }
293 else if (*psz == '-')
294 {
295 // encoded plus sign
296 if (buf)
297 *buf++ = cc;
298 len++;
299 psz++;
300 }
301 else
302 {
303 // BASE64 encoded string
304 bool lsb;
305 unsigned char c;
306 unsigned int d, l;
307 for (lsb = false, d = 0, l = 0;
308 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
309 {
310 d <<= 6;
311 d += cc;
312 for (l += 6; l >= 8; lsb = !lsb)
313 {
314 c = (d >> (l -= 8)) % 256;
315 if (lsb)
316 {
317 if (buf)
318 *buf++ |= c;
319 len ++;
320 }
321 else
322 if (buf)
323 *buf = c << 8;
324 }
325 }
326 if (*psz == '-')
327 psz++;
328 }
329 }
330 if (buf && (len < n))
331 *buf = 0;
332 return len;
333 }
334
335 //
336 // BASE64 encoding table
337 //
// Indexed by 6-bit value: the corresponding BASE64 digit.
static const unsigned char utf7enb64[] =
{
    //  0 - 15
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    // 16 - 31
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    // 32 - 47
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    // 48 - 63
    'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
};
349
350 //
351 // UTF-7 encoding table
352 //
353 // 0 - Set D (directly encoded characters)
354 // 1 - Set O (optional direct characters)
355 // 2 - whitespace characters (optional)
356 // 3 - special characters
357 //
// Class (0-3, see the legend above) of each of the 128 ASCII characters,
// indexed by character code.  The encoder in this file copies a character
// through unchanged only when its class is 0.
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, // 0x00 - 0x0f
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0x10 - 0x1f
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3, // 0x20 - 0x2f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, // 0x30 - 0x3f
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1, // 0x50 - 0x5f
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3  // 0x70 - 0x7f
};
369
370 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t
371 *psz, size_t n) const
372 {
373
374
375 size_t len = 0;
376
377 while (*psz && ((!buf) || (len < n)))
378 {
379 wchar_t cc = *psz++;
380 if (cc < 0x80 && utf7encode[cc] < 1)
381 {
382 // plain ASCII char
383 if (buf)
384 *buf++ = (char)cc;
385 len++;
386 }
387 #ifndef WC_UTF16
388 else if (cc > 0xffff)
389 {
390 // no surrogate pair generation (yet?)
391 return (size_t)-1;
392 }
393 #endif
394 else
395 {
396 if (buf)
397 *buf++ = '+';
398 len++;
399 if (cc != '+')
400 {
401 // BASE64 encode string
402 unsigned int lsb, d, l;
403 for (d = 0, l = 0;; psz++)
404 {
405 for (lsb = 0; lsb < 2; lsb ++)
406 {
407 d <<= 8;
408 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
409
410 for (l += 8; l >= 6; )
411 {
412 l -= 6;
413 if (buf)
414 *buf++ = utf7enb64[(d >> l) % 64];
415 len++;
416 }
417 }
418 cc = *psz;
419 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
420 break;
421 }
422 if (l != 0)
423 {
424 if (buf)
425 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
426 len++;
427 }
428 }
429 if (buf)
430 *buf++ = '-';
431 len++;
432 }
433 }
434 if (buf && (len < n))
435 *buf = 0;
436 return len;
437 }
438
439 // ----------------------------------------------------------------------------
440 // UTF-8
441 // ----------------------------------------------------------------------------
442
443 static wxUint32 utf8_max[]=
444 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
445
446 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
447 {
448 size_t len = 0;
449
450 while (*psz && ((!buf) || (len < n)))
451 {
452 unsigned char cc = *psz++, fc = cc;
453 unsigned cnt;
454 for (cnt = 0; fc & 0x80; cnt++)
455 fc <<= 1;
456 if (!cnt)
457 {
458 // plain ASCII char
459 if (buf)
460 *buf++ = cc;
461 len++;
462 }
463 else
464 {
465 cnt--;
466 if (!cnt)
467 {
468 // invalid UTF-8 sequence
469 return (size_t)-1;
470 }
471 else
472 {
473 unsigned ocnt = cnt - 1;
474 wxUint32 res = cc & (0x3f >> cnt);
475 while (cnt--)
476 {
477 cc = *psz++;
478 if ((cc & 0xC0) != 0x80)
479 {
480 // invalid UTF-8 sequence
481 return (size_t)-1;
482 }
483 res = (res << 6) | (cc & 0x3f);
484 }
485 if (res <= utf8_max[ocnt])
486 {
487 // illegal UTF-8 encoding
488 return (size_t)-1;
489 }
490 #ifdef WC_UTF16
491 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
492 size_t pa = encode_utf16(res, (wxUint16 *)buf);
493 if (pa == (size_t)-1)
494 return (size_t)-1;
495 if (buf)
496 buf += pa;
497 len += pa;
498 #else // !WC_UTF16
499 if (buf)
500 *buf++ = res;
501 len++;
502 #endif // WC_UTF16/!WC_UTF16
503 }
504 }
505 }
506 if (buf && (len < n))
507 *buf = 0;
508 return len;
509 }
510
511 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
512 {
513 size_t len = 0;
514
515 while (*psz && ((!buf) || (len < n)))
516 {
517 wxUint32 cc;
518 #ifdef WC_UTF16
519 // cast is ok for WC_UTF16
520 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
521 psz += (pa == (size_t)-1) ? 1 : pa;
522 #else
523 cc=(*psz++) & 0x7fffffff;
524 #endif
525 unsigned cnt;
526 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
527 if (!cnt)
528 {
529 // plain ASCII char
530 if (buf)
531 *buf++ = (char) cc;
532 len++;
533 }
534
535 else
536 {
537 len += cnt + 1;
538 if (buf)
539 {
540 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
541 while (cnt--)
542 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
543 }
544 }
545 }
546
547 if (buf && (len<n)) *buf = 0;
548
549 return len;
550 }
551
552
553
554
555 // ----------------------------------------------------------------------------
556 // UTF-16
557 // ----------------------------------------------------------------------------
558
// The implementations below are written against two aliases:
// wxMBConvUTF16straight is mapped to the BE class when WORDS_BIGENDIAN is
// defined and to the LE class otherwise; wxMBConvUTF16swap is mapped to the
// other one.
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF16straight wxMBConvUTF16BE
#define wxMBConvUTF16swap wxMBConvUTF16LE
#else
#define wxMBConvUTF16swap wxMBConvUTF16BE
#define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
566
567
568 #ifdef WC_UTF16
569
570 // copy 16bit MB to 16bit String
571 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
572 {
573 size_t len=0;
574
575 while (*(wxUint16*)psz && (!buf || len < n))
576 {
577 if (buf)
578 *buf++ = *(wxUint16*)psz;
579 len++;
580
581 psz += sizeof(wxUint16);
582 }
583 if (buf && len<n) *buf=0;
584
585 return len;
586 }
587
588
589 // copy 16bit String to 16bit MB
590 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
591 {
592 size_t len=0;
593
594 while (*psz && (!buf || len < n))
595 {
596 if (buf)
597 {
598 *(wxUint16*)buf = *psz;
599 buf += sizeof(wxUint16);
600 }
601 len += sizeof(wxUint16);
602 psz++;
603 }
604 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
605
606 return len;
607 }
608
609
610 // swap 16bit MB to 16bit String
611 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
612 {
613 size_t len=0;
614
615 while (*(wxUint16*)psz && (!buf || len < n))
616 {
617 if (buf)
618 {
619 ((char *)buf)[0] = psz[1];
620 ((char *)buf)[1] = psz[0];
621 buf++;
622 }
623 len++;
624 psz += sizeof(wxUint16);
625 }
626 if (buf && len<n) *buf=0;
627
628 return len;
629 }
630
631
632 // swap 16bit MB to 16bit String
633 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
634 {
635 size_t len=0;
636
637 while (*psz && (!buf || len < n))
638 {
639 if (buf)
640 {
641 *buf++ = ((char*)psz)[1];
642 *buf++ = ((char*)psz)[0];
643 }
644 len += sizeof(wxUint16);
645 psz++;
646 }
647 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
648
649 return len;
650 }
651
652
653 #else // WC_UTF16
654
655
656 // copy 16bit MB to 32bit String
657 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
658 {
659 size_t len=0;
660
661 while (*(wxUint16*)psz && (!buf || len < n))
662 {
663 wxUint32 cc;
664 size_t pa=decode_utf16((wxUint16*)psz, cc);
665 if (pa == (size_t)-1)
666 return pa;
667
668 if (buf)
669 *buf++ = cc;
670 len++;
671 psz += pa * sizeof(wxUint16);
672 }
673 if (buf && len<n) *buf=0;
674
675 return len;
676 }
677
678
679 // copy 32bit String to 16bit MB
680 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
681 {
682 size_t len=0;
683
684 while (*psz && (!buf || len < n))
685 {
686 wxUint16 cc[2];
687 size_t pa=encode_utf16(*psz, cc);
688
689 if (pa == (size_t)-1)
690 return pa;
691
692 if (buf)
693 {
694 *(wxUint16*)buf = cc[0];
695 buf += sizeof(wxUint16);
696 if (pa > 1)
697 {
698 *(wxUint16*)buf = cc[1];
699 buf += sizeof(wxUint16);
700 }
701 }
702
703 len += pa*sizeof(wxUint16);
704 psz++;
705 }
706 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
707
708 return len;
709 }
710
711
712 // swap 16bit MB to 32bit String
713 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
714 {
715 size_t len=0;
716
717 while (*(wxUint16*)psz && (!buf || len < n))
718 {
719 wxUint32 cc;
720 char tmp[4];
721 tmp[0]=psz[1]; tmp[1]=psz[0];
722 tmp[2]=psz[3]; tmp[3]=psz[2];
723
724 size_t pa=decode_utf16((wxUint16*)tmp, cc);
725 if (pa == (size_t)-1)
726 return pa;
727
728 if (buf)
729 *buf++ = cc;
730
731 len++;
732 psz += pa * sizeof(wxUint16);
733 }
734 if (buf && len<n) *buf=0;
735
736 return len;
737 }
738
739
740 // swap 32bit String to 16bit MB
741 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
742 {
743 size_t len=0;
744
745 while (*psz && (!buf || len < n))
746 {
747 wxUint16 cc[2];
748 size_t pa=encode_utf16(*psz, cc);
749
750 if (pa == (size_t)-1)
751 return pa;
752
753 if (buf)
754 {
755 *buf++ = ((char*)cc)[1];
756 *buf++ = ((char*)cc)[0];
757 if (pa > 1)
758 {
759 *buf++ = ((char*)cc)[3];
760 *buf++ = ((char*)cc)[2];
761 }
762 }
763
764 len += pa*sizeof(wxUint16);
765 psz++;
766 }
767 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
768
769 return len;
770 }
771
772 #endif // WC_UTF16
773
774
775 // ----------------------------------------------------------------------------
776 // UTF-32
777 // ----------------------------------------------------------------------------
778
// The implementations below are written against two aliases:
// wxMBConvUTF32straight is mapped to the BE class when WORDS_BIGENDIAN is
// defined and to the LE class otherwise; wxMBConvUTF32swap is mapped to the
// other one.
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF32straight wxMBConvUTF32BE
#define wxMBConvUTF32swap wxMBConvUTF32LE
#else
#define wxMBConvUTF32swap wxMBConvUTF32BE
#define wxMBConvUTF32straight wxMBConvUTF32LE
#endif


// Global UTF-32 converter objects (WXDLLIMPEXP_DATA_BASE is defined
// elsewhere; presumably it adds the DLL export decoration -- confirm).
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
790
791
792 #ifdef WC_UTF16
793
794 // copy 32bit MB to 16bit String
795 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
796 {
797 size_t len=0;
798
799 while (*(wxUint32*)psz && (!buf || len < n))
800 {
801 wxUint16 cc[2];
802
803 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
804 if (pa == (size_t)-1)
805 return pa;
806
807 if (buf)
808 {
809 *buf++ = cc[0];
810 if (pa > 1)
811 *buf++ = cc[1];
812 }
813 len += pa;
814 psz += sizeof(wxUint32);
815 }
816 if (buf && len<n) *buf=0;
817
818 return len;
819 }
820
821
822 // copy 16bit String to 32bit MB
823 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
824 {
825 size_t len=0;
826
827 while (*psz && (!buf || len < n))
828 {
829 wxUint32 cc;
830
831 // cast is ok for WC_UTF16
832 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
833 if (pa == (size_t)-1)
834 return pa;
835
836 if (buf)
837 {
838 *(wxUint32*)buf = cc;
839 buf += sizeof(wxUint32);
840 }
841 len += sizeof(wxUint32);
842 psz += pa;
843 }
844
845 if (buf && len<=n-sizeof(wxUint32))
846 *(wxUint32*)buf=0;
847
848 return len;
849 }
850
851
852
853 // swap 32bit MB to 16bit String
854 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
855 {
856 size_t len=0;
857
858 while (*(wxUint32*)psz && (!buf || len < n))
859 {
860 char tmp[4];
861 tmp[0] = psz[3]; tmp[1] = psz[2];
862 tmp[2] = psz[1]; tmp[3] = psz[0];
863
864
865 wxUint16 cc[2];
866
867 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
868 if (pa == (size_t)-1)
869 return pa;
870
871 if (buf)
872 {
873 *buf++ = cc[0];
874 if (pa > 1)
875 *buf++ = cc[1];
876 }
877 len += pa;
878 psz += sizeof(wxUint32);
879 }
880
881 if (buf && len<n)
882 *buf=0;
883
884 return len;
885 }
886
887
888 // swap 16bit String to 32bit MB
889 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
890 {
891 size_t len=0;
892
893 while (*psz && (!buf || len < n))
894 {
895 char cc[4];
896
897 // cast is ok for WC_UTF16
898 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
899 if (pa == (size_t)-1)
900 return pa;
901
902 if (buf)
903 {
904 *buf++ = cc[3];
905 *buf++ = cc[2];
906 *buf++ = cc[1];
907 *buf++ = cc[0];
908 }
909 len += sizeof(wxUint32);
910 psz += pa;
911 }
912
913 if (buf && len<=n-sizeof(wxUint32))
914 *(wxUint32*)buf=0;
915
916 return len;
917 }
918
919 #else // WC_UTF16
920
921
922 // copy 32bit MB to 32bit String
923 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
924 {
925 size_t len=0;
926
927 while (*(wxUint32*)psz && (!buf || len < n))
928 {
929 if (buf)
930 *buf++ = *(wxUint32*)psz;
931 len++;
932 psz += sizeof(wxUint32);
933 }
934
935 if (buf && len<n)
936 *buf=0;
937
938 return len;
939 }
940
941
942 // copy 32bit String to 32bit MB
943 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
944 {
945 size_t len=0;
946
947 while (*psz && (!buf || len < n))
948 {
949 if (buf)
950 {
951 *(wxUint32*)buf = *psz;
952 buf += sizeof(wxUint32);
953 }
954
955 len += sizeof(wxUint32);
956 psz++;
957 }
958
959 if (buf && len<=n-sizeof(wxUint32))
960 *(wxUint32*)buf=0;
961
962 return len;
963 }
964
965
966 // swap 32bit MB to 32bit String
967 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
968 {
969 size_t len=0;
970
971 while (*(wxUint32*)psz && (!buf || len < n))
972 {
973 if (buf)
974 {
975 ((char *)buf)[0] = psz[3];
976 ((char *)buf)[1] = psz[2];
977 ((char *)buf)[2] = psz[1];
978 ((char *)buf)[3] = psz[0];
979 buf++;
980 }
981 len++;
982 psz += sizeof(wxUint32);
983 }
984
985 if (buf && len<n)
986 *buf=0;
987
988 return len;
989 }
990
991
992 // swap 32bit String to 32bit MB
993 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
994 {
995 size_t len=0;
996
997 while (*psz && (!buf || len < n))
998 {
999 if (buf)
1000 {
1001 *buf++ = ((char *)psz)[3];
1002 *buf++ = ((char *)psz)[2];
1003 *buf++ = ((char *)psz)[1];
1004 *buf++ = ((char *)psz)[0];
1005 }
1006 len += sizeof(wxUint32);
1007 psz++;
1008 }
1009
1010 if (buf && len<=n-sizeof(wxUint32))
1011 *(wxUint32*)buf=0;
1012
1013 return len;
1014 }
1015
1016
1017 #endif // WC_UTF16
1018
1019
1020 // ============================================================================
1021 // The classes doing conversion using the iconv_xxx() functions
1022 // ============================================================================
1023
1024 #ifdef HAVE_ICONV
1025
1026 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
1027 // if output buffer is _exactly_ as big as needed. Such case is (unless there's
1028 // yet another bug in glibc) the only case when iconv() returns with (size_t)-1
1029 // (which means error) and says there are 0 bytes left in the input buffer --
1030 // when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
1031 // this alternative test for iconv() failure.
1032 // [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft): true if the iconv() call that returned `cres`
// and left `bufLeft` input bytes unconverted is to be treated as a failure.
// For glibc 2.0/2.1 a (size_t)-1 result with errno == E2BIG and no input left
// is not counted as one (see the explanation above).
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
(errno != E2BIG || bufLeft != 0))
#else
#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
#endif

// Cast the address of an input pointer to the (ICONV_CONST char **) type
// used for iconv()'s input argument in this file; ICONV_CONST is defined
// elsewhere.
#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1041
1042 // ----------------------------------------------------------------------------
1043 // wxMBConv_iconv: encapsulates an iconv character set
1044 // ----------------------------------------------------------------------------
1045
// Converter between a named multibyte charset and wchar_t built on iconv().
//
// Each object holds two iconv descriptors, one per direction.  The name of
// the charset used to represent wchar_t and whether its byte order differs
// from the native one are determined once and shared by all instances
// through the static members.
class wxMBConv_iconv : public wxMBConv
{
public:
    // open the descriptors for converting between `name` and wchar_t;
    // check IsOk() afterwards
    wxMBConv_iconv(const wxChar *name);
    // close whichever descriptors were opened
    virtual ~wxMBConv_iconv();

    virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
    virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;

    // true only if both descriptors were opened successfully
    bool IsOk() const
        { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }

protected:
    // the iconv handlers used to translate from multibyte to wide char and in
    // the other direction
    iconv_t m2w,
            w2m;

private:
    // the name (for iconv_open()) of a wide char charset -- if none is
    // available on this machine, it will remain NULL
    static const char *ms_wcCharsetName;

    // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
    // different endian-ness than the native one
    static bool ms_wcNeedsSwap;
};
1073
// Shared state of all wxMBConv_iconv objects.  The charset name starts out
// as NULL, which the constructor takes to mean "not determined yet".
const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1076
1077 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1078 {
1079 // Do it the hard way
1080 char cname[100];
1081 for (size_t i = 0; i < wxStrlen(name)+1; i++)
1082 cname[i] = (char) name[i];
1083
1084 // check for charset that represents wchar_t:
1085 if (ms_wcCharsetName == NULL)
1086 {
1087 ms_wcNeedsSwap = false;
1088
1089 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1090 ms_wcCharsetName = WC_NAME_BEST;
1091 m2w = iconv_open(ms_wcCharsetName, cname);
1092
1093 if (m2w == (iconv_t)-1)
1094 {
1095 // try charset w/o bytesex info (e.g. "UCS4")
1096 // and check for bytesex ourselves:
1097 ms_wcCharsetName = WC_NAME;
1098 m2w = iconv_open(ms_wcCharsetName, cname);
1099
1100 // last bet, try if it knows WCHAR_T pseudo-charset
1101 if (m2w == (iconv_t)-1)
1102 {
1103 ms_wcCharsetName = "WCHAR_T";
1104 m2w = iconv_open(ms_wcCharsetName, cname);
1105 }
1106
1107 if (m2w != (iconv_t)-1)
1108 {
1109 char buf[2], *bufPtr;
1110 wchar_t wbuf[2], *wbufPtr;
1111 size_t insz, outsz;
1112 size_t res;
1113
1114 buf[0] = 'A';
1115 buf[1] = 0;
1116 wbuf[0] = 0;
1117 insz = 2;
1118 outsz = SIZEOF_WCHAR_T * 2;
1119 wbufPtr = wbuf;
1120 bufPtr = buf;
1121
1122 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1123 (char**)&wbufPtr, &outsz);
1124
1125 if (ICONV_FAILED(res, insz))
1126 {
1127 ms_wcCharsetName = NULL;
1128 wxLogLastError(wxT("iconv"));
1129 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1130 }
1131 else
1132 {
1133 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1134 }
1135 }
1136 else
1137 {
1138 ms_wcCharsetName = NULL;
1139
1140 // VS: we must not output an error here, since wxWidgets will safely
1141 // fall back to using wxEncodingConverter.
1142 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1143 //wxLogError(
1144 }
1145 }
1146 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1147 }
1148 else // we already have ms_wcCharsetName
1149 {
1150 m2w = iconv_open(ms_wcCharsetName, cname);
1151 }
1152
1153 // NB: don't ever pass NULL to iconv_open(), it may crash!
1154 if ( ms_wcCharsetName )
1155 {
1156 w2m = iconv_open( cname, ms_wcCharsetName);
1157 }
1158 else
1159 {
1160 w2m = (iconv_t)-1;
1161 }
1162 }
1163
1164 wxMBConv_iconv::~wxMBConv_iconv()
1165 {
1166 if ( m2w != (iconv_t)-1 )
1167 iconv_close(m2w);
1168 if ( w2m != (iconv_t)-1 )
1169 iconv_close(w2m);
1170 }
1171
1172 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1173 {
1174 size_t inbuf = strlen(psz);
1175 size_t outbuf = n * SIZEOF_WCHAR_T;
1176 size_t res, cres;
1177 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1178 wchar_t *bufPtr = buf;
1179 const char *pszPtr = psz;
1180
1181 if (buf)
1182 {
1183 // have destination buffer, convert there
1184 cres = iconv(m2w,
1185 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1186 (char**)&bufPtr, &outbuf);
1187 res = n - (outbuf / SIZEOF_WCHAR_T);
1188
1189 if (ms_wcNeedsSwap)
1190 {
1191 // convert to native endianness
1192 WC_BSWAP(buf /* _not_ bufPtr */, res)
1193 }
1194
1195 // NB: iconv was given only strlen(psz) characters on input, and so
1196 // it couldn't convert the trailing zero. Let's do it ourselves
1197 // if there's some room left for it in the output buffer.
1198 if (res < n)
1199 buf[res] = 0;
1200 }
1201 else
1202 {
1203 // no destination buffer... convert using temp buffer
1204 // to calculate destination buffer requirement
1205 wchar_t tbuf[8];
1206 res = 0;
1207 do {
1208 bufPtr = tbuf;
1209 outbuf = 8*SIZEOF_WCHAR_T;
1210
1211 cres = iconv(m2w,
1212 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1213 (char**)&bufPtr, &outbuf );
1214
1215 res += 8-(outbuf/SIZEOF_WCHAR_T);
1216 } while ((cres==(size_t)-1) && (errno==E2BIG));
1217 }
1218
1219 if (ICONV_FAILED(cres, inbuf))
1220 {
1221 //VS: it is ok if iconv fails, hence trace only
1222 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1223 return (size_t)-1;
1224 }
1225
1226 return res;
1227 }
1228
1229 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1230 {
1231 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1232 size_t outbuf = n;
1233 size_t res, cres;
1234
1235 wchar_t *tmpbuf = 0;
1236
1237 if (ms_wcNeedsSwap)
1238 {
1239 // need to copy to temp buffer to switch endianness
1240 // this absolutely doesn't rock!
1241 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1242 // could be in read-only memory, or be accessed in some other thread)
1243 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1244 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1245 WC_BSWAP(tmpbuf, inbuf)
1246 psz=tmpbuf;
1247 }
1248
1249 if (buf)
1250 {
1251 // have destination buffer, convert there
1252 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1253
1254 res = n-outbuf;
1255
1256 // NB: iconv was given only wcslen(psz) characters on input, and so
1257 // it couldn't convert the trailing zero. Let's do it ourselves
1258 // if there's some room left for it in the output buffer.
1259 if (res < n)
1260 buf[0] = 0;
1261 }
1262 else
1263 {
1264 // no destination buffer... convert using temp buffer
1265 // to calculate destination buffer requirement
1266 char tbuf[16];
1267 res = 0;
1268 do {
1269 buf = tbuf; outbuf = 16;
1270
1271 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1272
1273 res += 16 - outbuf;
1274 } while ((cres==(size_t)-1) && (errno==E2BIG));
1275 }
1276
1277 if (ms_wcNeedsSwap)
1278 {
1279 free(tmpbuf);
1280 }
1281
1282 if (ICONV_FAILED(cres, inbuf))
1283 {
1284 //VS: it is ok if iconv fails, hence trace only
1285 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1286 return (size_t)-1;
1287 }
1288
1289 return res;
1290 }
1291
1292 #endif // HAVE_ICONV
1293
1294
1295 // ============================================================================
1296 // Win32 conversion classes
1297 // ============================================================================
1298
1299 #ifdef wxHAVE_WIN32_MB2WC
1300
1301 // from utils.cpp
1302 #if wxUSE_FONTMAP
1303 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1304 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1305 #endif
1306
1307 class wxMBConv_win32 : public wxMBConv
1308 {
1309 public:
1310 wxMBConv_win32()
1311 {
1312 m_CodePage = CP_ACP;
1313 }
1314
1315 #if wxUSE_FONTMAP
1316 wxMBConv_win32(const wxChar* name)
1317 {
1318 m_CodePage = wxCharsetToCodepage(name);
1319 }
1320
1321 wxMBConv_win32(wxFontEncoding encoding)
1322 {
1323 m_CodePage = wxEncodingToCodepage(encoding);
1324 }
1325 #endif
1326
1327 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1328 {
1329 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1330 // the behaviour is not compatible with the Unix version (using iconv)
1331 // and break the library itself, e.g. wxTextInputStream::NextChar()
1332 // wouldn't work if reading an incomplete MB char didn't result in an
1333 // error
1334 const size_t len = ::MultiByteToWideChar
1335 (
1336 m_CodePage, // code page
1337 MB_ERR_INVALID_CHARS, // flags: fall on error
1338 psz, // input string
1339 -1, // its length (NUL-terminated)
1340 buf, // output string
1341 buf ? n : 0 // size of output buffer
1342 );
1343
1344 // note that it returns count of written chars for buf != NULL and size
1345 // of the needed buffer for buf == NULL so in either case the length of
1346 // the string (which never includes the terminating NUL) is one less
1347 return len ? len - 1 : (size_t)-1;
1348 }
1349
1350 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1351 {
1352 /*
1353 we have a problem here: by default, WideCharToMultiByte() may
1354 replace characters unrepresentable in the target code page with bad
1355 quality approximations such as turning "1/2" symbol (U+00BD) into
1356 "1" for the code pages which don't have it and we, obviously, want
1357 to avoid this at any price
1358
1359 the trouble is that this function does it _silently_, i.e. it won't
1360 even tell us whether it did or not... Win98/2000 and higher provide
1361 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1362 we have to resort to a round trip, i.e. check that converting back
1363 results in the same string -- this is, of course, expensive but
1364 otherwise we simply can't be sure to not garble the data.
1365 */
1366
1367 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1368 // it doesn't work with CJK encodings (which we test for rather roughly
1369 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1370 // supporting it
1371 BOOL usedDef wxDUMMY_INITIALIZE(false);
1372 BOOL *pUsedDef;
1373 int flags;
1374 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1375 {
1376 // it's our lucky day
1377 flags = WC_NO_BEST_FIT_CHARS;
1378 pUsedDef = &usedDef;
1379 }
1380 else // old system or unsupported encoding
1381 {
1382 flags = 0;
1383 pUsedDef = NULL;
1384 }
1385
1386 const size_t len = ::WideCharToMultiByte
1387 (
1388 m_CodePage, // code page
1389 flags, // either none or no best fit
1390 pwz, // input string
1391 -1, // it is (wide) NUL-terminated
1392 buf, // output buffer
1393 buf ? n : 0, // and its size
1394 NULL, // default "replacement" char
1395 pUsedDef // [out] was it used?
1396 );
1397
1398 if ( !len )
1399 {
1400 // function totally failed
1401 return (size_t)-1;
1402 }
1403
1404 // if we were really converting, check if we succeeded
1405 if ( buf )
1406 {
1407 if ( flags )
1408 {
1409 // check if the conversion failed, i.e. if any replacements
1410 // were done
1411 if ( usedDef )
1412 return (size_t)-1;
1413 }
1414 else // we must resort to double tripping...
1415 {
1416 wxWCharBuffer wcBuf(n);
1417 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1418 wcscmp(wcBuf, pwz) != 0 )
1419 {
1420 // we didn't obtain the same thing we started from, hence
1421 // the conversion was lossy and we consider that it failed
1422 return (size_t)-1;
1423 }
1424 }
1425 }
1426
1427 // see the comment above for the reason of "len - 1"
1428 return len - 1;
1429 }
1430
1431 bool IsOk() const { return m_CodePage != -1; }
1432
1433 private:
1434 static bool CanUseNoBestFit()
1435 {
1436 static int s_isWin98Or2k = -1;
1437
1438 if ( s_isWin98Or2k == -1 )
1439 {
1440 int verMaj, verMin;
1441 switch ( wxGetOsVersion(&verMaj, &verMin) )
1442 {
1443 case wxWIN95:
1444 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1445 break;
1446
1447 case wxWINDOWS_NT:
1448 s_isWin98Or2k = verMaj >= 5;
1449 break;
1450
1451 default:
1452 // unknown, be conseravtive by default
1453 s_isWin98Or2k = 0;
1454 }
1455
1456 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1457 }
1458
1459 return s_isWin98Or2k == 1;
1460 }
1461
1462 long m_CodePage;
1463 };
1464
1465 #endif // wxHAVE_WIN32_MB2WC
1466
1467 // ============================================================================
1468 // Cocoa conversion classes
1469 // ============================================================================
1470
1471 #if defined(__WXCOCOA__)
1472
// RN: There is no UTF-32 support in either Core Foundation or
// Cocoa. Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1476
1477 #include <CoreFoundation/CFString.h>
1478 #include <CoreFoundation/CFStringEncodingExt.h>
1479
1480 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1481 {
1482 CFStringEncoding enc = 0 ;
1483 if ( encoding == wxFONTENCODING_DEFAULT )
1484 {
1485 #if wxUSE_GUI
1486 encoding = wxFont::GetDefaultEncoding() ;
1487 #else
1488 encoding = wxLocale::GetSystemEncoding() ;
1489 #endif
1490 }
1491 else switch( encoding)
1492 {
1493 case wxFONTENCODING_ISO8859_1 :
1494 enc = kCFStringEncodingISOLatin1 ;
1495 break ;
1496 case wxFONTENCODING_ISO8859_2 :
1497 enc = kCFStringEncodingISOLatin2;
1498 break ;
1499 case wxFONTENCODING_ISO8859_3 :
1500 enc = kCFStringEncodingISOLatin3 ;
1501 break ;
1502 case wxFONTENCODING_ISO8859_4 :
1503 enc = kCFStringEncodingISOLatin4;
1504 break ;
1505 case wxFONTENCODING_ISO8859_5 :
1506 enc = kCFStringEncodingISOLatinCyrillic;
1507 break ;
1508 case wxFONTENCODING_ISO8859_6 :
1509 enc = kCFStringEncodingISOLatinArabic;
1510 break ;
1511 case wxFONTENCODING_ISO8859_7 :
1512 enc = kCFStringEncodingISOLatinGreek;
1513 break ;
1514 case wxFONTENCODING_ISO8859_8 :
1515 enc = kCFStringEncodingISOLatinHebrew;
1516 break ;
1517 case wxFONTENCODING_ISO8859_9 :
1518 enc = kCFStringEncodingISOLatin5;
1519 break ;
1520 case wxFONTENCODING_ISO8859_10 :
1521 enc = kCFStringEncodingISOLatin6;
1522 break ;
1523 case wxFONTENCODING_ISO8859_11 :
1524 enc = kCFStringEncodingISOLatinThai;
1525 break ;
1526 case wxFONTENCODING_ISO8859_13 :
1527 enc = kCFStringEncodingISOLatin7;
1528 break ;
1529 case wxFONTENCODING_ISO8859_14 :
1530 enc = kCFStringEncodingISOLatin8;
1531 break ;
1532 case wxFONTENCODING_ISO8859_15 :
1533 enc = kCFStringEncodingISOLatin9;
1534 break ;
1535
1536 case wxFONTENCODING_KOI8 :
1537 enc = kCFStringEncodingKOI8_R;
1538 break ;
1539 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1540 enc = kCFStringEncodingDOSRussian;
1541 break ;
1542
1543 // case wxFONTENCODING_BULGARIAN :
1544 // enc = ;
1545 // break ;
1546
1547 case wxFONTENCODING_CP437 :
1548 enc =kCFStringEncodingDOSLatinUS ;
1549 break ;
1550 case wxFONTENCODING_CP850 :
1551 enc = kCFStringEncodingDOSLatin1;
1552 break ;
1553 case wxFONTENCODING_CP852 :
1554 enc = kCFStringEncodingDOSLatin2;
1555 break ;
1556 case wxFONTENCODING_CP855 :
1557 enc = kCFStringEncodingDOSCyrillic;
1558 break ;
1559 case wxFONTENCODING_CP866 :
1560 enc =kCFStringEncodingDOSRussian ;
1561 break ;
1562 case wxFONTENCODING_CP874 :
1563 enc = kCFStringEncodingDOSThai;
1564 break ;
1565 case wxFONTENCODING_CP932 :
1566 enc = kCFStringEncodingDOSJapanese;
1567 break ;
1568 case wxFONTENCODING_CP936 :
1569 enc =kCFStringEncodingDOSChineseSimplif ;
1570 break ;
1571 case wxFONTENCODING_CP949 :
1572 enc = kCFStringEncodingDOSKorean;
1573 break ;
1574 case wxFONTENCODING_CP950 :
1575 enc = kCFStringEncodingDOSChineseTrad;
1576 break ;
1577
1578 case wxFONTENCODING_CP1250 :
1579 enc = kCFStringEncodingWindowsLatin2;
1580 break ;
1581 case wxFONTENCODING_CP1251 :
1582 enc =kCFStringEncodingWindowsCyrillic ;
1583 break ;
1584 case wxFONTENCODING_CP1252 :
1585 enc =kCFStringEncodingWindowsLatin1 ;
1586 break ;
1587 case wxFONTENCODING_CP1253 :
1588 enc = kCFStringEncodingWindowsGreek;
1589 break ;
1590 case wxFONTENCODING_CP1254 :
1591 enc = kCFStringEncodingWindowsLatin5;
1592 break ;
1593 case wxFONTENCODING_CP1255 :
1594 enc =kCFStringEncodingWindowsHebrew ;
1595 break ;
1596 case wxFONTENCODING_CP1256 :
1597 enc =kCFStringEncodingWindowsArabic ;
1598 break ;
1599 case wxFONTENCODING_CP1257 :
1600 enc = kCFStringEncodingWindowsBalticRim;
1601 break ;
1602 case wxFONTENCODING_UTF7 :
1603 enc = kCFStringEncodingNonLossyASCII ;
1604 break ;
1605 case wxFONTENCODING_UTF8 :
1606 enc = kCFStringEncodingUTF8 ;
1607 break ;
1608 case wxFONTENCODING_EUC_JP :
1609 enc = kCFStringEncodingEUC_JP;
1610 break ;
1611 case wxFONTENCODING_UTF16 :
1612 enc = kCFStringEncodingUnicode ;
1613 break ;
1614 case wxFONTENCODING_MACROMAN :
1615 enc = kCFStringEncodingMacRoman ;
1616 break ;
1617 case wxFONTENCODING_MACJAPANESE :
1618 enc = kCFStringEncodingMacJapanese ;
1619 break ;
1620 case wxFONTENCODING_MACCHINESETRAD :
1621 enc = kCFStringEncodingMacChineseTrad ;
1622 break ;
1623 case wxFONTENCODING_MACKOREAN :
1624 enc = kCFStringEncodingMacKorean ;
1625 break ;
1626 case wxFONTENCODING_MACARABIC :
1627 enc = kCFStringEncodingMacArabic ;
1628 break ;
1629 case wxFONTENCODING_MACHEBREW :
1630 enc = kCFStringEncodingMacHebrew ;
1631 break ;
1632 case wxFONTENCODING_MACGREEK :
1633 enc = kCFStringEncodingMacGreek ;
1634 break ;
1635 case wxFONTENCODING_MACCYRILLIC :
1636 enc = kCFStringEncodingMacCyrillic ;
1637 break ;
1638 case wxFONTENCODING_MACDEVANAGARI :
1639 enc = kCFStringEncodingMacDevanagari ;
1640 break ;
1641 case wxFONTENCODING_MACGURMUKHI :
1642 enc = kCFStringEncodingMacGurmukhi ;
1643 break ;
1644 case wxFONTENCODING_MACGUJARATI :
1645 enc = kCFStringEncodingMacGujarati ;
1646 break ;
1647 case wxFONTENCODING_MACORIYA :
1648 enc = kCFStringEncodingMacOriya ;
1649 break ;
1650 case wxFONTENCODING_MACBENGALI :
1651 enc = kCFStringEncodingMacBengali ;
1652 break ;
1653 case wxFONTENCODING_MACTAMIL :
1654 enc = kCFStringEncodingMacTamil ;
1655 break ;
1656 case wxFONTENCODING_MACTELUGU :
1657 enc = kCFStringEncodingMacTelugu ;
1658 break ;
1659 case wxFONTENCODING_MACKANNADA :
1660 enc = kCFStringEncodingMacKannada ;
1661 break ;
1662 case wxFONTENCODING_MACMALAJALAM :
1663 enc = kCFStringEncodingMacMalayalam ;
1664 break ;
1665 case wxFONTENCODING_MACSINHALESE :
1666 enc = kCFStringEncodingMacSinhalese ;
1667 break ;
1668 case wxFONTENCODING_MACBURMESE :
1669 enc = kCFStringEncodingMacBurmese ;
1670 break ;
1671 case wxFONTENCODING_MACKHMER :
1672 enc = kCFStringEncodingMacKhmer ;
1673 break ;
1674 case wxFONTENCODING_MACTHAI :
1675 enc = kCFStringEncodingMacThai ;
1676 break ;
1677 case wxFONTENCODING_MACLAOTIAN :
1678 enc = kCFStringEncodingMacLaotian ;
1679 break ;
1680 case wxFONTENCODING_MACGEORGIAN :
1681 enc = kCFStringEncodingMacGeorgian ;
1682 break ;
1683 case wxFONTENCODING_MACARMENIAN :
1684 enc = kCFStringEncodingMacArmenian ;
1685 break ;
1686 case wxFONTENCODING_MACCHINESESIMP :
1687 enc = kCFStringEncodingMacChineseSimp ;
1688 break ;
1689 case wxFONTENCODING_MACTIBETAN :
1690 enc = kCFStringEncodingMacTibetan ;
1691 break ;
1692 case wxFONTENCODING_MACMONGOLIAN :
1693 enc = kCFStringEncodingMacMongolian ;
1694 break ;
1695 case wxFONTENCODING_MACETHIOPIC :
1696 enc = kCFStringEncodingMacEthiopic ;
1697 break ;
1698 case wxFONTENCODING_MACCENTRALEUR :
1699 enc = kCFStringEncodingMacCentralEurRoman ;
1700 break ;
1701 case wxFONTENCODING_MACVIATNAMESE :
1702 enc = kCFStringEncodingMacVietnamese ;
1703 break ;
1704 case wxFONTENCODING_MACARABICEXT :
1705 enc = kCFStringEncodingMacExtArabic ;
1706 break ;
1707 case wxFONTENCODING_MACSYMBOL :
1708 enc = kCFStringEncodingMacSymbol ;
1709 break ;
1710 case wxFONTENCODING_MACDINGBATS :
1711 enc = kCFStringEncodingMacDingbats ;
1712 break ;
1713 case wxFONTENCODING_MACTURKISH :
1714 enc = kCFStringEncodingMacTurkish ;
1715 break ;
1716 case wxFONTENCODING_MACCROATIAN :
1717 enc = kCFStringEncodingMacCroatian ;
1718 break ;
1719 case wxFONTENCODING_MACICELANDIC :
1720 enc = kCFStringEncodingMacIcelandic ;
1721 break ;
1722 case wxFONTENCODING_MACROMANIAN :
1723 enc = kCFStringEncodingMacRomanian ;
1724 break ;
1725 case wxFONTENCODING_MACCELTIC :
1726 enc = kCFStringEncodingMacCeltic ;
1727 break ;
1728 case wxFONTENCODING_MACGAELIC :
1729 enc = kCFStringEncodingMacGaelic ;
1730 break ;
1731 // case wxFONTENCODING_MACKEYBOARD :
1732 // enc = kCFStringEncodingMacKeyboardGlyphs ;
1733 // break ;
1734 default :
1735 // because gcc is picky
1736 break ;
1737 } ;
1738 return enc ;
1739 }
1740
1741 wxFontEncoding wxFontEncFromCFStringEnc(CFStringEncoding encoding)
1742 {
1743 wxFontEncoding enc = wxFONTENCODING_DEFAULT ;
1744
1745 switch( encoding)
1746 {
1747 case kCFStringEncodingISOLatin1 :
1748 enc = wxFONTENCODING_ISO8859_1 ;
1749 break ;
1750 case kCFStringEncodingISOLatin2 :
1751 enc = wxFONTENCODING_ISO8859_2;
1752 break ;
1753 case kCFStringEncodingISOLatin3 :
1754 enc = wxFONTENCODING_ISO8859_3 ;
1755 break ;
1756 case kCFStringEncodingISOLatin4 :
1757 enc = wxFONTENCODING_ISO8859_4;
1758 break ;
1759 case kCFStringEncodingISOLatinCyrillic :
1760 enc = wxFONTENCODING_ISO8859_5;
1761 break ;
1762 case kCFStringEncodingISOLatinArabic :
1763 enc = wxFONTENCODING_ISO8859_6;
1764 break ;
1765 case kCFStringEncodingISOLatinGreek :
1766 enc = wxFONTENCODING_ISO8859_7;
1767 break ;
1768 case kCFStringEncodingISOLatinHebrew :
1769 enc = wxFONTENCODING_ISO8859_8;
1770 break ;
1771 case kCFStringEncodingISOLatin5 :
1772 enc = wxFONTENCODING_ISO8859_9;
1773 break ;
1774 case kCFStringEncodingISOLatin6 :
1775 enc = wxFONTENCODING_ISO8859_10;
1776 break ;
1777 case kCFStringEncodingISOLatin7 :
1778 enc = wxFONTENCODING_ISO8859_13;
1779 break ;
1780 case kCFStringEncodingISOLatin8 :
1781 enc = wxFONTENCODING_ISO8859_14;
1782 break ;
1783 case kCFStringEncodingISOLatin9 :
1784 enc =wxFONTENCODING_ISO8859_15 ;
1785 break ;
1786
1787 case kCFStringEncodingKOI8_R :
1788 enc = wxFONTENCODING_KOI8;
1789 break ;
1790
1791 // case :
1792 // enc = wxFONTENCODING_BULGARIAN;
1793 // break ;
1794
1795 case kCFStringEncodingDOSLatinUS :
1796 enc = wxFONTENCODING_CP437;
1797 break ;
1798 case kCFStringEncodingDOSLatin1 :
1799 enc = wxFONTENCODING_CP850;
1800 break ;
1801 case kCFStringEncodingDOSLatin2 :
1802 enc =wxFONTENCODING_CP852 ;
1803 break ;
1804 case kCFStringEncodingDOSCyrillic :
1805 enc = wxFONTENCODING_CP855;
1806 break ;
1807 case kCFStringEncodingDOSRussian :
1808 enc = wxFONTENCODING_CP866;
1809 break ;
1810 case kCFStringEncodingDOSThai :
1811 enc =wxFONTENCODING_CP874 ;
1812 break ;
1813 case kCFStringEncodingDOSJapanese :
1814 enc = wxFONTENCODING_CP932;
1815 break ;
1816 case kCFStringEncodingDOSChineseSimplif :
1817 enc = wxFONTENCODING_CP936;
1818 break ;
1819 case kCFStringEncodingDOSKorean :
1820 enc = wxFONTENCODING_CP949;
1821 break ;
1822 case kCFStringEncodingDOSChineseTrad :
1823 enc = wxFONTENCODING_CP950;
1824 break ;
1825
1826 case kCFStringEncodingWindowsLatin2 :
1827 enc = wxFONTENCODING_CP1250;
1828 break ;
1829 case kCFStringEncodingWindowsCyrillic :
1830 enc = wxFONTENCODING_CP1251;
1831 break ;
1832 case kCFStringEncodingWindowsLatin1 :
1833 enc = wxFONTENCODING_CP1252;
1834 break ;
1835 case kCFStringEncodingWindowsGreek :
1836 enc = wxFONTENCODING_CP1253;
1837 break ;
1838 case kCFStringEncodingWindowsLatin5 :
1839 enc = wxFONTENCODING_CP1254;
1840 break ;
1841 case kCFStringEncodingWindowsHebrew :
1842 enc = wxFONTENCODING_CP1255;
1843 break ;
1844 case kCFStringEncodingWindowsArabic :
1845 enc = wxFONTENCODING_CP1256;
1846 break ;
1847 case kCFStringEncodingWindowsBalticRim :
1848 enc =wxFONTENCODING_CP1257 ;
1849 break ;
1850 case kCFStringEncodingEUC_JP :
1851 enc = wxFONTENCODING_EUC_JP;
1852 break ;
1853 case kCFStringEncodingUnicode :
1854 enc = wxFONTENCODING_UTF16;
1855 break;
1856 case kCFStringEncodingMacRoman :
1857 enc = wxFONTENCODING_MACROMAN ;
1858 break ;
1859 case kCFStringEncodingMacJapanese :
1860 enc = wxFONTENCODING_MACJAPANESE ;
1861 break ;
1862 case kCFStringEncodingMacChineseTrad :
1863 enc = wxFONTENCODING_MACCHINESETRAD ;
1864 break ;
1865 case kCFStringEncodingMacKorean :
1866 enc = wxFONTENCODING_MACKOREAN ;
1867 break ;
1868 case kCFStringEncodingMacArabic :
1869 enc =wxFONTENCODING_MACARABIC ;
1870 break ;
1871 case kCFStringEncodingMacHebrew :
1872 enc = wxFONTENCODING_MACHEBREW ;
1873 break ;
1874 case kCFStringEncodingMacGreek :
1875 enc = wxFONTENCODING_MACGREEK ;
1876 break ;
1877 case kCFStringEncodingMacCyrillic :
1878 enc = wxFONTENCODING_MACCYRILLIC ;
1879 break ;
1880 case kCFStringEncodingMacDevanagari :
1881 enc = wxFONTENCODING_MACDEVANAGARI ;
1882 break ;
1883 case kCFStringEncodingMacGurmukhi :
1884 enc = wxFONTENCODING_MACGURMUKHI ;
1885 break ;
1886 case kCFStringEncodingMacGujarati :
1887 enc = wxFONTENCODING_MACGUJARATI ;
1888 break ;
1889 case kCFStringEncodingMacOriya :
1890 enc =wxFONTENCODING_MACORIYA ;
1891 break ;
1892 case kCFStringEncodingMacBengali :
1893 enc =wxFONTENCODING_MACBENGALI ;
1894 break ;
1895 case kCFStringEncodingMacTamil :
1896 enc = wxFONTENCODING_MACTAMIL ;
1897 break ;
1898 case kCFStringEncodingMacTelugu :
1899 enc = wxFONTENCODING_MACTELUGU ;
1900 break ;
1901 case kCFStringEncodingMacKannada :
1902 enc = wxFONTENCODING_MACKANNADA ;
1903 break ;
1904 case kCFStringEncodingMacMalayalam :
1905 enc = wxFONTENCODING_MACMALAJALAM ;
1906 break ;
1907 case kCFStringEncodingMacSinhalese :
1908 enc = wxFONTENCODING_MACSINHALESE ;
1909 break ;
1910 case kCFStringEncodingMacBurmese :
1911 enc = wxFONTENCODING_MACBURMESE ;
1912 break ;
1913 case kCFStringEncodingMacKhmer :
1914 enc = wxFONTENCODING_MACKHMER ;
1915 break ;
1916 case kCFStringEncodingMacThai :
1917 enc = wxFONTENCODING_MACTHAI ;
1918 break ;
1919 case kCFStringEncodingMacLaotian :
1920 enc = wxFONTENCODING_MACLAOTIAN ;
1921 break ;
1922 case kCFStringEncodingMacGeorgian :
1923 enc = wxFONTENCODING_MACGEORGIAN ;
1924 break ;
1925 case kCFStringEncodingMacArmenian :
1926 enc = wxFONTENCODING_MACARMENIAN ;
1927 break ;
1928 case kCFStringEncodingMacChineseSimp :
1929 enc = wxFONTENCODING_MACCHINESESIMP ;
1930 break ;
1931 case kCFStringEncodingMacTibetan :
1932 enc = wxFONTENCODING_MACTIBETAN ;
1933 break ;
1934 case kCFStringEncodingMacMongolian :
1935 enc = wxFONTENCODING_MACMONGOLIAN ;
1936 break ;
1937 case kCFStringEncodingMacEthiopic :
1938 enc = wxFONTENCODING_MACETHIOPIC ;
1939 break ;
1940 case kCFStringEncodingMacCentralEurRoman:
1941 enc = wxFONTENCODING_MACCENTRALEUR ;
1942 break ;
1943 case kCFStringEncodingMacVietnamese:
1944 enc = wxFONTENCODING_MACVIATNAMESE ;
1945 break ;
1946 case kCFStringEncodingMacExtArabic :
1947 enc = wxFONTENCODING_MACARABICEXT ;
1948 break ;
1949 case kCFStringEncodingMacSymbol :
1950 enc = wxFONTENCODING_MACSYMBOL ;
1951 break ;
1952 case kCFStringEncodingMacDingbats :
1953 enc = wxFONTENCODING_MACDINGBATS ;
1954 break ;
1955 case kCFStringEncodingMacTurkish :
1956 enc = wxFONTENCODING_MACTURKISH ;
1957 break ;
1958 case kCFStringEncodingMacCroatian :
1959 enc = wxFONTENCODING_MACCROATIAN ;
1960 break ;
1961 case kCFStringEncodingMacIcelandic :
1962 enc = wxFONTENCODING_MACICELANDIC ;
1963 break ;
1964 case kCFStringEncodingMacRomanian :
1965 enc = wxFONTENCODING_MACROMANIAN ;
1966 break ;
1967 case kCFStringEncodingMacCeltic :
1968 enc = wxFONTENCODING_MACCELTIC ;
1969 break ;
1970 case kCFStringEncodingMacGaelic :
1971 enc = wxFONTENCODING_MACGAELIC ;
1972 break ;
1973 // case kCFStringEncodingMacKeyboardGlyphs :
1974 // enc = wxFONTENCODING_MACKEYBOARD ;
1975 // break ;
1976 } ;
1977 return enc ;
1978 }
1979
1980 class wxMBConv_cocoa : public wxMBConv
1981 {
1982 public:
1983 wxMBConv_cocoa()
1984 {
1985 Init(CFStringGetSystemEncoding()) ;
1986 }
1987
1988 wxMBConv_cocoa(const wxChar* name)
1989 {
1990 Init( wxCFStringEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
1991 }
1992
1993 wxMBConv_cocoa(wxFontEncoding encoding)
1994 {
1995 Init( wxCFStringEncFromFontEnc(encoding) );
1996 }
1997
1998 ~wxMBConv_cocoa()
1999 {
2000 }
2001
2002 void Init( CFStringEncoding encoding)
2003 {
2004 m_char_encoding = encoding ;
2005 m_unicode_encoding = kCFStringEncodingUnicode;
2006 }
2007
2008 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2009 {
2010 wxASSERT(szUnConv);
2011
2012 size_t nBufSize = strlen(szUnConv) + 1;
2013 size_t nRealOutSize;
2014
2015 UniChar* szUniCharBuffer = (UniChar*) szOut;
2016 wchar_t* szConvBuffer = szOut;
2017
2018 if (szConvBuffer == NULL && nOutSize != 0)
2019 {
2020 szConvBuffer = new wchar_t[nOutSize] ;
2021 }
2022
2023 #if SIZEOF_WCHAR_T == 4
2024 szUniCharBuffer = new UniChar[nOutSize];
2025 #endif
2026
2027 CFDataRef theData = CFDataCreateWithBytesNoCopy (
2028 NULL, //allocator
2029 (const UInt8*)szUnConv,
2030 nBufSize - 1,
2031 NULL //deallocator
2032 );
2033
2034 wxASSERT(theData);
2035
2036 CFStringRef theString = CFStringCreateFromExternalRepresentation (
2037 NULL,
2038 theData,
2039 m_char_encoding
2040 );
2041
2042 wxASSERT(theString);
2043
2044 if (nOutSize == 0)
2045 {
2046 nRealOutSize = CFStringGetLength(theString) + 1;
2047 CFRelease(theString);
2048 return nRealOutSize - 1;
2049 }
2050
2051 CFRange theRange = { 0, CFStringGetLength(theString) };
2052
2053 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2054
2055
2056 nRealOutSize = (CFStringGetLength(theString) + 1);
2057
2058 CFRelease(theString);
2059
2060 szUniCharBuffer[nRealOutSize-1] = '\0' ;
2061
2062 #if SIZEOF_WCHAR_T == 4
2063 wxMBConvUTF16 converter ;
2064 converter.MB2WC(szConvBuffer , (const char*)szUniCharBuffer , nRealOutSize ) ;
2065 delete[] szUniCharBuffer;
2066 #endif
2067 if ( szOut == NULL )
2068 delete [] szConvBuffer;
2069
2070 return nRealOutSize ;
2071 }
2072
2073 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2074 {
2075 size_t nBufSize = wxWcslen(szUnConv) + 1;
2076 size_t nRealOutSize;
2077 char* szBuffer = szOut;
2078 UniChar* szUniBuffer = (UniChar*) szUnConv;
2079
2080 if (szOut == NULL)
2081 {
2082 // worst case
2083 nRealOutSize = wxString::WorstEncodingCase(nBufSize - 1, *this)+1 ;
2084 szBuffer = new char[ nRealOutSize ] ;
2085 }
2086 else
2087 nRealOutSize = nOutSize;
2088
2089 #if SIZEOF_WCHAR_T == 4
2090 wxMBConvUTF16BE converter ;
2091 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2092 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2093 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2094 nBufSize /= sizeof(UniChar);
2095 ++nBufSize;
2096 #endif
2097
2098 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2099 NULL, //allocator
2100 szUniBuffer,
2101 nBufSize,
2102 NULL //deallocator
2103 );
2104
2105 wxASSERT(theString);
2106
2107 //Note that CER puts a BOM when converting to unicode
2108 //so we may want to check and use getchars instead in that case
2109 CFDataRef theData = CFStringCreateExternalRepresentation(
2110 NULL, //allocator
2111 theString,
2112 m_char_encoding,
2113 0 //what to put in characters that can't be converted -
2114 //0 tells CFString to return NULL if it meets such a character
2115 );
2116
2117 if(!theData)
2118 return (size_t)-1;
2119
2120 CFRelease(theString);
2121
2122 nRealOutSize = CFDataGetLength(theData);
2123
2124 if ( szOut == NULL )
2125 delete[] szBuffer;
2126
2127 if(nOutSize == 0)
2128 {
2129 //TODO: This gets flagged as a non-malloced address by the debugger...
2130 //#if SIZEOF_WCHAR_T == 4
2131 // delete[] szUniBuffer;
2132 //#endif
2133 CFRelease(theData);
2134 return nRealOutSize - 1;
2135 }
2136
2137 CFRange theRange = {0, CFDataGetLength(theData) };
2138 CFDataGetBytes(theData, theRange, (UInt8*) szBuffer);
2139
2140 CFRelease(theData);
2141
2142 //TODO: This gets flagged as a non-malloced address by the debugger...
2143 //#if SIZEOF_WCHAR_T == 4
2144 // delete[] szUniBuffer;
2145 //#endif
2146 return nRealOutSize - 1;
2147 }
2148
2149 bool IsOk() const
2150 {
2151 //TODO: check for invalid en/de/coding
2152 return true;
2153 }
2154
2155 private:
2156 CFStringEncoding m_char_encoding ;
2157 CFStringEncoding m_unicode_encoding ;
2158 };
2159
2160 #endif // defined(__WXCOCOA__)
2161
2162 // ============================================================================
2163 // Mac conversion classes
2164 // ============================================================================
2165
2166 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2167
2168 class wxMBConv_mac : public wxMBConv
2169 {
2170 public:
2171 wxMBConv_mac()
2172 {
2173 Init(CFStringGetSystemEncoding()) ;
2174 }
2175
2176 wxMBConv_mac(const wxChar* name)
2177 {
2178 Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
2179 }
2180
2181 wxMBConv_mac(wxFontEncoding encoding)
2182 {
2183 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2184 }
2185
2186 ~wxMBConv_mac()
2187 {
2188 OSStatus status = noErr ;
2189 status = TECDisposeConverter(m_MB2WC_converter);
2190 status = TECDisposeConverter(m_WC2MB_converter);
2191 }
2192
2193
2194 void Init( TextEncodingBase encoding)
2195 {
2196 OSStatus status = noErr ;
2197 m_char_encoding = encoding ;
2198 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2199
2200 status = TECCreateConverter(&m_MB2WC_converter,
2201 m_char_encoding,
2202 m_unicode_encoding);
2203 status = TECCreateConverter(&m_WC2MB_converter,
2204 m_unicode_encoding,
2205 m_char_encoding);
2206 }
2207
2208 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2209 {
2210 OSStatus status = noErr ;
2211 ByteCount byteOutLen ;
2212 ByteCount byteInLen = strlen(psz) ;
2213 wchar_t *tbuf = NULL ;
2214 UniChar* ubuf = NULL ;
2215 size_t res = 0 ;
2216
2217 if (buf == NULL)
2218 {
2219 n = byteInLen ;
2220 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2221 }
2222 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2223 #if SIZEOF_WCHAR_T == 4
2224 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2225 #else
2226 ubuf = (UniChar*) (buf ? buf : tbuf) ;
2227 #endif
2228 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2229 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2230 #if SIZEOF_WCHAR_T == 4
2231 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2232 // is not properly terminated we get random characters at the end
2233 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2234 wxMBConvUTF16BE converter ;
2235 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2236 free( ubuf ) ;
2237 #else
2238 res = byteOutLen / sizeof( UniChar ) ;
2239 #endif
2240 if ( buf == NULL )
2241 free(tbuf) ;
2242
2243 if ( buf && res < n)
2244 buf[res] = 0;
2245
2246 return res ;
2247 }
2248
2249 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2250 {
2251 OSStatus status = noErr ;
2252 ByteCount byteOutLen ;
2253 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2254
2255 char *tbuf = NULL ;
2256
2257 if (buf == NULL)
2258 {
2259 // worst case
2260 n = wxString::WorstEncodingCase(byteInLen / SIZEOF_WCHAR_T, *this) + SIZEOF_WCHAR_T;
2261 tbuf = (char*) malloc( n ) ;
2262 }
2263
2264 ByteCount byteBufferLen = n ;
2265 UniChar* ubuf = NULL ;
2266 #if SIZEOF_WCHAR_T == 4
2267 wxMBConvUTF16BE converter ;
2268 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2269 byteInLen = unicharlen ;
2270 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2271 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2272 #else
2273 ubuf = (UniChar*) psz ;
2274 #endif
2275 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2276 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2277 #if SIZEOF_WCHAR_T == 4
2278 free( ubuf ) ;
2279 #endif
2280 if ( buf == NULL )
2281 free(tbuf) ;
2282
2283 size_t res = byteOutLen ;
2284 if ( buf && res < n)
2285 buf[res] = 0;
2286
2287 return res ;
2288 }
2289
2290 bool IsOk() const
2291 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2292
2293 private:
2294 TECObjectRef m_MB2WC_converter ;
2295 TECObjectRef m_WC2MB_converter ;
2296
2297 TextEncodingBase m_char_encoding ;
2298 TextEncodingBase m_unicode_encoding ;
2299 };
2300
2301 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2302
2303 // ============================================================================
2304 // wxEncodingConverter based conversion classes
2305 // ============================================================================
2306
2307 #if wxUSE_FONTMAP
2308
2309 class wxMBConv_wxwin : public wxMBConv
2310 {
2311 private:
2312 void Init()
2313 {
2314 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2315 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2316 }
2317
2318 public:
2319 // temporarily just use wxEncodingConverter stuff,
2320 // so that it works while a better implementation is built
2321 wxMBConv_wxwin(const wxChar* name)
2322 {
2323 if (name)
2324 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
2325 else
2326 m_enc = wxFONTENCODING_SYSTEM;
2327
2328 Init();
2329 }
2330
2331 wxMBConv_wxwin(wxFontEncoding enc)
2332 {
2333 m_enc = enc;
2334
2335 Init();
2336 }
2337
2338 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2339 {
2340 size_t inbuf = strlen(psz);
2341 if (buf)
2342 m2w.Convert(psz,buf);
2343 return inbuf;
2344 }
2345
2346 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2347 {
2348 const size_t inbuf = wxWcslen(psz);
2349 if (buf)
2350 w2m.Convert(psz,buf);
2351
2352 return inbuf;
2353 }
2354
2355 bool IsOk() const { return m_ok; }
2356
2357 public:
2358 wxFontEncoding m_enc;
2359 wxEncodingConverter m2w, w2m;
2360
2361 // were we initialized successfully?
2362 bool m_ok;
2363
2364 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2365 };
2366
2367 #endif // wxUSE_FONTMAP
2368
2369 // ============================================================================
2370 // wxCSConv implementation
2371 // ============================================================================
2372
2373 void wxCSConv::Init()
2374 {
2375 m_name = NULL;
2376 m_convReal = NULL;
2377 m_deferred = true;
2378 }
2379
2380 wxCSConv::wxCSConv(const wxChar *charset)
2381 {
2382 Init();
2383
2384 if ( charset )
2385 {
2386 SetName(charset);
2387 }
2388
2389 m_encoding = wxFONTENCODING_SYSTEM;
2390 }
2391
2392 wxCSConv::wxCSConv(wxFontEncoding encoding)
2393 {
2394 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2395 {
2396 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2397
2398 encoding = wxFONTENCODING_SYSTEM;
2399 }
2400
2401 Init();
2402
2403 m_encoding = encoding;
2404 }
2405
2406 wxCSConv::~wxCSConv()
2407 {
2408 Clear();
2409 }
2410
2411 wxCSConv::wxCSConv(const wxCSConv& conv)
2412 : wxMBConv()
2413 {
2414 Init();
2415
2416 SetName(conv.m_name);
2417 m_encoding = conv.m_encoding;
2418 }
2419
2420 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2421 {
2422 Clear();
2423
2424 SetName(conv.m_name);
2425 m_encoding = conv.m_encoding;
2426
2427 return *this;
2428 }
2429
2430 void wxCSConv::Clear()
2431 {
2432 free(m_name);
2433 delete m_convReal;
2434
2435 m_name = NULL;
2436 m_convReal = NULL;
2437 }
2438
2439 void wxCSConv::SetName(const wxChar *charset)
2440 {
2441 if (charset)
2442 {
2443 m_name = wxStrdup(charset);
2444 m_deferred = true;
2445 }
2446 }
2447
2448 wxMBConv *wxCSConv::DoCreate() const
2449 {
2450 // check for the special case of ASCII or ISO8859-1 charset: as we have
2451 // special knowledge of it anyhow, we don't need to create a special
2452 // conversion object
2453 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2454 {
2455 // don't convert at all
2456 return NULL;
2457 }
2458
2459 // we trust OS to do conversion better than we can so try external
2460 // conversion methods first
2461 //
2462 // the full order is:
2463 // 1. OS conversion (iconv() under Unix or Win32 API)
2464 // 2. hard coded conversions for UTF
2465 // 3. wxEncodingConverter as fall back
2466
2467 // step (1)
2468 #ifdef HAVE_ICONV
2469 #if !wxUSE_FONTMAP
2470 if ( m_name )
2471 #endif // !wxUSE_FONTMAP
2472 {
2473 wxString name(m_name);
2474
2475 #if wxUSE_FONTMAP
2476 if ( name.empty() )
2477 name = wxFontMapper::Get()->GetEncodingName(m_encoding);
2478 #endif // wxUSE_FONTMAP
2479
2480 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2481 if ( conv->IsOk() )
2482 return conv;
2483
2484 delete conv;
2485 }
2486 #endif // HAVE_ICONV
2487
2488 #ifdef wxHAVE_WIN32_MB2WC
2489 {
2490 #if wxUSE_FONTMAP
2491 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2492 : new wxMBConv_win32(m_encoding);
2493 if ( conv->IsOk() )
2494 return conv;
2495
2496 delete conv;
2497 #else
2498 return NULL;
2499 #endif
2500 }
2501 #endif // wxHAVE_WIN32_MB2WC
2502 #if defined(__WXMAC__)
2503 {
2504 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
2505 {
2506
2507 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2508 : new wxMBConv_mac(m_encoding);
2509 if ( conv->IsOk() )
2510 return conv;
2511
2512 delete conv;
2513 }
2514 }
2515 #endif
2516 #if defined(__WXCOCOA__)
2517 {
2518 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2519 {
2520
2521 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2522 : new wxMBConv_cocoa(m_encoding);
2523 if ( conv->IsOk() )
2524 return conv;
2525
2526 delete conv;
2527 }
2528 }
2529 #endif
2530 // step (2)
2531 wxFontEncoding enc = m_encoding;
2532 #if wxUSE_FONTMAP
2533 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2534 {
2535 // use "false" to suppress interactive dialogs -- we can be called from
2536 // anywhere and popping up a dialog from here is the last thing we want to
2537 // do
2538 enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
2539 }
2540 #endif // wxUSE_FONTMAP
2541
2542 switch ( enc )
2543 {
2544 case wxFONTENCODING_UTF7:
2545 return new wxMBConvUTF7;
2546
2547 case wxFONTENCODING_UTF8:
2548 return new wxMBConvUTF8;
2549
2550 case wxFONTENCODING_UTF16BE:
2551 return new wxMBConvUTF16BE;
2552
2553 case wxFONTENCODING_UTF16LE:
2554 return new wxMBConvUTF16LE;
2555
2556 case wxFONTENCODING_UTF32BE:
2557 return new wxMBConvUTF32BE;
2558
2559 case wxFONTENCODING_UTF32LE:
2560 return new wxMBConvUTF32LE;
2561
2562 default:
2563 // nothing to do but put here to suppress gcc warnings
2564 ;
2565 }
2566
2567 // step (3)
2568 #if wxUSE_FONTMAP
2569 {
2570 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2571 : new wxMBConv_wxwin(m_encoding);
2572 if ( conv->IsOk() )
2573 return conv;
2574
2575 delete conv;
2576 }
2577 #endif // wxUSE_FONTMAP
2578
2579 // NB: This is a hack to prevent deadlock. What could otherwise happen
2580 // in Unicode build: wxConvLocal creation ends up being here
2581 // because of some failure and logs the error. But wxLog will try to
2582 // attach timestamp, for which it will need wxConvLocal (to convert
2583 // time to char* and then wchar_t*), but that fails, tries to log
2584 // error, but wxLog has a (already locked) critical section that
2585 // guards static buffer.
2586 static bool alreadyLoggingError = false;
2587 if (!alreadyLoggingError)
2588 {
2589 alreadyLoggingError = true;
2590 wxLogError(_("Cannot convert from the charset '%s'!"),
2591 m_name ? m_name
2592 :
2593 #if wxUSE_FONTMAP
2594 wxFontMapper::GetEncodingDescription(m_encoding).c_str()
2595 #else // !wxUSE_FONTMAP
2596 wxString::Format(_("encoding %s"), m_encoding).c_str()
2597 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2598 );
2599 alreadyLoggingError = false;
2600 }
2601
2602 return NULL;
2603 }
2604
2605 void wxCSConv::CreateConvIfNeeded() const
2606 {
2607 if ( m_deferred )
2608 {
2609 wxCSConv *self = (wxCSConv *)this; // const_cast
2610
2611 #if wxUSE_INTL
2612 // if we don't have neither the name nor the encoding, use the default
2613 // encoding for this system
2614 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2615 {
2616 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2617 }
2618 #endif // wxUSE_INTL
2619
2620 self->m_convReal = DoCreate();
2621 self->m_deferred = false;
2622 }
2623 }
2624
2625 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2626 {
2627 CreateConvIfNeeded();
2628
2629 if (m_convReal)
2630 return m_convReal->MB2WC(buf, psz, n);
2631
2632 // latin-1 (direct)
2633 size_t len = strlen(psz);
2634
2635 if (buf)
2636 {
2637 for (size_t c = 0; c <= len; c++)
2638 buf[c] = (unsigned char)(psz[c]);
2639 }
2640
2641 return len;
2642 }
2643
2644 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2645 {
2646 CreateConvIfNeeded();
2647
2648 if (m_convReal)
2649 return m_convReal->WC2MB(buf, psz, n);
2650
2651 // latin-1 (direct)
2652 const size_t len = wxWcslen(psz);
2653 if (buf)
2654 {
2655 for (size_t c = 0; c <= len; c++)
2656 {
2657 if (psz[c] > 0xFF)
2658 return (size_t)-1;
2659 buf[c] = (char)psz[c];
2660 }
2661 }
2662 else
2663 {
2664 for (size_t c = 0; c <= len; c++)
2665 {
2666 if (psz[c] > 0xFF)
2667 return (size_t)-1;
2668 }
2669 }
2670
2671 return len;
2672 }
2673
2674 // ----------------------------------------------------------------------------
2675 // globals
2676 // ----------------------------------------------------------------------------
2677
2678 #ifdef __WINDOWS__
2679 static wxMBConv_win32 wxConvLibcObj;
2680 #elif defined(__WXMAC__) && !defined(__MACH__)
2681 static wxMBConv_mac wxConvLibcObj ;
2682 #else
2683 static wxMBConvLibc wxConvLibcObj;
2684 #endif
2685
2686 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2687 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2688 static wxMBConvUTF7 wxConvUTF7Obj;
2689 static wxMBConvUTF8 wxConvUTF8Obj;
2690
2691
2692 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2693 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2694 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2695 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2696 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2697 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2698
2699 #else // !wxUSE_WCHAR_T
2700
2701 // stand-ins in absence of wchar_t
2702 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2703 wxConvISO8859_1,
2704 wxConvLocal,
2705 wxConvUTF8;
2706
2707 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2708
2709