]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
note broken code, cast hack to fix warning on pickier compilers
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
6 // Modified by:
7 // Created: 29/01/98
8 // RCS-ID: $Id$
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
14
15 // ============================================================================
16 // declarations
17 // ============================================================================
18
19 // ----------------------------------------------------------------------------
20 // headers
21 // ----------------------------------------------------------------------------
22
23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
24 #pragma implementation "strconv.h"
25 #endif
26
27 // For compilers that support precompilation, includes "wx.h".
28 #include "wx/wxprec.h"
29
30 #ifdef __BORLANDC__
31 #pragma hdrstop
32 #endif
33
34 #ifndef WX_PRECOMP
35 #include "wx/intl.h"
36 #include "wx/log.h"
37 #endif // WX_PRECOMP
38
39 #include "wx/strconv.h"
40
41 #if wxUSE_WCHAR_T
42
43 #ifdef __WXMSW__
44 #include "wx/msw/private.h"
45 #endif
46
47 #ifdef __WINDOWS__
48 #include "wx/msw/missing.h"
49 #endif
50
51 #ifndef __WXWINCE__
52 #include <errno.h>
53 #endif
54
55 #include <ctype.h>
56 #include <string.h>
57 #include <stdlib.h>
58
59 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
60 #define wxHAVE_WIN32_MB2WC
61 #endif // __WIN32__ but !__WXMICROWIN__
62
63 // ----------------------------------------------------------------------------
64 // headers
65 // ----------------------------------------------------------------------------
66
67 #ifdef __SALFORDC__
68 #include <clib.h>
69 #endif
70
71 #ifdef HAVE_ICONV
72 #include <iconv.h>
73 #endif
74
75 #include "wx/encconv.h"
76 #include "wx/fontmap.h"
77 #include "wx/utils.h"
78
79 #ifdef __WXMAC__
80 #include <ATSUnicode.h>
81 #include <TextCommon.h>
82 #include <TextEncodingConverter.h>
83
84 #include "wx/mac/private.h" // includes mac headers
85 #endif
86 // ----------------------------------------------------------------------------
87 // macros
88 // ----------------------------------------------------------------------------
89
// In-place byte swapping of the first `len` elements of the array `str`.
//
// Both macros expand to a braced block (callers in this file invoke them
// without a trailing semicolon), so they must stay statement-like. The
// parameters are parenthesized so that expression arguments expand safely, and
// the counter is a size_t so it cannot wrap before reaching a size_t length.
#define BSWAP_UCS4(str, len) \
    { size_t _c; for (_c = 0; _c < (len); _c++) (str)[_c] = wxUINT32_SWAP_ALWAYS((str)[_c]); }
#define BSWAP_UTF16(str, len) \
    { size_t _c; for (_c = 0; _c < (len); _c++) (str)[_c] = wxUINT16_SWAP_ALWAYS((str)[_c]); }
92
93 #if SIZEOF_WCHAR_T == 4
94 #define WC_NAME "UCS4"
95 #define WC_BSWAP BSWAP_UCS4
96 #ifdef WORDS_BIGENDIAN
97 #define WC_NAME_BEST "UCS-4BE"
98 #else
99 #define WC_NAME_BEST "UCS-4LE"
100 #endif
101 #elif SIZEOF_WCHAR_T == 2
102 #define WC_NAME "UTF16"
103 #define WC_BSWAP BSWAP_UTF16
104 #define WC_UTF16
105 #ifdef WORDS_BIGENDIAN
106 #define WC_NAME_BEST "UTF-16BE"
107 #else
108 #define WC_NAME_BEST "UTF-16LE"
109 #endif
110 #else // sizeof(wchar_t) != 2 nor 4
111 // does this ever happen?
112 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
113 #endif
114
115 // ============================================================================
116 // implementation
117 // ============================================================================
118
119 // ----------------------------------------------------------------------------
120 // UTF-16 en/decoding to/from UCS-4
121 // ----------------------------------------------------------------------------
122
123
// Encode a single UCS-4 value as UTF-16.
//
// input:  the value to encode
// output: receives the code unit(s); may be NULL, in which case only the
//         number of units is computed
//
// Returns the number of 16-bit units (1 or 2), or (size_t)-1 if input is
// 0x110000 or above and so cannot be represented.
static size_t encode_utf16(wxUint32 input, wxUint16 *output)
{
    if (input >= 0x110000)
        return (size_t)-1;

    if (input > 0xffff)
    {
        // needs a surrogate pair: high half first, low half second
        if (output != NULL)
        {
            output[0] = (wxUint16) ((input >> 10) + 0xd7c0);
            output[1] = (wxUint16) ((input & 0x3ff) + 0xdc00);
        }
        return 2;
    }

    // fits into a single unit
    if (output != NULL)
        output[0] = (wxUint16) input;
    return 1;
}
146
// Decode one character from a UTF-16 sequence.
//
// input:  points to at least one 16-bit unit; input[1] is only read when
//         input[0] is a high surrogate (0xd800..0xdbff)
// output: receives the decoded value, or input[0] unchanged if the sequence
//         is malformed
//
// Returns the number of units consumed (1 or 2), or (size_t)-1 if input[0] is
// a surrogate that does not start a valid pair.
static size_t decode_utf16(const wxUint16* input, wxUint32& output)
{
    const wxUint16 first = input[0];

    if ((first < 0xd800) || (first > 0xdfff))
    {
        // ordinary BMP character
        output = first;
        return 1;
    }

    // A pair must be a high surrogate (0xd800..0xdbff) followed by a low
    // surrogate (0xdc00..0xdfff, both ends included).
    if ((first > 0xdbff) || (input[1] < 0xdc00) || (input[1] > 0xdfff))
    {
        output = first;
        return (size_t)-1;
    }

    output = ((wxUint32)(first - 0xd7c0) << 10) + (input[1] - 0xdc00);
    return 2;
}
165
166
167 // ----------------------------------------------------------------------------
168 // wxMBConv
169 // ----------------------------------------------------------------------------
170
171 wxMBConv::~wxMBConv()
172 {
173 // nothing to do here (necessary for Darwin linking probably)
174 }
175
176 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
177 {
178 if ( psz )
179 {
180 // calculate the length of the buffer needed first
181 size_t nLen = MB2WC(NULL, psz, 0);
182 if ( nLen != (size_t)-1 )
183 {
184 // now do the actual conversion
185 wxWCharBuffer buf(nLen);
186 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
187 if ( nLen != (size_t)-1 )
188 {
189 return buf;
190 }
191 }
192 }
193
194 wxWCharBuffer buf((wchar_t *)NULL);
195
196 return buf;
197 }
198
199 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
200 {
201 if ( pwz )
202 {
203 size_t nLen = WC2MB(NULL, pwz, 0);
204 if ( nLen != (size_t)-1 )
205 {
206 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
207 nLen = WC2MB(buf.data(), pwz, nLen + 4);
208 if ( nLen != (size_t)-1 )
209 {
210 return buf;
211 }
212 }
213 }
214
215 wxCharBuffer buf((char *)NULL);
216
217 return buf;
218 }
219
220 size_t wxMBConv::MB2WC(wchar_t* szBuffer, const char* szString,
221 size_t outsize, size_t nStringLen) const
222 {
223 const char* szEnd = szString + nStringLen + 1;
224 const char* szPos = szString;
225 const char* szStart = szPos;
226
227 size_t nActualLength = 0;
228
229 //Convert the string until the length() is reached, continuing the
230 //loop every time a null character is reached
231 while(szPos != szEnd)
232 {
233 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
234
235 //Get the length of the current (sub)string
236 size_t nLen = MB2WC(NULL, szPos, 0);
237
238 //Invalid conversion?
239 if( nLen == (size_t)-1 )
240 return nLen;
241
242 //Increase the actual length (+1 for current null character)
243 nActualLength += nLen + 1;
244
245 //Only copy data in if buffer size is big enough
246 if (szBuffer != NULL &&
247 nActualLength <= outsize)
248 {
249 //Convert the current (sub)string
250 if ( MB2WC(&szBuffer[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
251 return (size_t)-1;
252 }
253
254 //Increment to next (sub)string
255 //Note that we have to use strlen here instead of nLen
256 //here because XX2XX gives us the size of the output buffer,
257 //not neccessarly the length of the string
258 szPos += strlen(szPos) + 1;
259 }
260
261 return nActualLength - 1; //success - return actual length
262 }
263
264 size_t wxMBConv::WC2MB(char* szBuffer, const wchar_t* szString,
265 size_t outsize, size_t nStringLen) const
266 {
267 const wchar_t* szEnd = szString + nStringLen + 1;
268 const wchar_t* szPos = szString;
269 const wchar_t* szStart = szPos;
270
271 size_t nActualLength = 0;
272
273 //Convert the string until the length() is reached, continuing the
274 //loop every time a null character is reached
275 while(szPos != szEnd)
276 {
277 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
278
279 //Get the length of the current (sub)string
280 size_t nLen = WC2MB(NULL, szPos, 0);
281
282 //Invalid conversion?
283 if( nLen == (size_t)-1 )
284 return nLen;
285
286 //Increase the actual length (+1 for current null character)
287 nActualLength += nLen + 1;
288
289 //Only copy data in if buffer size is big enough
290 if (szBuffer != NULL &&
291 nActualLength <= outsize)
292 {
293 //Convert the current (sub)string
294 if(WC2MB(&szBuffer[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
295 return (size_t)-1;
296 }
297
298 //Increment to next (sub)string
299 //Note that we have to use wxWcslen here instead of nLen
300 //here because XX2XX gives us the size of the output buffer,
301 //not neccessarly the length of the string
302 szPos += wxWcslen(szPos) + 1;
303 }
304
305 return nActualLength - 1; //success - return actual length
306 }
307
308 // ----------------------------------------------------------------------------
309 // wxMBConvLibc
310 // ----------------------------------------------------------------------------
311
312 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
313 {
314 return wxMB2WC(buf, psz, n);
315 }
316
317 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
318 {
319 return wxWC2MB(buf, psz, n);
320 }
321 // ----------------------------------------------------------------------------
322 // UTF-7
323 // ----------------------------------------------------------------------------
324
325 // Implementation (C) 2004 Fredrik Roubert
326
327 //
328 // BASE64 decoding table
329 //
// Indexed by byte value: the 6-bit value of a BASE64 digit, or 0xff for any
// byte which is not a BASE64 digit (one row per 16 byte values).
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' is 62, '/' is 63
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0'..'9' are 52..61
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x4f: 'A'..'O' are 0..14
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50 - 0x5f: 'P'..'Z' are 15..25
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x6f: 'a'..'o' are 26..40
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70 - 0x7f: 'p'..'z' are 41..51
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff: no BASE64 digits here
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
365
366 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
367 {
368
369 size_t len = 0;
370
371 while (*psz && ((!buf) || (len < n)))
372 {
373 unsigned char cc = *psz++;
374 if (cc != '+')
375 {
376 // plain ASCII char
377 if (buf)
378 *buf++ = cc;
379 len++;
380 }
381 else if (*psz == '-')
382 {
383 // encoded plus sign
384 if (buf)
385 *buf++ = cc;
386 len++;
387 psz++;
388 }
389 else
390 {
391 // BASE64 encoded string
392 bool lsb;
393 unsigned char c;
394 unsigned int d, l;
395 for (lsb = false, d = 0, l = 0;
396 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
397 {
398 d <<= 6;
399 d += cc;
400 for (l += 6; l >= 8; lsb = !lsb)
401 {
402 c = (d >> (l -= 8)) % 256;
403 if (lsb)
404 {
405 if (buf)
406 *buf++ |= c;
407 len ++;
408 }
409 else
410 if (buf)
411 *buf = c << 8;
412 }
413 }
414 if (*psz == '-')
415 psz++;
416 }
417 }
418 if (buf && (len < n))
419 *buf = 0;
420 return len;
421 }
422
423 //
424 // BASE64 encoding table
425 //
// Indexed by a 6-bit value: the BASE64 digit representing it.
static const unsigned char utf7enb64[] =
{
    // 0 - 25
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 26 - 51
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // 52 - 61
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 62 and 63
    '+', '/'
};
437
438 //
439 // UTF-7 encoding table
440 //
441 // 0 - Set D (directly encoded characters)
442 // 1 - Set O (optional direct characters)
443 // 2 - whitespace characters (optional)
444 // 3 - special characters
445 //
// Indexed by an ASCII code: the class (0 to 3, as listed above) of the
// character. One row per 16 codes.
static const unsigned char utf7encode[128] =
{
    /* 0x00 - 0x0f */  3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    /* 0x10 - 0x1f */  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x20 - 0x2f */  2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    /* 0x30 - 0x3f */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    /* 0x40 - 0x4f */  1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 - 0x5f */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    /* 0x60 - 0x6f */  1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 - 0x7f */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
457
458 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t
459 *psz, size_t n) const
460 {
461
462
463 size_t len = 0;
464
465 while (*psz && ((!buf) || (len < n)))
466 {
467 wchar_t cc = *psz++;
468 if (cc < 0x80 && utf7encode[cc] < 1)
469 {
470 // plain ASCII char
471 if (buf)
472 *buf++ = (char)cc;
473 len++;
474 }
475 #ifndef WC_UTF16
476 else if (cc > ((const wchar_t)0xffff))
477 {
478 // no surrogate pair generation (yet?)
479 return (size_t)-1;
480 }
481 #endif
482 else
483 {
484 if (buf)
485 *buf++ = '+';
486 len++;
487 if (cc != '+')
488 {
489 // BASE64 encode string
490 unsigned int lsb, d, l;
491 for (d = 0, l = 0;; psz++)
492 {
493 for (lsb = 0; lsb < 2; lsb ++)
494 {
495 d <<= 8;
496 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
497
498 for (l += 8; l >= 6; )
499 {
500 l -= 6;
501 if (buf)
502 *buf++ = utf7enb64[(d >> l) % 64];
503 len++;
504 }
505 }
506 cc = *psz;
507 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
508 break;
509 }
510 if (l != 0)
511 {
512 if (buf)
513 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
514 len++;
515 }
516 }
517 if (buf)
518 *buf++ = '-';
519 len++;
520 }
521 }
522 if (buf && (len < n))
523 *buf = 0;
524 return len;
525 }
526
527 // ----------------------------------------------------------------------------
528 // UTF-8
529 // ----------------------------------------------------------------------------
530
// utf8_max[i] is the biggest value which is encoded using i + 1 bytes; the
// last entry is big enough to stop any scan of the table.
static wxUint32 utf8_max[]=
{
    0x7f,
    0x7ff,
    0xffff,
    0x1fffff,
    0x3ffffff,
    0x7fffffff,
    0xffffffff
};
533
534 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
535 {
536 size_t len = 0;
537
538 while (*psz && ((!buf) || (len < n)))
539 {
540 unsigned char cc = *psz++, fc = cc;
541 unsigned cnt;
542 for (cnt = 0; fc & 0x80; cnt++)
543 fc <<= 1;
544 if (!cnt)
545 {
546 // plain ASCII char
547 if (buf)
548 *buf++ = cc;
549 len++;
550 }
551 else
552 {
553 cnt--;
554 if (!cnt)
555 {
556 // invalid UTF-8 sequence
557 return (size_t)-1;
558 }
559 else
560 {
561 unsigned ocnt = cnt - 1;
562 wxUint32 res = cc & (0x3f >> cnt);
563 while (cnt--)
564 {
565 cc = *psz++;
566 if ((cc & 0xC0) != 0x80)
567 {
568 // invalid UTF-8 sequence
569 return (size_t)-1;
570 }
571 res = (res << 6) | (cc & 0x3f);
572 }
573 if (res <= utf8_max[ocnt])
574 {
575 // illegal UTF-8 encoding
576 return (size_t)-1;
577 }
578 #ifdef WC_UTF16
579 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
580 size_t pa = encode_utf16(res, (wxUint16 *)buf);
581 if (pa == (size_t)-1)
582 return (size_t)-1;
583 if (buf)
584 buf += pa;
585 len += pa;
586 #else // !WC_UTF16
587 if (buf)
588 *buf++ = res;
589 len++;
590 #endif // WC_UTF16/!WC_UTF16
591 }
592 }
593 }
594 if (buf && (len < n))
595 *buf = 0;
596 return len;
597 }
598
599 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
600 {
601 size_t len = 0;
602
603 while (*psz && ((!buf) || (len < n)))
604 {
605 wxUint32 cc;
606 #ifdef WC_UTF16
607 // cast is ok for WC_UTF16
608 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
609 psz += (pa == (size_t)-1) ? 1 : pa;
610 #else
611 cc=(*psz++) & 0x7fffffff;
612 #endif
613 unsigned cnt;
614 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
615 if (!cnt)
616 {
617 // plain ASCII char
618 if (buf)
619 *buf++ = (char) cc;
620 len++;
621 }
622
623 else
624 {
625 len += cnt + 1;
626 if (buf)
627 {
628 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
629 while (cnt--)
630 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
631 }
632 }
633 }
634
635 if (buf && (len<n)) *buf = 0;
636
637 return len;
638 }
639
640
641
642
643 // ----------------------------------------------------------------------------
644 // UTF-16
645 // ----------------------------------------------------------------------------
646
// "straight" is the UTF-16 conversion class using the byte order of this
// machine and "swap" the one using the opposite byte order.
#if defined(WORDS_BIGENDIAN)
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else // little endian
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
654
655
656 #ifdef WC_UTF16
657
658 // copy 16bit MB to 16bit String
659 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
660 {
661 size_t len=0;
662
663 while (*(wxUint16*)psz && (!buf || len < n))
664 {
665 if (buf)
666 *buf++ = *(wxUint16*)psz;
667 len++;
668
669 psz += sizeof(wxUint16);
670 }
671 if (buf && len<n) *buf=0;
672
673 return len;
674 }
675
676
677 // copy 16bit String to 16bit MB
678 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
679 {
680 size_t len=0;
681
682 while (*psz && (!buf || len < n))
683 {
684 if (buf)
685 {
686 *(wxUint16*)buf = *psz;
687 buf += sizeof(wxUint16);
688 }
689 len += sizeof(wxUint16);
690 psz++;
691 }
692 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
693
694 return len;
695 }
696
697
698 // swap 16bit MB to 16bit String
699 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
700 {
701 size_t len=0;
702
703 while (*(wxUint16*)psz && (!buf || len < n))
704 {
705 if (buf)
706 {
707 ((char *)buf)[0] = psz[1];
708 ((char *)buf)[1] = psz[0];
709 buf++;
710 }
711 len++;
712 psz += sizeof(wxUint16);
713 }
714 if (buf && len<n) *buf=0;
715
716 return len;
717 }
718
719
720 // swap 16bit MB to 16bit String
721 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
722 {
723 size_t len=0;
724
725 while (*psz && (!buf || len < n))
726 {
727 if (buf)
728 {
729 *buf++ = ((char*)psz)[1];
730 *buf++ = ((char*)psz)[0];
731 }
732 len += sizeof(wxUint16);
733 psz++;
734 }
735 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
736
737 return len;
738 }
739
740
741 #else // WC_UTF16
742
743
744 // copy 16bit MB to 32bit String
745 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
746 {
747 size_t len=0;
748
749 while (*(wxUint16*)psz && (!buf || len < n))
750 {
751 wxUint32 cc;
752 size_t pa=decode_utf16((wxUint16*)psz, cc);
753 if (pa == (size_t)-1)
754 return pa;
755
756 if (buf)
757 *buf++ = cc;
758 len++;
759 psz += pa * sizeof(wxUint16);
760 }
761 if (buf && len<n) *buf=0;
762
763 return len;
764 }
765
766
767 // copy 32bit String to 16bit MB
768 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
769 {
770 size_t len=0;
771
772 while (*psz && (!buf || len < n))
773 {
774 wxUint16 cc[2];
775 size_t pa=encode_utf16(*psz, cc);
776
777 if (pa == (size_t)-1)
778 return pa;
779
780 if (buf)
781 {
782 *(wxUint16*)buf = cc[0];
783 buf += sizeof(wxUint16);
784 if (pa > 1)
785 {
786 *(wxUint16*)buf = cc[1];
787 buf += sizeof(wxUint16);
788 }
789 }
790
791 len += pa*sizeof(wxUint16);
792 psz++;
793 }
794 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
795
796 return len;
797 }
798
799
800 // swap 16bit MB to 32bit String
801 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
802 {
803 size_t len=0;
804
805 while (*(wxUint16*)psz && (!buf || len < n))
806 {
807 wxUint32 cc;
808 char tmp[4];
809 tmp[0]=psz[1]; tmp[1]=psz[0];
810 tmp[2]=psz[3]; tmp[3]=psz[2];
811
812 size_t pa=decode_utf16((wxUint16*)tmp, cc);
813 if (pa == (size_t)-1)
814 return pa;
815
816 if (buf)
817 *buf++ = cc;
818
819 len++;
820 psz += pa * sizeof(wxUint16);
821 }
822 if (buf && len<n) *buf=0;
823
824 return len;
825 }
826
827
828 // swap 32bit String to 16bit MB
829 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
830 {
831 size_t len=0;
832
833 while (*psz && (!buf || len < n))
834 {
835 wxUint16 cc[2];
836 size_t pa=encode_utf16(*psz, cc);
837
838 if (pa == (size_t)-1)
839 return pa;
840
841 if (buf)
842 {
843 *buf++ = ((char*)cc)[1];
844 *buf++ = ((char*)cc)[0];
845 if (pa > 1)
846 {
847 *buf++ = ((char*)cc)[3];
848 *buf++ = ((char*)cc)[2];
849 }
850 }
851
852 len += pa*sizeof(wxUint16);
853 psz++;
854 }
855 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
856
857 return len;
858 }
859
860 #endif // WC_UTF16
861
862
863 // ----------------------------------------------------------------------------
864 // UTF-32
865 // ----------------------------------------------------------------------------
866
867 #ifdef WORDS_BIGENDIAN
868 #define wxMBConvUTF32straight wxMBConvUTF32BE
869 #define wxMBConvUTF32swap wxMBConvUTF32LE
870 #else
871 #define wxMBConvUTF32swap wxMBConvUTF32BE
872 #define wxMBConvUTF32straight wxMBConvUTF32LE
873 #endif
874
875
876 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
877 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
878
879
880 #ifdef WC_UTF16
881
882 // copy 32bit MB to 16bit String
883 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
884 {
885 size_t len=0;
886
887 while (*(wxUint32*)psz && (!buf || len < n))
888 {
889 wxUint16 cc[2];
890
891 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
892 if (pa == (size_t)-1)
893 return pa;
894
895 if (buf)
896 {
897 *buf++ = cc[0];
898 if (pa > 1)
899 *buf++ = cc[1];
900 }
901 len += pa;
902 psz += sizeof(wxUint32);
903 }
904 if (buf && len<n) *buf=0;
905
906 return len;
907 }
908
909
910 // copy 16bit String to 32bit MB
911 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
912 {
913 size_t len=0;
914
915 while (*psz && (!buf || len < n))
916 {
917 wxUint32 cc;
918
919 // cast is ok for WC_UTF16
920 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
921 if (pa == (size_t)-1)
922 return pa;
923
924 if (buf)
925 {
926 *(wxUint32*)buf = cc;
927 buf += sizeof(wxUint32);
928 }
929 len += sizeof(wxUint32);
930 psz += pa;
931 }
932
933 if (buf && len<=n-sizeof(wxUint32))
934 *(wxUint32*)buf=0;
935
936 return len;
937 }
938
939
940
941 // swap 32bit MB to 16bit String
942 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
943 {
944 size_t len=0;
945
946 while (*(wxUint32*)psz && (!buf || len < n))
947 {
948 char tmp[4];
949 tmp[0] = psz[3]; tmp[1] = psz[2];
950 tmp[2] = psz[1]; tmp[3] = psz[0];
951
952
953 wxUint16 cc[2];
954
955 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
956 if (pa == (size_t)-1)
957 return pa;
958
959 if (buf)
960 {
961 *buf++ = cc[0];
962 if (pa > 1)
963 *buf++ = cc[1];
964 }
965 len += pa;
966 psz += sizeof(wxUint32);
967 }
968
969 if (buf && len<n)
970 *buf=0;
971
972 return len;
973 }
974
975
976 // swap 16bit String to 32bit MB
977 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
978 {
979 size_t len=0;
980
981 while (*psz && (!buf || len < n))
982 {
983 char cc[4];
984
985 // cast is ok for WC_UTF16
986 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
987 if (pa == (size_t)-1)
988 return pa;
989
990 if (buf)
991 {
992 *buf++ = cc[3];
993 *buf++ = cc[2];
994 *buf++ = cc[1];
995 *buf++ = cc[0];
996 }
997 len += sizeof(wxUint32);
998 psz += pa;
999 }
1000
1001 if (buf && len<=n-sizeof(wxUint32))
1002 *(wxUint32*)buf=0;
1003
1004 return len;
1005 }
1006
1007 #else // WC_UTF16
1008
1009
1010 // copy 32bit MB to 32bit String
1011 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1012 {
1013 size_t len=0;
1014
1015 while (*(wxUint32*)psz && (!buf || len < n))
1016 {
1017 if (buf)
1018 *buf++ = *(wxUint32*)psz;
1019 len++;
1020 psz += sizeof(wxUint32);
1021 }
1022
1023 if (buf && len<n)
1024 *buf=0;
1025
1026 return len;
1027 }
1028
1029
1030 // copy 32bit String to 32bit MB
1031 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1032 {
1033 size_t len=0;
1034
1035 while (*psz && (!buf || len < n))
1036 {
1037 if (buf)
1038 {
1039 *(wxUint32*)buf = *psz;
1040 buf += sizeof(wxUint32);
1041 }
1042
1043 len += sizeof(wxUint32);
1044 psz++;
1045 }
1046
1047 if (buf && len<=n-sizeof(wxUint32))
1048 *(wxUint32*)buf=0;
1049
1050 return len;
1051 }
1052
1053
1054 // swap 32bit MB to 32bit String
1055 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1056 {
1057 size_t len=0;
1058
1059 while (*(wxUint32*)psz && (!buf || len < n))
1060 {
1061 if (buf)
1062 {
1063 ((char *)buf)[0] = psz[3];
1064 ((char *)buf)[1] = psz[2];
1065 ((char *)buf)[2] = psz[1];
1066 ((char *)buf)[3] = psz[0];
1067 buf++;
1068 }
1069 len++;
1070 psz += sizeof(wxUint32);
1071 }
1072
1073 if (buf && len<n)
1074 *buf=0;
1075
1076 return len;
1077 }
1078
1079
1080 // swap 32bit String to 32bit MB
1081 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1082 {
1083 size_t len=0;
1084
1085 while (*psz && (!buf || len < n))
1086 {
1087 if (buf)
1088 {
1089 *buf++ = ((char *)psz)[3];
1090 *buf++ = ((char *)psz)[2];
1091 *buf++ = ((char *)psz)[1];
1092 *buf++ = ((char *)psz)[0];
1093 }
1094 len += sizeof(wxUint32);
1095 psz++;
1096 }
1097
1098 if (buf && len<=n-sizeof(wxUint32))
1099 *(wxUint32*)buf=0;
1100
1101 return len;
1102 }
1103
1104
1105 #endif // WC_UTF16
1106
1107
1108 // ============================================================================
1109 // The classes doing conversion using the iconv_xxx() functions
1110 // ============================================================================
1111
1112 #ifdef HAVE_ICONV
1113
1114 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
1115 // if output buffer is _exactly_ as big as needed. Such case is (unless there's
1116 // yet another bug in glibc) the only case when iconv() returns with (size_t)-1
1117 // (which means error) and says there are 0 bytes left in the input buffer --
1118 // when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
1119 // this alternative test for iconv() failure.
1120 // [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft): true if an iconv() call which returned cres and
// left bufLeft bytes unconverted in its input buffer really failed.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
    // with the broken glibc, E2BIG with nothing left in the input is not an
    // error
    #define ICONV_FAILED(cres, bufLeft) \
        ((cres == (size_t)-1) && (errno != E2BIG || bufLeft != 0))
#else // any other C library
    #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
#endif

// cast the address of an input pointer to the type iconv() takes as its
// second argument
#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1129
1130 // ----------------------------------------------------------------------------
1131 // wxMBConv_iconv: encapsulates an iconv character set
1132 // ----------------------------------------------------------------------------
1133
1134 class wxMBConv_iconv : public wxMBConv
1135 {
1136 public:
1137 wxMBConv_iconv(const wxChar *name);
1138 virtual ~wxMBConv_iconv();
1139
1140 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1141 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1142
1143 bool IsOk() const
1144 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1145
1146 protected:
1147 // the iconv handlers used to translate from multibyte to wide char and in
1148 // the other direction
1149 iconv_t m2w,
1150 w2m;
1151
1152 private:
1153 // the name (for iconv_open()) of a wide char charset -- if none is
1154 // available on this machine, it will remain NULL
1155 static const char *ms_wcCharsetName;
1156
1157 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1158 // different endian-ness than the native one
1159 static bool ms_wcNeedsSwap;
1160 };
1161
1162 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1163 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1164
1165 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1166 {
1167 // Do it the hard way
1168 char cname[100];
1169 for (size_t i = 0; i < wxStrlen(name)+1; i++)
1170 cname[i] = (char) name[i];
1171
1172 // check for charset that represents wchar_t:
1173 if (ms_wcCharsetName == NULL)
1174 {
1175 ms_wcNeedsSwap = false;
1176
1177 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1178 ms_wcCharsetName = WC_NAME_BEST;
1179 m2w = iconv_open(ms_wcCharsetName, cname);
1180
1181 if (m2w == (iconv_t)-1)
1182 {
1183 // try charset w/o bytesex info (e.g. "UCS4")
1184 // and check for bytesex ourselves:
1185 ms_wcCharsetName = WC_NAME;
1186 m2w = iconv_open(ms_wcCharsetName, cname);
1187
1188 // last bet, try if it knows WCHAR_T pseudo-charset
1189 if (m2w == (iconv_t)-1)
1190 {
1191 ms_wcCharsetName = "WCHAR_T";
1192 m2w = iconv_open(ms_wcCharsetName, cname);
1193 }
1194
1195 if (m2w != (iconv_t)-1)
1196 {
1197 char buf[2], *bufPtr;
1198 wchar_t wbuf[2], *wbufPtr;
1199 size_t insz, outsz;
1200 size_t res;
1201
1202 buf[0] = 'A';
1203 buf[1] = 0;
1204 wbuf[0] = 0;
1205 insz = 2;
1206 outsz = SIZEOF_WCHAR_T * 2;
1207 wbufPtr = wbuf;
1208 bufPtr = buf;
1209
1210 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1211 (char**)&wbufPtr, &outsz);
1212
1213 if (ICONV_FAILED(res, insz))
1214 {
1215 ms_wcCharsetName = NULL;
1216 wxLogLastError(wxT("iconv"));
1217 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1218 }
1219 else
1220 {
1221 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1222 }
1223 }
1224 else
1225 {
1226 ms_wcCharsetName = NULL;
1227
1228 // VS: we must not output an error here, since wxWidgets will safely
1229 // fall back to using wxEncodingConverter.
1230 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1231 //wxLogError(
1232 }
1233 }
1234 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1235 }
1236 else // we already have ms_wcCharsetName
1237 {
1238 m2w = iconv_open(ms_wcCharsetName, cname);
1239 }
1240
1241 // NB: don't ever pass NULL to iconv_open(), it may crash!
1242 if ( ms_wcCharsetName )
1243 {
1244 w2m = iconv_open( cname, ms_wcCharsetName);
1245 }
1246 else
1247 {
1248 w2m = (iconv_t)-1;
1249 }
1250 }
1251
1252 wxMBConv_iconv::~wxMBConv_iconv()
1253 {
1254 if ( m2w != (iconv_t)-1 )
1255 iconv_close(m2w);
1256 if ( w2m != (iconv_t)-1 )
1257 iconv_close(w2m);
1258 }
1259
1260 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1261 {
1262 size_t inbuf = strlen(psz);
1263 size_t outbuf = n * SIZEOF_WCHAR_T;
1264 size_t res, cres;
1265 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1266 wchar_t *bufPtr = buf;
1267 const char *pszPtr = psz;
1268
1269 if (buf)
1270 {
1271 // have destination buffer, convert there
1272 cres = iconv(m2w,
1273 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1274 (char**)&bufPtr, &outbuf);
1275 res = n - (outbuf / SIZEOF_WCHAR_T);
1276
1277 if (ms_wcNeedsSwap)
1278 {
1279 // convert to native endianness
1280 WC_BSWAP(buf /* _not_ bufPtr */, res)
1281 }
1282
1283 // NB: iconv was given only strlen(psz) characters on input, and so
1284 // it couldn't convert the trailing zero. Let's do it ourselves
1285 // if there's some room left for it in the output buffer.
1286 if (res < n)
1287 buf[res] = 0;
1288 }
1289 else
1290 {
1291 // no destination buffer... convert using temp buffer
1292 // to calculate destination buffer requirement
1293 wchar_t tbuf[8];
1294 res = 0;
1295 do {
1296 bufPtr = tbuf;
1297 outbuf = 8*SIZEOF_WCHAR_T;
1298
1299 cres = iconv(m2w,
1300 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1301 (char**)&bufPtr, &outbuf );
1302
1303 res += 8-(outbuf/SIZEOF_WCHAR_T);
1304 } while ((cres==(size_t)-1) && (errno==E2BIG));
1305 }
1306
1307 if (ICONV_FAILED(cres, inbuf))
1308 {
1309 //VS: it is ok if iconv fails, hence trace only
1310 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1311 return (size_t)-1;
1312 }
1313
1314 return res;
1315 }
1316
1317 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1318 {
1319 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1320 size_t outbuf = n;
1321 size_t res, cres;
1322
1323 wchar_t *tmpbuf = 0;
1324
1325 if (ms_wcNeedsSwap)
1326 {
1327 // need to copy to temp buffer to switch endianness
1328 // this absolutely doesn't rock!
1329 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1330 // could be in read-only memory, or be accessed in some other thread)
1331 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1332 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1333 WC_BSWAP(tmpbuf, inbuf)
1334 psz=tmpbuf;
1335 }
1336
1337 if (buf)
1338 {
1339 // have destination buffer, convert there
1340 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1341
1342 res = n-outbuf;
1343
1344 // NB: iconv was given only wcslen(psz) characters on input, and so
1345 // it couldn't convert the trailing zero. Let's do it ourselves
1346 // if there's some room left for it in the output buffer.
1347 if (res < n)
1348 buf[0] = 0;
1349 }
1350 else
1351 {
1352 // no destination buffer... convert using temp buffer
1353 // to calculate destination buffer requirement
1354 char tbuf[16];
1355 res = 0;
1356 do {
1357 buf = tbuf; outbuf = 16;
1358
1359 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1360
1361 res += 16 - outbuf;
1362 } while ((cres==(size_t)-1) && (errno==E2BIG));
1363 }
1364
1365 if (ms_wcNeedsSwap)
1366 {
1367 free(tmpbuf);
1368 }
1369
1370 if (ICONV_FAILED(cres, inbuf))
1371 {
1372 //VS: it is ok if iconv fails, hence trace only
1373 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1374 return (size_t)-1;
1375 }
1376
1377 return res;
1378 }
1379
1380 #endif // HAVE_ICONV
1381
1382
1383 // ============================================================================
1384 // Win32 conversion classes
1385 // ============================================================================
1386
1387 #ifdef wxHAVE_WIN32_MB2WC
1388
1389 // from utils.cpp
1390 #if wxUSE_FONTMAP
1391 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1392 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1393 #endif
1394
1395 class wxMBConv_win32 : public wxMBConv
1396 {
1397 public:
1398 wxMBConv_win32()
1399 {
1400 m_CodePage = CP_ACP;
1401 }
1402
1403 #if wxUSE_FONTMAP
1404 wxMBConv_win32(const wxChar* name)
1405 {
1406 m_CodePage = wxCharsetToCodepage(name);
1407 }
1408
1409 wxMBConv_win32(wxFontEncoding encoding)
1410 {
1411 m_CodePage = wxEncodingToCodepage(encoding);
1412 }
1413 #endif
1414
1415 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1416 {
1417 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1418 // the behaviour is not compatible with the Unix version (using iconv)
1419 // and break the library itself, e.g. wxTextInputStream::NextChar()
1420 // wouldn't work if reading an incomplete MB char didn't result in an
1421 // error
1422 const size_t len = ::MultiByteToWideChar
1423 (
1424 m_CodePage, // code page
1425 MB_ERR_INVALID_CHARS, // flags: fall on error
1426 psz, // input string
1427 -1, // its length (NUL-terminated)
1428 buf, // output string
1429 buf ? n : 0 // size of output buffer
1430 );
1431
1432 // note that it returns count of written chars for buf != NULL and size
1433 // of the needed buffer for buf == NULL so in either case the length of
1434 // the string (which never includes the terminating NUL) is one less
1435 return len ? len - 1 : (size_t)-1;
1436 }
1437
1438 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1439 {
1440 /*
1441 we have a problem here: by default, WideCharToMultiByte() may
1442 replace characters unrepresentable in the target code page with bad
1443 quality approximations such as turning "1/2" symbol (U+00BD) into
1444 "1" for the code pages which don't have it and we, obviously, want
1445 to avoid this at any price
1446
1447 the trouble is that this function does it _silently_, i.e. it won't
1448 even tell us whether it did or not... Win98/2000 and higher provide
1449 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1450 we have to resort to a round trip, i.e. check that converting back
1451 results in the same string -- this is, of course, expensive but
1452 otherwise we simply can't be sure to not garble the data.
1453 */
1454
1455 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1456 // it doesn't work with CJK encodings (which we test for rather roughly
1457 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1458 // supporting it
1459 BOOL usedDef wxDUMMY_INITIALIZE(false);
1460 BOOL *pUsedDef;
1461 int flags;
1462 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1463 {
1464 // it's our lucky day
1465 flags = WC_NO_BEST_FIT_CHARS;
1466 pUsedDef = &usedDef;
1467 }
1468 else // old system or unsupported encoding
1469 {
1470 flags = 0;
1471 pUsedDef = NULL;
1472 }
1473
1474 const size_t len = ::WideCharToMultiByte
1475 (
1476 m_CodePage, // code page
1477 flags, // either none or no best fit
1478 pwz, // input string
1479 -1, // it is (wide) NUL-terminated
1480 buf, // output buffer
1481 buf ? n : 0, // and its size
1482 NULL, // default "replacement" char
1483 pUsedDef // [out] was it used?
1484 );
1485
1486 if ( !len )
1487 {
1488 // function totally failed
1489 return (size_t)-1;
1490 }
1491
1492 // if we were really converting, check if we succeeded
1493 if ( buf )
1494 {
1495 if ( flags )
1496 {
1497 // check if the conversion failed, i.e. if any replacements
1498 // were done
1499 if ( usedDef )
1500 return (size_t)-1;
1501 }
1502 else // we must resort to double tripping...
1503 {
1504 wxWCharBuffer wcBuf(n);
1505 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1506 wcscmp(wcBuf, pwz) != 0 )
1507 {
1508 // we didn't obtain the same thing we started from, hence
1509 // the conversion was lossy and we consider that it failed
1510 return (size_t)-1;
1511 }
1512 }
1513 }
1514
1515 // see the comment above for the reason of "len - 1"
1516 return len - 1;
1517 }
1518
1519 bool IsOk() const { return m_CodePage != -1; }
1520
1521 private:
1522 static bool CanUseNoBestFit()
1523 {
1524 static int s_isWin98Or2k = -1;
1525
1526 if ( s_isWin98Or2k == -1 )
1527 {
1528 int verMaj, verMin;
1529 switch ( wxGetOsVersion(&verMaj, &verMin) )
1530 {
1531 case wxWIN95:
1532 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1533 break;
1534
1535 case wxWINDOWS_NT:
1536 s_isWin98Or2k = verMaj >= 5;
1537 break;
1538
1539 default:
1540 // unknown, be conseravtive by default
1541 s_isWin98Or2k = 0;
1542 }
1543
1544 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1545 }
1546
1547 return s_isWin98Or2k == 1;
1548 }
1549
1550 long m_CodePage;
1551 };
1552
1553 #endif // wxHAVE_WIN32_MB2WC
1554
1555 // ============================================================================
1556 // Cocoa conversion classes
1557 // ============================================================================
1558
1559 #if defined(__WXCOCOA__)
1560
// RN: There is no UTF-32 support in either Core Foundation or
// Cocoa. Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1564
1565 #include <CoreFoundation/CFString.h>
1566 #include <CoreFoundation/CFStringEncodingExt.h>
1567
1568 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1569 {
1570 CFStringEncoding enc = 0 ;
1571 if ( encoding == wxFONTENCODING_DEFAULT )
1572 {
1573 #if wxUSE_GUI
1574 encoding = wxFont::GetDefaultEncoding() ;
1575 #else
1576 encoding = wxLocale::GetSystemEncoding() ;
1577 #endif
1578 }
1579 else switch( encoding)
1580 {
1581 case wxFONTENCODING_ISO8859_1 :
1582 enc = kCFStringEncodingISOLatin1 ;
1583 break ;
1584 case wxFONTENCODING_ISO8859_2 :
1585 enc = kCFStringEncodingISOLatin2;
1586 break ;
1587 case wxFONTENCODING_ISO8859_3 :
1588 enc = kCFStringEncodingISOLatin3 ;
1589 break ;
1590 case wxFONTENCODING_ISO8859_4 :
1591 enc = kCFStringEncodingISOLatin4;
1592 break ;
1593 case wxFONTENCODING_ISO8859_5 :
1594 enc = kCFStringEncodingISOLatinCyrillic;
1595 break ;
1596 case wxFONTENCODING_ISO8859_6 :
1597 enc = kCFStringEncodingISOLatinArabic;
1598 break ;
1599 case wxFONTENCODING_ISO8859_7 :
1600 enc = kCFStringEncodingISOLatinGreek;
1601 break ;
1602 case wxFONTENCODING_ISO8859_8 :
1603 enc = kCFStringEncodingISOLatinHebrew;
1604 break ;
1605 case wxFONTENCODING_ISO8859_9 :
1606 enc = kCFStringEncodingISOLatin5;
1607 break ;
1608 case wxFONTENCODING_ISO8859_10 :
1609 enc = kCFStringEncodingISOLatin6;
1610 break ;
1611 case wxFONTENCODING_ISO8859_11 :
1612 enc = kCFStringEncodingISOLatinThai;
1613 break ;
1614 case wxFONTENCODING_ISO8859_13 :
1615 enc = kCFStringEncodingISOLatin7;
1616 break ;
1617 case wxFONTENCODING_ISO8859_14 :
1618 enc = kCFStringEncodingISOLatin8;
1619 break ;
1620 case wxFONTENCODING_ISO8859_15 :
1621 enc = kCFStringEncodingISOLatin9;
1622 break ;
1623
1624 case wxFONTENCODING_KOI8 :
1625 enc = kCFStringEncodingKOI8_R;
1626 break ;
1627 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1628 enc = kCFStringEncodingDOSRussian;
1629 break ;
1630
1631 // case wxFONTENCODING_BULGARIAN :
1632 // enc = ;
1633 // break ;
1634
1635 case wxFONTENCODING_CP437 :
1636 enc =kCFStringEncodingDOSLatinUS ;
1637 break ;
1638 case wxFONTENCODING_CP850 :
1639 enc = kCFStringEncodingDOSLatin1;
1640 break ;
1641 case wxFONTENCODING_CP852 :
1642 enc = kCFStringEncodingDOSLatin2;
1643 break ;
1644 case wxFONTENCODING_CP855 :
1645 enc = kCFStringEncodingDOSCyrillic;
1646 break ;
1647 case wxFONTENCODING_CP866 :
1648 enc =kCFStringEncodingDOSRussian ;
1649 break ;
1650 case wxFONTENCODING_CP874 :
1651 enc = kCFStringEncodingDOSThai;
1652 break ;
1653 case wxFONTENCODING_CP932 :
1654 enc = kCFStringEncodingDOSJapanese;
1655 break ;
1656 case wxFONTENCODING_CP936 :
1657 enc =kCFStringEncodingDOSChineseSimplif ;
1658 break ;
1659 case wxFONTENCODING_CP949 :
1660 enc = kCFStringEncodingDOSKorean;
1661 break ;
1662 case wxFONTENCODING_CP950 :
1663 enc = kCFStringEncodingDOSChineseTrad;
1664 break ;
1665
1666 case wxFONTENCODING_CP1250 :
1667 enc = kCFStringEncodingWindowsLatin2;
1668 break ;
1669 case wxFONTENCODING_CP1251 :
1670 enc =kCFStringEncodingWindowsCyrillic ;
1671 break ;
1672 case wxFONTENCODING_CP1252 :
1673 enc =kCFStringEncodingWindowsLatin1 ;
1674 break ;
1675 case wxFONTENCODING_CP1253 :
1676 enc = kCFStringEncodingWindowsGreek;
1677 break ;
1678 case wxFONTENCODING_CP1254 :
1679 enc = kCFStringEncodingWindowsLatin5;
1680 break ;
1681 case wxFONTENCODING_CP1255 :
1682 enc =kCFStringEncodingWindowsHebrew ;
1683 break ;
1684 case wxFONTENCODING_CP1256 :
1685 enc =kCFStringEncodingWindowsArabic ;
1686 break ;
1687 case wxFONTENCODING_CP1257 :
1688 enc = kCFStringEncodingWindowsBalticRim;
1689 break ;
1690 case wxFONTENCODING_UTF7 :
1691 enc = kCFStringEncodingNonLossyASCII ;
1692 break ;
1693 case wxFONTENCODING_UTF8 :
1694 enc = kCFStringEncodingUTF8 ;
1695 break ;
1696 case wxFONTENCODING_EUC_JP :
1697 enc = kCFStringEncodingEUC_JP;
1698 break ;
1699 case wxFONTENCODING_UTF16 :
1700 enc = kCFStringEncodingUnicode ;
1701 break ;
1702 case wxFONTENCODING_MACROMAN :
1703 enc = kCFStringEncodingMacRoman ;
1704 break ;
1705 case wxFONTENCODING_MACJAPANESE :
1706 enc = kCFStringEncodingMacJapanese ;
1707 break ;
1708 case wxFONTENCODING_MACCHINESETRAD :
1709 enc = kCFStringEncodingMacChineseTrad ;
1710 break ;
1711 case wxFONTENCODING_MACKOREAN :
1712 enc = kCFStringEncodingMacKorean ;
1713 break ;
1714 case wxFONTENCODING_MACARABIC :
1715 enc = kCFStringEncodingMacArabic ;
1716 break ;
1717 case wxFONTENCODING_MACHEBREW :
1718 enc = kCFStringEncodingMacHebrew ;
1719 break ;
1720 case wxFONTENCODING_MACGREEK :
1721 enc = kCFStringEncodingMacGreek ;
1722 break ;
1723 case wxFONTENCODING_MACCYRILLIC :
1724 enc = kCFStringEncodingMacCyrillic ;
1725 break ;
1726 case wxFONTENCODING_MACDEVANAGARI :
1727 enc = kCFStringEncodingMacDevanagari ;
1728 break ;
1729 case wxFONTENCODING_MACGURMUKHI :
1730 enc = kCFStringEncodingMacGurmukhi ;
1731 break ;
1732 case wxFONTENCODING_MACGUJARATI :
1733 enc = kCFStringEncodingMacGujarati ;
1734 break ;
1735 case wxFONTENCODING_MACORIYA :
1736 enc = kCFStringEncodingMacOriya ;
1737 break ;
1738 case wxFONTENCODING_MACBENGALI :
1739 enc = kCFStringEncodingMacBengali ;
1740 break ;
1741 case wxFONTENCODING_MACTAMIL :
1742 enc = kCFStringEncodingMacTamil ;
1743 break ;
1744 case wxFONTENCODING_MACTELUGU :
1745 enc = kCFStringEncodingMacTelugu ;
1746 break ;
1747 case wxFONTENCODING_MACKANNADA :
1748 enc = kCFStringEncodingMacKannada ;
1749 break ;
1750 case wxFONTENCODING_MACMALAJALAM :
1751 enc = kCFStringEncodingMacMalayalam ;
1752 break ;
1753 case wxFONTENCODING_MACSINHALESE :
1754 enc = kCFStringEncodingMacSinhalese ;
1755 break ;
1756 case wxFONTENCODING_MACBURMESE :
1757 enc = kCFStringEncodingMacBurmese ;
1758 break ;
1759 case wxFONTENCODING_MACKHMER :
1760 enc = kCFStringEncodingMacKhmer ;
1761 break ;
1762 case wxFONTENCODING_MACTHAI :
1763 enc = kCFStringEncodingMacThai ;
1764 break ;
1765 case wxFONTENCODING_MACLAOTIAN :
1766 enc = kCFStringEncodingMacLaotian ;
1767 break ;
1768 case wxFONTENCODING_MACGEORGIAN :
1769 enc = kCFStringEncodingMacGeorgian ;
1770 break ;
1771 case wxFONTENCODING_MACARMENIAN :
1772 enc = kCFStringEncodingMacArmenian ;
1773 break ;
1774 case wxFONTENCODING_MACCHINESESIMP :
1775 enc = kCFStringEncodingMacChineseSimp ;
1776 break ;
1777 case wxFONTENCODING_MACTIBETAN :
1778 enc = kCFStringEncodingMacTibetan ;
1779 break ;
1780 case wxFONTENCODING_MACMONGOLIAN :
1781 enc = kCFStringEncodingMacMongolian ;
1782 break ;
1783 case wxFONTENCODING_MACETHIOPIC :
1784 enc = kCFStringEncodingMacEthiopic ;
1785 break ;
1786 case wxFONTENCODING_MACCENTRALEUR :
1787 enc = kCFStringEncodingMacCentralEurRoman ;
1788 break ;
1789 case wxFONTENCODING_MACVIATNAMESE :
1790 enc = kCFStringEncodingMacVietnamese ;
1791 break ;
1792 case wxFONTENCODING_MACARABICEXT :
1793 enc = kCFStringEncodingMacExtArabic ;
1794 break ;
1795 case wxFONTENCODING_MACSYMBOL :
1796 enc = kCFStringEncodingMacSymbol ;
1797 break ;
1798 case wxFONTENCODING_MACDINGBATS :
1799 enc = kCFStringEncodingMacDingbats ;
1800 break ;
1801 case wxFONTENCODING_MACTURKISH :
1802 enc = kCFStringEncodingMacTurkish ;
1803 break ;
1804 case wxFONTENCODING_MACCROATIAN :
1805 enc = kCFStringEncodingMacCroatian ;
1806 break ;
1807 case wxFONTENCODING_MACICELANDIC :
1808 enc = kCFStringEncodingMacIcelandic ;
1809 break ;
1810 case wxFONTENCODING_MACROMANIAN :
1811 enc = kCFStringEncodingMacRomanian ;
1812 break ;
1813 case wxFONTENCODING_MACCELTIC :
1814 enc = kCFStringEncodingMacCeltic ;
1815 break ;
1816 case wxFONTENCODING_MACGAELIC :
1817 enc = kCFStringEncodingMacGaelic ;
1818 break ;
1819 // case wxFONTENCODING_MACKEYBOARD :
1820 // enc = kCFStringEncodingMacKeyboardGlyphs ;
1821 // break ;
1822 default :
1823 // because gcc is picky
1824 break ;
1825 } ;
1826 return enc ;
1827 }
1828
1829 wxFontEncoding wxFontEncFromCFStringEnc(CFStringEncoding encoding)
1830 {
1831 wxFontEncoding enc = wxFONTENCODING_DEFAULT ;
1832
1833 switch( encoding)
1834 {
1835 case kCFStringEncodingISOLatin1 :
1836 enc = wxFONTENCODING_ISO8859_1 ;
1837 break ;
1838 case kCFStringEncodingISOLatin2 :
1839 enc = wxFONTENCODING_ISO8859_2;
1840 break ;
1841 case kCFStringEncodingISOLatin3 :
1842 enc = wxFONTENCODING_ISO8859_3 ;
1843 break ;
1844 case kCFStringEncodingISOLatin4 :
1845 enc = wxFONTENCODING_ISO8859_4;
1846 break ;
1847 case kCFStringEncodingISOLatinCyrillic :
1848 enc = wxFONTENCODING_ISO8859_5;
1849 break ;
1850 case kCFStringEncodingISOLatinArabic :
1851 enc = wxFONTENCODING_ISO8859_6;
1852 break ;
1853 case kCFStringEncodingISOLatinGreek :
1854 enc = wxFONTENCODING_ISO8859_7;
1855 break ;
1856 case kCFStringEncodingISOLatinHebrew :
1857 enc = wxFONTENCODING_ISO8859_8;
1858 break ;
1859 case kCFStringEncodingISOLatin5 :
1860 enc = wxFONTENCODING_ISO8859_9;
1861 break ;
1862 case kCFStringEncodingISOLatin6 :
1863 enc = wxFONTENCODING_ISO8859_10;
1864 break ;
1865 case kCFStringEncodingISOLatin7 :
1866 enc = wxFONTENCODING_ISO8859_13;
1867 break ;
1868 case kCFStringEncodingISOLatin8 :
1869 enc = wxFONTENCODING_ISO8859_14;
1870 break ;
1871 case kCFStringEncodingISOLatin9 :
1872 enc =wxFONTENCODING_ISO8859_15 ;
1873 break ;
1874
1875 case kCFStringEncodingKOI8_R :
1876 enc = wxFONTENCODING_KOI8;
1877 break ;
1878
1879 // case :
1880 // enc = wxFONTENCODING_BULGARIAN;
1881 // break ;
1882
1883 case kCFStringEncodingDOSLatinUS :
1884 enc = wxFONTENCODING_CP437;
1885 break ;
1886 case kCFStringEncodingDOSLatin1 :
1887 enc = wxFONTENCODING_CP850;
1888 break ;
1889 case kCFStringEncodingDOSLatin2 :
1890 enc =wxFONTENCODING_CP852 ;
1891 break ;
1892 case kCFStringEncodingDOSCyrillic :
1893 enc = wxFONTENCODING_CP855;
1894 break ;
1895 case kCFStringEncodingDOSRussian :
1896 enc = wxFONTENCODING_CP866;
1897 break ;
1898 case kCFStringEncodingDOSThai :
1899 enc =wxFONTENCODING_CP874 ;
1900 break ;
1901 case kCFStringEncodingDOSJapanese :
1902 enc = wxFONTENCODING_CP932;
1903 break ;
1904 case kCFStringEncodingDOSChineseSimplif :
1905 enc = wxFONTENCODING_CP936;
1906 break ;
1907 case kCFStringEncodingDOSKorean :
1908 enc = wxFONTENCODING_CP949;
1909 break ;
1910 case kCFStringEncodingDOSChineseTrad :
1911 enc = wxFONTENCODING_CP950;
1912 break ;
1913
1914 case kCFStringEncodingWindowsLatin2 :
1915 enc = wxFONTENCODING_CP1250;
1916 break ;
1917 case kCFStringEncodingWindowsCyrillic :
1918 enc = wxFONTENCODING_CP1251;
1919 break ;
1920 case kCFStringEncodingWindowsLatin1 :
1921 enc = wxFONTENCODING_CP1252;
1922 break ;
1923 case kCFStringEncodingWindowsGreek :
1924 enc = wxFONTENCODING_CP1253;
1925 break ;
1926 case kCFStringEncodingWindowsLatin5 :
1927 enc = wxFONTENCODING_CP1254;
1928 break ;
1929 case kCFStringEncodingWindowsHebrew :
1930 enc = wxFONTENCODING_CP1255;
1931 break ;
1932 case kCFStringEncodingWindowsArabic :
1933 enc = wxFONTENCODING_CP1256;
1934 break ;
1935 case kCFStringEncodingWindowsBalticRim :
1936 enc =wxFONTENCODING_CP1257 ;
1937 break ;
1938 case kCFStringEncodingEUC_JP :
1939 enc = wxFONTENCODING_EUC_JP;
1940 break ;
1941 case kCFStringEncodingUnicode :
1942 enc = wxFONTENCODING_UTF16;
1943 break;
1944 case kCFStringEncodingMacRoman :
1945 enc = wxFONTENCODING_MACROMAN ;
1946 break ;
1947 case kCFStringEncodingMacJapanese :
1948 enc = wxFONTENCODING_MACJAPANESE ;
1949 break ;
1950 case kCFStringEncodingMacChineseTrad :
1951 enc = wxFONTENCODING_MACCHINESETRAD ;
1952 break ;
1953 case kCFStringEncodingMacKorean :
1954 enc = wxFONTENCODING_MACKOREAN ;
1955 break ;
1956 case kCFStringEncodingMacArabic :
1957 enc =wxFONTENCODING_MACARABIC ;
1958 break ;
1959 case kCFStringEncodingMacHebrew :
1960 enc = wxFONTENCODING_MACHEBREW ;
1961 break ;
1962 case kCFStringEncodingMacGreek :
1963 enc = wxFONTENCODING_MACGREEK ;
1964 break ;
1965 case kCFStringEncodingMacCyrillic :
1966 enc = wxFONTENCODING_MACCYRILLIC ;
1967 break ;
1968 case kCFStringEncodingMacDevanagari :
1969 enc = wxFONTENCODING_MACDEVANAGARI ;
1970 break ;
1971 case kCFStringEncodingMacGurmukhi :
1972 enc = wxFONTENCODING_MACGURMUKHI ;
1973 break ;
1974 case kCFStringEncodingMacGujarati :
1975 enc = wxFONTENCODING_MACGUJARATI ;
1976 break ;
1977 case kCFStringEncodingMacOriya :
1978 enc =wxFONTENCODING_MACORIYA ;
1979 break ;
1980 case kCFStringEncodingMacBengali :
1981 enc =wxFONTENCODING_MACBENGALI ;
1982 break ;
1983 case kCFStringEncodingMacTamil :
1984 enc = wxFONTENCODING_MACTAMIL ;
1985 break ;
1986 case kCFStringEncodingMacTelugu :
1987 enc = wxFONTENCODING_MACTELUGU ;
1988 break ;
1989 case kCFStringEncodingMacKannada :
1990 enc = wxFONTENCODING_MACKANNADA ;
1991 break ;
1992 case kCFStringEncodingMacMalayalam :
1993 enc = wxFONTENCODING_MACMALAJALAM ;
1994 break ;
1995 case kCFStringEncodingMacSinhalese :
1996 enc = wxFONTENCODING_MACSINHALESE ;
1997 break ;
1998 case kCFStringEncodingMacBurmese :
1999 enc = wxFONTENCODING_MACBURMESE ;
2000 break ;
2001 case kCFStringEncodingMacKhmer :
2002 enc = wxFONTENCODING_MACKHMER ;
2003 break ;
2004 case kCFStringEncodingMacThai :
2005 enc = wxFONTENCODING_MACTHAI ;
2006 break ;
2007 case kCFStringEncodingMacLaotian :
2008 enc = wxFONTENCODING_MACLAOTIAN ;
2009 break ;
2010 case kCFStringEncodingMacGeorgian :
2011 enc = wxFONTENCODING_MACGEORGIAN ;
2012 break ;
2013 case kCFStringEncodingMacArmenian :
2014 enc = wxFONTENCODING_MACARMENIAN ;
2015 break ;
2016 case kCFStringEncodingMacChineseSimp :
2017 enc = wxFONTENCODING_MACCHINESESIMP ;
2018 break ;
2019 case kCFStringEncodingMacTibetan :
2020 enc = wxFONTENCODING_MACTIBETAN ;
2021 break ;
2022 case kCFStringEncodingMacMongolian :
2023 enc = wxFONTENCODING_MACMONGOLIAN ;
2024 break ;
2025 case kCFStringEncodingMacEthiopic :
2026 enc = wxFONTENCODING_MACETHIOPIC ;
2027 break ;
2028 case kCFStringEncodingMacCentralEurRoman:
2029 enc = wxFONTENCODING_MACCENTRALEUR ;
2030 break ;
2031 case kCFStringEncodingMacVietnamese:
2032 enc = wxFONTENCODING_MACVIATNAMESE ;
2033 break ;
2034 case kCFStringEncodingMacExtArabic :
2035 enc = wxFONTENCODING_MACARABICEXT ;
2036 break ;
2037 case kCFStringEncodingMacSymbol :
2038 enc = wxFONTENCODING_MACSYMBOL ;
2039 break ;
2040 case kCFStringEncodingMacDingbats :
2041 enc = wxFONTENCODING_MACDINGBATS ;
2042 break ;
2043 case kCFStringEncodingMacTurkish :
2044 enc = wxFONTENCODING_MACTURKISH ;
2045 break ;
2046 case kCFStringEncodingMacCroatian :
2047 enc = wxFONTENCODING_MACCROATIAN ;
2048 break ;
2049 case kCFStringEncodingMacIcelandic :
2050 enc = wxFONTENCODING_MACICELANDIC ;
2051 break ;
2052 case kCFStringEncodingMacRomanian :
2053 enc = wxFONTENCODING_MACROMANIAN ;
2054 break ;
2055 case kCFStringEncodingMacCeltic :
2056 enc = wxFONTENCODING_MACCELTIC ;
2057 break ;
2058 case kCFStringEncodingMacGaelic :
2059 enc = wxFONTENCODING_MACGAELIC ;
2060 break ;
2061 // case kCFStringEncodingMacKeyboardGlyphs :
2062 // enc = wxFONTENCODING_MACKEYBOARD ;
2063 // break ;
2064 } ;
2065 return enc ;
2066 }
2067
2068 class wxMBConv_cocoa : public wxMBConv
2069 {
2070 public:
2071 wxMBConv_cocoa()
2072 {
2073 Init(CFStringGetSystemEncoding()) ;
2074 }
2075
2076 wxMBConv_cocoa(const wxChar* name)
2077 {
2078 Init( wxCFStringEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
2079 }
2080
2081 wxMBConv_cocoa(wxFontEncoding encoding)
2082 {
2083 Init( wxCFStringEncFromFontEnc(encoding) );
2084 }
2085
2086 ~wxMBConv_cocoa()
2087 {
2088 }
2089
2090 void Init( CFStringEncoding encoding)
2091 {
2092 m_char_encoding = encoding ;
2093 m_unicode_encoding = kCFStringEncodingUnicode;
2094 }
2095
2096 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2097 {
2098 wxASSERT(szUnConv);
2099
2100 size_t nBufSize = strlen(szUnConv) + 1;
2101 size_t nRealOutSize;
2102
2103 UniChar* szUniCharBuffer = (UniChar*) szOut;
2104 wchar_t* szConvBuffer = szOut;
2105
2106 if (szConvBuffer == NULL && nOutSize != 0)
2107 {
2108 szConvBuffer = new wchar_t[nOutSize] ;
2109 }
2110
2111 #if SIZEOF_WCHAR_T == 4
2112 szUniCharBuffer = new UniChar[nOutSize];
2113 #endif
2114
2115 CFDataRef theData = CFDataCreateWithBytesNoCopy (
2116 NULL, //allocator
2117 (const UInt8*)szUnConv,
2118 nBufSize - 1,
2119 NULL //deallocator
2120 );
2121
2122 wxASSERT(theData);
2123
2124 CFStringRef theString = CFStringCreateFromExternalRepresentation (
2125 NULL,
2126 theData,
2127 m_char_encoding
2128 );
2129
2130 wxASSERT(theString);
2131
2132 if (nOutSize == 0)
2133 {
2134 nRealOutSize = CFStringGetLength(theString) + 1;
2135 CFRelease(theString);
2136 return nRealOutSize - 1;
2137 }
2138
2139 CFRange theRange = { 0, CFStringGetLength(theString) };
2140
2141 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2142
2143
2144 nRealOutSize = (CFStringGetLength(theString) + 1);
2145
2146 CFRelease(theString);
2147
2148 szUniCharBuffer[nRealOutSize-1] = '\0' ;
2149
2150 #if SIZEOF_WCHAR_T == 4
2151 wxMBConvUTF16 converter ;
2152 converter.MB2WC(szConvBuffer , (const char*)szUniCharBuffer , nRealOutSize ) ;
2153 delete[] szUniCharBuffer;
2154 #endif
2155 if ( szOut == NULL )
2156 delete [] szConvBuffer;
2157
2158 return nRealOutSize ;
2159 }
2160
2161 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2162 {
2163 size_t nBufSize = wxWcslen(szUnConv) + 1;
2164 size_t nRealOutSize;
2165 char* szBuffer = szOut;
2166 UniChar* szUniBuffer = (UniChar*) szUnConv;
2167
2168 if (szOut == NULL)
2169 {
2170 // worst case
2171 nRealOutSize = ((nBufSize - 1) * 8) +1 ;
2172 szBuffer = new char[ nRealOutSize ] ;
2173 }
2174 else
2175 nRealOutSize = nOutSize;
2176
2177 #if SIZEOF_WCHAR_T == 4
2178 wxMBConvUTF16BE converter ;
2179 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2180 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2181 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2182 nBufSize /= sizeof(UniChar);
2183 ++nBufSize;
2184 #endif
2185
2186 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2187 NULL, //allocator
2188 szUniBuffer,
2189 nBufSize,
2190 NULL //deallocator
2191 );
2192
2193 wxASSERT(theString);
2194
2195 //Note that CER puts a BOM when converting to unicode
2196 //so we may want to check and use getchars instead in that case
2197 CFDataRef theData = CFStringCreateExternalRepresentation(
2198 NULL, //allocator
2199 theString,
2200 m_char_encoding,
2201 0 //what to put in characters that can't be converted -
2202 //0 tells CFString to return NULL if it meets such a character
2203 );
2204
2205 if(!theData)
2206 return (size_t)-1;
2207
2208 CFRelease(theString);
2209
2210 nRealOutSize = CFDataGetLength(theData);
2211
2212 if ( szOut == NULL )
2213 delete[] szBuffer;
2214
2215 if(nOutSize == 0)
2216 {
2217 //TODO: This gets flagged as a non-malloced address by the debugger...
2218 //#if SIZEOF_WCHAR_T == 4
2219 // delete[] szUniBuffer;
2220 //#endif
2221 CFRelease(theData);
2222 return nRealOutSize - 1;
2223 }
2224
2225 CFRange theRange = {0, CFDataGetLength(theData) };
2226 CFDataGetBytes(theData, theRange, (UInt8*) szBuffer);
2227
2228 CFRelease(theData);
2229
2230 //TODO: This gets flagged as a non-malloced address by the debugger...
2231 //#if SIZEOF_WCHAR_T == 4
2232 // delete[] szUniBuffer;
2233 //#endif
2234 return nRealOutSize - 1;
2235 }
2236
2237 bool IsOk() const
2238 {
2239 //TODO: check for invalid en/de/coding
2240 return true;
2241 }
2242
2243 private:
2244 CFStringEncoding m_char_encoding ;
2245 CFStringEncoding m_unicode_encoding ;
2246 };
2247
2248 #endif // defined(__WXCOCOA__)
2249
2250 // ============================================================================
2251 // Mac conversion classes
2252 // ============================================================================
2253
2254 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2255
2256 class wxMBConv_mac : public wxMBConv
2257 {
2258 public:
2259 wxMBConv_mac()
2260 {
2261 Init(CFStringGetSystemEncoding()) ;
2262 }
2263
2264 wxMBConv_mac(const wxChar* name)
2265 {
2266 Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
2267 }
2268
2269 wxMBConv_mac(wxFontEncoding encoding)
2270 {
2271 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2272 }
2273
2274 ~wxMBConv_mac()
2275 {
2276 OSStatus status = noErr ;
2277 status = TECDisposeConverter(m_MB2WC_converter);
2278 status = TECDisposeConverter(m_WC2MB_converter);
2279 }
2280
2281
2282 void Init( TextEncodingBase encoding)
2283 {
2284 OSStatus status = noErr ;
2285 m_char_encoding = encoding ;
2286 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2287
2288 status = TECCreateConverter(&m_MB2WC_converter,
2289 m_char_encoding,
2290 m_unicode_encoding);
2291 status = TECCreateConverter(&m_WC2MB_converter,
2292 m_unicode_encoding,
2293 m_char_encoding);
2294 }
2295
2296 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2297 {
2298 OSStatus status = noErr ;
2299 ByteCount byteOutLen ;
2300 ByteCount byteInLen = strlen(psz) ;
2301 wchar_t *tbuf = NULL ;
2302 UniChar* ubuf = NULL ;
2303 size_t res = 0 ;
2304
2305 if (buf == NULL)
2306 {
2307 n = byteInLen ;
2308 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2309 }
2310 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2311 #if SIZEOF_WCHAR_T == 4
2312 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2313 #else
2314 ubuf = (UniChar*) (buf ? buf : tbuf) ;
2315 #endif
2316 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2317 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2318 #if SIZEOF_WCHAR_T == 4
2319 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2320 // is not properly terminated we get random characters at the end
2321 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2322 wxMBConvUTF16BE converter ;
2323 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2324 free( ubuf ) ;
2325 #else
2326 res = byteOutLen / sizeof( UniChar ) ;
2327 #endif
2328 if ( buf == NULL )
2329 free(tbuf) ;
2330
2331 if ( buf && res < n)
2332 buf[res] = 0;
2333
2334 return res ;
2335 }
2336
2337 //NB: This is _broken_ - in invalid conversions, instead of returning -1
2338 //like it should, it (sometimes?) converts invalid characters of the encoding to a question
2339 //mark character '?'.
2340 //
2341 //We need to do the msw double-pass check for the question marks as Vadim
2342 //lines out above (RN: I don't recall this happening in the core foundation version,
2343 //but it might do it there also, ick)
2344 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2345 {
2346 OSStatus status = noErr ;
2347 ByteCount byteOutLen ;
2348 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2349
2350 char *tbuf = NULL ;
2351
2352 if (buf == NULL)
2353 {
2354 // worst case
2355 n = ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T;
2356 tbuf = (char*) malloc( n ) ;
2357 }
2358
2359 ByteCount byteBufferLen = n ;
2360 UniChar* ubuf = NULL ;
2361 #if SIZEOF_WCHAR_T == 4
2362 wxMBConvUTF16BE converter ;
2363 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2364 byteInLen = unicharlen ;
2365 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2366 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2367 #else
2368 ubuf = (UniChar*) psz ;
2369 #endif
2370 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2371 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2372 #if SIZEOF_WCHAR_T == 4
2373 free( ubuf ) ;
2374 #endif
2375 if ( buf == NULL )
2376 free(tbuf) ;
2377
2378 size_t res = byteOutLen ;
2379 if ( buf && res < n)
2380 buf[res] = 0;
2381
2382 return res ;
2383 }
2384
2385 bool IsOk() const
2386 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2387
2388 private:
2389 TECObjectRef m_MB2WC_converter ;
2390 TECObjectRef m_WC2MB_converter ;
2391
2392 TextEncodingBase m_char_encoding ;
2393 TextEncodingBase m_unicode_encoding ;
2394 };
2395
2396 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2397
2398 // ============================================================================
2399 // wxEncodingConverter based conversion classes
2400 // ============================================================================
2401
2402 #if wxUSE_FONTMAP
2403
2404 class wxMBConv_wxwin : public wxMBConv
2405 {
2406 private:
2407 void Init()
2408 {
2409 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2410 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2411 }
2412
2413 public:
2414 // temporarily just use wxEncodingConverter stuff,
2415 // so that it works while a better implementation is built
2416 wxMBConv_wxwin(const wxChar* name)
2417 {
2418 if (name)
2419 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
2420 else
2421 m_enc = wxFONTENCODING_SYSTEM;
2422
2423 Init();
2424 }
2425
2426 wxMBConv_wxwin(wxFontEncoding enc)
2427 {
2428 m_enc = enc;
2429
2430 Init();
2431 }
2432
2433 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2434 {
2435 size_t inbuf = strlen(psz);
2436 if (buf)
2437 m2w.Convert(psz,buf);
2438 return inbuf;
2439 }
2440
2441 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2442 {
2443 const size_t inbuf = wxWcslen(psz);
2444 if (buf)
2445 w2m.Convert(psz,buf);
2446
2447 return inbuf;
2448 }
2449
2450 bool IsOk() const { return m_ok; }
2451
2452 public:
2453 wxFontEncoding m_enc;
2454 wxEncodingConverter m2w, w2m;
2455
2456 // were we initialized successfully?
2457 bool m_ok;
2458
2459 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2460 };
2461
2462 #endif // wxUSE_FONTMAP
2463
2464 // ============================================================================
2465 // wxCSConv implementation
2466 // ============================================================================
2467
2468 void wxCSConv::Init()
2469 {
2470 m_name = NULL;
2471 m_convReal = NULL;
2472 m_deferred = true;
2473 }
2474
2475 wxCSConv::wxCSConv(const wxChar *charset)
2476 {
2477 Init();
2478
2479 if ( charset )
2480 {
2481 SetName(charset);
2482 }
2483
2484 m_encoding = wxFONTENCODING_SYSTEM;
2485 }
2486
2487 wxCSConv::wxCSConv(wxFontEncoding encoding)
2488 {
2489 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2490 {
2491 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2492
2493 encoding = wxFONTENCODING_SYSTEM;
2494 }
2495
2496 Init();
2497
2498 m_encoding = encoding;
2499 }
2500
2501 wxCSConv::~wxCSConv()
2502 {
2503 Clear();
2504 }
2505
2506 wxCSConv::wxCSConv(const wxCSConv& conv)
2507 : wxMBConv()
2508 {
2509 Init();
2510
2511 SetName(conv.m_name);
2512 m_encoding = conv.m_encoding;
2513 }
2514
2515 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2516 {
2517 Clear();
2518
2519 SetName(conv.m_name);
2520 m_encoding = conv.m_encoding;
2521
2522 return *this;
2523 }
2524
2525 void wxCSConv::Clear()
2526 {
2527 free(m_name);
2528 delete m_convReal;
2529
2530 m_name = NULL;
2531 m_convReal = NULL;
2532 }
2533
2534 void wxCSConv::SetName(const wxChar *charset)
2535 {
2536 if (charset)
2537 {
2538 m_name = wxStrdup(charset);
2539 m_deferred = true;
2540 }
2541 }
2542
2543 wxMBConv *wxCSConv::DoCreate() const
2544 {
2545 // check for the special case of ASCII or ISO8859-1 charset: as we have
2546 // special knowledge of it anyhow, we don't need to create a special
2547 // conversion object
2548 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2549 {
2550 // don't convert at all
2551 return NULL;
2552 }
2553
2554 // we trust OS to do conversion better than we can so try external
2555 // conversion methods first
2556 //
2557 // the full order is:
2558 // 1. OS conversion (iconv() under Unix or Win32 API)
2559 // 2. hard coded conversions for UTF
2560 // 3. wxEncodingConverter as fall back
2561
2562 // step (1)
2563 #ifdef HAVE_ICONV
2564 #if !wxUSE_FONTMAP
2565 if ( m_name )
2566 #endif // !wxUSE_FONTMAP
2567 {
2568 wxString name(m_name);
2569
2570 #if wxUSE_FONTMAP
2571 if ( name.empty() )
2572 name = wxFontMapper::Get()->GetEncodingName(m_encoding);
2573 #endif // wxUSE_FONTMAP
2574
2575 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2576 if ( conv->IsOk() )
2577 return conv;
2578
2579 delete conv;
2580 }
2581 #endif // HAVE_ICONV
2582
2583 #ifdef wxHAVE_WIN32_MB2WC
2584 {
2585 #if wxUSE_FONTMAP
2586 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2587 : new wxMBConv_win32(m_encoding);
2588 if ( conv->IsOk() )
2589 return conv;
2590
2591 delete conv;
2592 #else
2593 return NULL;
2594 #endif
2595 }
2596 #endif // wxHAVE_WIN32_MB2WC
2597 #if defined(__WXMAC__)
2598 {
2599 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
2600 {
2601
2602 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2603 : new wxMBConv_mac(m_encoding);
2604 if ( conv->IsOk() )
2605 return conv;
2606
2607 delete conv;
2608 }
2609 }
2610 #endif
2611 #if defined(__WXCOCOA__)
2612 {
2613 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2614 {
2615
2616 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2617 : new wxMBConv_cocoa(m_encoding);
2618 if ( conv->IsOk() )
2619 return conv;
2620
2621 delete conv;
2622 }
2623 }
2624 #endif
2625 // step (2)
2626 wxFontEncoding enc = m_encoding;
2627 #if wxUSE_FONTMAP
2628 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2629 {
2630 // use "false" to suppress interactive dialogs -- we can be called from
2631 // anywhere and popping up a dialog from here is the last thing we want to
2632 // do
2633 enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
2634 }
2635 #endif // wxUSE_FONTMAP
2636
2637 switch ( enc )
2638 {
2639 case wxFONTENCODING_UTF7:
2640 return new wxMBConvUTF7;
2641
2642 case wxFONTENCODING_UTF8:
2643 return new wxMBConvUTF8;
2644
2645 case wxFONTENCODING_UTF16BE:
2646 return new wxMBConvUTF16BE;
2647
2648 case wxFONTENCODING_UTF16LE:
2649 return new wxMBConvUTF16LE;
2650
2651 case wxFONTENCODING_UTF32BE:
2652 return new wxMBConvUTF32BE;
2653
2654 case wxFONTENCODING_UTF32LE:
2655 return new wxMBConvUTF32LE;
2656
2657 default:
2658 // nothing to do but put here to suppress gcc warnings
2659 ;
2660 }
2661
2662 // step (3)
2663 #if wxUSE_FONTMAP
2664 {
2665 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2666 : new wxMBConv_wxwin(m_encoding);
2667 if ( conv->IsOk() )
2668 return conv;
2669
2670 delete conv;
2671 }
2672 #endif // wxUSE_FONTMAP
2673
2674 // NB: This is a hack to prevent deadlock. What could otherwise happen
2675 // in Unicode build: wxConvLocal creation ends up being here
2676 // because of some failure and logs the error. But wxLog will try to
2677 // attach timestamp, for which it will need wxConvLocal (to convert
2678 // time to char* and then wchar_t*), but that fails, tries to log
2679 // error, but wxLog has a (already locked) critical section that
2680 // guards static buffer.
2681 static bool alreadyLoggingError = false;
2682 if (!alreadyLoggingError)
2683 {
2684 alreadyLoggingError = true;
2685 wxLogError(_("Cannot convert from the charset '%s'!"),
2686 m_name ? m_name
2687 :
2688 #if wxUSE_FONTMAP
2689 wxFontMapper::GetEncodingDescription(m_encoding).c_str()
2690 #else // !wxUSE_FONTMAP
2691 wxString::Format(_("encoding %s"), m_encoding).c_str()
2692 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2693 );
2694 alreadyLoggingError = false;
2695 }
2696
2697 return NULL;
2698 }
2699
2700 void wxCSConv::CreateConvIfNeeded() const
2701 {
2702 if ( m_deferred )
2703 {
2704 wxCSConv *self = (wxCSConv *)this; // const_cast
2705
2706 #if wxUSE_INTL
2707 // if we don't have neither the name nor the encoding, use the default
2708 // encoding for this system
2709 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2710 {
2711 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2712 }
2713 #endif // wxUSE_INTL
2714
2715 self->m_convReal = DoCreate();
2716 self->m_deferred = false;
2717 }
2718 }
2719
2720 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2721 {
2722 CreateConvIfNeeded();
2723
2724 if (m_convReal)
2725 return m_convReal->MB2WC(buf, psz, n);
2726
2727 // latin-1 (direct)
2728 size_t len = strlen(psz);
2729
2730 if (buf)
2731 {
2732 for (size_t c = 0; c <= len; c++)
2733 buf[c] = (unsigned char)(psz[c]);
2734 }
2735
2736 return len;
2737 }
2738
2739 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2740 {
2741 CreateConvIfNeeded();
2742
2743 if (m_convReal)
2744 return m_convReal->WC2MB(buf, psz, n);
2745
2746 // latin-1 (direct)
2747 const size_t len = wxWcslen(psz);
2748 if (buf)
2749 {
2750 for (size_t c = 0; c <= len; c++)
2751 {
2752 if (psz[c] > 0xFF)
2753 return (size_t)-1;
2754 buf[c] = (char)psz[c];
2755 }
2756 }
2757 else
2758 {
2759 for (size_t c = 0; c <= len; c++)
2760 {
2761 if (psz[c] > 0xFF)
2762 return (size_t)-1;
2763 }
2764 }
2765
2766 return len;
2767 }
2768
2769 // ----------------------------------------------------------------------------
2770 // globals
2771 // ----------------------------------------------------------------------------
2772
2773 #ifdef __WINDOWS__
2774 static wxMBConv_win32 wxConvLibcObj;
2775 #elif defined(__WXMAC__) && !defined(__MACH__)
2776 static wxMBConv_mac wxConvLibcObj ;
2777 #else
2778 static wxMBConvLibc wxConvLibcObj;
2779 #endif
2780
2781 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2782 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2783 static wxMBConvUTF7 wxConvUTF7Obj;
2784 static wxMBConvUTF8 wxConvUTF8Obj;
2785
2786
2787 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2788 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2789 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2790 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2791 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2792 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2793
2794 #else // !wxUSE_WCHAR_T
2795
2796 // stand-ins in absence of wchar_t
2797 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2798 wxConvISO8859_1,
2799 wxConvLocal,
2800 wxConvUTF8;
2801
2802 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2803
2804