]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
rewrite core foundation conversion to be efficient, double-pass mac WC2MB to verify...
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
6 // Modified by:
7 // Created: 29/01/98
8 // RCS-ID: $Id$
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
14
15 // ============================================================================
16 // declarations
17 // ============================================================================
18
19 // ----------------------------------------------------------------------------
20 // headers
21 // ----------------------------------------------------------------------------
22
23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
24 #pragma implementation "strconv.h"
25 #endif
26
27 // For compilers that support precompilation, includes "wx.h".
28 #include "wx/wxprec.h"
29
30 #ifdef __BORLANDC__
31 #pragma hdrstop
32 #endif
33
34 #ifndef WX_PRECOMP
35 #include "wx/intl.h"
36 #include "wx/log.h"
37 #endif // WX_PRECOMP
38
39 #include "wx/strconv.h"
40
41 #if wxUSE_WCHAR_T
42
43 #ifdef __WXMSW__
44 #include "wx/msw/private.h"
45 #endif
46
47 #ifdef __WINDOWS__
48 #include "wx/msw/missing.h"
49 #endif
50
51 #ifndef __WXWINCE__
52 #include <errno.h>
53 #endif
54
55 #include <ctype.h>
56 #include <string.h>
57 #include <stdlib.h>
58
59 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
60 #define wxHAVE_WIN32_MB2WC
61 #endif // __WIN32__ but !__WXMICROWIN__
62
63 // ----------------------------------------------------------------------------
64 // headers
65 // ----------------------------------------------------------------------------
66
67 #ifdef __SALFORDC__
68 #include <clib.h>
69 #endif
70
71 #ifdef HAVE_ICONV
72 #include <iconv.h>
73 #endif
74
75 #include "wx/encconv.h"
76 #include "wx/fontmap.h"
77 #include "wx/utils.h"
78
79 #ifdef __WXMAC__
80 #include <ATSUnicode.h>
81 #include <TextCommon.h>
82 #include <TextEncodingConverter.h>
83
84 #include "wx/mac/private.h" // includes mac headers
85 #endif
86 // ----------------------------------------------------------------------------
87 // macros
88 // ----------------------------------------------------------------------------
89
90 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
91 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
92
93 #if SIZEOF_WCHAR_T == 4
94 #define WC_NAME "UCS4"
95 #define WC_BSWAP BSWAP_UCS4
96 #ifdef WORDS_BIGENDIAN
97 #define WC_NAME_BEST "UCS-4BE"
98 #else
99 #define WC_NAME_BEST "UCS-4LE"
100 #endif
101 #elif SIZEOF_WCHAR_T == 2
102 #define WC_NAME "UTF16"
103 #define WC_BSWAP BSWAP_UTF16
104 #define WC_UTF16
105 #ifdef WORDS_BIGENDIAN
106 #define WC_NAME_BEST "UTF-16BE"
107 #else
108 #define WC_NAME_BEST "UTF-16LE"
109 #endif
110 #else // sizeof(wchar_t) != 2 nor 4
111 // does this ever happen?
112 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
113 #endif
114
115 // ============================================================================
116 // implementation
117 // ============================================================================
118
119 // ----------------------------------------------------------------------------
120 // UTF-16 en/decoding to/from UCS-4
121 // ----------------------------------------------------------------------------
122
123
124 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
125 {
126 if (input<=0xffff)
127 {
128 if (output)
129 *output = (wxUint16) input;
130 return 1;
131 }
132 else if (input>=0x110000)
133 {
134 return (size_t)-1;
135 }
136 else
137 {
138 if (output)
139 {
140 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
141 *output = (wxUint16) ((input&0x3ff)+0xdc00);
142 }
143 return 2;
144 }
145 }
146
147 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
148 {
149 if ((*input<0xd800) || (*input>0xdfff))
150 {
151 output = *input;
152 return 1;
153 }
154 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
155 {
156 output = *input;
157 return (size_t)-1;
158 }
159 else
160 {
161 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
162 return 2;
163 }
164 }
165
166
167 // ----------------------------------------------------------------------------
168 // wxMBConv
169 // ----------------------------------------------------------------------------
170
171 wxMBConv::~wxMBConv()
172 {
173 // nothing to do here (necessary for Darwin linking probably)
174 }
175
176 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
177 {
178 if ( psz )
179 {
180 // calculate the length of the buffer needed first
181 size_t nLen = MB2WC(NULL, psz, 0);
182 if ( nLen != (size_t)-1 )
183 {
184 // now do the actual conversion
185 wxWCharBuffer buf(nLen);
186 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
187 if ( nLen != (size_t)-1 )
188 {
189 return buf;
190 }
191 }
192 }
193
194 wxWCharBuffer buf((wchar_t *)NULL);
195
196 return buf;
197 }
198
199 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
200 {
201 if ( pwz )
202 {
203 size_t nLen = WC2MB(NULL, pwz, 0);
204 if ( nLen != (size_t)-1 )
205 {
206 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
207 nLen = WC2MB(buf.data(), pwz, nLen + 4);
208 if ( nLen != (size_t)-1 )
209 {
210 return buf;
211 }
212 }
213 }
214
215 wxCharBuffer buf((char *)NULL);
216
217 return buf;
218 }
219
220 size_t wxMBConv::MB2WC(wchar_t* szBuffer, const char* szString,
221 size_t outsize, size_t nStringLen) const
222 {
223 const char* szEnd = szString + nStringLen + 1;
224 const char* szPos = szString;
225 const char* szStart = szPos;
226
227 size_t nActualLength = 0;
228
229 //Convert the string until the length() is reached, continuing the
230 //loop every time a null character is reached
231 while(szPos != szEnd)
232 {
233 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
234
235 //Get the length of the current (sub)string
236 size_t nLen = MB2WC(NULL, szPos, 0);
237
238 //Invalid conversion?
239 if( nLen == (size_t)-1 )
240 return nLen;
241
242 //Increase the actual length (+1 for current null character)
243 nActualLength += nLen + 1;
244
245 //Only copy data in if buffer size is big enough
246 if (szBuffer != NULL &&
247 nActualLength <= outsize)
248 {
249 //Convert the current (sub)string
250 if ( MB2WC(&szBuffer[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
251 return (size_t)-1;
252 }
253
254 //Increment to next (sub)string
255 //Note that we have to use strlen here instead of nLen
256 //here because XX2XX gives us the size of the output buffer,
257 //not neccessarly the length of the string
258 szPos += strlen(szPos) + 1;
259 }
260
261 return nActualLength - 1; //success - return actual length
262 }
263
264 size_t wxMBConv::WC2MB(char* szBuffer, const wchar_t* szString,
265 size_t outsize, size_t nStringLen) const
266 {
267 const wchar_t* szEnd = szString + nStringLen + 1;
268 const wchar_t* szPos = szString;
269 const wchar_t* szStart = szPos;
270
271 size_t nActualLength = 0;
272
273 //Convert the string until the length() is reached, continuing the
274 //loop every time a null character is reached
275 while(szPos != szEnd)
276 {
277 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
278
279 //Get the length of the current (sub)string
280 size_t nLen = WC2MB(NULL, szPos, 0);
281
282 //Invalid conversion?
283 if( nLen == (size_t)-1 )
284 return nLen;
285
286 //Increase the actual length (+1 for current null character)
287 nActualLength += nLen + 1;
288
289 //Only copy data in if buffer size is big enough
290 if (szBuffer != NULL &&
291 nActualLength <= outsize)
292 {
293 //Convert the current (sub)string
294 if(WC2MB(&szBuffer[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
295 return (size_t)-1;
296 }
297
298 //Increment to next (sub)string
299 //Note that we have to use wxWcslen here instead of nLen
300 //here because XX2XX gives us the size of the output buffer,
301 //not neccessarly the length of the string
302 szPos += wxWcslen(szPos) + 1;
303 }
304
305 return nActualLength - 1; //success - return actual length
306 }
307
308 // ----------------------------------------------------------------------------
309 // wxMBConvLibc
310 // ----------------------------------------------------------------------------
311
312 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
313 {
314 return wxMB2WC(buf, psz, n);
315 }
316
317 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
318 {
319 return wxWC2MB(buf, psz, n);
320 }
321 // ----------------------------------------------------------------------------
322 // UTF-7
323 // ----------------------------------------------------------------------------
324
325 // Implementation (C) 2004 Fredrik Roubert
326
327 //
328 // BASE64 decoding table
329 //
// Indexed by byte value: the 6-bit BASE64 value of that character, or 0xff
// if the byte is not part of the BASE64 alphabet. One row per 16 byte values.
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f ('+' and '/')
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f ('0'..'9')
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x4f ('A'..'O')
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50 - 0x5f ('P'..'Z')
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x6f ('a'..'o')
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70 - 0x7f ('p'..'z')
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff: never valid
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
365
366 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
367 {
368
369 size_t len = 0;
370
371 while (*psz && ((!buf) || (len < n)))
372 {
373 unsigned char cc = *psz++;
374 if (cc != '+')
375 {
376 // plain ASCII char
377 if (buf)
378 *buf++ = cc;
379 len++;
380 }
381 else if (*psz == '-')
382 {
383 // encoded plus sign
384 if (buf)
385 *buf++ = cc;
386 len++;
387 psz++;
388 }
389 else
390 {
391 // BASE64 encoded string
392 bool lsb;
393 unsigned char c;
394 unsigned int d, l;
395 for (lsb = false, d = 0, l = 0;
396 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
397 {
398 d <<= 6;
399 d += cc;
400 for (l += 6; l >= 8; lsb = !lsb)
401 {
402 c = (d >> (l -= 8)) % 256;
403 if (lsb)
404 {
405 if (buf)
406 *buf++ |= c;
407 len ++;
408 }
409 else
410 if (buf)
411 *buf = c << 8;
412 }
413 }
414 if (*psz == '-')
415 psz++;
416 }
417 }
418 if (buf && (len < n))
419 *buf = 0;
420 return len;
421 }
422
423 //
424 // BASE64 encoding table
425 //
// Indexed by 6-bit value: the BASE64 character representing it.
static const unsigned char utf7enb64[] =
{
    // 0 - 25
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 26 - 51
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // 52 - 61
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 62, 63
    '+', '/'
};
437
438 //
439 // UTF-7 encoding table
440 //
441 // 0 - Set D (directly encoded characters)
442 // 1 - Set O (optional direct characters)
443 // 2 - whitespace characters (optional)
444 // 3 - special characters
445 //
// Indexed by ASCII code: the UTF-7 character class (0..3) of that character,
// eight entries per row.
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3,     // 0x00 - 0x07
    3, 2, 2, 3, 3, 2, 3, 3,     // 0x08 - 0x0f (TAB, LF, CR are class 2)
    3, 3, 3, 3, 3, 3, 3, 3,     // 0x10 - 0x17
    3, 3, 3, 3, 3, 3, 3, 3,     // 0x18 - 0x1f
    2, 1, 1, 1, 1, 1, 1, 0,     // 0x20 - 0x27
    0, 0, 1, 3, 0, 0, 0, 3,     // 0x28 - 0x2f
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x30 - 0x37
    0, 0, 0, 1, 1, 1, 1, 0,     // 0x38 - 0x3f
    1, 0, 0, 0, 0, 0, 0, 0,     // 0x40 - 0x47
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x48 - 0x4f
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x50 - 0x57
    0, 0, 0, 1, 3, 1, 1, 1,     // 0x58 - 0x5f
    1, 0, 0, 0, 0, 0, 0, 0,     // 0x60 - 0x67
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x68 - 0x6f
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x70 - 0x77
    0, 0, 0, 1, 1, 1, 3, 3      // 0x78 - 0x7f
};
457
458 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t
459 *psz, size_t n) const
460 {
461
462
463 size_t len = 0;
464
465 while (*psz && ((!buf) || (len < n)))
466 {
467 wchar_t cc = *psz++;
468 if (cc < 0x80 && utf7encode[cc] < 1)
469 {
470 // plain ASCII char
471 if (buf)
472 *buf++ = (char)cc;
473 len++;
474 }
475 #ifndef WC_UTF16
476 else if (cc > ((const wchar_t)0xffff))
477 {
478 // no surrogate pair generation (yet?)
479 return (size_t)-1;
480 }
481 #endif
482 else
483 {
484 if (buf)
485 *buf++ = '+';
486 len++;
487 if (cc != '+')
488 {
489 // BASE64 encode string
490 unsigned int lsb, d, l;
491 for (d = 0, l = 0;; psz++)
492 {
493 for (lsb = 0; lsb < 2; lsb ++)
494 {
495 d <<= 8;
496 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
497
498 for (l += 8; l >= 6; )
499 {
500 l -= 6;
501 if (buf)
502 *buf++ = utf7enb64[(d >> l) % 64];
503 len++;
504 }
505 }
506 cc = *psz;
507 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
508 break;
509 }
510 if (l != 0)
511 {
512 if (buf)
513 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
514 len++;
515 }
516 }
517 if (buf)
518 *buf++ = '-';
519 len++;
520 }
521 }
522 if (buf && (len < n))
523 *buf = 0;
524 return len;
525 }
526
527 // ----------------------------------------------------------------------------
528 // UTF-8
529 // ----------------------------------------------------------------------------
530
531 static wxUint32 utf8_max[]=
532 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
533
534 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
535 {
536 size_t len = 0;
537
538 while (*psz && ((!buf) || (len < n)))
539 {
540 unsigned char cc = *psz++, fc = cc;
541 unsigned cnt;
542 for (cnt = 0; fc & 0x80; cnt++)
543 fc <<= 1;
544 if (!cnt)
545 {
546 // plain ASCII char
547 if (buf)
548 *buf++ = cc;
549 len++;
550 }
551 else
552 {
553 cnt--;
554 if (!cnt)
555 {
556 // invalid UTF-8 sequence
557 return (size_t)-1;
558 }
559 else
560 {
561 unsigned ocnt = cnt - 1;
562 wxUint32 res = cc & (0x3f >> cnt);
563 while (cnt--)
564 {
565 cc = *psz++;
566 if ((cc & 0xC0) != 0x80)
567 {
568 // invalid UTF-8 sequence
569 return (size_t)-1;
570 }
571 res = (res << 6) | (cc & 0x3f);
572 }
573 if (res <= utf8_max[ocnt])
574 {
575 // illegal UTF-8 encoding
576 return (size_t)-1;
577 }
578 #ifdef WC_UTF16
579 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
580 size_t pa = encode_utf16(res, (wxUint16 *)buf);
581 if (pa == (size_t)-1)
582 return (size_t)-1;
583 if (buf)
584 buf += pa;
585 len += pa;
586 #else // !WC_UTF16
587 if (buf)
588 *buf++ = res;
589 len++;
590 #endif // WC_UTF16/!WC_UTF16
591 }
592 }
593 }
594 if (buf && (len < n))
595 *buf = 0;
596 return len;
597 }
598
599 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
600 {
601 size_t len = 0;
602
603 while (*psz && ((!buf) || (len < n)))
604 {
605 wxUint32 cc;
606 #ifdef WC_UTF16
607 // cast is ok for WC_UTF16
608 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
609 psz += (pa == (size_t)-1) ? 1 : pa;
610 #else
611 cc=(*psz++) & 0x7fffffff;
612 #endif
613 unsigned cnt;
614 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
615 if (!cnt)
616 {
617 // plain ASCII char
618 if (buf)
619 *buf++ = (char) cc;
620 len++;
621 }
622
623 else
624 {
625 len += cnt + 1;
626 if (buf)
627 {
628 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
629 while (cnt--)
630 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
631 }
632 }
633 }
634
635 if (buf && (len<n)) *buf = 0;
636
637 return len;
638 }
639
640
641
642
643 // ----------------------------------------------------------------------------
644 // UTF-16
645 // ----------------------------------------------------------------------------
646
647 #ifdef WORDS_BIGENDIAN
648 #define wxMBConvUTF16straight wxMBConvUTF16BE
649 #define wxMBConvUTF16swap wxMBConvUTF16LE
650 #else
651 #define wxMBConvUTF16swap wxMBConvUTF16BE
652 #define wxMBConvUTF16straight wxMBConvUTF16LE
653 #endif
654
655
656 #ifdef WC_UTF16
657
658 // copy 16bit MB to 16bit String
659 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
660 {
661 size_t len=0;
662
663 while (*(wxUint16*)psz && (!buf || len < n))
664 {
665 if (buf)
666 *buf++ = *(wxUint16*)psz;
667 len++;
668
669 psz += sizeof(wxUint16);
670 }
671 if (buf && len<n) *buf=0;
672
673 return len;
674 }
675
676
677 // copy 16bit String to 16bit MB
678 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
679 {
680 size_t len=0;
681
682 while (*psz && (!buf || len < n))
683 {
684 if (buf)
685 {
686 *(wxUint16*)buf = *psz;
687 buf += sizeof(wxUint16);
688 }
689 len += sizeof(wxUint16);
690 psz++;
691 }
692 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
693
694 return len;
695 }
696
697
698 // swap 16bit MB to 16bit String
699 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
700 {
701 size_t len=0;
702
703 while (*(wxUint16*)psz && (!buf || len < n))
704 {
705 if (buf)
706 {
707 ((char *)buf)[0] = psz[1];
708 ((char *)buf)[1] = psz[0];
709 buf++;
710 }
711 len++;
712 psz += sizeof(wxUint16);
713 }
714 if (buf && len<n) *buf=0;
715
716 return len;
717 }
718
719
720 // swap 16bit MB to 16bit String
721 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
722 {
723 size_t len=0;
724
725 while (*psz && (!buf || len < n))
726 {
727 if (buf)
728 {
729 *buf++ = ((char*)psz)[1];
730 *buf++ = ((char*)psz)[0];
731 }
732 len += sizeof(wxUint16);
733 psz++;
734 }
735 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
736
737 return len;
738 }
739
740
741 #else // WC_UTF16
742
743
744 // copy 16bit MB to 32bit String
745 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
746 {
747 size_t len=0;
748
749 while (*(wxUint16*)psz && (!buf || len < n))
750 {
751 wxUint32 cc;
752 size_t pa=decode_utf16((wxUint16*)psz, cc);
753 if (pa == (size_t)-1)
754 return pa;
755
756 if (buf)
757 *buf++ = cc;
758 len++;
759 psz += pa * sizeof(wxUint16);
760 }
761 if (buf && len<n) *buf=0;
762
763 return len;
764 }
765
766
767 // copy 32bit String to 16bit MB
768 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
769 {
770 size_t len=0;
771
772 while (*psz && (!buf || len < n))
773 {
774 wxUint16 cc[2];
775 size_t pa=encode_utf16(*psz, cc);
776
777 if (pa == (size_t)-1)
778 return pa;
779
780 if (buf)
781 {
782 *(wxUint16*)buf = cc[0];
783 buf += sizeof(wxUint16);
784 if (pa > 1)
785 {
786 *(wxUint16*)buf = cc[1];
787 buf += sizeof(wxUint16);
788 }
789 }
790
791 len += pa*sizeof(wxUint16);
792 psz++;
793 }
794 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
795
796 return len;
797 }
798
799
800 // swap 16bit MB to 32bit String
801 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
802 {
803 size_t len=0;
804
805 while (*(wxUint16*)psz && (!buf || len < n))
806 {
807 wxUint32 cc;
808 char tmp[4];
809 tmp[0]=psz[1]; tmp[1]=psz[0];
810 tmp[2]=psz[3]; tmp[3]=psz[2];
811
812 size_t pa=decode_utf16((wxUint16*)tmp, cc);
813 if (pa == (size_t)-1)
814 return pa;
815
816 if (buf)
817 *buf++ = cc;
818
819 len++;
820 psz += pa * sizeof(wxUint16);
821 }
822 if (buf && len<n) *buf=0;
823
824 return len;
825 }
826
827
828 // swap 32bit String to 16bit MB
829 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
830 {
831 size_t len=0;
832
833 while (*psz && (!buf || len < n))
834 {
835 wxUint16 cc[2];
836 size_t pa=encode_utf16(*psz, cc);
837
838 if (pa == (size_t)-1)
839 return pa;
840
841 if (buf)
842 {
843 *buf++ = ((char*)cc)[1];
844 *buf++ = ((char*)cc)[0];
845 if (pa > 1)
846 {
847 *buf++ = ((char*)cc)[3];
848 *buf++ = ((char*)cc)[2];
849 }
850 }
851
852 len += pa*sizeof(wxUint16);
853 psz++;
854 }
855 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
856
857 return len;
858 }
859
860 #endif // WC_UTF16
861
862
863 // ----------------------------------------------------------------------------
864 // UTF-32
865 // ----------------------------------------------------------------------------
866
867 #ifdef WORDS_BIGENDIAN
868 #define wxMBConvUTF32straight wxMBConvUTF32BE
869 #define wxMBConvUTF32swap wxMBConvUTF32LE
870 #else
871 #define wxMBConvUTF32swap wxMBConvUTF32BE
872 #define wxMBConvUTF32straight wxMBConvUTF32LE
873 #endif
874
875
876 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
877 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
878
879
880 #ifdef WC_UTF16
881
882 // copy 32bit MB to 16bit String
883 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
884 {
885 size_t len=0;
886
887 while (*(wxUint32*)psz && (!buf || len < n))
888 {
889 wxUint16 cc[2];
890
891 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
892 if (pa == (size_t)-1)
893 return pa;
894
895 if (buf)
896 {
897 *buf++ = cc[0];
898 if (pa > 1)
899 *buf++ = cc[1];
900 }
901 len += pa;
902 psz += sizeof(wxUint32);
903 }
904 if (buf && len<n) *buf=0;
905
906 return len;
907 }
908
909
910 // copy 16bit String to 32bit MB
911 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
912 {
913 size_t len=0;
914
915 while (*psz && (!buf || len < n))
916 {
917 wxUint32 cc;
918
919 // cast is ok for WC_UTF16
920 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
921 if (pa == (size_t)-1)
922 return pa;
923
924 if (buf)
925 {
926 *(wxUint32*)buf = cc;
927 buf += sizeof(wxUint32);
928 }
929 len += sizeof(wxUint32);
930 psz += pa;
931 }
932
933 if (buf && len<=n-sizeof(wxUint32))
934 *(wxUint32*)buf=0;
935
936 return len;
937 }
938
939
940
941 // swap 32bit MB to 16bit String
942 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
943 {
944 size_t len=0;
945
946 while (*(wxUint32*)psz && (!buf || len < n))
947 {
948 char tmp[4];
949 tmp[0] = psz[3]; tmp[1] = psz[2];
950 tmp[2] = psz[1]; tmp[3] = psz[0];
951
952
953 wxUint16 cc[2];
954
955 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
956 if (pa == (size_t)-1)
957 return pa;
958
959 if (buf)
960 {
961 *buf++ = cc[0];
962 if (pa > 1)
963 *buf++ = cc[1];
964 }
965 len += pa;
966 psz += sizeof(wxUint32);
967 }
968
969 if (buf && len<n)
970 *buf=0;
971
972 return len;
973 }
974
975
976 // swap 16bit String to 32bit MB
977 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
978 {
979 size_t len=0;
980
981 while (*psz && (!buf || len < n))
982 {
983 char cc[4];
984
985 // cast is ok for WC_UTF16
986 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
987 if (pa == (size_t)-1)
988 return pa;
989
990 if (buf)
991 {
992 *buf++ = cc[3];
993 *buf++ = cc[2];
994 *buf++ = cc[1];
995 *buf++ = cc[0];
996 }
997 len += sizeof(wxUint32);
998 psz += pa;
999 }
1000
1001 if (buf && len<=n-sizeof(wxUint32))
1002 *(wxUint32*)buf=0;
1003
1004 return len;
1005 }
1006
1007 #else // WC_UTF16
1008
1009
1010 // copy 32bit MB to 32bit String
1011 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1012 {
1013 size_t len=0;
1014
1015 while (*(wxUint32*)psz && (!buf || len < n))
1016 {
1017 if (buf)
1018 *buf++ = *(wxUint32*)psz;
1019 len++;
1020 psz += sizeof(wxUint32);
1021 }
1022
1023 if (buf && len<n)
1024 *buf=0;
1025
1026 return len;
1027 }
1028
1029
1030 // copy 32bit String to 32bit MB
1031 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1032 {
1033 size_t len=0;
1034
1035 while (*psz && (!buf || len < n))
1036 {
1037 if (buf)
1038 {
1039 *(wxUint32*)buf = *psz;
1040 buf += sizeof(wxUint32);
1041 }
1042
1043 len += sizeof(wxUint32);
1044 psz++;
1045 }
1046
1047 if (buf && len<=n-sizeof(wxUint32))
1048 *(wxUint32*)buf=0;
1049
1050 return len;
1051 }
1052
1053
1054 // swap 32bit MB to 32bit String
1055 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1056 {
1057 size_t len=0;
1058
1059 while (*(wxUint32*)psz && (!buf || len < n))
1060 {
1061 if (buf)
1062 {
1063 ((char *)buf)[0] = psz[3];
1064 ((char *)buf)[1] = psz[2];
1065 ((char *)buf)[2] = psz[1];
1066 ((char *)buf)[3] = psz[0];
1067 buf++;
1068 }
1069 len++;
1070 psz += sizeof(wxUint32);
1071 }
1072
1073 if (buf && len<n)
1074 *buf=0;
1075
1076 return len;
1077 }
1078
1079
1080 // swap 32bit String to 32bit MB
1081 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1082 {
1083 size_t len=0;
1084
1085 while (*psz && (!buf || len < n))
1086 {
1087 if (buf)
1088 {
1089 *buf++ = ((char *)psz)[3];
1090 *buf++ = ((char *)psz)[2];
1091 *buf++ = ((char *)psz)[1];
1092 *buf++ = ((char *)psz)[0];
1093 }
1094 len += sizeof(wxUint32);
1095 psz++;
1096 }
1097
1098 if (buf && len<=n-sizeof(wxUint32))
1099 *(wxUint32*)buf=0;
1100
1101 return len;
1102 }
1103
1104
1105 #endif // WC_UTF16
1106
1107
1108 // ============================================================================
1109 // The classes doing conversion using the iconv_xxx() functions
1110 // ============================================================================
1111
1112 #ifdef HAVE_ICONV
1113
1114 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
1115 // if output buffer is _exactly_ as big as needed. Such case is (unless there's
1116 // yet another bug in glibc) the only case when iconv() returns with (size_t)-1
1117 // (which means error) and says there are 0 bytes left in the input buffer --
1118 // when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
1119 // this alternative test for iconv() failure.
1120 // [This bug does not appear in glibc 2.2.]
1121 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1122 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1123 (errno != E2BIG || bufLeft != 0))
1124 #else
1125 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1126 #endif
1127
1128 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1129
1130 // ----------------------------------------------------------------------------
1131 // wxMBConv_iconv: encapsulates an iconv character set
1132 // ----------------------------------------------------------------------------
1133
1134 class wxMBConv_iconv : public wxMBConv
1135 {
1136 public:
1137 wxMBConv_iconv(const wxChar *name);
1138 virtual ~wxMBConv_iconv();
1139
1140 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1141 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1142
1143 bool IsOk() const
1144 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1145
1146 protected:
1147 // the iconv handlers used to translate from multibyte to wide char and in
1148 // the other direction
1149 iconv_t m2w,
1150 w2m;
1151
1152 private:
1153 // the name (for iconv_open()) of a wide char charset -- if none is
1154 // available on this machine, it will remain NULL
1155 static const char *ms_wcCharsetName;
1156
1157 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1158 // different endian-ness than the native one
1159 static bool ms_wcNeedsSwap;
1160 };
1161
1162 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1163 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1164
1165 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1166 {
1167 // Do it the hard way
1168 char cname[100];
1169 for (size_t i = 0; i < wxStrlen(name)+1; i++)
1170 cname[i] = (char) name[i];
1171
1172 // check for charset that represents wchar_t:
1173 if (ms_wcCharsetName == NULL)
1174 {
1175 ms_wcNeedsSwap = false;
1176
1177 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1178 ms_wcCharsetName = WC_NAME_BEST;
1179 m2w = iconv_open(ms_wcCharsetName, cname);
1180
1181 if (m2w == (iconv_t)-1)
1182 {
1183 // try charset w/o bytesex info (e.g. "UCS4")
1184 // and check for bytesex ourselves:
1185 ms_wcCharsetName = WC_NAME;
1186 m2w = iconv_open(ms_wcCharsetName, cname);
1187
1188 // last bet, try if it knows WCHAR_T pseudo-charset
1189 if (m2w == (iconv_t)-1)
1190 {
1191 ms_wcCharsetName = "WCHAR_T";
1192 m2w = iconv_open(ms_wcCharsetName, cname);
1193 }
1194
1195 if (m2w != (iconv_t)-1)
1196 {
1197 char buf[2], *bufPtr;
1198 wchar_t wbuf[2], *wbufPtr;
1199 size_t insz, outsz;
1200 size_t res;
1201
1202 buf[0] = 'A';
1203 buf[1] = 0;
1204 wbuf[0] = 0;
1205 insz = 2;
1206 outsz = SIZEOF_WCHAR_T * 2;
1207 wbufPtr = wbuf;
1208 bufPtr = buf;
1209
1210 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1211 (char**)&wbufPtr, &outsz);
1212
1213 if (ICONV_FAILED(res, insz))
1214 {
1215 ms_wcCharsetName = NULL;
1216 wxLogLastError(wxT("iconv"));
1217 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1218 }
1219 else
1220 {
1221 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1222 }
1223 }
1224 else
1225 {
1226 ms_wcCharsetName = NULL;
1227
1228 // VS: we must not output an error here, since wxWidgets will safely
1229 // fall back to using wxEncodingConverter.
1230 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1231 //wxLogError(
1232 }
1233 }
1234 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1235 }
1236 else // we already have ms_wcCharsetName
1237 {
1238 m2w = iconv_open(ms_wcCharsetName, cname);
1239 }
1240
1241 // NB: don't ever pass NULL to iconv_open(), it may crash!
1242 if ( ms_wcCharsetName )
1243 {
1244 w2m = iconv_open( cname, ms_wcCharsetName);
1245 }
1246 else
1247 {
1248 w2m = (iconv_t)-1;
1249 }
1250 }
1251
1252 wxMBConv_iconv::~wxMBConv_iconv()
1253 {
1254 if ( m2w != (iconv_t)-1 )
1255 iconv_close(m2w);
1256 if ( w2m != (iconv_t)-1 )
1257 iconv_close(w2m);
1258 }
1259
1260 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1261 {
1262 size_t inbuf = strlen(psz);
1263 size_t outbuf = n * SIZEOF_WCHAR_T;
1264 size_t res, cres;
1265 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1266 wchar_t *bufPtr = buf;
1267 const char *pszPtr = psz;
1268
1269 if (buf)
1270 {
1271 // have destination buffer, convert there
1272 cres = iconv(m2w,
1273 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1274 (char**)&bufPtr, &outbuf);
1275 res = n - (outbuf / SIZEOF_WCHAR_T);
1276
1277 if (ms_wcNeedsSwap)
1278 {
1279 // convert to native endianness
1280 WC_BSWAP(buf /* _not_ bufPtr */, res)
1281 }
1282
1283 // NB: iconv was given only strlen(psz) characters on input, and so
1284 // it couldn't convert the trailing zero. Let's do it ourselves
1285 // if there's some room left for it in the output buffer.
1286 if (res < n)
1287 buf[res] = 0;
1288 }
1289 else
1290 {
1291 // no destination buffer... convert using temp buffer
1292 // to calculate destination buffer requirement
1293 wchar_t tbuf[8];
1294 res = 0;
1295 do {
1296 bufPtr = tbuf;
1297 outbuf = 8*SIZEOF_WCHAR_T;
1298
1299 cres = iconv(m2w,
1300 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1301 (char**)&bufPtr, &outbuf );
1302
1303 res += 8-(outbuf/SIZEOF_WCHAR_T);
1304 } while ((cres==(size_t)-1) && (errno==E2BIG));
1305 }
1306
1307 if (ICONV_FAILED(cres, inbuf))
1308 {
1309 //VS: it is ok if iconv fails, hence trace only
1310 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1311 return (size_t)-1;
1312 }
1313
1314 return res;
1315 }
1316
1317 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1318 {
1319 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1320 size_t outbuf = n;
1321 size_t res, cres;
1322
1323 wchar_t *tmpbuf = 0;
1324
1325 if (ms_wcNeedsSwap)
1326 {
1327 // need to copy to temp buffer to switch endianness
1328 // this absolutely doesn't rock!
1329 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1330 // could be in read-only memory, or be accessed in some other thread)
1331 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1332 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1333 WC_BSWAP(tmpbuf, inbuf)
1334 psz=tmpbuf;
1335 }
1336
1337 if (buf)
1338 {
1339 // have destination buffer, convert there
1340 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1341
1342 res = n-outbuf;
1343
1344 // NB: iconv was given only wcslen(psz) characters on input, and so
1345 // it couldn't convert the trailing zero. Let's do it ourselves
1346 // if there's some room left for it in the output buffer.
1347 if (res < n)
1348 buf[0] = 0;
1349 }
1350 else
1351 {
1352 // no destination buffer... convert using temp buffer
1353 // to calculate destination buffer requirement
1354 char tbuf[16];
1355 res = 0;
1356 do {
1357 buf = tbuf; outbuf = 16;
1358
1359 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1360
1361 res += 16 - outbuf;
1362 } while ((cres==(size_t)-1) && (errno==E2BIG));
1363 }
1364
1365 if (ms_wcNeedsSwap)
1366 {
1367 free(tmpbuf);
1368 }
1369
1370 if (ICONV_FAILED(cres, inbuf))
1371 {
1372 //VS: it is ok if iconv fails, hence trace only
1373 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1374 return (size_t)-1;
1375 }
1376
1377 return res;
1378 }
1379
1380 #endif // HAVE_ICONV
1381
1382
1383 // ============================================================================
1384 // Win32 conversion classes
1385 // ============================================================================
1386
1387 #ifdef wxHAVE_WIN32_MB2WC
1388
1389 // from utils.cpp
1390 #if wxUSE_FONTMAP
1391 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1392 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1393 #endif
1394
1395 class wxMBConv_win32 : public wxMBConv
1396 {
1397 public:
1398 wxMBConv_win32()
1399 {
1400 m_CodePage = CP_ACP;
1401 }
1402
1403 #if wxUSE_FONTMAP
1404 wxMBConv_win32(const wxChar* name)
1405 {
1406 m_CodePage = wxCharsetToCodepage(name);
1407 }
1408
1409 wxMBConv_win32(wxFontEncoding encoding)
1410 {
1411 m_CodePage = wxEncodingToCodepage(encoding);
1412 }
1413 #endif
1414
1415 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1416 {
1417 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1418 // the behaviour is not compatible with the Unix version (using iconv)
1419 // and break the library itself, e.g. wxTextInputStream::NextChar()
1420 // wouldn't work if reading an incomplete MB char didn't result in an
1421 // error
1422 const size_t len = ::MultiByteToWideChar
1423 (
1424 m_CodePage, // code page
1425 MB_ERR_INVALID_CHARS, // flags: fall on error
1426 psz, // input string
1427 -1, // its length (NUL-terminated)
1428 buf, // output string
1429 buf ? n : 0 // size of output buffer
1430 );
1431
1432 // note that it returns count of written chars for buf != NULL and size
1433 // of the needed buffer for buf == NULL so in either case the length of
1434 // the string (which never includes the terminating NUL) is one less
1435 return len ? len - 1 : (size_t)-1;
1436 }
1437
1438 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1439 {
1440 /*
1441 we have a problem here: by default, WideCharToMultiByte() may
1442 replace characters unrepresentable in the target code page with bad
1443 quality approximations such as turning "1/2" symbol (U+00BD) into
1444 "1" for the code pages which don't have it and we, obviously, want
1445 to avoid this at any price
1446
1447 the trouble is that this function does it _silently_, i.e. it won't
1448 even tell us whether it did or not... Win98/2000 and higher provide
1449 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1450 we have to resort to a round trip, i.e. check that converting back
1451 results in the same string -- this is, of course, expensive but
1452 otherwise we simply can't be sure to not garble the data.
1453 */
1454
1455 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1456 // it doesn't work with CJK encodings (which we test for rather roughly
1457 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1458 // supporting it
1459 BOOL usedDef wxDUMMY_INITIALIZE(false);
1460 BOOL *pUsedDef;
1461 int flags;
1462 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1463 {
1464 // it's our lucky day
1465 flags = WC_NO_BEST_FIT_CHARS;
1466 pUsedDef = &usedDef;
1467 }
1468 else // old system or unsupported encoding
1469 {
1470 flags = 0;
1471 pUsedDef = NULL;
1472 }
1473
1474 const size_t len = ::WideCharToMultiByte
1475 (
1476 m_CodePage, // code page
1477 flags, // either none or no best fit
1478 pwz, // input string
1479 -1, // it is (wide) NUL-terminated
1480 buf, // output buffer
1481 buf ? n : 0, // and its size
1482 NULL, // default "replacement" char
1483 pUsedDef // [out] was it used?
1484 );
1485
1486 if ( !len )
1487 {
1488 // function totally failed
1489 return (size_t)-1;
1490 }
1491
1492 // if we were really converting, check if we succeeded
1493 if ( buf )
1494 {
1495 if ( flags )
1496 {
1497 // check if the conversion failed, i.e. if any replacements
1498 // were done
1499 if ( usedDef )
1500 return (size_t)-1;
1501 }
1502 else // we must resort to double tripping...
1503 {
1504 wxWCharBuffer wcBuf(n);
1505 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1506 wcscmp(wcBuf, pwz) != 0 )
1507 {
1508 // we didn't obtain the same thing we started from, hence
1509 // the conversion was lossy and we consider that it failed
1510 return (size_t)-1;
1511 }
1512 }
1513 }
1514
1515 // see the comment above for the reason of "len - 1"
1516 return len - 1;
1517 }
1518
1519 bool IsOk() const { return m_CodePage != -1; }
1520
1521 private:
1522 static bool CanUseNoBestFit()
1523 {
1524 static int s_isWin98Or2k = -1;
1525
1526 if ( s_isWin98Or2k == -1 )
1527 {
1528 int verMaj, verMin;
1529 switch ( wxGetOsVersion(&verMaj, &verMin) )
1530 {
1531 case wxWIN95:
1532 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1533 break;
1534
1535 case wxWINDOWS_NT:
1536 s_isWin98Or2k = verMaj >= 5;
1537 break;
1538
1539 default:
1540 // unknown, be conseravtive by default
1541 s_isWin98Or2k = 0;
1542 }
1543
1544 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1545 }
1546
1547 return s_isWin98Or2k == 1;
1548 }
1549
1550 long m_CodePage;
1551 };
1552
1553 #endif // wxHAVE_WIN32_MB2WC
1554
1555 // ============================================================================
1556 // Cocoa conversion classes
1557 // ============================================================================
1558
1559 #if defined(__WXCOCOA__)
1560
1561 // RN: There is no UTF-32 support in either Core Foundation or
1562 // Cocoa. Strangely enough, Core Foundation uses UTF-32
1563 // internally quite a bit - it's just not public (yet).
1564
1565 #include <CoreFoundation/CFString.h>
1566 #include <CoreFoundation/CFStringEncodingExt.h>
1567
1568 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1569 {
1570 CFStringEncoding enc = kCFStringEncodingInvalidId ;
1571 if ( encoding == wxFONTENCODING_DEFAULT )
1572 {
1573 enc = CFStringGetSystemEncoding();
1574 }
1575 else switch( encoding)
1576 {
1577 case wxFONTENCODING_ISO8859_1 :
1578 enc = kCFStringEncodingISOLatin1 ;
1579 break ;
1580 case wxFONTENCODING_ISO8859_2 :
1581 enc = kCFStringEncodingISOLatin2;
1582 break ;
1583 case wxFONTENCODING_ISO8859_3 :
1584 enc = kCFStringEncodingISOLatin3 ;
1585 break ;
1586 case wxFONTENCODING_ISO8859_4 :
1587 enc = kCFStringEncodingISOLatin4;
1588 break ;
1589 case wxFONTENCODING_ISO8859_5 :
1590 enc = kCFStringEncodingISOLatinCyrillic;
1591 break ;
1592 case wxFONTENCODING_ISO8859_6 :
1593 enc = kCFStringEncodingISOLatinArabic;
1594 break ;
1595 case wxFONTENCODING_ISO8859_7 :
1596 enc = kCFStringEncodingISOLatinGreek;
1597 break ;
1598 case wxFONTENCODING_ISO8859_8 :
1599 enc = kCFStringEncodingISOLatinHebrew;
1600 break ;
1601 case wxFONTENCODING_ISO8859_9 :
1602 enc = kCFStringEncodingISOLatin5;
1603 break ;
1604 case wxFONTENCODING_ISO8859_10 :
1605 enc = kCFStringEncodingISOLatin6;
1606 break ;
1607 case wxFONTENCODING_ISO8859_11 :
1608 enc = kCFStringEncodingISOLatinThai;
1609 break ;
1610 case wxFONTENCODING_ISO8859_13 :
1611 enc = kCFStringEncodingISOLatin7;
1612 break ;
1613 case wxFONTENCODING_ISO8859_14 :
1614 enc = kCFStringEncodingISOLatin8;
1615 break ;
1616 case wxFONTENCODING_ISO8859_15 :
1617 enc = kCFStringEncodingISOLatin9;
1618 break ;
1619
1620 case wxFONTENCODING_KOI8 :
1621 enc = kCFStringEncodingKOI8_R;
1622 break ;
1623 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1624 enc = kCFStringEncodingDOSRussian;
1625 break ;
1626
1627 // case wxFONTENCODING_BULGARIAN :
1628 // enc = ;
1629 // break ;
1630
1631 case wxFONTENCODING_CP437 :
1632 enc =kCFStringEncodingDOSLatinUS ;
1633 break ;
1634 case wxFONTENCODING_CP850 :
1635 enc = kCFStringEncodingDOSLatin1;
1636 break ;
1637 case wxFONTENCODING_CP852 :
1638 enc = kCFStringEncodingDOSLatin2;
1639 break ;
1640 case wxFONTENCODING_CP855 :
1641 enc = kCFStringEncodingDOSCyrillic;
1642 break ;
1643 case wxFONTENCODING_CP866 :
1644 enc =kCFStringEncodingDOSRussian ;
1645 break ;
1646 case wxFONTENCODING_CP874 :
1647 enc = kCFStringEncodingDOSThai;
1648 break ;
1649 case wxFONTENCODING_CP932 :
1650 enc = kCFStringEncodingDOSJapanese;
1651 break ;
1652 case wxFONTENCODING_CP936 :
1653 enc =kCFStringEncodingDOSChineseSimplif ;
1654 break ;
1655 case wxFONTENCODING_CP949 :
1656 enc = kCFStringEncodingDOSKorean;
1657 break ;
1658 case wxFONTENCODING_CP950 :
1659 enc = kCFStringEncodingDOSChineseTrad;
1660 break ;
1661 case wxFONTENCODING_CP1250 :
1662 enc = kCFStringEncodingWindowsLatin2;
1663 break ;
1664 case wxFONTENCODING_CP1251 :
1665 enc =kCFStringEncodingWindowsCyrillic ;
1666 break ;
1667 case wxFONTENCODING_CP1252 :
1668 enc =kCFStringEncodingWindowsLatin1 ;
1669 break ;
1670 case wxFONTENCODING_CP1253 :
1671 enc = kCFStringEncodingWindowsGreek;
1672 break ;
1673 case wxFONTENCODING_CP1254 :
1674 enc = kCFStringEncodingWindowsLatin5;
1675 break ;
1676 case wxFONTENCODING_CP1255 :
1677 enc =kCFStringEncodingWindowsHebrew ;
1678 break ;
1679 case wxFONTENCODING_CP1256 :
1680 enc =kCFStringEncodingWindowsArabic ;
1681 break ;
1682 case wxFONTENCODING_CP1257 :
1683 enc = kCFStringEncodingWindowsBalticRim;
1684 break ;
1685 // This only really encodes to UTF7 (if that) evidently
1686 // case wxFONTENCODING_UTF7 :
1687 // enc = kCFStringEncodingNonLossyASCII ;
1688 // break ;
1689 case wxFONTENCODING_UTF8 :
1690 enc = kCFStringEncodingUTF8 ;
1691 break ;
1692 case wxFONTENCODING_EUC_JP :
1693 enc = kCFStringEncodingEUC_JP;
1694 break ;
1695 case wxFONTENCODING_UTF16 :
1696 enc = kCFStringEncodingUnicode ;
1697 break ;
1698 case wxFONTENCODING_MACROMAN :
1699 enc = kCFStringEncodingMacRoman ;
1700 break ;
1701 case wxFONTENCODING_MACJAPANESE :
1702 enc = kCFStringEncodingMacJapanese ;
1703 break ;
1704 case wxFONTENCODING_MACCHINESETRAD :
1705 enc = kCFStringEncodingMacChineseTrad ;
1706 break ;
1707 case wxFONTENCODING_MACKOREAN :
1708 enc = kCFStringEncodingMacKorean ;
1709 break ;
1710 case wxFONTENCODING_MACARABIC :
1711 enc = kCFStringEncodingMacArabic ;
1712 break ;
1713 case wxFONTENCODING_MACHEBREW :
1714 enc = kCFStringEncodingMacHebrew ;
1715 break ;
1716 case wxFONTENCODING_MACGREEK :
1717 enc = kCFStringEncodingMacGreek ;
1718 break ;
1719 case wxFONTENCODING_MACCYRILLIC :
1720 enc = kCFStringEncodingMacCyrillic ;
1721 break ;
1722 case wxFONTENCODING_MACDEVANAGARI :
1723 enc = kCFStringEncodingMacDevanagari ;
1724 break ;
1725 case wxFONTENCODING_MACGURMUKHI :
1726 enc = kCFStringEncodingMacGurmukhi ;
1727 break ;
1728 case wxFONTENCODING_MACGUJARATI :
1729 enc = kCFStringEncodingMacGujarati ;
1730 break ;
1731 case wxFONTENCODING_MACORIYA :
1732 enc = kCFStringEncodingMacOriya ;
1733 break ;
1734 case wxFONTENCODING_MACBENGALI :
1735 enc = kCFStringEncodingMacBengali ;
1736 break ;
1737 case wxFONTENCODING_MACTAMIL :
1738 enc = kCFStringEncodingMacTamil ;
1739 break ;
1740 case wxFONTENCODING_MACTELUGU :
1741 enc = kCFStringEncodingMacTelugu ;
1742 break ;
1743 case wxFONTENCODING_MACKANNADA :
1744 enc = kCFStringEncodingMacKannada ;
1745 break ;
1746 case wxFONTENCODING_MACMALAJALAM :
1747 enc = kCFStringEncodingMacMalayalam ;
1748 break ;
1749 case wxFONTENCODING_MACSINHALESE :
1750 enc = kCFStringEncodingMacSinhalese ;
1751 break ;
1752 case wxFONTENCODING_MACBURMESE :
1753 enc = kCFStringEncodingMacBurmese ;
1754 break ;
1755 case wxFONTENCODING_MACKHMER :
1756 enc = kCFStringEncodingMacKhmer ;
1757 break ;
1758 case wxFONTENCODING_MACTHAI :
1759 enc = kCFStringEncodingMacThai ;
1760 break ;
1761 case wxFONTENCODING_MACLAOTIAN :
1762 enc = kCFStringEncodingMacLaotian ;
1763 break ;
1764 case wxFONTENCODING_MACGEORGIAN :
1765 enc = kCFStringEncodingMacGeorgian ;
1766 break ;
1767 case wxFONTENCODING_MACARMENIAN :
1768 enc = kCFStringEncodingMacArmenian ;
1769 break ;
1770 case wxFONTENCODING_MACCHINESESIMP :
1771 enc = kCFStringEncodingMacChineseSimp ;
1772 break ;
1773 case wxFONTENCODING_MACTIBETAN :
1774 enc = kCFStringEncodingMacTibetan ;
1775 break ;
1776 case wxFONTENCODING_MACMONGOLIAN :
1777 enc = kCFStringEncodingMacMongolian ;
1778 break ;
1779 case wxFONTENCODING_MACETHIOPIC :
1780 enc = kCFStringEncodingMacEthiopic ;
1781 break ;
1782 case wxFONTENCODING_MACCENTRALEUR :
1783 enc = kCFStringEncodingMacCentralEurRoman ;
1784 break ;
1785 case wxFONTENCODING_MACVIATNAMESE :
1786 enc = kCFStringEncodingMacVietnamese ;
1787 break ;
1788 case wxFONTENCODING_MACARABICEXT :
1789 enc = kCFStringEncodingMacExtArabic ;
1790 break ;
1791 case wxFONTENCODING_MACSYMBOL :
1792 enc = kCFStringEncodingMacSymbol ;
1793 break ;
1794 case wxFONTENCODING_MACDINGBATS :
1795 enc = kCFStringEncodingMacDingbats ;
1796 break ;
1797 case wxFONTENCODING_MACTURKISH :
1798 enc = kCFStringEncodingMacTurkish ;
1799 break ;
1800 case wxFONTENCODING_MACCROATIAN :
1801 enc = kCFStringEncodingMacCroatian ;
1802 break ;
1803 case wxFONTENCODING_MACICELANDIC :
1804 enc = kCFStringEncodingMacIcelandic ;
1805 break ;
1806 case wxFONTENCODING_MACROMANIAN :
1807 enc = kCFStringEncodingMacRomanian ;
1808 break ;
1809 case wxFONTENCODING_MACCELTIC :
1810 enc = kCFStringEncodingMacCeltic ;
1811 break ;
1812 case wxFONTENCODING_MACGAELIC :
1813 enc = kCFStringEncodingMacGaelic ;
1814 break ;
1815 // case wxFONTENCODING_MACKEYBOARD :
1816 // enc = kCFStringEncodingMacKeyboardGlyphs ;
1817 // break ;
1818 default :
1819 // because gcc is picky
1820 break ;
1821 } ;
1822 return enc ;
1823 }
1824
1825 class wxMBConv_cocoa : public wxMBConv
1826 {
1827 public:
1828 wxMBConv_cocoa()
1829 {
1830 Init(CFStringGetSystemEncoding()) ;
1831 }
1832
1833 wxMBConv_cocoa(const wxChar* name)
1834 {
1835 Init( wxCFStringEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
1836 }
1837
1838 wxMBConv_cocoa(wxFontEncoding encoding)
1839 {
1840 Init( wxCFStringEncFromFontEnc(encoding) );
1841 }
1842
1843 ~wxMBConv_cocoa()
1844 {
1845 }
1846
1847 void Init( CFStringEncoding encoding)
1848 {
1849 m_encoding = encoding ;
1850 }
1851
1852 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
1853 {
1854 wxASSERT(szUnConv);
1855
1856 CFStringRef theString = CFStringCreateWithBytes (
1857 NULL, //the allocator
1858 (const UInt8*)szUnConv,
1859 strlen(szUnConv),
1860 m_encoding,
1861 false //no BOM/external representation
1862 );
1863
1864 wxASSERT(theString);
1865
1866 size_t nOutLength = CFStringGetLength(theString);
1867
1868 if (szOut == NULL)
1869 {
1870 CFRelease(theString);
1871 return nOutLength;
1872 }
1873
1874 CFRange theRange = { 0, nOutSize };
1875
1876 #if SIZEOF_WCHAR_T == 4
1877 UniChar* szUniCharBuffer = new UniChar[nOutSize];
1878 #endif
1879
1880 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
1881
1882 CFRelease(theString);
1883
1884 szUniCharBuffer[nOutLength] = '\0' ;
1885
1886 #if SIZEOF_WCHAR_T == 4
1887 wxMBConvUTF16 converter ;
1888 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
1889 delete[] szUniCharBuffer;
1890 #endif
1891
1892 return nOutLength;
1893 }
1894
1895 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
1896 {
1897 wxASSERT(szUnConv);
1898
1899 size_t nRealOutSize;
1900 size_t nBufSize = wxWcslen(szUnConv);
1901 UniChar* szUniBuffer = (UniChar*) szUnConv;
1902
1903 #if SIZEOF_WCHAR_T == 4
1904 wxMBConvUTF16BE converter ;
1905 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
1906 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
1907 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
1908 nBufSize /= sizeof(UniChar);
1909 #endif
1910
1911 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
1912 NULL, //allocator
1913 szUniBuffer,
1914 nBufSize,
1915 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
1916 );
1917
1918 wxASSERT(theString);
1919
1920 //Note that CER puts a BOM when converting to unicode
1921 //so we check and use getchars instead in that case
1922 if (m_encoding == kCFStringEncodingUnicode)
1923 {
1924 if (szOut != NULL)
1925 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
1926
1927 nRealOutSize = CFStringGetLength(theString) + 1;
1928 }
1929 else
1930 {
1931 CFStringGetBytes(
1932 theString,
1933 CFRangeMake(0, CFStringGetLength(theString)),
1934 m_encoding,
1935 0, //what to put in characters that can't be converted -
1936 //0 tells CFString to return NULL if it meets such a character
1937 false, //not an external representation
1938 (UInt8*) szOut,
1939 nOutSize,
1940 (CFIndex*) &nRealOutSize
1941 );
1942 }
1943
1944 CFRelease(theString);
1945
1946 #if SIZEOF_WCHAR_T == 4
1947 delete[] szUniBuffer;
1948 #endif
1949
1950 return nRealOutSize - 1;
1951 }
1952
1953 bool IsOk() const
1954 {
1955 return m_encoding != kCFStringEncodingInvalidId &&
1956 CFStringIsEncodingAvailable(m_encoding);
1957 }
1958
1959 private:
1960 CFStringEncoding m_encoding ;
1961 };
1962
1963 #endif // defined(__WXCOCOA__)
1964
1965 // ============================================================================
1966 // Mac conversion classes
1967 // ============================================================================
1968
1969 #if defined(__WXMAC__) && defined(TARGET_CARBON)
1970
1971 class wxMBConv_mac : public wxMBConv
1972 {
1973 public:
1974 wxMBConv_mac()
1975 {
1976 Init(CFStringGetSystemEncoding()) ;
1977 }
1978
1979 wxMBConv_mac(const wxChar* name)
1980 {
1981 Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
1982 }
1983
1984 wxMBConv_mac(wxFontEncoding encoding)
1985 {
1986 Init( wxMacGetSystemEncFromFontEnc(encoding) );
1987 }
1988
1989 ~wxMBConv_mac()
1990 {
1991 OSStatus status = noErr ;
1992 status = TECDisposeConverter(m_MB2WC_converter);
1993 status = TECDisposeConverter(m_WC2MB_converter);
1994 }
1995
1996
1997 void Init( TextEncodingBase encoding)
1998 {
1999 OSStatus status = noErr ;
2000 m_char_encoding = encoding ;
2001 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2002
2003 status = TECCreateConverter(&m_MB2WC_converter,
2004 m_char_encoding,
2005 m_unicode_encoding);
2006 status = TECCreateConverter(&m_WC2MB_converter,
2007 m_unicode_encoding,
2008 m_char_encoding);
2009 }
2010
2011 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2012 {
2013 OSStatus status = noErr ;
2014 ByteCount byteOutLen ;
2015 ByteCount byteInLen = strlen(psz) ;
2016 wchar_t *tbuf = NULL ;
2017 UniChar* ubuf = NULL ;
2018 size_t res = 0 ;
2019
2020 if (buf == NULL)
2021 {
2022 //apple specs say at least 32
2023 n = 32 ;
2024 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2025 }
2026 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2027 #if SIZEOF_WCHAR_T == 4
2028 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2029 #else
2030 ubuf = (UniChar*) (buf ? buf : tbuf) ;
2031 #endif
2032 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2033 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2034 #if SIZEOF_WCHAR_T == 4
2035 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2036 // is not properly terminated we get random characters at the end
2037 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2038 wxMBConvUTF16BE converter ;
2039 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2040 free( ubuf ) ;
2041 #else
2042 res = byteOutLen / sizeof( UniChar ) ;
2043 #endif
2044 if ( buf == NULL )
2045 free(tbuf) ;
2046
2047 if ( buf && res < n)
2048 buf[res] = 0;
2049
2050 return res ;
2051 }
2052
2053 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2054 {
2055 OSStatus status = noErr ;
2056 ByteCount byteOutLen ;
2057 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2058
2059 char *tbuf = NULL ;
2060
2061 if (buf == NULL)
2062 {
2063 //apple specs say at least 32
2064 n = 32;
2065 tbuf = (char*) malloc( n ) ;
2066 }
2067
2068 ByteCount byteBufferLen = n ;
2069 UniChar* ubuf = NULL ;
2070 #if SIZEOF_WCHAR_T == 4
2071 wxMBConvUTF16BE converter ;
2072 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2073 byteInLen = unicharlen ;
2074 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2075 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2076 #else
2077 ubuf = (UniChar*) psz ;
2078 #endif
2079 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2080 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2081 #if SIZEOF_WCHAR_T == 4
2082 free( ubuf ) ;
2083 #endif
2084 if ( buf == NULL )
2085 free(tbuf) ;
2086
2087 size_t res = byteOutLen ;
2088 if ( buf && res < n)
2089 {
2090 buf[res] = 0;
2091
2092 //we need to double-trip to verify it didn't insert any ? in place
2093 //of bogus characters
2094 wxWCharBuffer wcBuf(n);
2095 size_t pszlen = wxWcslen(psz);
2096 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2097 wxWcslen(wcBuf) != pszlen ||
2098 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2099 {
2100 // we didn't obtain the same thing we started from, hence
2101 // the conversion was lossy and we consider that it failed
2102 return (size_t)-1;
2103 }
2104 }
2105
2106 return res ;
2107 }
2108
2109 bool IsOk() const
2110 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2111
2112 private:
2113 TECObjectRef m_MB2WC_converter ;
2114 TECObjectRef m_WC2MB_converter ;
2115
2116 TextEncodingBase m_char_encoding ;
2117 TextEncodingBase m_unicode_encoding ;
2118 };
2119
2120 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2121
2122 // ============================================================================
2123 // wxEncodingConverter based conversion classes
2124 // ============================================================================
2125
2126 #if wxUSE_FONTMAP
2127
2128 class wxMBConv_wxwin : public wxMBConv
2129 {
2130 private:
2131 void Init()
2132 {
2133 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2134 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2135 }
2136
2137 public:
2138 // temporarily just use wxEncodingConverter stuff,
2139 // so that it works while a better implementation is built
2140 wxMBConv_wxwin(const wxChar* name)
2141 {
2142 if (name)
2143 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
2144 else
2145 m_enc = wxFONTENCODING_SYSTEM;
2146
2147 Init();
2148 }
2149
2150 wxMBConv_wxwin(wxFontEncoding enc)
2151 {
2152 m_enc = enc;
2153
2154 Init();
2155 }
2156
2157 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2158 {
2159 size_t inbuf = strlen(psz);
2160 if (buf)
2161 m2w.Convert(psz,buf);
2162 return inbuf;
2163 }
2164
2165 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2166 {
2167 const size_t inbuf = wxWcslen(psz);
2168 if (buf)
2169 w2m.Convert(psz,buf);
2170
2171 return inbuf;
2172 }
2173
2174 bool IsOk() const { return m_ok; }
2175
2176 public:
2177 wxFontEncoding m_enc;
2178 wxEncodingConverter m2w, w2m;
2179
2180 // were we initialized successfully?
2181 bool m_ok;
2182
2183 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2184 };
2185
2186 #endif // wxUSE_FONTMAP
2187
2188 // ============================================================================
2189 // wxCSConv implementation
2190 // ============================================================================
2191
2192 void wxCSConv::Init()
2193 {
2194 m_name = NULL;
2195 m_convReal = NULL;
2196 m_deferred = true;
2197 }
2198
2199 wxCSConv::wxCSConv(const wxChar *charset)
2200 {
2201 Init();
2202
2203 if ( charset )
2204 {
2205 SetName(charset);
2206 }
2207
2208 m_encoding = wxFONTENCODING_SYSTEM;
2209 }
2210
2211 wxCSConv::wxCSConv(wxFontEncoding encoding)
2212 {
2213 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2214 {
2215 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2216
2217 encoding = wxFONTENCODING_SYSTEM;
2218 }
2219
2220 Init();
2221
2222 m_encoding = encoding;
2223 }
2224
2225 wxCSConv::~wxCSConv()
2226 {
2227 Clear();
2228 }
2229
2230 wxCSConv::wxCSConv(const wxCSConv& conv)
2231 : wxMBConv()
2232 {
2233 Init();
2234
2235 SetName(conv.m_name);
2236 m_encoding = conv.m_encoding;
2237 }
2238
2239 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2240 {
2241 Clear();
2242
2243 SetName(conv.m_name);
2244 m_encoding = conv.m_encoding;
2245
2246 return *this;
2247 }
2248
2249 void wxCSConv::Clear()
2250 {
2251 free(m_name);
2252 delete m_convReal;
2253
2254 m_name = NULL;
2255 m_convReal = NULL;
2256 }
2257
2258 void wxCSConv::SetName(const wxChar *charset)
2259 {
2260 if (charset)
2261 {
2262 m_name = wxStrdup(charset);
2263 m_deferred = true;
2264 }
2265 }
2266
2267 wxMBConv *wxCSConv::DoCreate() const
2268 {
2269 // check for the special case of ASCII or ISO8859-1 charset: as we have
2270 // special knowledge of it anyhow, we don't need to create a special
2271 // conversion object
2272 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2273 {
2274 // don't convert at all
2275 return NULL;
2276 }
2277
2278 // we trust OS to do conversion better than we can so try external
2279 // conversion methods first
2280 //
2281 // the full order is:
2282 // 1. OS conversion (iconv() under Unix or Win32 API)
2283 // 2. hard coded conversions for UTF
2284 // 3. wxEncodingConverter as fall back
2285
2286 // step (1)
2287 #ifdef HAVE_ICONV
2288 #if !wxUSE_FONTMAP
2289 if ( m_name )
2290 #endif // !wxUSE_FONTMAP
2291 {
2292 wxString name(m_name);
2293
2294 #if wxUSE_FONTMAP
2295 if ( name.empty() )
2296 name = wxFontMapper::Get()->GetEncodingName(m_encoding);
2297 #endif // wxUSE_FONTMAP
2298
2299 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2300 if ( conv->IsOk() )
2301 return conv;
2302
2303 delete conv;
2304 }
2305 #endif // HAVE_ICONV
2306
2307 #ifdef wxHAVE_WIN32_MB2WC
2308 {
2309 #if wxUSE_FONTMAP
2310 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2311 : new wxMBConv_win32(m_encoding);
2312 if ( conv->IsOk() )
2313 return conv;
2314
2315 delete conv;
2316 #else
2317 return NULL;
2318 #endif
2319 }
2320 #endif // wxHAVE_WIN32_MB2WC
2321 #if defined(__WXMAC__)
2322 {
2323 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
2324 {
2325
2326 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2327 : new wxMBConv_mac(m_encoding);
2328 if ( conv->IsOk() )
2329 return conv;
2330
2331 delete conv;
2332 }
2333 }
2334 #endif
2335 #if defined(__WXCOCOA__)
2336 {
2337 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2338 {
2339
2340 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2341 : new wxMBConv_cocoa(m_encoding);
2342 if ( conv->IsOk() )
2343 return conv;
2344
2345 delete conv;
2346 }
2347 }
2348 #endif
2349 // step (2)
2350 wxFontEncoding enc = m_encoding;
2351 #if wxUSE_FONTMAP
2352 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2353 {
2354 // use "false" to suppress interactive dialogs -- we can be called from
2355 // anywhere and popping up a dialog from here is the last thing we want to
2356 // do
2357 enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
2358 }
2359 #endif // wxUSE_FONTMAP
2360
2361 switch ( enc )
2362 {
2363 case wxFONTENCODING_UTF7:
2364 return new wxMBConvUTF7;
2365
2366 case wxFONTENCODING_UTF8:
2367 return new wxMBConvUTF8;
2368
2369 case wxFONTENCODING_UTF16BE:
2370 return new wxMBConvUTF16BE;
2371
2372 case wxFONTENCODING_UTF16LE:
2373 return new wxMBConvUTF16LE;
2374
2375 case wxFONTENCODING_UTF32BE:
2376 return new wxMBConvUTF32BE;
2377
2378 case wxFONTENCODING_UTF32LE:
2379 return new wxMBConvUTF32LE;
2380
2381 default:
2382 // nothing to do but put here to suppress gcc warnings
2383 ;
2384 }
2385
2386 // step (3)
2387 #if wxUSE_FONTMAP
2388 {
2389 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2390 : new wxMBConv_wxwin(m_encoding);
2391 if ( conv->IsOk() )
2392 return conv;
2393
2394 delete conv;
2395 }
2396 #endif // wxUSE_FONTMAP
2397
2398 // NB: This is a hack to prevent deadlock. What could otherwise happen
2399 // in Unicode build: wxConvLocal creation ends up being here
2400 // because of some failure and logs the error. But wxLog will try to
2401 // attach timestamp, for which it will need wxConvLocal (to convert
2402 // time to char* and then wchar_t*), but that fails, tries to log
2403 // error, but wxLog has a (already locked) critical section that
2404 // guards static buffer.
2405 static bool alreadyLoggingError = false;
2406 if (!alreadyLoggingError)
2407 {
2408 alreadyLoggingError = true;
2409 wxLogError(_("Cannot convert from the charset '%s'!"),
2410 m_name ? m_name
2411 :
2412 #if wxUSE_FONTMAP
2413 wxFontMapper::GetEncodingDescription(m_encoding).c_str()
2414 #else // !wxUSE_FONTMAP
2415 wxString::Format(_("encoding %s"), m_encoding).c_str()
2416 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2417 );
2418 alreadyLoggingError = false;
2419 }
2420
2421 return NULL;
2422 }
2423
2424 void wxCSConv::CreateConvIfNeeded() const
2425 {
2426 if ( m_deferred )
2427 {
2428 wxCSConv *self = (wxCSConv *)this; // const_cast
2429
2430 #if wxUSE_INTL
2431 // if we don't have neither the name nor the encoding, use the default
2432 // encoding for this system
2433 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2434 {
2435 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2436 }
2437 #endif // wxUSE_INTL
2438
2439 self->m_convReal = DoCreate();
2440 self->m_deferred = false;
2441 }
2442 }
2443
2444 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2445 {
2446 CreateConvIfNeeded();
2447
2448 if (m_convReal)
2449 return m_convReal->MB2WC(buf, psz, n);
2450
2451 // latin-1 (direct)
2452 size_t len = strlen(psz);
2453
2454 if (buf)
2455 {
2456 for (size_t c = 0; c <= len; c++)
2457 buf[c] = (unsigned char)(psz[c]);
2458 }
2459
2460 return len;
2461 }
2462
2463 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2464 {
2465 CreateConvIfNeeded();
2466
2467 if (m_convReal)
2468 return m_convReal->WC2MB(buf, psz, n);
2469
2470 // latin-1 (direct)
2471 const size_t len = wxWcslen(psz);
2472 if (buf)
2473 {
2474 for (size_t c = 0; c <= len; c++)
2475 {
2476 if (psz[c] > 0xFF)
2477 return (size_t)-1;
2478 buf[c] = (char)psz[c];
2479 }
2480 }
2481 else
2482 {
2483 for (size_t c = 0; c <= len; c++)
2484 {
2485 if (psz[c] > 0xFF)
2486 return (size_t)-1;
2487 }
2488 }
2489
2490 return len;
2491 }
2492
2493 // ----------------------------------------------------------------------------
2494 // globals
2495 // ----------------------------------------------------------------------------
2496
// The object behind wxConvLibc: its class depends on the platform.
2497 #ifdef __WINDOWS__
2498 static wxMBConv_win32 wxConvLibcObj;
2499 #elif defined(__WXMAC__) && !defined(__MACH__)
2500 static wxMBConv_mac wxConvLibcObj ;
2501 #else
2502 static wxMBConvLibc wxConvLibcObj;
2503 #endif
2504
// The objects behind the other global converters.
2505 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2506 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2507 static wxMBConvUTF7 wxConvUTF7Obj;
2508 static wxMBConvUTF8 wxConvUTF8Obj;
2509
2510
// The exported globals are references to the static objects defined above;
// wxConvCurrent is a pointer which initially points to the wxConvLibc object.
2511 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2512 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2513 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2514 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2515 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2516 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2517
2518 #else // !wxUSE_WCHAR_T
2519
2520 // stand-ins in the absence of wchar_t: the same global names are still
// defined, but as plain wxMBConv objects (there is no wxConvUTF7 here)
2521 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2522 wxConvISO8859_1,
2523 wxConvLocal,
2524 wxConvUTF8;
2525
2526 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2527
2528