]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
correcting allocated buffer size
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
6 // Modified by:
7 // Created: 29/01/98
8 // RCS-ID: $Id$
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
14
15 // ============================================================================
16 // declarations
17 // ============================================================================
18
19 // ----------------------------------------------------------------------------
20 // headers
21 // ----------------------------------------------------------------------------
22
23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
24 #pragma implementation "strconv.h"
25 #endif
26
27 // For compilers that support precompilation, includes "wx.h".
28 #include "wx/wxprec.h"
29
30 #ifdef __BORLANDC__
31 #pragma hdrstop
32 #endif
33
34 #ifndef WX_PRECOMP
35 #include "wx/intl.h"
36 #include "wx/log.h"
37 #endif // WX_PRECOMP
38
39 #include "wx/strconv.h"
40
41 #if wxUSE_WCHAR_T
42
43 #ifdef __WXMSW__
44 #include "wx/msw/private.h"
45 #endif
46
47 #ifdef __WINDOWS__
48 #include "wx/msw/missing.h"
49 #endif
50
51 #ifndef __WXWINCE__
52 #include <errno.h>
53 #endif
54
55 #include <ctype.h>
56 #include <string.h>
57 #include <stdlib.h>
58
59 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
60 #define wxHAVE_WIN32_MB2WC
61 #endif // __WIN32__ but !__WXMICROWIN__
62
63 // ----------------------------------------------------------------------------
64 // headers
65 // ----------------------------------------------------------------------------
66
67 #ifdef __SALFORDC__
68 #include <clib.h>
69 #endif
70
71 #ifdef HAVE_ICONV
72 #include <iconv.h>
73 #endif
74
75 #include "wx/encconv.h"
76 #include "wx/fontmap.h"
77 #include "wx/utils.h"
78
79 #ifdef __WXMAC__
80 #include <ATSUnicode.h>
81 #include <TextCommon.h>
82 #include <TextEncodingConverter.h>
83
84 #include "wx/mac/private.h" // includes mac headers
85 #endif
86 // ----------------------------------------------------------------------------
87 // macros
88 // ----------------------------------------------------------------------------
89
// Byte-swap, in place, each of the first len elements of the array str.
// Both macros expand to a self-contained braced block, so they are used as
// statements without needing a trailing semicolon. The arguments are
// parenthesised so that expressions (not just plain identifiers) can be passed.
#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<(len); _c++) (str)[_c]=wxUINT32_SWAP_ALWAYS((str)[_c]); }
#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<(len); _c++) (str)[_c]=wxUINT16_SWAP_ALWAYS((str)[_c]); }
92
93 #if SIZEOF_WCHAR_T == 4
94 #define WC_NAME "UCS4"
95 #define WC_BSWAP BSWAP_UCS4
96 #ifdef WORDS_BIGENDIAN
97 #define WC_NAME_BEST "UCS-4BE"
98 #else
99 #define WC_NAME_BEST "UCS-4LE"
100 #endif
101 #elif SIZEOF_WCHAR_T == 2
102 #define WC_NAME "UTF16"
103 #define WC_BSWAP BSWAP_UTF16
104 #define WC_UTF16
105 #ifdef WORDS_BIGENDIAN
106 #define WC_NAME_BEST "UTF-16BE"
107 #else
108 #define WC_NAME_BEST "UTF-16LE"
109 #endif
110 #else // sizeof(wchar_t) != 2 nor 4
111 // does this ever happen?
112 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
113 #endif
114
115 // ============================================================================
116 // implementation
117 // ============================================================================
118
119 // ----------------------------------------------------------------------------
120 // UTF-16 en/decoding to/from UCS-4
121 // ----------------------------------------------------------------------------
122
123
124 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
125 {
126 if (input<=0xffff)
127 {
128 if (output)
129 *output = (wxUint16) input;
130 return 1;
131 }
132 else if (input>=0x110000)
133 {
134 return (size_t)-1;
135 }
136 else
137 {
138 if (output)
139 {
140 *output++ = (wxUint16) ((input >> 10)+0xd7c0);
141 *output = (wxUint16) ((input&0x3ff)+0xdc00);
142 }
143 return 2;
144 }
145 }
146
147 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
148 {
149 if ((*input<0xd800) || (*input>0xdfff))
150 {
151 output = *input;
152 return 1;
153 }
154 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
155 {
156 output = *input;
157 return (size_t)-1;
158 }
159 else
160 {
161 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
162 return 2;
163 }
164 }
165
166
167 // ----------------------------------------------------------------------------
168 // wxMBConv
169 // ----------------------------------------------------------------------------
170
171 wxMBConv::~wxMBConv()
172 {
173 // nothing to do here (necessary for Darwin linking probably)
174 }
175
176 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
177 {
178 if ( psz )
179 {
180 // calculate the length of the buffer needed first
181 size_t nLen = MB2WC(NULL, psz, 0);
182 if ( nLen != (size_t)-1 )
183 {
184 // now do the actual conversion
185 wxWCharBuffer buf(nLen);
186 nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
187 if ( nLen != (size_t)-1 )
188 {
189 return buf;
190 }
191 }
192 }
193
194 wxWCharBuffer buf((wchar_t *)NULL);
195
196 return buf;
197 }
198
199 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
200 {
201 if ( pwz )
202 {
203 size_t nLen = WC2MB(NULL, pwz, 0);
204 if ( nLen != (size_t)-1 )
205 {
206 wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
207 nLen = WC2MB(buf.data(), pwz, nLen + 4);
208 if ( nLen != (size_t)-1 )
209 {
210 return buf;
211 }
212 }
213 }
214
215 wxCharBuffer buf((char *)NULL);
216
217 return buf;
218 }
219
220 size_t wxMBConv::MB2WC(wchar_t* szBuffer, const char* szString,
221 size_t outsize, size_t nStringLen) const
222 {
223 const char* szEnd = szString + nStringLen + 1;
224 const char* szPos = szString;
225 const char* szStart = szPos;
226
227 size_t nActualLength = 0;
228
229 //Convert the string until the length() is reached, continuing the
230 //loop every time a null character is reached
231 while(szPos != szEnd)
232 {
233 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
234
235 //Get the length of the current (sub)string
236 size_t nLen = MB2WC(NULL, szPos, 0);
237
238 //Invalid conversion?
239 if( nLen == (size_t)-1 )
240 return nLen;
241
242 //Increase the actual length (+1 for current null character)
243 nActualLength += nLen + 1;
244
245 //Only copy data in if buffer size is big enough
246 if (szBuffer != NULL &&
247 nActualLength <= outsize)
248 {
249 //Convert the current (sub)string
250 if ( MB2WC(&szBuffer[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
251 return (size_t)-1;
252 }
253
254 //Increment to next (sub)string
255 //Note that we have to use strlen here instead of nLen
256 //here because XX2XX gives us the size of the output buffer,
257 //not neccessarly the length of the string
258 szPos += strlen(szPos) + 1;
259 }
260
261 return nActualLength - 1; //success - return actual length
262 }
263
264 size_t wxMBConv::WC2MB(char* szBuffer, const wchar_t* szString,
265 size_t outsize, size_t nStringLen) const
266 {
267 const wchar_t* szEnd = szString + nStringLen + 1;
268 const wchar_t* szPos = szString;
269 const wchar_t* szStart = szPos;
270
271 size_t nActualLength = 0;
272
273 //Convert the string until the length() is reached, continuing the
274 //loop every time a null character is reached
275 while(szPos != szEnd)
276 {
277 wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
278
279 //Get the length of the current (sub)string
280 size_t nLen = WC2MB(NULL, szPos, 0);
281
282 //Invalid conversion?
283 if( nLen == (size_t)-1 )
284 return nLen;
285
286 //Increase the actual length (+1 for current null character)
287 nActualLength += nLen + 1;
288
289 //Only copy data in if buffer size is big enough
290 if (szBuffer != NULL &&
291 nActualLength <= outsize)
292 {
293 //Convert the current (sub)string
294 if(WC2MB(&szBuffer[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
295 return (size_t)-1;
296 }
297
298 //Increment to next (sub)string
299 //Note that we have to use wxWcslen here instead of nLen
300 //here because XX2XX gives us the size of the output buffer,
301 //not neccessarly the length of the string
302 szPos += wxWcslen(szPos) + 1;
303 }
304
305 return nActualLength - 1; //success - return actual length
306 }
307
308 // ----------------------------------------------------------------------------
309 // wxMBConvLibc
310 // ----------------------------------------------------------------------------
311
312 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
313 {
314 return wxMB2WC(buf, psz, n);
315 }
316
317 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
318 {
319 return wxWC2MB(buf, psz, n);
320 }
321 // ----------------------------------------------------------------------------
322 // UTF-7
323 // ----------------------------------------------------------------------------
324
325 // Implementation (C) 2004 Fredrik Roubert
326
//
// BASE64 decoding table
//
// Indexed by byte value: gives the 6 bit value of the bytes which are BASE64
// digits ('A'-'Z', 'a'-'z', '0'-'9', '+' and '/') and 0xff for all the others.
//
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0'-'9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x5f: 'A'-'Z'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x7f: 'a'-'z'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
365
366 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
367 {
368
369 size_t len = 0;
370
371 while (*psz && ((!buf) || (len < n)))
372 {
373 unsigned char cc = *psz++;
374 if (cc != '+')
375 {
376 // plain ASCII char
377 if (buf)
378 *buf++ = cc;
379 len++;
380 }
381 else if (*psz == '-')
382 {
383 // encoded plus sign
384 if (buf)
385 *buf++ = cc;
386 len++;
387 psz++;
388 }
389 else
390 {
391 // BASE64 encoded string
392 bool lsb;
393 unsigned char c;
394 unsigned int d, l;
395 for (lsb = false, d = 0, l = 0;
396 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
397 {
398 d <<= 6;
399 d += cc;
400 for (l += 6; l >= 8; lsb = !lsb)
401 {
402 c = (d >> (l -= 8)) % 256;
403 if (lsb)
404 {
405 if (buf)
406 *buf++ |= c;
407 len ++;
408 }
409 else
410 if (buf)
411 *buf = c << 8;
412 }
413 }
414 if (*psz == '-')
415 psz++;
416 }
417 }
418 if (buf && (len < n))
419 *buf = 0;
420 return len;
421 }
422
//
// BASE64 encoding table
//
// Maps a 6 bit value to the corresponding BASE64 digit.
//
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    '+', '/'
};
437
//
// UTF-7 encoding table, indexed by 7 bit character code
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    /* 0x00 */ 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x08 */ 3, 2, 2, 3, 3, 2, 3, 3,
    /* 0x10 */ 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x18 */ 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x20 */ 2, 1, 1, 1, 1, 1, 1, 0,
    /* 0x28 */ 0, 0, 1, 3, 0, 0, 0, 3,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x38 */ 0, 0, 0, 1, 1, 1, 1, 0,
    /* 0x40 */ 1, 0, 0, 0, 0, 0, 0, 0,
    /* 0x48 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x58 */ 0, 0, 0, 1, 3, 1, 1, 1,
    /* 0x60 */ 1, 0, 0, 0, 0, 0, 0, 0,
    /* 0x68 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x78 */ 0, 0, 0, 1, 1, 1, 3, 3
};
457
458 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t
459 *psz, size_t n) const
460 {
461
462
463 size_t len = 0;
464
465 while (*psz && ((!buf) || (len < n)))
466 {
467 wchar_t cc = *psz++;
468 if (cc < 0x80 && utf7encode[cc] < 1)
469 {
470 // plain ASCII char
471 if (buf)
472 *buf++ = (char)cc;
473 len++;
474 }
475 #ifndef WC_UTF16
476 #ifdef __VMS
477 else if (cc > 0xffff)
478 #else
479 else if (cc > ((const wchar_t)0xffff))
480 #endif
481 {
482 // no surrogate pair generation (yet?)
483 return (size_t)-1;
484 }
485 #endif
486 else
487 {
488 if (buf)
489 *buf++ = '+';
490 len++;
491 if (cc != '+')
492 {
493 // BASE64 encode string
494 unsigned int lsb, d, l;
495 for (d = 0, l = 0;; psz++)
496 {
497 for (lsb = 0; lsb < 2; lsb ++)
498 {
499 d <<= 8;
500 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
501
502 for (l += 8; l >= 6; )
503 {
504 l -= 6;
505 if (buf)
506 *buf++ = utf7enb64[(d >> l) % 64];
507 len++;
508 }
509 }
510 cc = *psz;
511 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
512 break;
513 }
514 if (l != 0)
515 {
516 if (buf)
517 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
518 len++;
519 }
520 }
521 if (buf)
522 *buf++ = '-';
523 len++;
524 }
525 }
526 if (buf && (len < n))
527 *buf = 0;
528 return len;
529 }
530
531 // ----------------------------------------------------------------------------
532 // UTF-8
533 // ----------------------------------------------------------------------------
534
535 static wxUint32 utf8_max[]=
536 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
537
538 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
539 {
540 size_t len = 0;
541
542 while (*psz && ((!buf) || (len < n)))
543 {
544 unsigned char cc = *psz++, fc = cc;
545 unsigned cnt;
546 for (cnt = 0; fc & 0x80; cnt++)
547 fc <<= 1;
548 if (!cnt)
549 {
550 // plain ASCII char
551 if (buf)
552 *buf++ = cc;
553 len++;
554 }
555 else
556 {
557 cnt--;
558 if (!cnt)
559 {
560 // invalid UTF-8 sequence
561 return (size_t)-1;
562 }
563 else
564 {
565 unsigned ocnt = cnt - 1;
566 wxUint32 res = cc & (0x3f >> cnt);
567 while (cnt--)
568 {
569 cc = *psz++;
570 if ((cc & 0xC0) != 0x80)
571 {
572 // invalid UTF-8 sequence
573 return (size_t)-1;
574 }
575 res = (res << 6) | (cc & 0x3f);
576 }
577 if (res <= utf8_max[ocnt])
578 {
579 // illegal UTF-8 encoding
580 return (size_t)-1;
581 }
582 #ifdef WC_UTF16
583 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
584 size_t pa = encode_utf16(res, (wxUint16 *)buf);
585 if (pa == (size_t)-1)
586 return (size_t)-1;
587 if (buf)
588 buf += pa;
589 len += pa;
590 #else // !WC_UTF16
591 if (buf)
592 *buf++ = res;
593 len++;
594 #endif // WC_UTF16/!WC_UTF16
595 }
596 }
597 }
598 if (buf && (len < n))
599 *buf = 0;
600 return len;
601 }
602
603 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
604 {
605 size_t len = 0;
606
607 while (*psz && ((!buf) || (len < n)))
608 {
609 wxUint32 cc;
610 #ifdef WC_UTF16
611 // cast is ok for WC_UTF16
612 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
613 psz += (pa == (size_t)-1) ? 1 : pa;
614 #else
615 cc=(*psz++) & 0x7fffffff;
616 #endif
617 unsigned cnt;
618 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
619 if (!cnt)
620 {
621 // plain ASCII char
622 if (buf)
623 *buf++ = (char) cc;
624 len++;
625 }
626
627 else
628 {
629 len += cnt + 1;
630 if (buf)
631 {
632 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
633 while (cnt--)
634 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
635 }
636 }
637 }
638
639 if (buf && (len<n)) *buf = 0;
640
641 return len;
642 }
643
644
645
646
647 // ----------------------------------------------------------------------------
648 // UTF-16
649 // ----------------------------------------------------------------------------
650
651 #ifdef WORDS_BIGENDIAN
652 #define wxMBConvUTF16straight wxMBConvUTF16BE
653 #define wxMBConvUTF16swap wxMBConvUTF16LE
654 #else
655 #define wxMBConvUTF16swap wxMBConvUTF16BE
656 #define wxMBConvUTF16straight wxMBConvUTF16LE
657 #endif
658
659
660 #ifdef WC_UTF16
661
662 // copy 16bit MB to 16bit String
663 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
664 {
665 size_t len=0;
666
667 while (*(wxUint16*)psz && (!buf || len < n))
668 {
669 if (buf)
670 *buf++ = *(wxUint16*)psz;
671 len++;
672
673 psz += sizeof(wxUint16);
674 }
675 if (buf && len<n) *buf=0;
676
677 return len;
678 }
679
680
681 // copy 16bit String to 16bit MB
682 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
683 {
684 size_t len=0;
685
686 while (*psz && (!buf || len < n))
687 {
688 if (buf)
689 {
690 *(wxUint16*)buf = *psz;
691 buf += sizeof(wxUint16);
692 }
693 len += sizeof(wxUint16);
694 psz++;
695 }
696 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
697
698 return len;
699 }
700
701
702 // swap 16bit MB to 16bit String
703 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
704 {
705 size_t len=0;
706
707 while (*(wxUint16*)psz && (!buf || len < n))
708 {
709 if (buf)
710 {
711 ((char *)buf)[0] = psz[1];
712 ((char *)buf)[1] = psz[0];
713 buf++;
714 }
715 len++;
716 psz += sizeof(wxUint16);
717 }
718 if (buf && len<n) *buf=0;
719
720 return len;
721 }
722
723
724 // swap 16bit MB to 16bit String
725 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
726 {
727 size_t len=0;
728
729 while (*psz && (!buf || len < n))
730 {
731 if (buf)
732 {
733 *buf++ = ((char*)psz)[1];
734 *buf++ = ((char*)psz)[0];
735 }
736 len += sizeof(wxUint16);
737 psz++;
738 }
739 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
740
741 return len;
742 }
743
744
745 #else // WC_UTF16
746
747
748 // copy 16bit MB to 32bit String
749 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
750 {
751 size_t len=0;
752
753 while (*(wxUint16*)psz && (!buf || len < n))
754 {
755 wxUint32 cc;
756 size_t pa=decode_utf16((wxUint16*)psz, cc);
757 if (pa == (size_t)-1)
758 return pa;
759
760 if (buf)
761 *buf++ = cc;
762 len++;
763 psz += pa * sizeof(wxUint16);
764 }
765 if (buf && len<n) *buf=0;
766
767 return len;
768 }
769
770
771 // copy 32bit String to 16bit MB
772 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
773 {
774 size_t len=0;
775
776 while (*psz && (!buf || len < n))
777 {
778 wxUint16 cc[2];
779 size_t pa=encode_utf16(*psz, cc);
780
781 if (pa == (size_t)-1)
782 return pa;
783
784 if (buf)
785 {
786 *(wxUint16*)buf = cc[0];
787 buf += sizeof(wxUint16);
788 if (pa > 1)
789 {
790 *(wxUint16*)buf = cc[1];
791 buf += sizeof(wxUint16);
792 }
793 }
794
795 len += pa*sizeof(wxUint16);
796 psz++;
797 }
798 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
799
800 return len;
801 }
802
803
804 // swap 16bit MB to 32bit String
805 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
806 {
807 size_t len=0;
808
809 while (*(wxUint16*)psz && (!buf || len < n))
810 {
811 wxUint32 cc;
812 char tmp[4];
813 tmp[0]=psz[1]; tmp[1]=psz[0];
814 tmp[2]=psz[3]; tmp[3]=psz[2];
815
816 size_t pa=decode_utf16((wxUint16*)tmp, cc);
817 if (pa == (size_t)-1)
818 return pa;
819
820 if (buf)
821 *buf++ = cc;
822
823 len++;
824 psz += pa * sizeof(wxUint16);
825 }
826 if (buf && len<n) *buf=0;
827
828 return len;
829 }
830
831
832 // swap 32bit String to 16bit MB
833 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
834 {
835 size_t len=0;
836
837 while (*psz && (!buf || len < n))
838 {
839 wxUint16 cc[2];
840 size_t pa=encode_utf16(*psz, cc);
841
842 if (pa == (size_t)-1)
843 return pa;
844
845 if (buf)
846 {
847 *buf++ = ((char*)cc)[1];
848 *buf++ = ((char*)cc)[0];
849 if (pa > 1)
850 {
851 *buf++ = ((char*)cc)[3];
852 *buf++ = ((char*)cc)[2];
853 }
854 }
855
856 len += pa*sizeof(wxUint16);
857 psz++;
858 }
859 if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
860
861 return len;
862 }
863
864 #endif // WC_UTF16
865
866
867 // ----------------------------------------------------------------------------
868 // UTF-32
869 // ----------------------------------------------------------------------------
870
871 #ifdef WORDS_BIGENDIAN
872 #define wxMBConvUTF32straight wxMBConvUTF32BE
873 #define wxMBConvUTF32swap wxMBConvUTF32LE
874 #else
875 #define wxMBConvUTF32swap wxMBConvUTF32BE
876 #define wxMBConvUTF32straight wxMBConvUTF32LE
877 #endif
878
879
880 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
881 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
882
883
884 #ifdef WC_UTF16
885
886 // copy 32bit MB to 16bit String
887 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
888 {
889 size_t len=0;
890
891 while (*(wxUint32*)psz && (!buf || len < n))
892 {
893 wxUint16 cc[2];
894
895 size_t pa=encode_utf16(*(wxUint32*)psz, cc);
896 if (pa == (size_t)-1)
897 return pa;
898
899 if (buf)
900 {
901 *buf++ = cc[0];
902 if (pa > 1)
903 *buf++ = cc[1];
904 }
905 len += pa;
906 psz += sizeof(wxUint32);
907 }
908 if (buf && len<n) *buf=0;
909
910 return len;
911 }
912
913
914 // copy 16bit String to 32bit MB
915 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
916 {
917 size_t len=0;
918
919 while (*psz && (!buf || len < n))
920 {
921 wxUint32 cc;
922
923 // cast is ok for WC_UTF16
924 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
925 if (pa == (size_t)-1)
926 return pa;
927
928 if (buf)
929 {
930 *(wxUint32*)buf = cc;
931 buf += sizeof(wxUint32);
932 }
933 len += sizeof(wxUint32);
934 psz += pa;
935 }
936
937 if (buf && len<=n-sizeof(wxUint32))
938 *(wxUint32*)buf=0;
939
940 return len;
941 }
942
943
944
945 // swap 32bit MB to 16bit String
946 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
947 {
948 size_t len=0;
949
950 while (*(wxUint32*)psz && (!buf || len < n))
951 {
952 char tmp[4];
953 tmp[0] = psz[3]; tmp[1] = psz[2];
954 tmp[2] = psz[1]; tmp[3] = psz[0];
955
956
957 wxUint16 cc[2];
958
959 size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
960 if (pa == (size_t)-1)
961 return pa;
962
963 if (buf)
964 {
965 *buf++ = cc[0];
966 if (pa > 1)
967 *buf++ = cc[1];
968 }
969 len += pa;
970 psz += sizeof(wxUint32);
971 }
972
973 if (buf && len<n)
974 *buf=0;
975
976 return len;
977 }
978
979
980 // swap 16bit String to 32bit MB
981 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
982 {
983 size_t len=0;
984
985 while (*psz && (!buf || len < n))
986 {
987 char cc[4];
988
989 // cast is ok for WC_UTF16
990 size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
991 if (pa == (size_t)-1)
992 return pa;
993
994 if (buf)
995 {
996 *buf++ = cc[3];
997 *buf++ = cc[2];
998 *buf++ = cc[1];
999 *buf++ = cc[0];
1000 }
1001 len += sizeof(wxUint32);
1002 psz += pa;
1003 }
1004
1005 if (buf && len<=n-sizeof(wxUint32))
1006 *(wxUint32*)buf=0;
1007
1008 return len;
1009 }
1010
1011 #else // WC_UTF16
1012
1013
1014 // copy 32bit MB to 32bit String
1015 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1016 {
1017 size_t len=0;
1018
1019 while (*(wxUint32*)psz && (!buf || len < n))
1020 {
1021 if (buf)
1022 *buf++ = *(wxUint32*)psz;
1023 len++;
1024 psz += sizeof(wxUint32);
1025 }
1026
1027 if (buf && len<n)
1028 *buf=0;
1029
1030 return len;
1031 }
1032
1033
1034 // copy 32bit String to 32bit MB
1035 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1036 {
1037 size_t len=0;
1038
1039 while (*psz && (!buf || len < n))
1040 {
1041 if (buf)
1042 {
1043 *(wxUint32*)buf = *psz;
1044 buf += sizeof(wxUint32);
1045 }
1046
1047 len += sizeof(wxUint32);
1048 psz++;
1049 }
1050
1051 if (buf && len<=n-sizeof(wxUint32))
1052 *(wxUint32*)buf=0;
1053
1054 return len;
1055 }
1056
1057
1058 // swap 32bit MB to 32bit String
1059 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1060 {
1061 size_t len=0;
1062
1063 while (*(wxUint32*)psz && (!buf || len < n))
1064 {
1065 if (buf)
1066 {
1067 ((char *)buf)[0] = psz[3];
1068 ((char *)buf)[1] = psz[2];
1069 ((char *)buf)[2] = psz[1];
1070 ((char *)buf)[3] = psz[0];
1071 buf++;
1072 }
1073 len++;
1074 psz += sizeof(wxUint32);
1075 }
1076
1077 if (buf && len<n)
1078 *buf=0;
1079
1080 return len;
1081 }
1082
1083
1084 // swap 32bit String to 32bit MB
1085 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1086 {
1087 size_t len=0;
1088
1089 while (*psz && (!buf || len < n))
1090 {
1091 if (buf)
1092 {
1093 *buf++ = ((char *)psz)[3];
1094 *buf++ = ((char *)psz)[2];
1095 *buf++ = ((char *)psz)[1];
1096 *buf++ = ((char *)psz)[0];
1097 }
1098 len += sizeof(wxUint32);
1099 psz++;
1100 }
1101
1102 if (buf && len<=n-sizeof(wxUint32))
1103 *(wxUint32*)buf=0;
1104
1105 return len;
1106 }
1107
1108
1109 #endif // WC_UTF16
1110
1111
1112 // ============================================================================
1113 // The classes doing conversion using the iconv_xxx() functions
1114 // ============================================================================
1115
1116 #ifdef HAVE_ICONV
1117
1118 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
1119 // if output buffer is _exactly_ as big as needed. Such case is (unless there's
1120 // yet another bug in glibc) the only case when iconv() returns with (size_t)-1
1121 // (which means error) and says there are 0 bytes left in the input buffer --
1122 // when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
1123 // this alternative test for iconv() failure.
1124 // [This bug does not appear in glibc 2.2.]
1125 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1126 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1127 (errno != E2BIG || bufLeft != 0))
1128 #else
1129 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1130 #endif
1131
1132 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1133
1134 // ----------------------------------------------------------------------------
1135 // wxMBConv_iconv: encapsulates an iconv character set
1136 // ----------------------------------------------------------------------------
1137
1138 class wxMBConv_iconv : public wxMBConv
1139 {
1140 public:
1141 wxMBConv_iconv(const wxChar *name);
1142 virtual ~wxMBConv_iconv();
1143
1144 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1145 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1146
1147 bool IsOk() const
1148 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1149
1150 protected:
1151 // the iconv handlers used to translate from multibyte to wide char and in
1152 // the other direction
1153 iconv_t m2w,
1154 w2m;
1155
1156 private:
1157 // the name (for iconv_open()) of a wide char charset -- if none is
1158 // available on this machine, it will remain NULL
1159 static const char *ms_wcCharsetName;
1160
1161 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1162 // different endian-ness than the native one
1163 static bool ms_wcNeedsSwap;
1164 };
1165
1166 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1167 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1168
1169 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1170 {
1171 // Do it the hard way
1172 char cname[100];
1173 for (size_t i = 0; i < wxStrlen(name)+1; i++)
1174 cname[i] = (char) name[i];
1175
1176 // check for charset that represents wchar_t:
1177 if (ms_wcCharsetName == NULL)
1178 {
1179 ms_wcNeedsSwap = false;
1180
1181 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1182 ms_wcCharsetName = WC_NAME_BEST;
1183 m2w = iconv_open(ms_wcCharsetName, cname);
1184
1185 if (m2w == (iconv_t)-1)
1186 {
1187 // try charset w/o bytesex info (e.g. "UCS4")
1188 // and check for bytesex ourselves:
1189 ms_wcCharsetName = WC_NAME;
1190 m2w = iconv_open(ms_wcCharsetName, cname);
1191
1192 // last bet, try if it knows WCHAR_T pseudo-charset
1193 if (m2w == (iconv_t)-1)
1194 {
1195 ms_wcCharsetName = "WCHAR_T";
1196 m2w = iconv_open(ms_wcCharsetName, cname);
1197 }
1198
1199 if (m2w != (iconv_t)-1)
1200 {
1201 char buf[2], *bufPtr;
1202 wchar_t wbuf[2], *wbufPtr;
1203 size_t insz, outsz;
1204 size_t res;
1205
1206 buf[0] = 'A';
1207 buf[1] = 0;
1208 wbuf[0] = 0;
1209 insz = 2;
1210 outsz = SIZEOF_WCHAR_T * 2;
1211 wbufPtr = wbuf;
1212 bufPtr = buf;
1213
1214 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1215 (char**)&wbufPtr, &outsz);
1216
1217 if (ICONV_FAILED(res, insz))
1218 {
1219 ms_wcCharsetName = NULL;
1220 wxLogLastError(wxT("iconv"));
1221 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1222 }
1223 else
1224 {
1225 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1226 }
1227 }
1228 else
1229 {
1230 ms_wcCharsetName = NULL;
1231
1232 // VS: we must not output an error here, since wxWidgets will safely
1233 // fall back to using wxEncodingConverter.
1234 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1235 //wxLogError(
1236 }
1237 }
1238 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1239 }
1240 else // we already have ms_wcCharsetName
1241 {
1242 m2w = iconv_open(ms_wcCharsetName, cname);
1243 }
1244
1245 // NB: don't ever pass NULL to iconv_open(), it may crash!
1246 if ( ms_wcCharsetName )
1247 {
1248 w2m = iconv_open( cname, ms_wcCharsetName);
1249 }
1250 else
1251 {
1252 w2m = (iconv_t)-1;
1253 }
1254 }
1255
1256 wxMBConv_iconv::~wxMBConv_iconv()
1257 {
1258 if ( m2w != (iconv_t)-1 )
1259 iconv_close(m2w);
1260 if ( w2m != (iconv_t)-1 )
1261 iconv_close(w2m);
1262 }
1263
1264 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1265 {
1266 size_t inbuf = strlen(psz);
1267 size_t outbuf = n * SIZEOF_WCHAR_T;
1268 size_t res, cres;
1269 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1270 wchar_t *bufPtr = buf;
1271 const char *pszPtr = psz;
1272
1273 if (buf)
1274 {
1275 // have destination buffer, convert there
1276 cres = iconv(m2w,
1277 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1278 (char**)&bufPtr, &outbuf);
1279 res = n - (outbuf / SIZEOF_WCHAR_T);
1280
1281 if (ms_wcNeedsSwap)
1282 {
1283 // convert to native endianness
1284 WC_BSWAP(buf /* _not_ bufPtr */, res)
1285 }
1286
1287 // NB: iconv was given only strlen(psz) characters on input, and so
1288 // it couldn't convert the trailing zero. Let's do it ourselves
1289 // if there's some room left for it in the output buffer.
1290 if (res < n)
1291 buf[res] = 0;
1292 }
1293 else
1294 {
1295 // no destination buffer... convert using temp buffer
1296 // to calculate destination buffer requirement
1297 wchar_t tbuf[8];
1298 res = 0;
1299 do {
1300 bufPtr = tbuf;
1301 outbuf = 8*SIZEOF_WCHAR_T;
1302
1303 cres = iconv(m2w,
1304 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1305 (char**)&bufPtr, &outbuf );
1306
1307 res += 8-(outbuf/SIZEOF_WCHAR_T);
1308 } while ((cres==(size_t)-1) && (errno==E2BIG));
1309 }
1310
1311 if (ICONV_FAILED(cres, inbuf))
1312 {
1313 //VS: it is ok if iconv fails, hence trace only
1314 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1315 return (size_t)-1;
1316 }
1317
1318 return res;
1319 }
1320
1321 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1322 {
1323 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1324 size_t outbuf = n;
1325 size_t res, cres;
1326
1327 wchar_t *tmpbuf = 0;
1328
1329 if (ms_wcNeedsSwap)
1330 {
1331 // need to copy to temp buffer to switch endianness
1332 // this absolutely doesn't rock!
1333 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1334 // could be in read-only memory, or be accessed in some other thread)
1335 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1336 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1337 WC_BSWAP(tmpbuf, inbuf)
1338 psz=tmpbuf;
1339 }
1340
1341 if (buf)
1342 {
1343 // have destination buffer, convert there
1344 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1345
1346 res = n-outbuf;
1347
1348 // NB: iconv was given only wcslen(psz) characters on input, and so
1349 // it couldn't convert the trailing zero. Let's do it ourselves
1350 // if there's some room left for it in the output buffer.
1351 if (res < n)
1352 buf[0] = 0;
1353 }
1354 else
1355 {
1356 // no destination buffer... convert using temp buffer
1357 // to calculate destination buffer requirement
1358 char tbuf[16];
1359 res = 0;
1360 do {
1361 buf = tbuf; outbuf = 16;
1362
1363 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1364
1365 res += 16 - outbuf;
1366 } while ((cres==(size_t)-1) && (errno==E2BIG));
1367 }
1368
1369 if (ms_wcNeedsSwap)
1370 {
1371 free(tmpbuf);
1372 }
1373
1374 if (ICONV_FAILED(cres, inbuf))
1375 {
1376 //VS: it is ok if iconv fails, hence trace only
1377 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1378 return (size_t)-1;
1379 }
1380
1381 return res;
1382 }
1383
1384 #endif // HAVE_ICONV
1385
1386
1387 // ============================================================================
1388 // Win32 conversion classes
1389 // ============================================================================
1390
1391 #ifdef wxHAVE_WIN32_MB2WC
1392
1393 // from utils.cpp
1394 #if wxUSE_FONTMAP
1395 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1396 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1397 #endif
1398
1399 class wxMBConv_win32 : public wxMBConv
1400 {
1401 public:
1402 wxMBConv_win32()
1403 {
1404 m_CodePage = CP_ACP;
1405 }
1406
1407 #if wxUSE_FONTMAP
1408 wxMBConv_win32(const wxChar* name)
1409 {
1410 m_CodePage = wxCharsetToCodepage(name);
1411 }
1412
1413 wxMBConv_win32(wxFontEncoding encoding)
1414 {
1415 m_CodePage = wxEncodingToCodepage(encoding);
1416 }
1417 #endif
1418
1419 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1420 {
1421 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1422 // the behaviour is not compatible with the Unix version (using iconv)
1423 // and break the library itself, e.g. wxTextInputStream::NextChar()
1424 // wouldn't work if reading an incomplete MB char didn't result in an
1425 // error
1426 const size_t len = ::MultiByteToWideChar
1427 (
1428 m_CodePage, // code page
1429 MB_ERR_INVALID_CHARS, // flags: fall on error
1430 psz, // input string
1431 -1, // its length (NUL-terminated)
1432 buf, // output string
1433 buf ? n : 0 // size of output buffer
1434 );
1435
1436 // note that it returns count of written chars for buf != NULL and size
1437 // of the needed buffer for buf == NULL so in either case the length of
1438 // the string (which never includes the terminating NUL) is one less
1439 return len ? len - 1 : (size_t)-1;
1440 }
1441
1442 size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1443 {
1444 /*
1445 we have a problem here: by default, WideCharToMultiByte() may
1446 replace characters unrepresentable in the target code page with bad
1447 quality approximations such as turning "1/2" symbol (U+00BD) into
1448 "1" for the code pages which don't have it and we, obviously, want
1449 to avoid this at any price
1450
1451 the trouble is that this function does it _silently_, i.e. it won't
1452 even tell us whether it did or not... Win98/2000 and higher provide
1453 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1454 we have to resort to a round trip, i.e. check that converting back
1455 results in the same string -- this is, of course, expensive but
1456 otherwise we simply can't be sure to not garble the data.
1457 */
1458
1459 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1460 // it doesn't work with CJK encodings (which we test for rather roughly
1461 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1462 // supporting it
1463 BOOL usedDef wxDUMMY_INITIALIZE(false);
1464 BOOL *pUsedDef;
1465 int flags;
1466 if ( CanUseNoBestFit() && m_CodePage < 50000 )
1467 {
1468 // it's our lucky day
1469 flags = WC_NO_BEST_FIT_CHARS;
1470 pUsedDef = &usedDef;
1471 }
1472 else // old system or unsupported encoding
1473 {
1474 flags = 0;
1475 pUsedDef = NULL;
1476 }
1477
1478 const size_t len = ::WideCharToMultiByte
1479 (
1480 m_CodePage, // code page
1481 flags, // either none or no best fit
1482 pwz, // input string
1483 -1, // it is (wide) NUL-terminated
1484 buf, // output buffer
1485 buf ? n : 0, // and its size
1486 NULL, // default "replacement" char
1487 pUsedDef // [out] was it used?
1488 );
1489
1490 if ( !len )
1491 {
1492 // function totally failed
1493 return (size_t)-1;
1494 }
1495
1496 // if we were really converting, check if we succeeded
1497 if ( buf )
1498 {
1499 if ( flags )
1500 {
1501 // check if the conversion failed, i.e. if any replacements
1502 // were done
1503 if ( usedDef )
1504 return (size_t)-1;
1505 }
1506 else // we must resort to double tripping...
1507 {
1508 wxWCharBuffer wcBuf(n);
1509 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1510 wcscmp(wcBuf, pwz) != 0 )
1511 {
1512 // we didn't obtain the same thing we started from, hence
1513 // the conversion was lossy and we consider that it failed
1514 return (size_t)-1;
1515 }
1516 }
1517 }
1518
1519 // see the comment above for the reason of "len - 1"
1520 return len - 1;
1521 }
1522
1523 bool IsOk() const { return m_CodePage != -1; }
1524
1525 private:
1526 static bool CanUseNoBestFit()
1527 {
1528 static int s_isWin98Or2k = -1;
1529
1530 if ( s_isWin98Or2k == -1 )
1531 {
1532 int verMaj, verMin;
1533 switch ( wxGetOsVersion(&verMaj, &verMin) )
1534 {
1535 case wxWIN95:
1536 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1537 break;
1538
1539 case wxWINDOWS_NT:
1540 s_isWin98Or2k = verMaj >= 5;
1541 break;
1542
1543 default:
1544 // unknown, be conseravtive by default
1545 s_isWin98Or2k = 0;
1546 }
1547
1548 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1549 }
1550
1551 return s_isWin98Or2k == 1;
1552 }
1553
1554 long m_CodePage;
1555 };
1556
1557 #endif // wxHAVE_WIN32_MB2WC
1558
1559 // ============================================================================
1560 // Cocoa conversion classes
1561 // ============================================================================
1562
1563 #if defined(__WXCOCOA__)
1564
1565 // RN: There is no UTF-32 support in either Core Foundation or
1566 // Cocoa. Strangely enough, internally Core Foundation uses
1567 // UTF 32 internally quite a bit - its just not public (yet).
1568
1569 #include <CoreFoundation/CFString.h>
1570 #include <CoreFoundation/CFStringEncodingExt.h>
1571
1572 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1573 {
1574 CFStringEncoding enc = kCFStringEncodingInvalidId ;
1575 if ( encoding == wxFONTENCODING_DEFAULT )
1576 {
1577 enc = CFStringGetSystemEncoding();
1578 }
1579 else switch( encoding)
1580 {
1581 case wxFONTENCODING_ISO8859_1 :
1582 enc = kCFStringEncodingISOLatin1 ;
1583 break ;
1584 case wxFONTENCODING_ISO8859_2 :
1585 enc = kCFStringEncodingISOLatin2;
1586 break ;
1587 case wxFONTENCODING_ISO8859_3 :
1588 enc = kCFStringEncodingISOLatin3 ;
1589 break ;
1590 case wxFONTENCODING_ISO8859_4 :
1591 enc = kCFStringEncodingISOLatin4;
1592 break ;
1593 case wxFONTENCODING_ISO8859_5 :
1594 enc = kCFStringEncodingISOLatinCyrillic;
1595 break ;
1596 case wxFONTENCODING_ISO8859_6 :
1597 enc = kCFStringEncodingISOLatinArabic;
1598 break ;
1599 case wxFONTENCODING_ISO8859_7 :
1600 enc = kCFStringEncodingISOLatinGreek;
1601 break ;
1602 case wxFONTENCODING_ISO8859_8 :
1603 enc = kCFStringEncodingISOLatinHebrew;
1604 break ;
1605 case wxFONTENCODING_ISO8859_9 :
1606 enc = kCFStringEncodingISOLatin5;
1607 break ;
1608 case wxFONTENCODING_ISO8859_10 :
1609 enc = kCFStringEncodingISOLatin6;
1610 break ;
1611 case wxFONTENCODING_ISO8859_11 :
1612 enc = kCFStringEncodingISOLatinThai;
1613 break ;
1614 case wxFONTENCODING_ISO8859_13 :
1615 enc = kCFStringEncodingISOLatin7;
1616 break ;
1617 case wxFONTENCODING_ISO8859_14 :
1618 enc = kCFStringEncodingISOLatin8;
1619 break ;
1620 case wxFONTENCODING_ISO8859_15 :
1621 enc = kCFStringEncodingISOLatin9;
1622 break ;
1623
1624 case wxFONTENCODING_KOI8 :
1625 enc = kCFStringEncodingKOI8_R;
1626 break ;
1627 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1628 enc = kCFStringEncodingDOSRussian;
1629 break ;
1630
1631 // case wxFONTENCODING_BULGARIAN :
1632 // enc = ;
1633 // break ;
1634
1635 case wxFONTENCODING_CP437 :
1636 enc =kCFStringEncodingDOSLatinUS ;
1637 break ;
1638 case wxFONTENCODING_CP850 :
1639 enc = kCFStringEncodingDOSLatin1;
1640 break ;
1641 case wxFONTENCODING_CP852 :
1642 enc = kCFStringEncodingDOSLatin2;
1643 break ;
1644 case wxFONTENCODING_CP855 :
1645 enc = kCFStringEncodingDOSCyrillic;
1646 break ;
1647 case wxFONTENCODING_CP866 :
1648 enc =kCFStringEncodingDOSRussian ;
1649 break ;
1650 case wxFONTENCODING_CP874 :
1651 enc = kCFStringEncodingDOSThai;
1652 break ;
1653 case wxFONTENCODING_CP932 :
1654 enc = kCFStringEncodingDOSJapanese;
1655 break ;
1656 case wxFONTENCODING_CP936 :
1657 enc =kCFStringEncodingDOSChineseSimplif ;
1658 break ;
1659 case wxFONTENCODING_CP949 :
1660 enc = kCFStringEncodingDOSKorean;
1661 break ;
1662 case wxFONTENCODING_CP950 :
1663 enc = kCFStringEncodingDOSChineseTrad;
1664 break ;
1665 case wxFONTENCODING_CP1250 :
1666 enc = kCFStringEncodingWindowsLatin2;
1667 break ;
1668 case wxFONTENCODING_CP1251 :
1669 enc =kCFStringEncodingWindowsCyrillic ;
1670 break ;
1671 case wxFONTENCODING_CP1252 :
1672 enc =kCFStringEncodingWindowsLatin1 ;
1673 break ;
1674 case wxFONTENCODING_CP1253 :
1675 enc = kCFStringEncodingWindowsGreek;
1676 break ;
1677 case wxFONTENCODING_CP1254 :
1678 enc = kCFStringEncodingWindowsLatin5;
1679 break ;
1680 case wxFONTENCODING_CP1255 :
1681 enc =kCFStringEncodingWindowsHebrew ;
1682 break ;
1683 case wxFONTENCODING_CP1256 :
1684 enc =kCFStringEncodingWindowsArabic ;
1685 break ;
1686 case wxFONTENCODING_CP1257 :
1687 enc = kCFStringEncodingWindowsBalticRim;
1688 break ;
1689 // This only really encodes to UTF7 (if that) evidently
1690 // case wxFONTENCODING_UTF7 :
1691 // enc = kCFStringEncodingNonLossyASCII ;
1692 // break ;
1693 case wxFONTENCODING_UTF8 :
1694 enc = kCFStringEncodingUTF8 ;
1695 break ;
1696 case wxFONTENCODING_EUC_JP :
1697 enc = kCFStringEncodingEUC_JP;
1698 break ;
1699 case wxFONTENCODING_UTF16 :
1700 enc = kCFStringEncodingUnicode ;
1701 break ;
1702 case wxFONTENCODING_MACROMAN :
1703 enc = kCFStringEncodingMacRoman ;
1704 break ;
1705 case wxFONTENCODING_MACJAPANESE :
1706 enc = kCFStringEncodingMacJapanese ;
1707 break ;
1708 case wxFONTENCODING_MACCHINESETRAD :
1709 enc = kCFStringEncodingMacChineseTrad ;
1710 break ;
1711 case wxFONTENCODING_MACKOREAN :
1712 enc = kCFStringEncodingMacKorean ;
1713 break ;
1714 case wxFONTENCODING_MACARABIC :
1715 enc = kCFStringEncodingMacArabic ;
1716 break ;
1717 case wxFONTENCODING_MACHEBREW :
1718 enc = kCFStringEncodingMacHebrew ;
1719 break ;
1720 case wxFONTENCODING_MACGREEK :
1721 enc = kCFStringEncodingMacGreek ;
1722 break ;
1723 case wxFONTENCODING_MACCYRILLIC :
1724 enc = kCFStringEncodingMacCyrillic ;
1725 break ;
1726 case wxFONTENCODING_MACDEVANAGARI :
1727 enc = kCFStringEncodingMacDevanagari ;
1728 break ;
1729 case wxFONTENCODING_MACGURMUKHI :
1730 enc = kCFStringEncodingMacGurmukhi ;
1731 break ;
1732 case wxFONTENCODING_MACGUJARATI :
1733 enc = kCFStringEncodingMacGujarati ;
1734 break ;
1735 case wxFONTENCODING_MACORIYA :
1736 enc = kCFStringEncodingMacOriya ;
1737 break ;
1738 case wxFONTENCODING_MACBENGALI :
1739 enc = kCFStringEncodingMacBengali ;
1740 break ;
1741 case wxFONTENCODING_MACTAMIL :
1742 enc = kCFStringEncodingMacTamil ;
1743 break ;
1744 case wxFONTENCODING_MACTELUGU :
1745 enc = kCFStringEncodingMacTelugu ;
1746 break ;
1747 case wxFONTENCODING_MACKANNADA :
1748 enc = kCFStringEncodingMacKannada ;
1749 break ;
1750 case wxFONTENCODING_MACMALAJALAM :
1751 enc = kCFStringEncodingMacMalayalam ;
1752 break ;
1753 case wxFONTENCODING_MACSINHALESE :
1754 enc = kCFStringEncodingMacSinhalese ;
1755 break ;
1756 case wxFONTENCODING_MACBURMESE :
1757 enc = kCFStringEncodingMacBurmese ;
1758 break ;
1759 case wxFONTENCODING_MACKHMER :
1760 enc = kCFStringEncodingMacKhmer ;
1761 break ;
1762 case wxFONTENCODING_MACTHAI :
1763 enc = kCFStringEncodingMacThai ;
1764 break ;
1765 case wxFONTENCODING_MACLAOTIAN :
1766 enc = kCFStringEncodingMacLaotian ;
1767 break ;
1768 case wxFONTENCODING_MACGEORGIAN :
1769 enc = kCFStringEncodingMacGeorgian ;
1770 break ;
1771 case wxFONTENCODING_MACARMENIAN :
1772 enc = kCFStringEncodingMacArmenian ;
1773 break ;
1774 case wxFONTENCODING_MACCHINESESIMP :
1775 enc = kCFStringEncodingMacChineseSimp ;
1776 break ;
1777 case wxFONTENCODING_MACTIBETAN :
1778 enc = kCFStringEncodingMacTibetan ;
1779 break ;
1780 case wxFONTENCODING_MACMONGOLIAN :
1781 enc = kCFStringEncodingMacMongolian ;
1782 break ;
1783 case wxFONTENCODING_MACETHIOPIC :
1784 enc = kCFStringEncodingMacEthiopic ;
1785 break ;
1786 case wxFONTENCODING_MACCENTRALEUR :
1787 enc = kCFStringEncodingMacCentralEurRoman ;
1788 break ;
1789 case wxFONTENCODING_MACVIATNAMESE :
1790 enc = kCFStringEncodingMacVietnamese ;
1791 break ;
1792 case wxFONTENCODING_MACARABICEXT :
1793 enc = kCFStringEncodingMacExtArabic ;
1794 break ;
1795 case wxFONTENCODING_MACSYMBOL :
1796 enc = kCFStringEncodingMacSymbol ;
1797 break ;
1798 case wxFONTENCODING_MACDINGBATS :
1799 enc = kCFStringEncodingMacDingbats ;
1800 break ;
1801 case wxFONTENCODING_MACTURKISH :
1802 enc = kCFStringEncodingMacTurkish ;
1803 break ;
1804 case wxFONTENCODING_MACCROATIAN :
1805 enc = kCFStringEncodingMacCroatian ;
1806 break ;
1807 case wxFONTENCODING_MACICELANDIC :
1808 enc = kCFStringEncodingMacIcelandic ;
1809 break ;
1810 case wxFONTENCODING_MACROMANIAN :
1811 enc = kCFStringEncodingMacRomanian ;
1812 break ;
1813 case wxFONTENCODING_MACCELTIC :
1814 enc = kCFStringEncodingMacCeltic ;
1815 break ;
1816 case wxFONTENCODING_MACGAELIC :
1817 enc = kCFStringEncodingMacGaelic ;
1818 break ;
1819 // case wxFONTENCODING_MACKEYBOARD :
1820 // enc = kCFStringEncodingMacKeyboardGlyphs ;
1821 // break ;
1822 default :
1823 // because gcc is picky
1824 break ;
1825 } ;
1826 return enc ;
1827 }
1828
1829 class wxMBConv_cocoa : public wxMBConv
1830 {
1831 public:
1832 wxMBConv_cocoa()
1833 {
1834 Init(CFStringGetSystemEncoding()) ;
1835 }
1836
1837 wxMBConv_cocoa(const wxChar* name)
1838 {
1839 Init( wxCFStringEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
1840 }
1841
1842 wxMBConv_cocoa(wxFontEncoding encoding)
1843 {
1844 Init( wxCFStringEncFromFontEnc(encoding) );
1845 }
1846
1847 ~wxMBConv_cocoa()
1848 {
1849 }
1850
1851 void Init( CFStringEncoding encoding)
1852 {
1853 m_encoding = encoding ;
1854 }
1855
1856 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
1857 {
1858 wxASSERT(szUnConv);
1859
1860 CFStringRef theString = CFStringCreateWithBytes (
1861 NULL, //the allocator
1862 (const UInt8*)szUnConv,
1863 strlen(szUnConv),
1864 m_encoding,
1865 false //no BOM/external representation
1866 );
1867
1868 wxASSERT(theString);
1869
1870 size_t nOutLength = CFStringGetLength(theString);
1871
1872 if (szOut == NULL)
1873 {
1874 CFRelease(theString);
1875 return nOutLength;
1876 }
1877
1878 CFRange theRange = { 0, nOutSize };
1879
1880 #if SIZEOF_WCHAR_T == 4
1881 UniChar* szUniCharBuffer = new UniChar[nOutSize];
1882 #endif
1883
1884 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
1885
1886 CFRelease(theString);
1887
1888 szUniCharBuffer[nOutLength] = '\0' ;
1889
1890 #if SIZEOF_WCHAR_T == 4
1891 wxMBConvUTF16 converter ;
1892 converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
1893 delete[] szUniCharBuffer;
1894 #endif
1895
1896 return nOutLength;
1897 }
1898
1899 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
1900 {
1901 wxASSERT(szUnConv);
1902
1903 size_t nRealOutSize;
1904 size_t nBufSize = wxWcslen(szUnConv);
1905 UniChar* szUniBuffer = (UniChar*) szUnConv;
1906
1907 #if SIZEOF_WCHAR_T == 4
1908 wxMBConvUTF16BE converter ;
1909 nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
1910 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
1911 converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
1912 nBufSize /= sizeof(UniChar);
1913 #endif
1914
1915 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
1916 NULL, //allocator
1917 szUniBuffer,
1918 nBufSize,
1919 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
1920 );
1921
1922 wxASSERT(theString);
1923
1924 //Note that CER puts a BOM when converting to unicode
1925 //so we check and use getchars instead in that case
1926 if (m_encoding == kCFStringEncodingUnicode)
1927 {
1928 if (szOut != NULL)
1929 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
1930
1931 nRealOutSize = CFStringGetLength(theString) + 1;
1932 }
1933 else
1934 {
1935 CFStringGetBytes(
1936 theString,
1937 CFRangeMake(0, CFStringGetLength(theString)),
1938 m_encoding,
1939 0, //what to put in characters that can't be converted -
1940 //0 tells CFString to return NULL if it meets such a character
1941 false, //not an external representation
1942 (UInt8*) szOut,
1943 nOutSize,
1944 (CFIndex*) &nRealOutSize
1945 );
1946 }
1947
1948 CFRelease(theString);
1949
1950 #if SIZEOF_WCHAR_T == 4
1951 delete[] szUniBuffer;
1952 #endif
1953
1954 return nRealOutSize - 1;
1955 }
1956
1957 bool IsOk() const
1958 {
1959 return m_encoding != kCFStringEncodingInvalidId &&
1960 CFStringIsEncodingAvailable(m_encoding);
1961 }
1962
1963 private:
1964 CFStringEncoding m_encoding ;
1965 };
1966
1967 #endif // defined(__WXCOCOA__)
1968
1969 // ============================================================================
1970 // Mac conversion classes
1971 // ============================================================================
1972
1973 #if defined(__WXMAC__) && defined(TARGET_CARBON)
1974
1975 class wxMBConv_mac : public wxMBConv
1976 {
1977 public:
1978 wxMBConv_mac()
1979 {
1980 Init(CFStringGetSystemEncoding()) ;
1981 }
1982
1983 wxMBConv_mac(const wxChar* name)
1984 {
1985 Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
1986 }
1987
1988 wxMBConv_mac(wxFontEncoding encoding)
1989 {
1990 Init( wxMacGetSystemEncFromFontEnc(encoding) );
1991 }
1992
1993 ~wxMBConv_mac()
1994 {
1995 OSStatus status = noErr ;
1996 status = TECDisposeConverter(m_MB2WC_converter);
1997 status = TECDisposeConverter(m_WC2MB_converter);
1998 }
1999
2000
2001 void Init( TextEncodingBase encoding)
2002 {
2003 OSStatus status = noErr ;
2004 m_char_encoding = encoding ;
2005 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2006
2007 status = TECCreateConverter(&m_MB2WC_converter,
2008 m_char_encoding,
2009 m_unicode_encoding);
2010 status = TECCreateConverter(&m_WC2MB_converter,
2011 m_unicode_encoding,
2012 m_char_encoding);
2013 }
2014
2015 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2016 {
2017 OSStatus status = noErr ;
2018 ByteCount byteOutLen ;
2019 ByteCount byteInLen = strlen(psz) ;
2020 wchar_t *tbuf = NULL ;
2021 UniChar* ubuf = NULL ;
2022 size_t res = 0 ;
2023
2024 if (buf == NULL)
2025 {
2026 //apple specs say at least 32
2027 n = max( 32 , byteInLen ) ;
2028 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2029 }
2030 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2031 #if SIZEOF_WCHAR_T == 4
2032 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2033 #else
2034 ubuf = (UniChar*) (buf ? buf : tbuf) ;
2035 #endif
2036 status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2037 (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2038 #if SIZEOF_WCHAR_T == 4
2039 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2040 // is not properly terminated we get random characters at the end
2041 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2042 wxMBConvUTF16BE converter ;
2043 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2044 free( ubuf ) ;
2045 #else
2046 res = byteOutLen / sizeof( UniChar ) ;
2047 #endif
2048 if ( buf == NULL )
2049 free(tbuf) ;
2050
2051 if ( buf && res < n)
2052 buf[res] = 0;
2053
2054 return res ;
2055 }
2056
2057 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2058 {
2059 OSStatus status = noErr ;
2060 ByteCount byteOutLen ;
2061 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2062
2063 char *tbuf = NULL ;
2064
2065 if (buf == NULL)
2066 {
2067 //apple specs say at least 32
2068 n = max( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2069 tbuf = (char*) malloc( n ) ;
2070 }
2071
2072 ByteCount byteBufferLen = n ;
2073 UniChar* ubuf = NULL ;
2074 #if SIZEOF_WCHAR_T == 4
2075 wxMBConvUTF16BE converter ;
2076 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2077 byteInLen = unicharlen ;
2078 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2079 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2080 #else
2081 ubuf = (UniChar*) psz ;
2082 #endif
2083 status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2084 (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2085 #if SIZEOF_WCHAR_T == 4
2086 free( ubuf ) ;
2087 #endif
2088 if ( buf == NULL )
2089 free(tbuf) ;
2090
2091 size_t res = byteOutLen ;
2092 if ( buf && res < n)
2093 {
2094 buf[res] = 0;
2095
2096 //we need to double-trip to verify it didn't insert any ? in place
2097 //of bogus characters
2098 wxWCharBuffer wcBuf(n);
2099 size_t pszlen = wxWcslen(psz);
2100 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2101 wxWcslen(wcBuf) != pszlen ||
2102 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2103 {
2104 // we didn't obtain the same thing we started from, hence
2105 // the conversion was lossy and we consider that it failed
2106 return (size_t)-1;
2107 }
2108 }
2109
2110 return res ;
2111 }
2112
2113 bool IsOk() const
2114 { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
2115
2116 private:
2117 TECObjectRef m_MB2WC_converter ;
2118 TECObjectRef m_WC2MB_converter ;
2119
2120 TextEncodingBase m_char_encoding ;
2121 TextEncodingBase m_unicode_encoding ;
2122 };
2123
2124 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2125
2126 // ============================================================================
2127 // wxEncodingConverter based conversion classes
2128 // ============================================================================
2129
2130 #if wxUSE_FONTMAP
2131
2132 class wxMBConv_wxwin : public wxMBConv
2133 {
2134 private:
2135 void Init()
2136 {
2137 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2138 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2139 }
2140
2141 public:
2142 // temporarily just use wxEncodingConverter stuff,
2143 // so that it works while a better implementation is built
2144 wxMBConv_wxwin(const wxChar* name)
2145 {
2146 if (name)
2147 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
2148 else
2149 m_enc = wxFONTENCODING_SYSTEM;
2150
2151 Init();
2152 }
2153
2154 wxMBConv_wxwin(wxFontEncoding enc)
2155 {
2156 m_enc = enc;
2157
2158 Init();
2159 }
2160
2161 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2162 {
2163 size_t inbuf = strlen(psz);
2164 if (buf)
2165 m2w.Convert(psz,buf);
2166 return inbuf;
2167 }
2168
2169 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2170 {
2171 const size_t inbuf = wxWcslen(psz);
2172 if (buf)
2173 w2m.Convert(psz,buf);
2174
2175 return inbuf;
2176 }
2177
2178 bool IsOk() const { return m_ok; }
2179
2180 public:
2181 wxFontEncoding m_enc;
2182 wxEncodingConverter m2w, w2m;
2183
2184 // were we initialized successfully?
2185 bool m_ok;
2186
2187 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2188 };
2189
2190 #endif // wxUSE_FONTMAP
2191
2192 // ============================================================================
2193 // wxCSConv implementation
2194 // ============================================================================
2195
2196 void wxCSConv::Init()
2197 {
2198 m_name = NULL;
2199 m_convReal = NULL;
2200 m_deferred = true;
2201 }
2202
2203 wxCSConv::wxCSConv(const wxChar *charset)
2204 {
2205 Init();
2206
2207 if ( charset )
2208 {
2209 SetName(charset);
2210 }
2211
2212 m_encoding = wxFONTENCODING_SYSTEM;
2213 }
2214
2215 wxCSConv::wxCSConv(wxFontEncoding encoding)
2216 {
2217 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2218 {
2219 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2220
2221 encoding = wxFONTENCODING_SYSTEM;
2222 }
2223
2224 Init();
2225
2226 m_encoding = encoding;
2227 }
2228
2229 wxCSConv::~wxCSConv()
2230 {
2231 Clear();
2232 }
2233
2234 wxCSConv::wxCSConv(const wxCSConv& conv)
2235 : wxMBConv()
2236 {
2237 Init();
2238
2239 SetName(conv.m_name);
2240 m_encoding = conv.m_encoding;
2241 }
2242
2243 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2244 {
2245 Clear();
2246
2247 SetName(conv.m_name);
2248 m_encoding = conv.m_encoding;
2249
2250 return *this;
2251 }
2252
2253 void wxCSConv::Clear()
2254 {
2255 free(m_name);
2256 delete m_convReal;
2257
2258 m_name = NULL;
2259 m_convReal = NULL;
2260 }
2261
2262 void wxCSConv::SetName(const wxChar *charset)
2263 {
2264 if (charset)
2265 {
2266 m_name = wxStrdup(charset);
2267 m_deferred = true;
2268 }
2269 }
2270
2271 wxMBConv *wxCSConv::DoCreate() const
2272 {
2273 // check for the special case of ASCII or ISO8859-1 charset: as we have
2274 // special knowledge of it anyhow, we don't need to create a special
2275 // conversion object
2276 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2277 {
2278 // don't convert at all
2279 return NULL;
2280 }
2281
2282 // we trust OS to do conversion better than we can so try external
2283 // conversion methods first
2284 //
2285 // the full order is:
2286 // 1. OS conversion (iconv() under Unix or Win32 API)
2287 // 2. hard coded conversions for UTF
2288 // 3. wxEncodingConverter as fall back
2289
2290 // step (1)
2291 #ifdef HAVE_ICONV
2292 #if !wxUSE_FONTMAP
2293 if ( m_name )
2294 #endif // !wxUSE_FONTMAP
2295 {
2296 wxString name(m_name);
2297
2298 #if wxUSE_FONTMAP
2299 if ( name.empty() )
2300 name = wxFontMapper::Get()->GetEncodingName(m_encoding);
2301 #endif // wxUSE_FONTMAP
2302
2303 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2304 if ( conv->IsOk() )
2305 return conv;
2306
2307 delete conv;
2308 }
2309 #endif // HAVE_ICONV
2310
2311 #ifdef wxHAVE_WIN32_MB2WC
2312 {
2313 #if wxUSE_FONTMAP
2314 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2315 : new wxMBConv_win32(m_encoding);
2316 if ( conv->IsOk() )
2317 return conv;
2318
2319 delete conv;
2320 #else
2321 return NULL;
2322 #endif
2323 }
2324 #endif // wxHAVE_WIN32_MB2WC
2325 #if defined(__WXMAC__)
2326 {
2327 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
2328 {
2329
2330 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2331 : new wxMBConv_mac(m_encoding);
2332 if ( conv->IsOk() )
2333 return conv;
2334
2335 delete conv;
2336 }
2337 }
2338 #endif
2339 #if defined(__WXCOCOA__)
2340 {
2341 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2342 {
2343
2344 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2345 : new wxMBConv_cocoa(m_encoding);
2346 if ( conv->IsOk() )
2347 return conv;
2348
2349 delete conv;
2350 }
2351 }
2352 #endif
2353 // step (2)
2354 wxFontEncoding enc = m_encoding;
2355 #if wxUSE_FONTMAP
2356 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2357 {
2358 // use "false" to suppress interactive dialogs -- we can be called from
2359 // anywhere and popping up a dialog from here is the last thing we want to
2360 // do
2361 enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
2362 }
2363 #endif // wxUSE_FONTMAP
2364
2365 switch ( enc )
2366 {
2367 case wxFONTENCODING_UTF7:
2368 return new wxMBConvUTF7;
2369
2370 case wxFONTENCODING_UTF8:
2371 return new wxMBConvUTF8;
2372
2373 case wxFONTENCODING_UTF16BE:
2374 return new wxMBConvUTF16BE;
2375
2376 case wxFONTENCODING_UTF16LE:
2377 return new wxMBConvUTF16LE;
2378
2379 case wxFONTENCODING_UTF32BE:
2380 return new wxMBConvUTF32BE;
2381
2382 case wxFONTENCODING_UTF32LE:
2383 return new wxMBConvUTF32LE;
2384
2385 default:
2386 // nothing to do but put here to suppress gcc warnings
2387 ;
2388 }
2389
2390 // step (3)
2391 #if wxUSE_FONTMAP
2392 {
2393 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2394 : new wxMBConv_wxwin(m_encoding);
2395 if ( conv->IsOk() )
2396 return conv;
2397
2398 delete conv;
2399 }
2400 #endif // wxUSE_FONTMAP
2401
2402 // NB: This is a hack to prevent deadlock. What could otherwise happen
2403 // in Unicode build: wxConvLocal creation ends up being here
2404 // because of some failure and logs the error. But wxLog will try to
2405 // attach timestamp, for which it will need wxConvLocal (to convert
2406 // time to char* and then wchar_t*), but that fails, tries to log
2407 // error, but wxLog has a (already locked) critical section that
2408 // guards static buffer.
2409 static bool alreadyLoggingError = false;
2410 if (!alreadyLoggingError)
2411 {
2412 alreadyLoggingError = true;
2413 wxLogError(_("Cannot convert from the charset '%s'!"),
2414 m_name ? m_name
2415 :
2416 #if wxUSE_FONTMAP
2417 wxFontMapper::GetEncodingDescription(m_encoding).c_str()
2418 #else // !wxUSE_FONTMAP
2419 wxString::Format(_("encoding %s"), m_encoding).c_str()
2420 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2421 );
2422 alreadyLoggingError = false;
2423 }
2424
2425 return NULL;
2426 }
2427
2428 void wxCSConv::CreateConvIfNeeded() const
2429 {
2430 if ( m_deferred )
2431 {
2432 wxCSConv *self = (wxCSConv *)this; // const_cast
2433
2434 #if wxUSE_INTL
2435 // if we don't have neither the name nor the encoding, use the default
2436 // encoding for this system
2437 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2438 {
2439 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2440 }
2441 #endif // wxUSE_INTL
2442
2443 self->m_convReal = DoCreate();
2444 self->m_deferred = false;
2445 }
2446 }
2447
2448 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2449 {
2450 CreateConvIfNeeded();
2451
2452 if (m_convReal)
2453 return m_convReal->MB2WC(buf, psz, n);
2454
2455 // latin-1 (direct)
2456 size_t len = strlen(psz);
2457
2458 if (buf)
2459 {
2460 for (size_t c = 0; c <= len; c++)
2461 buf[c] = (unsigned char)(psz[c]);
2462 }
2463
2464 return len;
2465 }
2466
2467 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2468 {
2469 CreateConvIfNeeded();
2470
2471 if (m_convReal)
2472 return m_convReal->WC2MB(buf, psz, n);
2473
2474 // latin-1 (direct)
2475 const size_t len = wxWcslen(psz);
2476 if (buf)
2477 {
2478 for (size_t c = 0; c <= len; c++)
2479 {
2480 if (psz[c] > 0xFF)
2481 return (size_t)-1;
2482 buf[c] = (char)psz[c];
2483 }
2484 }
2485 else
2486 {
2487 for (size_t c = 0; c <= len; c++)
2488 {
2489 if (psz[c] > 0xFF)
2490 return (size_t)-1;
2491 }
2492 }
2493
2494 return len;
2495 }
2496
2497 // ----------------------------------------------------------------------------
2498 // globals
2499 // ----------------------------------------------------------------------------
2500
2501 #ifdef __WINDOWS__
2502 static wxMBConv_win32 wxConvLibcObj;
2503 #elif defined(__WXMAC__) && !defined(__MACH__)
2504 static wxMBConv_mac wxConvLibcObj ;
2505 #else
2506 static wxMBConvLibc wxConvLibcObj;
2507 #endif
2508
2509 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2510 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2511 static wxMBConvUTF7 wxConvUTF7Obj;
2512 static wxMBConvUTF8 wxConvUTF8Obj;
2513
2514
2515 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2516 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2517 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2518 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2519 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2520 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2521
2522 #else // !wxUSE_WCHAR_T
2523
2524 // stand-ins in absence of wchar_t
2525 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2526 wxConvISO8859_1,
2527 wxConvLocal,
2528 wxConvUTF8;
2529
2530 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2531
2532