]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
b5e8925decc8f748b9780666c1f2f125e9cc84f1
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
9 // Licence: wxWindows licence
10 /////////////////////////////////////////////////////////////////////////////
11
12 // ============================================================================
13 // declarations
14 // ============================================================================
15
16 // ----------------------------------------------------------------------------
17 // headers
18 // ----------------------------------------------------------------------------
19
20 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
21 #pragma implementation "strconv.h"
22 #endif
23
24 // For compilers that support precompilation, includes "wx.h".
25 #include "wx/wxprec.h"
26
27 #ifdef __BORLANDC__
28 #pragma hdrstop
29 #endif
30
31 #ifndef WX_PRECOMP
32 #include "wx/intl.h"
33 #include "wx/log.h"
34 #endif // WX_PRECOMP
35
36 #ifdef __WXMSW__
37 #include "wx/msw/private.h"
38 #endif
39
40 #ifndef __WXWINCE__
41 #include <errno.h>
42 #endif
43
44 #include <ctype.h>
45 #include <string.h>
46 #include <stdlib.h>
47
48 #include "wx/module.h"
49 #include "wx/strconv.h"
50
51 // ----------------------------------------------------------------------------
52 // globals
53 // ----------------------------------------------------------------------------
54
55 #if wxUSE_WCHAR_T
56 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc;
57 WXDLLIMPEXP_DATA_BASE(wxCSConv) wxConvLocal((const wxChar *)NULL);
58 WXDLLIMPEXP_DATA_BASE(wxCSConv) wxConvISO8859_1(_T("iso-8859-1"));
59 #else
60 // stand-ins in absence of wchar_t
61 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
62 wxConvFile,
63 wxConvISO8859_1,
64 wxConvLocal,
65 wxConvUTF8;
66 #endif // wxUSE_WCHAR_T
67
68 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibc;
69
70 class wxStrConvModule: public wxModule
71 {
72 public:
73 wxStrConvModule() : wxModule() { }
74 virtual bool OnInit() { return TRUE; }
75 virtual void OnExit()
76 {
77 #if wxUSE_WCHAR_T
78 wxConvLocal.Clear();
79 wxConvISO8859_1.Clear();
80 #endif
81 }
82
83 DECLARE_DYNAMIC_CLASS(wxStrConvModule)
84 };
85
86 IMPLEMENT_DYNAMIC_CLASS(wxStrConvModule, wxModule)
87
88
89 // ----------------------------------------------------------------------------
90 // headers
91 // ----------------------------------------------------------------------------
92
93 #if wxUSE_WCHAR_T
94
95 #ifdef __SALFORDC__
96 #include <clib.h>
97 #endif
98
99 #ifdef HAVE_ICONV
100 #include <iconv.h>
101 #endif
102
103 #include "wx/encconv.h"
104 #include "wx/fontmap.h"
105
106 // ----------------------------------------------------------------------------
107 // macros
108 // ----------------------------------------------------------------------------
109
// Swap, in place, the bytes of each of the first len elements of str.
//
// NB: these macros expand to a brace-enclosed block and are invoked without a
//     trailing semicolon, so they must not be turned into do/while(0)
#define BSWAP_UCS4(str, len)                                                  \
    {                                                                         \
        unsigned _idx;                                                        \
        for ( _idx = 0; _idx < len; _idx++ )                                  \
            str[_idx] = wxUINT32_SWAP_ALWAYS(str[_idx]);                      \
    }
#define BSWAP_UTF16(str, len)                                                 \
    {                                                                         \
        unsigned _idx;                                                        \
        for ( _idx = 0; _idx < len; _idx++ )                                  \
            str[_idx] = wxUINT16_SWAP_ALWAYS(str[_idx]);                      \
    }
112
// under Unix SIZEOF_WCHAR_T is defined by configure, but under other platforms
// it might not be defined - assume the most common value
115 #ifndef SIZEOF_WCHAR_T
116 #define SIZEOF_WCHAR_T 2
117 #endif // !defined(SIZEOF_WCHAR_T)
118
119 #if SIZEOF_WCHAR_T == 4
120 #define WC_NAME "UCS4"
121 #define WC_BSWAP BSWAP_UCS4
122 #ifdef WORDS_BIGENDIAN
123 #define WC_NAME_BEST "UCS-4BE"
124 #else
125 #define WC_NAME_BEST "UCS-4LE"
126 #endif
127 #elif SIZEOF_WCHAR_T == 2
128 #define WC_NAME "UTF16"
129 #define WC_BSWAP BSWAP_UTF16
130 #define WC_UTF16
131 #ifdef WORDS_BIGENDIAN
132 #define WC_NAME_BEST "UTF-16BE"
133 #else
134 #define WC_NAME_BEST "UTF-16LE"
135 #endif
136 #else // sizeof(wchar_t) != 2 nor 4
137 // I don't know what to do about this
138 #error "Weird sizeof(wchar_t): please report your platform details to wx-users mailing list"
139 #endif
140
141 // ============================================================================
142 // implementation
143 // ============================================================================
144
145 // ----------------------------------------------------------------------------
146 // UTF-16 en/decoding
147 // ----------------------------------------------------------------------------
148
149 #ifdef WC_UTF16
150
151 static size_t encode_utf16(wxUint32 input, wchar_t *output)
152 {
153 if (input<=0xffff)
154 {
155 if (output) *output++ = (wchar_t) input;
156 return 1;
157 }
158 else if (input>=0x110000)
159 {
160 return (size_t)-1;
161 }
162 else
163 {
164 if (output)
165 {
166 *output++ = (wchar_t) ((input >> 10)+0xd7c0);
167 *output++ = (wchar_t) ((input&0x3ff)+0xdc00);
168 }
169 return 2;
170 }
171 }
172
173 static size_t decode_utf16(const wchar_t* input, wxUint32& output)
174 {
175 if ((*input<0xd800) || (*input>0xdfff))
176 {
177 output = *input;
178 return 1;
179 }
180 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
181 {
182 output = *input;
183 return (size_t)-1;
184 }
185 else
186 {
187 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
188 return 2;
189 }
190 }
191
192 #endif // WC_UTF16
193
194 // ----------------------------------------------------------------------------
195 // wxMBConv
196 // ----------------------------------------------------------------------------
197
198 #define IGNORE_LIBC 0
199
200 wxMBConv::~wxMBConv()
201 {
202 // nothing to do here
203 }
204
205 size_t wxMBConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
206 {
207 #if IGNORE_LIBC
208 if (buf)
209 {
210 for (size_t i = 0; i < strlen( psz )+1; i++)
211 buf[i] = (wchar_t) psz[i];
212 return strlen( psz );
213 }
214 else
215 {
216 return strlen( psz );
217 }
218 #else
219 return wxMB2WC(buf, psz, n);
220 #endif
221 }
222
223 size_t wxMBConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
224 {
225 #if IGNORE_LIBC
226 if (buf)
227 {
228 for (size_t i = 0; i < wxStrlen( psz )+1; i++)
229 buf[i] = (char) psz[i];
230 return wxStrlen( psz );
231 }
232 else
233 {
234 return wxStrlen( psz );
235 }
236 #else
237 return wxWC2MB(buf, psz, n);
238 #endif
239 }
240
241 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
242 {
243 if ( psz )
244 {
245 // calculate the length of the buffer needed first
246 size_t nLen = MB2WC(NULL, psz, 0);
247 if ( nLen != (size_t)-1 )
248 {
249 // now do the actual conversion
250 wxWCharBuffer buf(nLen);
251 MB2WC(buf.data(), psz, nLen + 1); // with the trailing NUL
252
253 return buf;
254 }
255 }
256
257 wxWCharBuffer buf((wchar_t *)NULL);
258
259 return buf;
260 }
261
262 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
263 {
264 if ( pwz )
265 {
266 size_t nLen = WC2MB(NULL, pwz, 0);
267 if ( nLen != (size_t)-1 )
268 {
269 wxCharBuffer buf(nLen);
270 WC2MB(buf.data(), pwz, nLen + 1);
271
272 return buf;
273 }
274 }
275
276 wxCharBuffer buf((char *)NULL);
277
278 return buf;
279 }
280
281 // ----------------------------------------------------------------------------
282 // UTF-7
283 // ----------------------------------------------------------------------------
284
285 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7) wxConvUTF7;
286
287 #if 0
288 static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
289 "abcdefghijklmnopqrstuvwxyz"
290 "0123456789'(),-./:?";
291 static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
292 static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
293 "abcdefghijklmnopqrstuvwxyz"
294 "0123456789+/";
295 #endif
296
297 // TODO: write actual implementations of UTF-7 here
298 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
299 const char * WXUNUSED(psz),
300 size_t WXUNUSED(n)) const
301 {
302 return 0;
303 }
304
305 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
306 const wchar_t * WXUNUSED(psz),
307 size_t WXUNUSED(n)) const
308 {
309 return 0;
310 }
311
312 // ----------------------------------------------------------------------------
313 // UTF-8
314 // ----------------------------------------------------------------------------
315
316 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8) wxConvUTF8;
317
318 static wxUint32 utf8_max[]=
319 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
320
321 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
322 {
323 size_t len = 0;
324
325 while (*psz && ((!buf) || (len < n)))
326 {
327 unsigned char cc = *psz++, fc = cc;
328 unsigned cnt;
329 for (cnt = 0; fc & 0x80; cnt++)
330 fc <<= 1;
331 if (!cnt)
332 {
333 // plain ASCII char
334 if (buf)
335 *buf++ = cc;
336 len++;
337 }
338 else
339 {
340 cnt--;
341 if (!cnt)
342 {
343 // invalid UTF-8 sequence
344 return (size_t)-1;
345 }
346 else
347 {
348 unsigned ocnt = cnt - 1;
349 wxUint32 res = cc & (0x3f >> cnt);
350 while (cnt--)
351 {
352 cc = *psz++;
353 if ((cc & 0xC0) != 0x80)
354 {
355 // invalid UTF-8 sequence
356 return (size_t)-1;
357 }
358 res = (res << 6) | (cc & 0x3f);
359 }
360 if (res <= utf8_max[ocnt])
361 {
362 // illegal UTF-8 encoding
363 return (size_t)-1;
364 }
365 #ifdef WC_UTF16
366 size_t pa = encode_utf16(res, buf);
367 if (pa == (size_t)-1)
368 return (size_t)-1;
369 if (buf)
370 buf += pa;
371 len += pa;
372 #else // !WC_UTF16
373 if (buf)
374 *buf++ = res;
375 len++;
376 #endif // WC_UTF16/!WC_UTF16
377 }
378 }
379 }
380 if (buf && (len < n))
381 *buf = 0;
382 return len;
383 }
384
385 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
386 {
387 size_t len = 0;
388
389 while (*psz && ((!buf) || (len < n)))
390 {
391 wxUint32 cc;
392 #ifdef WC_UTF16
393 size_t pa = decode_utf16(psz, cc);
394 psz += (pa == (size_t)-1) ? 1 : pa;
395 #else
396 cc=(*psz++) & 0x7fffffff;
397 #endif
398 unsigned cnt;
399 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
400 if (!cnt)
401 {
402 // plain ASCII char
403 if (buf)
404 *buf++ = (char) cc;
405 len++;
406 }
407
408 else
409 {
410 len += cnt + 1;
411 if (buf)
412 {
413 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
414 while (cnt--)
415 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
416 }
417 }
418 }
419
420 if (buf && (len<n)) *buf = 0;
421
422 return len;
423 }
424
425 // ============================================================================
426 // wxCharacterSet and derived classes
427 // ============================================================================
428
429 // ----------------------------------------------------------------------------
430 // wxCharacterSet is the ABC for the classes below
431 // ----------------------------------------------------------------------------
432
// Abstract interface implemented by all the conversion backends in this file.
class wxCharacterSet
{
public:
    wxCharacterSet() { }
    virtual ~wxCharacterSet() { }

    // conversion from multibyte to wide characters; same parameters as
    // wxMBConv::MB2WC()
    virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) = 0;

    // conversion from wide characters to multibyte; same parameters as
    // wxMBConv::WC2MB()
    virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) = 0;

    // returns true if the object was set up successfully and can be used
    virtual bool usable() const = 0;
};
443
444 // ----------------------------------------------------------------------------
445 // ID_CharSet: implementation of wxCharacterSet using an existing wxMBConv
446 // ----------------------------------------------------------------------------
447
448 class ID_CharSet : public wxCharacterSet
449 {
450 public:
451 ID_CharSet(wxMBConv *cnv) : work(cnv) {}
452
453 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
454 { return work ? work->MB2WC(buf,psz,n) : (size_t)-1; }
455
456 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
457 { return work ? work->WC2MB(buf,psz,n) : (size_t)-1; }
458
459 bool usable() const
460 { return work!=NULL; }
461 public:
462 wxMBConv*work;
463 };
464
465
466 // ============================================================================
467 // The classes doing conversion using the iconv_xxx() functions
468 // ============================================================================
469
470 #ifdef HAVE_ICONV
471
472 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
473 // if output buffer is _exactly_ as big as needed. Such case is (unless there's
474 // yet another bug in glibc) the only case when iconv() returns with (size_t)-1
475 // (which means error) and says there are 0 bytes left in the input buffer --
476 // when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
477 // this alternative test for iconv() failure.
478 // [This bug does not appear in glibc 2.2.]
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
    // with the affected glibc versions a (size_t)-1 result is not counted as
    // a failure if errno is E2BIG and there is no input left
    #define ICONV_FAILED(cres, bufLeft) \
        ((cres == (size_t)-1) && (errno != E2BIG || bufLeft != 0))
#else // any other C library
    #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
#endif // glibc <= 2.1/others

// cast the address of an input pointer to the type used for the second
// argument of iconv() (ICONV_CONST is not defined in this file)
#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
487
488 // ----------------------------------------------------------------------------
489 // IC_CharSet: encapsulates an iconv character set
490 // ----------------------------------------------------------------------------
491
492 class IC_CharSet : public wxCharacterSet
493 {
494 public:
495 IC_CharSet(const wxChar *name);
496 virtual ~IC_CharSet();
497
498 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n);
499 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n);
500
501 bool usable() const
502 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
503
504 protected:
505 // the iconv handlers used to translate from multibyte to wide char and in
506 // the other direction
507 iconv_t m2w,
508 w2m;
509
510 private:
511 // the name (for iconv_open()) of a wide char charset - if none is
512 // available on this machine, it will remain NULL
513 static const char *ms_wcCharsetName;
514
515 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
516 // different endian-ness than the native one
517 static bool ms_wcNeedsSwap;
518 };
519
520 const char *IC_CharSet::ms_wcCharsetName = NULL;
521 bool IC_CharSet::ms_wcNeedsSwap = FALSE;
522
523 IC_CharSet::IC_CharSet(const wxChar *name)
524 : wxCharacterSet(name)
525 {
526 // Do it the hard way
527 char cname[100];
528 for (size_t i = 0; i < wxStrlen(name)+1; i++)
529 cname[i] = (char) name[i];
530
531 // check for charset that represents wchar_t:
532 if (ms_wcCharsetName == NULL)
533 {
534 ms_wcNeedsSwap = FALSE;
535
536 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
537 ms_wcCharsetName = WC_NAME_BEST;
538 m2w = iconv_open(ms_wcCharsetName, cname);
539
540 if (m2w == (iconv_t)-1)
541 {
542 // try charset w/o bytesex info (e.g. "UCS4")
543 // and check for bytesex ourselves:
544 ms_wcCharsetName = WC_NAME;
545 m2w = iconv_open(ms_wcCharsetName, cname);
546
547 // last bet, try if it knows WCHAR_T pseudo-charset
548 if (m2w == (iconv_t)-1)
549 {
550 ms_wcCharsetName = "WCHAR_T";
551 m2w = iconv_open(ms_wcCharsetName, cname);
552 }
553
554 if (m2w != (iconv_t)-1)
555 {
556 char buf[2], *bufPtr;
557 wchar_t wbuf[2], *wbufPtr;
558 size_t insz, outsz;
559 size_t res;
560
561 buf[0] = 'A';
562 buf[1] = 0;
563 wbuf[0] = 0;
564 insz = 2;
565 outsz = SIZEOF_WCHAR_T * 2;
566 wbufPtr = wbuf;
567 bufPtr = buf;
568
569 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
570 (char**)&wbufPtr, &outsz);
571
572 if (ICONV_FAILED(res, insz))
573 {
574 ms_wcCharsetName = NULL;
575 wxLogLastError(wxT("iconv"));
576 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
577 }
578 else
579 {
580 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
581 }
582 }
583 else
584 {
585 ms_wcCharsetName = NULL;
586
587 // VS: we must not output an error here, since wxWindows will safely
588 // fall back to using wxEncodingConverter.
589 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
590 //wxLogError(
591 }
592 }
593 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
594 }
595 else // we already have ms_wcCharsetName
596 {
597 m2w = iconv_open(ms_wcCharsetName, cname);
598 }
599
600 // NB: don't ever pass NULL to iconv_open(), it may crash!
601 if ( ms_wcCharsetName )
602 {
603 w2m = iconv_open( cname, ms_wcCharsetName);
604 }
605 else
606 {
607 w2m = (iconv_t)-1;
608 }
609 }
610
611 IC_CharSet::~IC_CharSet()
612 {
613 if ( m2w != (iconv_t)-1 )
614 iconv_close(m2w);
615 if ( w2m != (iconv_t)-1 )
616 iconv_close(w2m);
617 }
618
619 size_t IC_CharSet::MB2WC(wchar_t *buf, const char *psz, size_t n)
620 {
621 size_t inbuf = strlen(psz);
622 size_t outbuf = n * SIZEOF_WCHAR_T;
623 size_t res, cres;
624 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
625 wchar_t *bufPtr = buf;
626 const char *pszPtr = psz;
627
628 if (buf)
629 {
630 // have destination buffer, convert there
631 cres = iconv(m2w,
632 ICONV_CHAR_CAST(&pszPtr), &inbuf,
633 (char**)&bufPtr, &outbuf);
634 res = n - (outbuf / SIZEOF_WCHAR_T);
635
636 if (ms_wcNeedsSwap)
637 {
638 // convert to native endianness
639 WC_BSWAP(buf /* _not_ bufPtr */, res)
640 }
641
642 // NB: iconv was given only strlen(psz) characters on input, and so
643 // it couldn't convert the trailing zero. Let's do it ourselves
644 // if there's some room left for it in the output buffer.
645 if (res < n)
646 buf[res] = 0;
647 }
648 else
649 {
650 // no destination buffer... convert using temp buffer
651 // to calculate destination buffer requirement
652 wchar_t tbuf[8];
653 res = 0;
654 do {
655 bufPtr = tbuf;
656 outbuf = 8*SIZEOF_WCHAR_T;
657
658 cres = iconv(m2w,
659 ICONV_CHAR_CAST(&pszPtr), &inbuf,
660 (char**)&bufPtr, &outbuf );
661
662 res += 8-(outbuf/SIZEOF_WCHAR_T);
663 } while ((cres==(size_t)-1) && (errno==E2BIG));
664 }
665
666 if (ICONV_FAILED(cres, inbuf))
667 {
668 //VS: it is ok if iconv fails, hence trace only
669 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
670 return (size_t)-1;
671 }
672
673 return res;
674 }
675
676 size_t IC_CharSet::WC2MB(char *buf, const wchar_t *psz, size_t n)
677 {
678 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
679 size_t outbuf = n;
680 size_t res, cres;
681
682 wchar_t *tmpbuf = 0;
683
684 if (ms_wcNeedsSwap)
685 {
686 // need to copy to temp buffer to switch endianness
687 // this absolutely doesn't rock!
688 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
689 // could be in read-only memory, or be accessed in some other thread)
690 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
691 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
692 WC_BSWAP(tmpbuf, inbuf)
693 psz=tmpbuf;
694 }
695
696 if (buf)
697 {
698 // have destination buffer, convert there
699 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
700
701 res = n-outbuf;
702
703 // NB: iconv was given only wcslen(psz) characters on input, and so
704 // it couldn't convert the trailing zero. Let's do it ourselves
705 // if there's some room left for it in the output buffer.
706 if (res < n)
707 buf[0] = 0;
708 }
709 else
710 {
711 // no destination buffer... convert using temp buffer
712 // to calculate destination buffer requirement
713 char tbuf[16];
714 res = 0;
715 do {
716 buf = tbuf; outbuf = 16;
717
718 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
719
720 res += 16 - outbuf;
721 } while ((cres==(size_t)-1) && (errno==E2BIG));
722 }
723
724 if (ms_wcNeedsSwap)
725 {
726 free(tmpbuf);
727 }
728
729 if (ICONV_FAILED(cres, inbuf))
730 {
731 //VS: it is ok if iconv fails, hence trace only
732 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
733 return (size_t)-1;
734 }
735
736 return res;
737 }
738
739 #endif // HAVE_ICONV
740
741 // ============================================================================
742 // Win32 conversion classes
743 // ============================================================================
744
745 #if defined(__WIN32__) && !defined(__WXMICROWIN__) && !defined(__WXUNIVERSAL__)
746
747 // from utils.cpp
748 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
749 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
750
751 class CP_CharSet : public wxCharacterSet
752 {
753 public:
754 CP_CharSet(const wxChar* name)
755 {
756 m_CodePage = wxCharsetToCodepage(name);
757 }
758
759 CP_CharSet(wxFontEncoding encoding)
760 {
761 m_CodePage = wxEncodingToCodepage(encoding);
762 }
763
764 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
765 {
766 const size_t len = ::MultiByteToWideChar
767 (
768 m_CodePage, // code page
769 0, // flags (none)
770 psz, // input string
771 -1, // its length (NUL-terminated)
772 buf, // output string
773 buf ? n : 0 // size of output buffer
774 );
775
776 // note that it returns # of written chars for buf != NULL and *size*
777 // of the needed buffer for buf == NULL
778 return len ? (buf ? len : len - 1) : (size_t)-1;
779 }
780
781 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
782 {
783 const size_t len = ::WideCharToMultiByte
784 (
785 m_CodePage, // code page
786 0, // flags (none)
787 psz, // input string
788 -1, // it is (wide) NUL-terminated
789 buf, // output buffer
790 buf ? n : 0, // and its size
791 NULL, // default "replacement" char
792 NULL // [out] was it used?
793 );
794
795 // see the comment above!
796 return len ? (buf ? len : len - 1) : (size_t)-1;
797 }
798
799 bool usable() const
800 { return m_CodePage != -1; }
801
802 public:
803 long m_CodePage;
804 };
805 #endif // defined(__WIN32__) && !defined(__WXMICROWIN__) && !defined(__WXUNIVERSAL__)
806
807 // ============================================================================
808 // wxEncodingConverter based conversion classes
809 // ============================================================================
810
811 #if wxUSE_FONTMAP
812
813 class EC_CharSet : public wxCharacterSet
814 {
815 private:
816 void Init()
817 {
818 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
819 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
820 }
821
822 public:
823 // temporarily just use wxEncodingConverter stuff,
824 // so that it works while a better implementation is built
825 EC_CharSet(const wxChar* name)
826 {
827 if (name)
828 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, FALSE);
829 else
830 m_enc = wxFONTENCODING_SYSTEM;
831
832 Init();
833 }
834
835 EC_CharSet(wxFontEncoding enc)
836 {
837 m_enc = enc;
838
839 Init();
840 }
841
842 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n))
843 {
844 size_t inbuf = strlen(psz);
845 if (buf)
846 m2w.Convert(psz,buf);
847 return inbuf;
848 }
849
850 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n))
851 {
852 const size_t inbuf = wxWcslen(psz);
853 if (buf)
854 w2m.Convert(psz,buf);
855
856 return inbuf;
857 }
858
859 bool usable() const { return m_ok; }
860
861 public:
862 wxFontEncoding m_enc;
863 wxEncodingConverter m2w, w2m;
864
865 // were we initialized successfully?
866 bool m_ok;
867
868 DECLARE_NO_COPY_CLASS(EC_CharSet)
869 };
870
871 #endif // wxUSE_FONTMAP
872
873 // ----------------------------------------------------------------------------
874 // the function creating the wxCharacterSet for the specified charset on the
875 // current system, trying all possibilities
876 //
877 // it uses the name if it is given or encoding if name == NULL
878 // ----------------------------------------------------------------------------
879
880 static wxCharacterSet *
881 wxGetCharacterSet(const wxChar *name, wxFontEncoding encoding)
882 {
883 // check for the special case of ASCII charset
884 if ( (!name && encoding == wxFONTENCODING_DEFAULT)
885 #if wxUSE_FONTMAP
886 || (name && wxFontMapper::Get()->
887 CharsetToEncoding(name) == wxFONTENCODING_DEFAULT)
888 #endif // wxUSE_FONTMAP
889 )
890 {
891 // don't convert at all
892 return NULL;
893 }
894
895 wxCharacterSet *cset;
896
897 if ( (name &&
898 (wxStricmp(name, wxT("UTF8")) == 0 ||
899 wxStricmp(name, wxT("UTF-8")) == 0)) ||
900 encoding == wxFONTENCODING_UTF8 )
901 {
902 cset = new ID_CharSet(&wxConvUTF8);
903 }
904 else // !UTF-8
905 {
906 #ifdef HAVE_ICONV
907 if ( name )
908 {
909 cset = new IC_CharSet(name);
910 }
911 else
912 #endif // HAVE_ICONV
913 {
914 cset = NULL;
915 }
916 }
917
918 // it can only be NULL in this case
919 #ifndef HAVE_ICONV
920 if ( cset )
921 #endif // !HAVE_ICONV
922 {
923 if ( cset->usable() )
924 return cset;
925
926 delete cset;
927 cset = NULL;
928 }
929
930 #if defined(__WIN32__) && !defined(__WXMICROWIN__) && !defined(__WXUNIVERSAL__)
931 cset = name ? new CP_CharSet(name) : new CP_CharSet(encoding);
932 if ( cset->usable() )
933 return cset;
934
935 delete cset;
936 cset = NULL;
937 #endif // defined(__WIN32__) && !defined(__WXMICROWIN__) && !defined(__WXUNIVERSAL__)
938
939 #if wxUSE_FONTMAP
940 cset = name ? new EC_CharSet(name) : new EC_CharSet(encoding);
941 if ( cset->usable() )
942 return cset;
943
944 delete cset;
945 cset = NULL;
946 #endif // wxUSE_FONTMAP
947
948 wxLogError(_("Cannot convert from encoding '%s'!"),
949 name ? name
950 :
951 #if wxUSE_FONTMAP
952 wxFontMapper::GetEncodingDescription(encoding).c_str()
953 #else // !wxUSE_FONTMAP
954 wxString::Format(_T("%s"), encoding).c_str()
955 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
956 );
957
958 return NULL;
959 }
960
961 // ============================================================================
962 // wxCSConv implementation
963 // ============================================================================
964
965 void wxCSConv::Init()
966 {
967 m_name = (wxChar *)NULL;
968 m_cset = (wxCharacterSet *) NULL;
969 m_deferred = TRUE;
970 }
971
972 wxCSConv::wxCSConv(const wxChar *charset)
973 {
974 Init();
975 m_encoding = wxFONTENCODING_DEFAULT;
976
977 SetName(charset);
978 }
979
980 wxCSConv::wxCSConv(wxFontEncoding encoding)
981 {
982 Init();
983
984 m_encoding = encoding;
985 }
986
987 wxCSConv::~wxCSConv()
988 {
989 Clear();
990 }
991
992 wxCSConv::wxCSConv(const wxCSConv& conv)
993 : wxMBConv()
994 {
995 Init();
996
997 SetName(conv.m_name);
998 m_encoding = conv.m_encoding;
999 }
1000
1001 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
1002 {
1003 Clear();
1004
1005 SetName(conv.m_name);
1006 m_encoding = conv.m_encoding;
1007
1008 return *this;
1009 }
1010
1011 void wxCSConv::Clear()
1012 {
1013 free(m_name);
1014 delete m_cset;
1015
1016 m_name = NULL;
1017 m_cset = NULL;
1018 }
1019
1020 void wxCSConv::SetName(const wxChar *charset)
1021 {
1022 if (charset)
1023 {
1024 m_name = wxStrdup(charset);
1025 m_deferred = TRUE;
1026 }
1027 }
1028
1029 void wxCSConv::LoadNow()
1030 {
1031 if ( m_deferred )
1032 {
1033 // it would probably be better to make GetSystemEncodingName() always
1034 // available (i.e. even when wxUSE_INTL == 0)?
1035 #if wxUSE_INTL
1036 if ( !m_name && m_encoding == wxFONTENCODING_DEFAULT )
1037 {
1038 wxString name = wxLocale::GetSystemEncodingName();
1039 if ( !name.empty() )
1040 {
1041 SetName(name);
1042 }
1043 }
1044 #endif // wxUSE_INTL
1045
1046 // wxGetCharacterSet() complains about NULL name
1047 m_cset = wxGetCharacterSet(m_name, m_encoding);
1048 m_deferred = FALSE;
1049 }
1050 }
1051
1052 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1053 {
1054 ((wxCSConv *)this)->LoadNow(); // discard constness
1055
1056 if (m_cset)
1057 return m_cset->MB2WC(buf, psz, n);
1058
1059 // latin-1 (direct)
1060 size_t len = strlen(psz);
1061
1062 if (buf)
1063 {
1064 for (size_t c = 0; c <= len; c++)
1065 buf[c] = (unsigned char)(psz[c]);
1066 }
1067
1068 return len;
1069 }
1070
1071 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1072 {
1073 ((wxCSConv *)this)->LoadNow(); // discard constness
1074
1075 if (m_cset)
1076 return m_cset->WC2MB(buf, psz, n);
1077
1078 // latin-1 (direct)
1079 const size_t len = wxWcslen(psz);
1080 if (buf)
1081 {
1082 for (size_t c = 0; c <= len; c++)
1083 buf[c] = (psz[c] > 0xff) ? '?' : psz[c];
1084 }
1085
1086 return len;
1087 }
1088
1089 #endif // wxUSE_WCHAR_T
1090
1091