]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
Added AppTraits for starting/ending a thread.
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
9 // Licence: wxWindows licence
10 /////////////////////////////////////////////////////////////////////////////
11
12 // ============================================================================
13 // declarations
14 // ============================================================================
15
16 // ----------------------------------------------------------------------------
17 // headers
18 // ----------------------------------------------------------------------------
19
20 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
21 #pragma implementation "strconv.h"
22 #endif
23
24 // For compilers that support precompilation, includes "wx.h".
25 #include "wx/wxprec.h"
26
27 #ifdef __BORLANDC__
28 #pragma hdrstop
29 #endif
30
31 #ifndef WX_PRECOMP
32 #include "wx/intl.h"
33 #include "wx/log.h"
34 #endif // WX_PRECOMP
35
36 #ifdef __WXMSW__
37 #include "wx/msw/private.h"
38 #endif
39
40 #ifndef __WXWINCE__
41 #include <errno.h>
42 #endif
43
44 #include <ctype.h>
45 #include <string.h>
46 #include <stdlib.h>
47
48 #include "wx/module.h"
49 #include "wx/strconv.h"
50
51 // ----------------------------------------------------------------------------
52 // globals
53 // ----------------------------------------------------------------------------
54
55 #if wxUSE_WCHAR_T
56 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc;
57 WXDLLIMPEXP_DATA_BASE(wxCSConv) wxConvLocal((const wxChar *)NULL);
58 WXDLLIMPEXP_DATA_BASE(wxCSConv) wxConvISO8859_1(_T("iso-8859-1"));
59 #else
60 // stand-ins in absence of wchar_t
61 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
62 wxConvFile,
63 wxConvISO8859_1,
64 wxConvLocal,
65 wxConvUTF8;
66 #endif // wxUSE_WCHAR_T
67
68 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibc;
69
70 class wxStrConvModule: public wxModule
71 {
72 public:
73 wxStrConvModule() : wxModule() { }
74 virtual bool OnInit() { return TRUE; }
75 virtual void OnExit()
76 {
77 #if wxUSE_WCHAR_T
78 wxConvLocal.Clear();
79 wxConvISO8859_1.Clear();
80 #endif
81 }
82
83 DECLARE_DYNAMIC_CLASS(wxStrConvModule)
84 };
85
86 IMPLEMENT_DYNAMIC_CLASS(wxStrConvModule, wxModule)
87
88
89 // ----------------------------------------------------------------------------
90 // headers
91 // ----------------------------------------------------------------------------
92
93 #if wxUSE_WCHAR_T
94
95 #ifdef __SALFORDC__
96 #include <clib.h>
97 #endif
98
99 #ifdef HAVE_ICONV
100 #include <iconv.h>
101 #endif
102
103 #include "wx/encconv.h"
104 #include "wx/fontmap.h"
105
106 // ----------------------------------------------------------------------------
107 // macros
108 // ----------------------------------------------------------------------------
109
// Byte-swap in place the first len elements of the array str. The macro
// arguments are parenthesised so that expressions (e.g. "p + 1", "a ? b : c")
// can be passed safely. The macros expand to a braced block, hence they are
// used without a trailing semicolon.
#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<(len); _c++) (str)[_c]=wxUINT32_SWAP_ALWAYS((str)[_c]); }
#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<(len); _c++) (str)[_c]=wxUINT16_SWAP_ALWAYS((str)[_c]); }
112
113 // under Unix SIZEOF_WCHAR_T is defined by configure, but under other platforms
114 // it might be not defined - assume the most common value
#if !defined(SIZEOF_WCHAR_T)
    #define SIZEOF_WCHAR_T 2
#endif // !defined(SIZEOF_WCHAR_T)

// Select, depending on the size of wchar_t:
//  - WC_NAME:      the charset name without byte order information
//  - WC_NAME_BEST: the charset name with explicit byte order
//  - WC_BSWAP:     the macro swapping the bytes of a wchar_t array
//  - WC_UTF16:     defined only if wchar_t is 2 bytes wide
#if SIZEOF_WCHAR_T == 2
    #define WC_NAME         "UTF16"
    #define WC_BSWAP         BSWAP_UTF16
    #define WC_UTF16
    #ifdef WORDS_BIGENDIAN
      #define WC_NAME_BEST  "UTF-16BE"
    #else
      #define WC_NAME_BEST  "UTF-16LE"
    #endif
#elif SIZEOF_WCHAR_T == 4
    #define WC_NAME         "UCS4"
    #define WC_BSWAP         BSWAP_UCS4
    #ifdef WORDS_BIGENDIAN
      #define WC_NAME_BEST  "UCS-4BE"
    #else
      #define WC_NAME_BEST  "UCS-4LE"
    #endif
#else // sizeof(wchar_t) != 2 nor 4
    // I don't know what to do about this
    #error "Weird sizeof(wchar_t): please report your platform details to wx-users mailing list"
#endif
140
141 // ============================================================================
142 // implementation
143 // ============================================================================
144
145 // ----------------------------------------------------------------------------
146 // UTF-16 en/decoding
147 // ----------------------------------------------------------------------------
148
149 #ifdef WC_UTF16
150
151 static size_t encode_utf16(wxUint32 input, wchar_t *output)
152 {
153 if (input<=0xffff)
154 {
155 if (output) *output++ = (wchar_t) input;
156 return 1;
157 }
158 else if (input>=0x110000)
159 {
160 return (size_t)-1;
161 }
162 else
163 {
164 if (output)
165 {
166 *output++ = (wchar_t) ((input >> 10)+0xd7c0);
167 *output++ = (wchar_t) ((input&0x3ff)+0xdc00);
168 }
169 return 2;
170 }
171 }
172
173 static size_t decode_utf16(const wchar_t* input, wxUint32& output)
174 {
175 if ((*input<0xd800) || (*input>0xdfff))
176 {
177 output = *input;
178 return 1;
179 }
180 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
181 {
182 output = *input;
183 return (size_t)-1;
184 }
185 else
186 {
187 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
188 return 2;
189 }
190 }
191
192 #endif // WC_UTF16
193
194 // ----------------------------------------------------------------------------
195 // wxMBConv
196 // ----------------------------------------------------------------------------
197
198 #define IGNORE_LIBC 0
199
200 wxMBConv::~wxMBConv()
201 {
202 // nothing to do here
203 }
204
205 size_t wxMBConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
206 {
207 #if IGNORE_LIBC
208 if (buf)
209 {
210 for (size_t i = 0; i < strlen( psz )+1; i++)
211 buf[i] = (wchar_t) psz[i];
212 return strlen( psz );
213 }
214 else
215 {
216 return strlen( psz );
217 }
218 #else
219 return wxMB2WC(buf, psz, n);
220 #endif
221 }
222
223 size_t wxMBConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
224 {
225 #if IGNORE_LIBC
226 if (buf)
227 {
228 for (size_t i = 0; i < wxStrlen( psz )+1; i++)
229 buf[i] = (char) psz[i];
230 return wxStrlen( psz );
231 }
232 else
233 {
234 return wxStrlen( psz );
235 }
236 #else
237 return wxWC2MB(buf, psz, n);
238 #endif
239 }
240
241 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
242 {
243 if ( psz )
244 {
245 // calculate the length of the buffer needed first
246 size_t nLen = MB2WC(NULL, psz, 0);
247 if ( nLen != (size_t)-1 )
248 {
249 // now do the actual conversion
250 wxWCharBuffer buf(nLen);
251 MB2WC(buf.data(), psz, nLen + 1); // with the trailing NUL
252
253 return buf;
254 }
255 }
256
257 wxWCharBuffer buf((wchar_t *)NULL);
258
259 return buf;
260 }
261
262 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
263 {
264 if ( pwz )
265 {
266 size_t nLen = WC2MB(NULL, pwz, 0);
267 if ( nLen != (size_t)-1 )
268 {
269 wxCharBuffer buf(nLen);
270 WC2MB(buf.data(), pwz, nLen + 1);
271
272 return buf;
273 }
274 }
275
276 wxCharBuffer buf((char *)NULL);
277
278 return buf;
279 }
280
281 // ----------------------------------------------------------------------------
282 // UTF-7
283 // ----------------------------------------------------------------------------
284
285 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7) wxConvUTF7;
286
287 #if 0
288 static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
289 "abcdefghijklmnopqrstuvwxyz"
290 "0123456789'(),-./:?";
291 static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
292 static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
293 "abcdefghijklmnopqrstuvwxyz"
294 "0123456789+/";
295 #endif
296
297 // TODO: write actual implementations of UTF-7 here
298 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
299 const char * WXUNUSED(psz),
300 size_t WXUNUSED(n)) const
301 {
302 return 0;
303 }
304
305 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
306 const wchar_t * WXUNUSED(psz),
307 size_t WXUNUSED(n)) const
308 {
309 return 0;
310 }
311
312 // ----------------------------------------------------------------------------
313 // UTF-8
314 // ----------------------------------------------------------------------------
315
316 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8) wxConvUTF8;
317
318 static wxUint32 utf8_max[]=
319 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
320
321 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
322 {
323 size_t len = 0;
324
325 while (*psz && ((!buf) || (len < n)))
326 {
327 unsigned char cc = *psz++, fc = cc;
328 unsigned cnt;
329 for (cnt = 0; fc & 0x80; cnt++)
330 fc <<= 1;
331 if (!cnt)
332 {
333 // plain ASCII char
334 if (buf)
335 *buf++ = cc;
336 len++;
337 }
338 else
339 {
340 cnt--;
341 if (!cnt)
342 {
343 // invalid UTF-8 sequence
344 return (size_t)-1;
345 }
346 else
347 {
348 unsigned ocnt = cnt - 1;
349 wxUint32 res = cc & (0x3f >> cnt);
350 while (cnt--)
351 {
352 cc = *psz++;
353 if ((cc & 0xC0) != 0x80)
354 {
355 // invalid UTF-8 sequence
356 return (size_t)-1;
357 }
358 res = (res << 6) | (cc & 0x3f);
359 }
360 if (res <= utf8_max[ocnt])
361 {
362 // illegal UTF-8 encoding
363 return (size_t)-1;
364 }
365 #ifdef WC_UTF16
366 size_t pa = encode_utf16(res, buf);
367 if (pa == (size_t)-1)
368 return (size_t)-1;
369 if (buf)
370 buf += pa;
371 len += pa;
372 #else // !WC_UTF16
373 if (buf)
374 *buf++ = res;
375 len++;
376 #endif // WC_UTF16/!WC_UTF16
377 }
378 }
379 }
380 if (buf && (len < n))
381 *buf = 0;
382 return len;
383 }
384
385 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
386 {
387 size_t len = 0;
388
389 while (*psz && ((!buf) || (len < n)))
390 {
391 wxUint32 cc;
392 #ifdef WC_UTF16
393 size_t pa = decode_utf16(psz, cc);
394 psz += (pa == (size_t)-1) ? 1 : pa;
395 #else
396 cc=(*psz++) & 0x7fffffff;
397 #endif
398 unsigned cnt;
399 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
400 if (!cnt)
401 {
402 // plain ASCII char
403 if (buf)
404 *buf++ = (char) cc;
405 len++;
406 }
407
408 else
409 {
410 len += cnt + 1;
411 if (buf)
412 {
413 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
414 while (cnt--)
415 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
416 }
417 }
418 }
419
420 if (buf && (len<n)) *buf = 0;
421
422 return len;
423 }
424
425 // ============================================================================
426 // wxCharacterSet and derived classes
427 // ============================================================================
428
429 // ----------------------------------------------------------------------------
430 // wxCharacterSet is the ABC for the classes below
431 // ----------------------------------------------------------------------------
432
// Abstract interface implemented by all the converter classes below.
class wxCharacterSet
{
public:
    wxCharacterSet() { }
    virtual ~wxCharacterSet() { }

    // multibyte -> wide char conversion
    virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) = 0;

    // wide char -> multibyte conversion
    virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) = 0;

    // wxGetCharacterSet() only returns the objects for which this is true
    virtual bool usable() const = 0;
};
443
444 // ----------------------------------------------------------------------------
445 // ID_CharSet: implementation of wxCharacterSet using an existing wxMBConv
446 // ----------------------------------------------------------------------------
447
448 class ID_CharSet : public wxCharacterSet
449 {
450 public:
451 ID_CharSet(wxMBConv *cnv) : work(cnv) {}
452
453 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
454 { return work ? work->MB2WC(buf,psz,n) : (size_t)-1; }
455
456 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
457 { return work ? work->WC2MB(buf,psz,n) : (size_t)-1; }
458
459 bool usable() const
460 { return work!=NULL; }
461 public:
462 wxMBConv*work;
463 };
464
465
466 // ============================================================================
467 // The classes doing conversion using the iconv_xxx() functions
468 // ============================================================================
469
470 #ifdef HAVE_ICONV
471
472 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
473 // if output buffer is _exactly_ as big as needed. Such case is (unless there's
474 // yet another bug in glibc) the only case when iconv() returns with (size_t)-1
475 // (which means error) and says there are 0 bytes left in the input buffer --
476 // when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
477 // this alternative test for iconv() failure.
478 // [This bug does not appear in glibc 2.2.]
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
    // for glibc 2.1 and earlier: E2BIG with nothing left in the input
    // buffer is not counted as a failure
    #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
                                         (errno != E2BIG || bufLeft != 0))
#else
    // otherwise the return value of iconv() alone decides
    #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
#endif

// cast a pointer to the input pointer to the type used for the second
// argument of iconv() (ICONV_CONST is not defined in this file)
#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
487
488 // ----------------------------------------------------------------------------
489 // IC_CharSet: encapsulates an iconv character set
490 // ----------------------------------------------------------------------------
491
492 class IC_CharSet : public wxCharacterSet
493 {
494 public:
495 IC_CharSet(const wxChar *name);
496 virtual ~IC_CharSet();
497
498 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n);
499 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n);
500
501 bool usable() const
502 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
503
504 protected:
505 // the iconv handlers used to translate from multibyte to wide char and in
506 // the other direction
507 iconv_t m2w,
508 w2m;
509
510 private:
511 // the name (for iconv_open()) of a wide char charset - if none is
512 // available on this machine, it will remain NULL
513 static const char *ms_wcCharsetName;
514
515 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
516 // different endian-ness than the native one
517 static bool ms_wcNeedsSwap;
518 };
519
520 const char *IC_CharSet::ms_wcCharsetName = NULL;
521 bool IC_CharSet::ms_wcNeedsSwap = FALSE;
522
523 IC_CharSet::IC_CharSet(const wxChar *name)
524 {
525 // Do it the hard way
526 char cname[100];
527 for (size_t i = 0; i < wxStrlen(name)+1; i++)
528 cname[i] = (char) name[i];
529
530 // check for charset that represents wchar_t:
531 if (ms_wcCharsetName == NULL)
532 {
533 ms_wcNeedsSwap = FALSE;
534
535 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
536 ms_wcCharsetName = WC_NAME_BEST;
537 m2w = iconv_open(ms_wcCharsetName, cname);
538
539 if (m2w == (iconv_t)-1)
540 {
541 // try charset w/o bytesex info (e.g. "UCS4")
542 // and check for bytesex ourselves:
543 ms_wcCharsetName = WC_NAME;
544 m2w = iconv_open(ms_wcCharsetName, cname);
545
546 // last bet, try if it knows WCHAR_T pseudo-charset
547 if (m2w == (iconv_t)-1)
548 {
549 ms_wcCharsetName = "WCHAR_T";
550 m2w = iconv_open(ms_wcCharsetName, cname);
551 }
552
553 if (m2w != (iconv_t)-1)
554 {
555 char buf[2], *bufPtr;
556 wchar_t wbuf[2], *wbufPtr;
557 size_t insz, outsz;
558 size_t res;
559
560 buf[0] = 'A';
561 buf[1] = 0;
562 wbuf[0] = 0;
563 insz = 2;
564 outsz = SIZEOF_WCHAR_T * 2;
565 wbufPtr = wbuf;
566 bufPtr = buf;
567
568 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
569 (char**)&wbufPtr, &outsz);
570
571 if (ICONV_FAILED(res, insz))
572 {
573 ms_wcCharsetName = NULL;
574 wxLogLastError(wxT("iconv"));
575 wxLogError(_("Conversion to charset '%s' doesn't work."), name);
576 }
577 else
578 {
579 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
580 }
581 }
582 else
583 {
584 ms_wcCharsetName = NULL;
585
586 // VS: we must not output an error here, since wxWindows will safely
587 // fall back to using wxEncodingConverter.
588 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
589 //wxLogError(
590 }
591 }
592 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
593 }
594 else // we already have ms_wcCharsetName
595 {
596 m2w = iconv_open(ms_wcCharsetName, cname);
597 }
598
599 // NB: don't ever pass NULL to iconv_open(), it may crash!
600 if ( ms_wcCharsetName )
601 {
602 w2m = iconv_open( cname, ms_wcCharsetName);
603 }
604 else
605 {
606 w2m = (iconv_t)-1;
607 }
608 }
609
610 IC_CharSet::~IC_CharSet()
611 {
612 if ( m2w != (iconv_t)-1 )
613 iconv_close(m2w);
614 if ( w2m != (iconv_t)-1 )
615 iconv_close(w2m);
616 }
617
618 size_t IC_CharSet::MB2WC(wchar_t *buf, const char *psz, size_t n)
619 {
620 size_t inbuf = strlen(psz);
621 size_t outbuf = n * SIZEOF_WCHAR_T;
622 size_t res, cres;
623 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
624 wchar_t *bufPtr = buf;
625 const char *pszPtr = psz;
626
627 if (buf)
628 {
629 // have destination buffer, convert there
630 cres = iconv(m2w,
631 ICONV_CHAR_CAST(&pszPtr), &inbuf,
632 (char**)&bufPtr, &outbuf);
633 res = n - (outbuf / SIZEOF_WCHAR_T);
634
635 if (ms_wcNeedsSwap)
636 {
637 // convert to native endianness
638 WC_BSWAP(buf /* _not_ bufPtr */, res)
639 }
640
641 // NB: iconv was given only strlen(psz) characters on input, and so
642 // it couldn't convert the trailing zero. Let's do it ourselves
643 // if there's some room left for it in the output buffer.
644 if (res < n)
645 buf[res] = 0;
646 }
647 else
648 {
649 // no destination buffer... convert using temp buffer
650 // to calculate destination buffer requirement
651 wchar_t tbuf[8];
652 res = 0;
653 do {
654 bufPtr = tbuf;
655 outbuf = 8*SIZEOF_WCHAR_T;
656
657 cres = iconv(m2w,
658 ICONV_CHAR_CAST(&pszPtr), &inbuf,
659 (char**)&bufPtr, &outbuf );
660
661 res += 8-(outbuf/SIZEOF_WCHAR_T);
662 } while ((cres==(size_t)-1) && (errno==E2BIG));
663 }
664
665 if (ICONV_FAILED(cres, inbuf))
666 {
667 //VS: it is ok if iconv fails, hence trace only
668 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
669 return (size_t)-1;
670 }
671
672 return res;
673 }
674
675 size_t IC_CharSet::WC2MB(char *buf, const wchar_t *psz, size_t n)
676 {
677 size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
678 size_t outbuf = n;
679 size_t res, cres;
680
681 wchar_t *tmpbuf = 0;
682
683 if (ms_wcNeedsSwap)
684 {
685 // need to copy to temp buffer to switch endianness
686 // this absolutely doesn't rock!
687 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
688 // could be in read-only memory, or be accessed in some other thread)
689 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
690 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
691 WC_BSWAP(tmpbuf, inbuf)
692 psz=tmpbuf;
693 }
694
695 if (buf)
696 {
697 // have destination buffer, convert there
698 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
699
700 res = n-outbuf;
701
702 // NB: iconv was given only wcslen(psz) characters on input, and so
703 // it couldn't convert the trailing zero. Let's do it ourselves
704 // if there's some room left for it in the output buffer.
705 if (res < n)
706 buf[0] = 0;
707 }
708 else
709 {
710 // no destination buffer... convert using temp buffer
711 // to calculate destination buffer requirement
712 char tbuf[16];
713 res = 0;
714 do {
715 buf = tbuf; outbuf = 16;
716
717 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
718
719 res += 16 - outbuf;
720 } while ((cres==(size_t)-1) && (errno==E2BIG));
721 }
722
723 if (ms_wcNeedsSwap)
724 {
725 free(tmpbuf);
726 }
727
728 if (ICONV_FAILED(cres, inbuf))
729 {
730 //VS: it is ok if iconv fails, hence trace only
731 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
732 return (size_t)-1;
733 }
734
735 return res;
736 }
737
738 #endif // HAVE_ICONV
739
740 // ============================================================================
741 // Win32 conversion classes
742 // ============================================================================
743
744 #if defined(__WIN32__) && !defined(__WXMICROWIN__) && !defined(__WXUNIVERSAL__)
745
746 // from utils.cpp
747 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
748 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
749
750 class CP_CharSet : public wxCharacterSet
751 {
752 public:
753 CP_CharSet(const wxChar* name)
754 {
755 m_CodePage = wxCharsetToCodepage(name);
756 }
757
758 CP_CharSet(wxFontEncoding encoding)
759 {
760 m_CodePage = wxEncodingToCodepage(encoding);
761 }
762
763 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
764 {
765 const size_t len = ::MultiByteToWideChar
766 (
767 m_CodePage, // code page
768 0, // flags (none)
769 psz, // input string
770 -1, // its length (NUL-terminated)
771 buf, // output string
772 buf ? n : 0 // size of output buffer
773 );
774
775 // note that it returns # of written chars for buf != NULL and *size*
776 // of the needed buffer for buf == NULL
777 return len ? (buf ? len : len - 1) : (size_t)-1;
778 }
779
780 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
781 {
782 const size_t len = ::WideCharToMultiByte
783 (
784 m_CodePage, // code page
785 0, // flags (none)
786 psz, // input string
787 -1, // it is (wide) NUL-terminated
788 buf, // output buffer
789 buf ? n : 0, // and its size
790 NULL, // default "replacement" char
791 NULL // [out] was it used?
792 );
793
794 // see the comment above!
795 return len ? (buf ? len : len - 1) : (size_t)-1;
796 }
797
798 bool usable() const
799 { return m_CodePage != -1; }
800
801 public:
802 long m_CodePage;
803 };
804 #endif // defined(__WIN32__) && !defined(__WXMICROWIN__) && !defined(__WXUNIVERSAL__)
805
806 // ============================================================================
807 // wxEncodingConverter based conversion classes
808 // ============================================================================
809
810 #if wxUSE_FONTMAP
811
812 class EC_CharSet : public wxCharacterSet
813 {
814 private:
815 void Init()
816 {
817 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
818 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
819 }
820
821 public:
822 // temporarily just use wxEncodingConverter stuff,
823 // so that it works while a better implementation is built
824 EC_CharSet(const wxChar* name)
825 {
826 if (name)
827 m_enc = wxFontMapper::Get()->CharsetToEncoding(name, FALSE);
828 else
829 m_enc = wxFONTENCODING_SYSTEM;
830
831 Init();
832 }
833
834 EC_CharSet(wxFontEncoding enc)
835 {
836 m_enc = enc;
837
838 Init();
839 }
840
841 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n))
842 {
843 size_t inbuf = strlen(psz);
844 if (buf)
845 m2w.Convert(psz,buf);
846 return inbuf;
847 }
848
849 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n))
850 {
851 const size_t inbuf = wxWcslen(psz);
852 if (buf)
853 w2m.Convert(psz,buf);
854
855 return inbuf;
856 }
857
858 bool usable() const { return m_ok; }
859
860 public:
861 wxFontEncoding m_enc;
862 wxEncodingConverter m2w, w2m;
863
864 // were we initialized successfully?
865 bool m_ok;
866
867 DECLARE_NO_COPY_CLASS(EC_CharSet)
868 };
869
870 #endif // wxUSE_FONTMAP
871
872 // ----------------------------------------------------------------------------
873 // the function creating the wxCharacterSet for the specified charset on the
874 // current system, trying all possibilities
875 //
876 // it uses the name if it is given or encoding if name == NULL
877 // ----------------------------------------------------------------------------
878
879 static wxCharacterSet *
880 wxGetCharacterSet(const wxChar *name, wxFontEncoding encoding)
881 {
882 // check for the special case of ASCII charset
883 if ( (!name && encoding == wxFONTENCODING_DEFAULT)
884 #if wxUSE_FONTMAP
885 || (name && wxFontMapper::Get()->
886 CharsetToEncoding(name) == wxFONTENCODING_DEFAULT)
887 #endif // wxUSE_FONTMAP
888 )
889 {
890 // don't convert at all
891 return NULL;
892 }
893
894 wxCharacterSet *cset;
895
896 if ( (name &&
897 (wxStricmp(name, wxT("UTF8")) == 0 ||
898 wxStricmp(name, wxT("UTF-8")) == 0)) ||
899 encoding == wxFONTENCODING_UTF8 )
900 {
901 cset = new ID_CharSet(&wxConvUTF8);
902 }
903 else // !UTF-8
904 {
905 #ifdef HAVE_ICONV
906 if ( name )
907 {
908 cset = new IC_CharSet(name);
909 }
910 else
911 #endif // HAVE_ICONV
912 {
913 cset = NULL;
914 }
915 }
916
917 // it can only be NULL in this case
918 #ifndef HAVE_ICONV
919 if ( cset )
920 #endif // !HAVE_ICONV
921 {
922 if ( cset->usable() )
923 return cset;
924
925 delete cset;
926 cset = NULL;
927 }
928
929 #if defined(__WIN32__) && !defined(__WXMICROWIN__) && !defined(__WXUNIVERSAL__)
930 cset = name ? new CP_CharSet(name) : new CP_CharSet(encoding);
931 if ( cset->usable() )
932 return cset;
933
934 delete cset;
935 cset = NULL;
936 #endif // defined(__WIN32__) && !defined(__WXMICROWIN__) && !defined(__WXUNIVERSAL__)
937
938 #if wxUSE_FONTMAP
939 cset = name ? new EC_CharSet(name) : new EC_CharSet(encoding);
940 if ( cset->usable() )
941 return cset;
942
943 delete cset;
944 cset = NULL;
945 #endif // wxUSE_FONTMAP
946
947 wxLogError(_("Cannot convert from encoding '%s'!"),
948 name ? name
949 :
950 #if wxUSE_FONTMAP
951 wxFontMapper::GetEncodingDescription(encoding).c_str()
952 #else // !wxUSE_FONTMAP
953 wxString::Format(_T("%s"), encoding).c_str()
954 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
955 );
956
957 return NULL;
958 }
959
960 // ============================================================================
961 // wxCSConv implementation
962 // ============================================================================
963
964 void wxCSConv::Init()
965 {
966 m_name = (wxChar *)NULL;
967 m_cset = (wxCharacterSet *) NULL;
968 m_deferred = TRUE;
969 }
970
971 wxCSConv::wxCSConv(const wxChar *charset)
972 {
973 Init();
974 m_encoding = wxFONTENCODING_DEFAULT;
975
976 SetName(charset);
977 }
978
979 wxCSConv::wxCSConv(wxFontEncoding encoding)
980 {
981 Init();
982
983 m_encoding = encoding;
984 }
985
986 wxCSConv::~wxCSConv()
987 {
988 Clear();
989 }
990
991 wxCSConv::wxCSConv(const wxCSConv& conv)
992 : wxMBConv()
993 {
994 Init();
995
996 SetName(conv.m_name);
997 m_encoding = conv.m_encoding;
998 }
999
1000 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
1001 {
1002 Clear();
1003
1004 SetName(conv.m_name);
1005 m_encoding = conv.m_encoding;
1006
1007 return *this;
1008 }
1009
1010 void wxCSConv::Clear()
1011 {
1012 free(m_name);
1013 delete m_cset;
1014
1015 m_name = NULL;
1016 m_cset = NULL;
1017 }
1018
1019 void wxCSConv::SetName(const wxChar *charset)
1020 {
1021 if (charset)
1022 {
1023 m_name = wxStrdup(charset);
1024 m_deferred = TRUE;
1025 }
1026 }
1027
1028 void wxCSConv::LoadNow()
1029 {
1030 if ( m_deferred )
1031 {
1032 // it would probably be better to make GetSystemEncodingName() always
1033 // available (i.e. even when wxUSE_INTL == 0)?
1034 #if wxUSE_INTL
1035 if ( !m_name && m_encoding == wxFONTENCODING_DEFAULT )
1036 {
1037 wxString name = wxLocale::GetSystemEncodingName();
1038 if ( !name.empty() )
1039 {
1040 SetName(name);
1041 }
1042 }
1043 #endif // wxUSE_INTL
1044
1045 // wxGetCharacterSet() complains about NULL name
1046 m_cset = wxGetCharacterSet(m_name, m_encoding);
1047 m_deferred = FALSE;
1048 }
1049 }
1050
1051 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1052 {
1053 ((wxCSConv *)this)->LoadNow(); // discard constness
1054
1055 if (m_cset)
1056 return m_cset->MB2WC(buf, psz, n);
1057
1058 // latin-1 (direct)
1059 size_t len = strlen(psz);
1060
1061 if (buf)
1062 {
1063 for (size_t c = 0; c <= len; c++)
1064 buf[c] = (unsigned char)(psz[c]);
1065 }
1066
1067 return len;
1068 }
1069
1070 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1071 {
1072 ((wxCSConv *)this)->LoadNow(); // discard constness
1073
1074 if (m_cset)
1075 return m_cset->WC2MB(buf, psz, n);
1076
1077 // latin-1 (direct)
1078 const size_t len = wxWcslen(psz);
1079 if (buf)
1080 {
1081 for (size_t c = 0; c <= len; c++)
1082 buf[c] = (psz[c] > 0xff) ? '?' : psz[c];
1083 }
1084
1085 return len;
1086 }
1087
1088 #endif // wxUSE_WCHAR_T
1089
1090