]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
don't use -q option with egrep, Solaris doesn't have it (bug 517145)
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
9 // Licence: wxWindows license
10 /////////////////////////////////////////////////////////////////////////////
11
12 // ============================================================================
13 // declarations
14 // ============================================================================
15
16 // ----------------------------------------------------------------------------
17 // headers
18 // ----------------------------------------------------------------------------
19
20 #ifdef __GNUG__
21 #pragma implementation "strconv.h"
22 #endif
23
24 // For compilers that support precompilation, includes "wx.h".
25 #include "wx/wxprec.h"
26
27 #ifdef __BORLANDC__
28 #pragma hdrstop
29 #endif
30
31 #ifndef WX_PRECOMP
32 #include "wx/intl.h"
33 #include "wx/log.h"
34 #endif // WX_PRECOMP
35
36 #ifdef __WXMSW__
37 #include "wx/msw/private.h"
38 #endif
39
40 #include <errno.h>
41 #include <ctype.h>
42 #include <string.h>
43 #include <stdlib.h>
44
45 #include "wx/strconv.h"
46
47 // ----------------------------------------------------------------------------
48 // globals
49 // ----------------------------------------------------------------------------
50
51 #if wxUSE_WCHAR_T
52 WXDLLEXPORT_DATA(wxMBConv) wxConvLibc;
53 WXDLLEXPORT_DATA(wxCSConv) wxConvLocal((const wxChar *)NULL);
54 #else
55 // stand-ins in absence of wchar_t
56 WXDLLEXPORT_DATA(wxMBConv) wxConvLibc, wxConvFile;
57 #endif // wxUSE_WCHAR_T
58
59 WXDLLEXPORT_DATA(wxMBConv *) wxConvCurrent = &wxConvLibc;
60
61 // ----------------------------------------------------------------------------
62 // headers
63 // ----------------------------------------------------------------------------
64
65 #if wxUSE_WCHAR_T
66
67 #ifdef __SALFORDC__
68 #include <clib.h>
69 #endif
70
71 #ifdef HAVE_ICONV
72 #include <iconv.h>
73 #endif
74
75 #include "wx/encconv.h"
76 #include "wx/fontmap.h"
77
78 // ----------------------------------------------------------------------------
79 // macros
80 // ----------------------------------------------------------------------------
81
// In-place byte swapping of every element of an array: "str" is the array and
// "len" the number of elements to process. Both macros expand to a plain brace
// block (not do/while), so the call sites do not need a trailing semicolon.
#define BSWAP_UCS4(str, len) \
    { \
        unsigned _i; \
        for (_i = 0; _i < len; _i++) \
            str[_i] = wxUINT32_SWAP_ALWAYS(str[_i]); \
    }
#define BSWAP_UTF16(str, len) \
    { \
        unsigned _i; \
        for (_i = 0; _i < len; _i++) \
            str[_i] = wxUINT16_SWAP_ALWAYS(str[_i]); \
    }
84
85 // under Unix SIZEOF_WCHAR_T is defined by configure, but under other platforms
86 // it might be not defined - assume the most common value
87 #ifndef SIZEOF_WCHAR_T
88 #define SIZEOF_WCHAR_T 2
89 #endif // !defined(SIZEOF_WCHAR_T)
90
91 #if SIZEOF_WCHAR_T == 4
92 #define WC_NAME "UCS4"
93 #define WC_BSWAP BSWAP_UCS4
94 #ifdef WORDS_BIGENDIAN
95 #define WC_NAME_BEST "UCS-4BE"
96 #else
97 #define WC_NAME_BEST "UCS-4LE"
98 #endif
99 #elif SIZEOF_WCHAR_T == 2
100 #define WC_NAME "UTF16"
101 #define WC_BSWAP BSWAP_UTF16
102 #define WC_UTF16
103 #ifdef WORDS_BIGENDIAN
104 #define WC_NAME_BEST "UTF-16BE"
105 #else
106 #define WC_NAME_BEST "UTF-16LE"
107 #endif
108 #else // sizeof(wchar_t) != 2 nor 4
109 // I don't know what to do about this
110 #error "Weird sizeof(wchar_t): please report your platform details to wx-users mailing list"
111 #endif
112
113 // ============================================================================
114 // implementation
115 // ============================================================================
116
117 // ----------------------------------------------------------------------------
118 // UTF-16 en/decoding
119 // ----------------------------------------------------------------------------
120
121 #ifdef WC_UTF16
122
123 static size_t encode_utf16(wxUint32 input, wchar_t *output)
124 {
125 if (input<=0xffff)
126 {
127 if (output) *output++ = (wchar_t) input;
128 return 1;
129 }
130 else if (input>=0x110000)
131 {
132 return (size_t)-1;
133 }
134 else
135 {
136 if (output)
137 {
138 *output++ = (wchar_t) ((input >> 10)+0xd7c0);
139 *output++ = (wchar_t) ((input&0x3ff)+0xdc00);
140 }
141 return 2;
142 }
143 }
144
145 static size_t decode_utf16(const wchar_t* input, wxUint32& output)
146 {
147 if ((*input<0xd800) || (*input>0xdfff))
148 {
149 output = *input;
150 return 1;
151 }
152 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
153 {
154 output = *input;
155 return (size_t)-1;
156 }
157 else
158 {
159 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
160 return 2;
161 }
162 }
163
164 #endif // WC_UTF16
165
166 // ----------------------------------------------------------------------------
167 // wxMBConv
168 // ----------------------------------------------------------------------------
169
170 size_t wxMBConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
171 {
172 return wxMB2WC(buf, psz, n);
173 }
174
175 size_t wxMBConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
176 {
177 return wxWC2MB(buf, psz, n);
178 }
179
180 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
181 {
182 if (psz)
183 {
184 size_t nLen = MB2WC((wchar_t *) NULL, psz, 0);
185 if (nLen == (size_t)-1)
186 return wxWCharBuffer((wchar_t *) NULL);
187 wxWCharBuffer buf(nLen);
188 MB2WC((wchar_t *)(const wchar_t *) buf, psz, nLen);
189 return buf;
190 }
191 else
192 return wxWCharBuffer((wchar_t *) NULL);
193 }
194
195 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *psz) const
196 {
197 if (psz)
198 {
199 size_t nLen = WC2MB((char *) NULL, psz, 0);
200 if (nLen == (size_t)-1)
201 return wxCharBuffer((char *) NULL);
202 wxCharBuffer buf(nLen);
203 WC2MB((char *)(const char *) buf, psz, nLen);
204 return buf;
205 }
206 else
207 return wxCharBuffer((char *) NULL);
208 }
209
210 // ----------------------------------------------------------------------------
211 // standard file conversion
212 // ----------------------------------------------------------------------------
213
214 WXDLLEXPORT_DATA(wxMBConvFile) wxConvFile;
215
216 // just use the libc conversion for now
217 size_t wxMBConvFile::MB2WC(wchar_t *buf, const char *psz, size_t n) const
218 {
219 return wxMB2WC(buf, psz, n);
220 }
221
222 size_t wxMBConvFile::WC2MB(char *buf, const wchar_t *psz, size_t n) const
223 {
224 return wxWC2MB(buf, psz, n);
225 }
226
227 // ----------------------------------------------------------------------------
228 // standard gdk conversion
229 // ----------------------------------------------------------------------------
230
231 #ifdef __WXGTK12__
232
233 WXDLLEXPORT_DATA(wxMBConvGdk) wxConvGdk;
234
235 #include <gdk/gdk.h>
236
237 size_t wxMBConvGdk::MB2WC(wchar_t *buf, const char *psz, size_t n) const
238 {
239 if (buf)
240 {
241 return gdk_mbstowcs((GdkWChar *)buf, psz, n);
242 }
243 else
244 {
245 GdkWChar *nbuf = new GdkWChar[n=strlen(psz)];
246 size_t len = gdk_mbstowcs(nbuf, psz, n);
247 delete[] nbuf;
248 return len;
249 }
250 }
251
252 size_t wxMBConvGdk::WC2MB(char *buf, const wchar_t *psz, size_t n) const
253 {
254 char *mbstr = gdk_wcstombs((GdkWChar *)psz);
255 size_t len = mbstr ? strlen(mbstr) : 0;
256 if (buf)
257 {
258 if (len > n)
259 len = n;
260 memcpy(buf, psz, len);
261 if (len < n)
262 buf[len] = 0;
263 }
264 return len;
265 }
266
267 #endif // GTK > 1.0
268
269 // ----------------------------------------------------------------------------
270 // UTF-7
271 // ----------------------------------------------------------------------------
272
273 WXDLLEXPORT_DATA(wxMBConvUTF7) wxConvUTF7;
274
275 #if 0
276 static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
277 "abcdefghijklmnopqrstuvwxyz"
278 "0123456789'(),-./:?";
279 static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
280 static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
281 "abcdefghijklmnopqrstuvwxyz"
282 "0123456789+/";
283 #endif
284
285 // TODO: write actual implementations of UTF-7 here
286 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
287 const char * WXUNUSED(psz),
288 size_t WXUNUSED(n)) const
289 {
290 return 0;
291 }
292
293 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
294 const wchar_t * WXUNUSED(psz),
295 size_t WXUNUSED(n)) const
296 {
297 return 0;
298 }
299
300 // ----------------------------------------------------------------------------
301 // UTF-8
302 // ----------------------------------------------------------------------------
303
304 WXDLLEXPORT_DATA(wxMBConvUTF8) wxConvUTF8;
305
306 static wxUint32 utf8_max[]=
307 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
308
309 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
310 {
311 size_t len = 0;
312
313 while (*psz && ((!buf) || (len < n)))
314 {
315 unsigned char cc = *psz++, fc = cc;
316 unsigned cnt;
317 for (cnt = 0; fc & 0x80; cnt++)
318 fc <<= 1;
319 if (!cnt)
320 {
321 // plain ASCII char
322 if (buf)
323 *buf++ = cc;
324 len++;
325 }
326 else
327 {
328 cnt--;
329 if (!cnt)
330 {
331 // invalid UTF-8 sequence
332 return (size_t)-1;
333 }
334 else
335 {
336 unsigned ocnt = cnt - 1;
337 wxUint32 res = cc & (0x3f >> cnt);
338 while (cnt--)
339 {
340 cc = *psz++;
341 if ((cc & 0xC0) != 0x80)
342 {
343 // invalid UTF-8 sequence
344 return (size_t)-1;
345 }
346 res = (res << 6) | (cc & 0x3f);
347 }
348 if (res <= utf8_max[ocnt])
349 {
350 // illegal UTF-8 encoding
351 return (size_t)-1;
352 }
353 #ifdef WC_UTF16
354 size_t pa = encode_utf16(res, buf);
355 if (pa == (size_t)-1)
356 return (size_t)-1;
357 if (buf)
358 buf += pa;
359 len += pa;
360 #else // !WC_UTF16
361 if (buf)
362 *buf++ = res;
363 len++;
364 #endif // WC_UTF16/!WC_UTF16
365 }
366 }
367 }
368 if (buf && (len < n))
369 *buf = 0;
370 return len;
371 }
372
373 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
374 {
375 size_t len = 0;
376
377 while (*psz && ((!buf) || (len < n)))
378 {
379 wxUint32 cc;
380 #ifdef WC_UTF16
381 size_t pa = decode_utf16(psz, cc);
382 psz += (pa == (size_t)-1) ? 1 : pa;
383 #else
384 cc=(*psz++) & 0x7fffffff;
385 #endif
386 unsigned cnt;
387 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
388 if (!cnt)
389 {
390 // plain ASCII char
391 if (buf)
392 *buf++ = (char) cc;
393 len++;
394 }
395
396 else
397 {
398 len += cnt + 1;
399 if (buf)
400 {
401 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
402 while (cnt--)
403 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
404 }
405 }
406 }
407
408 if (buf && (len<n)) *buf = 0;
409 return len;
410 }
411
412 // ============================================================================
413 // wxCharacterSet and derived classes
414 // ============================================================================
415
416 // ----------------------------------------------------------------------------
417 // wxCharacterSet is the ABC for the classes below
418 // ----------------------------------------------------------------------------
419
420 class wxCharacterSet
421 {
422 public:
423 wxCharacterSet(const wxChar*name) : cname(name) {}
424 virtual ~wxCharacterSet() {}
425 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) = 0;
426 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) = 0;
427 virtual bool usable() const = 0;
428 public:
429 const wxChar*cname;
430 };
431
432 // ----------------------------------------------------------------------------
433 // ID_CharSet: implementation of wxCharacterSet using an existing wxMBConv
434 // ----------------------------------------------------------------------------
435
436 class ID_CharSet : public wxCharacterSet
437 {
438 public:
439 ID_CharSet(const wxChar *name, wxMBConv *cnv)
440 : wxCharacterSet(name), work(cnv) {}
441
442 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
443 { return work ? work->MB2WC(buf,psz,n) : (size_t)-1; }
444
445 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
446 { return work ? work->WC2MB(buf,psz,n) : (size_t)-1; }
447
448 bool usable() const
449 { return work!=NULL; }
450 public:
451 wxMBConv*work;
452 };
453
454
455 // ============================================================================
456 // The classes doing conversion using the iconv_xxx() functions
457 // ============================================================================
458
459 #ifdef HAVE_ICONV
460
461 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
462 // if output buffer is _exactly_ as big as needed. Such case is (unless there's
463 // yet another bug in glibc) the only case when iconv() returns with (size_t)-1
464 // (which means error) and says there are 0 bytes left in the input buffer --
465 // when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
466 // this alternative test for iconv() failure.
467 // [This bug does not appear in glibc 2.2.]
468 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
469 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
470 (errno != E2BIG || bufLeft != 0))
471 #else
472 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
473 #endif
474
475 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
476
477 // ----------------------------------------------------------------------------
478 // IC_CharSet: encapsulates an iconv character set
479 // ----------------------------------------------------------------------------
480
481 class IC_CharSet : public wxCharacterSet
482 {
483 public:
484 IC_CharSet(const wxChar *name);
485 virtual ~IC_CharSet();
486
487 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n);
488 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n);
489
490 bool usable() const
491 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
492
493 protected:
494 // the iconv handlers used to translate from multibyte to wide char and in
495 // the other direction
496 iconv_t m2w,
497 w2m;
498
499 private:
500 // the name (for iconv_open()) of a wide char charset - if none is
501 // available on this machine, it will remain NULL
502 static const char *ms_wcCharsetName;
503
504 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
505 // different endian-ness than the native one
506 static bool ms_wcNeedsSwap;
507 };
508
509 const char *IC_CharSet::ms_wcCharsetName = NULL;
510 bool IC_CharSet::ms_wcNeedsSwap = FALSE;
511
512 IC_CharSet::IC_CharSet(const wxChar *name)
513 : wxCharacterSet(name)
514 {
515 // check for charset that represents wchar_t:
516 if (ms_wcCharsetName == NULL)
517 {
518 ms_wcNeedsSwap = FALSE;
519
520 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
521 ms_wcCharsetName = WC_NAME_BEST;
522 m2w = iconv_open(ms_wcCharsetName, wxConvLibc.cWX2MB(name));
523
524 if (m2w == (iconv_t)-1)
525 {
526 // try charset w/o bytesex info (e.g. "UCS4")
527 // and check for bytesex ourselves:
528 ms_wcCharsetName = WC_NAME;
529 m2w = iconv_open(ms_wcCharsetName, wxConvLibc.cWX2MB(name));
530
531 // last bet, try if it knows WCHAR_T pseudo-charset
532 if (m2w == (iconv_t)-1)
533 {
534 ms_wcCharsetName = "WCHAR_T";
535 m2w = iconv_open(ms_wcCharsetName, wxConvLibc.cWX2MB(name));
536 }
537
538 if (m2w != (iconv_t)-1)
539 {
540 char buf[2], *bufPtr;
541 wchar_t wbuf[2], *wbufPtr;
542 size_t insz, outsz;
543 size_t res;
544
545 buf[0] = 'A';
546 buf[1] = 0;
547 wbuf[0] = 0;
548 insz = 2;
549 outsz = SIZEOF_WCHAR_T * 2;
550 wbufPtr = wbuf;
551 bufPtr = buf;
552
553 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
554 (char**)&wbufPtr, &outsz);
555
556 if (ICONV_FAILED(res, insz))
557 {
558 ms_wcCharsetName = NULL;
559 wxLogLastError(wxT("iconv"));
560 wxLogError(_("Convertion to charset '%s' doesn't work."), name);
561 }
562 else
563 {
564 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
565 }
566 }
567 else
568 {
569 ms_wcCharsetName = NULL;
570
571 // VS: we must not output an error here, since wxWindows will safely
572 // fall back to using wxEncodingConverter.
573 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
574 //wxLogError(
575 }
576 }
577 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
578 }
579 else // we already have ms_wcCharsetName
580 {
581 m2w = iconv_open(ms_wcCharsetName, wxConvLibc.cWX2MB(name));
582 }
583
584 // NB: don't ever pass NULL to iconv_open(), it may crash!
585 if ( ms_wcCharsetName )
586 {
587 w2m = iconv_open(wxConvLibc.cWX2MB(name), ms_wcCharsetName);
588 }
589 else
590 {
591 w2m = (iconv_t)-1;
592 }
593 }
594
595 IC_CharSet::~IC_CharSet()
596 {
597 if ( m2w != (iconv_t)-1 )
598 iconv_close(m2w);
599 if ( w2m != (iconv_t)-1 )
600 iconv_close(w2m);
601 }
602
603 size_t IC_CharSet::MB2WC(wchar_t *buf, const char *psz, size_t n)
604 {
605 size_t inbuf = strlen(psz);
606 size_t outbuf = n * SIZEOF_WCHAR_T;
607 size_t res, cres;
608 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
609 wchar_t *bufPtr = buf;
610 const char *pszPtr = psz;
611
612 if (buf)
613 {
614 // have destination buffer, convert there
615 cres = iconv(m2w,
616 ICONV_CHAR_CAST(&pszPtr), &inbuf,
617 (char**)&bufPtr, &outbuf);
618 res = n - (outbuf / SIZEOF_WCHAR_T);
619
620 if (ms_wcNeedsSwap)
621 {
622 // convert to native endianness
623 WC_BSWAP(buf /* _not_ bufPtr */, res)
624 }
625 }
626 else
627 {
628 // no destination buffer... convert using temp buffer
629 // to calculate destination buffer requirement
630 wchar_t tbuf[8];
631 res = 0;
632 do {
633 bufPtr = tbuf;
634 outbuf = 8*SIZEOF_WCHAR_T;
635
636 cres = iconv(m2w,
637 ICONV_CHAR_CAST(&pszPtr), &inbuf,
638 (char**)&bufPtr, &outbuf );
639
640 res += 8-(outbuf/SIZEOF_WCHAR_T);
641 } while ((cres==(size_t)-1) && (errno==E2BIG));
642 }
643
644 if (ICONV_FAILED(cres, inbuf))
645 {
646 //VS: it is ok if iconv fails, hence trace only
647 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
648 return (size_t)-1;
649 }
650
651 return res;
652 }
653
654 size_t IC_CharSet::WC2MB(char *buf, const wchar_t *psz, size_t n)
655 {
656 #if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
657 size_t inbuf = std::wcslen(psz) * SIZEOF_WCHAR_T;
658 #else
659 size_t inbuf = ::wcslen(psz) * SIZEOF_WCHAR_T;
660 #endif
661 size_t outbuf = n;
662 size_t res, cres;
663
664 wchar_t *tmpbuf = 0;
665
666 if (ms_wcNeedsSwap)
667 {
668 // need to copy to temp buffer to switch endianness
669 // this absolutely doesn't rock!
670 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
671 // could be in read-only memory, or be accessed in some other thread)
672 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
673 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
674 WC_BSWAP(tmpbuf, inbuf)
675 psz=tmpbuf;
676 }
677
678 if (buf)
679 {
680 // have destination buffer, convert there
681 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
682
683 res = n-outbuf;
684 }
685 else
686 {
687 // no destination buffer... convert using temp buffer
688 // to calculate destination buffer requirement
689 char tbuf[16];
690 res = 0;
691 do {
692 buf = tbuf; outbuf = 16;
693
694 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
695
696 res += 16 - outbuf;
697 } while ((cres==(size_t)-1) && (errno==E2BIG));
698 }
699
700 if (ms_wcNeedsSwap)
701 {
702 free(tmpbuf);
703 }
704
705 if (ICONV_FAILED(cres, inbuf))
706 {
707 //VS: it is ok if iconv fails, hence trace only
708 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
709 return (size_t)-1;
710 }
711
712 return res;
713 }
714
715 #endif // HAVE_ICONV
716
717 // ============================================================================
718 // Win32 conversion classes
719 // ============================================================================
720
721 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
722
723 extern long wxCharsetToCodepage(const wxChar *charset); // from utils.cpp
724
725 class CP_CharSet : public wxCharacterSet
726 {
727 public:
728 CP_CharSet(const wxChar* name)
729 : wxCharacterSet(name)
730 {
731 m_CodePage = wxCharsetToCodepage(name);
732 }
733
734 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
735 {
736 size_t len =
737 MultiByteToWideChar(m_CodePage, 0, psz, -1, buf, buf ? n : 0);
738 //VS: returns # of written chars for buf!=NULL and *size*
739 // needed buffer for buf==NULL
740 return len ? (buf ? len : len-1) : (size_t)-1;
741 }
742
743 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
744 {
745 size_t len = WideCharToMultiByte(m_CodePage, 0, psz, -1, buf,
746 buf ? n : 0, NULL, NULL);
747 //VS: returns # of written chars for buf!=NULL and *size*
748 // needed buffer for buf==NULL
749 return len ? (buf ? len : len-1) : (size_t)-1;
750 }
751
752 bool usable() const
753 { return m_CodePage != -1; }
754
755 public:
756 long m_CodePage;
757 };
758 #endif // __WIN32__
759
760 // ============================================================================
761 // wxEncodingConverter based conversion classes
762 // ============================================================================
763
764 #if wxUSE_FONTMAP
765
766 class EC_CharSet : public wxCharacterSet
767 {
768 public:
769 // temporarily just use wxEncodingConverter stuff,
770 // so that it works while a better implementation is built
771 EC_CharSet(const wxChar* name) : wxCharacterSet(name),
772 enc(wxFONTENCODING_SYSTEM)
773 {
774 if (name)
775 enc = wxTheFontMapper->CharsetToEncoding(name, FALSE);
776
777 m_ok = m2w.Init(enc, wxFONTENCODING_UNICODE) &&
778 w2m.Init(wxFONTENCODING_UNICODE, enc);
779 }
780
781 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n))
782 {
783 size_t inbuf = strlen(psz);
784 if (buf)
785 m2w.Convert(psz,buf);
786 return inbuf;
787 }
788
789 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n))
790 {
791 #if ( defined(__BORLANDC__) && (__BORLANDC__ > 0x530) ) \
792 || ( defined(__MWERKS__) && defined(__WXMSW__) )
793 size_t inbuf = std::wcslen(psz);
794 #else
795 size_t inbuf = ::wcslen(psz);
796 #endif
797 if (buf)
798 w2m.Convert(psz,buf);
799
800 return inbuf;
801 }
802
803 bool usable() const { return m_ok; }
804
805 public:
806 wxFontEncoding enc;
807 wxEncodingConverter m2w, w2m;
808
809 // were we initialized successfully?
810 bool m_ok;
811 };
812
813 #endif // wxUSE_FONTMAP
814
815 // ----------------------------------------------------------------------------
816 // the function creating the wxCharacterSet for the specified charset on the
817 // current system, trying all possibilities
818 // ----------------------------------------------------------------------------
819
820 static wxCharacterSet *wxGetCharacterSet(const wxChar *name)
821 {
822 // check for the special case of ASCII charset
823 #if wxUSE_FONTMAP
824 if ( wxTheFontMapper->CharsetToEncoding(name) == wxFONTENCODING_DEFAULT )
825 #else // wxUSE_FONTMAP
826 if ( !name )
827 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
828 {
829 // don't convert at all
830 return NULL;
831 }
832
833 // the test above must have taken care of this case
834 wxCHECK_MSG( name, NULL, _T("NULL name must be wxFONTENCODING_DEFAULT") );
835
836 wxCharacterSet *cset;
837
838 if ( wxStricmp(name, wxT("UTF8")) == 0 || wxStricmp(name, wxT("UTF-8")) == 0)
839 {
840 cset = new ID_CharSet(name, &wxConvUTF8);
841 }
842 else
843 {
844 #ifdef HAVE_ICONV
845 cset = new IC_CharSet(name);
846 #else // !HAVE_ICONV
847 cset = NULL;
848 #endif // HAVE_ICONV/!HAVE_ICONV
849 }
850
851 // it can only be NULL in this case
852 #ifndef HAVE_ICONV
853 if ( cset )
854 #endif // !HAVE_ICONV
855 {
856 if ( cset->usable() )
857 return cset;
858
859 delete cset;
860 cset = NULL;
861 }
862
863 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
864 cset = new CP_CharSet(name);
865 if ( cset->usable() )
866 return cset;
867
868 delete cset;
869 cset = NULL;
870 #endif // __WIN32__
871
872 #if wxUSE_FONTMAP
873 cset = new EC_CharSet(name);
874 if ( cset->usable() )
875 return cset;
876
877 delete cset;
878 cset = NULL;
879 #endif // wxUSE_FONTMAP
880
881 wxLogError(_("Cannot convert from encoding '%s'!"), name);
882
883 return NULL;
884 }
885
886 // ============================================================================
887 // wxCSConv implementation
888 // ============================================================================
889
890 wxCSConv::wxCSConv(const wxChar *charset)
891 {
892 m_name = (wxChar *)NULL;
893 m_cset = (wxCharacterSet *) NULL;
894 m_deferred = TRUE;
895
896 SetName(charset);
897 }
898
899 wxCSConv::~wxCSConv()
900 {
901 free(m_name);
902 delete m_cset;
903 }
904
905 void wxCSConv::SetName(const wxChar *charset)
906 {
907 if (charset)
908 {
909 m_name = wxStrdup(charset);
910 m_deferred = TRUE;
911 }
912 }
913
914 void wxCSConv::LoadNow()
915 {
916 if (m_deferred)
917 {
918 if ( !m_name )
919 {
920 wxString name = wxLocale::GetSystemEncodingName();
921 if ( !name.empty() )
922 SetName(name);
923 }
924
925 // wxGetCharacterSet() complains about NULL name
926 m_cset = m_name ? wxGetCharacterSet(m_name) : NULL;
927 m_deferred = FALSE;
928 }
929 }
930
931 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
932 {
933 ((wxCSConv *)this)->LoadNow(); // discard constness
934
935 if (m_cset)
936 return m_cset->MB2WC(buf, psz, n);
937
938 // latin-1 (direct)
939 size_t len = strlen(psz);
940
941 if (buf)
942 {
943 for (size_t c = 0; c <= len; c++)
944 buf[c] = (unsigned char)(psz[c]);
945 }
946
947 return len;
948 }
949
950 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
951 {
952 ((wxCSConv *)this)->LoadNow(); // discard constness
953
954 if (m_cset)
955 return m_cset->WC2MB(buf, psz, n);
956
957 // latin-1 (direct)
958 #if ( defined(__BORLANDC__) && (__BORLANDC__ > 0x530) ) \
959 || ( defined(__MWERKS__) && defined(__WXMSW__) )
960 size_t len=std::wcslen(psz);
961 #else
962 size_t len=::wcslen(psz);
963 #endif
964 if (buf)
965 {
966 for (size_t c = 0; c <= len; c++)
967 buf[c] = (psz[c] > 0xff) ? '?' : psz[c];
968 }
969
970 return len;
971 }
972
973 #endif // wxUSE_WCHAR_T
974
975