]> git.saurik.com Git - wxWidgets.git/blame_incremental - src/common/strconv.cpp
don't use -q option with egrep, Solaris doesn't have it (bug 517145)
[wxWidgets.git] / src / common / strconv.cpp
... / ...
CommitLineData
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
8// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
9// Licence: wxWindows license
10/////////////////////////////////////////////////////////////////////////////
11
12// ============================================================================
13// declarations
14// ============================================================================
15
16// ----------------------------------------------------------------------------
17// headers
18// ----------------------------------------------------------------------------
19
20#ifdef __GNUG__
21 #pragma implementation "strconv.h"
22#endif
23
24// For compilers that support precompilation, includes "wx.h".
25#include "wx/wxprec.h"
26
27#ifdef __BORLANDC__
28 #pragma hdrstop
29#endif
30
31#ifndef WX_PRECOMP
32 #include "wx/intl.h"
33 #include "wx/log.h"
34#endif // WX_PRECOMP
35
36#ifdef __WXMSW__
37 #include "wx/msw/private.h"
38#endif
39
40#include <errno.h>
41#include <ctype.h>
42#include <string.h>
43#include <stdlib.h>
44
45#include "wx/strconv.h"
46
47// ----------------------------------------------------------------------------
48// globals
49// ----------------------------------------------------------------------------
50
51#if wxUSE_WCHAR_T
52 WXDLLEXPORT_DATA(wxMBConv) wxConvLibc;
53 WXDLLEXPORT_DATA(wxCSConv) wxConvLocal((const wxChar *)NULL);
54#else
55 // stand-ins in absence of wchar_t
56 WXDLLEXPORT_DATA(wxMBConv) wxConvLibc, wxConvFile;
57#endif // wxUSE_WCHAR_T
58
59WXDLLEXPORT_DATA(wxMBConv *) wxConvCurrent = &wxConvLibc;
60
61// ----------------------------------------------------------------------------
62// headers
63// ----------------------------------------------------------------------------
64
65#if wxUSE_WCHAR_T
66
67#ifdef __SALFORDC__
68 #include <clib.h>
69#endif
70
71#ifdef HAVE_ICONV
72 #include <iconv.h>
73#endif
74
75#include "wx/encconv.h"
76#include "wx/fontmap.h"
77
78// ----------------------------------------------------------------------------
79// macros
80// ----------------------------------------------------------------------------
81
82#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
83#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
84
85// under Unix SIZEOF_WCHAR_T is defined by configure, but under other platforms
86// it might be not defined - assume the most common value
87#ifndef SIZEOF_WCHAR_T
88 #define SIZEOF_WCHAR_T 2
89#endif // !defined(SIZEOF_WCHAR_T)
90
91#if SIZEOF_WCHAR_T == 4
92 #define WC_NAME "UCS4"
93 #define WC_BSWAP BSWAP_UCS4
94 #ifdef WORDS_BIGENDIAN
95 #define WC_NAME_BEST "UCS-4BE"
96 #else
97 #define WC_NAME_BEST "UCS-4LE"
98 #endif
99#elif SIZEOF_WCHAR_T == 2
100 #define WC_NAME "UTF16"
101 #define WC_BSWAP BSWAP_UTF16
102 #define WC_UTF16
103 #ifdef WORDS_BIGENDIAN
104 #define WC_NAME_BEST "UTF-16BE"
105 #else
106 #define WC_NAME_BEST "UTF-16LE"
107 #endif
108#else // sizeof(wchar_t) != 2 nor 4
109 // I don't know what to do about this
110 #error "Weird sizeof(wchar_t): please report your platform details to wx-users mailing list"
111#endif
112
113// ============================================================================
114// implementation
115// ============================================================================
116
117// ----------------------------------------------------------------------------
118// UTF-16 en/decoding
119// ----------------------------------------------------------------------------
120
121#ifdef WC_UTF16
122
123static size_t encode_utf16(wxUint32 input, wchar_t *output)
124{
125 if (input<=0xffff)
126 {
127 if (output) *output++ = (wchar_t) input;
128 return 1;
129 }
130 else if (input>=0x110000)
131 {
132 return (size_t)-1;
133 }
134 else
135 {
136 if (output)
137 {
138 *output++ = (wchar_t) ((input >> 10)+0xd7c0);
139 *output++ = (wchar_t) ((input&0x3ff)+0xdc00);
140 }
141 return 2;
142 }
143}
144
145static size_t decode_utf16(const wchar_t* input, wxUint32& output)
146{
147 if ((*input<0xd800) || (*input>0xdfff))
148 {
149 output = *input;
150 return 1;
151 }
152 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
153 {
154 output = *input;
155 return (size_t)-1;
156 }
157 else
158 {
159 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
160 return 2;
161 }
162}
163
164#endif // WC_UTF16
165
166// ----------------------------------------------------------------------------
167// wxMBConv
168// ----------------------------------------------------------------------------
169
170size_t wxMBConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
171{
172 return wxMB2WC(buf, psz, n);
173}
174
175size_t wxMBConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
176{
177 return wxWC2MB(buf, psz, n);
178}
179
180const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
181{
182 if (psz)
183 {
184 size_t nLen = MB2WC((wchar_t *) NULL, psz, 0);
185 if (nLen == (size_t)-1)
186 return wxWCharBuffer((wchar_t *) NULL);
187 wxWCharBuffer buf(nLen);
188 MB2WC((wchar_t *)(const wchar_t *) buf, psz, nLen);
189 return buf;
190 }
191 else
192 return wxWCharBuffer((wchar_t *) NULL);
193}
194
195const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *psz) const
196{
197 if (psz)
198 {
199 size_t nLen = WC2MB((char *) NULL, psz, 0);
200 if (nLen == (size_t)-1)
201 return wxCharBuffer((char *) NULL);
202 wxCharBuffer buf(nLen);
203 WC2MB((char *)(const char *) buf, psz, nLen);
204 return buf;
205 }
206 else
207 return wxCharBuffer((char *) NULL);
208}
209
210// ----------------------------------------------------------------------------
211// standard file conversion
212// ----------------------------------------------------------------------------
213
214WXDLLEXPORT_DATA(wxMBConvFile) wxConvFile;
215
216// just use the libc conversion for now
217size_t wxMBConvFile::MB2WC(wchar_t *buf, const char *psz, size_t n) const
218{
219 return wxMB2WC(buf, psz, n);
220}
221
222size_t wxMBConvFile::WC2MB(char *buf, const wchar_t *psz, size_t n) const
223{
224 return wxWC2MB(buf, psz, n);
225}
226
227// ----------------------------------------------------------------------------
228// standard gdk conversion
229// ----------------------------------------------------------------------------
230
231#ifdef __WXGTK12__
232
233WXDLLEXPORT_DATA(wxMBConvGdk) wxConvGdk;
234
235#include <gdk/gdk.h>
236
237size_t wxMBConvGdk::MB2WC(wchar_t *buf, const char *psz, size_t n) const
238{
239 if (buf)
240 {
241 return gdk_mbstowcs((GdkWChar *)buf, psz, n);
242 }
243 else
244 {
245 GdkWChar *nbuf = new GdkWChar[n=strlen(psz)];
246 size_t len = gdk_mbstowcs(nbuf, psz, n);
247 delete[] nbuf;
248 return len;
249 }
250}
251
252size_t wxMBConvGdk::WC2MB(char *buf, const wchar_t *psz, size_t n) const
253{
254 char *mbstr = gdk_wcstombs((GdkWChar *)psz);
255 size_t len = mbstr ? strlen(mbstr) : 0;
256 if (buf)
257 {
258 if (len > n)
259 len = n;
260 memcpy(buf, psz, len);
261 if (len < n)
262 buf[len] = 0;
263 }
264 return len;
265}
266
267#endif // GTK > 1.0
268
269// ----------------------------------------------------------------------------
270// UTF-7
271// ----------------------------------------------------------------------------
272
273WXDLLEXPORT_DATA(wxMBConvUTF7) wxConvUTF7;
274
275#if 0
276static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
277 "abcdefghijklmnopqrstuvwxyz"
278 "0123456789'(),-./:?";
279static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
280static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
281 "abcdefghijklmnopqrstuvwxyz"
282 "0123456789+/";
283#endif
284
285// TODO: write actual implementations of UTF-7 here
286size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
287 const char * WXUNUSED(psz),
288 size_t WXUNUSED(n)) const
289{
290 return 0;
291}
292
293size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
294 const wchar_t * WXUNUSED(psz),
295 size_t WXUNUSED(n)) const
296{
297 return 0;
298}
299
300// ----------------------------------------------------------------------------
301// UTF-8
302// ----------------------------------------------------------------------------
303
304WXDLLEXPORT_DATA(wxMBConvUTF8) wxConvUTF8;
305
306static wxUint32 utf8_max[]=
307 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
308
309size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
310{
311 size_t len = 0;
312
313 while (*psz && ((!buf) || (len < n)))
314 {
315 unsigned char cc = *psz++, fc = cc;
316 unsigned cnt;
317 for (cnt = 0; fc & 0x80; cnt++)
318 fc <<= 1;
319 if (!cnt)
320 {
321 // plain ASCII char
322 if (buf)
323 *buf++ = cc;
324 len++;
325 }
326 else
327 {
328 cnt--;
329 if (!cnt)
330 {
331 // invalid UTF-8 sequence
332 return (size_t)-1;
333 }
334 else
335 {
336 unsigned ocnt = cnt - 1;
337 wxUint32 res = cc & (0x3f >> cnt);
338 while (cnt--)
339 {
340 cc = *psz++;
341 if ((cc & 0xC0) != 0x80)
342 {
343 // invalid UTF-8 sequence
344 return (size_t)-1;
345 }
346 res = (res << 6) | (cc & 0x3f);
347 }
348 if (res <= utf8_max[ocnt])
349 {
350 // illegal UTF-8 encoding
351 return (size_t)-1;
352 }
353#ifdef WC_UTF16
354 size_t pa = encode_utf16(res, buf);
355 if (pa == (size_t)-1)
356 return (size_t)-1;
357 if (buf)
358 buf += pa;
359 len += pa;
360#else // !WC_UTF16
361 if (buf)
362 *buf++ = res;
363 len++;
364#endif // WC_UTF16/!WC_UTF16
365 }
366 }
367 }
368 if (buf && (len < n))
369 *buf = 0;
370 return len;
371}
372
373size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
374{
375 size_t len = 0;
376
377 while (*psz && ((!buf) || (len < n)))
378 {
379 wxUint32 cc;
380#ifdef WC_UTF16
381 size_t pa = decode_utf16(psz, cc);
382 psz += (pa == (size_t)-1) ? 1 : pa;
383#else
384 cc=(*psz++) & 0x7fffffff;
385#endif
386 unsigned cnt;
387 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
388 if (!cnt)
389 {
390 // plain ASCII char
391 if (buf)
392 *buf++ = (char) cc;
393 len++;
394 }
395
396 else
397 {
398 len += cnt + 1;
399 if (buf)
400 {
401 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
402 while (cnt--)
403 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
404 }
405 }
406 }
407
408 if (buf && (len<n)) *buf = 0;
409 return len;
410}
411
412// ============================================================================
413// wxCharacterSet and derived classes
414// ============================================================================
415
416// ----------------------------------------------------------------------------
417// wxCharacterSet is the ABC for the classes below
418// ----------------------------------------------------------------------------
419
420class wxCharacterSet
421{
422public:
423 wxCharacterSet(const wxChar*name) : cname(name) {}
424 virtual ~wxCharacterSet() {}
425 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) = 0;
426 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) = 0;
427 virtual bool usable() const = 0;
428public:
429 const wxChar*cname;
430};
431
432// ----------------------------------------------------------------------------
433// ID_CharSet: implementation of wxCharacterSet using an existing wxMBConv
434// ----------------------------------------------------------------------------
435
436class ID_CharSet : public wxCharacterSet
437{
438public:
439 ID_CharSet(const wxChar *name, wxMBConv *cnv)
440 : wxCharacterSet(name), work(cnv) {}
441
442 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
443 { return work ? work->MB2WC(buf,psz,n) : (size_t)-1; }
444
445 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
446 { return work ? work->WC2MB(buf,psz,n) : (size_t)-1; }
447
448 bool usable() const
449 { return work!=NULL; }
450public:
451 wxMBConv*work;
452};
453
454
455// ============================================================================
456// The classes doing conversion using the iconv_xxx() functions
457// ============================================================================
458
459#ifdef HAVE_ICONV
460
461// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
462// if output buffer is _exactly_ as big as needed. Such case is (unless there's
463// yet another bug in glibc) the only case when iconv() returns with (size_t)-1
464// (which means error) and says there are 0 bytes left in the input buffer --
465// when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
466// this alternative test for iconv() failure.
467// [This bug does not appear in glibc 2.2.]
468#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
469#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
470 (errno != E2BIG || bufLeft != 0))
471#else
472#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
473#endif
474
475#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
476
477// ----------------------------------------------------------------------------
478// IC_CharSet: encapsulates an iconv character set
479// ----------------------------------------------------------------------------
480
481class IC_CharSet : public wxCharacterSet
482{
483public:
484 IC_CharSet(const wxChar *name);
485 virtual ~IC_CharSet();
486
487 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n);
488 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n);
489
490 bool usable() const
491 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
492
493protected:
494 // the iconv handlers used to translate from multibyte to wide char and in
495 // the other direction
496 iconv_t m2w,
497 w2m;
498
499private:
500 // the name (for iconv_open()) of a wide char charset - if none is
501 // available on this machine, it will remain NULL
502 static const char *ms_wcCharsetName;
503
504 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
505 // different endian-ness than the native one
506 static bool ms_wcNeedsSwap;
507};
508
509const char *IC_CharSet::ms_wcCharsetName = NULL;
510bool IC_CharSet::ms_wcNeedsSwap = FALSE;
511
512IC_CharSet::IC_CharSet(const wxChar *name)
513 : wxCharacterSet(name)
514{
515 // check for charset that represents wchar_t:
516 if (ms_wcCharsetName == NULL)
517 {
518 ms_wcNeedsSwap = FALSE;
519
520 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
521 ms_wcCharsetName = WC_NAME_BEST;
522 m2w = iconv_open(ms_wcCharsetName, wxConvLibc.cWX2MB(name));
523
524 if (m2w == (iconv_t)-1)
525 {
526 // try charset w/o bytesex info (e.g. "UCS4")
527 // and check for bytesex ourselves:
528 ms_wcCharsetName = WC_NAME;
529 m2w = iconv_open(ms_wcCharsetName, wxConvLibc.cWX2MB(name));
530
531 // last bet, try if it knows WCHAR_T pseudo-charset
532 if (m2w == (iconv_t)-1)
533 {
534 ms_wcCharsetName = "WCHAR_T";
535 m2w = iconv_open(ms_wcCharsetName, wxConvLibc.cWX2MB(name));
536 }
537
538 if (m2w != (iconv_t)-1)
539 {
540 char buf[2], *bufPtr;
541 wchar_t wbuf[2], *wbufPtr;
542 size_t insz, outsz;
543 size_t res;
544
545 buf[0] = 'A';
546 buf[1] = 0;
547 wbuf[0] = 0;
548 insz = 2;
549 outsz = SIZEOF_WCHAR_T * 2;
550 wbufPtr = wbuf;
551 bufPtr = buf;
552
553 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
554 (char**)&wbufPtr, &outsz);
555
556 if (ICONV_FAILED(res, insz))
557 {
558 ms_wcCharsetName = NULL;
559 wxLogLastError(wxT("iconv"));
560 wxLogError(_("Convertion to charset '%s' doesn't work."), name);
561 }
562 else
563 {
564 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
565 }
566 }
567 else
568 {
569 ms_wcCharsetName = NULL;
570
571 // VS: we must not output an error here, since wxWindows will safely
572 // fall back to using wxEncodingConverter.
573 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
574 //wxLogError(
575 }
576 }
577 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
578 }
579 else // we already have ms_wcCharsetName
580 {
581 m2w = iconv_open(ms_wcCharsetName, wxConvLibc.cWX2MB(name));
582 }
583
584 // NB: don't ever pass NULL to iconv_open(), it may crash!
585 if ( ms_wcCharsetName )
586 {
587 w2m = iconv_open(wxConvLibc.cWX2MB(name), ms_wcCharsetName);
588 }
589 else
590 {
591 w2m = (iconv_t)-1;
592 }
593}
594
595IC_CharSet::~IC_CharSet()
596{
597 if ( m2w != (iconv_t)-1 )
598 iconv_close(m2w);
599 if ( w2m != (iconv_t)-1 )
600 iconv_close(w2m);
601}
602
603size_t IC_CharSet::MB2WC(wchar_t *buf, const char *psz, size_t n)
604{
605 size_t inbuf = strlen(psz);
606 size_t outbuf = n * SIZEOF_WCHAR_T;
607 size_t res, cres;
608 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
609 wchar_t *bufPtr = buf;
610 const char *pszPtr = psz;
611
612 if (buf)
613 {
614 // have destination buffer, convert there
615 cres = iconv(m2w,
616 ICONV_CHAR_CAST(&pszPtr), &inbuf,
617 (char**)&bufPtr, &outbuf);
618 res = n - (outbuf / SIZEOF_WCHAR_T);
619
620 if (ms_wcNeedsSwap)
621 {
622 // convert to native endianness
623 WC_BSWAP(buf /* _not_ bufPtr */, res)
624 }
625 }
626 else
627 {
628 // no destination buffer... convert using temp buffer
629 // to calculate destination buffer requirement
630 wchar_t tbuf[8];
631 res = 0;
632 do {
633 bufPtr = tbuf;
634 outbuf = 8*SIZEOF_WCHAR_T;
635
636 cres = iconv(m2w,
637 ICONV_CHAR_CAST(&pszPtr), &inbuf,
638 (char**)&bufPtr, &outbuf );
639
640 res += 8-(outbuf/SIZEOF_WCHAR_T);
641 } while ((cres==(size_t)-1) && (errno==E2BIG));
642 }
643
644 if (ICONV_FAILED(cres, inbuf))
645 {
646 //VS: it is ok if iconv fails, hence trace only
647 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
648 return (size_t)-1;
649 }
650
651 return res;
652}
653
654size_t IC_CharSet::WC2MB(char *buf, const wchar_t *psz, size_t n)
655{
656#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
657 size_t inbuf = std::wcslen(psz) * SIZEOF_WCHAR_T;
658#else
659 size_t inbuf = ::wcslen(psz) * SIZEOF_WCHAR_T;
660#endif
661 size_t outbuf = n;
662 size_t res, cres;
663
664 wchar_t *tmpbuf = 0;
665
666 if (ms_wcNeedsSwap)
667 {
668 // need to copy to temp buffer to switch endianness
669 // this absolutely doesn't rock!
670 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
671 // could be in read-only memory, or be accessed in some other thread)
672 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
673 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
674 WC_BSWAP(tmpbuf, inbuf)
675 psz=tmpbuf;
676 }
677
678 if (buf)
679 {
680 // have destination buffer, convert there
681 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
682
683 res = n-outbuf;
684 }
685 else
686 {
687 // no destination buffer... convert using temp buffer
688 // to calculate destination buffer requirement
689 char tbuf[16];
690 res = 0;
691 do {
692 buf = tbuf; outbuf = 16;
693
694 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
695
696 res += 16 - outbuf;
697 } while ((cres==(size_t)-1) && (errno==E2BIG));
698 }
699
700 if (ms_wcNeedsSwap)
701 {
702 free(tmpbuf);
703 }
704
705 if (ICONV_FAILED(cres, inbuf))
706 {
707 //VS: it is ok if iconv fails, hence trace only
708 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
709 return (size_t)-1;
710 }
711
712 return res;
713}
714
715#endif // HAVE_ICONV
716
717// ============================================================================
718// Win32 conversion classes
719// ============================================================================
720
721#if defined(__WIN32__) && !defined(__WXMICROWIN__)
722
723extern long wxCharsetToCodepage(const wxChar *charset); // from utils.cpp
724
725class CP_CharSet : public wxCharacterSet
726{
727public:
728 CP_CharSet(const wxChar* name)
729 : wxCharacterSet(name)
730 {
731 m_CodePage = wxCharsetToCodepage(name);
732 }
733
734 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
735 {
736 size_t len =
737 MultiByteToWideChar(m_CodePage, 0, psz, -1, buf, buf ? n : 0);
738 //VS: returns # of written chars for buf!=NULL and *size*
739 // needed buffer for buf==NULL
740 return len ? (buf ? len : len-1) : (size_t)-1;
741 }
742
743 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
744 {
745 size_t len = WideCharToMultiByte(m_CodePage, 0, psz, -1, buf,
746 buf ? n : 0, NULL, NULL);
747 //VS: returns # of written chars for buf!=NULL and *size*
748 // needed buffer for buf==NULL
749 return len ? (buf ? len : len-1) : (size_t)-1;
750 }
751
752 bool usable() const
753 { return m_CodePage != -1; }
754
755public:
756 long m_CodePage;
757};
758#endif // __WIN32__
759
760// ============================================================================
761// wxEncodingConverter based conversion classes
762// ============================================================================
763
764#if wxUSE_FONTMAP
765
766class EC_CharSet : public wxCharacterSet
767{
768public:
769 // temporarily just use wxEncodingConverter stuff,
770 // so that it works while a better implementation is built
771 EC_CharSet(const wxChar* name) : wxCharacterSet(name),
772 enc(wxFONTENCODING_SYSTEM)
773 {
774 if (name)
775 enc = wxTheFontMapper->CharsetToEncoding(name, FALSE);
776
777 m_ok = m2w.Init(enc, wxFONTENCODING_UNICODE) &&
778 w2m.Init(wxFONTENCODING_UNICODE, enc);
779 }
780
781 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n))
782 {
783 size_t inbuf = strlen(psz);
784 if (buf)
785 m2w.Convert(psz,buf);
786 return inbuf;
787 }
788
789 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n))
790 {
791#if ( defined(__BORLANDC__) && (__BORLANDC__ > 0x530) ) \
792 || ( defined(__MWERKS__) && defined(__WXMSW__) )
793 size_t inbuf = std::wcslen(psz);
794#else
795 size_t inbuf = ::wcslen(psz);
796#endif
797 if (buf)
798 w2m.Convert(psz,buf);
799
800 return inbuf;
801 }
802
803 bool usable() const { return m_ok; }
804
805public:
806 wxFontEncoding enc;
807 wxEncodingConverter m2w, w2m;
808
809 // were we initialized successfully?
810 bool m_ok;
811};
812
813#endif // wxUSE_FONTMAP
814
815// ----------------------------------------------------------------------------
816// the function creating the wxCharacterSet for the specified charset on the
817// current system, trying all possibilities
818// ----------------------------------------------------------------------------
819
820static wxCharacterSet *wxGetCharacterSet(const wxChar *name)
821{
822 // check for the special case of ASCII charset
823#if wxUSE_FONTMAP
824 if ( wxTheFontMapper->CharsetToEncoding(name) == wxFONTENCODING_DEFAULT )
825#else // wxUSE_FONTMAP
826 if ( !name )
827#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
828 {
829 // don't convert at all
830 return NULL;
831 }
832
833 // the test above must have taken care of this case
834 wxCHECK_MSG( name, NULL, _T("NULL name must be wxFONTENCODING_DEFAULT") );
835
836 wxCharacterSet *cset;
837
838 if ( wxStricmp(name, wxT("UTF8")) == 0 || wxStricmp(name, wxT("UTF-8")) == 0)
839 {
840 cset = new ID_CharSet(name, &wxConvUTF8);
841 }
842 else
843 {
844#ifdef HAVE_ICONV
845 cset = new IC_CharSet(name);
846#else // !HAVE_ICONV
847 cset = NULL;
848#endif // HAVE_ICONV/!HAVE_ICONV
849 }
850
851 // it can only be NULL in this case
852#ifndef HAVE_ICONV
853 if ( cset )
854#endif // !HAVE_ICONV
855 {
856 if ( cset->usable() )
857 return cset;
858
859 delete cset;
860 cset = NULL;
861 }
862
863#if defined(__WIN32__) && !defined(__WXMICROWIN__)
864 cset = new CP_CharSet(name);
865 if ( cset->usable() )
866 return cset;
867
868 delete cset;
869 cset = NULL;
870#endif // __WIN32__
871
872#if wxUSE_FONTMAP
873 cset = new EC_CharSet(name);
874 if ( cset->usable() )
875 return cset;
876
877 delete cset;
878 cset = NULL;
879#endif // wxUSE_FONTMAP
880
881 wxLogError(_("Cannot convert from encoding '%s'!"), name);
882
883 return NULL;
884}
885
886// ============================================================================
887// wxCSConv implementation
888// ============================================================================
889
890wxCSConv::wxCSConv(const wxChar *charset)
891{
892 m_name = (wxChar *)NULL;
893 m_cset = (wxCharacterSet *) NULL;
894 m_deferred = TRUE;
895
896 SetName(charset);
897}
898
899wxCSConv::~wxCSConv()
900{
901 free(m_name);
902 delete m_cset;
903}
904
905void wxCSConv::SetName(const wxChar *charset)
906{
907 if (charset)
908 {
909 m_name = wxStrdup(charset);
910 m_deferred = TRUE;
911 }
912}
913
914void wxCSConv::LoadNow()
915{
916 if (m_deferred)
917 {
918 if ( !m_name )
919 {
920 wxString name = wxLocale::GetSystemEncodingName();
921 if ( !name.empty() )
922 SetName(name);
923 }
924
925 // wxGetCharacterSet() complains about NULL name
926 m_cset = m_name ? wxGetCharacterSet(m_name) : NULL;
927 m_deferred = FALSE;
928 }
929}
930
931size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
932{
933 ((wxCSConv *)this)->LoadNow(); // discard constness
934
935 if (m_cset)
936 return m_cset->MB2WC(buf, psz, n);
937
938 // latin-1 (direct)
939 size_t len = strlen(psz);
940
941 if (buf)
942 {
943 for (size_t c = 0; c <= len; c++)
944 buf[c] = (unsigned char)(psz[c]);
945 }
946
947 return len;
948}
949
950size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
951{
952 ((wxCSConv *)this)->LoadNow(); // discard constness
953
954 if (m_cset)
955 return m_cset->WC2MB(buf, psz, n);
956
957 // latin-1 (direct)
958#if ( defined(__BORLANDC__) && (__BORLANDC__ > 0x530) ) \
959 || ( defined(__MWERKS__) && defined(__WXMSW__) )
960 size_t len=std::wcslen(psz);
961#else
962 size_t len=::wcslen(psz);
963#endif
964 if (buf)
965 {
966 for (size_t c = 0; c <= len; c++)
967 buf[c] = (psz[c] > 0xff) ? '?' : psz[c];
968 }
969
970 return len;
971}
972
973#endif // wxUSE_WCHAR_T
974
975