]> git.saurik.com Git - wxWidgets.git/blame_incremental - src/common/strconv.cpp
Added a couple more numeric character references
[wxWidgets.git] / src / common / strconv.cpp
... / ...
CommitLineData
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
8// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin
9// Licence: wxWindows license
10/////////////////////////////////////////////////////////////////////////////
11
12// ============================================================================
13// declarations
14// ============================================================================
15
16// ----------------------------------------------------------------------------
17// headers
18// ----------------------------------------------------------------------------
19
20#ifdef __GNUG__
21 #pragma implementation "strconv.h"
22#endif
23
24// For compilers that support precompilation, includes "wx.h".
25#include "wx/wxprec.h"
26
27#ifdef __BORLANDC__
28 #pragma hdrstop
29#endif
30
31#ifdef __WXMSW__
32 #include "wx/msw/private.h"
33#endif
34
35#include <errno.h>
36#include <ctype.h>
37#include <string.h>
38#include <stdlib.h>
39
40#ifdef __SALFORDC__
41 #include <clib.h>
42#endif
43
44#ifdef HAVE_ICONV_H
45 #include <iconv.h>
46#endif
47#ifdef HAVE_LANGINFO_H
48 #include <langinfo.h>
49#endif
50
51#ifdef __WXMSW__
52 #include <windows.h>
53#endif
54
55#include "wx/debug.h"
56#include "wx/strconv.h"
57#include "wx/intl.h"
58#include "wx/log.h"
59
60#if defined(WORDS_BIGENDIAN) || defined(__STDC_ISO_10646__)
61#define BSWAP_UCS4(str, len)
62#define BSWAP_UCS2(str, len)
63#else
64#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
65#define BSWAP_UCS2(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
66#define WC_NEED_BSWAP
67#endif
68#define BSWAP_UTF32(str, len) BSWAP_UCS4(str, len)
69#define BSWAP_UTF16(str, len) BSWAP_UCS2(str, len)
70
71#if SIZEOF_WCHAR_T == 4
72#define WC_NAME "UCS4"
73#define WC_BSWAP BSWAP_UCS4
74#elif SIZEOF_WCHAR_T == 2
75#define WC_NAME "UTF16"
76#define WC_BSWAP BSWAP_UTF16
77#define WC_UTF16
78#endif
79
80// ----------------------------------------------------------------------------
81// globals
82// ----------------------------------------------------------------------------
83
84WXDLLEXPORT_DATA(wxMBConv *) wxConvCurrent = &wxConvLibc;
85
86// ============================================================================
87// implementation
88// ============================================================================
89
90#if wxUSE_WCHAR_T
91
92#ifdef WC_UTF16
93
94static size_t encode_utf16(wxUint32 input,wxUint16*output)
95{
96 if (input<=0xffff) {
97 if (output) *output++ = input;
98 return 1;
99 } else
100 if (input>=0x110000) {
101 return (size_t)-1;
102 } else {
103 if (output) {
104 *output++ = (input >> 10)+0xd7c0;
105 *output++ = (input&0x3ff)+0xdc00;
106 }
107 return 2;
108 }
109}
110
111static size_t decode_utf16(wxUint16*input,wxUint32&output)
112{
113 if ((*input<0xd800) || (*input>0xdfff)) {
114 output = *input;
115 return 1;
116 } else
117 if ((input[1]<0xdc00) || (input[1]>=0xdfff)) {
118 output = *input;
119 return (size_t)-1;
120 } else {
121 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
122 return 2;
123 }
124}
125
126#endif // WC_UTF16
127
128// ----------------------------------------------------------------------------
129// wxMBConv
130// ----------------------------------------------------------------------------
131
132WXDLLEXPORT_DATA(wxMBConv) wxConvLibc;
133
134size_t wxMBConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
135{
136 return wxMB2WC(buf, psz, n);
137}
138
139size_t wxMBConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
140{
141 return wxWC2MB(buf, psz, n);
142}
143
144const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
145{
146 if (psz)
147 {
148 size_t nLen = MB2WC((wchar_t *) NULL, psz, 0);
149 if (nLen == (size_t)-1)
150 return wxWCharBuffer((wchar_t *) NULL);
151 wxWCharBuffer buf(nLen);
152 MB2WC((wchar_t *)(const wchar_t *) buf, psz, nLen);
153 return buf;
154 }
155 else
156 return wxWCharBuffer((wchar_t *) NULL);
157}
158
159const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *psz) const
160{
161 if (psz)
162 {
163 size_t nLen = WC2MB((char *) NULL, psz, 0);
164 if (nLen == (size_t)-1)
165 return wxCharBuffer((char *) NULL);
166 wxCharBuffer buf(nLen);
167 WC2MB((char *)(const char *) buf, psz, nLen);
168 return buf;
169 }
170 else
171 return wxCharBuffer((char *) NULL);
172}
173
174// ----------------------------------------------------------------------------
175// standard file conversion
176// ----------------------------------------------------------------------------
177
178WXDLLEXPORT_DATA(wxMBConvFile) wxConvFile;
179
180// just use the libc conversion for now
181size_t wxMBConvFile::MB2WC(wchar_t *buf, const char *psz, size_t n) const
182{
183 return wxMB2WC(buf, psz, n);
184}
185
186size_t wxMBConvFile::WC2MB(char *buf, const wchar_t *psz, size_t n) const
187{
188 return wxWC2MB(buf, psz, n);
189}
190
191// ----------------------------------------------------------------------------
192// standard gdk conversion
193// ----------------------------------------------------------------------------
194
195#ifdef __WXGTK12__
196
197WXDLLEXPORT_DATA(wxMBConvGdk) wxConvGdk;
198
199#include <gdk/gdk.h>
200
201size_t wxMBConvGdk::MB2WC(wchar_t *buf, const char *psz, size_t n) const
202{
203 if (buf) {
204 return gdk_mbstowcs((GdkWChar *)buf, psz, n);
205 } else {
206 GdkWChar *nbuf = new GdkWChar[n=strlen(psz)];
207 size_t len = gdk_mbstowcs(nbuf, psz, n);
208 delete [] nbuf;
209 return len;
210 }
211}
212
213size_t wxMBConvGdk::WC2MB(char *buf, const wchar_t *psz, size_t n) const
214{
215 char *mbstr = gdk_wcstombs((GdkWChar *)psz);
216 size_t len = mbstr ? strlen(mbstr) : 0;
217 if (buf) {
218 if (len > n) len = n;
219 memcpy(buf, psz, len);
220 if (len < n) buf[len] = 0;
221 }
222 return len;
223}
224
225#endif // GTK > 1.0
226
227// ----------------------------------------------------------------------------
228// UTF-7
229// ----------------------------------------------------------------------------
230
231WXDLLEXPORT_DATA(wxMBConvUTF7) wxConvUTF7;
232
233#if 0
234static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
235 "abcdefghijklmnopqrstuvwxyz"
236 "0123456789'(),-./:?";
237static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
238static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
239 "abcdefghijklmnopqrstuvwxyz"
240 "0123456789+/";
241#endif
242
243// TODO: write actual implementations of UTF-7 here
244size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
245 const char * WXUNUSED(psz),
246 size_t WXUNUSED(n)) const
247{
248 return 0;
249}
250
251size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
252 const wchar_t * WXUNUSED(psz),
253 size_t WXUNUSED(n)) const
254{
255 return 0;
256}
257
258// ----------------------------------------------------------------------------
259// UTF-8
260// ----------------------------------------------------------------------------
261
262WXDLLEXPORT_DATA(wxMBConvUTF8) wxConvUTF8;
263
264static wxUint32 utf8_max[]={0x7f,0x7ff,0xffff,0x1fffff,0x3ffffff,0x7fffffff,0xffffffff};
265
266size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
267{
268 size_t len = 0;
269
270 while (*psz && ((!buf) || (len<n))) {
271 unsigned char cc=*psz++, fc=cc;
272 unsigned cnt;
273 for (cnt=0; fc&0x80; cnt++) fc<<=1;
274 if (!cnt) {
275 // plain ASCII char
276 if (buf) *buf++=cc;
277 len++;
278 } else {
279 cnt--;
280 if (!cnt) {
281 // invalid UTF-8 sequence
282 return (size_t)-1;
283 } else {
284 unsigned ocnt=cnt-1;
285 wxUint32 res=cc&(0x3f>>cnt);
286 while (cnt--) {
287 cc = *psz++;
288 if ((cc&0xC0)!=0x80) {
289 // invalid UTF-8 sequence
290 return (size_t)-1;
291 }
292 res=(res<<6)|(cc&0x3f);
293 }
294 if (res<=utf8_max[ocnt]) {
295 // illegal UTF-8 encoding
296 return (size_t)-1;
297 }
298#ifdef WC_UTF16
299 size_t pa = encode_utf16(res, buf);
300 if (pa == (size_t)-1)
301 return (size_t)-1;
302 if (buf) buf+=pa;
303 len+=pa;
304#else
305 if (buf) *buf++=res;
306 len++;
307#endif
308 }
309 }
310 }
311 if (buf && (len<n)) *buf = 0;
312 return len;
313}
314
315size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
316{
317 size_t len = 0;
318
319 while (*psz && ((!buf) || (len<n))) {
320 wxUint32 cc;
321#ifdef WC_UTF16
322 size_t pa = decode_utf16(psz,cc);
323 psz += (pa == (size_t)-1) ? 1 : pa;
324#else
325 cc=(*psz++)&0x7fffffff;
326#endif
327 unsigned cnt;
328 for (cnt=0; cc>utf8_max[cnt]; cnt++);
329 if (!cnt) {
330 // plain ASCII char
331 if (buf) *buf++=cc;
332 len++;
333 } else {
334 len+=cnt+1;
335 if (buf) {
336 *buf++=(-128>>cnt)|((cc>>(cnt*6))&(0x3f>>cnt));
337 while (cnt--)
338 *buf++=0x80|((cc>>(cnt*6))&0x3f);
339 }
340 }
341 }
342 if (buf && (len<n)) *buf = 0;
343 return len;
344}
345
346// ----------------------------------------------------------------------------
347// specified character set
348// ----------------------------------------------------------------------------
349
350WXDLLEXPORT_DATA(wxCSConv) wxConvLocal((const wxChar *)NULL);
351
352#include "wx/encconv.h"
353#include "wx/fontmap.h"
354
355// TODO: add some tables here
356// - perhaps common encodings to common codepages (for Win32)
357// - perhaps common encodings to objects ("UTF8" -> wxConvUTF8)
358// - move wxEncodingConverter meat in here
359
360#ifdef __WIN32__
361#include "wx/msw/registry.h"
362// this should work if M$ Internet Exploiter is installed
363static long CharsetToCodepage(const wxChar *name)
364{
365 if (!name)
366 return GetACP();
367
368 long CP=-1;
369
370 wxString cn(name);
371 do {
372 wxString path( wxT("MIME\\Database\\Charset\\") );
373 path += cn;
374 wxRegKey key( wxRegKey::HKCR, path );
375
376 /* two cases: either there's an AliasForCharset string,
377 * or there are Codepage and InternetEncoding dwords.
378 * The InternetEncoding gives us the actual encoding,
379 * the Codepage just says which Windows character set to
380 * use when displaying the data.
381 */
382 if (key.QueryValue( wxT("InternetEncoding"), &CP )) break;
383
384 // no encoding, see if it's an alias
385 if (!key.QueryValue( wxT("AliasForCharset"), cn )) break;
386 } while (1);
387
388 return CP;
389}
390#endif
391
392class wxCharacterSet
393{
394public:
395 wxCharacterSet(const wxChar*name)
396 : cname(name) {}
397 virtual ~wxCharacterSet()
398 {}
399 virtual size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
400 { return (size_t)-1; }
401 virtual size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
402 { return (size_t)-1; }
403 virtual bool usable()
404 { return FALSE; }
405public:
406 const wxChar*cname;
407};
408
409class ID_CharSet : public wxCharacterSet
410{
411public:
412 ID_CharSet(const wxChar*name,wxMBConv*cnv)
413 : wxCharacterSet(name), work(cnv) {}
414
415 size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
416 { return work ? work->MB2WC(buf,psz,n) : (size_t)-1; }
417
418 size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
419 { return work ? work->WC2MB(buf,psz,n) : (size_t)-1; }
420
421 bool usable()
422 { return work!=NULL; }
423public:
424 wxMBConv*work;
425};
426
427
428#ifdef HAVE_ICONV_H
429
430// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
431// if output buffer is _exactly_ as big as needed. Such case is (unless there's
432// yet another bug in glibc) the only case when iconv() returns with (size_t)-1
433// (which means error) and says there are 0 bytes left in the input buffer --
434// when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
435// this alternative test for iconv() failure.
436// [This bug does not appear in glibc 2.2.]
437#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
438#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
439 (errno != E2BIG || bufLeft != 0))
440#else
441#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
442#endif
443
444class IC_CharSet : public wxCharacterSet
445{
446public:
447 IC_CharSet(const wxChar*name)
448 : wxCharacterSet(name)
449 {
450 m2w = iconv_open(WC_NAME, wxConvLibc.cWX2MB(cname));
451 w2m = iconv_open(wxConvLibc.cWX2MB(cname), WC_NAME);
452 }
453
454 ~IC_CharSet()
455 {
456 if ( m2w != (iconv_t)-1 )
457 iconv_close(m2w);
458 if ( w2m != (iconv_t)-1 )
459 iconv_close(w2m);
460 }
461
462 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
463 {
464 size_t inbuf = strlen(psz);
465 size_t outbuf = n * SIZEOF_WCHAR_T;
466 size_t res, cres;
467 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
468 wchar_t *bufPtr = buf;
469 const char *pszPtr = psz;
470
471 if (buf)
472 {
473 // have destination buffer, convert there
474#ifdef WX_ICONV_TAKES_CHAR
475 cres = iconv(m2w, (char**)&pszPtr, &inbuf, (char**)&bufPtr, &outbuf);
476#else
477 cres = iconv(m2w, &pszPtr, &inbuf, (char**)&bufPtr, &outbuf);
478#endif
479 res = n - (outbuf / SIZEOF_WCHAR_T);
480 // convert to native endianness
481#ifdef WC_NEED_BSWAP
482 WC_BSWAP(buf /* _not_ bufPtr */, res)
483#endif
484 }
485 else
486 {
487 // no destination buffer... convert using temp buffer
488 // to calculate destination buffer requirement
489 wchar_t tbuf[8];
490 res = 0;
491 do {
492 bufPtr = tbuf; outbuf = 8*SIZEOF_WCHAR_T;
493#ifdef WX_ICONV_TAKES_CHAR
494 cres = iconv( m2w, (char**)&pszPtr, &inbuf, (char**)&bufPtr, &outbuf );
495#else
496 cres = iconv( m2w, &pszPtr, &inbuf, (char**)&bufPtr, &outbuf );
497#endif
498 res += 8-(outbuf/SIZEOF_WCHAR_T);
499 } while ((cres==(size_t)-1) && (errno==E2BIG));
500 }
501
502 if (ICONV_FAILED(cres, inbuf))
503 return (size_t)-1;
504
505 return res;
506 }
507
508 size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
509 {
510#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
511 size_t inbuf = std::wcslen(psz) * SIZEOF_WCHAR_T;
512#else
513 size_t inbuf = ::wcslen(psz) * SIZEOF_WCHAR_T;
514#endif
515 size_t outbuf = n;
516 size_t res, cres;
517
518#ifdef WC_NEED_BSWAP
519 // need to copy to temp buffer to switch endianness
520 // this absolutely doesn't rock!
521 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
522 // could be in read-only memory, or be accessed in some other thread)
523 wchar_t*tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
524 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
525 WC_BSWAP(tmpbuf, inbuf)
526 psz=tmpbuf;
527#endif
528 if (buf)
529 {
530 // have destination buffer, convert there
531#ifdef WX_ICONV_TAKES_CHAR
532 cres = iconv( w2m, (char**)&psz, &inbuf, &buf, &outbuf );
533#else
534 cres = iconv( w2m, (const char**)&psz, &inbuf, &buf, &outbuf );
535#endif
536 res = n-outbuf;
537 }
538 else
539 {
540 // no destination buffer... convert using temp buffer
541 // to calculate destination buffer requirement
542 char tbuf[16];
543 res = 0;
544 do {
545 buf = tbuf; outbuf = 16;
546#ifdef WX_ICONV_TAKES_CHAR
547 cres = iconv( w2m, (char**)&psz, &inbuf, &buf, &outbuf );
548#else
549 cres = iconv( w2m, (const char**)&psz, &inbuf, &buf, &outbuf );
550#endif
551 res += 16 - outbuf;
552 } while ((cres==(size_t)-1) && (errno==E2BIG));
553 }
554#ifdef WC_NEED_BSWAP
555 free(tmpbuf);
556#endif
557 if (ICONV_FAILED(cres, inbuf))
558 return (size_t)-1;
559
560 return res;
561 }
562
563 bool usable()
564 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
565
566public:
567 iconv_t m2w, w2m;
568};
569#endif
570
571#ifdef __WIN32__
572class CP_CharSet : public wxCharacterSet
573{
574public:
575 CP_CharSet(const wxChar*name)
576 : wxCharacterSet(name), CodePage(CharsetToCodepage(name)) {}
577
578 size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
579 {
580 size_t len = MultiByteToWideChar(CodePage,0,psz,-1,buf,buf?n:0);
581 return len ? len : (size_t)-1;
582 }
583
584 size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
585 {
586 size_t len = WideCharToMultiByte(CodePage,0,psz,-1,buf,buf?n:0,NULL,NULL);
587 return len ? len : (size_t)-1;
588 }
589
590 bool usable()
591 { return CodePage!=-1; }
592
593public:
594 long CodePage;
595};
596#endif
597
598class EC_CharSet : public wxCharacterSet
599{
600public:
601 // temporarily just use wxEncodingConverter stuff,
602 // so that it works while a better implementation is built
603 EC_CharSet(const wxChar*name) : wxCharacterSet(name), enc(wxFONTENCODING_SYSTEM)
604 {
605 if (name)
606 enc = wxTheFontMapper->CharsetToEncoding(name, FALSE);
607 m2w.Init(enc, wxFONTENCODING_UNICODE);
608 w2m.Init(wxFONTENCODING_UNICODE, enc);
609 }
610
611 size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
612 {
613 size_t inbuf = strlen(psz);
614 if (buf) m2w.Convert(psz,buf);
615 return inbuf;
616 }
617
618 size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
619 {
620#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
621 size_t inbuf = std::wcslen(psz);
622#else
623 size_t inbuf = ::wcslen(psz);
624#endif
625 if (buf)
626 w2m.Convert(psz,buf);
627
628 return inbuf;
629 }
630
631 bool usable()
632 { return (enc!=wxFONTENCODING_SYSTEM) && (enc!=wxFONTENCODING_DEFAULT); }
633
634public:
635 wxFontEncoding enc;
636 wxEncodingConverter m2w, w2m;
637};
638
639static wxCharacterSet *wxGetCharacterSet(const wxChar *name)
640{
641 wxCharacterSet *cset = NULL;
642 if (name)
643 {
644 if (!wxStricmp(name, wxT("UTF8")) || !wxStricmp(name, wxT("UTF-8")))
645 {
646 cset = new ID_CharSet(name, &wxConvUTF8);
647 }
648 else
649 {
650#ifdef HAVE_ICONV_H
651 cset = new IC_CharSet(name); // may not take NULL
652#endif
653 }
654 }
655
656 if (cset && cset->usable()) return cset;
657 if (cset) delete cset;
658 cset = NULL;
659#ifdef __WIN32__
660 cset = new CP_CharSet(name); // may take NULL
661 if (cset->usable()) return cset;
662#endif
663 if (cset) delete cset;
664 cset = new EC_CharSet(name);
665 if (cset->usable()) return cset;
666 delete cset;
667 wxLogError(_("Unknown encoding '%s'!"), name);
668 return NULL;
669}
670
671wxCSConv::wxCSConv(const wxChar *charset)
672{
673 m_name = (wxChar *) NULL;
674 m_cset = (wxCharacterSet *) NULL;
675 m_deferred = TRUE;
676 SetName(charset);
677}
678
679wxCSConv::~wxCSConv()
680{
681 if (m_name) free(m_name);
682 if (m_cset) delete m_cset;
683}
684
685void wxCSConv::SetName(const wxChar *charset)
686{
687 if (charset)
688 {
689 m_name = wxStrdup(charset);
690 m_deferred = TRUE;
691 }
692}
693
694void wxCSConv::LoadNow()
695{
696 if (m_deferred)
697 {
698 if (!m_name)
699 {
700#ifdef __UNIX__
701#if defined(HAVE_LANGINFO_H) && defined(CODESET)
702 // GNU libc provides current character set this way
703 char*alang = nl_langinfo(CODESET);
704 if (alang)
705 {
706 SetName(wxConvLibc.cMB2WX(alang));
707 }
708 else
709#endif
710 {
711 // if we can't get at the character set directly,
712 // try to see if it's in the environment variables
713 // (in most cases this won't work, but I was out of ideas)
714 wxChar *lang = wxGetenv(wxT("LC_ALL"));
715 if (!lang) lang = wxGetenv(wxT("LC_CTYPE"));
716 if (!lang) lang = wxGetenv(wxT("LANG"));
717 wxChar *dot = lang ? wxStrchr(lang, wxT('.')) : (wxChar *)NULL;
718 if (dot) SetName(dot+1);
719 }
720#endif
721 }
722 m_cset = wxGetCharacterSet(m_name);
723 m_deferred = FALSE;
724 }
725}
726
727size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
728{
729 ((wxCSConv *)this)->LoadNow(); // discard constness
730
731 if (m_cset)
732 return m_cset->MB2WC(buf, psz, n);
733
734 // latin-1 (direct)
735 size_t len=strlen(psz);
736
737 if (buf)
738 {
739 for (size_t c=0; c<=len; c++)
740 buf[c] = (unsigned char)(psz[c]);
741 }
742
743 return len;
744}
745
746size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
747{
748 ((wxCSConv *)this)->LoadNow(); // discard constness
749
750 if (m_cset)
751 return m_cset->WC2MB(buf, psz, n);
752
753 // latin-1 (direct)
754#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
755 size_t len=std::wcslen(psz);
756#else
757 size_t len=::wcslen(psz);
758#endif
759 if (buf)
760 {
761 for (size_t c=0; c<=len; c++)
762 buf[c] = (psz[c]>0xff) ? '?' : psz[c];
763 }
764
765 return len;
766}
767
768#ifdef HAVE_ICONV_H
769class IC_CharSetConverter
770{
771public:
772 IC_CharSetConverter(IC_CharSet*from,IC_CharSet*to)
773 { cnv = iconv_open(wxConvLibc.cWX2MB(to->cname),wxConvLibc.cWX2MB(from->cname)); }
774
775 ~IC_CharSetConverter()
776 { if (cnv!=(iconv_t)-1) iconv_close(cnv); }
777
778 size_t Convert(char*buf, const char*psz, size_t n)
779 {
780 size_t inbuf = strlen(psz);
781 size_t outbuf = n;
782#ifdef WX_ICONV_TAKES_CHAR
783 size_t res = iconv( cnv, (char**)&psz, &inbuf, &buf, &outbuf );
784#else
785 size_t res = iconv( cnv, &psz, &inbuf, &buf, &outbuf );
786#endif
787 if (res==(size_t)-1) return (size_t)-1;
788 return n-outbuf;
789 }
790
791public:
792 iconv_t cnv;
793};
794#endif
795
796class EC_CharSetConverter
797{
798public:
799 EC_CharSetConverter(EC_CharSet*from,EC_CharSet*to)
800 { cnv.Init(from->enc,to->enc); }
801
802 size_t Convert(char*buf, const char*psz, size_t n)
803 {
804 size_t inbuf = strlen(psz);
805 if (buf) cnv.Convert(psz,buf);
806 return inbuf;
807 }
808
809public:
810 wxEncodingConverter cnv;
811};
812
813#else // !wxUSE_WCHAR_T
814
815// ----------------------------------------------------------------------------
816// stand-ins in absence of wchar_t
817// ----------------------------------------------------------------------------
818
819WXDLLEXPORT_DATA(wxMBConv) wxConvLibc, wxConvFile;
820
821#endif // wxUSE_WCHAR_T
822
823