]> git.saurik.com Git - wxWidgets.git/blame_incremental - src/common/strconv.cpp
We use native paths now in wxWindows. On the Mac, too.
[wxWidgets.git] / src / common / strconv.cpp
... / ...
CommitLineData
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
8// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
9// Licence: wxWindows license
10/////////////////////////////////////////////////////////////////////////////
11
12// ============================================================================
13// declarations
14// ============================================================================
15
16// ----------------------------------------------------------------------------
17// headers
18// ----------------------------------------------------------------------------
19
20#ifdef __GNUG__
21 #pragma implementation "strconv.h"
22#endif
23
24// For compilers that support precompilation, includes "wx.h".
25#include "wx/wxprec.h"
26
27#ifdef __BORLANDC__
28 #pragma hdrstop
29#endif
30
31#ifdef __WXMSW__
32 #include "wx/msw/private.h"
33#endif
34
35#include <errno.h>
36#include <ctype.h>
37#include <string.h>
38#include <stdlib.h>
39
40#ifdef __SALFORDC__
41 #include <clib.h>
42#endif
43
44#ifdef HAVE_ICONV_H
45 #include <iconv.h>
46#endif
47
48#ifdef __WXMSW__
49 #include <windows.h>
50#endif
51
52#include "wx/debug.h"
53#include "wx/strconv.h"
54#include "wx/intl.h"
55#include "wx/log.h"
56
57#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
58#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
59
// under Unix SIZEOF_WCHAR_T is defined by configure, but on other platforms
// it might not be defined - in that case assume the most common value
62#ifndef SIZEOF_WCHAR_T
63 #define SIZEOF_WCHAR_T 2
64#endif // !defined(SIZEOF_WCHAR_T)
65
66#if SIZEOF_WCHAR_T == 4
67 #define WC_NAME "UCS4"
68 #define WC_BSWAP BSWAP_UCS4
69 #ifdef WORDS_BIGENDIAN
70 #define WC_NAME_BEST "UCS-4BE"
71 #else
72 #define WC_NAME_BEST "UCS-4LE"
73 #endif
74#elif SIZEOF_WCHAR_T == 2
75 #define WC_NAME "UTF16"
76 #define WC_BSWAP BSWAP_UTF16
77 #define WC_UTF16
78 #ifdef WORDS_BIGENDIAN
79 #define WC_NAME_BEST "UTF-16BE"
80 #else
81 #define WC_NAME_BEST "UTF-16LE"
82 #endif
83#else // sizeof(wchar_t) != 2 nor 4
84 // I don't know what to do about this
85 #error "Weird sizeof(wchar_t): please report your platform details to wx-users mailing list"
86#endif
87
88// ----------------------------------------------------------------------------
89// globals
90// ----------------------------------------------------------------------------
91
92WXDLLEXPORT_DATA(wxMBConv *) wxConvCurrent = &wxConvLibc;
93
94// ============================================================================
95// implementation
96// ============================================================================
97
98#if wxUSE_WCHAR_T
99
100#ifdef WC_UTF16
101
102static size_t encode_utf16(wxUint32 input, wchar_t *output)
103{
104 if (input<=0xffff)
105 {
106 if (output) *output++ = input;
107 return 1;
108 }
109 else if (input>=0x110000)
110 {
111 return (size_t)-1;
112 }
113 else
114 {
115 if (output)
116 {
117 *output++ = (input >> 10)+0xd7c0;
118 *output++ = (input&0x3ff)+0xdc00;
119 }
120 return 2;
121 }
122}
123
124static size_t decode_utf16(const wchar_t* input, wxUint32& output)
125{
126 if ((*input<0xd800) || (*input>0xdfff))
127 {
128 output = *input;
129 return 1;
130 }
131 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
132 {
133 output = *input;
134 return (size_t)-1;
135 }
136 else
137 {
138 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
139 return 2;
140 }
141}
142
143#endif // WC_UTF16
144
145// ----------------------------------------------------------------------------
146// wxMBConv
147// ----------------------------------------------------------------------------
148
149WXDLLEXPORT_DATA(wxMBConv) wxConvLibc;
150
151size_t wxMBConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
152{
153 return wxMB2WC(buf, psz, n);
154}
155
156size_t wxMBConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
157{
158 return wxWC2MB(buf, psz, n);
159}
160
161const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
162{
163 if (psz)
164 {
165 size_t nLen = MB2WC((wchar_t *) NULL, psz, 0);
166 if (nLen == (size_t)-1)
167 return wxWCharBuffer((wchar_t *) NULL);
168 wxWCharBuffer buf(nLen);
169 MB2WC((wchar_t *)(const wchar_t *) buf, psz, nLen);
170 return buf;
171 }
172 else
173 return wxWCharBuffer((wchar_t *) NULL);
174}
175
176const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *psz) const
177{
178 if (psz)
179 {
180 size_t nLen = WC2MB((char *) NULL, psz, 0);
181 if (nLen == (size_t)-1)
182 return wxCharBuffer((char *) NULL);
183 wxCharBuffer buf(nLen);
184 WC2MB((char *)(const char *) buf, psz, nLen);
185 return buf;
186 }
187 else
188 return wxCharBuffer((char *) NULL);
189}
190
191// ----------------------------------------------------------------------------
192// standard file conversion
193// ----------------------------------------------------------------------------
194
195WXDLLEXPORT_DATA(wxMBConvFile) wxConvFile;
196
197// just use the libc conversion for now
198size_t wxMBConvFile::MB2WC(wchar_t *buf, const char *psz, size_t n) const
199{
200 return wxMB2WC(buf, psz, n);
201}
202
203size_t wxMBConvFile::WC2MB(char *buf, const wchar_t *psz, size_t n) const
204{
205 return wxWC2MB(buf, psz, n);
206}
207
208// ----------------------------------------------------------------------------
209// standard gdk conversion
210// ----------------------------------------------------------------------------
211
212#ifdef __WXGTK12__
213
214WXDLLEXPORT_DATA(wxMBConvGdk) wxConvGdk;
215
216#include <gdk/gdk.h>
217
218size_t wxMBConvGdk::MB2WC(wchar_t *buf, const char *psz, size_t n) const
219{
220 if (buf)
221 {
222 return gdk_mbstowcs((GdkWChar *)buf, psz, n);
223 }
224 else
225 {
226 GdkWChar *nbuf = new GdkWChar[n=strlen(psz)];
227 size_t len = gdk_mbstowcs(nbuf, psz, n);
228 delete[] nbuf;
229 return len;
230 }
231}
232
233size_t wxMBConvGdk::WC2MB(char *buf, const wchar_t *psz, size_t n) const
234{
235 char *mbstr = gdk_wcstombs((GdkWChar *)psz);
236 size_t len = mbstr ? strlen(mbstr) : 0;
237 if (buf)
238 {
239 if (len > n)
240 len = n;
241 memcpy(buf, psz, len);
242 if (len < n)
243 buf[len] = 0;
244 }
245 return len;
246}
247
248#endif // GTK > 1.0
249
250// ----------------------------------------------------------------------------
251// UTF-7
252// ----------------------------------------------------------------------------
253
254WXDLLEXPORT_DATA(wxMBConvUTF7) wxConvUTF7;
255
256#if 0
257static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
258 "abcdefghijklmnopqrstuvwxyz"
259 "0123456789'(),-./:?";
260static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
261static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
262 "abcdefghijklmnopqrstuvwxyz"
263 "0123456789+/";
264#endif
265
266// TODO: write actual implementations of UTF-7 here
267size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
268 const char * WXUNUSED(psz),
269 size_t WXUNUSED(n)) const
270{
271 return 0;
272}
273
274size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
275 const wchar_t * WXUNUSED(psz),
276 size_t WXUNUSED(n)) const
277{
278 return 0;
279}
280
281// ----------------------------------------------------------------------------
282// UTF-8
283// ----------------------------------------------------------------------------
284
285WXDLLEXPORT_DATA(wxMBConvUTF8) wxConvUTF8;
286
287static wxUint32 utf8_max[]=
288 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
289
290size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
291{
292 size_t len = 0;
293
294 while (*psz && ((!buf) || (len < n)))
295 {
296 unsigned char cc = *psz++, fc = cc;
297 unsigned cnt;
298 for (cnt = 0; fc & 0x80; cnt++)
299 fc <<= 1;
300 if (!cnt)
301 {
302 // plain ASCII char
303 if (buf)
304 *buf++ = cc;
305 len++;
306 }
307 else
308 {
309 cnt--;
310 if (!cnt)
311 {
312 // invalid UTF-8 sequence
313 return (size_t)-1;
314 }
315 else
316 {
317 unsigned ocnt = cnt - 1;
318 wxUint32 res = cc & (0x3f >> cnt);
319 while (cnt--)
320 {
321 cc = *psz++;
322 if ((cc & 0xC0) != 0x80)
323 {
324 // invalid UTF-8 sequence
325 return (size_t)-1;
326 }
327 res = (res << 6) | (cc & 0x3f);
328 }
329 if (res <= utf8_max[ocnt])
330 {
331 // illegal UTF-8 encoding
332 return (size_t)-1;
333 }
334#ifdef WC_UTF16
335 size_t pa = encode_utf16(res, buf);
336 if (pa == (size_t)-1)
337 return (size_t)-1;
338 if (buf)
339 buf += pa;
340 len += pa;
341#else
342 if (buf)
343 *buf++ = res;
344 len++;
345#endif
346 }
347 }
348 }
349 if (buf && (len < n))
350 *buf = 0;
351 return len;
352}
353
354size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
355{
356 size_t len = 0;
357
358 while (*psz && ((!buf) || (len < n)))
359 {
360 wxUint32 cc;
361#ifdef WC_UTF16
362 size_t pa = decode_utf16(psz, cc);
363 psz += (pa == (size_t)-1) ? 1 : pa;
364#else
365 cc=(*psz++) & 0x7fffffff;
366#endif
367 unsigned cnt;
368 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
369 if (!cnt)
370 {
371 // plain ASCII char
372 if (buf)
373 *buf++ = cc;
374 len++;
375 }
376
377 else
378 {
379 len += cnt + 1;
380 if (buf)
381 {
382 *buf++ = (-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt));
383 while (cnt--)
384 *buf++ = 0x80 | ((cc >> (cnt * 6)) & 0x3f);
385 }
386 }
387 }
388
389 if (buf && (len<n)) *buf = 0;
390 return len;
391}
392
393// ----------------------------------------------------------------------------
394// specified character set
395// ----------------------------------------------------------------------------
396
397WXDLLEXPORT_DATA(wxCSConv) wxConvLocal((const wxChar *)NULL);
398
399#include "wx/encconv.h"
400#include "wx/fontmap.h"
401
402// TODO: add some tables here
403// - perhaps common encodings to common codepages (for Win32)
404// - perhaps common encodings to objects ("UTF8" -> wxConvUTF8)
405// - move wxEncodingConverter meat in here
406
407#if defined(__WIN32__) && !defined(__WXMICROWIN__)
408#include "wx/msw/registry.h"
409// this should work if M$ Internet Exploiter is installed
410static long CharsetToCodepage(const wxChar *name)
411{
412 if (!name)
413 return GetACP();
414
415 long CP=-1;
416
417 wxString cn(name);
418 do {
419 wxString path(wxT("MIME\\Database\\Charset\\"));
420 path += cn;
421 wxRegKey key(wxRegKey::HKCR, path);
422
423 if (!key.Exists()) break;
424
425 // two cases: either there's an AliasForCharset string,
426 // or there are Codepage and InternetEncoding dwords.
427 // The InternetEncoding gives us the actual encoding,
428 // the Codepage just says which Windows character set to
429 // use when displaying the data.
430 if (key.HasValue(wxT("InternetEncoding")) &&
431 key.QueryValue(wxT("InternetEncoding"), &CP)) break;
432
433 // no encoding, see if it's an alias
434 if (!key.HasValue(wxT("AliasForCharset")) ||
435 !key.QueryValue(wxT("AliasForCharset"), cn)) break;
436 } while (1);
437
438 return CP;
439}
440#endif
441
442class wxCharacterSet
443{
444public:
445 wxCharacterSet(const wxChar*name)
446 : cname(name) {}
447 virtual ~wxCharacterSet()
448 {}
449 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
450 { return (size_t)-1; }
451 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
452 { return (size_t)-1; }
453 virtual bool usable()
454 { return FALSE; }
455public:
456 const wxChar*cname;
457};
458
459class ID_CharSet : public wxCharacterSet
460{
461public:
462 ID_CharSet(const wxChar *name,wxMBConv *cnv)
463 : wxCharacterSet(name), work(cnv) {}
464
465 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
466 { return work ? work->MB2WC(buf,psz,n) : (size_t)-1; }
467
468 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
469 { return work ? work->WC2MB(buf,psz,n) : (size_t)-1; }
470
471 bool usable()
472 { return work!=NULL; }
473public:
474 wxMBConv*work;
475};
476
477
478#ifdef HAVE_ICONV_H
479
480bool g_wcNeedsSwap = FALSE;
481static const char *g_wcCharset = NULL;
482
483// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
484// if output buffer is _exactly_ as big as needed. Such case is (unless there's
485// yet another bug in glibc) the only case when iconv() returns with (size_t)-1
486// (which means error) and says there are 0 bytes left in the input buffer --
487// when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
488// this alternative test for iconv() failure.
489// [This bug does not appear in glibc 2.2.]
490#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
491#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
492 (errno != E2BIG || bufLeft != 0))
493#else
494#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
495#endif
496
497class IC_CharSet : public wxCharacterSet
498{
499public:
500 IC_CharSet(const wxChar *name)
501 : wxCharacterSet(name)
502 {
503 // check for charset that represents wchar_t:
504 if (g_wcCharset == NULL)
505 {
506 g_wcNeedsSwap = FALSE;
507
508 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
509 g_wcCharset = WC_NAME_BEST;
510 m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name));
511
512 if (m2w == (iconv_t)-1)
513 {
514 // try charset w/o bytesex info (e.g. "UCS4")
515 // and check for bytesex ourselves:
516 g_wcCharset = WC_NAME;
517 m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name));
518
519 // last bet, try if it knows WCHAR_T pseudo-charset
520 if (m2w == (iconv_t)-1)
521 {
522 g_wcCharset = "WCHAR_T";
523 m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name));
524 }
525
526 if (m2w != (iconv_t)-1)
527 {
528 char buf[2], *bufPtr;
529 wchar_t wbuf[2], *wbufPtr;
530 size_t insz, outsz;
531 size_t res;
532
533 buf[0] = 'A';
534 buf[1] = 0;
535 wbuf[0] = 0;
536 insz = 2;
537 outsz = SIZEOF_WCHAR_T * 2;
538 wbufPtr = wbuf;
539 bufPtr = buf;
540
541 #ifdef WX_ICONV_TAKES_CHAR
542 res = iconv(m2w, (char**)&bufPtr, &insz, (char**)&wbufPtr, &outsz);
543 #else
544 res = iconv(m2w, (const char**)&bufPtr, &insz, (char**)&wbufPtr, &outsz);
545 #endif
546 if (ICONV_FAILED(res, insz))
547 {
548 g_wcCharset = NULL;
549 wxLogLastError(wxT("iconv"));
550 wxLogError(_("Convertion to charset '%s' doesn't work."), name);
551 }
552 else
553 {
554 g_wcNeedsSwap = (wbuf[0] != (wchar_t)buf[0]);
555 }
556 }
557 else
558 {
559 g_wcCharset = NULL;
560 wxLogError(_("Don't know how to convert to/from charset '%s'."), name);
561 }
562 }
563 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), g_wcCharset, g_wcNeedsSwap);
564 }
565 else
566 m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name));
567
568 w2m = iconv_open(wxConvLibc.cWX2MB(name), g_wcCharset);
569 }
570
571 ~IC_CharSet()
572 {
573 if ( m2w != (iconv_t)-1 )
574 iconv_close(m2w);
575 if ( w2m != (iconv_t)-1 )
576 iconv_close(w2m);
577 }
578
579 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
580 {
581 size_t inbuf = strlen(psz);
582 size_t outbuf = n * SIZEOF_WCHAR_T;
583 size_t res, cres;
584 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
585 wchar_t *bufPtr = buf;
586 const char *pszPtr = psz;
587
588 if (buf)
589 {
590 // have destination buffer, convert there
591#ifdef WX_ICONV_TAKES_CHAR
592 cres = iconv(m2w, (char**)&pszPtr, &inbuf, (char**)&bufPtr, &outbuf);
593#else
594 cres = iconv(m2w, &pszPtr, &inbuf, (char**)&bufPtr, &outbuf);
595#endif
596 res = n - (outbuf / SIZEOF_WCHAR_T);
597
598 if (g_wcNeedsSwap)
599 {
600 // convert to native endianness
601 WC_BSWAP(buf /* _not_ bufPtr */, res)
602 }
603 }
604 else
605 {
606 // no destination buffer... convert using temp buffer
607 // to calculate destination buffer requirement
608 wchar_t tbuf[8];
609 res = 0;
610 do {
611 bufPtr = tbuf; outbuf = 8*SIZEOF_WCHAR_T;
612#ifdef WX_ICONV_TAKES_CHAR
613 cres = iconv( m2w, (char**)&pszPtr, &inbuf, (char**)&bufPtr, &outbuf );
614#else
615 cres = iconv( m2w, &pszPtr, &inbuf, (char**)&bufPtr, &outbuf );
616#endif
617 res += 8-(outbuf/SIZEOF_WCHAR_T);
618 } while ((cres==(size_t)-1) && (errno==E2BIG));
619 }
620
621 if (ICONV_FAILED(cres, inbuf))
622 {
623 //VS: it is ok if iconv fails, hence trace only
624 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
625 return (size_t)-1;
626 }
627
628 return res;
629 }
630
631 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
632 {
633#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
634 size_t inbuf = std::wcslen(psz) * SIZEOF_WCHAR_T;
635#else
636 size_t inbuf = ::wcslen(psz) * SIZEOF_WCHAR_T;
637#endif
638 size_t outbuf = n;
639 size_t res, cres;
640
641 wchar_t *tmpbuf;
642
643 if (g_wcNeedsSwap)
644 {
645 // need to copy to temp buffer to switch endianness
646 // this absolutely doesn't rock!
647 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
648 // could be in read-only memory, or be accessed in some other thread)
649 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
650 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
651 WC_BSWAP(tmpbuf, inbuf)
652 psz=tmpbuf;
653 }
654
655 if (buf)
656 {
657 // have destination buffer, convert there
658#ifdef WX_ICONV_TAKES_CHAR
659 cres = iconv( w2m, (char**)&psz, &inbuf, &buf, &outbuf );
660#else
661 cres = iconv( w2m, (const char**)&psz, &inbuf, &buf, &outbuf );
662#endif
663 res = n-outbuf;
664 }
665 else
666 {
667 // no destination buffer... convert using temp buffer
668 // to calculate destination buffer requirement
669 char tbuf[16];
670 res = 0;
671 do {
672 buf = tbuf; outbuf = 16;
673#ifdef WX_ICONV_TAKES_CHAR
674 cres = iconv( w2m, (char**)&psz, &inbuf, &buf, &outbuf );
675#else
676 cres = iconv( w2m, (const char**)&psz, &inbuf, &buf, &outbuf );
677#endif
678 res += 16 - outbuf;
679 } while ((cres==(size_t)-1) && (errno==E2BIG));
680 }
681
682 if (g_wcNeedsSwap)
683 {
684 free(tmpbuf);
685 }
686
687 if (ICONV_FAILED(cres, inbuf))
688 {
689 //VS: it is ok if iconv fails, hence trace only
690 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
691 return (size_t)-1;
692 }
693
694 return res;
695 }
696
697 bool usable()
698 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
699
700protected:
701 iconv_t m2w, w2m;
702};
703#endif
704
705#if defined(__WIN32__) && !defined(__WXMICROWIN__)
706class CP_CharSet : public wxCharacterSet
707{
708public:
709 CP_CharSet(const wxChar*name)
710 : wxCharacterSet(name), CodePage(CharsetToCodepage(name)) {}
711
712 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
713 {
714 size_t len =
715 MultiByteToWideChar(CodePage, 0, psz, -1, buf, buf ? n : 0);
716 //VS: returns # of written chars for buf!=NULL and *size*
717 // needed buffer for buf==NULL
718 return len ? (buf ? len : len-1) : (size_t)-1;
719 }
720
721 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
722 {
723 size_t len = WideCharToMultiByte(CodePage, 0, psz, -1, buf,
724 buf ? n : 0, NULL, NULL);
725 //VS: returns # of written chars for buf!=NULL and *size*
726 // needed buffer for buf==NULL
727 return len ? (buf ? len : len-1) : (size_t)-1;
728 }
729
730 bool usable()
731 { return CodePage != -1; }
732
733public:
734 long CodePage;
735};
736#endif // __WIN32__
737
738#if wxUSE_FONTMAP
739
740class EC_CharSet : public wxCharacterSet
741{
742public:
743 // temporarily just use wxEncodingConverter stuff,
744 // so that it works while a better implementation is built
745 EC_CharSet(const wxChar*name) : wxCharacterSet(name),
746 enc(wxFONTENCODING_SYSTEM)
747 {
748 if (name)
749 enc = wxTheFontMapper->CharsetToEncoding(name, FALSE);
750 m2w.Init(enc, wxFONTENCODING_UNICODE);
751 w2m.Init(wxFONTENCODING_UNICODE, enc);
752 }
753
754 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
755 {
756 size_t inbuf = strlen(psz);
757 if (buf)
758 m2w.Convert(psz,buf);
759 return inbuf;
760 }
761
762 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
763 {
764#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
765 size_t inbuf = std::wcslen(psz);
766#else
767 size_t inbuf = ::wcslen(psz);
768#endif
769 if (buf)
770 w2m.Convert(psz,buf);
771
772 return inbuf;
773 }
774
775 bool usable()
776 { return (enc!=wxFONTENCODING_SYSTEM) && (enc!=wxFONTENCODING_DEFAULT); }
777
778public:
779 wxFontEncoding enc;
780 wxEncodingConverter m2w, w2m;
781};
782
783#endif // wxUSE_FONTMAP
784
785static wxCharacterSet *wxGetCharacterSet(const wxChar *name)
786{
787 wxCharacterSet *cset = NULL;
788 if (name)
789 {
790 if (wxStricmp(name, wxT("UTF8")) == 0 || wxStricmp(name, wxT("UTF-8")) == 0)
791 {
792 cset = new ID_CharSet(name, &wxConvUTF8);
793 }
794 else
795 {
796#ifdef HAVE_ICONV_H
797 cset = new IC_CharSet(name); // may not take NULL
798#endif
799 }
800 }
801
802 if (cset && cset->usable())
803 return cset;
804
805 if (cset)
806 {
807 delete cset;
808 cset = NULL;
809 }
810
811#if defined(__WIN32__) && !defined(__WXMICROWIN__)
812 cset = new CP_CharSet(name); // may take NULL
813 if (cset->usable())
814 return cset;
815
816 delete cset;
817#endif // __WIN32__
818
819#if wxUSE_FONTMAP
820 cset = new EC_CharSet(name);
821 if (cset->usable())
822 return cset;
823#endif // wxUSE_FONTMAP
824
825 delete cset;
826 wxLogError(_("Unknown encoding '%s'!"), name);
827 return NULL;
828}
829
830wxCSConv::wxCSConv(const wxChar *charset)
831{
832 m_name = (wxChar *)NULL;
833 m_cset = (wxCharacterSet *) NULL;
834 m_deferred = TRUE;
835
836 SetName(charset);
837}
838
839wxCSConv::~wxCSConv()
840{
841 free(m_name);
842 delete m_cset;
843}
844
845void wxCSConv::SetName(const wxChar *charset)
846{
847 if (charset)
848 {
849 m_name = wxStrdup(charset);
850 m_deferred = TRUE;
851 }
852}
853
854void wxCSConv::LoadNow()
855{
856 if (m_deferred)
857 {
858 if ( !m_name )
859 {
860 wxString name = wxLocale::GetSystemEncodingName();
861 if ( !name.empty() )
862 SetName(name);
863 }
864
865 // wxGetCharacterSet() complains about NULL name
866 m_cset = m_name ? wxGetCharacterSet(m_name) : NULL;
867 m_deferred = FALSE;
868 }
869}
870
871size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
872{
873 ((wxCSConv *)this)->LoadNow(); // discard constness
874
875 if (m_cset)
876 return m_cset->MB2WC(buf, psz, n);
877
878 // latin-1 (direct)
879 size_t len = strlen(psz);
880
881 if (buf)
882 {
883 for (size_t c = 0; c <= len; c++)
884 buf[c] = (unsigned char)(psz[c]);
885 }
886
887 return len;
888}
889
890size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
891{
892 ((wxCSConv *)this)->LoadNow(); // discard constness
893
894 if (m_cset)
895 return m_cset->WC2MB(buf, psz, n);
896
897 // latin-1 (direct)
898#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
899 size_t len=std::wcslen(psz);
900#else
901 size_t len=::wcslen(psz);
902#endif
903 if (buf)
904 {
905 for (size_t c = 0; c <= len; c++)
906 buf[c] = (psz[c] > 0xff) ? '?' : psz[c];
907 }
908
909 return len;
910}
911
912#ifdef HAVE_ICONV_H
913
914class IC_CharSetConverter
915{
916public:
917 IC_CharSetConverter(IC_CharSet *from, IC_CharSet *to)
918 {
919 cnv = iconv_open(wxConvLibc.cWX2MB(to->cname),
920 wxConvLibc.cWX2MB(from->cname));
921 }
922
923 ~IC_CharSetConverter()
924 {
925 if (cnv != (iconv_t)-1)
926 iconv_close(cnv);
927 }
928
929 size_t Convert(char *buf, const char *psz, size_t n)
930 {
931 size_t inbuf = strlen(psz);
932 size_t outbuf = n;
933#ifdef WX_ICONV_TAKES_CHAR
934 size_t res = iconv( cnv, (char**)&psz, &inbuf, &buf, &outbuf );
935#else
936 size_t res = iconv( cnv, &psz, &inbuf, &buf, &outbuf );
937#endif
938 if (res == (size_t)-1)
939 return (size_t)-1;
940 return (n - outbuf);
941 }
942
943public:
944 iconv_t cnv;
945};
946
947#endif // HAVE_ICONV_H
948
// EC_CharSet is only defined if wxUSE_FONTMAP is on, so this class, which
// needs it, can only be compiled in this case too
#if wxUSE_FONTMAP

// Direct conversion between the encodings of two EC_CharSet objects using
// wxEncodingConverter.
class EC_CharSetConverter
{
public:
    EC_CharSetConverter(EC_CharSet* from,EC_CharSet* to)
        { cnv.Init(from->enc,to->enc); }

    // convert 'psz' into 'buf' if it is not NULL; the size 'n' of the buffer
    // is not used and strlen(psz) is always returned
    size_t Convert(char* buf, const char* psz, size_t n)
    {
        size_t inbuf = strlen(psz);
        if (buf)
            cnv.Convert(psz,buf);
        return inbuf;
    }

public:
    wxEncodingConverter cnv;
};

#endif // wxUSE_FONTMAP
965
966#else // !wxUSE_WCHAR_T
967
968// ----------------------------------------------------------------------------
969// stand-ins in absence of wchar_t
970// ----------------------------------------------------------------------------
971
972WXDLLEXPORT_DATA(wxMBConv) wxConvLibc, wxConvFile;
973
974#endif // wxUSE_WCHAR_T
975
976