]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
Added a couple more numeric character references
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
8// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin
9// Licence: wxWindows license
10/////////////////////////////////////////////////////////////////////////////
11
f6bcfd97
BP
12// ============================================================================
13// declarations
14// ============================================================================
15
16// ----------------------------------------------------------------------------
17// headers
18// ----------------------------------------------------------------------------
19
6001e347
RR
20#ifdef __GNUG__
21 #pragma implementation "strconv.h"
22#endif
23
24// For compilers that support precompilation, includes "wx.h".
25#include "wx/wxprec.h"
26
27#ifdef __BORLANDC__
28 #pragma hdrstop
29#endif
30
0a1c1e62
GRG
31#ifdef __WXMSW__
32 #include "wx/msw/private.h"
33#endif
34
1cd52418 35#include <errno.h>
6001e347
RR
36#include <ctype.h>
37#include <string.h>
38#include <stdlib.h>
39
40#ifdef __SALFORDC__
41 #include <clib.h>
42#endif
43
1cd52418
OK
44#ifdef HAVE_ICONV_H
45 #include <iconv.h>
46#endif
47#ifdef HAVE_LANGINFO_H
48 #include <langinfo.h>
49#endif
50
3e61dfb0
OK
51#ifdef __WXMSW__
52 #include <windows.h>
53#endif
54
6001e347
RR
55#include "wx/debug.h"
56#include "wx/strconv.h"
3caec1bb
VS
57#include "wx/intl.h"
58#include "wx/log.h"
6001e347 59
d43088ee 60#if defined(WORDS_BIGENDIAN) || defined(__STDC_ISO_10646__)
1cd52418
OK
61#define BSWAP_UCS4(str, len)
62#define BSWAP_UCS2(str, len)
63#else
64#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
65#define BSWAP_UCS2(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
66#define WC_NEED_BSWAP
67#endif
68#define BSWAP_UTF32(str, len) BSWAP_UCS4(str, len)
69#define BSWAP_UTF16(str, len) BSWAP_UCS2(str, len)
70
71#if SIZEOF_WCHAR_T == 4
72#define WC_NAME "UCS4"
73#define WC_BSWAP BSWAP_UCS4
74#elif SIZEOF_WCHAR_T == 2
75#define WC_NAME "UTF16"
76#define WC_BSWAP BSWAP_UTF16
77#define WC_UTF16
78#endif
79
f6bcfd97
BP
80// ----------------------------------------------------------------------------
81// globals
82// ----------------------------------------------------------------------------
6001e347
RR
83
84WXDLLEXPORT_DATA(wxMBConv *) wxConvCurrent = &wxConvLibc;
85
f6bcfd97
BP
86// ============================================================================
87// implementation
88// ============================================================================
6001e347 89
f6bcfd97 90#if wxUSE_WCHAR_T
6001e347 91
b0a6bb75
VZ
92#ifdef WC_UTF16
93
1cd52418
OK
94static size_t encode_utf16(wxUint32 input,wxUint16*output)
95{
96 if (input<=0xffff) {
97 if (output) *output++ = input;
98 return 1;
99 } else
100 if (input>=0x110000) {
101 return (size_t)-1;
102 } else {
103 if (output) {
104 *output++ = (input >> 10)+0xd7c0;
105 *output++ = (input&0x3ff)+0xdc00;
106 }
107 return 2;
108 }
109}
110
111static size_t decode_utf16(wxUint16*input,wxUint32&output)
112{
113 if ((*input<0xd800) || (*input>0xdfff)) {
114 output = *input;
115 return 1;
116 } else
117 if ((input[1]<0xdc00) || (input[1]>=0xdfff)) {
118 output = *input;
119 return (size_t)-1;
120 } else {
121 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
122 return 2;
123 }
124}
125
b0a6bb75
VZ
126#endif // WC_UTF16
127
f6bcfd97 128// ----------------------------------------------------------------------------
6001e347 129// wxMBConv
f6bcfd97 130// ----------------------------------------------------------------------------
6001e347
RR
131
132WXDLLEXPORT_DATA(wxMBConv) wxConvLibc;
133
134size_t wxMBConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
135{
136 return wxMB2WC(buf, psz, n);
137}
138
139size_t wxMBConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
140{
141 return wxWC2MB(buf, psz, n);
142}
143
144const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
145{
f6bcfd97 146 if (psz)
6001e347
RR
147 {
148 size_t nLen = MB2WC((wchar_t *) NULL, psz, 0);
f6bcfd97
BP
149 if (nLen == (size_t)-1)
150 return wxWCharBuffer((wchar_t *) NULL);
6001e347
RR
151 wxWCharBuffer buf(nLen);
152 MB2WC((wchar_t *)(const wchar_t *) buf, psz, nLen);
153 return buf;
f6bcfd97
BP
154 }
155 else
6001e347
RR
156 return wxWCharBuffer((wchar_t *) NULL);
157}
158
159const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *psz) const
160{
f6bcfd97 161 if (psz)
6001e347
RR
162 {
163 size_t nLen = WC2MB((char *) NULL, psz, 0);
f6bcfd97
BP
164 if (nLen == (size_t)-1)
165 return wxCharBuffer((char *) NULL);
6001e347
RR
166 wxCharBuffer buf(nLen);
167 WC2MB((char *)(const char *) buf, psz, nLen);
168 return buf;
f6bcfd97
BP
169 }
170 else
6001e347
RR
171 return wxCharBuffer((char *) NULL);
172}
173
f6bcfd97 174// ----------------------------------------------------------------------------
6001e347 175// standard file conversion
f6bcfd97 176// ----------------------------------------------------------------------------
6001e347
RR
177
178WXDLLEXPORT_DATA(wxMBConvFile) wxConvFile;
179
180// just use the libc conversion for now
181size_t wxMBConvFile::MB2WC(wchar_t *buf, const char *psz, size_t n) const
182{
183 return wxMB2WC(buf, psz, n);
184}
185
186size_t wxMBConvFile::WC2MB(char *buf, const wchar_t *psz, size_t n) const
187{
188 return wxWC2MB(buf, psz, n);
189}
190
f6bcfd97 191// ----------------------------------------------------------------------------
6001e347 192// standard gdk conversion
f6bcfd97
BP
193// ----------------------------------------------------------------------------
194
195#ifdef __WXGTK12__
6001e347
RR
196
197WXDLLEXPORT_DATA(wxMBConvGdk) wxConvGdk;
198
199#include <gdk/gdk.h>
200
201size_t wxMBConvGdk::MB2WC(wchar_t *buf, const char *psz, size_t n) const
202{
203 if (buf) {
204 return gdk_mbstowcs((GdkWChar *)buf, psz, n);
205 } else {
206 GdkWChar *nbuf = new GdkWChar[n=strlen(psz)];
207 size_t len = gdk_mbstowcs(nbuf, psz, n);
208 delete [] nbuf;
209 return len;
210 }
211}
212
213size_t wxMBConvGdk::WC2MB(char *buf, const wchar_t *psz, size_t n) const
214{
215 char *mbstr = gdk_wcstombs((GdkWChar *)psz);
216 size_t len = mbstr ? strlen(mbstr) : 0;
217 if (buf) {
218 if (len > n) len = n;
219 memcpy(buf, psz, len);
220 if (len < n) buf[len] = 0;
221 }
222 return len;
223}
f6bcfd97 224
6001e347
RR
225#endif // GTK > 1.0
226
227// ----------------------------------------------------------------------------
228// UTF-7
229// ----------------------------------------------------------------------------
230
231WXDLLEXPORT_DATA(wxMBConvUTF7) wxConvUTF7;
232
233#if 0
234static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
235 "abcdefghijklmnopqrstuvwxyz"
236 "0123456789'(),-./:?";
237static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
238static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
239 "abcdefghijklmnopqrstuvwxyz"
240 "0123456789+/";
241#endif
242
243// TODO: write actual implementations of UTF-7 here
244size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
245 const char * WXUNUSED(psz),
246 size_t WXUNUSED(n)) const
247{
248 return 0;
249}
250
251size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
252 const wchar_t * WXUNUSED(psz),
253 size_t WXUNUSED(n)) const
254{
255 return 0;
256}
257
f6bcfd97 258// ----------------------------------------------------------------------------
6001e347 259// UTF-8
f6bcfd97 260// ----------------------------------------------------------------------------
6001e347
RR
261
262WXDLLEXPORT_DATA(wxMBConvUTF8) wxConvUTF8;
263
1cd52418 264static wxUint32 utf8_max[]={0x7f,0x7ff,0xffff,0x1fffff,0x3ffffff,0x7fffffff,0xffffffff};
6001e347
RR
265
266size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
267{
268 size_t len = 0;
269
270 while (*psz && ((!buf) || (len<n))) {
271 unsigned char cc=*psz++, fc=cc;
272 unsigned cnt;
273 for (cnt=0; fc&0x80; cnt++) fc<<=1;
274 if (!cnt) {
275 // plain ASCII char
276 if (buf) *buf++=cc;
277 len++;
278 } else {
279 cnt--;
280 if (!cnt) {
281 // invalid UTF-8 sequence
282 return (size_t)-1;
283 } else {
284 unsigned ocnt=cnt-1;
1cd52418 285 wxUint32 res=cc&(0x3f>>cnt);
6001e347
RR
286 while (cnt--) {
287 cc = *psz++;
288 if ((cc&0xC0)!=0x80) {
289 // invalid UTF-8 sequence
290 return (size_t)-1;
291 }
292 res=(res<<6)|(cc&0x3f);
293 }
294 if (res<=utf8_max[ocnt]) {
295 // illegal UTF-8 encoding
296 return (size_t)-1;
297 }
1cd52418
OK
298#ifdef WC_UTF16
299 size_t pa = encode_utf16(res, buf);
300 if (pa == (size_t)-1)
301 return (size_t)-1;
302 if (buf) buf+=pa;
303 len+=pa;
304#else
6001e347
RR
305 if (buf) *buf++=res;
306 len++;
1cd52418 307#endif
6001e347
RR
308 }
309 }
310 }
311 if (buf && (len<n)) *buf = 0;
312 return len;
313}
314
315size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
316{
317 size_t len = 0;
318
319 while (*psz && ((!buf) || (len<n))) {
1cd52418
OK
320 wxUint32 cc;
321#ifdef WC_UTF16
322 size_t pa = decode_utf16(psz,cc);
323 psz += (pa == (size_t)-1) ? 1 : pa;
324#else
325 cc=(*psz++)&0x7fffffff;
326#endif
6001e347
RR
327 unsigned cnt;
328 for (cnt=0; cc>utf8_max[cnt]; cnt++);
329 if (!cnt) {
330 // plain ASCII char
331 if (buf) *buf++=cc;
332 len++;
333 } else {
334 len+=cnt+1;
335 if (buf) {
336 *buf++=(-128>>cnt)|((cc>>(cnt*6))&(0x3f>>cnt));
337 while (cnt--)
338 *buf++=0x80|((cc>>(cnt*6))&0x3f);
339 }
340 }
341 }
342 if (buf && (len<n)) *buf = 0;
343 return len;
344}
345
346// ----------------------------------------------------------------------------
347// specified character set
348// ----------------------------------------------------------------------------
349
f6bcfd97
BP
350WXDLLEXPORT_DATA(wxCSConv) wxConvLocal((const wxChar *)NULL);
351
352#include "wx/encconv.h"
353#include "wx/fontmap.h"
6001e347 354
1cd52418
OK
355// TODO: add some tables here
356// - perhaps common encodings to common codepages (for Win32)
357// - perhaps common encodings to objects ("UTF8" -> wxConvUTF8)
358// - move wxEncodingConverter meat in here
359
360#ifdef __WIN32__
361#include "wx/msw/registry.h"
362// this should work if M$ Internet Exploiter is installed
363static long CharsetToCodepage(const wxChar *name)
364{
f1339c56
RR
365 if (!name)
366 return GetACP();
367
368 long CP=-1;
369
370 wxString cn(name);
371 do {
372 wxString path( wxT("MIME\\Database\\Charset\\") );
373 path += cn;
374 wxRegKey key( wxRegKey::HKCR, path );
375
376 /* two cases: either there's an AliasForCharset string,
377 * or there are Codepage and InternetEncoding dwords.
378 * The InternetEncoding gives us the actual encoding,
379 * the Codepage just says which Windows character set to
380 * use when displaying the data.
381 */
382 if (key.QueryValue( wxT("InternetEncoding"), &CP )) break;
383
384 // no encoding, see if it's an alias
385 if (!key.QueryValue( wxT("AliasForCharset"), cn )) break;
386 } while (1);
387
388 return CP;
1cd52418
OK
389}
390#endif
391
6001e347
RR
392class wxCharacterSet
393{
1cd52418 394public:
f1339c56
RR
395 wxCharacterSet(const wxChar*name)
396 : cname(name) {}
397 virtual ~wxCharacterSet()
398 {}
399 virtual size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
400 { return (size_t)-1; }
401 virtual size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
402 { return (size_t)-1; }
403 virtual bool usable()
404 { return FALSE; }
405public:
406 const wxChar*cname;
1cd52418
OK
407};
408
409class ID_CharSet : public wxCharacterSet
410{
411public:
f1339c56
RR
412 ID_CharSet(const wxChar*name,wxMBConv*cnv)
413 : wxCharacterSet(name), work(cnv) {}
414
415 size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
416 { return work ? work->MB2WC(buf,psz,n) : (size_t)-1; }
417
418 size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
419 { return work ? work->WC2MB(buf,psz,n) : (size_t)-1; }
420
421 bool usable()
422 { return work!=NULL; }
423public:
424 wxMBConv*work;
1cd52418
OK
425};
426
3caec1bb 427
1cd52418 428#ifdef HAVE_ICONV_H
3caec1bb
VS
429
430// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
431// if output buffer is _exactly_ as big as needed. Such case is (unless there's
432// yet another bug in glibc) the only case when iconv() returns with (size_t)-1
433// (which means error) and says there are 0 bytes left in the input buffer --
434// when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
435// this alternative test for iconv() failure.
436// [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft): true if the iconv() result cres denotes a
// real failure. On glibc <= 2.1 an E2BIG error with no input left is not
// counted as one. Arguments are parenthesised so that expressions can be
// passed safely.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft) ((cres) == (size_t)-1)
#endif
443
1cd52418
OK
444class IC_CharSet : public wxCharacterSet
445{
446public:
f1339c56 447 IC_CharSet(const wxChar*name)
3caec1bb 448 : wxCharacterSet(name)
f1339c56 449 {
3caec1bb
VS
450 m2w = iconv_open(WC_NAME, wxConvLibc.cWX2MB(cname));
451 w2m = iconv_open(wxConvLibc.cWX2MB(cname), WC_NAME);
f1339c56
RR
452 }
453
3caec1bb
VS
454 ~IC_CharSet()
455 {
456 if ( m2w != (iconv_t)-1 )
457 iconv_close(m2w);
458 if ( w2m != (iconv_t)-1 )
459 iconv_close(w2m);
f1339c56
RR
460 }
461
3caec1bb 462 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
f1339c56 463 {
f1339c56 464 size_t inbuf = strlen(psz);
3caec1bb 465 size_t outbuf = n * SIZEOF_WCHAR_T;
f1339c56 466 size_t res, cres;
3caec1bb
VS
467 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
468 wchar_t *bufPtr = buf;
469 const char *pszPtr = psz;
470
f1339c56
RR
471 if (buf)
472 {
473 // have destination buffer, convert there
95c8801c 474#ifdef WX_ICONV_TAKES_CHAR
3caec1bb 475 cres = iconv(m2w, (char**)&pszPtr, &inbuf, (char**)&bufPtr, &outbuf);
95c8801c 476#else
3caec1bb 477 cres = iconv(m2w, &pszPtr, &inbuf, (char**)&bufPtr, &outbuf);
95c8801c 478#endif
3caec1bb 479 res = n - (outbuf / SIZEOF_WCHAR_T);
f1339c56 480 // convert to native endianness
3caec1bb
VS
481#ifdef WC_NEED_BSWAP
482 WC_BSWAP(buf /* _not_ bufPtr */, res)
483#endif
f1339c56
RR
484 }
485 else
486 {
487 // no destination buffer... convert using temp buffer
488 // to calculate destination buffer requirement
489 wchar_t tbuf[8];
490 res = 0;
491 do {
3caec1bb 492 bufPtr = tbuf; outbuf = 8*SIZEOF_WCHAR_T;
95c8801c 493#ifdef WX_ICONV_TAKES_CHAR
3caec1bb 494 cres = iconv( m2w, (char**)&pszPtr, &inbuf, (char**)&bufPtr, &outbuf );
95c8801c 495#else
3caec1bb 496 cres = iconv( m2w, &pszPtr, &inbuf, (char**)&bufPtr, &outbuf );
95c8801c 497#endif
f1339c56
RR
498 res += 8-(outbuf/SIZEOF_WCHAR_T);
499 } while ((cres==(size_t)-1) && (errno==E2BIG));
500 }
501
3caec1bb 502 if (ICONV_FAILED(cres, inbuf))
f1339c56 503 return (size_t)-1;
3caec1bb 504
f1339c56
RR
505 return res;
506 }
507
508 size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
509 {
1cd52418 510#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
3caec1bb 511 size_t inbuf = std::wcslen(psz) * SIZEOF_WCHAR_T;
1cd52418 512#else
3caec1bb 513 size_t inbuf = ::wcslen(psz) * SIZEOF_WCHAR_T;
1cd52418 514#endif
f1339c56
RR
515 size_t outbuf = n;
516 size_t res, cres;
3caec1bb 517
1cd52418 518#ifdef WC_NEED_BSWAP
f1339c56
RR
519 // need to copy to temp buffer to switch endianness
520 // this absolutely doesn't rock!
521 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
522 // could be in read-only memory, or be accessed in some other thread)
523 wchar_t*tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
524 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
525 WC_BSWAP(tmpbuf, inbuf)
526 psz=tmpbuf;
1cd52418 527#endif
f1339c56
RR
528 if (buf)
529 {
530 // have destination buffer, convert there
95c8801c 531#ifdef WX_ICONV_TAKES_CHAR
f1339c56 532 cres = iconv( w2m, (char**)&psz, &inbuf, &buf, &outbuf );
95c8801c
VS
533#else
534 cres = iconv( w2m, (const char**)&psz, &inbuf, &buf, &outbuf );
535#endif
f1339c56
RR
536 res = n-outbuf;
537 }
538 else
539 {
540 // no destination buffer... convert using temp buffer
541 // to calculate destination buffer requirement
542 char tbuf[16];
543 res = 0;
544 do {
545 buf = tbuf; outbuf = 16;
95c8801c 546#ifdef WX_ICONV_TAKES_CHAR
f1339c56 547 cres = iconv( w2m, (char**)&psz, &inbuf, &buf, &outbuf );
95c8801c
VS
548#else
549 cres = iconv( w2m, (const char**)&psz, &inbuf, &buf, &outbuf );
550#endif
f1339c56
RR
551 res += 16 - outbuf;
552 } while ((cres==(size_t)-1) && (errno==E2BIG));
553 }
1cd52418 554#ifdef WC_NEED_BSWAP
f1339c56 555 free(tmpbuf);
1cd52418 556#endif
3caec1bb 557 if (ICONV_FAILED(cres, inbuf))
f1339c56
RR
558 return (size_t)-1;
559
560 return res;
561 }
562
563 bool usable()
3caec1bb 564 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
f1339c56
RR
565
566public:
567 iconv_t m2w, w2m;
1cd52418
OK
568};
569#endif
570
571#ifdef __WIN32__
572class CP_CharSet : public wxCharacterSet
573{
574public:
f1339c56
RR
575 CP_CharSet(const wxChar*name)
576 : wxCharacterSet(name), CodePage(CharsetToCodepage(name)) {}
577
578 size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
579 {
580 size_t len = MultiByteToWideChar(CodePage,0,psz,-1,buf,buf?n:0);
581 return len ? len : (size_t)-1;
582 }
583
584 size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
585 {
586 size_t len = WideCharToMultiByte(CodePage,0,psz,-1,buf,buf?n:0,NULL,NULL);
587 return len ? len : (size_t)-1;
588 }
589
590 bool usable()
591 { return CodePage!=-1; }
592
593public:
594 long CodePage;
1cd52418
OK
595};
596#endif
597
598class EC_CharSet : public wxCharacterSet
599{
6001e347 600public:
f1339c56
RR
601 // temporarily just use wxEncodingConverter stuff,
602 // so that it works while a better implementation is built
603 EC_CharSet(const wxChar*name) : wxCharacterSet(name), enc(wxFONTENCODING_SYSTEM)
604 {
605 if (name)
606 enc = wxTheFontMapper->CharsetToEncoding(name, FALSE);
607 m2w.Init(enc, wxFONTENCODING_UNICODE);
608 w2m.Init(wxFONTENCODING_UNICODE, enc);
609 }
610
611 size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
612 {
613 size_t inbuf = strlen(psz);
f1339c56
RR
614 if (buf) m2w.Convert(psz,buf);
615 return inbuf;
616 }
617
618 size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
619 {
1cd52418 620#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
f1339c56 621 size_t inbuf = std::wcslen(psz);
1cd52418 622#else
f1339c56 623 size_t inbuf = ::wcslen(psz);
1cd52418 624#endif
f1339c56
RR
625 if (buf)
626 w2m.Convert(psz,buf);
627
628 return inbuf;
629 }
630
631 bool usable()
632 { return (enc!=wxFONTENCODING_SYSTEM) && (enc!=wxFONTENCODING_DEFAULT); }
633
634public:
635 wxFontEncoding enc;
636 wxEncodingConverter m2w, w2m;
f6bcfd97 637};
6001e347 638
f6bcfd97 639static wxCharacterSet *wxGetCharacterSet(const wxChar *name)
6001e347 640{
f1339c56
RR
641 wxCharacterSet *cset = NULL;
642 if (name)
643 {
644 if (!wxStricmp(name, wxT("UTF8")) || !wxStricmp(name, wxT("UTF-8")))
645 {
646 cset = new ID_CharSet(name, &wxConvUTF8);
647 }
648 else
649 {
1cd52418 650#ifdef HAVE_ICONV_H
f1339c56 651 cset = new IC_CharSet(name); // may not take NULL
1cd52418 652#endif
f1339c56 653 }
1cd52418 654 }
f1339c56
RR
655
656 if (cset && cset->usable()) return cset;
657 if (cset) delete cset;
3caec1bb 658 cset = NULL;
1cd52418 659#ifdef __WIN32__
f1339c56
RR
660 cset = new CP_CharSet(name); // may take NULL
661 if (cset->usable()) return cset;
1cd52418 662#endif
f1339c56
RR
663 if (cset) delete cset;
664 cset = new EC_CharSet(name);
665 if (cset->usable()) return cset;
666 delete cset;
3caec1bb 667 wxLogError(_("Unknown encoding '%s'!"), name);
f1339c56 668 return NULL;
6001e347
RR
669}
670
6001e347
RR
671wxCSConv::wxCSConv(const wxChar *charset)
672{
f1339c56
RR
673 m_name = (wxChar *) NULL;
674 m_cset = (wxCharacterSet *) NULL;
675 m_deferred = TRUE;
676 SetName(charset);
6001e347
RR
677}
678
679wxCSConv::~wxCSConv()
680{
f1339c56
RR
681 if (m_name) free(m_name);
682 if (m_cset) delete m_cset;
6001e347
RR
683}
684
685void wxCSConv::SetName(const wxChar *charset)
686{
f1339c56
RR
687 if (charset)
688 {
689 m_name = wxStrdup(charset);
690 m_deferred = TRUE;
691 }
6001e347
RR
692}
693
694void wxCSConv::LoadNow()
695{
f1339c56
RR
696 if (m_deferred)
697 {
698 if (!m_name)
699 {
6001e347 700#ifdef __UNIX__
1cd52418 701#if defined(HAVE_LANGINFO_H) && defined(CODESET)
f1339c56
RR
702 // GNU libc provides current character set this way
703 char*alang = nl_langinfo(CODESET);
704 if (alang)
705 {
706 SetName(wxConvLibc.cMB2WX(alang));
707 }
708 else
1cd52418 709#endif
f1339c56
RR
710 {
711 // if we can't get at the character set directly,
712 // try to see if it's in the environment variables
713 // (in most cases this won't work, but I was out of ideas)
714 wxChar *lang = wxGetenv(wxT("LC_ALL"));
715 if (!lang) lang = wxGetenv(wxT("LC_CTYPE"));
716 if (!lang) lang = wxGetenv(wxT("LANG"));
717 wxChar *dot = lang ? wxStrchr(lang, wxT('.')) : (wxChar *)NULL;
718 if (dot) SetName(dot+1);
719 }
6001e347 720#endif
f1339c56
RR
721 }
722 m_cset = wxGetCharacterSet(m_name);
723 m_deferred = FALSE;
6001e347 724 }
6001e347
RR
725}
726
727size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
728{
f1339c56
RR
729 ((wxCSConv *)this)->LoadNow(); // discard constness
730
731 if (m_cset)
732 return m_cset->MB2WC(buf, psz, n);
733
734 // latin-1 (direct)
735 size_t len=strlen(psz);
736
737 if (buf)
738 {
739 for (size_t c=0; c<=len; c++)
740 buf[c] = (unsigned char)(psz[c]);
741 }
742
743 return len;
6001e347
RR
744}
745
746size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
747{
f1339c56
RR
748 ((wxCSConv *)this)->LoadNow(); // discard constness
749
750 if (m_cset)
751 return m_cset->WC2MB(buf, psz, n);
1cd52418 752
f1339c56 753 // latin-1 (direct)
d834f22c 754#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
f1339c56 755 size_t len=std::wcslen(psz);
d834f22c 756#else
f1339c56 757 size_t len=::wcslen(psz);
d834f22c 758#endif
f1339c56
RR
759 if (buf)
760 {
761 for (size_t c=0; c<=len; c++)
762 buf[c] = (psz[c]>0xff) ? '?' : psz[c];
763 }
764
765 return len;
6001e347
RR
766}
767
1cd52418
OK
768#ifdef HAVE_ICONV_H
769class IC_CharSetConverter
770{
771public:
f1339c56
RR
772 IC_CharSetConverter(IC_CharSet*from,IC_CharSet*to)
773 { cnv = iconv_open(wxConvLibc.cWX2MB(to->cname),wxConvLibc.cWX2MB(from->cname)); }
774
775 ~IC_CharSetConverter()
776 { if (cnv!=(iconv_t)-1) iconv_close(cnv); }
777
778 size_t Convert(char*buf, const char*psz, size_t n)
779 {
780 size_t inbuf = strlen(psz);
781 size_t outbuf = n;
95c8801c 782#ifdef WX_ICONV_TAKES_CHAR
f1339c56 783 size_t res = iconv( cnv, (char**)&psz, &inbuf, &buf, &outbuf );
95c8801c
VS
784#else
785 size_t res = iconv( cnv, &psz, &inbuf, &buf, &outbuf );
786#endif
f1339c56
RR
787 if (res==(size_t)-1) return (size_t)-1;
788 return n-outbuf;
789 }
790
791public:
792 iconv_t cnv;
1cd52418
OK
793};
794#endif
795
796class EC_CharSetConverter
797{
798public:
f1339c56
RR
799 EC_CharSetConverter(EC_CharSet*from,EC_CharSet*to)
800 { cnv.Init(from->enc,to->enc); }
801
802 size_t Convert(char*buf, const char*psz, size_t n)
803 {
804 size_t inbuf = strlen(psz);
805 if (buf) cnv.Convert(psz,buf);
806 return inbuf;
807 }
808
809public:
810 wxEncodingConverter cnv;
1cd52418
OK
811};
812
f6bcfd97
BP
813#else // !wxUSE_WCHAR_T
814
815// ----------------------------------------------------------------------------
816// stand-ins in absence of wchar_t
817// ----------------------------------------------------------------------------
818
819WXDLLEXPORT_DATA(wxMBConv) wxConvLibc, wxConvFile;
820
821#endif // wxUSE_WCHAR_T
6001e347
RR
822
823