]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
corrected conditional compilation for Universal Interfaces (3.4 or later)
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
8// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin
9// Licence: wxWindows license
10/////////////////////////////////////////////////////////////////////////////
11
f6bcfd97
BP
12// ============================================================================
13// declarations
14// ============================================================================
15
16// ----------------------------------------------------------------------------
17// headers
18// ----------------------------------------------------------------------------
19
6001e347
RR
20#ifdef __GNUG__
21 #pragma implementation "strconv.h"
22#endif
23
24// For compilers that support precompilation, includes "wx.h".
25#include "wx/wxprec.h"
26
27#ifdef __BORLANDC__
28 #pragma hdrstop
29#endif
30
0a1c1e62
GRG
31#ifdef __WXMSW__
32 #include "wx/msw/private.h"
33#endif
34
1cd52418 35#include <errno.h>
6001e347
RR
36#include <ctype.h>
37#include <string.h>
38#include <stdlib.h>
39
40#ifdef __SALFORDC__
41 #include <clib.h>
42#endif
43
1cd52418
OK
44#ifdef HAVE_ICONV_H
45 #include <iconv.h>
46#endif
47#ifdef HAVE_LANGINFO_H
48 #include <langinfo.h>
49#endif
50
3e61dfb0
OK
51#ifdef __WXMSW__
52 #include <windows.h>
53#endif
54
6001e347
RR
55#include "wx/debug.h"
56#include "wx/strconv.h"
3caec1bb
VS
57#include "wx/intl.h"
58#include "wx/log.h"
6001e347 59
d43088ee 60#if defined(WORDS_BIGENDIAN) || defined(__STDC_ISO_10646__)
1cd52418
OK
61#define BSWAP_UCS4(str, len)
62#define BSWAP_UCS2(str, len)
63#else
64#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
65#define BSWAP_UCS2(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
66#define WC_NEED_BSWAP
67#endif
68#define BSWAP_UTF32(str, len) BSWAP_UCS4(str, len)
69#define BSWAP_UTF16(str, len) BSWAP_UCS2(str, len)
70
71#if SIZEOF_WCHAR_T == 4
72#define WC_NAME "UCS4"
73#define WC_BSWAP BSWAP_UCS4
74#elif SIZEOF_WCHAR_T == 2
75#define WC_NAME "UTF16"
76#define WC_BSWAP BSWAP_UTF16
77#define WC_UTF16
78#endif
79
f6bcfd97
BP
80// ----------------------------------------------------------------------------
81// globals
82// ----------------------------------------------------------------------------
6001e347
RR
83
84WXDLLEXPORT_DATA(wxMBConv *) wxConvCurrent = &wxConvLibc;
85
f6bcfd97
BP
86// ============================================================================
87// implementation
88// ============================================================================
6001e347 89
f6bcfd97 90#if wxUSE_WCHAR_T
6001e347 91
b0a6bb75
VZ
92#ifdef WC_UTF16
93
1cd52418
OK
94static size_t encode_utf16(wxUint32 input,wxUint16*output)
95{
96 if (input<=0xffff) {
97 if (output) *output++ = input;
98 return 1;
99 } else
100 if (input>=0x110000) {
101 return (size_t)-1;
102 } else {
103 if (output) {
104 *output++ = (input >> 10)+0xd7c0;
105 *output++ = (input&0x3ff)+0xdc00;
106 }
107 return 2;
108 }
109}
110
111static size_t decode_utf16(wxUint16*input,wxUint32&output)
112{
113 if ((*input<0xd800) || (*input>0xdfff)) {
114 output = *input;
115 return 1;
116 } else
117 if ((input[1]<0xdc00) || (input[1]>=0xdfff)) {
118 output = *input;
119 return (size_t)-1;
120 } else {
121 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
122 return 2;
123 }
124}
125
b0a6bb75
VZ
126#endif // WC_UTF16
127
f6bcfd97 128// ----------------------------------------------------------------------------
6001e347 129// wxMBConv
f6bcfd97 130// ----------------------------------------------------------------------------
6001e347
RR
131
132WXDLLEXPORT_DATA(wxMBConv) wxConvLibc;
133
134size_t wxMBConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
135{
136 return wxMB2WC(buf, psz, n);
137}
138
139size_t wxMBConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
140{
141 return wxWC2MB(buf, psz, n);
142}
143
144const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
145{
f6bcfd97 146 if (psz)
6001e347
RR
147 {
148 size_t nLen = MB2WC((wchar_t *) NULL, psz, 0);
f6bcfd97
BP
149 if (nLen == (size_t)-1)
150 return wxWCharBuffer((wchar_t *) NULL);
6001e347
RR
151 wxWCharBuffer buf(nLen);
152 MB2WC((wchar_t *)(const wchar_t *) buf, psz, nLen);
153 return buf;
f6bcfd97
BP
154 }
155 else
6001e347
RR
156 return wxWCharBuffer((wchar_t *) NULL);
157}
158
159const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *psz) const
160{
f6bcfd97 161 if (psz)
6001e347
RR
162 {
163 size_t nLen = WC2MB((char *) NULL, psz, 0);
f6bcfd97
BP
164 if (nLen == (size_t)-1)
165 return wxCharBuffer((char *) NULL);
6001e347
RR
166 wxCharBuffer buf(nLen);
167 WC2MB((char *)(const char *) buf, psz, nLen);
168 return buf;
f6bcfd97
BP
169 }
170 else
6001e347
RR
171 return wxCharBuffer((char *) NULL);
172}
173
f6bcfd97 174// ----------------------------------------------------------------------------
6001e347 175// standard file conversion
f6bcfd97 176// ----------------------------------------------------------------------------
6001e347
RR
177
178WXDLLEXPORT_DATA(wxMBConvFile) wxConvFile;
179
180// just use the libc conversion for now
181size_t wxMBConvFile::MB2WC(wchar_t *buf, const char *psz, size_t n) const
182{
183 return wxMB2WC(buf, psz, n);
184}
185
186size_t wxMBConvFile::WC2MB(char *buf, const wchar_t *psz, size_t n) const
187{
188 return wxWC2MB(buf, psz, n);
189}
190
f6bcfd97 191// ----------------------------------------------------------------------------
6001e347 192// standard gdk conversion
f6bcfd97
BP
193// ----------------------------------------------------------------------------
194
195#ifdef __WXGTK12__
6001e347
RR
196
197WXDLLEXPORT_DATA(wxMBConvGdk) wxConvGdk;
198
199#include <gdk/gdk.h>
200
201size_t wxMBConvGdk::MB2WC(wchar_t *buf, const char *psz, size_t n) const
202{
203 if (buf) {
204 return gdk_mbstowcs((GdkWChar *)buf, psz, n);
205 } else {
206 GdkWChar *nbuf = new GdkWChar[n=strlen(psz)];
207 size_t len = gdk_mbstowcs(nbuf, psz, n);
208 delete [] nbuf;
209 return len;
210 }
211}
212
213size_t wxMBConvGdk::WC2MB(char *buf, const wchar_t *psz, size_t n) const
214{
215 char *mbstr = gdk_wcstombs((GdkWChar *)psz);
216 size_t len = mbstr ? strlen(mbstr) : 0;
217 if (buf) {
218 if (len > n) len = n;
219 memcpy(buf, psz, len);
220 if (len < n) buf[len] = 0;
221 }
222 return len;
223}
f6bcfd97 224
6001e347
RR
225#endif // GTK > 1.0
226
227// ----------------------------------------------------------------------------
228// UTF-7
229// ----------------------------------------------------------------------------
230
231WXDLLEXPORT_DATA(wxMBConvUTF7) wxConvUTF7;
232
233#if 0
234static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
235 "abcdefghijklmnopqrstuvwxyz"
236 "0123456789'(),-./:?";
237static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
238static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
239 "abcdefghijklmnopqrstuvwxyz"
240 "0123456789+/";
241#endif
242
243// TODO: write actual implementations of UTF-7 here
244size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
245 const char * WXUNUSED(psz),
246 size_t WXUNUSED(n)) const
247{
248 return 0;
249}
250
251size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
252 const wchar_t * WXUNUSED(psz),
253 size_t WXUNUSED(n)) const
254{
255 return 0;
256}
257
f6bcfd97 258// ----------------------------------------------------------------------------
6001e347 259// UTF-8
f6bcfd97 260// ----------------------------------------------------------------------------
6001e347
RR
261
262WXDLLEXPORT_DATA(wxMBConvUTF8) wxConvUTF8;
263
1cd52418 264static wxUint32 utf8_max[]={0x7f,0x7ff,0xffff,0x1fffff,0x3ffffff,0x7fffffff,0xffffffff};
6001e347
RR
265
266size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
267{
268 size_t len = 0;
269
270 while (*psz && ((!buf) || (len<n))) {
271 unsigned char cc=*psz++, fc=cc;
272 unsigned cnt;
273 for (cnt=0; fc&0x80; cnt++) fc<<=1;
274 if (!cnt) {
275 // plain ASCII char
276 if (buf) *buf++=cc;
277 len++;
278 } else {
279 cnt--;
280 if (!cnt) {
281 // invalid UTF-8 sequence
282 return (size_t)-1;
283 } else {
284 unsigned ocnt=cnt-1;
1cd52418 285 wxUint32 res=cc&(0x3f>>cnt);
6001e347
RR
286 while (cnt--) {
287 cc = *psz++;
288 if ((cc&0xC0)!=0x80) {
289 // invalid UTF-8 sequence
290 return (size_t)-1;
291 }
292 res=(res<<6)|(cc&0x3f);
293 }
294 if (res<=utf8_max[ocnt]) {
295 // illegal UTF-8 encoding
296 return (size_t)-1;
297 }
1cd52418
OK
298#ifdef WC_UTF16
299 size_t pa = encode_utf16(res, buf);
300 if (pa == (size_t)-1)
301 return (size_t)-1;
302 if (buf) buf+=pa;
303 len+=pa;
304#else
6001e347
RR
305 if (buf) *buf++=res;
306 len++;
1cd52418 307#endif
6001e347
RR
308 }
309 }
310 }
311 if (buf && (len<n)) *buf = 0;
312 return len;
313}
314
315size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
316{
317 size_t len = 0;
318
319 while (*psz && ((!buf) || (len<n))) {
1cd52418
OK
320 wxUint32 cc;
321#ifdef WC_UTF16
322 size_t pa = decode_utf16(psz,cc);
323 psz += (pa == (size_t)-1) ? 1 : pa;
324#else
325 cc=(*psz++)&0x7fffffff;
326#endif
6001e347
RR
327 unsigned cnt;
328 for (cnt=0; cc>utf8_max[cnt]; cnt++);
329 if (!cnt) {
330 // plain ASCII char
331 if (buf) *buf++=cc;
332 len++;
333 } else {
334 len+=cnt+1;
335 if (buf) {
336 *buf++=(-128>>cnt)|((cc>>(cnt*6))&(0x3f>>cnt));
337 while (cnt--)
338 *buf++=0x80|((cc>>(cnt*6))&0x3f);
339 }
340 }
341 }
342 if (buf && (len<n)) *buf = 0;
343 return len;
344}
345
346// ----------------------------------------------------------------------------
347// specified character set
348// ----------------------------------------------------------------------------
349
f6bcfd97
BP
350WXDLLEXPORT_DATA(wxCSConv) wxConvLocal((const wxChar *)NULL);
351
352#include "wx/encconv.h"
353#include "wx/fontmap.h"
6001e347 354
1cd52418
OK
355// TODO: add some tables here
356// - perhaps common encodings to common codepages (for Win32)
357// - perhaps common encodings to objects ("UTF8" -> wxConvUTF8)
358// - move wxEncodingConverter meat in here
359
360#ifdef __WIN32__
361#include "wx/msw/registry.h"
362// this should work if M$ Internet Exploiter is installed
363static long CharsetToCodepage(const wxChar *name)
364{
f1339c56
RR
365 if (!name)
366 return GetACP();
367
368 long CP=-1;
369
370 wxString cn(name);
371 do {
5ce0e4ac 372 wxString path(wxT("MIME\\Database\\Charset\\"));
f1339c56 373 path += cn;
5ce0e4ac
VS
374 wxRegKey key(wxRegKey::HKCR, path);
375
376 if (!key.Exists()) continue;
377
378 // two cases: either there's an AliasForCharset string,
379 // or there are Codepage and InternetEncoding dwords.
380 // The InternetEncoding gives us the actual encoding,
381 // the Codepage just says which Windows character set to
382 // use when displaying the data.
383 if (key.HasValue(wxT("InternetEncoding")) &&
384 key.QueryValue(wxT("InternetEncoding"), &CP)) break;
f1339c56
RR
385
386 // no encoding, see if it's an alias
5ce0e4ac
VS
387 if (!key.HasValue(wxT("AliasForCharset")) ||
388 !key.QueryValue(wxT("AliasForCharset"), cn)) break;
f1339c56
RR
389 } while (1);
390
391 return CP;
1cd52418
OK
392}
393#endif
394
6001e347
RR
395class wxCharacterSet
396{
1cd52418 397public:
f1339c56
RR
398 wxCharacterSet(const wxChar*name)
399 : cname(name) {}
400 virtual ~wxCharacterSet()
401 {}
402 virtual size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
403 { return (size_t)-1; }
404 virtual size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
405 { return (size_t)-1; }
406 virtual bool usable()
407 { return FALSE; }
408public:
409 const wxChar*cname;
1cd52418
OK
410};
411
412class ID_CharSet : public wxCharacterSet
413{
414public:
f1339c56
RR
415 ID_CharSet(const wxChar*name,wxMBConv*cnv)
416 : wxCharacterSet(name), work(cnv) {}
417
418 size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
419 { return work ? work->MB2WC(buf,psz,n) : (size_t)-1; }
420
421 size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
422 { return work ? work->WC2MB(buf,psz,n) : (size_t)-1; }
423
424 bool usable()
425 { return work!=NULL; }
426public:
427 wxMBConv*work;
1cd52418
OK
428};
429
3caec1bb 430
1cd52418 431#ifdef HAVE_ICONV_H
3caec1bb
VS
432
433// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
434// if output buffer is _exactly_ as big as needed. Such case is (unless there's
435// yet another bug in glibc) the only case when iconv() returns with (size_t)-1
436// (which means error) and says there are 0 bytes left in the input buffer --
437// when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
438// this alternative test for iconv() failure.
439// [This bug does not appear in glibc 2.2.]
440#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
441#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
442 (errno != E2BIG || bufLeft != 0))
443#else
444#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
445#endif
446
1cd52418
OK
447class IC_CharSet : public wxCharacterSet
448{
449public:
f1339c56 450 IC_CharSet(const wxChar*name)
3caec1bb 451 : wxCharacterSet(name)
f1339c56 452 {
3caec1bb
VS
453 m2w = iconv_open(WC_NAME, wxConvLibc.cWX2MB(cname));
454 w2m = iconv_open(wxConvLibc.cWX2MB(cname), WC_NAME);
f1339c56
RR
455 }
456
3caec1bb
VS
457 ~IC_CharSet()
458 {
459 if ( m2w != (iconv_t)-1 )
460 iconv_close(m2w);
461 if ( w2m != (iconv_t)-1 )
462 iconv_close(w2m);
f1339c56
RR
463 }
464
3caec1bb 465 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
f1339c56 466 {
f1339c56 467 size_t inbuf = strlen(psz);
3caec1bb 468 size_t outbuf = n * SIZEOF_WCHAR_T;
f1339c56 469 size_t res, cres;
3caec1bb
VS
470 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
471 wchar_t *bufPtr = buf;
472 const char *pszPtr = psz;
473
f1339c56
RR
474 if (buf)
475 {
476 // have destination buffer, convert there
95c8801c 477#ifdef WX_ICONV_TAKES_CHAR
3caec1bb 478 cres = iconv(m2w, (char**)&pszPtr, &inbuf, (char**)&bufPtr, &outbuf);
95c8801c 479#else
3caec1bb 480 cres = iconv(m2w, &pszPtr, &inbuf, (char**)&bufPtr, &outbuf);
95c8801c 481#endif
3caec1bb 482 res = n - (outbuf / SIZEOF_WCHAR_T);
f1339c56 483 // convert to native endianness
3caec1bb
VS
484#ifdef WC_NEED_BSWAP
485 WC_BSWAP(buf /* _not_ bufPtr */, res)
486#endif
f1339c56
RR
487 }
488 else
489 {
490 // no destination buffer... convert using temp buffer
491 // to calculate destination buffer requirement
492 wchar_t tbuf[8];
493 res = 0;
494 do {
3caec1bb 495 bufPtr = tbuf; outbuf = 8*SIZEOF_WCHAR_T;
95c8801c 496#ifdef WX_ICONV_TAKES_CHAR
3caec1bb 497 cres = iconv( m2w, (char**)&pszPtr, &inbuf, (char**)&bufPtr, &outbuf );
95c8801c 498#else
3caec1bb 499 cres = iconv( m2w, &pszPtr, &inbuf, (char**)&bufPtr, &outbuf );
95c8801c 500#endif
f1339c56
RR
501 res += 8-(outbuf/SIZEOF_WCHAR_T);
502 } while ((cres==(size_t)-1) && (errno==E2BIG));
503 }
504
3caec1bb 505 if (ICONV_FAILED(cres, inbuf))
f1339c56 506 return (size_t)-1;
3caec1bb 507
f1339c56
RR
508 return res;
509 }
510
511 size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
512 {
1cd52418 513#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
3caec1bb 514 size_t inbuf = std::wcslen(psz) * SIZEOF_WCHAR_T;
1cd52418 515#else
3caec1bb 516 size_t inbuf = ::wcslen(psz) * SIZEOF_WCHAR_T;
1cd52418 517#endif
f1339c56
RR
518 size_t outbuf = n;
519 size_t res, cres;
3caec1bb 520
1cd52418 521#ifdef WC_NEED_BSWAP
f1339c56
RR
522 // need to copy to temp buffer to switch endianness
523 // this absolutely doesn't rock!
524 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
525 // could be in read-only memory, or be accessed in some other thread)
526 wchar_t*tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
527 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
528 WC_BSWAP(tmpbuf, inbuf)
529 psz=tmpbuf;
1cd52418 530#endif
f1339c56
RR
531 if (buf)
532 {
533 // have destination buffer, convert there
95c8801c 534#ifdef WX_ICONV_TAKES_CHAR
f1339c56 535 cres = iconv( w2m, (char**)&psz, &inbuf, &buf, &outbuf );
95c8801c
VS
536#else
537 cres = iconv( w2m, (const char**)&psz, &inbuf, &buf, &outbuf );
538#endif
f1339c56
RR
539 res = n-outbuf;
540 }
541 else
542 {
543 // no destination buffer... convert using temp buffer
544 // to calculate destination buffer requirement
545 char tbuf[16];
546 res = 0;
547 do {
548 buf = tbuf; outbuf = 16;
95c8801c 549#ifdef WX_ICONV_TAKES_CHAR
f1339c56 550 cres = iconv( w2m, (char**)&psz, &inbuf, &buf, &outbuf );
95c8801c
VS
551#else
552 cres = iconv( w2m, (const char**)&psz, &inbuf, &buf, &outbuf );
553#endif
f1339c56
RR
554 res += 16 - outbuf;
555 } while ((cres==(size_t)-1) && (errno==E2BIG));
556 }
1cd52418 557#ifdef WC_NEED_BSWAP
f1339c56 558 free(tmpbuf);
1cd52418 559#endif
3caec1bb 560 if (ICONV_FAILED(cres, inbuf))
f1339c56
RR
561 return (size_t)-1;
562
563 return res;
564 }
565
566 bool usable()
3caec1bb 567 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
f1339c56
RR
568
569public:
570 iconv_t m2w, w2m;
1cd52418
OK
571};
572#endif
573
574#ifdef __WIN32__
575class CP_CharSet : public wxCharacterSet
576{
577public:
f1339c56
RR
578 CP_CharSet(const wxChar*name)
579 : wxCharacterSet(name), CodePage(CharsetToCodepage(name)) {}
580
581 size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
582 {
583 size_t len = MultiByteToWideChar(CodePage,0,psz,-1,buf,buf?n:0);
584 return len ? len : (size_t)-1;
585 }
586
587 size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
588 {
589 size_t len = WideCharToMultiByte(CodePage,0,psz,-1,buf,buf?n:0,NULL,NULL);
590 return len ? len : (size_t)-1;
591 }
592
593 bool usable()
594 { return CodePage!=-1; }
595
596public:
597 long CodePage;
1cd52418
OK
598};
599#endif
600
601class EC_CharSet : public wxCharacterSet
602{
6001e347 603public:
f1339c56
RR
604 // temporarily just use wxEncodingConverter stuff,
605 // so that it works while a better implementation is built
606 EC_CharSet(const wxChar*name) : wxCharacterSet(name), enc(wxFONTENCODING_SYSTEM)
607 {
608 if (name)
609 enc = wxTheFontMapper->CharsetToEncoding(name, FALSE);
610 m2w.Init(enc, wxFONTENCODING_UNICODE);
611 w2m.Init(wxFONTENCODING_UNICODE, enc);
612 }
613
614 size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
615 {
616 size_t inbuf = strlen(psz);
f1339c56
RR
617 if (buf) m2w.Convert(psz,buf);
618 return inbuf;
619 }
620
621 size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
622 {
1cd52418 623#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
f1339c56 624 size_t inbuf = std::wcslen(psz);
1cd52418 625#else
f1339c56 626 size_t inbuf = ::wcslen(psz);
1cd52418 627#endif
f1339c56
RR
628 if (buf)
629 w2m.Convert(psz,buf);
630
631 return inbuf;
632 }
633
634 bool usable()
635 { return (enc!=wxFONTENCODING_SYSTEM) && (enc!=wxFONTENCODING_DEFAULT); }
636
637public:
638 wxFontEncoding enc;
639 wxEncodingConverter m2w, w2m;
f6bcfd97 640};
6001e347 641
f6bcfd97 642static wxCharacterSet *wxGetCharacterSet(const wxChar *name)
6001e347 643{
f1339c56
RR
644 wxCharacterSet *cset = NULL;
645 if (name)
646 {
647 if (!wxStricmp(name, wxT("UTF8")) || !wxStricmp(name, wxT("UTF-8")))
648 {
649 cset = new ID_CharSet(name, &wxConvUTF8);
650 }
651 else
652 {
1cd52418 653#ifdef HAVE_ICONV_H
f1339c56 654 cset = new IC_CharSet(name); // may not take NULL
1cd52418 655#endif
f1339c56 656 }
1cd52418 657 }
f1339c56
RR
658
659 if (cset && cset->usable()) return cset;
660 if (cset) delete cset;
3caec1bb 661 cset = NULL;
1cd52418 662#ifdef __WIN32__
f1339c56
RR
663 cset = new CP_CharSet(name); // may take NULL
664 if (cset->usable()) return cset;
1cd52418 665#endif
f1339c56
RR
666 if (cset) delete cset;
667 cset = new EC_CharSet(name);
668 if (cset->usable()) return cset;
669 delete cset;
3caec1bb 670 wxLogError(_("Unknown encoding '%s'!"), name);
f1339c56 671 return NULL;
6001e347
RR
672}
673
6001e347
RR
674wxCSConv::wxCSConv(const wxChar *charset)
675{
f1339c56
RR
676 m_name = (wxChar *) NULL;
677 m_cset = (wxCharacterSet *) NULL;
678 m_deferred = TRUE;
679 SetName(charset);
6001e347
RR
680}
681
682wxCSConv::~wxCSConv()
683{
f1339c56
RR
684 if (m_name) free(m_name);
685 if (m_cset) delete m_cset;
6001e347
RR
686}
687
688void wxCSConv::SetName(const wxChar *charset)
689{
f1339c56
RR
690 if (charset)
691 {
692 m_name = wxStrdup(charset);
693 m_deferred = TRUE;
694 }
6001e347
RR
695}
696
697void wxCSConv::LoadNow()
698{
f1339c56
RR
699 if (m_deferred)
700 {
701 if (!m_name)
702 {
6001e347 703#ifdef __UNIX__
1cd52418 704#if defined(HAVE_LANGINFO_H) && defined(CODESET)
f1339c56
RR
705 // GNU libc provides current character set this way
706 char*alang = nl_langinfo(CODESET);
707 if (alang)
708 {
709 SetName(wxConvLibc.cMB2WX(alang));
710 }
711 else
1cd52418 712#endif
f1339c56
RR
713 {
714 // if we can't get at the character set directly,
715 // try to see if it's in the environment variables
716 // (in most cases this won't work, but I was out of ideas)
717 wxChar *lang = wxGetenv(wxT("LC_ALL"));
718 if (!lang) lang = wxGetenv(wxT("LC_CTYPE"));
719 if (!lang) lang = wxGetenv(wxT("LANG"));
720 wxChar *dot = lang ? wxStrchr(lang, wxT('.')) : (wxChar *)NULL;
721 if (dot) SetName(dot+1);
722 }
6001e347 723#endif
f1339c56
RR
724 }
725 m_cset = wxGetCharacterSet(m_name);
726 m_deferred = FALSE;
6001e347 727 }
6001e347
RR
728}
729
730size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
731{
f1339c56
RR
732 ((wxCSConv *)this)->LoadNow(); // discard constness
733
734 if (m_cset)
735 return m_cset->MB2WC(buf, psz, n);
736
737 // latin-1 (direct)
738 size_t len=strlen(psz);
739
740 if (buf)
741 {
742 for (size_t c=0; c<=len; c++)
743 buf[c] = (unsigned char)(psz[c]);
744 }
745
746 return len;
6001e347
RR
747}
748
749size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
750{
f1339c56
RR
751 ((wxCSConv *)this)->LoadNow(); // discard constness
752
753 if (m_cset)
754 return m_cset->WC2MB(buf, psz, n);
1cd52418 755
f1339c56 756 // latin-1 (direct)
d834f22c 757#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
f1339c56 758 size_t len=std::wcslen(psz);
d834f22c 759#else
f1339c56 760 size_t len=::wcslen(psz);
d834f22c 761#endif
f1339c56
RR
762 if (buf)
763 {
764 for (size_t c=0; c<=len; c++)
765 buf[c] = (psz[c]>0xff) ? '?' : psz[c];
766 }
767
768 return len;
6001e347
RR
769}
770
1cd52418
OK
771#ifdef HAVE_ICONV_H
772class IC_CharSetConverter
773{
774public:
f1339c56
RR
775 IC_CharSetConverter(IC_CharSet*from,IC_CharSet*to)
776 { cnv = iconv_open(wxConvLibc.cWX2MB(to->cname),wxConvLibc.cWX2MB(from->cname)); }
777
778 ~IC_CharSetConverter()
779 { if (cnv!=(iconv_t)-1) iconv_close(cnv); }
780
781 size_t Convert(char*buf, const char*psz, size_t n)
782 {
783 size_t inbuf = strlen(psz);
784 size_t outbuf = n;
95c8801c 785#ifdef WX_ICONV_TAKES_CHAR
f1339c56 786 size_t res = iconv( cnv, (char**)&psz, &inbuf, &buf, &outbuf );
95c8801c
VS
787#else
788 size_t res = iconv( cnv, &psz, &inbuf, &buf, &outbuf );
789#endif
f1339c56
RR
790 if (res==(size_t)-1) return (size_t)-1;
791 return n-outbuf;
792 }
793
794public:
795 iconv_t cnv;
1cd52418
OK
796};
797#endif
798
799class EC_CharSetConverter
800{
801public:
f1339c56
RR
802 EC_CharSetConverter(EC_CharSet*from,EC_CharSet*to)
803 { cnv.Init(from->enc,to->enc); }
804
805 size_t Convert(char*buf, const char*psz, size_t n)
806 {
807 size_t inbuf = strlen(psz);
808 if (buf) cnv.Convert(psz,buf);
809 return inbuf;
810 }
811
812public:
813 wxEncodingConverter cnv;
1cd52418
OK
814};
815
f6bcfd97
BP
816#else // !wxUSE_WCHAR_T
817
818// ----------------------------------------------------------------------------
819// stand-ins in absence of wchar_t
820// ----------------------------------------------------------------------------
821
822WXDLLEXPORT_DATA(wxMBConv) wxConvLibc, wxConvFile;
823
824#endif // wxUSE_WCHAR_T
6001e347
RR
825
826