]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
implemented (more) correct <pre> handling
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
8// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin
9// Licence: wxWindows license
10/////////////////////////////////////////////////////////////////////////////
11
f6bcfd97
BP
12// ============================================================================
13// declarations
14// ============================================================================
15
16// ----------------------------------------------------------------------------
17// headers
18// ----------------------------------------------------------------------------
19
6001e347
RR
20#ifdef __GNUG__
21 #pragma implementation "strconv.h"
22#endif
23
24// For compilers that support precompilation, includes "wx.h".
25#include "wx/wxprec.h"
26
27#ifdef __BORLANDC__
28 #pragma hdrstop
29#endif
30
0a1c1e62
GRG
31#ifdef __WXMSW__
32 #include "wx/msw/private.h"
33#endif
34
1cd52418 35#include <errno.h>
6001e347
RR
36#include <ctype.h>
37#include <string.h>
38#include <stdlib.h>
39
40#ifdef __SALFORDC__
41 #include <clib.h>
42#endif
43
1cd52418
OK
44#ifdef HAVE_ICONV_H
45 #include <iconv.h>
46#endif
47#ifdef HAVE_LANGINFO_H
48 #include <langinfo.h>
49#endif
50
3e61dfb0
OK
51#ifdef __WXMSW__
52 #include <windows.h>
53#endif
54
6001e347
RR
55#include "wx/debug.h"
56#include "wx/strconv.h"
57
d43088ee 58#if defined(WORDS_BIGENDIAN) || defined(__STDC_ISO_10646__)
1cd52418
OK
59#define BSWAP_UCS4(str, len)
60#define BSWAP_UCS2(str, len)
61#else
62#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
63#define BSWAP_UCS2(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
64#define WC_NEED_BSWAP
65#endif
66#define BSWAP_UTF32(str, len) BSWAP_UCS4(str, len)
67#define BSWAP_UTF16(str, len) BSWAP_UCS2(str, len)
68
69#if SIZEOF_WCHAR_T == 4
70#define WC_NAME "UCS4"
71#define WC_BSWAP BSWAP_UCS4
72#elif SIZEOF_WCHAR_T == 2
73#define WC_NAME "UTF16"
74#define WC_BSWAP BSWAP_UTF16
75#define WC_UTF16
76#endif
77
f6bcfd97
BP
78// ----------------------------------------------------------------------------
79// globals
80// ----------------------------------------------------------------------------
6001e347
RR
81
82WXDLLEXPORT_DATA(wxMBConv *) wxConvCurrent = &wxConvLibc;
83
f6bcfd97
BP
84// ============================================================================
85// implementation
86// ============================================================================
6001e347 87
f6bcfd97 88#if wxUSE_WCHAR_T
6001e347 89
b0a6bb75
VZ
90#ifdef WC_UTF16
91
1cd52418
OK
92static size_t encode_utf16(wxUint32 input,wxUint16*output)
93{
94 if (input<=0xffff) {
95 if (output) *output++ = input;
96 return 1;
97 } else
98 if (input>=0x110000) {
99 return (size_t)-1;
100 } else {
101 if (output) {
102 *output++ = (input >> 10)+0xd7c0;
103 *output++ = (input&0x3ff)+0xdc00;
104 }
105 return 2;
106 }
107}
108
109static size_t decode_utf16(wxUint16*input,wxUint32&output)
110{
111 if ((*input<0xd800) || (*input>0xdfff)) {
112 output = *input;
113 return 1;
114 } else
115 if ((input[1]<0xdc00) || (input[1]>=0xdfff)) {
116 output = *input;
117 return (size_t)-1;
118 } else {
119 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
120 return 2;
121 }
122}
123
b0a6bb75
VZ
124#endif // WC_UTF16
125
f6bcfd97 126// ----------------------------------------------------------------------------
6001e347 127// wxMBConv
f6bcfd97 128// ----------------------------------------------------------------------------
6001e347
RR
129
130WXDLLEXPORT_DATA(wxMBConv) wxConvLibc;
131
132size_t wxMBConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
133{
134 return wxMB2WC(buf, psz, n);
135}
136
137size_t wxMBConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
138{
139 return wxWC2MB(buf, psz, n);
140}
141
142const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
143{
f6bcfd97 144 if (psz)
6001e347
RR
145 {
146 size_t nLen = MB2WC((wchar_t *) NULL, psz, 0);
f6bcfd97
BP
147 if (nLen == (size_t)-1)
148 return wxWCharBuffer((wchar_t *) NULL);
6001e347
RR
149 wxWCharBuffer buf(nLen);
150 MB2WC((wchar_t *)(const wchar_t *) buf, psz, nLen);
151 return buf;
f6bcfd97
BP
152 }
153 else
6001e347
RR
154 return wxWCharBuffer((wchar_t *) NULL);
155}
156
157const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *psz) const
158{
f6bcfd97 159 if (psz)
6001e347
RR
160 {
161 size_t nLen = WC2MB((char *) NULL, psz, 0);
f6bcfd97
BP
162 if (nLen == (size_t)-1)
163 return wxCharBuffer((char *) NULL);
6001e347
RR
164 wxCharBuffer buf(nLen);
165 WC2MB((char *)(const char *) buf, psz, nLen);
166 return buf;
f6bcfd97
BP
167 }
168 else
6001e347
RR
169 return wxCharBuffer((char *) NULL);
170}
171
f6bcfd97 172// ----------------------------------------------------------------------------
6001e347 173// standard file conversion
f6bcfd97 174// ----------------------------------------------------------------------------
6001e347
RR
175
176WXDLLEXPORT_DATA(wxMBConvFile) wxConvFile;
177
178// just use the libc conversion for now
179size_t wxMBConvFile::MB2WC(wchar_t *buf, const char *psz, size_t n) const
180{
181 return wxMB2WC(buf, psz, n);
182}
183
184size_t wxMBConvFile::WC2MB(char *buf, const wchar_t *psz, size_t n) const
185{
186 return wxWC2MB(buf, psz, n);
187}
188
f6bcfd97 189// ----------------------------------------------------------------------------
6001e347 190// standard gdk conversion
f6bcfd97
BP
191// ----------------------------------------------------------------------------
192
193#ifdef __WXGTK12__
6001e347
RR
194
195WXDLLEXPORT_DATA(wxMBConvGdk) wxConvGdk;
196
197#include <gdk/gdk.h>
198
199size_t wxMBConvGdk::MB2WC(wchar_t *buf, const char *psz, size_t n) const
200{
201 if (buf) {
202 return gdk_mbstowcs((GdkWChar *)buf, psz, n);
203 } else {
204 GdkWChar *nbuf = new GdkWChar[n=strlen(psz)];
205 size_t len = gdk_mbstowcs(nbuf, psz, n);
206 delete [] nbuf;
207 return len;
208 }
209}
210
211size_t wxMBConvGdk::WC2MB(char *buf, const wchar_t *psz, size_t n) const
212{
213 char *mbstr = gdk_wcstombs((GdkWChar *)psz);
214 size_t len = mbstr ? strlen(mbstr) : 0;
215 if (buf) {
216 if (len > n) len = n;
217 memcpy(buf, psz, len);
218 if (len < n) buf[len] = 0;
219 }
220 return len;
221}
f6bcfd97 222
6001e347
RR
223#endif // GTK > 1.0
224
225// ----------------------------------------------------------------------------
226// UTF-7
227// ----------------------------------------------------------------------------
228
229WXDLLEXPORT_DATA(wxMBConvUTF7) wxConvUTF7;
230
231#if 0
232static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
233 "abcdefghijklmnopqrstuvwxyz"
234 "0123456789'(),-./:?";
235static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
236static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
237 "abcdefghijklmnopqrstuvwxyz"
238 "0123456789+/";
239#endif
240
241// TODO: write actual implementations of UTF-7 here
242size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
243 const char * WXUNUSED(psz),
244 size_t WXUNUSED(n)) const
245{
246 return 0;
247}
248
249size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
250 const wchar_t * WXUNUSED(psz),
251 size_t WXUNUSED(n)) const
252{
253 return 0;
254}
255
f6bcfd97 256// ----------------------------------------------------------------------------
6001e347 257// UTF-8
f6bcfd97 258// ----------------------------------------------------------------------------
6001e347
RR
259
260WXDLLEXPORT_DATA(wxMBConvUTF8) wxConvUTF8;
261
1cd52418 262static wxUint32 utf8_max[]={0x7f,0x7ff,0xffff,0x1fffff,0x3ffffff,0x7fffffff,0xffffffff};
6001e347
RR
263
264size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
265{
266 size_t len = 0;
267
268 while (*psz && ((!buf) || (len<n))) {
269 unsigned char cc=*psz++, fc=cc;
270 unsigned cnt;
271 for (cnt=0; fc&0x80; cnt++) fc<<=1;
272 if (!cnt) {
273 // plain ASCII char
274 if (buf) *buf++=cc;
275 len++;
276 } else {
277 cnt--;
278 if (!cnt) {
279 // invalid UTF-8 sequence
280 return (size_t)-1;
281 } else {
282 unsigned ocnt=cnt-1;
1cd52418 283 wxUint32 res=cc&(0x3f>>cnt);
6001e347
RR
284 while (cnt--) {
285 cc = *psz++;
286 if ((cc&0xC0)!=0x80) {
287 // invalid UTF-8 sequence
288 return (size_t)-1;
289 }
290 res=(res<<6)|(cc&0x3f);
291 }
292 if (res<=utf8_max[ocnt]) {
293 // illegal UTF-8 encoding
294 return (size_t)-1;
295 }
1cd52418
OK
296#ifdef WC_UTF16
297 size_t pa = encode_utf16(res, buf);
298 if (pa == (size_t)-1)
299 return (size_t)-1;
300 if (buf) buf+=pa;
301 len+=pa;
302#else
6001e347
RR
303 if (buf) *buf++=res;
304 len++;
1cd52418 305#endif
6001e347
RR
306 }
307 }
308 }
309 if (buf && (len<n)) *buf = 0;
310 return len;
311}
312
313size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
314{
315 size_t len = 0;
316
317 while (*psz && ((!buf) || (len<n))) {
1cd52418
OK
318 wxUint32 cc;
319#ifdef WC_UTF16
320 size_t pa = decode_utf16(psz,cc);
321 psz += (pa == (size_t)-1) ? 1 : pa;
322#else
323 cc=(*psz++)&0x7fffffff;
324#endif
6001e347
RR
325 unsigned cnt;
326 for (cnt=0; cc>utf8_max[cnt]; cnt++);
327 if (!cnt) {
328 // plain ASCII char
329 if (buf) *buf++=cc;
330 len++;
331 } else {
332 len+=cnt+1;
333 if (buf) {
334 *buf++=(-128>>cnt)|((cc>>(cnt*6))&(0x3f>>cnt));
335 while (cnt--)
336 *buf++=0x80|((cc>>(cnt*6))&0x3f);
337 }
338 }
339 }
340 if (buf && (len<n)) *buf = 0;
341 return len;
342}
343
344// ----------------------------------------------------------------------------
345// specified character set
346// ----------------------------------------------------------------------------
347
f6bcfd97
BP
348WXDLLEXPORT_DATA(wxCSConv) wxConvLocal((const wxChar *)NULL);
349
350#include "wx/encconv.h"
351#include "wx/fontmap.h"
6001e347 352
1cd52418
OK
353// TODO: add some tables here
354// - perhaps common encodings to common codepages (for Win32)
355// - perhaps common encodings to objects ("UTF8" -> wxConvUTF8)
356// - move wxEncodingConverter meat in here
357
358#ifdef __WIN32__
359#include "wx/msw/registry.h"
360// this should work if M$ Internet Exploiter is installed
361static long CharsetToCodepage(const wxChar *name)
362{
f1339c56
RR
363 if (!name)
364 return GetACP();
365
366 long CP=-1;
367
368 wxString cn(name);
369 do {
370 wxString path( wxT("MIME\\Database\\Charset\\") );
371 path += cn;
372 wxRegKey key( wxRegKey::HKCR, path );
373
374 /* two cases: either there's an AliasForCharset string,
375 * or there are Codepage and InternetEncoding dwords.
376 * The InternetEncoding gives us the actual encoding,
377 * the Codepage just says which Windows character set to
378 * use when displaying the data.
379 */
380 if (key.QueryValue( wxT("InternetEncoding"), &CP )) break;
381
382 // no encoding, see if it's an alias
383 if (!key.QueryValue( wxT("AliasForCharset"), cn )) break;
384 } while (1);
385
386 return CP;
1cd52418
OK
387}
388#endif
389
6001e347
RR
390class wxCharacterSet
391{
1cd52418 392public:
f1339c56
RR
393 wxCharacterSet(const wxChar*name)
394 : cname(name) {}
395 virtual ~wxCharacterSet()
396 {}
397 virtual size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
398 { return (size_t)-1; }
399 virtual size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
400 { return (size_t)-1; }
401 virtual bool usable()
402 { return FALSE; }
403public:
404 const wxChar*cname;
1cd52418
OK
405};
406
407class ID_CharSet : public wxCharacterSet
408{
409public:
f1339c56
RR
410 ID_CharSet(const wxChar*name,wxMBConv*cnv)
411 : wxCharacterSet(name), work(cnv) {}
412
413 size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
414 { return work ? work->MB2WC(buf,psz,n) : (size_t)-1; }
415
416 size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
417 { return work ? work->WC2MB(buf,psz,n) : (size_t)-1; }
418
419 bool usable()
420 { return work!=NULL; }
421public:
422 wxMBConv*work;
1cd52418
OK
423};
424
425#ifdef HAVE_ICONV_H
426class IC_CharSet : public wxCharacterSet
427{
428public:
f1339c56
RR
429 IC_CharSet(const wxChar*name)
430 : wxCharacterSet(name), m2w((iconv_t)-1), w2m((iconv_t)-1) {}
431 ~IC_CharSet()
432 {
433 if (m2w!=(iconv_t)-1) iconv_close(m2w);
434 if (w2m!=(iconv_t)-1) iconv_close(w2m);
1cd52418 435 }
f1339c56
RR
436
437 void LoadM2W()
438 {
439 if (m2w==(iconv_t)-1)
440 m2w=iconv_open(WC_NAME,wxConvLibc.cWX2MB(cname));
441 }
442
443 void LoadW2M()
444 {
445 if (w2m==(iconv_t)-1)
446 w2m=iconv_open(wxConvLibc.cWX2MB(cname),WC_NAME);
447 }
448
449 size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
450 {
451 LoadM2W();
452 size_t inbuf = strlen(psz);
453 size_t outbuf = n*SIZEOF_WCHAR_T;
454 size_t res, cres;
455 fprintf(stderr,"IC Convert to WC using %s\n",(const char*)wxConvLibc.cWX2MB(cname));
456 if (buf)
457 {
458 // have destination buffer, convert there
459 cres = iconv( m2w, (char**)&psz, &inbuf, (char**)&buf, &outbuf );
460 res = n-(outbuf/SIZEOF_WCHAR_T);
461 // convert to native endianness
462 WC_BSWAP(buf, res)
463 }
464 else
465 {
466 // no destination buffer... convert using temp buffer
467 // to calculate destination buffer requirement
468 wchar_t tbuf[8];
469 res = 0;
470 do {
471 buf = tbuf; outbuf = 8*SIZEOF_WCHAR_T;
472 cres = iconv( m2w, (char**)&psz, &inbuf, (char**)&buf, &outbuf );
473 res += 8-(outbuf/SIZEOF_WCHAR_T);
474 } while ((cres==(size_t)-1) && (errno==E2BIG));
475 }
476
477 if (cres==(size_t)-1)
478 return (size_t)-1;
479
480 return res;
481 }
482
483 size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
484 {
485 LoadW2M();
1cd52418 486#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
f1339c56 487 size_t inbuf = std::wcslen(psz);
1cd52418 488#else
f1339c56 489 size_t inbuf = ::wcslen(psz);
1cd52418 490#endif
f1339c56
RR
491 size_t outbuf = n;
492 size_t res, cres;
493 fprintf(stderr,"IC Convert from WC using %s\n",(const char*)wxConvLibc.cWX2MB(cname));
1cd52418 494#ifdef WC_NEED_BSWAP
f1339c56
RR
495 // need to copy to temp buffer to switch endianness
496 // this absolutely doesn't rock!
497 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
498 // could be in read-only memory, or be accessed in some other thread)
499 wchar_t*tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
500 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
501 WC_BSWAP(tmpbuf, inbuf)
502 psz=tmpbuf;
1cd52418 503#endif
f1339c56
RR
504 if (buf)
505 {
506 // have destination buffer, convert there
507 cres = iconv( w2m, (char**)&psz, &inbuf, &buf, &outbuf );
508 res = n-outbuf;
509 }
510 else
511 {
512 // no destination buffer... convert using temp buffer
513 // to calculate destination buffer requirement
514 char tbuf[16];
515 res = 0;
516 do {
517 buf = tbuf; outbuf = 16;
518 cres = iconv( w2m, (char**)&psz, &inbuf, &buf, &outbuf );
519 res += 16 - outbuf;
520 } while ((cres==(size_t)-1) && (errno==E2BIG));
521 }
1cd52418 522#ifdef WC_NEED_BSWAP
f1339c56 523 free(tmpbuf);
1cd52418 524#endif
f1339c56
RR
525 if (cres==(size_t)-1)
526 return (size_t)-1;
527
528 return res;
529 }
530
531 bool usable()
532 { return TRUE; }
533
534public:
535 iconv_t m2w, w2m;
1cd52418
OK
536};
537#endif
538
539#ifdef __WIN32__
540class CP_CharSet : public wxCharacterSet
541{
542public:
f1339c56
RR
543 CP_CharSet(const wxChar*name)
544 : wxCharacterSet(name), CodePage(CharsetToCodepage(name)) {}
545
546 size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
547 {
548 size_t len = MultiByteToWideChar(CodePage,0,psz,-1,buf,buf?n:0);
549 return len ? len : (size_t)-1;
550 }
551
552 size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
553 {
554 size_t len = WideCharToMultiByte(CodePage,0,psz,-1,buf,buf?n:0,NULL,NULL);
555 return len ? len : (size_t)-1;
556 }
557
558 bool usable()
559 { return CodePage!=-1; }
560
561public:
562 long CodePage;
1cd52418
OK
563};
564#endif
565
566class EC_CharSet : public wxCharacterSet
567{
6001e347 568public:
f1339c56
RR
569 // temporarily just use wxEncodingConverter stuff,
570 // so that it works while a better implementation is built
571 EC_CharSet(const wxChar*name) : wxCharacterSet(name), enc(wxFONTENCODING_SYSTEM)
572 {
573 if (name)
574 enc = wxTheFontMapper->CharsetToEncoding(name, FALSE);
575 m2w.Init(enc, wxFONTENCODING_UNICODE);
576 w2m.Init(wxFONTENCODING_UNICODE, enc);
577 }
578
579 size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
580 {
581 size_t inbuf = strlen(psz);
582 fprintf(stderr,"EC Convert to WC using %d\n",enc);
583 if (buf) m2w.Convert(psz,buf);
584 return inbuf;
585 }
586
587 size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
588 {
1cd52418 589#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
f1339c56 590 size_t inbuf = std::wcslen(psz);
1cd52418 591#else
f1339c56 592 size_t inbuf = ::wcslen(psz);
1cd52418 593#endif
f1339c56
RR
594 fprintf(stderr,"EC Convert from WC using %d\n",enc);
595 if (buf)
596 w2m.Convert(psz,buf);
597
598 return inbuf;
599 }
600
601 bool usable()
602 { return (enc!=wxFONTENCODING_SYSTEM) && (enc!=wxFONTENCODING_DEFAULT); }
603
604public:
605 wxFontEncoding enc;
606 wxEncodingConverter m2w, w2m;
f6bcfd97 607};
6001e347 608
f6bcfd97 609static wxCharacterSet *wxGetCharacterSet(const wxChar *name)
6001e347 610{
f1339c56
RR
611 wxCharacterSet *cset = NULL;
612 if (name)
613 {
614 if (!wxStricmp(name, wxT("UTF8")) || !wxStricmp(name, wxT("UTF-8")))
615 {
616 cset = new ID_CharSet(name, &wxConvUTF8);
617 }
618 else
619 {
1cd52418 620#ifdef HAVE_ICONV_H
f1339c56 621 cset = new IC_CharSet(name); // may not take NULL
1cd52418 622#endif
f1339c56 623 }
1cd52418 624 }
f1339c56
RR
625
626 if (cset && cset->usable()) return cset;
627 if (cset) delete cset;
1cd52418 628#ifdef __WIN32__
f1339c56
RR
629 cset = new CP_CharSet(name); // may take NULL
630 if (cset->usable()) return cset;
1cd52418 631#endif
f1339c56
RR
632 if (cset) delete cset;
633 cset = new EC_CharSet(name);
634 if (cset->usable()) return cset;
635 delete cset;
636 return NULL;
6001e347
RR
637}
638
6001e347
RR
639wxCSConv::wxCSConv(const wxChar *charset)
640{
f1339c56
RR
641 m_name = (wxChar *) NULL;
642 m_cset = (wxCharacterSet *) NULL;
643 m_deferred = TRUE;
644 SetName(charset);
6001e347
RR
645}
646
647wxCSConv::~wxCSConv()
648{
f1339c56
RR
649 if (m_name) free(m_name);
650 if (m_cset) delete m_cset;
6001e347
RR
651}
652
653void wxCSConv::SetName(const wxChar *charset)
654{
f1339c56
RR
655 if (charset)
656 {
657 m_name = wxStrdup(charset);
658 m_deferred = TRUE;
659 }
6001e347
RR
660}
661
662void wxCSConv::LoadNow()
663{
223d09f6 664// wxPrintf(wxT("Conversion request\n"));
f1339c56
RR
665 if (m_deferred)
666 {
667 if (!m_name)
668 {
6001e347 669#ifdef __UNIX__
1cd52418 670#if defined(HAVE_LANGINFO_H) && defined(CODESET)
f1339c56
RR
671 // GNU libc provides current character set this way
672 char*alang = nl_langinfo(CODESET);
673 if (alang)
674 {
675 SetName(wxConvLibc.cMB2WX(alang));
676 }
677 else
1cd52418 678#endif
f1339c56
RR
679 {
680 // if we can't get at the character set directly,
681 // try to see if it's in the environment variables
682 // (in most cases this won't work, but I was out of ideas)
683 wxChar *lang = wxGetenv(wxT("LC_ALL"));
684 if (!lang) lang = wxGetenv(wxT("LC_CTYPE"));
685 if (!lang) lang = wxGetenv(wxT("LANG"));
686 wxChar *dot = lang ? wxStrchr(lang, wxT('.')) : (wxChar *)NULL;
687 if (dot) SetName(dot+1);
688 }
6001e347 689#endif
f1339c56
RR
690 }
691 m_cset = wxGetCharacterSet(m_name);
692 m_deferred = FALSE;
6001e347 693 }
6001e347
RR
694}
695
696size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
697{
f1339c56
RR
698 ((wxCSConv *)this)->LoadNow(); // discard constness
699
700 if (m_cset)
701 return m_cset->MB2WC(buf, psz, n);
702
703 // latin-1 (direct)
704 size_t len=strlen(psz);
705
706 if (buf)
707 {
708 for (size_t c=0; c<=len; c++)
709 buf[c] = (unsigned char)(psz[c]);
710 }
711
712 return len;
6001e347
RR
713}
714
715size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
716{
f1339c56
RR
717 ((wxCSConv *)this)->LoadNow(); // discard constness
718
719 if (m_cset)
720 return m_cset->WC2MB(buf, psz, n);
1cd52418 721
f1339c56 722 // latin-1 (direct)
d834f22c 723#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
f1339c56 724 size_t len=std::wcslen(psz);
d834f22c 725#else
f1339c56 726 size_t len=::wcslen(psz);
d834f22c 727#endif
f1339c56
RR
728 if (buf)
729 {
730 for (size_t c=0; c<=len; c++)
731 buf[c] = (psz[c]>0xff) ? '?' : psz[c];
732 }
733
734 return len;
6001e347
RR
735}
736
1cd52418
OK
737#ifdef HAVE_ICONV_H
738class IC_CharSetConverter
739{
740public:
f1339c56
RR
741 IC_CharSetConverter(IC_CharSet*from,IC_CharSet*to)
742 { cnv = iconv_open(wxConvLibc.cWX2MB(to->cname),wxConvLibc.cWX2MB(from->cname)); }
743
744 ~IC_CharSetConverter()
745 { if (cnv!=(iconv_t)-1) iconv_close(cnv); }
746
747 size_t Convert(char*buf, const char*psz, size_t n)
748 {
749 size_t inbuf = strlen(psz);
750 size_t outbuf = n;
751 size_t res = iconv( cnv, (char**)&psz, &inbuf, &buf, &outbuf );
752 if (res==(size_t)-1) return (size_t)-1;
753 return n-outbuf;
754 }
755
756public:
757 iconv_t cnv;
1cd52418
OK
758};
759#endif
760
761class EC_CharSetConverter
762{
763public:
f1339c56
RR
764 EC_CharSetConverter(EC_CharSet*from,EC_CharSet*to)
765 { cnv.Init(from->enc,to->enc); }
766
767 size_t Convert(char*buf, const char*psz, size_t n)
768 {
769 size_t inbuf = strlen(psz);
770 if (buf) cnv.Convert(psz,buf);
771 return inbuf;
772 }
773
774public:
775 wxEncodingConverter cnv;
1cd52418
OK
776};
777
f6bcfd97
BP
778#else // !wxUSE_WCHAR_T
779
780// ----------------------------------------------------------------------------
781// stand-ins in absence of wchar_t
782// ----------------------------------------------------------------------------
783
784WXDLLEXPORT_DATA(wxMBConv) wxConvLibc, wxConvFile;
785
786#endif // wxUSE_WCHAR_T
6001e347
RR
787
788