]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
hopefully fixed bytesex issues in strconv
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
3a0d76bc 4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
6001e347
RR
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
3a0d76bc 8// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
6001e347
RR
9// Licence: wxWindows license
10/////////////////////////////////////////////////////////////////////////////
11
f6bcfd97
BP
12// ============================================================================
13// declarations
14// ============================================================================
15
16// ----------------------------------------------------------------------------
17// headers
18// ----------------------------------------------------------------------------
19
6001e347
RR
20#ifdef __GNUG__
21 #pragma implementation "strconv.h"
22#endif
23
24// For compilers that support precompilation, includes "wx.h".
25#include "wx/wxprec.h"
26
27#ifdef __BORLANDC__
28 #pragma hdrstop
29#endif
30
0a1c1e62
GRG
31#ifdef __WXMSW__
32 #include "wx/msw/private.h"
33#endif
34
1cd52418 35#include <errno.h>
6001e347
RR
36#include <ctype.h>
37#include <string.h>
38#include <stdlib.h>
39
40#ifdef __SALFORDC__
41 #include <clib.h>
42#endif
43
1cd52418
OK
44#ifdef HAVE_ICONV_H
45 #include <iconv.h>
46#endif
1cd52418 47
3e61dfb0
OK
48#ifdef __WXMSW__
49 #include <windows.h>
50#endif
51
6001e347
RR
52#include "wx/debug.h"
53#include "wx/strconv.h"
3caec1bb
VS
54#include "wx/intl.h"
55#include "wx/log.h"
6001e347 56
1cd52418 57#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
3a0d76bc 58#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
1cd52418 59
a3f2769e
VZ
60// under Unix SIZEOF_WCHAR_T is defined by configure, but under other platforms
61// it might be not defined - assume the most common value
62#ifndef SIZEOF_WCHAR_T
63 #define SIZEOF_WCHAR_T 2
64#endif // !defined(SIZEOF_WCHAR_T)
65
1cd52418 66#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
67 #define WC_NAME "UCS4"
68 #define WC_BSWAP BSWAP_UCS4
69 #ifdef WORDS_BIGENDIAN
70 #define WC_NAME_BEST "UCS-4BE"
71 #else
72 #define WC_NAME_BEST "UCS-4LE"
73 #endif
1cd52418 74#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
75 #define WC_NAME "UTF16"
76 #define WC_BSWAP BSWAP_UTF16
a3f2769e 77 #define WC_UTF16
3a0d76bc
VS
78 #ifdef WORDS_BIGENDIAN
79 #define WC_NAME_BEST "UTF-16BE"
80 #else
81 #define WC_NAME_BEST "UTF-16LE"
82 #endif
bab1e722 83#else // sizeof(wchar_t) != 2 nor 4
a3f2769e
VZ
84 // I don't know what to do about this
85 #error "Weird sizeof(wchar_t): please report your platform details to wx-users mailing list"
1cd52418
OK
86#endif
87
f6bcfd97
BP
88// ----------------------------------------------------------------------------
89// globals
90// ----------------------------------------------------------------------------
6001e347
RR
91
92WXDLLEXPORT_DATA(wxMBConv *) wxConvCurrent = &wxConvLibc;
93
f6bcfd97
BP
94// ============================================================================
95// implementation
96// ============================================================================
6001e347 97
f6bcfd97 98#if wxUSE_WCHAR_T
6001e347 99
b0a6bb75
VZ
100#ifdef WC_UTF16
101
eccf1b2c 102static size_t encode_utf16(wxUint32 input, wchar_t *output)
1cd52418 103{
dccce9ea 104 if (input<=0xffff)
4def3b35
VS
105 {
106 if (output) *output++ = input;
107 return 1;
dccce9ea
VZ
108 }
109 else if (input>=0x110000)
4def3b35
VS
110 {
111 return (size_t)-1;
dccce9ea
VZ
112 }
113 else
4def3b35 114 {
dccce9ea 115 if (output)
4def3b35
VS
116 {
117 *output++ = (input >> 10)+0xd7c0;
118 *output++ = (input&0x3ff)+0xdc00;
119 }
120 return 2;
1cd52418 121 }
1cd52418
OK
122}
123
eccf1b2c 124static size_t decode_utf16(const wchar_t* input, wxUint32& output)
1cd52418 125{
dccce9ea 126 if ((*input<0xd800) || (*input>0xdfff))
4def3b35
VS
127 {
128 output = *input;
129 return 1;
dccce9ea
VZ
130 }
131 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
4def3b35
VS
132 {
133 output = *input;
134 return (size_t)-1;
dccce9ea
VZ
135 }
136 else
4def3b35
VS
137 {
138 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
139 return 2;
140 }
1cd52418
OK
141}
142
b0a6bb75
VZ
143#endif // WC_UTF16
144
f6bcfd97 145// ----------------------------------------------------------------------------
6001e347 146// wxMBConv
f6bcfd97 147// ----------------------------------------------------------------------------
6001e347
RR
148
149WXDLLEXPORT_DATA(wxMBConv) wxConvLibc;
150
151size_t wxMBConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
152{
153 return wxMB2WC(buf, psz, n);
154}
155
156size_t wxMBConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
157{
158 return wxWC2MB(buf, psz, n);
159}
160
161const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
162{
f6bcfd97 163 if (psz)
6001e347
RR
164 {
165 size_t nLen = MB2WC((wchar_t *) NULL, psz, 0);
f6bcfd97
BP
166 if (nLen == (size_t)-1)
167 return wxWCharBuffer((wchar_t *) NULL);
6001e347
RR
168 wxWCharBuffer buf(nLen);
169 MB2WC((wchar_t *)(const wchar_t *) buf, psz, nLen);
170 return buf;
f6bcfd97
BP
171 }
172 else
6001e347
RR
173 return wxWCharBuffer((wchar_t *) NULL);
174}
175
176const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *psz) const
177{
f6bcfd97 178 if (psz)
6001e347
RR
179 {
180 size_t nLen = WC2MB((char *) NULL, psz, 0);
f6bcfd97
BP
181 if (nLen == (size_t)-1)
182 return wxCharBuffer((char *) NULL);
6001e347
RR
183 wxCharBuffer buf(nLen);
184 WC2MB((char *)(const char *) buf, psz, nLen);
185 return buf;
f6bcfd97
BP
186 }
187 else
6001e347
RR
188 return wxCharBuffer((char *) NULL);
189}
190
f6bcfd97 191// ----------------------------------------------------------------------------
6001e347 192// standard file conversion
f6bcfd97 193// ----------------------------------------------------------------------------
6001e347
RR
194
195WXDLLEXPORT_DATA(wxMBConvFile) wxConvFile;
196
197// just use the libc conversion for now
198size_t wxMBConvFile::MB2WC(wchar_t *buf, const char *psz, size_t n) const
199{
200 return wxMB2WC(buf, psz, n);
201}
202
203size_t wxMBConvFile::WC2MB(char *buf, const wchar_t *psz, size_t n) const
204{
205 return wxWC2MB(buf, psz, n);
206}
207
f6bcfd97 208// ----------------------------------------------------------------------------
6001e347 209// standard gdk conversion
f6bcfd97
BP
210// ----------------------------------------------------------------------------
211
212#ifdef __WXGTK12__
6001e347
RR
213
214WXDLLEXPORT_DATA(wxMBConvGdk) wxConvGdk;
215
216#include <gdk/gdk.h>
217
218size_t wxMBConvGdk::MB2WC(wchar_t *buf, const char *psz, size_t n) const
219{
dccce9ea 220 if (buf)
4def3b35
VS
221 {
222 return gdk_mbstowcs((GdkWChar *)buf, psz, n);
dccce9ea
VZ
223 }
224 else
4def3b35
VS
225 {
226 GdkWChar *nbuf = new GdkWChar[n=strlen(psz)];
227 size_t len = gdk_mbstowcs(nbuf, psz, n);
228 delete[] nbuf;
229 return len;
230 }
6001e347
RR
231}
232
233size_t wxMBConvGdk::WC2MB(char *buf, const wchar_t *psz, size_t n) const
234{
4def3b35
VS
235 char *mbstr = gdk_wcstombs((GdkWChar *)psz);
236 size_t len = mbstr ? strlen(mbstr) : 0;
dccce9ea 237 if (buf)
4def3b35 238 {
dccce9ea 239 if (len > n)
4def3b35
VS
240 len = n;
241 memcpy(buf, psz, len);
dccce9ea 242 if (len < n)
4def3b35
VS
243 buf[len] = 0;
244 }
245 return len;
6001e347 246}
f6bcfd97 247
6001e347
RR
248#endif // GTK > 1.0
249
250// ----------------------------------------------------------------------------
251// UTF-7
252// ----------------------------------------------------------------------------
253
254WXDLLEXPORT_DATA(wxMBConvUTF7) wxConvUTF7;
255
256#if 0
257static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
258 "abcdefghijklmnopqrstuvwxyz"
259 "0123456789'(),-./:?";
260static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
261static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
262 "abcdefghijklmnopqrstuvwxyz"
263 "0123456789+/";
264#endif
265
266// TODO: write actual implementations of UTF-7 here
267size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
268 const char * WXUNUSED(psz),
269 size_t WXUNUSED(n)) const
270{
271 return 0;
272}
273
274size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
275 const wchar_t * WXUNUSED(psz),
276 size_t WXUNUSED(n)) const
277{
278 return 0;
279}
280
f6bcfd97 281// ----------------------------------------------------------------------------
6001e347 282// UTF-8
f6bcfd97 283// ----------------------------------------------------------------------------
6001e347
RR
284
285WXDLLEXPORT_DATA(wxMBConvUTF8) wxConvUTF8;
286
dccce9ea 287static wxUint32 utf8_max[]=
4def3b35 288 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347
RR
289
290size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
291{
4def3b35
VS
292 size_t len = 0;
293
dccce9ea 294 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
295 {
296 unsigned char cc = *psz++, fc = cc;
297 unsigned cnt;
dccce9ea 298 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 299 fc <<= 1;
dccce9ea 300 if (!cnt)
4def3b35
VS
301 {
302 // plain ASCII char
dccce9ea 303 if (buf)
4def3b35
VS
304 *buf++ = cc;
305 len++;
dccce9ea
VZ
306 }
307 else
4def3b35
VS
308 {
309 cnt--;
dccce9ea 310 if (!cnt)
4def3b35
VS
311 {
312 // invalid UTF-8 sequence
313 return (size_t)-1;
dccce9ea
VZ
314 }
315 else
4def3b35
VS
316 {
317 unsigned ocnt = cnt - 1;
318 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 319 while (cnt--)
4def3b35
VS
320 {
321 cc = *psz++;
dccce9ea 322 if ((cc & 0xC0) != 0x80)
4def3b35
VS
323 {
324 // invalid UTF-8 sequence
325 return (size_t)-1;
326 }
327 res = (res << 6) | (cc & 0x3f);
328 }
dccce9ea 329 if (res <= utf8_max[ocnt])
4def3b35
VS
330 {
331 // illegal UTF-8 encoding
332 return (size_t)-1;
333 }
1cd52418 334#ifdef WC_UTF16
4def3b35
VS
335 size_t pa = encode_utf16(res, buf);
336 if (pa == (size_t)-1)
337 return (size_t)-1;
dccce9ea 338 if (buf)
4def3b35
VS
339 buf += pa;
340 len += pa;
1cd52418 341#else
dccce9ea 342 if (buf)
4def3b35
VS
343 *buf++ = res;
344 len++;
1cd52418 345#endif
4def3b35
VS
346 }
347 }
6001e347 348 }
dccce9ea 349 if (buf && (len < n))
4def3b35
VS
350 *buf = 0;
351 return len;
6001e347
RR
352}
353
354size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
355{
4def3b35 356 size_t len = 0;
6001e347 357
dccce9ea 358 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
359 {
360 wxUint32 cc;
1cd52418 361#ifdef WC_UTF16
eccf1b2c 362 size_t pa = decode_utf16(psz, cc);
4def3b35 363 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 364#else
4def3b35
VS
365 cc=(*psz++) & 0x7fffffff;
366#endif
367 unsigned cnt;
368 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
dccce9ea 369 if (!cnt)
4def3b35
VS
370 {
371 // plain ASCII char
dccce9ea 372 if (buf)
4def3b35
VS
373 *buf++ = cc;
374 len++;
dccce9ea
VZ
375 }
376
377 else
4def3b35
VS
378 {
379 len += cnt + 1;
dccce9ea 380 if (buf)
4def3b35
VS
381 {
382 *buf++ = (-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt));
383 while (cnt--)
384 *buf++ = 0x80 | ((cc >> (cnt * 6)) & 0x3f);
385 }
386 }
6001e347 387 }
4def3b35
VS
388
389 if (buf && (len<n)) *buf = 0;
390 return len;
6001e347
RR
391}
392
393// ----------------------------------------------------------------------------
394// specified character set
395// ----------------------------------------------------------------------------
396
f6bcfd97
BP
397WXDLLEXPORT_DATA(wxCSConv) wxConvLocal((const wxChar *)NULL);
398
399#include "wx/encconv.h"
400#include "wx/fontmap.h"
6001e347 401
1cd52418
OK
402// TODO: add some tables here
403// - perhaps common encodings to common codepages (for Win32)
404// - perhaps common encodings to objects ("UTF8" -> wxConvUTF8)
405// - move wxEncodingConverter meat in here
406
04ef50df 407#if defined(__WIN32__) && !defined(__WXMICROWIN__)
1cd52418
OK
408#include "wx/msw/registry.h"
409// this should work if M$ Internet Exploiter is installed
410static long CharsetToCodepage(const wxChar *name)
411{
dccce9ea 412 if (!name)
f1339c56 413 return GetACP();
dccce9ea 414
f1339c56 415 long CP=-1;
dccce9ea 416
f1339c56
RR
417 wxString cn(name);
418 do {
5ce0e4ac 419 wxString path(wxT("MIME\\Database\\Charset\\"));
f1339c56 420 path += cn;
5ce0e4ac
VS
421 wxRegKey key(wxRegKey::HKCR, path);
422
9c904e25 423 if (!key.Exists()) break;
dccce9ea 424
5ce0e4ac
VS
425 // two cases: either there's an AliasForCharset string,
426 // or there are Codepage and InternetEncoding dwords.
427 // The InternetEncoding gives us the actual encoding,
428 // the Codepage just says which Windows character set to
429 // use when displaying the data.
430 if (key.HasValue(wxT("InternetEncoding")) &&
431 key.QueryValue(wxT("InternetEncoding"), &CP)) break;
dccce9ea 432
f1339c56 433 // no encoding, see if it's an alias
5ce0e4ac
VS
434 if (!key.HasValue(wxT("AliasForCharset")) ||
435 !key.QueryValue(wxT("AliasForCharset"), cn)) break;
f1339c56 436 } while (1);
dccce9ea 437
f1339c56 438 return CP;
1cd52418
OK
439}
440#endif
441
6001e347
RR
442class wxCharacterSet
443{
1cd52418 444public:
f1339c56
RR
445 wxCharacterSet(const wxChar*name)
446 : cname(name) {}
447 virtual ~wxCharacterSet()
448 {}
dccce9ea 449 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
f1339c56 450 { return (size_t)-1; }
4def3b35 451 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
f1339c56
RR
452 { return (size_t)-1; }
453 virtual bool usable()
454 { return FALSE; }
455public:
456 const wxChar*cname;
1cd52418
OK
457};
458
459class ID_CharSet : public wxCharacterSet
460{
461public:
4def3b35 462 ID_CharSet(const wxChar *name,wxMBConv *cnv)
f1339c56 463 : wxCharacterSet(name), work(cnv) {}
dccce9ea 464
4def3b35 465 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
f1339c56 466 { return work ? work->MB2WC(buf,psz,n) : (size_t)-1; }
dccce9ea 467
4def3b35 468 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
f1339c56 469 { return work ? work->WC2MB(buf,psz,n) : (size_t)-1; }
dccce9ea 470
f1339c56
RR
471 bool usable()
472 { return work!=NULL; }
473public:
474 wxMBConv*work;
1cd52418
OK
475};
476
3caec1bb 477
1cd52418 478#ifdef HAVE_ICONV_H
3caec1bb 479
3a0d76bc
VS
480bool g_wcNeedsSwap = FALSE;
481static const char *g_wcCharset = NULL;
482
3caec1bb
VS
483// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
484// if output buffer is _exactly_ as big as needed. Such case is (unless there's
485// yet another bug in glibc) the only case when iconv() returns with (size_t)-1
486// (which means error) and says there are 0 bytes left in the input buffer --
487// when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
488// this alternative test for iconv() failure.
489// [This bug does not appear in glibc 2.2.]
490#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
491#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
492 (errno != E2BIG || bufLeft != 0))
493#else
494#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
495#endif
496
1cd52418
OK
497class IC_CharSet : public wxCharacterSet
498{
499public:
dccce9ea 500 IC_CharSet(const wxChar *name)
3caec1bb 501 : wxCharacterSet(name)
f1339c56 502 {
3a0d76bc
VS
503 // check for charset that represents wchar_t:
504 if (g_wcCharset == NULL)
505 {
506 g_wcNeedsSwap = FALSE;
dccce9ea 507
3a0d76bc
VS
508 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
509 g_wcCharset = WC_NAME_BEST;
510 m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name));
511
512 if (m2w == (iconv_t)-1)
513 {
514 // try charset w/o bytesex info (e.g. "UCS4")
515 // and check for bytesex ourselves:
516 g_wcCharset = WC_NAME;
517 m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name));
518
519 // last bet, try if it knows WCHAR_T pseudo-charset
520 if (m2w == (iconv_t)-1)
521 {
522 g_wcCharset = "WCHAR_T";
523 m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name));
524 }
525
526 if (m2w != (iconv_t)-1)
527 {
528 char buf[2], *bufPtr;
529 wchar_t wbuf[2], *wbufPtr;
530 size_t insz, outsz;
531 size_t res;
532
533 buf[0] = 'A';
534 buf[1] = 0;
535 wbuf[0] = 0;
536 insz = 2;
537 outsz = SIZEOF_WCHAR_T * 2;
538 wbufPtr = wbuf;
539 bufPtr = buf;
540
541 #ifdef WX_ICONV_TAKES_CHAR
542 res = iconv(m2w, (char**)&bufPtr, &insz, (char**)&wbufPtr, &outsz);
543 #else
544 res = iconv(m2w, (const char**)&bufPtr, &insz, (char**)&wbufPtr, &outsz);
545 #endif
546 if (ICONV_FAILED(res, insz))
547 {
548 g_wcCharset = NULL;
549 wxLogLastError(wxT("iconv"));
550 wxLogError(_("Convertion to charset '%s' doesn't work."), name);
551 }
552 else
553 {
554 g_wcNeedsSwap = (wbuf[0] != (wchar_t)buf[0]);
555 }
556 }
557 else
558 {
559 g_wcCharset = NULL;
560 wxLogError(_("Don't know how to convert to/from charset '%s'."), name);
561 }
562 }
563 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), g_wcCharset, g_wcNeedsSwap);
564 }
565 else
566 m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name));
567
568 w2m = iconv_open(wxConvLibc.cWX2MB(name), g_wcCharset);
569 }
570
571 ~IC_CharSet()
3caec1bb 572 {
dccce9ea 573 if ( m2w != (iconv_t)-1 )
3caec1bb 574 iconv_close(m2w);
dccce9ea 575 if ( w2m != (iconv_t)-1 )
3caec1bb 576 iconv_close(w2m);
f1339c56 577 }
dccce9ea 578
3caec1bb 579 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
f1339c56 580 {
f1339c56 581 size_t inbuf = strlen(psz);
3caec1bb 582 size_t outbuf = n * SIZEOF_WCHAR_T;
f1339c56 583 size_t res, cres;
3caec1bb
VS
584 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
585 wchar_t *bufPtr = buf;
586 const char *pszPtr = psz;
587
f1339c56
RR
588 if (buf)
589 {
590 // have destination buffer, convert there
95c8801c 591#ifdef WX_ICONV_TAKES_CHAR
3caec1bb 592 cres = iconv(m2w, (char**)&pszPtr, &inbuf, (char**)&bufPtr, &outbuf);
95c8801c 593#else
3caec1bb 594 cres = iconv(m2w, &pszPtr, &inbuf, (char**)&bufPtr, &outbuf);
95c8801c 595#endif
3caec1bb 596 res = n - (outbuf / SIZEOF_WCHAR_T);
3a0d76bc
VS
597
598 if (g_wcNeedsSwap)
599 {
600 // convert to native endianness
601 WC_BSWAP(buf /* _not_ bufPtr */, res)
602 }
f1339c56
RR
603 }
604 else
605 {
606 // no destination buffer... convert using temp buffer
607 // to calculate destination buffer requirement
608 wchar_t tbuf[8];
609 res = 0;
610 do {
3caec1bb 611 bufPtr = tbuf; outbuf = 8*SIZEOF_WCHAR_T;
95c8801c 612#ifdef WX_ICONV_TAKES_CHAR
3caec1bb 613 cres = iconv( m2w, (char**)&pszPtr, &inbuf, (char**)&bufPtr, &outbuf );
95c8801c 614#else
3caec1bb 615 cres = iconv( m2w, &pszPtr, &inbuf, (char**)&bufPtr, &outbuf );
95c8801c 616#endif
f1339c56
RR
617 res += 8-(outbuf/SIZEOF_WCHAR_T);
618 } while ((cres==(size_t)-1) && (errno==E2BIG));
619 }
dccce9ea 620
3caec1bb 621 if (ICONV_FAILED(cres, inbuf))
3a0d76bc
VS
622 {
623 //VS: it is ok if iconv fails, hence trace only
624 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
f1339c56 625 return (size_t)-1;
3a0d76bc 626 }
3caec1bb 627
f1339c56
RR
628 return res;
629 }
dccce9ea 630
4def3b35 631 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
f1339c56 632 {
1cd52418 633#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
3caec1bb 634 size_t inbuf = std::wcslen(psz) * SIZEOF_WCHAR_T;
1cd52418 635#else
3caec1bb 636 size_t inbuf = ::wcslen(psz) * SIZEOF_WCHAR_T;
1cd52418 637#endif
f1339c56
RR
638 size_t outbuf = n;
639 size_t res, cres;
3a0d76bc
VS
640
641 wchar_t *tmpbuf;
642
643 if (g_wcNeedsSwap)
644 {
645 // need to copy to temp buffer to switch endianness
646 // this absolutely doesn't rock!
647 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
648 // could be in read-only memory, or be accessed in some other thread)
649 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
650 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
651 WC_BSWAP(tmpbuf, inbuf)
652 psz=tmpbuf;
653 }
3caec1bb 654
f1339c56
RR
655 if (buf)
656 {
657 // have destination buffer, convert there
95c8801c 658#ifdef WX_ICONV_TAKES_CHAR
f1339c56 659 cres = iconv( w2m, (char**)&psz, &inbuf, &buf, &outbuf );
95c8801c
VS
660#else
661 cres = iconv( w2m, (const char**)&psz, &inbuf, &buf, &outbuf );
662#endif
f1339c56
RR
663 res = n-outbuf;
664 }
665 else
666 {
667 // no destination buffer... convert using temp buffer
668 // to calculate destination buffer requirement
669 char tbuf[16];
670 res = 0;
671 do {
672 buf = tbuf; outbuf = 16;
95c8801c 673#ifdef WX_ICONV_TAKES_CHAR
f1339c56 674 cres = iconv( w2m, (char**)&psz, &inbuf, &buf, &outbuf );
95c8801c
VS
675#else
676 cres = iconv( w2m, (const char**)&psz, &inbuf, &buf, &outbuf );
677#endif
f1339c56
RR
678 res += 16 - outbuf;
679 } while ((cres==(size_t)-1) && (errno==E2BIG));
680 }
3a0d76bc
VS
681
682 if (g_wcNeedsSwap)
683 {
684 free(tmpbuf);
685 }
686
3caec1bb 687 if (ICONV_FAILED(cres, inbuf))
3a0d76bc
VS
688 {
689 //VS: it is ok if iconv fails, hence trace only
690 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
f1339c56 691 return (size_t)-1;
3a0d76bc 692 }
dccce9ea 693
f1339c56
RR
694 return res;
695 }
dccce9ea 696
f1339c56 697 bool usable()
3caec1bb 698 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
dccce9ea 699
3a0d76bc 700protected:
f1339c56 701 iconv_t m2w, w2m;
1cd52418
OK
702};
703#endif
704
04ef50df 705#if defined(__WIN32__) && !defined(__WXMICROWIN__)
1cd52418
OK
706class CP_CharSet : public wxCharacterSet
707{
708public:
dccce9ea 709 CP_CharSet(const wxChar*name)
f1339c56 710 : wxCharacterSet(name), CodePage(CharsetToCodepage(name)) {}
dccce9ea 711
4def3b35 712 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
f1339c56 713 {
dccce9ea 714 size_t len =
4def3b35 715 MultiByteToWideChar(CodePage, 0, psz, -1, buf, buf ? n : 0);
1e6feb95 716 //VS: returns # of written chars for buf!=NULL and *size*
35d764b0
VS
717 // needed buffer for buf==NULL
718 return len ? (buf ? len : len-1) : (size_t)-1;
f1339c56 719 }
dccce9ea 720
4def3b35 721 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
f1339c56 722 {
4def3b35
VS
723 size_t len = WideCharToMultiByte(CodePage, 0, psz, -1, buf,
724 buf ? n : 0, NULL, NULL);
1e6feb95 725 //VS: returns # of written chars for buf!=NULL and *size*
35d764b0
VS
726 // needed buffer for buf==NULL
727 return len ? (buf ? len : len-1) : (size_t)-1;
f1339c56 728 }
dccce9ea 729
f1339c56 730 bool usable()
4def3b35 731 { return CodePage != -1; }
f1339c56
RR
732
733public:
734 long CodePage;
1cd52418 735};
1e6feb95
VZ
736#endif // __WIN32__
737
738#if wxUSE_FONTMAP
1cd52418
OK
739
740class EC_CharSet : public wxCharacterSet
741{
6001e347 742public:
f1339c56
RR
743 // temporarily just use wxEncodingConverter stuff,
744 // so that it works while a better implementation is built
dccce9ea 745 EC_CharSet(const wxChar*name) : wxCharacterSet(name),
4def3b35 746 enc(wxFONTENCODING_SYSTEM)
f1339c56
RR
747 {
748 if (name)
749 enc = wxTheFontMapper->CharsetToEncoding(name, FALSE);
750 m2w.Init(enc, wxFONTENCODING_UNICODE);
751 w2m.Init(wxFONTENCODING_UNICODE, enc);
752 }
dccce9ea 753
4def3b35 754 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
f1339c56
RR
755 {
756 size_t inbuf = strlen(psz);
dccce9ea 757 if (buf)
4def3b35 758 m2w.Convert(psz,buf);
f1339c56
RR
759 return inbuf;
760 }
dccce9ea 761
4def3b35 762 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
f1339c56 763 {
1cd52418 764#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
f1339c56 765 size_t inbuf = std::wcslen(psz);
1cd52418 766#else
f1339c56 767 size_t inbuf = ::wcslen(psz);
1cd52418 768#endif
f1339c56
RR
769 if (buf)
770 w2m.Convert(psz,buf);
dccce9ea 771
f1339c56
RR
772 return inbuf;
773 }
dccce9ea 774
f1339c56
RR
775 bool usable()
776 { return (enc!=wxFONTENCODING_SYSTEM) && (enc!=wxFONTENCODING_DEFAULT); }
777
778public:
779 wxFontEncoding enc;
780 wxEncodingConverter m2w, w2m;
f6bcfd97 781};
6001e347 782
1e6feb95
VZ
783#endif // wxUSE_FONTMAP
784
f6bcfd97 785static wxCharacterSet *wxGetCharacterSet(const wxChar *name)
6001e347 786{
f1339c56
RR
787 wxCharacterSet *cset = NULL;
788 if (name)
789 {
4def3b35 790 if (wxStricmp(name, wxT("UTF8")) == 0 || wxStricmp(name, wxT("UTF-8")) == 0)
f1339c56
RR
791 {
792 cset = new ID_CharSet(name, &wxConvUTF8);
793 }
794 else
795 {
1cd52418 796#ifdef HAVE_ICONV_H
f1339c56 797 cset = new IC_CharSet(name); // may not take NULL
1cd52418 798#endif
f1339c56 799 }
1cd52418 800 }
dccce9ea 801
1e6feb95
VZ
802 if (cset && cset->usable())
803 return cset;
804
dccce9ea
VZ
805 if (cset)
806 {
807 delete cset;
808 cset = NULL;
809 }
810
04ef50df 811#if defined(__WIN32__) && !defined(__WXMICROWIN__)
f1339c56 812 cset = new CP_CharSet(name); // may take NULL
dccce9ea
VZ
813 if (cset->usable())
814 return cset;
815
816 delete cset;
817#endif // __WIN32__
818
1e6feb95 819#if wxUSE_FONTMAP
f1339c56 820 cset = new EC_CharSet(name);
dccce9ea
VZ
821 if (cset->usable())
822 return cset;
1e6feb95 823#endif // wxUSE_FONTMAP
dccce9ea 824
f1339c56 825 delete cset;
3caec1bb 826 wxLogError(_("Unknown encoding '%s'!"), name);
f1339c56 827 return NULL;
6001e347
RR
828}
829
6001e347
RR
830wxCSConv::wxCSConv(const wxChar *charset)
831{
dccce9ea 832 m_name = (wxChar *)NULL;
f1339c56 833 m_cset = (wxCharacterSet *) NULL;
82713003
VZ
834 m_deferred = TRUE;
835
f1339c56 836 SetName(charset);
6001e347
RR
837}
838
839wxCSConv::~wxCSConv()
840{
dccce9ea
VZ
841 free(m_name);
842 delete m_cset;
6001e347
RR
843}
844
845void wxCSConv::SetName(const wxChar *charset)
846{
f1339c56
RR
847 if (charset)
848 {
849 m_name = wxStrdup(charset);
850 m_deferred = TRUE;
851 }
6001e347
RR
852}
853
854void wxCSConv::LoadNow()
855{
f1339c56
RR
856 if (m_deferred)
857 {
dccce9ea 858 if ( !m_name )
f1339c56 859 {
dccce9ea
VZ
860 wxString name = wxLocale::GetSystemEncodingName();
861 if ( !name.empty() )
862 SetName(name);
f1339c56 863 }
dccce9ea 864
a45a98fb
VZ
865 // wxGetCharacterSet() complains about NULL name
866 m_cset = m_name ? wxGetCharacterSet(m_name) : NULL;
f1339c56 867 m_deferred = FALSE;
6001e347 868 }
6001e347
RR
869}
870
871size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
872{
f1339c56 873 ((wxCSConv *)this)->LoadNow(); // discard constness
dccce9ea 874
f1339c56
RR
875 if (m_cset)
876 return m_cset->MB2WC(buf, psz, n);
877
878 // latin-1 (direct)
4def3b35 879 size_t len = strlen(psz);
dccce9ea 880
f1339c56
RR
881 if (buf)
882 {
4def3b35 883 for (size_t c = 0; c <= len; c++)
f1339c56
RR
884 buf[c] = (unsigned char)(psz[c]);
885 }
dccce9ea 886
f1339c56 887 return len;
6001e347
RR
888}
889
890size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
891{
f1339c56 892 ((wxCSConv *)this)->LoadNow(); // discard constness
dccce9ea 893
f1339c56
RR
894 if (m_cset)
895 return m_cset->WC2MB(buf, psz, n);
1cd52418 896
f1339c56 897 // latin-1 (direct)
d834f22c 898#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
f1339c56 899 size_t len=std::wcslen(psz);
d834f22c 900#else
f1339c56 901 size_t len=::wcslen(psz);
d834f22c 902#endif
f1339c56
RR
903 if (buf)
904 {
4def3b35
VS
905 for (size_t c = 0; c <= len; c++)
906 buf[c] = (psz[c] > 0xff) ? '?' : psz[c];
f1339c56 907 }
dccce9ea 908
f1339c56 909 return len;
6001e347
RR
910}
911
1cd52418 912#ifdef HAVE_ICONV_H
dccce9ea 913
1cd52418
OK
914class IC_CharSetConverter
915{
916public:
4def3b35
VS
917 IC_CharSetConverter(IC_CharSet *from, IC_CharSet *to)
918 {
dccce9ea
VZ
919 cnv = iconv_open(wxConvLibc.cWX2MB(to->cname),
920 wxConvLibc.cWX2MB(from->cname));
4def3b35 921 }
dccce9ea 922
f1339c56 923 ~IC_CharSetConverter()
dccce9ea
VZ
924 {
925 if (cnv != (iconv_t)-1)
926 iconv_close(cnv);
4def3b35 927 }
dccce9ea 928
4def3b35 929 size_t Convert(char *buf, const char *psz, size_t n)
f1339c56
RR
930 {
931 size_t inbuf = strlen(psz);
932 size_t outbuf = n;
95c8801c 933#ifdef WX_ICONV_TAKES_CHAR
f1339c56 934 size_t res = iconv( cnv, (char**)&psz, &inbuf, &buf, &outbuf );
95c8801c
VS
935#else
936 size_t res = iconv( cnv, &psz, &inbuf, &buf, &outbuf );
937#endif
dccce9ea 938 if (res == (size_t)-1)
4def3b35
VS
939 return (size_t)-1;
940 return (n - outbuf);
f1339c56
RR
941 }
942
943public:
944 iconv_t cnv;
1cd52418 945};
dccce9ea
VZ
946
947#endif // HAVE_ICONV_H
1cd52418
OK
948
// EC_CharSet is only defined if wxUSE_FONTMAP is on, so this class can't be
// compiled without it either
#if wxUSE_FONTMAP

// Direct conversion between the encodings of two EC_CharSet objects using
// wxEncodingConverter.
class EC_CharSetConverter
{
public:
    EC_CharSetConverter(EC_CharSet* from,EC_CharSet* to)
        { cnv.Init(from->enc,to->enc); }

    // Convert 'psz' into 'buf' if it is not NULL. Note that 'n' is not
    // looked at and that the length of the input string is returned.
    size_t Convert(char* buf, const char* psz, size_t WXUNUSED(n))
    {
        size_t inbuf = strlen(psz);
        if (buf) cnv.Convert(psz,buf);
        return inbuf;
    }

public:
    // the object doing the real work
    wxEncodingConverter cnv;
};

#endif // wxUSE_FONTMAP
965
f6bcfd97
BP
966#else // !wxUSE_WCHAR_T
967
968// ----------------------------------------------------------------------------
969// stand-ins in absence of wchar_t
970// ----------------------------------------------------------------------------
971
972WXDLLEXPORT_DATA(wxMBConv) wxConvLibc, wxConvFile;
973
974#endif // wxUSE_WCHAR_T
6001e347
RR
975
976