]> git.saurik.com Git - wxWidgets.git/blame - src/common/strconv.cpp
hide tree root on wxMSW in wxHtmlHelpController
[wxWidgets.git] / src / common / strconv.cpp
CommitLineData
6001e347
RR
1/////////////////////////////////////////////////////////////////////////////
2// Name: strconv.cpp
3// Purpose: Unicode conversion classes
3a0d76bc 4// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
6001e347
RR
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
3a0d76bc 8// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
6001e347
RR
9// Licence: wxWindows license
10/////////////////////////////////////////////////////////////////////////////
11
f6bcfd97
BP
12// ============================================================================
13// declarations
14// ============================================================================
15
16// ----------------------------------------------------------------------------
17// headers
18// ----------------------------------------------------------------------------
19
6001e347
RR
20#ifdef __GNUG__
21 #pragma implementation "strconv.h"
22#endif
23
24// For compilers that support precompilation, includes "wx.h".
25#include "wx/wxprec.h"
26
27#ifdef __BORLANDC__
28 #pragma hdrstop
29#endif
30
373658eb
VZ
31#ifndef WX_PRECOMP
32 #include "wx/intl.h"
33 #include "wx/log.h"
34#endif // WX_PRECOMP
35
0a1c1e62 36#ifdef __WXMSW__
373658eb 37 #include "wx/msw/private.h"
0a1c1e62
GRG
38#endif
39
1cd52418 40#include <errno.h>
6001e347
RR
41#include <ctype.h>
42#include <string.h>
43#include <stdlib.h>
44
7af284fd 45#include "wx/strconv.h"
7af284fd
VS
46
47// ----------------------------------------------------------------------------
48// globals
49// ----------------------------------------------------------------------------
50
373658eb
VZ
51#if wxUSE_WCHAR_T
52 WXDLLEXPORT_DATA(wxMBConv) wxConvLibc;
53 WXDLLEXPORT_DATA(wxCSConv) wxConvLocal((const wxChar *)NULL);
54#else
55 // stand-ins in absence of wchar_t
56 WXDLLEXPORT_DATA(wxMBConv) wxConvLibc, wxConvFile;
57#endif // wxUSE_WCHAR_T
7af284fd 58
373658eb 59WXDLLEXPORT_DATA(wxMBConv *) wxConvCurrent = &wxConvLibc;
7af284fd 60
373658eb
VZ
61// ----------------------------------------------------------------------------
62// headers
63// ----------------------------------------------------------------------------
7af284fd
VS
64
65#if wxUSE_WCHAR_T
66
6001e347 67#ifdef __SALFORDC__
373658eb 68 #include <clib.h>
6001e347
RR
69#endif
70
b040e242 71#ifdef HAVE_ICONV
373658eb 72 #include <iconv.h>
1cd52418 73#endif
1cd52418 74
373658eb
VZ
75#include "wx/encconv.h"
76#include "wx/fontmap.h"
77
78// ----------------------------------------------------------------------------
79// macros
80// ----------------------------------------------------------------------------
3e61dfb0 81
1cd52418 82#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
3a0d76bc 83#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
1cd52418 84
a3f2769e
VZ
85// under Unix SIZEOF_WCHAR_T is defined by configure, but under other platforms
86// it might be not defined - assume the most common value
87#ifndef SIZEOF_WCHAR_T
88 #define SIZEOF_WCHAR_T 2
89#endif // !defined(SIZEOF_WCHAR_T)
90
1cd52418 91#if SIZEOF_WCHAR_T == 4
3a0d76bc
VS
92 #define WC_NAME "UCS4"
93 #define WC_BSWAP BSWAP_UCS4
94 #ifdef WORDS_BIGENDIAN
95 #define WC_NAME_BEST "UCS-4BE"
96 #else
97 #define WC_NAME_BEST "UCS-4LE"
98 #endif
1cd52418 99#elif SIZEOF_WCHAR_T == 2
3a0d76bc
VS
100 #define WC_NAME "UTF16"
101 #define WC_BSWAP BSWAP_UTF16
a3f2769e 102 #define WC_UTF16
3a0d76bc
VS
103 #ifdef WORDS_BIGENDIAN
104 #define WC_NAME_BEST "UTF-16BE"
105 #else
106 #define WC_NAME_BEST "UTF-16LE"
107 #endif
bab1e722 108#else // sizeof(wchar_t) != 2 nor 4
a3f2769e
VZ
109 // I don't know what to do about this
110 #error "Weird sizeof(wchar_t): please report your platform details to wx-users mailing list"
1cd52418
OK
111#endif
112
373658eb
VZ
113// ============================================================================
114// implementation
115// ============================================================================
116
117// ----------------------------------------------------------------------------
118// UTF-16 en/decoding
119// ----------------------------------------------------------------------------
6001e347 120
b0a6bb75
VZ
121#ifdef WC_UTF16
122
// Encode the code point 'input' as UTF-16.
//
// Returns the number of 16 bit units needed (1 or 2) or (size_t)-1 if 'input'
// is 0x110000 or greater. If 'output' is not NULL the units are also stored
// there, otherwise only the count is computed.
static size_t encode_utf16(wxUint32 input, wchar_t *output)
{
    // anything at or above 0x110000 can't be encoded
    if (input >= 0x110000)
        return (size_t)-1;

    // values up to 0xffff are stored as a single unit
    if (input <= 0xffff)
    {
        if (output)
            output[0] = (wchar_t) input;
        return 1;
    }

    // the rest is split in two units: the bits above the low 10 are offset by
    // 0xd7c0, the low 10 bits by 0xdc00
    if (output)
    {
        output[0] = (wchar_t) ((input >> 10) + 0xd7c0);
        output[1] = (wchar_t) ((input & 0x3ff) + 0xdc00);
    }
    return 2;
}
144
// Decode one code point from the UTF-16 data starting at 'input'.
//
// Returns the number of 16 bit units consumed (1 or 2) and stores the decoded
// value in 'output'. If 'input' starts with a surrogate which is not part of
// a well-formed pair (a lead 0xd800..0xdbff followed by a trail
// 0xdc00..0xdfff), (size_t)-1 is returned and 'output' is set to the
// offending unit itself so that the caller may skip over it.
static size_t decode_utf16(const wchar_t* input, wxUint32& output)
{
    // not a surrogate at all: the unit is the code point
    if ((*input < 0xd800) || (*input > 0xdfff))
    {
        output = *input;
        return 1;
    }

    // only 0xd800..0xdbff may start a pair, a trail surrogate in the first
    // position is an error; input[1] is only examined for a valid lead
    const bool isLead = *input <= 0xdbff;

    // NB: 0xdfff itself is a perfectly valid trail surrogate
    if (!isLead || (input[1] < 0xdc00) || (input[1] > 0xdfff))
    {
        output = *input;
        return (size_t)-1;
    }

    // inverse of the offsets applied by encode_utf16()
    output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
    return 2;
}
163
b0a6bb75
VZ
164#endif // WC_UTF16
165
f6bcfd97 166// ----------------------------------------------------------------------------
6001e347 167// wxMBConv
f6bcfd97 168// ----------------------------------------------------------------------------
6001e347 169
6001e347
RR
170size_t wxMBConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
171{
172 return wxMB2WC(buf, psz, n);
173}
174
175size_t wxMBConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
176{
177 return wxWC2MB(buf, psz, n);
178}
179
180const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
181{
f6bcfd97 182 if (psz)
6001e347
RR
183 {
184 size_t nLen = MB2WC((wchar_t *) NULL, psz, 0);
f6bcfd97
BP
185 if (nLen == (size_t)-1)
186 return wxWCharBuffer((wchar_t *) NULL);
6001e347
RR
187 wxWCharBuffer buf(nLen);
188 MB2WC((wchar_t *)(const wchar_t *) buf, psz, nLen);
189 return buf;
f6bcfd97
BP
190 }
191 else
6001e347
RR
192 return wxWCharBuffer((wchar_t *) NULL);
193}
194
195const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *psz) const
196{
f6bcfd97 197 if (psz)
6001e347
RR
198 {
199 size_t nLen = WC2MB((char *) NULL, psz, 0);
f6bcfd97
BP
200 if (nLen == (size_t)-1)
201 return wxCharBuffer((char *) NULL);
6001e347
RR
202 wxCharBuffer buf(nLen);
203 WC2MB((char *)(const char *) buf, psz, nLen);
204 return buf;
f6bcfd97
BP
205 }
206 else
6001e347
RR
207 return wxCharBuffer((char *) NULL);
208}
209
f6bcfd97 210// ----------------------------------------------------------------------------
6001e347 211// standard file conversion
f6bcfd97 212// ----------------------------------------------------------------------------
6001e347
RR
213
214WXDLLEXPORT_DATA(wxMBConvFile) wxConvFile;
215
216// just use the libc conversion for now
217size_t wxMBConvFile::MB2WC(wchar_t *buf, const char *psz, size_t n) const
218{
219 return wxMB2WC(buf, psz, n);
220}
221
222size_t wxMBConvFile::WC2MB(char *buf, const wchar_t *psz, size_t n) const
223{
224 return wxWC2MB(buf, psz, n);
225}
226
f6bcfd97 227// ----------------------------------------------------------------------------
6001e347 228// standard gdk conversion
f6bcfd97
BP
229// ----------------------------------------------------------------------------
230
231#ifdef __WXGTK12__
6001e347
RR
232
233WXDLLEXPORT_DATA(wxMBConvGdk) wxConvGdk;
234
235#include <gdk/gdk.h>
236
237size_t wxMBConvGdk::MB2WC(wchar_t *buf, const char *psz, size_t n) const
238{
dccce9ea 239 if (buf)
4def3b35
VS
240 {
241 return gdk_mbstowcs((GdkWChar *)buf, psz, n);
dccce9ea
VZ
242 }
243 else
4def3b35
VS
244 {
245 GdkWChar *nbuf = new GdkWChar[n=strlen(psz)];
246 size_t len = gdk_mbstowcs(nbuf, psz, n);
247 delete[] nbuf;
248 return len;
249 }
6001e347
RR
250}
251
252size_t wxMBConvGdk::WC2MB(char *buf, const wchar_t *psz, size_t n) const
253{
4def3b35
VS
254 char *mbstr = gdk_wcstombs((GdkWChar *)psz);
255 size_t len = mbstr ? strlen(mbstr) : 0;
dccce9ea 256 if (buf)
4def3b35 257 {
dccce9ea 258 if (len > n)
4def3b35
VS
259 len = n;
260 memcpy(buf, psz, len);
dccce9ea 261 if (len < n)
4def3b35
VS
262 buf[len] = 0;
263 }
264 return len;
6001e347 265}
f6bcfd97 266
6001e347
RR
267#endif // GTK > 1.0
268
269// ----------------------------------------------------------------------------
270// UTF-7
271// ----------------------------------------------------------------------------
272
273WXDLLEXPORT_DATA(wxMBConvUTF7) wxConvUTF7;
274
275#if 0
276static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
277 "abcdefghijklmnopqrstuvwxyz"
278 "0123456789'(),-./:?";
279static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
280static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
281 "abcdefghijklmnopqrstuvwxyz"
282 "0123456789+/";
283#endif
284
285// TODO: write actual implementations of UTF-7 here
286size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
287 const char * WXUNUSED(psz),
288 size_t WXUNUSED(n)) const
289{
290 return 0;
291}
292
293size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
294 const wchar_t * WXUNUSED(psz),
295 size_t WXUNUSED(n)) const
296{
297 return 0;
298}
299
f6bcfd97 300// ----------------------------------------------------------------------------
6001e347 301// UTF-8
f6bcfd97 302// ----------------------------------------------------------------------------
6001e347
RR
303
304WXDLLEXPORT_DATA(wxMBConvUTF8) wxConvUTF8;
305
dccce9ea 306static wxUint32 utf8_max[]=
4def3b35 307 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347
RR
308
309size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
310{
4def3b35
VS
311 size_t len = 0;
312
dccce9ea 313 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
314 {
315 unsigned char cc = *psz++, fc = cc;
316 unsigned cnt;
dccce9ea 317 for (cnt = 0; fc & 0x80; cnt++)
4def3b35 318 fc <<= 1;
dccce9ea 319 if (!cnt)
4def3b35
VS
320 {
321 // plain ASCII char
dccce9ea 322 if (buf)
4def3b35
VS
323 *buf++ = cc;
324 len++;
dccce9ea
VZ
325 }
326 else
4def3b35
VS
327 {
328 cnt--;
dccce9ea 329 if (!cnt)
4def3b35
VS
330 {
331 // invalid UTF-8 sequence
332 return (size_t)-1;
dccce9ea
VZ
333 }
334 else
4def3b35
VS
335 {
336 unsigned ocnt = cnt - 1;
337 wxUint32 res = cc & (0x3f >> cnt);
dccce9ea 338 while (cnt--)
4def3b35
VS
339 {
340 cc = *psz++;
dccce9ea 341 if ((cc & 0xC0) != 0x80)
4def3b35
VS
342 {
343 // invalid UTF-8 sequence
344 return (size_t)-1;
345 }
346 res = (res << 6) | (cc & 0x3f);
347 }
dccce9ea 348 if (res <= utf8_max[ocnt])
4def3b35
VS
349 {
350 // illegal UTF-8 encoding
351 return (size_t)-1;
352 }
1cd52418 353#ifdef WC_UTF16
4def3b35
VS
354 size_t pa = encode_utf16(res, buf);
355 if (pa == (size_t)-1)
356 return (size_t)-1;
dccce9ea 357 if (buf)
4def3b35
VS
358 buf += pa;
359 len += pa;
373658eb 360#else // !WC_UTF16
dccce9ea 361 if (buf)
4def3b35
VS
362 *buf++ = res;
363 len++;
373658eb 364#endif // WC_UTF16/!WC_UTF16
4def3b35
VS
365 }
366 }
6001e347 367 }
dccce9ea 368 if (buf && (len < n))
4def3b35
VS
369 *buf = 0;
370 return len;
6001e347
RR
371}
372
373size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
374{
4def3b35 375 size_t len = 0;
6001e347 376
dccce9ea 377 while (*psz && ((!buf) || (len < n)))
4def3b35
VS
378 {
379 wxUint32 cc;
1cd52418 380#ifdef WC_UTF16
eccf1b2c 381 size_t pa = decode_utf16(psz, cc);
4def3b35 382 psz += (pa == (size_t)-1) ? 1 : pa;
1cd52418 383#else
4def3b35
VS
384 cc=(*psz++) & 0x7fffffff;
385#endif
386 unsigned cnt;
387 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
dccce9ea 388 if (!cnt)
4def3b35
VS
389 {
390 // plain ASCII char
dccce9ea 391 if (buf)
574c939e 392 *buf++ = (char) cc;
4def3b35 393 len++;
dccce9ea
VZ
394 }
395
396 else
4def3b35
VS
397 {
398 len += cnt + 1;
dccce9ea 399 if (buf)
4def3b35 400 {
574c939e 401 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
4def3b35 402 while (cnt--)
574c939e 403 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
4def3b35
VS
404 }
405 }
6001e347 406 }
4def3b35
VS
407
408 if (buf && (len<n)) *buf = 0;
409 return len;
6001e347
RR
410}
411
36acb880
VZ
412// ============================================================================
413// wxCharacterSet and derived classes
414// ============================================================================
415
416// ----------------------------------------------------------------------------
417// wxCharacterSet is the ABC for the classes below
418// ----------------------------------------------------------------------------
419
6001e347
RR
420class wxCharacterSet
421{
1cd52418 422public:
4f61e22c
VS
423 wxCharacterSet(const wxChar*name) : cname(name) {}
424 virtual ~wxCharacterSet() {}
425 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) = 0;
426 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) = 0;
427 virtual bool usable() const = 0;
f1339c56
RR
428public:
429 const wxChar*cname;
1cd52418
OK
430};
431
36acb880
VZ
432// ----------------------------------------------------------------------------
433// ID_CharSet: implementation of wxCharacterSet using an existing wxMBConv
434// ----------------------------------------------------------------------------
435
1cd52418
OK
436class ID_CharSet : public wxCharacterSet
437{
438public:
36acb880 439 ID_CharSet(const wxChar *name, wxMBConv *cnv)
f1339c56 440 : wxCharacterSet(name), work(cnv) {}
dccce9ea 441
4def3b35 442 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
f1339c56 443 { return work ? work->MB2WC(buf,psz,n) : (size_t)-1; }
dccce9ea 444
4def3b35 445 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
f1339c56 446 { return work ? work->WC2MB(buf,psz,n) : (size_t)-1; }
dccce9ea 447
4f61e22c 448 bool usable() const
f1339c56
RR
449 { return work!=NULL; }
450public:
451 wxMBConv*work;
1cd52418
OK
452};
453
3caec1bb 454
36acb880
VZ
455// ============================================================================
456// The classes doing conversion using the iconv_xxx() functions
457// ============================================================================
3caec1bb 458
b040e242 459#ifdef HAVE_ICONV
3a0d76bc 460
3caec1bb
VS
461// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
462// if output buffer is _exactly_ as big as needed. Such case is (unless there's
463// yet another bug in glibc) the only case when iconv() returns with (size_t)-1
464// (which means error) and says there are 0 bytes left in the input buffer --
465// when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
466// this alternative test for iconv() failure.
467// [This bug does not appear in glibc 2.2.]
468#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
469#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
470 (errno != E2BIG || bufLeft != 0))
471#else
472#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
473#endif
474
b040e242 475#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880
VZ
476
477// ----------------------------------------------------------------------------
478// IC_CharSet: encapsulates an iconv character set
479// ----------------------------------------------------------------------------
480
1cd52418
OK
481class IC_CharSet : public wxCharacterSet
482{
483public:
36acb880
VZ
484 IC_CharSet(const wxChar *name);
485 virtual ~IC_CharSet();
486
487 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n);
488 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n);
489
490 bool usable() const
491 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
492
493protected:
494 // the iconv handlers used to translate from multibyte to wide char and in
495 // the other direction
496 iconv_t m2w,
497 w2m;
498
499private:
500 // the name (for iconv_open()) of a wide char charset - if none is
501 // available on this machine, it will remain NULL
502 static const char *ms_wcCharsetName;
503
504 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
505 // different endian-ness than the native one
405d8f46 506 static bool ms_wcNeedsSwap;
36acb880
VZ
507};
508
509const char *IC_CharSet::ms_wcCharsetName = NULL;
405d8f46 510bool IC_CharSet::ms_wcNeedsSwap = FALSE;
36acb880
VZ
511
512IC_CharSet::IC_CharSet(const wxChar *name)
513 : wxCharacterSet(name)
514{
515 // check for charset that represents wchar_t:
516 if (ms_wcCharsetName == NULL)
f1339c56 517 {
36acb880 518 ms_wcNeedsSwap = FALSE;
dccce9ea 519
36acb880
VZ
520 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
521 ms_wcCharsetName = WC_NAME_BEST;
522 m2w = iconv_open(ms_wcCharsetName, wxConvLibc.cWX2MB(name));
3a0d76bc 523
36acb880
VZ
524 if (m2w == (iconv_t)-1)
525 {
526 // try charset w/o bytesex info (e.g. "UCS4")
527 // and check for bytesex ourselves:
528 ms_wcCharsetName = WC_NAME;
529 m2w = iconv_open(ms_wcCharsetName, wxConvLibc.cWX2MB(name));
530
531 // last bet, try if it knows WCHAR_T pseudo-charset
3a0d76bc
VS
532 if (m2w == (iconv_t)-1)
533 {
36acb880
VZ
534 ms_wcCharsetName = "WCHAR_T";
535 m2w = iconv_open(ms_wcCharsetName, wxConvLibc.cWX2MB(name));
536 }
3a0d76bc 537
36acb880
VZ
538 if (m2w != (iconv_t)-1)
539 {
540 char buf[2], *bufPtr;
541 wchar_t wbuf[2], *wbufPtr;
542 size_t insz, outsz;
543 size_t res;
544
545 buf[0] = 'A';
546 buf[1] = 0;
547 wbuf[0] = 0;
548 insz = 2;
549 outsz = SIZEOF_WCHAR_T * 2;
550 wbufPtr = wbuf;
551 bufPtr = buf;
552
553 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
554 (char**)&wbufPtr, &outsz);
555
556 if (ICONV_FAILED(res, insz))
3a0d76bc 557 {
36acb880
VZ
558 ms_wcCharsetName = NULL;
559 wxLogLastError(wxT("iconv"));
560 wxLogError(_("Convertion to charset '%s' doesn't work."), name);
3a0d76bc
VS
561 }
562 else
563 {
36acb880 564 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
3a0d76bc
VS
565 }
566 }
36acb880
VZ
567 else
568 {
569 ms_wcCharsetName = NULL;
373658eb 570
957686c8
VS
571 // VS: we must not output an error here, since wxWindows will safely
572 // fall back to using wxEncodingConverter.
573 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
574 //wxLogError(
36acb880 575 }
3a0d76bc 576 }
36acb880 577 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
3a0d76bc 578 }
36acb880 579 else // we already have ms_wcCharsetName
3caec1bb 580 {
36acb880 581 m2w = iconv_open(ms_wcCharsetName, wxConvLibc.cWX2MB(name));
f1339c56 582 }
dccce9ea 583
36acb880
VZ
584 // NB: don't ever pass NULL to iconv_open(), it may crash!
585 if ( ms_wcCharsetName )
f1339c56 586 {
36acb880
VZ
587 w2m = iconv_open(wxConvLibc.cWX2MB(name), ms_wcCharsetName);
588 }
405d8f46
VZ
589 else
590 {
591 w2m = (iconv_t)-1;
592 }
36acb880 593}
3caec1bb 594
36acb880
VZ
595IC_CharSet::~IC_CharSet()
596{
597 if ( m2w != (iconv_t)-1 )
598 iconv_close(m2w);
599 if ( w2m != (iconv_t)-1 )
600 iconv_close(w2m);
601}
3a0d76bc 602
36acb880
VZ
603size_t IC_CharSet::MB2WC(wchar_t *buf, const char *psz, size_t n)
604{
605 size_t inbuf = strlen(psz);
606 size_t outbuf = n * SIZEOF_WCHAR_T;
607 size_t res, cres;
608 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
609 wchar_t *bufPtr = buf;
610 const char *pszPtr = psz;
611
612 if (buf)
613 {
614 // have destination buffer, convert there
615 cres = iconv(m2w,
616 ICONV_CHAR_CAST(&pszPtr), &inbuf,
617 (char**)&bufPtr, &outbuf);
618 res = n - (outbuf / SIZEOF_WCHAR_T);
dccce9ea 619
36acb880 620 if (ms_wcNeedsSwap)
3a0d76bc 621 {
36acb880
VZ
622 // convert to native endianness
623 WC_BSWAP(buf /* _not_ bufPtr */, res)
3a0d76bc 624 }
36acb880
VZ
625 }
626 else
627 {
628 // no destination buffer... convert using temp buffer
629 // to calculate destination buffer requirement
630 wchar_t tbuf[8];
631 res = 0;
632 do {
633 bufPtr = tbuf;
634 outbuf = 8*SIZEOF_WCHAR_T;
635
636 cres = iconv(m2w,
637 ICONV_CHAR_CAST(&pszPtr), &inbuf,
638 (char**)&bufPtr, &outbuf );
639
640 res += 8-(outbuf/SIZEOF_WCHAR_T);
641 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 642 }
dccce9ea 643
36acb880 644 if (ICONV_FAILED(cres, inbuf))
f1339c56 645 {
36acb880
VZ
646 //VS: it is ok if iconv fails, hence trace only
647 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
648 return (size_t)-1;
649 }
650
651 return res;
652}
653
654size_t IC_CharSet::WC2MB(char *buf, const wchar_t *psz, size_t n)
655{
1cd52418 656#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
36acb880 657 size_t inbuf = std::wcslen(psz) * SIZEOF_WCHAR_T;
1cd52418 658#else
36acb880 659 size_t inbuf = ::wcslen(psz) * SIZEOF_WCHAR_T;
1cd52418 660#endif
36acb880
VZ
661 size_t outbuf = n;
662 size_t res, cres;
3a0d76bc 663
36acb880 664 wchar_t *tmpbuf = 0;
3caec1bb 665
36acb880
VZ
666 if (ms_wcNeedsSwap)
667 {
668 // need to copy to temp buffer to switch endianness
669 // this absolutely doesn't rock!
670 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
671 // could be in read-only memory, or be accessed in some other thread)
672 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
673 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
674 WC_BSWAP(tmpbuf, inbuf)
675 psz=tmpbuf;
676 }
3a0d76bc 677
36acb880
VZ
678 if (buf)
679 {
680 // have destination buffer, convert there
681 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
3a0d76bc 682
36acb880
VZ
683 res = n-outbuf;
684 }
685 else
686 {
687 // no destination buffer... convert using temp buffer
688 // to calculate destination buffer requirement
689 char tbuf[16];
690 res = 0;
691 do {
692 buf = tbuf; outbuf = 16;
693
694 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
dccce9ea 695
36acb880
VZ
696 res += 16 - outbuf;
697 } while ((cres==(size_t)-1) && (errno==E2BIG));
f1339c56 698 }
dccce9ea 699
36acb880
VZ
700 if (ms_wcNeedsSwap)
701 {
702 free(tmpbuf);
703 }
dccce9ea 704
36acb880
VZ
705 if (ICONV_FAILED(cres, inbuf))
706 {
707 //VS: it is ok if iconv fails, hence trace only
708 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
709 return (size_t)-1;
710 }
711
712 return res;
713}
714
b040e242 715#endif // HAVE_ICONV
36acb880
VZ
716
717// ============================================================================
718// Win32 conversion classes
719// ============================================================================
1cd52418 720
04ef50df 721#if defined(__WIN32__) && !defined(__WXMICROWIN__)
373658eb
VZ
722
723extern long wxCharsetToCodepage(const wxChar *charset); // from utils.cpp
724
1cd52418
OK
725class CP_CharSet : public wxCharacterSet
726{
727public:
b1d66b54
VZ
728 CP_CharSet(const wxChar* name)
729 : wxCharacterSet(name)
730 {
731 m_CodePage = wxCharsetToCodepage(name);
732 }
dccce9ea 733
4def3b35 734 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
f1339c56 735 {
dccce9ea 736 size_t len =
b1d66b54 737 MultiByteToWideChar(m_CodePage, 0, psz, -1, buf, buf ? n : 0);
1e6feb95 738 //VS: returns # of written chars for buf!=NULL and *size*
35d764b0
VS
739 // needed buffer for buf==NULL
740 return len ? (buf ? len : len-1) : (size_t)-1;
f1339c56 741 }
dccce9ea 742
4def3b35 743 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
f1339c56 744 {
b1d66b54 745 size_t len = WideCharToMultiByte(m_CodePage, 0, psz, -1, buf,
4def3b35 746 buf ? n : 0, NULL, NULL);
1e6feb95 747 //VS: returns # of written chars for buf!=NULL and *size*
35d764b0
VS
748 // needed buffer for buf==NULL
749 return len ? (buf ? len : len-1) : (size_t)-1;
f1339c56 750 }
dccce9ea 751
4f61e22c 752 bool usable() const
b1d66b54 753 { return m_CodePage != -1; }
f1339c56
RR
754
755public:
b1d66b54 756 long m_CodePage;
1cd52418 757};
1e6feb95
VZ
758#endif // __WIN32__
759
36acb880
VZ
760// ============================================================================
761// wxEncodingConverter based conversion classes
762// ============================================================================
763
1e6feb95 764#if wxUSE_FONTMAP
1cd52418
OK
765
766class EC_CharSet : public wxCharacterSet
767{
6001e347 768public:
f1339c56
RR
769 // temporarily just use wxEncodingConverter stuff,
770 // so that it works while a better implementation is built
b1d66b54
VZ
771 EC_CharSet(const wxChar* name) : wxCharacterSet(name),
772 enc(wxFONTENCODING_SYSTEM)
f1339c56
RR
773 {
774 if (name)
775 enc = wxTheFontMapper->CharsetToEncoding(name, FALSE);
cafbf6fb
VZ
776
777 m_ok = m2w.Init(enc, wxFONTENCODING_UNICODE) &&
778 w2m.Init(wxFONTENCODING_UNICODE, enc);
f1339c56 779 }
dccce9ea 780
574c939e 781 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n))
f1339c56
RR
782 {
783 size_t inbuf = strlen(psz);
dccce9ea 784 if (buf)
4def3b35 785 m2w.Convert(psz,buf);
f1339c56
RR
786 return inbuf;
787 }
dccce9ea 788
574c939e 789 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n))
f1339c56 790 {
de85a884
VZ
791#if ( defined(__BORLANDC__) && (__BORLANDC__ > 0x530) ) \
792 || ( defined(__MWERKS__) && defined(__WXMSW__) )
f1339c56 793 size_t inbuf = std::wcslen(psz);
1cd52418 794#else
f1339c56 795 size_t inbuf = ::wcslen(psz);
1cd52418 796#endif
f1339c56
RR
797 if (buf)
798 w2m.Convert(psz,buf);
dccce9ea 799
f1339c56
RR
800 return inbuf;
801 }
dccce9ea 802
cafbf6fb 803 bool usable() const { return m_ok; }
f1339c56
RR
804
805public:
806 wxFontEncoding enc;
807 wxEncodingConverter m2w, w2m;
cafbf6fb
VZ
808
809 // were we initialized successfully?
810 bool m_ok;
f6bcfd97 811};
6001e347 812
1e6feb95
VZ
813#endif // wxUSE_FONTMAP
814
36acb880
VZ
815// ----------------------------------------------------------------------------
816// the function creating the wxCharacterSet for the specified charset on the
817// current system, trying all possibilities
818// ----------------------------------------------------------------------------
819
f6bcfd97 820static wxCharacterSet *wxGetCharacterSet(const wxChar *name)
6001e347 821{
cafbf6fb
VZ
822 // check for the special case of ASCII charset
823#if wxUSE_FONTMAP
824 if ( wxTheFontMapper->CharsetToEncoding(name) == wxFONTENCODING_DEFAULT )
825#else // wxUSE_FONTMAP
826 if ( !name )
827#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
f1339c56 828 {
cafbf6fb
VZ
829 // don't convert at all
830 return NULL;
1cd52418 831 }
dccce9ea 832
cafbf6fb
VZ
833 // the test above must have taken care of this case
834 wxCHECK_MSG( name, NULL, _T("NULL name must be wxFONTENCODING_DEFAULT") );
1e6feb95 835
cafbf6fb
VZ
836 wxCharacterSet *cset;
837
838 if ( wxStricmp(name, wxT("UTF8")) == 0 || wxStricmp(name, wxT("UTF-8")) == 0)
839 {
840 cset = new ID_CharSet(name, &wxConvUTF8);
841 }
842 else
dccce9ea 843 {
cafbf6fb
VZ
844#ifdef HAVE_ICONV
845 cset = new IC_CharSet(name);
846#else // !HAVE_ICONV
dccce9ea 847 cset = NULL;
cafbf6fb 848#endif // HAVE_ICONV/!HAVE_ICONV
dccce9ea
VZ
849 }
850
734eda8a
VZ
851 // it can only be NULL in this case
852#ifndef HAVE_ICONV
853 if ( cset )
854#endif // !HAVE_ICONV
855 {
856 if ( cset->usable() )
857 return cset;
cafbf6fb 858
734eda8a
VZ
859 delete cset;
860 cset = NULL;
861 }
cafbf6fb 862
04ef50df 863#if defined(__WIN32__) && !defined(__WXMICROWIN__)
cafbf6fb
VZ
864 cset = new CP_CharSet(name);
865 if ( cset->usable() )
dccce9ea
VZ
866 return cset;
867
868 delete cset;
cafbf6fb 869 cset = NULL;
dccce9ea
VZ
870#endif // __WIN32__
871
1e6feb95 872#if wxUSE_FONTMAP
f1339c56 873 cset = new EC_CharSet(name);
cafbf6fb 874 if ( cset->usable() )
dccce9ea
VZ
875 return cset;
876
f1339c56 877 delete cset;
cafbf6fb
VZ
878 cset = NULL;
879#endif // wxUSE_FONTMAP
880
881 wxLogError(_("Cannot convert from encoding '%s'!"), name);
882
f1339c56 883 return NULL;
6001e347
RR
884}
885
36acb880
VZ
886// ============================================================================
887// wxCSConv implementation
888// ============================================================================
889
6001e347
RR
890wxCSConv::wxCSConv(const wxChar *charset)
891{
dccce9ea 892 m_name = (wxChar *)NULL;
f1339c56 893 m_cset = (wxCharacterSet *) NULL;
82713003
VZ
894 m_deferred = TRUE;
895
f1339c56 896 SetName(charset);
6001e347
RR
897}
898
899wxCSConv::~wxCSConv()
900{
dccce9ea
VZ
901 free(m_name);
902 delete m_cset;
6001e347
RR
903}
904
905void wxCSConv::SetName(const wxChar *charset)
906{
f1339c56
RR
907 if (charset)
908 {
909 m_name = wxStrdup(charset);
910 m_deferred = TRUE;
911 }
6001e347
RR
912}
913
914void wxCSConv::LoadNow()
915{
f1339c56
RR
916 if (m_deferred)
917 {
dccce9ea 918 if ( !m_name )
f1339c56 919 {
dccce9ea
VZ
920 wxString name = wxLocale::GetSystemEncodingName();
921 if ( !name.empty() )
922 SetName(name);
f1339c56 923 }
dccce9ea 924
a45a98fb
VZ
925 // wxGetCharacterSet() complains about NULL name
926 m_cset = m_name ? wxGetCharacterSet(m_name) : NULL;
f1339c56 927 m_deferred = FALSE;
6001e347 928 }
6001e347
RR
929}
930
931size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
932{
f1339c56 933 ((wxCSConv *)this)->LoadNow(); // discard constness
dccce9ea 934
f1339c56
RR
935 if (m_cset)
936 return m_cset->MB2WC(buf, psz, n);
937
938 // latin-1 (direct)
4def3b35 939 size_t len = strlen(psz);
dccce9ea 940
f1339c56
RR
941 if (buf)
942 {
4def3b35 943 for (size_t c = 0; c <= len; c++)
f1339c56
RR
944 buf[c] = (unsigned char)(psz[c]);
945 }
dccce9ea 946
f1339c56 947 return len;
6001e347
RR
948}
949
950size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
951{
f1339c56 952 ((wxCSConv *)this)->LoadNow(); // discard constness
dccce9ea 953
f1339c56
RR
954 if (m_cset)
955 return m_cset->WC2MB(buf, psz, n);
1cd52418 956
f1339c56 957 // latin-1 (direct)
de85a884
VZ
958#if ( defined(__BORLANDC__) && (__BORLANDC__ > 0x530) ) \
959 || ( defined(__MWERKS__) && defined(__WXMSW__) )
f1339c56 960 size_t len=std::wcslen(psz);
d834f22c 961#else
f1339c56 962 size_t len=::wcslen(psz);
d834f22c 963#endif
f1339c56
RR
964 if (buf)
965 {
4def3b35
VS
966 for (size_t c = 0; c <= len; c++)
967 buf[c] = (psz[c] > 0xff) ? '?' : psz[c];
f1339c56 968 }
dccce9ea 969
f1339c56 970 return len;
6001e347
RR
971}
972
f6bcfd97 973#endif // wxUSE_WCHAR_T
6001e347
RR
974
975