]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
added wxURLDataObject which unfortunately doesn't seem to work
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
9 // Licence: wxWindows license
10 /////////////////////////////////////////////////////////////////////////////
11
12 // ============================================================================
13 // declarations
14 // ============================================================================
15
16 // ----------------------------------------------------------------------------
17 // headers
18 // ----------------------------------------------------------------------------
19
20 #ifdef __GNUG__
21 #pragma implementation "strconv.h"
22 #endif
23
24 // For compilers that support precompilation, includes "wx.h".
25 #include "wx/wxprec.h"
26
27 #ifdef __BORLANDC__
28 #pragma hdrstop
29 #endif
30
31 #ifdef __WXMSW__
32 #include "wx/msw/private.h"
33 #endif
34
35 #include <errno.h>
36 #include <ctype.h>
37 #include <string.h>
38 #include <stdlib.h>
39
40 #ifdef __SALFORDC__
41 #include <clib.h>
42 #endif
43
44 #ifdef HAVE_ICONV_H
45 #include <iconv.h>
46 #endif
47
48 #ifdef __WXMSW__
49 #include <windows.h>
50 #endif
51
52 #include "wx/debug.h"
53 #include "wx/strconv.h"
54 #include "wx/intl.h"
55 #include "wx/log.h"
56
// Byte-swap, in place, the first 'len' elements of the array 'str'.
// Both macros expand to a complete brace-enclosed statement, so they can be
// used without a trailing semicolon.
#define BSWAP_UCS4(str, len) \
    { \
        unsigned _i; \
        for (_i = 0; _i < len; _i++) \
            str[_i] = wxUINT32_SWAP_ALWAYS(str[_i]); \
    }
#define BSWAP_UTF16(str, len) \
    { \
        unsigned _i; \
        for (_i = 0; _i < len; _i++) \
            str[_i] = wxUINT16_SWAP_ALWAYS(str[_i]); \
    }
59
// under Unix SIZEOF_WCHAR_T is defined by configure, but on other platforms
// it might not be defined - assume the most common value
62 #ifndef SIZEOF_WCHAR_T
63 #define SIZEOF_WCHAR_T 2
64 #endif // !defined(SIZEOF_WCHAR_T)
65
66 #if SIZEOF_WCHAR_T == 4
67 #define WC_NAME "UCS4"
68 #define WC_BSWAP BSWAP_UCS4
69 #ifdef WORDS_BIGENDIAN
70 #define WC_NAME_BEST "UCS-4BE"
71 #else
72 #define WC_NAME_BEST "UCS-4LE"
73 #endif
74 #elif SIZEOF_WCHAR_T == 2
75 #define WC_NAME "UTF16"
76 #define WC_BSWAP BSWAP_UTF16
77 #define WC_UTF16
78 #ifdef WORDS_BIGENDIAN
79 #define WC_NAME_BEST "UTF-16BE"
80 #else
81 #define WC_NAME_BEST "UTF-16LE"
82 #endif
83 #else // sizeof(wchar_t) != 2 nor 4
84 // I don't know what to do about this
85 #error "Weird sizeof(wchar_t): please report your platform details to wx-users mailing list"
86 #endif
87
88 // ----------------------------------------------------------------------------
89 // globals
90 // ----------------------------------------------------------------------------
91
92 WXDLLEXPORT_DATA(wxMBConv *) wxConvCurrent = &wxConvLibc;
93
94 // ============================================================================
95 // implementation
96 // ============================================================================
97
98 #if wxUSE_WCHAR_T
99
100 #ifdef WC_UTF16
101
102 static size_t encode_utf16(wxUint32 input, wchar_t *output)
103 {
104 if (input<=0xffff)
105 {
106 if (output) *output++ = input;
107 return 1;
108 }
109 else if (input>=0x110000)
110 {
111 return (size_t)-1;
112 }
113 else
114 {
115 if (output)
116 {
117 *output++ = (input >> 10)+0xd7c0;
118 *output++ = (input&0x3ff)+0xdc00;
119 }
120 return 2;
121 }
122 }
123
124 static size_t decode_utf16(const wchar_t* input, wxUint32& output)
125 {
126 if ((*input<0xd800) || (*input>0xdfff))
127 {
128 output = *input;
129 return 1;
130 }
131 else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
132 {
133 output = *input;
134 return (size_t)-1;
135 }
136 else
137 {
138 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
139 return 2;
140 }
141 }
142
143 #endif // WC_UTF16
144
145 // ----------------------------------------------------------------------------
146 // wxMBConv
147 // ----------------------------------------------------------------------------
148
149 WXDLLEXPORT_DATA(wxMBConv) wxConvLibc;
150
151 size_t wxMBConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
152 {
153 return wxMB2WC(buf, psz, n);
154 }
155
156 size_t wxMBConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
157 {
158 return wxWC2MB(buf, psz, n);
159 }
160
161 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
162 {
163 if (psz)
164 {
165 size_t nLen = MB2WC((wchar_t *) NULL, psz, 0);
166 if (nLen == (size_t)-1)
167 return wxWCharBuffer((wchar_t *) NULL);
168 wxWCharBuffer buf(nLen);
169 MB2WC((wchar_t *)(const wchar_t *) buf, psz, nLen);
170 return buf;
171 }
172 else
173 return wxWCharBuffer((wchar_t *) NULL);
174 }
175
176 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *psz) const
177 {
178 if (psz)
179 {
180 size_t nLen = WC2MB((char *) NULL, psz, 0);
181 if (nLen == (size_t)-1)
182 return wxCharBuffer((char *) NULL);
183 wxCharBuffer buf(nLen);
184 WC2MB((char *)(const char *) buf, psz, nLen);
185 return buf;
186 }
187 else
188 return wxCharBuffer((char *) NULL);
189 }
190
191 // ----------------------------------------------------------------------------
192 // standard file conversion
193 // ----------------------------------------------------------------------------
194
195 WXDLLEXPORT_DATA(wxMBConvFile) wxConvFile;
196
197 // just use the libc conversion for now
198 size_t wxMBConvFile::MB2WC(wchar_t *buf, const char *psz, size_t n) const
199 {
200 return wxMB2WC(buf, psz, n);
201 }
202
203 size_t wxMBConvFile::WC2MB(char *buf, const wchar_t *psz, size_t n) const
204 {
205 return wxWC2MB(buf, psz, n);
206 }
207
208 // ----------------------------------------------------------------------------
209 // standard gdk conversion
210 // ----------------------------------------------------------------------------
211
212 #ifdef __WXGTK12__
213
214 WXDLLEXPORT_DATA(wxMBConvGdk) wxConvGdk;
215
216 #include <gdk/gdk.h>
217
218 size_t wxMBConvGdk::MB2WC(wchar_t *buf, const char *psz, size_t n) const
219 {
220 if (buf)
221 {
222 return gdk_mbstowcs((GdkWChar *)buf, psz, n);
223 }
224 else
225 {
226 GdkWChar *nbuf = new GdkWChar[n=strlen(psz)];
227 size_t len = gdk_mbstowcs(nbuf, psz, n);
228 delete[] nbuf;
229 return len;
230 }
231 }
232
233 size_t wxMBConvGdk::WC2MB(char *buf, const wchar_t *psz, size_t n) const
234 {
235 char *mbstr = gdk_wcstombs((GdkWChar *)psz);
236 size_t len = mbstr ? strlen(mbstr) : 0;
237 if (buf)
238 {
239 if (len > n)
240 len = n;
241 memcpy(buf, psz, len);
242 if (len < n)
243 buf[len] = 0;
244 }
245 return len;
246 }
247
248 #endif // GTK > 1.0
249
250 // ----------------------------------------------------------------------------
251 // UTF-7
252 // ----------------------------------------------------------------------------
253
254 WXDLLEXPORT_DATA(wxMBConvUTF7) wxConvUTF7;
255
256 #if 0
257 static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
258 "abcdefghijklmnopqrstuvwxyz"
259 "0123456789'(),-./:?";
260 static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
261 static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
262 "abcdefghijklmnopqrstuvwxyz"
263 "0123456789+/";
264 #endif
265
266 // TODO: write actual implementations of UTF-7 here
267 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
268 const char * WXUNUSED(psz),
269 size_t WXUNUSED(n)) const
270 {
271 return 0;
272 }
273
274 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
275 const wchar_t * WXUNUSED(psz),
276 size_t WXUNUSED(n)) const
277 {
278 return 0;
279 }
280
281 // ----------------------------------------------------------------------------
282 // UTF-8
283 // ----------------------------------------------------------------------------
284
285 WXDLLEXPORT_DATA(wxMBConvUTF8) wxConvUTF8;
286
287 static wxUint32 utf8_max[]=
288 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
289
290 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
291 {
292 size_t len = 0;
293
294 while (*psz && ((!buf) || (len < n)))
295 {
296 unsigned char cc = *psz++, fc = cc;
297 unsigned cnt;
298 for (cnt = 0; fc & 0x80; cnt++)
299 fc <<= 1;
300 if (!cnt)
301 {
302 // plain ASCII char
303 if (buf)
304 *buf++ = cc;
305 len++;
306 }
307 else
308 {
309 cnt--;
310 if (!cnt)
311 {
312 // invalid UTF-8 sequence
313 return (size_t)-1;
314 }
315 else
316 {
317 unsigned ocnt = cnt - 1;
318 wxUint32 res = cc & (0x3f >> cnt);
319 while (cnt--)
320 {
321 cc = *psz++;
322 if ((cc & 0xC0) != 0x80)
323 {
324 // invalid UTF-8 sequence
325 return (size_t)-1;
326 }
327 res = (res << 6) | (cc & 0x3f);
328 }
329 if (res <= utf8_max[ocnt])
330 {
331 // illegal UTF-8 encoding
332 return (size_t)-1;
333 }
334 #ifdef WC_UTF16
335 size_t pa = encode_utf16(res, buf);
336 if (pa == (size_t)-1)
337 return (size_t)-1;
338 if (buf)
339 buf += pa;
340 len += pa;
341 #else
342 if (buf)
343 *buf++ = res;
344 len++;
345 #endif
346 }
347 }
348 }
349 if (buf && (len < n))
350 *buf = 0;
351 return len;
352 }
353
354 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
355 {
356 size_t len = 0;
357
358 while (*psz && ((!buf) || (len < n)))
359 {
360 wxUint32 cc;
361 #ifdef WC_UTF16
362 size_t pa = decode_utf16(psz, cc);
363 psz += (pa == (size_t)-1) ? 1 : pa;
364 #else
365 cc=(*psz++) & 0x7fffffff;
366 #endif
367 unsigned cnt;
368 for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
369 if (!cnt)
370 {
371 // plain ASCII char
372 if (buf)
373 *buf++ = cc;
374 len++;
375 }
376
377 else
378 {
379 len += cnt + 1;
380 if (buf)
381 {
382 *buf++ = (-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt));
383 while (cnt--)
384 *buf++ = 0x80 | ((cc >> (cnt * 6)) & 0x3f);
385 }
386 }
387 }
388
389 if (buf && (len<n)) *buf = 0;
390 return len;
391 }
392
393 // ----------------------------------------------------------------------------
394 // specified character set
395 // ----------------------------------------------------------------------------
396
397 WXDLLEXPORT_DATA(wxCSConv) wxConvLocal((const wxChar *)NULL);
398
399 #include "wx/encconv.h"
400 #include "wx/fontmap.h"
401
402 // TODO: add some tables here
403 // - perhaps common encodings to common codepages (for Win32)
404 // - perhaps common encodings to objects ("UTF8" -> wxConvUTF8)
405 // - move wxEncodingConverter meat in here
406
407 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
408 #include "wx/msw/registry.h"
409 // this should work if M$ Internet Exploiter is installed
410 static long CharsetToCodepage(const wxChar *name)
411 {
412 if (!name)
413 return GetACP();
414
415 long CP=-1;
416
417 wxString cn(name);
418 do {
419 wxString path(wxT("MIME\\Database\\Charset\\"));
420 path += cn;
421 wxRegKey key(wxRegKey::HKCR, path);
422
423 if (!key.Exists()) break;
424
425 // two cases: either there's an AliasForCharset string,
426 // or there are Codepage and InternetEncoding dwords.
427 // The InternetEncoding gives us the actual encoding,
428 // the Codepage just says which Windows character set to
429 // use when displaying the data.
430 if (key.HasValue(wxT("InternetEncoding")) &&
431 key.QueryValue(wxT("InternetEncoding"), &CP)) break;
432
433 // no encoding, see if it's an alias
434 if (!key.HasValue(wxT("AliasForCharset")) ||
435 !key.QueryValue(wxT("AliasForCharset"), cn)) break;
436 } while (1);
437
438 return CP;
439 }
440 #endif
441
442 class wxCharacterSet
443 {
444 public:
445 wxCharacterSet(const wxChar*name)
446 : cname(name) {}
447 virtual ~wxCharacterSet()
448 {}
449 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
450 { return (size_t)-1; }
451 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
452 { return (size_t)-1; }
453 virtual bool usable()
454 { return FALSE; }
455 public:
456 const wxChar*cname;
457 };
458
459 class ID_CharSet : public wxCharacterSet
460 {
461 public:
462 ID_CharSet(const wxChar *name,wxMBConv *cnv)
463 : wxCharacterSet(name), work(cnv) {}
464
465 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
466 { return work ? work->MB2WC(buf,psz,n) : (size_t)-1; }
467
468 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
469 { return work ? work->WC2MB(buf,psz,n) : (size_t)-1; }
470
471 bool usable()
472 { return work!=NULL; }
473 public:
474 wxMBConv*work;
475 };
476
477
478 #ifdef HAVE_ICONV_H
479
480 bool g_wcNeedsSwap = FALSE;
481 static const char *g_wcCharset = NULL;
482
483 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
484 // if output buffer is _exactly_ as big as needed. Such case is (unless there's
485 // yet another bug in glibc) the only case when iconv() returns with (size_t)-1
486 // (which means error) and says there are 0 bytes left in the input buffer --
487 // when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
488 // this alternative test for iconv() failure.
489 // [This bug does not appear in glibc 2.2.]
490 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
491 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
492 (errno != E2BIG || bufLeft != 0))
493 #else
494 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
495 #endif
496
497 class IC_CharSet : public wxCharacterSet
498 {
499 public:
500 IC_CharSet(const wxChar *name)
501 : wxCharacterSet(name)
502 {
503 // check for charset that represents wchar_t:
504 if (g_wcCharset == NULL)
505 {
506 g_wcNeedsSwap = FALSE;
507
508 // try charset with explicit bytesex info (e.g. "UCS-4LE"):
509 g_wcCharset = WC_NAME_BEST;
510 m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name));
511
512 if (m2w == (iconv_t)-1)
513 {
514 // try charset w/o bytesex info (e.g. "UCS4")
515 // and check for bytesex ourselves:
516 g_wcCharset = WC_NAME;
517 m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name));
518
519 // last bet, try if it knows WCHAR_T pseudo-charset
520 if (m2w == (iconv_t)-1)
521 {
522 g_wcCharset = "WCHAR_T";
523 m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name));
524 }
525
526 if (m2w != (iconv_t)-1)
527 {
528 char buf[2], *bufPtr;
529 wchar_t wbuf[2], *wbufPtr;
530 size_t insz, outsz;
531 size_t res;
532
533 buf[0] = 'A';
534 buf[1] = 0;
535 wbuf[0] = 0;
536 insz = 2;
537 outsz = SIZEOF_WCHAR_T * 2;
538 wbufPtr = wbuf;
539 bufPtr = buf;
540
541 #ifdef WX_ICONV_TAKES_CHAR
542 res = iconv(m2w, (char**)&bufPtr, &insz, (char**)&wbufPtr, &outsz);
543 #else
544 res = iconv(m2w, (const char**)&bufPtr, &insz, (char**)&wbufPtr, &outsz);
545 #endif
546 if (ICONV_FAILED(res, insz))
547 {
548 g_wcCharset = NULL;
549 wxLogLastError(wxT("iconv"));
550 wxLogError(_("Convertion to charset '%s' doesn't work."), name);
551 }
552 else
553 {
554 g_wcNeedsSwap = (wbuf[0] != (wchar_t)buf[0]);
555 }
556 }
557 else
558 {
559 g_wcCharset = NULL;
560 wxLogError(_("Don't know how to convert to/from charset '%s'."), name);
561 }
562 }
563 wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), g_wcCharset, g_wcNeedsSwap);
564 }
565 else
566 m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name));
567
568 w2m = iconv_open(wxConvLibc.cWX2MB(name), g_wcCharset);
569 }
570
571 ~IC_CharSet()
572 {
573 if ( m2w != (iconv_t)-1 )
574 iconv_close(m2w);
575 if ( w2m != (iconv_t)-1 )
576 iconv_close(w2m);
577 }
578
579 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
580 {
581 size_t inbuf = strlen(psz);
582 size_t outbuf = n * SIZEOF_WCHAR_T;
583 size_t res, cres;
584 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
585 wchar_t *bufPtr = buf;
586 const char *pszPtr = psz;
587
588 if (buf)
589 {
590 // have destination buffer, convert there
591 #ifdef WX_ICONV_TAKES_CHAR
592 cres = iconv(m2w, (char**)&pszPtr, &inbuf, (char**)&bufPtr, &outbuf);
593 #else
594 cres = iconv(m2w, &pszPtr, &inbuf, (char**)&bufPtr, &outbuf);
595 #endif
596 res = n - (outbuf / SIZEOF_WCHAR_T);
597
598 if (g_wcNeedsSwap)
599 {
600 // convert to native endianness
601 WC_BSWAP(buf /* _not_ bufPtr */, res)
602 }
603 }
604 else
605 {
606 // no destination buffer... convert using temp buffer
607 // to calculate destination buffer requirement
608 wchar_t tbuf[8];
609 res = 0;
610 do {
611 bufPtr = tbuf; outbuf = 8*SIZEOF_WCHAR_T;
612 #ifdef WX_ICONV_TAKES_CHAR
613 cres = iconv( m2w, (char**)&pszPtr, &inbuf, (char**)&bufPtr, &outbuf );
614 #else
615 cres = iconv( m2w, &pszPtr, &inbuf, (char**)&bufPtr, &outbuf );
616 #endif
617 res += 8-(outbuf/SIZEOF_WCHAR_T);
618 } while ((cres==(size_t)-1) && (errno==E2BIG));
619 }
620
621 if (ICONV_FAILED(cres, inbuf))
622 {
623 //VS: it is ok if iconv fails, hence trace only
624 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
625 return (size_t)-1;
626 }
627
628 return res;
629 }
630
631 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
632 {
633 #if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
634 size_t inbuf = std::wcslen(psz) * SIZEOF_WCHAR_T;
635 #else
636 size_t inbuf = ::wcslen(psz) * SIZEOF_WCHAR_T;
637 #endif
638 size_t outbuf = n;
639 size_t res, cres;
640
641 wchar_t *tmpbuf;
642
643 if (g_wcNeedsSwap)
644 {
645 // need to copy to temp buffer to switch endianness
646 // this absolutely doesn't rock!
647 // (no, doing WC_BSWAP twice on the original buffer won't help, as it
648 // could be in read-only memory, or be accessed in some other thread)
649 tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
650 memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
651 WC_BSWAP(tmpbuf, inbuf)
652 psz=tmpbuf;
653 }
654
655 if (buf)
656 {
657 // have destination buffer, convert there
658 #ifdef WX_ICONV_TAKES_CHAR
659 cres = iconv( w2m, (char**)&psz, &inbuf, &buf, &outbuf );
660 #else
661 cres = iconv( w2m, (const char**)&psz, &inbuf, &buf, &outbuf );
662 #endif
663 res = n-outbuf;
664 }
665 else
666 {
667 // no destination buffer... convert using temp buffer
668 // to calculate destination buffer requirement
669 char tbuf[16];
670 res = 0;
671 do {
672 buf = tbuf; outbuf = 16;
673 #ifdef WX_ICONV_TAKES_CHAR
674 cres = iconv( w2m, (char**)&psz, &inbuf, &buf, &outbuf );
675 #else
676 cres = iconv( w2m, (const char**)&psz, &inbuf, &buf, &outbuf );
677 #endif
678 res += 16 - outbuf;
679 } while ((cres==(size_t)-1) && (errno==E2BIG));
680 }
681
682 if (g_wcNeedsSwap)
683 {
684 free(tmpbuf);
685 }
686
687 if (ICONV_FAILED(cres, inbuf))
688 {
689 //VS: it is ok if iconv fails, hence trace only
690 wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
691 return (size_t)-1;
692 }
693
694 return res;
695 }
696
697 bool usable()
698 { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
699
700 protected:
701 iconv_t m2w, w2m;
702 };
703 #endif
704
705 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
706 class CP_CharSet : public wxCharacterSet
707 {
708 public:
709 CP_CharSet(const wxChar*name)
710 : wxCharacterSet(name), CodePage(CharsetToCodepage(name)) {}
711
712 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
713 {
714 size_t len =
715 MultiByteToWideChar(CodePage, 0, psz, -1, buf, buf ? n : 0);
716 //VS: returns # of written chars for buf!=NULL and *size*
717 // needed buffer for buf==NULL
718 return len ? (buf ? len : len-1) : (size_t)-1;
719 }
720
721 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
722 {
723 size_t len = WideCharToMultiByte(CodePage, 0, psz, -1, buf,
724 buf ? n : 0, NULL, NULL);
725 //VS: returns # of written chars for buf!=NULL and *size*
726 // needed buffer for buf==NULL
727 return len ? (buf ? len : len-1) : (size_t)-1;
728 }
729
730 bool usable()
731 { return CodePage != -1; }
732
733 public:
734 long CodePage;
735 };
736 #endif // __WIN32__
737
738 #if wxUSE_FONTMAP
739
740 class EC_CharSet : public wxCharacterSet
741 {
742 public:
743 // temporarily just use wxEncodingConverter stuff,
744 // so that it works while a better implementation is built
745 EC_CharSet(const wxChar*name) : wxCharacterSet(name),
746 enc(wxFONTENCODING_SYSTEM)
747 {
748 if (name)
749 enc = wxTheFontMapper->CharsetToEncoding(name, FALSE);
750 m2w.Init(enc, wxFONTENCODING_UNICODE);
751 w2m.Init(wxFONTENCODING_UNICODE, enc);
752 }
753
754 size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
755 {
756 size_t inbuf = strlen(psz);
757 if (buf)
758 m2w.Convert(psz,buf);
759 return inbuf;
760 }
761
762 size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
763 {
764 #if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
765 size_t inbuf = std::wcslen(psz);
766 #else
767 size_t inbuf = ::wcslen(psz);
768 #endif
769 if (buf)
770 w2m.Convert(psz,buf);
771
772 return inbuf;
773 }
774
775 bool usable()
776 { return (enc!=wxFONTENCODING_SYSTEM) && (enc!=wxFONTENCODING_DEFAULT); }
777
778 public:
779 wxFontEncoding enc;
780 wxEncodingConverter m2w, w2m;
781 };
782
783 #endif // wxUSE_FONTMAP
784
785 static wxCharacterSet *wxGetCharacterSet(const wxChar *name)
786 {
787 wxCharacterSet *cset = NULL;
788 if (name)
789 {
790 if (wxStricmp(name, wxT("UTF8")) == 0 || wxStricmp(name, wxT("UTF-8")) == 0)
791 {
792 cset = new ID_CharSet(name, &wxConvUTF8);
793 }
794 else
795 {
796 #ifdef HAVE_ICONV_H
797 cset = new IC_CharSet(name); // may not take NULL
798 #endif
799 }
800 }
801
802 if (cset && cset->usable())
803 return cset;
804
805 if (cset)
806 {
807 delete cset;
808 cset = NULL;
809 }
810
811 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
812 cset = new CP_CharSet(name); // may take NULL
813 if (cset->usable())
814 return cset;
815
816 delete cset;
817 #endif // __WIN32__
818
819 #if wxUSE_FONTMAP
820 cset = new EC_CharSet(name);
821 if (cset->usable())
822 return cset;
823 #endif // wxUSE_FONTMAP
824
825 delete cset;
826 wxLogError(_("Unknown encoding '%s'!"), name);
827 return NULL;
828 }
829
830 wxCSConv::wxCSConv(const wxChar *charset)
831 {
832 m_name = (wxChar *)NULL;
833 m_cset = (wxCharacterSet *) NULL;
834 m_deferred = TRUE;
835
836 SetName(charset);
837 }
838
839 wxCSConv::~wxCSConv()
840 {
841 free(m_name);
842 delete m_cset;
843 }
844
845 void wxCSConv::SetName(const wxChar *charset)
846 {
847 if (charset)
848 {
849 m_name = wxStrdup(charset);
850 m_deferred = TRUE;
851 }
852 }
853
854 void wxCSConv::LoadNow()
855 {
856 if (m_deferred)
857 {
858 if ( !m_name )
859 {
860 wxString name = wxLocale::GetSystemEncodingName();
861 if ( !name.empty() )
862 SetName(name);
863 }
864
865 // wxGetCharacterSet() complains about NULL name
866 m_cset = m_name ? wxGetCharacterSet(m_name) : NULL;
867 m_deferred = FALSE;
868 }
869 }
870
871 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
872 {
873 ((wxCSConv *)this)->LoadNow(); // discard constness
874
875 if (m_cset)
876 return m_cset->MB2WC(buf, psz, n);
877
878 // latin-1 (direct)
879 size_t len = strlen(psz);
880
881 if (buf)
882 {
883 for (size_t c = 0; c <= len; c++)
884 buf[c] = (unsigned char)(psz[c]);
885 }
886
887 return len;
888 }
889
890 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
891 {
892 ((wxCSConv *)this)->LoadNow(); // discard constness
893
894 if (m_cset)
895 return m_cset->WC2MB(buf, psz, n);
896
897 // latin-1 (direct)
898 #if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
899 size_t len=std::wcslen(psz);
900 #else
901 size_t len=::wcslen(psz);
902 #endif
903 if (buf)
904 {
905 for (size_t c = 0; c <= len; c++)
906 buf[c] = (psz[c] > 0xff) ? '?' : psz[c];
907 }
908
909 return len;
910 }
911
912 #ifdef HAVE_ICONV_H
913
914 class IC_CharSetConverter
915 {
916 public:
917 IC_CharSetConverter(IC_CharSet *from, IC_CharSet *to)
918 {
919 cnv = iconv_open(wxConvLibc.cWX2MB(to->cname),
920 wxConvLibc.cWX2MB(from->cname));
921 }
922
923 ~IC_CharSetConverter()
924 {
925 if (cnv != (iconv_t)-1)
926 iconv_close(cnv);
927 }
928
929 size_t Convert(char *buf, const char *psz, size_t n)
930 {
931 size_t inbuf = strlen(psz);
932 size_t outbuf = n;
933 #ifdef WX_ICONV_TAKES_CHAR
934 size_t res = iconv( cnv, (char**)&psz, &inbuf, &buf, &outbuf );
935 #else
936 size_t res = iconv( cnv, &psz, &inbuf, &buf, &outbuf );
937 #endif
938 if (res == (size_t)-1)
939 return (size_t)-1;
940 return (n - outbuf);
941 }
942
943 public:
944 iconv_t cnv;
945 };
946
947 #endif // HAVE_ICONV_H
948
// EC_CharSet only exists when wxUSE_FONTMAP is on, so the converter built on
// top of it must be compiled under the same condition
#if wxUSE_FONTMAP

// Converter between the encodings of two EC_CharSet objects using
// wxEncodingConverter.
class EC_CharSetConverter
{
public:
    EC_CharSetConverter(EC_CharSet* from,EC_CharSet* to)
        { cnv.Init(from->enc,to->enc); }

    // Returns strlen(psz); the string is converted into 'buf' if it is not
    // NULL.  The buffer size is not used.
    size_t Convert(char* buf, const char* psz, size_t WXUNUSED(n))
    {
        size_t inbuf = strlen(psz);
        if (buf) cnv.Convert(psz,buf);
        return inbuf;
    }

public:
    wxEncodingConverter cnv;
};

#endif // wxUSE_FONTMAP
965
966 #else // !wxUSE_WCHAR_T
967
968 // ----------------------------------------------------------------------------
969 // stand-ins in absence of wchar_t
970 // ----------------------------------------------------------------------------
971
972 WXDLLEXPORT_DATA(wxMBConv) wxConvLibc, wxConvFile;
973
974 #endif // wxUSE_WCHAR_T
975
976