src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin
   9 // Licence:     wxWindows license
  10 /////////////////////////////////////////////////////////////////////////////
  11
  12 // ============================================================================
  13 // declarations
  14 // ============================================================================
  15
  16 // ----------------------------------------------------------------------------
  17 // headers
  18 // ----------------------------------------------------------------------------
  19
  20 #ifdef __GNUG__
  21   #pragma implementation "strconv.h"
  22 #endif
  23
  24 // For compilers that support precompilation, includes "wx.h".
  25 #include "wx/wxprec.h"
  26
  27 #ifdef __BORLANDC__
  28   #pragma hdrstop
  29 #endif
  30
  31 #ifdef __WXMSW__
  32   #include "wx/msw/private.h"
  33 #endif
  34
  35 #include <errno.h>
  36 #include <ctype.h>
  37 #include <string.h>
  38 #include <stdlib.h>
  39
  40 #ifdef __SALFORDC__
  41   #include <clib.h>
  42 #endif
  43
  44 #ifdef HAVE_ICONV_H
  45   #include <iconv.h>
  46 #endif
  47
  48 #ifdef __WXMSW__
  49   #include <windows.h>
  50 #endif
  51
  52 #include "wx/debug.h"
  53 #include "wx/strconv.h"
  54 #include "wx/intl.h"
  55 #include "wx/log.h"
  56
  57 #if defined(WORDS_BIGENDIAN) || defined(__STDC_ISO_10646__)
  58 #define BSWAP_UCS4(str, len)
  59 #define BSWAP_UCS2(str, len)
  60 #else
  61 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  62 #define BSWAP_UCS2(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  63 #define WC_NEED_BSWAP
  64 #endif
  65 #define BSWAP_UTF32(str, len) BSWAP_UCS4(str, len)
  66 #define BSWAP_UTF16(str, len) BSWAP_UCS2(str, len)
  67
  68 #if SIZEOF_WCHAR_T == 4
  69 #define WC_NAME "UCS4"
  70 #define WC_BSWAP BSWAP_UCS4
  71 #elif SIZEOF_WCHAR_T == 2
  72 #define WC_NAME "UTF16"
  73 #define WC_BSWAP BSWAP_UTF16
  74 #define WC_UTF16
  75 #endif
  76
  77 // ----------------------------------------------------------------------------
  78 // globals
  79 // ----------------------------------------------------------------------------
  80
// Pointer to a wxMBConv object, initialised with the address of wxConvLibc
// (the libc-based converter defined further down in this file).
WXDLLEXPORT_DATA(wxMBConv *) wxConvCurrent = &wxConvLibc;
  82
  83 // ============================================================================
  84 // implementation
  85 // ============================================================================
  86
  87 #if wxUSE_WCHAR_T
  88
  89 #ifdef WC_UTF16
  90
  91 static size_t encode_utf16(wxUint32 input,wxUint16*output)
  92 {
  93     if (input<=0xffff)
  94     {
  95         if (output) *output++ = input;
  96         return 1;
  97     }
  98     else if (input>=0x110000)
  99     {
 100         return (size_t)-1;
 101     }
 102     else
 103     {
 104         if (output)
 105         {
 106             *output++ = (input >> 10)+0xd7c0;
 107             *output++ = (input&0x3ff)+0xdc00;
 108         }
 109         return 2;
 110     }
 111 }
 112
 113 static size_t decode_utf16(wxUint16*input,wxUint32&output)
 114 {
 115     if ((*input<0xd800) || (*input>0xdfff))
 116     {
 117         output = *input;
 118         return 1;
 119     }
 120     else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
 121     {
 122         output = *input;
 123         return (size_t)-1;
 124     }
 125     else
 126     {
 127         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 128         return 2;
 129     }
 130 }
 131
 132 #endif // WC_UTF16
 133
 134 // ----------------------------------------------------------------------------
 135 // wxMBConv
 136 // ----------------------------------------------------------------------------
 137
// Global instance of the base wxMBConv class, whose MB2WC()/WC2MB() below
// forward to wxMB2WC()/wxWC2MB().
WXDLLEXPORT_DATA(wxMBConv) wxConvLibc;
 139
 140 size_t wxMBConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 141 {
 142     return wxMB2WC(buf, psz, n);
 143 }
 144
 145 size_t wxMBConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 146 {
 147     return wxWC2MB(buf, psz, n);
 148 }
 149
 150 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 151 {
 152     if (psz)
 153     {
 154         size_t nLen = MB2WC((wchar_t *) NULL, psz, 0);
 155         if (nLen == (size_t)-1)
 156             return wxWCharBuffer((wchar_t *) NULL);
 157         wxWCharBuffer buf(nLen);
 158         MB2WC((wchar_t *)(const wchar_t *) buf, psz, nLen);
 159         return buf;
 160     }
 161     else
 162         return wxWCharBuffer((wchar_t *) NULL);
 163 }
 164
 165 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *psz) const
 166 {
 167     if (psz)
 168     {
 169         size_t nLen = WC2MB((char *) NULL, psz, 0);
 170         if (nLen == (size_t)-1)
 171             return wxCharBuffer((char *) NULL);
 172         wxCharBuffer buf(nLen);
 173         WC2MB((char *)(const char *) buf, psz, nLen);
 174         return buf;
 175     }
 176     else
 177         return wxCharBuffer((char *) NULL);
 178 }
 179
 180 // ----------------------------------------------------------------------------
 181 // standard file conversion
 182 // ----------------------------------------------------------------------------
 183
// Global wxMBConvFile instance; its conversion functions are defined below.
WXDLLEXPORT_DATA(wxMBConvFile) wxConvFile;
 185
 186 // just use the libc conversion for now
 187 size_t wxMBConvFile::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 188 {
 189     return wxMB2WC(buf, psz, n);
 190 }
 191
 192 size_t wxMBConvFile::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 193 {
 194     return wxWC2MB(buf, psz, n);
 195 }
 196
 197 // ----------------------------------------------------------------------------
 198 // standard gdk conversion
 199 // ----------------------------------------------------------------------------
 200
 201 #ifdef __WXGTK12__
 202
// Global wxMBConvGdk instance; its conversion functions below are built on
// gdk_mbstowcs() and gdk_wcstombs().
WXDLLEXPORT_DATA(wxMBConvGdk) wxConvGdk;
 204
 205 #include <gdk/gdk.h>
 206
 207 size_t wxMBConvGdk::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 208 {
 209     if (buf)
 210     {
 211         return gdk_mbstowcs((GdkWChar *)buf, psz, n);
 212     }
 213     else
 214     {
 215         GdkWChar *nbuf = new GdkWChar[n=strlen(psz)];
 216         size_t len = gdk_mbstowcs(nbuf, psz, n);
 217         delete[] nbuf;
 218         return len;
 219     }
 220 }
 221
 222 size_t wxMBConvGdk::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 223 {
 224     char *mbstr = gdk_wcstombs((GdkWChar *)psz);
 225     size_t len = mbstr ? strlen(mbstr) : 0;
 226     if (buf)
 227     {
 228         if (len > n)
 229             len = n;
 230         memcpy(buf, psz, len);
 231         if (len < n)
 232             buf[len] = 0;
 233     }
 234     return len;
 235 }
 236
 237 #endif // GTK > 1.0
 238
 239 // ----------------------------------------------------------------------------
 240 // UTF-7
 241 // ----------------------------------------------------------------------------
 242
// Global wxMBConvUTF7 instance (the conversion itself is not implemented
// yet, see the functions below).
WXDLLEXPORT_DATA(wxMBConvUTF7) wxConvUTF7;

// Character tables kept for a future UTF-7 implementation; compiled out for
// now because nothing uses them.
// NOTE(review): the names look like RFC 2152's "Set D", "Set O" and the
// base64 alphabet "Set B" -- confirm before relying on them.
#if 0
static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                        "abcdefghijklmnopqrstuvwxyz"
                        "0123456789'(),-./:?";
static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                        "abcdefghijklmnopqrstuvwxyz"
                        "0123456789+/";
#endif
 254
 255 // TODO: write actual implementations of UTF-7 here
 256 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
 257                            const char * WXUNUSED(psz),
 258                            size_t WXUNUSED(n)) const
 259 {
 260   return 0;
 261 }
 262
 263 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
 264                            const wchar_t * WXUNUSED(psz),
 265                            size_t WXUNUSED(n)) const
 266 {
 267   return 0;
 268 }
 269
 270 // ----------------------------------------------------------------------------
 271 // UTF-8
 272 // ----------------------------------------------------------------------------
 273
// Global wxMBConvUTF8 instance.
WXDLLEXPORT_DATA(wxMBConvUTF8) wxConvUTF8;

// utf8_max[i] is the largest value that fits into a UTF-8 sequence of i + 1
// bytes (1 to 6 bytes). MB2WC() uses it to reject sequences longer than
// necessary, WC2MB() to find the sequence length for a value. The final
// 0xffffffff entry guarantees that the search loop in WC2MB() terminates.
static wxUint32 utf8_max[]=
    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 278
 279 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 280 {
 281     size_t len = 0;
 282
 283     while (*psz && ((!buf) || (len < n)))
 284     {
 285         unsigned char cc = *psz++, fc = cc;
 286         unsigned cnt;
 287         for (cnt = 0; fc & 0x80; cnt++)
 288             fc <<= 1;
 289         if (!cnt)
 290         {
 291             // plain ASCII char
 292             if (buf)
 293                 *buf++ = cc;
 294             len++;
 295         }
 296         else
 297         {
 298             cnt--;
 299             if (!cnt)
 300             {
 301                 // invalid UTF-8 sequence
 302                 return (size_t)-1;
 303             }
 304             else
 305             {
 306                 unsigned ocnt = cnt - 1;
 307                 wxUint32 res = cc & (0x3f >> cnt);
 308                 while (cnt--)
 309                 {
 310                     cc = *psz++;
 311                     if ((cc & 0xC0) != 0x80)
 312                     {
 313                         // invalid UTF-8 sequence
 314                         return (size_t)-1;
 315                     }
 316                     res = (res << 6) | (cc & 0x3f);
 317                 }
 318                 if (res <= utf8_max[ocnt])
 319                 {
 320                     // illegal UTF-8 encoding
 321                     return (size_t)-1;
 322                 }
 323 #ifdef WC_UTF16
 324                 size_t pa = encode_utf16(res, buf);
 325                 if (pa == (size_t)-1)
 326                   return (size_t)-1;
 327                 if (buf)
 328                     buf += pa;
 329                 len += pa;
 330 #else
 331                 if (buf)
 332                     *buf++ = res;
 333                 len++;
 334 #endif
 335             }
 336         }
 337     }
 338     if (buf && (len < n))
 339         *buf = 0;
 340     return len;
 341 }
 342
 343 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 344 {
 345     size_t len = 0;
 346
 347     while (*psz && ((!buf) || (len < n)))
 348     {
 349         wxUint32 cc;
 350 #ifdef WC_UTF16
 351         size_t pa = decode_utf16(psz,cc);
 352         psz += (pa == (size_t)-1) ? 1 : pa;
 353 #else
 354         cc=(*psz++) & 0x7fffffff;
 355 #endif
 356         unsigned cnt;
 357         for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 358         if (!cnt)
 359         {
 360             // plain ASCII char
 361             if (buf)
 362                 *buf++ = cc;
 363             len++;
 364         }
 365
 366         else
 367         {
 368             len += cnt + 1;
 369             if (buf)
 370             {
 371                 *buf++ = (-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt));
 372                 while (cnt--)
 373                     *buf++ = 0x80 | ((cc >> (cnt * 6)) & 0x3f);
 374             }
 375         }
 376     }
 377
 378     if (buf && (len<n)) *buf = 0;
 379     return len;
 380 }
 381
 382 // ----------------------------------------------------------------------------
 383 // specified character set
 384 // ----------------------------------------------------------------------------
 385
// Global wxCSConv instance constructed without a charset name; in that case
// wxCSConv::LoadNow() asks wxLocale::GetSystemEncodingName() for one.
WXDLLEXPORT_DATA(wxCSConv) wxConvLocal((const wxChar *)NULL);
 387
 388 #include "wx/encconv.h"
 389 #include "wx/fontmap.h"
 390
 391 // TODO: add some tables here
 392 // - perhaps common encodings to common codepages (for Win32)
 393 // - perhaps common encodings to objects ("UTF8" -> wxConvUTF8)
 394 // - move wxEncodingConverter meat in here
 395
 396 #ifdef __WIN32__
 397 #include "wx/msw/registry.h"
 398 // this should work if M$ Internet Exploiter is installed
 399 static long CharsetToCodepage(const wxChar *name)
 400 {
 401     if (!name)
 402         return GetACP();
 403
 404     long CP=-1;
 405
 406     wxString cn(name);
 407     do {
 408         wxString path(wxT("MIME\\Database\\Charset\\"));
 409         path += cn;
 410         wxRegKey key(wxRegKey::HKCR, path);
 411
 412         if (!key.Exists()) continue;
 413
 414         // two cases: either there's an AliasForCharset string,
 415         // or there are Codepage and InternetEncoding dwords.
 416         // The InternetEncoding gives us the actual encoding,
 417         // the Codepage just says which Windows character set to
 418         // use when displaying the data.
 419         if (key.HasValue(wxT("InternetEncoding")) &&
 420             key.QueryValue(wxT("InternetEncoding"), &CP)) break;
 421
 422         // no encoding, see if it's an alias
 423         if (!key.HasValue(wxT("AliasForCharset")) ||
 424             !key.QueryValue(wxT("AliasForCharset"), cn)) break;
 425     } while (1);
 426
 427     return CP;
 428 }
 429 #endif
 430
 431 class wxCharacterSet
 432 {
 433 public:
 434     wxCharacterSet(const wxChar*name)
 435         : cname(name) {}
 436     virtual ~wxCharacterSet()
 437         {}
 438     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
 439         { return (size_t)-1; }
 440     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
 441         { return (size_t)-1; }
 442     virtual bool usable()
 443         { return FALSE; }
 444 public:
 445     const wxChar*cname;
 446 };
 447
 448 class ID_CharSet : public wxCharacterSet
 449 {
 450 public:
 451     ID_CharSet(const wxChar *name,wxMBConv *cnv)
 452         : wxCharacterSet(name), work(cnv) {}
 453
 454     size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
 455         { return work ? work->MB2WC(buf,psz,n) : (size_t)-1; }
 456
 457     size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
 458         { return work ? work->WC2MB(buf,psz,n) : (size_t)-1; }
 459
 460     bool usable()
 461         { return work!=NULL; }
 462 public:
 463     wxMBConv*work;
 464 };
 465
 466
 467 #ifdef HAVE_ICONV_H
 468
 469 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
 470 //     if output buffer is _exactly_ as big as needed. Such case is (unless there's
 471 //     yet another bug in glibc) the only case when iconv() returns with (size_t)-1
 472 //     (which means error) and says there are 0 bytes left in the input buffer --
 473 //     when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
 474 //     this alternative test for iconv() failure.
 475 //     [This bug does not appear in glibc 2.2.]
 476 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
 477 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
 478                                      (errno != E2BIG || bufLeft != 0))
 479 #else
 480 #define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
 481 #endif
 482
 483 class IC_CharSet : public wxCharacterSet
 484 {
 485 public:
 486     IC_CharSet(const wxChar *name)
 487         : wxCharacterSet(name)
 488     {
 489         m2w = iconv_open(WC_NAME, wxConvLibc.cWX2MB(cname));
 490         w2m = iconv_open(wxConvLibc.cWX2MB(cname), WC_NAME);
 491     }
 492
 493     ~IC_CharSet()
 494     {
 495         if ( m2w != (iconv_t)-1 )
 496             iconv_close(m2w);
 497         if ( w2m != (iconv_t)-1 )
 498             iconv_close(w2m);
 499     }
 500
 501     size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
 502     {
 503         size_t inbuf = strlen(psz);
 504         size_t outbuf = n * SIZEOF_WCHAR_T;
 505         size_t res, cres;
 506         // VS: Use these instead of psz, buf because iconv() modifies its arguments:
 507         wchar_t *bufPtr = buf;
 508         const char *pszPtr = psz;
 509
 510         if (buf)
 511         {
 512             // have destination buffer, convert there
 513 #ifdef WX_ICONV_TAKES_CHAR
 514             cres = iconv(m2w, (char**)&pszPtr, &inbuf, (char**)&bufPtr, &outbuf);
 515 #else
 516             cres = iconv(m2w, &pszPtr, &inbuf, (char**)&bufPtr, &outbuf);
 517 #endif
 518             res = n - (outbuf / SIZEOF_WCHAR_T);
 519             // convert to native endianness
 520 #ifdef WC_NEED_BSWAP
 521             WC_BSWAP(buf /* _not_ bufPtr */, res)
 522 #endif
 523         }
 524         else
 525         {
 526             // no destination buffer... convert using temp buffer
 527             // to calculate destination buffer requirement
 528             wchar_t tbuf[8];
 529             res = 0;
 530             do {
 531                 bufPtr = tbuf; outbuf = 8*SIZEOF_WCHAR_T;
 532 #ifdef WX_ICONV_TAKES_CHAR
 533                 cres = iconv( m2w, (char**)&pszPtr, &inbuf, (char**)&bufPtr, &outbuf );
 534 #else
 535                 cres = iconv( m2w, &pszPtr, &inbuf, (char**)&bufPtr, &outbuf );
 536 #endif
 537                 res += 8-(outbuf/SIZEOF_WCHAR_T);
 538             } while ((cres==(size_t)-1) && (errno==E2BIG));
 539         }
 540
 541         if (ICONV_FAILED(cres, inbuf))
 542             return (size_t)-1;
 543
 544         return res;
 545     }
 546
 547     size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
 548     {
 549 #if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
 550         size_t inbuf = std::wcslen(psz) * SIZEOF_WCHAR_T;
 551 #else
 552         size_t inbuf = ::wcslen(psz) * SIZEOF_WCHAR_T;
 553 #endif
 554         size_t outbuf = n;
 555         size_t res, cres;
 556
 557 #ifdef WC_NEED_BSWAP
 558         // need to copy to temp buffer to switch endianness
 559         // this absolutely doesn't rock!
 560         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
 561         //  could be in read-only memory, or be accessed in some other thread)
 562         wchar_t *tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
 563         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
 564         WC_BSWAP(tmpbuf, inbuf)
 565         psz=tmpbuf;
 566 #endif
 567         if (buf)
 568         {
 569             // have destination buffer, convert there
 570 #ifdef WX_ICONV_TAKES_CHAR
 571             cres = iconv( w2m, (char**)&psz, &inbuf, &buf, &outbuf );
 572 #else
 573             cres = iconv( w2m, (const char**)&psz, &inbuf, &buf, &outbuf );
 574 #endif
 575             res = n-outbuf;
 576         }
 577         else
 578         {
 579             // no destination buffer... convert using temp buffer
 580             // to calculate destination buffer requirement
 581             char tbuf[16];
 582             res = 0;
 583             do {
 584                 buf = tbuf; outbuf = 16;
 585 #ifdef WX_ICONV_TAKES_CHAR
 586                 cres = iconv( w2m, (char**)&psz, &inbuf, &buf, &outbuf );
 587 #else
 588                 cres = iconv( w2m, (const char**)&psz, &inbuf, &buf, &outbuf );
 589 #endif
 590                 res += 16 - outbuf;
 591             } while ((cres==(size_t)-1) && (errno==E2BIG));
 592         }
 593 #ifdef WC_NEED_BSWAP
 594         free(tmpbuf);
 595 #endif
 596         if (ICONV_FAILED(cres, inbuf))
 597             return (size_t)-1;
 598
 599         return res;
 600     }
 601
 602     bool usable()
 603         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
 604
 605 public:
 606     iconv_t m2w, w2m;
 607 };
 608 #endif
 609
 610 #ifdef __WIN32__
 611 class CP_CharSet : public wxCharacterSet
 612 {
 613 public:
 614     CP_CharSet(const wxChar*name)
 615         : wxCharacterSet(name), CodePage(CharsetToCodepage(name)) {}
 616
 617     size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
 618     {
 619         size_t len =
 620             MultiByteToWideChar(CodePage, 0, psz, -1, buf, buf ? n : 0);
 621         return len ? len : (size_t)-1;
 622     }
 623
 624     size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
 625     {
 626         size_t len = WideCharToMultiByte(CodePage, 0, psz, -1, buf,
 627                                          buf ? n : 0, NULL, NULL);
 628         return len ? len : (size_t)-1;
 629     }
 630
 631     bool usable()
 632         { return CodePage != -1; }
 633
 634 public:
 635     long CodePage;
 636 };
 637 #endif
 638
 639 class EC_CharSet : public wxCharacterSet
 640 {
 641 public:
 642     // temporarily just use wxEncodingConverter stuff,
 643     // so that it works while a better implementation is built
 644     EC_CharSet(const wxChar*name) : wxCharacterSet(name),
 645                                     enc(wxFONTENCODING_SYSTEM)
 646     {
 647         if (name)
 648             enc = wxTheFontMapper->CharsetToEncoding(name, FALSE);
 649         m2w.Init(enc, wxFONTENCODING_UNICODE);
 650         w2m.Init(wxFONTENCODING_UNICODE, enc);
 651     }
 652
 653     size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
 654     {
 655         size_t inbuf = strlen(psz);
 656         if (buf)
 657             m2w.Convert(psz,buf);
 658         return inbuf;
 659     }
 660
 661     size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
 662     {
 663 #if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
 664         size_t inbuf = std::wcslen(psz);
 665 #else
 666         size_t inbuf = ::wcslen(psz);
 667 #endif
 668         if (buf)
 669             w2m.Convert(psz,buf);
 670
 671         return inbuf;
 672     }
 673
 674     bool usable()
 675         { return (enc!=wxFONTENCODING_SYSTEM) && (enc!=wxFONTENCODING_DEFAULT); }
 676
 677 public:
 678     wxFontEncoding enc;
 679     wxEncodingConverter m2w, w2m;
 680 };
 681
 682 static wxCharacterSet *wxGetCharacterSet(const wxChar *name)
 683 {
 684     wxCharacterSet *cset = NULL;
 685     if (name)
 686     {
 687         if (wxStricmp(name, wxT("UTF8")) == 0 || wxStricmp(name, wxT("UTF-8")) == 0)
 688         {
 689             cset = new ID_CharSet(name, &wxConvUTF8);
 690         }
 691         else
 692         {
 693 #ifdef HAVE_ICONV_H
 694             cset = new IC_CharSet(name); // may not take NULL
 695 #endif
 696         }
 697     }
 698
 699     if (cset && cset->usable()) return cset;
 700     if (cset)
 701     {
 702         delete cset;
 703         cset = NULL;
 704     }
 705
 706 #ifdef __WIN32__
 707     cset = new CP_CharSet(name); // may take NULL
 708     if (cset->usable())
 709         return cset;
 710
 711     delete cset;
 712 #endif // __WIN32__
 713
 714     cset = new EC_CharSet(name);
 715     if (cset->usable())
 716         return cset;
 717
 718     delete cset;
 719     wxLogError(_("Unknown encoding '%s'!"), name);
 720     return NULL;
 721 }
 722
 723 wxCSConv::wxCSConv(const wxChar *charset)
 724 {
 725     m_name = (wxChar *)NULL;
 726     m_cset = (wxCharacterSet *) NULL;
 727     m_deferred = TRUE;
 728
 729     SetName(charset);
 730 }
 731
 732 wxCSConv::~wxCSConv()
 733 {
 734     free(m_name);
 735     delete m_cset;
 736 }
 737
 738 void wxCSConv::SetName(const wxChar *charset)
 739 {
 740     if (charset)
 741     {
 742         m_name = wxStrdup(charset);
 743         m_deferred = TRUE;
 744     }
 745 }
 746
 747 void wxCSConv::LoadNow()
 748 {
 749     if (m_deferred)
 750     {
 751         if ( !m_name )
 752         {
 753             wxString name = wxLocale::GetSystemEncodingName();
 754             if ( !name.empty() )
 755                 SetName(name);
 756         }
 757
 758         m_cset = wxGetCharacterSet(m_name);
 759         m_deferred = FALSE;
 760     }
 761 }
 762
 763 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 764 {
 765     ((wxCSConv *)this)->LoadNow(); // discard constness
 766
 767     if (m_cset)
 768         return m_cset->MB2WC(buf, psz, n);
 769
 770     // latin-1 (direct)
 771     size_t len = strlen(psz);
 772
 773     if (buf)
 774     {
 775         for (size_t c = 0; c <= len; c++)
 776             buf[c] = (unsigned char)(psz[c]);
 777     }
 778
 779     return len;
 780 }
 781
 782 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 783 {
 784     ((wxCSConv *)this)->LoadNow(); // discard constness
 785
 786     if (m_cset)
 787         return m_cset->WC2MB(buf, psz, n);
 788
 789     // latin-1 (direct)
 790 #if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
 791     size_t len=std::wcslen(psz);
 792 #else
 793     size_t len=::wcslen(psz);
 794 #endif
 795     if (buf)
 796     {
 797         for (size_t c = 0; c <= len; c++)
 798             buf[c] = (psz[c] > 0xff) ? '?' : psz[c];
 799     }
 800
 801     return len;
 802 }
 803
 804 #ifdef HAVE_ICONV_H
 805
 806 class IC_CharSetConverter
 807 {
 808 public:
 809     IC_CharSetConverter(IC_CharSet *from, IC_CharSet *to)
 810     {
 811         cnv = iconv_open(wxConvLibc.cWX2MB(to->cname),
 812                          wxConvLibc.cWX2MB(from->cname));
 813     }
 814
 815     ~IC_CharSetConverter()
 816     {
 817         if (cnv != (iconv_t)-1)
 818             iconv_close(cnv);
 819     }
 820
 821     size_t Convert(char *buf, const char *psz, size_t n)
 822     {
 823         size_t inbuf = strlen(psz);
 824         size_t outbuf = n;
 825 #ifdef WX_ICONV_TAKES_CHAR
 826         size_t res = iconv( cnv, (char**)&psz, &inbuf, &buf, &outbuf );
 827 #else
 828         size_t res = iconv( cnv, &psz, &inbuf, &buf, &outbuf );
 829 #endif
 830         if (res == (size_t)-1)
 831             return (size_t)-1;
 832         return (n - outbuf);
 833     }
 834
 835 public:
 836     iconv_t cnv;
 837 };
 838
 839 #endif // HAVE_ICONV_H
 840
 841 class EC_CharSetConverter
 842 {
 843 public:
 844     EC_CharSetConverter(EC_CharSet*from,EC_CharSet*to)
 845         { cnv.Init(from->enc,to->enc); }
 846
 847     size_t Convert(char*buf, const char*psz, size_t n)
 848     {
 849         size_t inbuf = strlen(psz);
 850         if (buf) cnv.Convert(psz,buf);
 851         return inbuf;
 852     }
 853
 854 public:
 855     wxEncodingConverter cnv;
 856 };
 857
 858 #else // !wxUSE_WCHAR_T
 859
 860 // ----------------------------------------------------------------------------
 861 // stand-ins in absence of wchar_t
 862 // ----------------------------------------------------------------------------
 863
// Without wchar_t support only these two global objects are defined, both
// of the base wxMBConv type.
WXDLLEXPORT_DATA(wxMBConv) wxConvLibc, wxConvFile;
 865
 866 #endif // wxUSE_WCHAR_T
 867
 868