src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin
   9 // Licence:     wxWindows license
  10 /////////////////////////////////////////////////////////////////////////////
  11
  12 // ============================================================================
  13 // declarations
  14 // ============================================================================
  15
  16 // ----------------------------------------------------------------------------
  17 // headers
  18 // ----------------------------------------------------------------------------
  19
  20 #ifdef __GNUG__
  21   #pragma implementation "strconv.h"
  22 #endif
  23
  24 // For compilers that support precompilation, includes "wx.h".
  25 #include "wx/wxprec.h"
  26
  27 #ifdef __BORLANDC__
  28   #pragma hdrstop
  29 #endif
  30
  31 #ifdef __WXMSW__
  32   #include "wx/msw/private.h"
  33 #endif
  34
  35 #include <errno.h>
  36 #include <ctype.h>
  37 #include <string.h>
  38 #include <stdlib.h>
  39
  40 #ifdef __SALFORDC__
  41   #include <clib.h>
  42 #endif
  43
  44 #ifdef HAVE_ICONV_H
  45   #include <iconv.h>
  46 #endif
  47 #ifdef HAVE_LANGINFO_H
  48   #include <langinfo.h>
  49 #endif
  50
  51 #ifdef __WXMSW__
  52   #include <windows.h>
  53 #endif
  54
  55 #include "wx/debug.h"
  56 #include "wx/strconv.h"
  57 #include "wx/intl.h"
  58 #include "wx/log.h"
  59
  60 #if defined(WORDS_BIGENDIAN) || defined(__STDC_ISO_10646__)
  61 #define BSWAP_UCS4(str, len)
  62 #define BSWAP_UCS2(str, len)
  63 #else
  64 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  65 #define BSWAP_UCS2(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  66 #define WC_NEED_BSWAP
  67 #endif
  68 #define BSWAP_UTF32(str, len) BSWAP_UCS4(str, len)
  69 #define BSWAP_UTF16(str, len) BSWAP_UCS2(str, len)
  70
  71 #if SIZEOF_WCHAR_T == 4
  72 #define WC_NAME "UCS4"
  73 #define WC_BSWAP BSWAP_UCS4
  74 #elif SIZEOF_WCHAR_T == 2
  75 #define WC_NAME "UTF16"
  76 #define WC_BSWAP BSWAP_UTF16
  77 #define WC_UTF16
  78 #endif
  79
  80 // ----------------------------------------------------------------------------
  81 // globals
  82 // ----------------------------------------------------------------------------
  83
// Pointer to the currently selected converter; it initially refers to
// wxConvLibc.
WXDLLEXPORT_DATA(wxMBConv *) wxConvCurrent = &wxConvLibc;
  85
  86 // ============================================================================
  87 // implementation
  88 // ============================================================================
  89
  90 #if wxUSE_WCHAR_T
  91
  92 #ifdef WC_UTF16
  93
  94 static size_t encode_utf16(wxUint32 input,wxUint16*output)
  95 {
  96     if (input<=0xffff)
  97     {
  98         if (output) *output++ = input;
  99         return 1;
 100     }
 101     else if (input>=0x110000)
 102     {
 103         return (size_t)-1;
 104     }
 105     else
 106     {
 107         if (output)
 108         {
 109             *output++ = (input >> 10)+0xd7c0;
 110             *output++ = (input&0x3ff)+0xdc00;
 111         }
 112         return 2;
 113     }
 114 }
 115
 116 static size_t decode_utf16(wxUint16*input,wxUint32&output)
 117 {
 118     if ((*input<0xd800) || (*input>0xdfff))
 119     {
 120         output = *input;
 121         return 1;
 122     }
 123     else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
 124     {
 125         output = *input;
 126         return (size_t)-1;
 127     }
 128     else
 129     {
 130         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 131         return 2;
 132     }
 133 }
 134
 135 #endif // WC_UTF16
 136
 137 // ----------------------------------------------------------------------------
 138 // wxMBConv
 139 // ----------------------------------------------------------------------------
 140
// Global instance of the base converter, whose conversions are done by
// wxMB2WC() and wxWC2MB().
WXDLLEXPORT_DATA(wxMBConv) wxConvLibc;
 142
 143 size_t wxMBConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 144 {
 145     return wxMB2WC(buf, psz, n);
 146 }
 147
 148 size_t wxMBConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 149 {
 150     return wxWC2MB(buf, psz, n);
 151 }
 152
 153 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 154 {
 155     if (psz)
 156     {
 157         size_t nLen = MB2WC((wchar_t *) NULL, psz, 0);
 158         if (nLen == (size_t)-1)
 159             return wxWCharBuffer((wchar_t *) NULL);
 160         wxWCharBuffer buf(nLen);
 161         MB2WC((wchar_t *)(const wchar_t *) buf, psz, nLen);
 162         return buf;
 163     }
 164     else
 165         return wxWCharBuffer((wchar_t *) NULL);
 166 }
 167
 168 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *psz) const
 169 {
 170     if (psz)
 171     {
 172         size_t nLen = WC2MB((char *) NULL, psz, 0);
 173         if (nLen == (size_t)-1)
 174             return wxCharBuffer((char *) NULL);
 175         wxCharBuffer buf(nLen);
 176         WC2MB((char *)(const char *) buf, psz, nLen);
 177         return buf;
 178     }
 179     else
 180         return wxCharBuffer((char *) NULL);
 181 }
 182
 183 // ----------------------------------------------------------------------------
 184 // standard file conversion
 185 // ----------------------------------------------------------------------------
 186
// Global instance of the file converter.
WXDLLEXPORT_DATA(wxMBConvFile) wxConvFile;
 188
 189 // just use the libc conversion for now
 190 size_t wxMBConvFile::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 191 {
 192     return wxMB2WC(buf, psz, n);
 193 }
 194
 195 size_t wxMBConvFile::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 196 {
 197     return wxWC2MB(buf, psz, n);
 198 }
 199
 200 // ----------------------------------------------------------------------------
 201 // standard gdk conversion
 202 // ----------------------------------------------------------------------------
 203
 204 #ifdef __WXGTK12__
 205
// Global instance of the GDK based converter (only built for __WXGTK12__).
WXDLLEXPORT_DATA(wxMBConvGdk) wxConvGdk;
 207
 208 #include <gdk/gdk.h>
 209
 210 size_t wxMBConvGdk::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 211 {
 212     if (buf)
 213     {
 214         return gdk_mbstowcs((GdkWChar *)buf, psz, n);
 215     }
 216     else
 217     {
 218         GdkWChar *nbuf = new GdkWChar[n=strlen(psz)];
 219         size_t len = gdk_mbstowcs(nbuf, psz, n);
 220         delete[] nbuf;
 221         return len;
 222     }
 223 }
 224
 225 size_t wxMBConvGdk::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 226 {
 227     char *mbstr = gdk_wcstombs((GdkWChar *)psz);
 228     size_t len = mbstr ? strlen(mbstr) : 0;
 229     if (buf)
 230     {
 231         if (len > n)
 232             len = n;
 233         memcpy(buf, psz, len);
 234         if (len < n)
 235             buf[len] = 0;
 236     }
 237     return len;
 238 }
 239
 240 #endif // GTK > 1.0
 241
 242 // ----------------------------------------------------------------------------
 243 // UTF-7
 244 // ----------------------------------------------------------------------------
 245
// Global instance of the UTF-7 converter.
WXDLLEXPORT_DATA(wxMBConvUTF7) wxConvUTF7;

// Character tables for UTF-7; compiled out with #if 0 as nothing in this file
// uses them yet.
#if 0
static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                        "abcdefghijklmnopqrstuvwxyz"
                        "0123456789'(),-./:?";
static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                        "abcdefghijklmnopqrstuvwxyz"
                        "0123456789+/";
#endif
 257
 258 // TODO: write actual implementations of UTF-7 here
 259 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
 260                            const char * WXUNUSED(psz),
 261                            size_t WXUNUSED(n)) const
 262 {
 263   return 0;
 264 }
 265
 266 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
 267                            const wchar_t * WXUNUSED(psz),
 268                            size_t WXUNUSED(n)) const
 269 {
 270   return 0;
 271 }
 272
 273 // ----------------------------------------------------------------------------
 274 // UTF-8
 275 // ----------------------------------------------------------------------------
 276
 277 WXDLLEXPORT_DATA(wxMBConvUTF8) wxConvUTF8;
 278
 279 static wxUint32 utf8_max[]=
 280     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 281
 282 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 283 {
 284     size_t len = 0;
 285
 286     while (*psz && ((!buf) || (len < n)))
 287     {
 288         unsigned char cc = *psz++, fc = cc;
 289         unsigned cnt;
 290         for (cnt = 0; fc & 0x80; cnt++)
 291             fc <<= 1;
 292         if (!cnt)
 293         {
 294             // plain ASCII char
 295             if (buf)
 296                 *buf++ = cc;
 297             len++;
 298         }
 299         else
 300         {
 301             cnt--;
 302             if (!cnt)
 303             {
 304                 // invalid UTF-8 sequence
 305                 return (size_t)-1;
 306             }
 307             else
 308             {
 309                 unsigned ocnt = cnt - 1;
 310                 wxUint32 res = cc & (0x3f >> cnt);
 311                 while (cnt--)
 312                 {
 313                     cc = *psz++;
 314                     if ((cc & 0xC0) != 0x80)
 315                     {
 316                         // invalid UTF-8 sequence
 317                         return (size_t)-1;
 318                     }
 319                     res = (res << 6) | (cc & 0x3f);
 320                 }
 321                 if (res <= utf8_max[ocnt])
 322                 {
 323                     // illegal UTF-8 encoding
 324                     return (size_t)-1;
 325                 }
 326 #ifdef WC_UTF16
 327                 size_t pa = encode_utf16(res, buf);
 328                 if (pa == (size_t)-1)
 329                   return (size_t)-1;
 330                 if (buf)
 331                     buf += pa;
 332                 len += pa;
 333 #else
 334                 if (buf)
 335                     *buf++ = res;
 336                 len++;
 337 #endif
 338             }
 339         }
 340     }
 341     if (buf && (len < n))
 342         *buf = 0;
 343     return len;
 344 }
 345
 346 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 347 {
 348     size_t len = 0;
 349
 350     while (*psz && ((!buf) || (len < n)))
 351     {
 352         wxUint32 cc;
 353 #ifdef WC_UTF16
 354         size_t pa = decode_utf16(psz,cc);
 355         psz += (pa == (size_t)-1) ? 1 : pa;
 356 #else
 357         cc=(*psz++) & 0x7fffffff;
 358 #endif
 359         unsigned cnt;
 360         for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 361         if (!cnt)
 362         {
 363             // plain ASCII char
 364             if (buf)
 365                 *buf++ = cc;
 366             len++;
 367         }
 368
 369         else
 370         {
 371             len += cnt + 1;
 372             if (buf)
 373             {
 374                 *buf++ = (-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt));
 375                 while (cnt--)
 376                     *buf++ = 0x80 | ((cc >> (cnt * 6)) & 0x3f);
 377             }
 378         }
 379     }
 380
 381     if (buf && (len<n)) *buf = 0;
 382     return len;
 383 }
 384
 385 // ----------------------------------------------------------------------------
 386 // specified character set
 387 // ----------------------------------------------------------------------------
 388
// Global wxCSConv instance constructed without a character set name.
WXDLLEXPORT_DATA(wxCSConv) wxConvLocal((const wxChar *)NULL);
 390
 391 #include "wx/encconv.h"
 392 #include "wx/fontmap.h"
 393
 394 // TODO: add some tables here
 395 // - perhaps common encodings to common codepages (for Win32)
 396 // - perhaps common encodings to objects ("UTF8" -> wxConvUTF8)
 397 // - move wxEncodingConverter meat in here
 398
 399 #ifdef __WIN32__
 400 #include "wx/msw/registry.h"
 401 // this should work if M$ Internet Exploiter is installed
 402 static long CharsetToCodepage(const wxChar *name)
 403 {
 404     if (!name)
 405         return GetACP();
 406
 407     long CP=-1;
 408
 409     wxString cn(name);
 410     do {
 411         wxString path(wxT("MIME\\Database\\Charset\\"));
 412         path += cn;
 413         wxRegKey key(wxRegKey::HKCR, path);
 414
 415         if (!key.Exists()) continue;
 416
 417         // two cases: either there's an AliasForCharset string,
 418         // or there are Codepage and InternetEncoding dwords.
 419         // The InternetEncoding gives us the actual encoding,
 420         // the Codepage just says which Windows character set to
 421         // use when displaying the data.
 422         if (key.HasValue(wxT("InternetEncoding")) &&
 423             key.QueryValue(wxT("InternetEncoding"), &CP)) break;
 424
 425         // no encoding, see if it's an alias
 426         if (!key.HasValue(wxT("AliasForCharset")) ||
 427             !key.QueryValue(wxT("AliasForCharset"), cn)) break;
 428     } while (1);
 429
 430     return CP;
 431 }
 432 #endif
 433
 434 class wxCharacterSet
 435 {
 436 public:
 437     wxCharacterSet(const wxChar*name)
 438         : cname(name) {}
 439     virtual ~wxCharacterSet()
 440         {}
 441     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
 442         { return (size_t)-1; }
 443     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
 444         { return (size_t)-1; }
 445     virtual bool usable()
 446         { return FALSE; }
 447 public:
 448     const wxChar*cname;
 449 };
 450
 451 class ID_CharSet : public wxCharacterSet
 452 {
 453 public:
 454     ID_CharSet(const wxChar *name,wxMBConv *cnv)
 455         : wxCharacterSet(name), work(cnv) {}
 456
 457     size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
 458         { return work ? work->MB2WC(buf,psz,n) : (size_t)-1; }
 459
 460     size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
 461         { return work ? work->WC2MB(buf,psz,n) : (size_t)-1; }
 462
 463     bool usable()
 464         { return work!=NULL; }
 465 public:
 466     wxMBConv*work;
 467 };
 468
 469
 470 #ifdef HAVE_ICONV_H
 471
 472 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
 473 //     if output buffer is _exactly_ as big as needed. Such case is (unless there's
 474 //     yet another bug in glibc) the only case when iconv() returns with (size_t)-1
 475 //     (which means error) and says there are 0 bytes left in the input buffer --
 476 //     when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
 477 //     this alternative test for iconv() failure.
 478 //     [This bug does not appear in glibc 2.2.]
 479 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
 480 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
 481                                      (errno != E2BIG || bufLeft != 0))
 482 #else
 483 #define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
 484 #endif
 485
 486 class IC_CharSet : public wxCharacterSet
 487 {
 488 public:
 489     IC_CharSet(const wxChar *name)
 490         : wxCharacterSet(name)
 491     {
 492         m2w = iconv_open(WC_NAME, wxConvLibc.cWX2MB(cname));
 493         w2m = iconv_open(wxConvLibc.cWX2MB(cname), WC_NAME);
 494     }
 495
 496     ~IC_CharSet()
 497     {
 498         if ( m2w != (iconv_t)-1 )
 499             iconv_close(m2w);
 500         if ( w2m != (iconv_t)-1 )
 501             iconv_close(w2m);
 502     }
 503
 504     size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
 505     {
 506         size_t inbuf = strlen(psz);
 507         size_t outbuf = n * SIZEOF_WCHAR_T;
 508         size_t res, cres;
 509         // VS: Use these instead of psz, buf because iconv() modifies its arguments:
 510         wchar_t *bufPtr = buf;
 511         const char *pszPtr = psz;
 512
 513         if (buf)
 514         {
 515             // have destination buffer, convert there
 516 #ifdef WX_ICONV_TAKES_CHAR
 517             cres = iconv(m2w, (char**)&pszPtr, &inbuf, (char**)&bufPtr, &outbuf);
 518 #else
 519             cres = iconv(m2w, &pszPtr, &inbuf, (char**)&bufPtr, &outbuf);
 520 #endif
 521             res = n - (outbuf / SIZEOF_WCHAR_T);
 522             // convert to native endianness
 523 #ifdef WC_NEED_BSWAP
 524             WC_BSWAP(buf /* _not_ bufPtr */, res)
 525 #endif
 526         }
 527         else
 528         {
 529             // no destination buffer... convert using temp buffer
 530             // to calculate destination buffer requirement
 531             wchar_t tbuf[8];
 532             res = 0;
 533             do {
 534                 bufPtr = tbuf; outbuf = 8*SIZEOF_WCHAR_T;
 535 #ifdef WX_ICONV_TAKES_CHAR
 536                 cres = iconv( m2w, (char**)&pszPtr, &inbuf, (char**)&bufPtr, &outbuf );
 537 #else
 538                 cres = iconv( m2w, &pszPtr, &inbuf, (char**)&bufPtr, &outbuf );
 539 #endif
 540                 res += 8-(outbuf/SIZEOF_WCHAR_T);
 541             } while ((cres==(size_t)-1) && (errno==E2BIG));
 542         }
 543
 544         if (ICONV_FAILED(cres, inbuf))
 545             return (size_t)-1;
 546
 547         return res;
 548     }
 549
 550     size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
 551     {
 552 #if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
 553         size_t inbuf = std::wcslen(psz) * SIZEOF_WCHAR_T;
 554 #else
 555         size_t inbuf = ::wcslen(psz) * SIZEOF_WCHAR_T;
 556 #endif
 557         size_t outbuf = n;
 558         size_t res, cres;
 559
 560 #ifdef WC_NEED_BSWAP
 561         // need to copy to temp buffer to switch endianness
 562         // this absolutely doesn't rock!
 563         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
 564         //  could be in read-only memory, or be accessed in some other thread)
 565         wchar_t *tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
 566         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
 567         WC_BSWAP(tmpbuf, inbuf)
 568         psz=tmpbuf;
 569 #endif
 570         if (buf)
 571         {
 572             // have destination buffer, convert there
 573 #ifdef WX_ICONV_TAKES_CHAR
 574             cres = iconv( w2m, (char**)&psz, &inbuf, &buf, &outbuf );
 575 #else
 576             cres = iconv( w2m, (const char**)&psz, &inbuf, &buf, &outbuf );
 577 #endif
 578             res = n-outbuf;
 579         }
 580         else
 581         {
 582             // no destination buffer... convert using temp buffer
 583             // to calculate destination buffer requirement
 584             char tbuf[16];
 585             res = 0;
 586             do {
 587                 buf = tbuf; outbuf = 16;
 588 #ifdef WX_ICONV_TAKES_CHAR
 589                 cres = iconv( w2m, (char**)&psz, &inbuf, &buf, &outbuf );
 590 #else
 591                 cres = iconv( w2m, (const char**)&psz, &inbuf, &buf, &outbuf );
 592 #endif
 593                 res += 16 - outbuf;
 594             } while ((cres==(size_t)-1) && (errno==E2BIG));
 595         }
 596 #ifdef WC_NEED_BSWAP
 597         free(tmpbuf);
 598 #endif
 599         if (ICONV_FAILED(cres, inbuf))
 600             return (size_t)-1;
 601
 602         return res;
 603     }
 604
 605     bool usable()
 606         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
 607
 608 public:
 609     iconv_t m2w, w2m;
 610 };
 611 #endif
 612
 613 #ifdef __WIN32__
 614 class CP_CharSet : public wxCharacterSet
 615 {
 616 public:
 617     CP_CharSet(const wxChar*name)
 618         : wxCharacterSet(name), CodePage(CharsetToCodepage(name)) {}
 619
 620     size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
 621     {
 622         size_t len =
 623             MultiByteToWideChar(CodePage, 0, psz, -1, buf, buf ? n : 0);
 624         return len ? len : (size_t)-1;
 625     }
 626
 627     size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
 628     {
 629         size_t len = WideCharToMultiByte(CodePage, 0, psz, -1, buf,
 630                                          buf ? n : 0, NULL, NULL);
 631         return len ? len : (size_t)-1;
 632     }
 633
 634     bool usable()
 635         { return CodePage != -1; }
 636
 637 public:
 638     long CodePage;
 639 };
 640 #endif
 641
 642 class EC_CharSet : public wxCharacterSet
 643 {
 644 public:
 645     // temporarily just use wxEncodingConverter stuff,
 646     // so that it works while a better implementation is built
 647     EC_CharSet(const wxChar*name) : wxCharacterSet(name),
 648                                     enc(wxFONTENCODING_SYSTEM)
 649     {
 650         if (name)
 651             enc = wxTheFontMapper->CharsetToEncoding(name, FALSE);
 652         m2w.Init(enc, wxFONTENCODING_UNICODE);
 653         w2m.Init(wxFONTENCODING_UNICODE, enc);
 654     }
 655
 656     size_t MB2WC(wchar_t *buf, const char *psz, size_t n)
 657     {
 658         size_t inbuf = strlen(psz);
 659         if (buf)
 660             m2w.Convert(psz,buf);
 661         return inbuf;
 662     }
 663
 664     size_t WC2MB(char *buf, const wchar_t *psz, size_t n)
 665     {
 666 #if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
 667         size_t inbuf = std::wcslen(psz);
 668 #else
 669         size_t inbuf = ::wcslen(psz);
 670 #endif
 671         if (buf)
 672             w2m.Convert(psz,buf);
 673
 674         return inbuf;
 675     }
 676
 677     bool usable()
 678         { return (enc!=wxFONTENCODING_SYSTEM) && (enc!=wxFONTENCODING_DEFAULT); }
 679
 680 public:
 681     wxFontEncoding enc;
 682     wxEncodingConverter m2w, w2m;
 683 };
 684
 685 static wxCharacterSet *wxGetCharacterSet(const wxChar *name)
 686 {
 687     wxCharacterSet *cset = NULL;
 688     if (name)
 689     {
 690         if (wxStricmp(name, wxT("UTF8")) == 0 || wxStricmp(name, wxT("UTF-8")) == 0)
 691         {
 692             cset = new ID_CharSet(name, &wxConvUTF8);
 693         }
 694         else
 695         {
 696 #ifdef HAVE_ICONV_H
 697             cset = new IC_CharSet(name); // may not take NULL
 698 #endif
 699         }
 700     }
 701
 702     if (cset && cset->usable()) return cset;
 703     if (cset) delete cset;
 704     cset = NULL;
 705 #ifdef __WIN32__
 706     cset = new CP_CharSet(name); // may take NULL
 707     if (cset->usable()) return cset;
 708 #endif
 709     if (cset) delete cset;
 710     cset = new EC_CharSet(name);
 711     if (cset->usable()) return cset;
 712     delete cset;
 713     wxLogError(_("Unknown encoding '%s'!"), name);
 714     return NULL;
 715 }
 716
 717 wxCSConv::wxCSConv(const wxChar *charset)
 718 {
 719     m_name = (wxChar *) NULL;
 720     m_cset = (wxCharacterSet *) NULL;
 721     m_deferred = TRUE;
 722     SetName(charset);
 723 }
 724
 725 wxCSConv::~wxCSConv()
 726 {
 727     if (m_name) free(m_name);
 728     if (m_cset) delete m_cset;
 729 }
 730
 731 void wxCSConv::SetName(const wxChar *charset)
 732 {
 733     if (charset)
 734     {
 735         m_name = wxStrdup(charset);
 736         m_deferred = TRUE;
 737     }
 738 }
 739
 740 void wxCSConv::LoadNow()
 741 {
 742     if (m_deferred)
 743     {
 744         if (!m_name)
 745         {
 746 #ifdef __UNIX__
 747 #if defined(HAVE_LANGINFO_H) && defined(CODESET)
 748             // GNU libc provides current character set this way
 749             char *alang = nl_langinfo(CODESET);
 750             if (alang)
 751             {
 752                 SetName(wxConvLibc.cMB2WX(alang));
 753             }
 754             else
 755 #endif
 756             {
 757                 // if we can't get at the character set directly,
 758                 // try to see if it's in the environment variables
 759                 // (in most cases this won't work, but I was out of ideas)
 760                 wxChar *lang = wxGetenv(wxT("LC_ALL"));
 761                 if (!lang)
 762                     lang = wxGetenv(wxT("LC_CTYPE"));
 763                 if (!lang)
 764                     lang = wxGetenv(wxT("LANG"));
 765                 wxChar *dot = lang ? wxStrchr(lang, wxT('.')) : (wxChar *)NULL;
 766                 if (dot)
 767                     SetName(dot+1);
 768             }
 769 #endif
 770         }
 771         m_cset = wxGetCharacterSet(m_name);
 772         m_deferred = FALSE;
 773     }
 774 }
 775
 776 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 777 {
 778     ((wxCSConv *)this)->LoadNow(); // discard constness
 779
 780     if (m_cset)
 781         return m_cset->MB2WC(buf, psz, n);
 782
 783     // latin-1 (direct)
 784     size_t len = strlen(psz);
 785
 786     if (buf)
 787     {
 788         for (size_t c = 0; c <= len; c++)
 789             buf[c] = (unsigned char)(psz[c]);
 790     }
 791
 792     return len;
 793 }
 794
 795 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 796 {
 797     ((wxCSConv *)this)->LoadNow(); // discard constness
 798
 799     if (m_cset)
 800         return m_cset->WC2MB(buf, psz, n);
 801
 802     // latin-1 (direct)
 803 #if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
 804     size_t len=std::wcslen(psz);
 805 #else
 806     size_t len=::wcslen(psz);
 807 #endif
 808     if (buf)
 809     {
 810         for (size_t c = 0; c <= len; c++)
 811             buf[c] = (psz[c] > 0xff) ? '?' : psz[c];
 812     }
 813
 814     return len;
 815 }
 816
 817 #ifdef HAVE_ICONV_H
 818 class IC_CharSetConverter
 819 {
 820 public:
 821     IC_CharSetConverter(IC_CharSet *from, IC_CharSet *to)
 822     {
 823         cnv = iconv_open(wxConvLibc.cWX2MB(to->cname),
 824                          wxConvLibc.cWX2MB(from->cname));
 825     }
 826
 827     ~IC_CharSetConverter()
 828     {
 829         if (cnv != (iconv_t)-1)
 830             iconv_close(cnv);
 831     }
 832
 833     size_t Convert(char *buf, const char *psz, size_t n)
 834     {
 835         size_t inbuf = strlen(psz);
 836         size_t outbuf = n;
 837 #ifdef WX_ICONV_TAKES_CHAR
 838         size_t res = iconv( cnv, (char**)&psz, &inbuf, &buf, &outbuf );
 839 #else
 840         size_t res = iconv( cnv, &psz, &inbuf, &buf, &outbuf );
 841 #endif
 842         if (res == (size_t)-1)
 843             return (size_t)-1;
 844         return (n - outbuf);
 845     }
 846
 847 public:
 848     iconv_t cnv;
 849 };
 850 #endif
 851
 852 class EC_CharSetConverter
 853 {
 854 public:
 855     EC_CharSetConverter(EC_CharSet*from,EC_CharSet*to)
 856         { cnv.Init(from->enc,to->enc); }
 857
 858     size_t Convert(char*buf, const char*psz, size_t n)
 859     {
 860         size_t inbuf = strlen(psz);
 861         if (buf) cnv.Convert(psz,buf);
 862         return inbuf;
 863     }
 864
 865 public:
 866     wxEncodingConverter cnv;
 867 };
 868
 869 #else // !wxUSE_WCHAR_T
 870
 871 // ----------------------------------------------------------------------------
 872 // stand-ins in absence of wchar_t
 873 // ----------------------------------------------------------------------------
 874
// Without wchar_t support (wxUSE_WCHAR_T is 0) both global converters are
// plain wxMBConv objects.
WXDLLEXPORT_DATA(wxMBConv) wxConvLibc, wxConvFile;
 876
 877 #endif // wxUSE_WCHAR_T
 878
 879