src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin
   9 // Licence:     wxWindows license
  10 /////////////////////////////////////////////////////////////////////////////
  11
  12 // ============================================================================
  13 // declarations
  14 // ============================================================================
  15
  16 // ----------------------------------------------------------------------------
  17 // headers
  18 // ----------------------------------------------------------------------------
  19
  20 #ifdef __GNUG__
  21   #pragma implementation "strconv.h"
  22 #endif
  23
  24 // For compilers that support precompilation, includes "wx.h".
  25 #include "wx/wxprec.h"
  26
  27 #ifdef __BORLANDC__
  28   #pragma hdrstop
  29 #endif
  30
  31 #ifdef __WXMSW__
  32   #include "wx/msw/private.h"
  33 #endif
  34
  35 #include <errno.h>
  36 #include <ctype.h>
  37 #include <string.h>
  38 #include <stdlib.h>
  39
  40 #ifdef __SALFORDC__
  41   #include <clib.h>
  42 #endif
  43
  44 #ifdef HAVE_ICONV_H
  45   #include <iconv.h>
  46 #endif
  47 #ifdef HAVE_LANGINFO_H
  48   #include <langinfo.h>
  49 #endif
  50
  51 #ifdef __WXMSW__
  52   #include <windows.h>
  53 #endif
  54
  55 #include "wx/debug.h"
  56 #include "wx/strconv.h"
  57
// Byte-swapping helpers applied to buffers of 32-bit (UCS4) or 16-bit (UCS2)
// units. If WORDS_BIGENDIAN or __STDC_ISO_10646__ is defined they expand to
// nothing; otherwise every element of str[0..len) is swapped in place and
// WC_NEED_BSWAP is defined so that the code below knows swapping is required.
#if defined(WORDS_BIGENDIAN) || defined(__STDC_ISO_10646__)
#define BSWAP_UCS4(str, len)
#define BSWAP_UCS2(str, len)
#else
// NB: the macro arguments are used unparenthesised and each is evaluated
//     several times, so only plain variables should be passed
#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
#define BSWAP_UCS2(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
#define WC_NEED_BSWAP
#endif
// the UTF-32/UTF-16 variants simply reuse the UCS-4/UCS-2 swappers
#define BSWAP_UTF32(str, len) BSWAP_UCS4(str, len)
#define BSWAP_UTF16(str, len) BSWAP_UCS2(str, len)

// WC_NAME is the encoding name passed to iconv_open() for wchar_t data and
// WC_BSWAP the matching swapper; WC_UTF16 is defined when wchar_t is 2 bytes.
// Nothing is defined here if SIZEOF_WCHAR_T is neither 4 nor 2.
#if SIZEOF_WCHAR_T == 4
#define WC_NAME "UCS4"
#define WC_BSWAP BSWAP_UCS4
#elif SIZEOF_WCHAR_T == 2
#define WC_NAME "UTF16"
#define WC_BSWAP BSWAP_UTF16
#define WC_UTF16
#endif
  77
  78 // ----------------------------------------------------------------------------
  79 // globals
  80 // ----------------------------------------------------------------------------
  81
  82 WXDLLEXPORT_DATA(wxMBConv *) wxConvCurrent = &wxConvLibc;
  83
  84 // ============================================================================
  85 // implementation
  86 // ============================================================================
  87
  88 #if wxUSE_WCHAR_T
  89
  90 #ifdef WC_UTF16
  91
  92 static size_t encode_utf16(wxUint32 input,wxUint16*output)
  93 {
  94   if (input<=0xffff) {
  95     if (output) *output++ = input;
  96     return 1;
  97   } else
  98   if (input>=0x110000) {
  99     return (size_t)-1;
 100   } else {
 101     if (output) {
 102       *output++ = (input >> 10)+0xd7c0;
 103       *output++ = (input&0x3ff)+0xdc00;
 104     }
 105     return 2;
 106   }
 107 }
 108
 109 static size_t decode_utf16(wxUint16*input,wxUint32&output)
 110 {
 111   if ((*input<0xd800) || (*input>0xdfff)) {
 112     output = *input;
 113     return 1;
 114   } else
 115   if ((input[1]<0xdc00) || (input[1]>=0xdfff)) {
 116     output = *input;
 117     return (size_t)-1;
 118   } else {
 119     output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 120     return 2;
 121   }
 122 }
 123
 124 #endif // WC_UTF16
 125
 126 // ----------------------------------------------------------------------------
 127 // wxMBConv
 128 // ----------------------------------------------------------------------------
 129
 130 WXDLLEXPORT_DATA(wxMBConv) wxConvLibc;
 131
 132 size_t wxMBConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 133 {
 134     return wxMB2WC(buf, psz, n);
 135 }
 136
 137 size_t wxMBConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 138 {
 139     return wxWC2MB(buf, psz, n);
 140 }
 141
 142 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 143 {
 144     if (psz)
 145     {
 146         size_t nLen = MB2WC((wchar_t *) NULL, psz, 0);
 147         if (nLen == (size_t)-1)
 148             return wxWCharBuffer((wchar_t *) NULL);
 149         wxWCharBuffer buf(nLen);
 150         MB2WC((wchar_t *)(const wchar_t *) buf, psz, nLen);
 151         return buf;
 152     }
 153     else
 154         return wxWCharBuffer((wchar_t *) NULL);
 155 }
 156
 157 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *psz) const
 158 {
 159     if (psz)
 160     {
 161         size_t nLen = WC2MB((char *) NULL, psz, 0);
 162         if (nLen == (size_t)-1)
 163             return wxCharBuffer((char *) NULL);
 164         wxCharBuffer buf(nLen);
 165         WC2MB((char *)(const char *) buf, psz, nLen);
 166         return buf;
 167     }
 168     else
 169         return wxCharBuffer((char *) NULL);
 170 }
 171
 172 // ----------------------------------------------------------------------------
 173 // standard file conversion
 174 // ----------------------------------------------------------------------------
 175
 176 WXDLLEXPORT_DATA(wxMBConvFile) wxConvFile;
 177
 178 // just use the libc conversion for now
 179 size_t wxMBConvFile::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 180 {
 181     return wxMB2WC(buf, psz, n);
 182 }
 183
 184 size_t wxMBConvFile::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 185 {
 186     return wxWC2MB(buf, psz, n);
 187 }
 188
 189 // ----------------------------------------------------------------------------
 190 // standard gdk conversion
 191 // ----------------------------------------------------------------------------
 192
 193 #ifdef __WXGTK12__
 194
 195 WXDLLEXPORT_DATA(wxMBConvGdk) wxConvGdk;
 196
 197 #include <gdk/gdk.h>
 198
 199 size_t wxMBConvGdk::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 200 {
 201   if (buf) {
 202     return gdk_mbstowcs((GdkWChar *)buf, psz, n);
 203   } else {
 204     GdkWChar *nbuf = new GdkWChar[n=strlen(psz)];
 205     size_t len = gdk_mbstowcs(nbuf, psz, n);
 206     delete [] nbuf;
 207     return len;
 208   }
 209 }
 210
 211 size_t wxMBConvGdk::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 212 {
 213   char *mbstr = gdk_wcstombs((GdkWChar *)psz);
 214   size_t len = mbstr ? strlen(mbstr) : 0;
 215   if (buf) {
 216     if (len > n) len = n;
 217     memcpy(buf, psz, len);
 218     if (len < n) buf[len] = 0;
 219   }
 220   return len;
 221 }
 222
 223 #endif // GTK > 1.0
 224
 225 // ----------------------------------------------------------------------------
 226 // UTF-7
 227 // ----------------------------------------------------------------------------
 228
 229 WXDLLEXPORT_DATA(wxMBConvUTF7) wxConvUTF7;
 230
 231 #if 0
 232 static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 233                         "abcdefghijklmnopqrstuvwxyz"
 234                         "0123456789'(),-./:?";
 235 static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
 236 static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 237                         "abcdefghijklmnopqrstuvwxyz"
 238                         "0123456789+/";
 239 #endif
 240
 241 // TODO: write actual implementations of UTF-7 here
// Placeholder: UTF-7 decoding is not implemented. All arguments are ignored,
// nothing is written and 0 is always returned.
size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
                           const char * WXUNUSED(psz),
                           size_t WXUNUSED(n)) const
{
  return 0;
}
 248
// Placeholder: UTF-7 encoding is not implemented. All arguments are ignored,
// nothing is written and 0 is always returned.
size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
                           const wchar_t * WXUNUSED(psz),
                           size_t WXUNUSED(n)) const
{
  return 0;
}
 255
 256 // ----------------------------------------------------------------------------
 257 // UTF-8
 258 // ----------------------------------------------------------------------------
 259
 260 WXDLLEXPORT_DATA(wxMBConvUTF8) wxConvUTF8;
 261
// utf8_max[i] is the largest value that fits in a UTF-8 sequence of i+1
// bytes (7, 11, 16, 21, 26 and 31 payload bits); the final 0xffffffff entry
// bounds the search loop in wxMBConvUTF8::WC2MB(). wxMBConvUTF8::MB2WC()
// uses the table to reject overlong encodings.
static wxUint32 utf8_max[]={0x7f,0x7ff,0xffff,0x1fffff,0x3ffffff,0x7fffffff,0xffffffff};
 263
 264 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 265 {
 266   size_t len = 0;
 267
 268   while (*psz && ((!buf) || (len<n))) {
 269     unsigned char cc=*psz++, fc=cc;
 270     unsigned cnt;
 271     for (cnt=0; fc&0x80; cnt++) fc<<=1;
 272     if (!cnt) {
 273       // plain ASCII char
 274       if (buf) *buf++=cc;
 275       len++;
 276     } else {
 277       cnt--;
 278       if (!cnt) {
 279         // invalid UTF-8 sequence
 280         return (size_t)-1;
 281       } else {
 282         unsigned ocnt=cnt-1;
 283         wxUint32 res=cc&(0x3f>>cnt);
 284         while (cnt--) {
 285           cc = *psz++;
 286           if ((cc&0xC0)!=0x80) {
 287             // invalid UTF-8 sequence
 288             return (size_t)-1;
 289           }
 290           res=(res<<6)|(cc&0x3f);
 291         }
 292         if (res<=utf8_max[ocnt]) {
 293           // illegal UTF-8 encoding
 294           return (size_t)-1;
 295         }
 296 #ifdef WC_UTF16
 297         size_t pa = encode_utf16(res, buf);
 298         if (pa == (size_t)-1)
 299           return (size_t)-1;
 300         if (buf) buf+=pa;
 301         len+=pa;
 302 #else
 303         if (buf) *buf++=res;
 304         len++;
 305 #endif
 306       }
 307     }
 308   }
 309   if (buf && (len<n)) *buf = 0;
 310   return len;
 311 }
 312
 313 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 314 {
 315   size_t len = 0;
 316
 317   while (*psz && ((!buf) || (len<n))) {
 318     wxUint32 cc;
 319 #ifdef WC_UTF16
 320     size_t pa = decode_utf16(psz,cc);
 321     psz += (pa == (size_t)-1) ? 1 : pa;
 322 #else
 323     cc=(*psz++)&0x7fffffff;
 324 #endif
 325     unsigned cnt;
 326     for (cnt=0; cc>utf8_max[cnt]; cnt++);
 327     if (!cnt) {
 328       // plain ASCII char
 329       if (buf) *buf++=cc;
 330       len++;
 331     } else {
 332       len+=cnt+1;
 333       if (buf) {
 334         *buf++=(-128>>cnt)|((cc>>(cnt*6))&(0x3f>>cnt));
 335         while (cnt--)
 336           *buf++=0x80|((cc>>(cnt*6))&0x3f);
 337       }
 338     }
 339   }
 340   if (buf && (len<n)) *buf = 0;
 341   return len;
 342 }
 343
 344 // ----------------------------------------------------------------------------
 345 // specified character set
 346 // ----------------------------------------------------------------------------
 347
 348 WXDLLEXPORT_DATA(wxCSConv) wxConvLocal((const wxChar *)NULL);
 349
 350 #include "wx/encconv.h"
 351 #include "wx/fontmap.h"
 352
 353 // TODO: add some tables here
 354 // - perhaps common encodings to common codepages (for Win32)
 355 // - perhaps common encodings to objects ("UTF8" -> wxConvUTF8)
 356 // - move wxEncodingConverter meat in here
 357
 358 #ifdef __WIN32__
 359 #include "wx/msw/registry.h"
 360 // this should work if M$ Internet Exploiter is installed
 361 static long CharsetToCodepage(const wxChar *name)
 362 {
 363     if (!name)
 364         return GetACP();
 365
 366     long CP=-1;
 367
 368     wxString cn(name);
 369     do {
 370         wxString path( wxT("MIME\\Database\\Charset\\") );
 371         path += cn;
 372         wxRegKey key( wxRegKey::HKCR, path );
 373
 374         /* two cases: either there's an AliasForCharset string,
 375          * or there are Codepage and InternetEncoding dwords.
 376          * The InternetEncoding gives us the actual encoding,
 377          * the Codepage just says which Windows character set to
 378          * use when displaying the data.
 379          */
 380         if (key.QueryValue( wxT("InternetEncoding"), &CP )) break;
 381
 382         // no encoding, see if it's an alias
 383         if (!key.QueryValue( wxT("AliasForCharset"), cn )) break;
 384     } while (1);
 385
 386     return CP;
 387 }
 388 #endif
 389
 390 class wxCharacterSet
 391 {
 392 public:
 393     wxCharacterSet(const wxChar*name)
 394         : cname(name) {}
 395     virtual ~wxCharacterSet()
 396         {}
 397     virtual size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
 398         { return (size_t)-1; }
 399     virtual size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
 400         { return (size_t)-1; }
 401     virtual bool usable()
 402         { return FALSE; }
 403 public:
 404     const wxChar*cname;
 405 };
 406
 407 class ID_CharSet : public wxCharacterSet
 408 {
 409 public:
 410     ID_CharSet(const wxChar*name,wxMBConv*cnv)
 411         : wxCharacterSet(name), work(cnv) {}
 412
 413     size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
 414         { return work ? work->MB2WC(buf,psz,n) : (size_t)-1; }
 415
 416     size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
 417         { return work ? work->WC2MB(buf,psz,n) : (size_t)-1; }
 418
 419     bool usable()
 420         { return work!=NULL; }
 421 public:
 422     wxMBConv*work;
 423 };
 424
 425 #ifdef HAVE_ICONV_H
 426 class IC_CharSet : public wxCharacterSet
 427 {
 428 public:
 429     IC_CharSet(const wxChar*name)
 430         : wxCharacterSet(name), m2w((iconv_t)-1), w2m((iconv_t)-1) {}
 431     ~IC_CharSet()
 432     {
 433         if (m2w!=(iconv_t)-1) iconv_close(m2w);
 434         if (w2m!=(iconv_t)-1) iconv_close(w2m);
 435     }
 436
 437     void LoadM2W()
 438     {
 439         if (m2w==(iconv_t)-1)
 440             m2w=iconv_open(WC_NAME,wxConvLibc.cWX2MB(cname));
 441     }
 442
 443     void LoadW2M()
 444     {
 445         if (w2m==(iconv_t)-1)
 446             w2m=iconv_open(wxConvLibc.cWX2MB(cname),WC_NAME);
 447     }
 448
 449     size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
 450     {
 451         LoadM2W();
 452         size_t inbuf = strlen(psz);
 453         size_t outbuf = n*SIZEOF_WCHAR_T;
 454         size_t res, cres;
 455         fprintf(stderr,"IC Convert to WC using %s\n",(const char*)wxConvLibc.cWX2MB(cname));
 456         if (buf)
 457         {
 458             // have destination buffer, convert there
 459 #ifdef WX_ICONV_TAKES_CHAR
 460             cres = iconv( m2w, (char**)&psz, &inbuf, (char**)&buf, &outbuf );
 461 #else
 462             cres = iconv( m2w, &psz, &inbuf, (char**)&buf, &outbuf );
 463 #endif
 464             res = n-(outbuf/SIZEOF_WCHAR_T);
 465             // convert to native endianness
 466             WC_BSWAP(buf, res)
 467         }
 468         else
 469         {
 470             // no destination buffer... convert using temp buffer
 471             // to calculate destination buffer requirement
 472             wchar_t tbuf[8];
 473             res = 0;
 474             do {
 475                 buf = tbuf; outbuf = 8*SIZEOF_WCHAR_T;
 476 #ifdef WX_ICONV_TAKES_CHAR
 477                 cres = iconv( m2w, (char**)&psz, &inbuf, (char**)&buf, &outbuf );
 478 #else
 479                 cres = iconv( m2w, &psz, &inbuf, (char**)&buf, &outbuf );
 480 #endif
 481                 res += 8-(outbuf/SIZEOF_WCHAR_T);
 482             } while ((cres==(size_t)-1) && (errno==E2BIG));
 483         }
 484
 485         if (cres==(size_t)-1)
 486             return (size_t)-1;
 487
 488         return res;
 489     }
 490
 491     size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
 492     {
 493         LoadW2M();
 494 #if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
 495         size_t inbuf = std::wcslen(psz);
 496 #else
 497         size_t inbuf = ::wcslen(psz);
 498 #endif
 499         size_t outbuf = n;
 500         size_t res, cres;
 501         fprintf(stderr,"IC Convert from WC using %s\n",(const char*)wxConvLibc.cWX2MB(cname));
 502 #ifdef WC_NEED_BSWAP
 503         // need to copy to temp buffer to switch endianness
 504         // this absolutely doesn't rock!
 505         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
 506         //  could be in read-only memory, or be accessed in some other thread)
 507         wchar_t*tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
 508         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
 509         WC_BSWAP(tmpbuf, inbuf)
 510         psz=tmpbuf;
 511 #endif
 512         if (buf)
 513         {
 514             // have destination buffer, convert there
 515 #ifdef WX_ICONV_TAKES_CHAR
 516             cres = iconv( w2m, (char**)&psz, &inbuf, &buf, &outbuf );
 517 #else
 518             cres = iconv( w2m, (const char**)&psz, &inbuf, &buf, &outbuf );
 519 #endif
 520             res = n-outbuf;
 521         }
 522         else
 523         {
 524             // no destination buffer... convert using temp buffer
 525             // to calculate destination buffer requirement
 526             char tbuf[16];
 527             res = 0;
 528             do {
 529                 buf = tbuf; outbuf = 16;
 530 #ifdef WX_ICONV_TAKES_CHAR
 531                 cres = iconv( w2m, (char**)&psz, &inbuf, &buf, &outbuf );
 532 #else
 533                 cres = iconv( w2m, (const char**)&psz, &inbuf, &buf, &outbuf );
 534 #endif
 535                 res += 16 - outbuf;
 536             } while ((cres==(size_t)-1) && (errno==E2BIG));
 537         }
 538 #ifdef WC_NEED_BSWAP
 539         free(tmpbuf);
 540 #endif
 541         if (cres==(size_t)-1)
 542             return (size_t)-1;
 543
 544         return res;
 545     }
 546
 547     bool usable()
 548         { return TRUE; }
 549
 550 public:
 551     iconv_t m2w, w2m;
 552 };
 553 #endif
 554
 555 #ifdef __WIN32__
 556 class CP_CharSet : public wxCharacterSet
 557 {
 558 public:
 559     CP_CharSet(const wxChar*name)
 560         : wxCharacterSet(name), CodePage(CharsetToCodepage(name)) {}
 561
 562     size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
 563     {
 564         size_t len = MultiByteToWideChar(CodePage,0,psz,-1,buf,buf?n:0);
 565         return len ? len : (size_t)-1;
 566     }
 567
 568     size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
 569     {
 570         size_t len = WideCharToMultiByte(CodePage,0,psz,-1,buf,buf?n:0,NULL,NULL);
 571         return len ? len : (size_t)-1;
 572     }
 573
 574     bool usable()
 575         { return CodePage!=-1; }
 576
 577 public:
 578     long CodePage;
 579 };
 580 #endif
 581
 582 class EC_CharSet : public wxCharacterSet
 583 {
 584 public:
 585     // temporarily just use wxEncodingConverter stuff,
 586     // so that it works while a better implementation is built
 587     EC_CharSet(const wxChar*name) : wxCharacterSet(name), enc(wxFONTENCODING_SYSTEM)
 588     {
 589         if (name)
 590             enc = wxTheFontMapper->CharsetToEncoding(name, FALSE);
 591         m2w.Init(enc, wxFONTENCODING_UNICODE);
 592         w2m.Init(wxFONTENCODING_UNICODE, enc);
 593     }
 594
 595     size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
 596     {
 597         size_t inbuf = strlen(psz);
 598         fprintf(stderr,"EC Convert to WC using %d\n",enc);
 599         if (buf) m2w.Convert(psz,buf);
 600         return inbuf;
 601     }
 602
 603     size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
 604     {
 605 #if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
 606         size_t inbuf = std::wcslen(psz);
 607 #else
 608         size_t inbuf = ::wcslen(psz);
 609 #endif
 610         fprintf(stderr,"EC Convert from WC using %d\n",enc);
 611         if (buf)
 612             w2m.Convert(psz,buf);
 613
 614         return inbuf;
 615     }
 616
 617     bool usable()
 618         { return (enc!=wxFONTENCODING_SYSTEM) && (enc!=wxFONTENCODING_DEFAULT); }
 619
 620 public:
 621     wxFontEncoding enc;
 622     wxEncodingConverter m2w, w2m;
 623 };
 624
 625 static wxCharacterSet *wxGetCharacterSet(const wxChar *name)
 626 {
 627     wxCharacterSet *cset = NULL;
 628     if (name)
 629     {
 630         if (!wxStricmp(name, wxT("UTF8")) || !wxStricmp(name, wxT("UTF-8")))
 631         {
 632             cset = new ID_CharSet(name, &wxConvUTF8);
 633         }
 634         else
 635         {
 636 #ifdef HAVE_ICONV_H
 637             cset = new IC_CharSet(name); // may not take NULL
 638 #endif
 639         }
 640     }
 641
 642     if (cset && cset->usable()) return cset;
 643     if (cset) delete cset;
 644 #ifdef __WIN32__
 645     cset = new CP_CharSet(name); // may take NULL
 646     if (cset->usable()) return cset;
 647 #endif
 648     if (cset) delete cset;
 649     cset = new EC_CharSet(name);
 650     if (cset->usable()) return cset;
 651     delete cset;
 652     return NULL;
 653 }
 654
 655 wxCSConv::wxCSConv(const wxChar *charset)
 656 {
 657     m_name = (wxChar *) NULL;
 658     m_cset = (wxCharacterSet *) NULL;
 659     m_deferred = TRUE;
 660     SetName(charset);
 661 }
 662
 663 wxCSConv::~wxCSConv()
 664 {
 665     if (m_name) free(m_name);
 666     if (m_cset) delete m_cset;
 667 }
 668
 669 void wxCSConv::SetName(const wxChar *charset)
 670 {
 671     if (charset)
 672     {
 673         m_name = wxStrdup(charset);
 674         m_deferred = TRUE;
 675     }
 676 }
 677
 678 void wxCSConv::LoadNow()
 679 {
 680 //  wxPrintf(wxT("Conversion request\n"));
 681     if (m_deferred)
 682     {
 683         if (!m_name)
 684         {
 685 #ifdef __UNIX__
 686 #if defined(HAVE_LANGINFO_H) && defined(CODESET)
 687             // GNU libc provides current character set this way
 688             char*alang = nl_langinfo(CODESET);
 689             if (alang)
 690             {
 691                 SetName(wxConvLibc.cMB2WX(alang));
 692             }
 693             else
 694 #endif
 695             {
 696                 // if we can't get at the character set directly,
 697                 // try to see if it's in the environment variables
 698                 // (in most cases this won't work, but I was out of ideas)
 699                 wxChar *lang = wxGetenv(wxT("LC_ALL"));
 700                 if (!lang) lang = wxGetenv(wxT("LC_CTYPE"));
 701                 if (!lang) lang = wxGetenv(wxT("LANG"));
 702                 wxChar *dot = lang ? wxStrchr(lang, wxT('.')) : (wxChar *)NULL;
 703                 if (dot) SetName(dot+1);
 704             }
 705 #endif
 706         }
 707         m_cset = wxGetCharacterSet(m_name);
 708         m_deferred = FALSE;
 709     }
 710 }
 711
 712 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 713 {
 714     ((wxCSConv *)this)->LoadNow(); // discard constness
 715
 716     if (m_cset)
 717         return m_cset->MB2WC(buf, psz, n);
 718
 719     // latin-1 (direct)
 720     size_t len=strlen(psz);
 721
 722     if (buf)
 723     {
 724         for (size_t c=0; c<=len; c++)
 725             buf[c] = (unsigned char)(psz[c]);
 726     }
 727
 728     return len;
 729 }
 730
 731 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 732 {
 733     ((wxCSConv *)this)->LoadNow(); // discard constness
 734
 735     if (m_cset)
 736         return m_cset->WC2MB(buf, psz, n);
 737
 738     // latin-1 (direct)
 739 #if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
 740     size_t len=std::wcslen(psz);
 741 #else
 742     size_t len=::wcslen(psz);
 743 #endif
 744     if (buf)
 745     {
 746         for (size_t c=0; c<=len; c++)
 747             buf[c] = (psz[c]>0xff) ? '?' : psz[c];
 748     }
 749
 750     return len;
 751 }
 752
 753 #ifdef HAVE_ICONV_H
 754 class IC_CharSetConverter
 755 {
 756 public:
 757     IC_CharSetConverter(IC_CharSet*from,IC_CharSet*to)
 758         { cnv = iconv_open(wxConvLibc.cWX2MB(to->cname),wxConvLibc.cWX2MB(from->cname)); }
 759
 760     ~IC_CharSetConverter()
 761         { if (cnv!=(iconv_t)-1) iconv_close(cnv); }
 762
 763     size_t Convert(char*buf, const char*psz, size_t n)
 764     {
 765         size_t inbuf = strlen(psz);
 766         size_t outbuf = n;
 767 #ifdef WX_ICONV_TAKES_CHAR
 768         size_t res = iconv( cnv, (char**)&psz, &inbuf, &buf, &outbuf );
 769 #else
 770         size_t res = iconv( cnv, &psz, &inbuf, &buf, &outbuf );
 771 #endif
 772         if (res==(size_t)-1) return (size_t)-1;
 773         return n-outbuf;
 774     }
 775
 776 public:
 777     iconv_t cnv;
 778 };
 779 #endif
 780
 781 class EC_CharSetConverter
 782 {
 783 public:
 784     EC_CharSetConverter(EC_CharSet*from,EC_CharSet*to)
 785         { cnv.Init(from->enc,to->enc); }
 786
 787     size_t Convert(char*buf, const char*psz, size_t n)
 788     {
 789         size_t inbuf = strlen(psz);
 790         if (buf) cnv.Convert(psz,buf);
 791         return inbuf;
 792     }
 793
 794 public:
 795     wxEncodingConverter cnv;
 796 };
 797
 798 #else // !wxUSE_WCHAR_T
 799
 800 // ----------------------------------------------------------------------------
 801 // stand-ins in absence of wchar_t
 802 // ----------------------------------------------------------------------------
 803
 804 WXDLLEXPORT_DATA(wxMBConv) wxConvLibc, wxConvFile;
 805
 806 #endif // wxUSE_WCHAR_T
 807
 808