src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin
   9 // Licence:     wxWindows license
  10 /////////////////////////////////////////////////////////////////////////////
  11
  12 // ============================================================================
  13 // declarations
  14 // ============================================================================
  15
  16 // ----------------------------------------------------------------------------
  17 // headers
  18 // ----------------------------------------------------------------------------
  19
  20 #ifdef __GNUG__
  21   #pragma implementation "strconv.h"
  22 #endif
  23
  24 // For compilers that support precompilation, includes "wx.h".
  25 #include "wx/wxprec.h"
  26
  27 #ifdef __BORLANDC__
  28   #pragma hdrstop
  29 #endif
  30
  31 #ifdef __WXMSW__
  32   #include "wx/msw/private.h"
  33 #endif
  34
  35 #include <errno.h>
  36 #include <ctype.h>
  37 #include <string.h>
  38 #include <stdlib.h>
  39
  40 #ifdef __SALFORDC__
  41   #include <clib.h>
  42 #endif
  43
  44 #ifdef HAVE_ICONV_H
  45   #include <iconv.h>
  46 #endif
  47 #ifdef HAVE_LANGINFO_H
  48   #include <langinfo.h>
  49 #endif
  50
  51 #ifdef __WXMSW__
  52   #include <windows.h>
  53 #endif
  54
  55 #include "wx/debug.h"
  56 #include "wx/strconv.h"
  57
  58 #ifdef WORDS_BIGENDIAN
  59 #define BSWAP_UCS4(str, len)
  60 #define BSWAP_UCS2(str, len)
  61 #else
  62 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  63 #define BSWAP_UCS2(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  64 #define WC_NEED_BSWAP
  65 #endif
  66 #define BSWAP_UTF32(str, len) BSWAP_UCS4(str, len)
  67 #define BSWAP_UTF16(str, len) BSWAP_UCS2(str, len)
  68
  69 #if SIZEOF_WCHAR_T == 4
  70 #define WC_NAME "UCS4"
  71 #define WC_BSWAP BSWAP_UCS4
  72 #elif SIZEOF_WCHAR_T == 2
  73 #define WC_NAME "UTF16"
  74 #define WC_BSWAP BSWAP_UTF16
  75 #define WC_UTF16
  76 #endif
  77
  78 // ----------------------------------------------------------------------------
  79 // globals
  80 // ----------------------------------------------------------------------------
  81
  82 WXDLLEXPORT_DATA(wxMBConv *) wxConvCurrent = &wxConvLibc;
  83
  84 // ============================================================================
  85 // implementation
  86 // ============================================================================
  87
  88 #if wxUSE_WCHAR_T
  89
  90 #ifdef WC_UTF16
  91
  92 static size_t encode_utf16(wxUint32 input,wxUint16*output)
  93 {
  94   if (input<=0xffff) {
  95     if (output) *output++ = input;
  96     return 1;
  97   } else
  98   if (input>=0x110000) {
  99     return (size_t)-1;
 100   } else {
 101     if (output) {
 102       *output++ = (input >> 10)+0xd7c0;
 103       *output++ = (input&0x3ff)+0xdc00;
 104     }
 105     return 2;
 106   }
 107 }
 108
 109 static size_t decode_utf16(wxUint16*input,wxUint32&output)
 110 {
 111   if ((*input<0xd800) || (*input>0xdfff)) {
 112     output = *input;
 113     return 1;
 114   } else
 115   if ((input[1]<0xdc00) || (input[1]>=0xdfff)) {
 116     output = *input;
 117     return (size_t)-1;
 118   } else {
 119     output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 120     return 2;
 121   }
 122 }
 123
 124 #endif // WC_UTF16
 125
 126 // ----------------------------------------------------------------------------
 127 // wxMBConv
 128 // ----------------------------------------------------------------------------
 129
 130 WXDLLEXPORT_DATA(wxMBConv) wxConvLibc;
 131
 132 size_t wxMBConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 133 {
 134     return wxMB2WC(buf, psz, n);
 135 }
 136
 137 size_t wxMBConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 138 {
 139     return wxWC2MB(buf, psz, n);
 140 }
 141
 142 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 143 {
 144     if (psz)
 145     {
 146         size_t nLen = MB2WC((wchar_t *) NULL, psz, 0);
 147         if (nLen == (size_t)-1)
 148             return wxWCharBuffer((wchar_t *) NULL);
 149         wxWCharBuffer buf(nLen);
 150         MB2WC((wchar_t *)(const wchar_t *) buf, psz, nLen);
 151         return buf;
 152     }
 153     else
 154         return wxWCharBuffer((wchar_t *) NULL);
 155 }
 156
 157 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *psz) const
 158 {
 159     if (psz)
 160     {
 161         size_t nLen = WC2MB((char *) NULL, psz, 0);
 162         if (nLen == (size_t)-1)
 163             return wxCharBuffer((char *) NULL);
 164         wxCharBuffer buf(nLen);
 165         WC2MB((char *)(const char *) buf, psz, nLen);
 166         return buf;
 167     }
 168     else
 169         return wxCharBuffer((char *) NULL);
 170 }
 171
 172 // ----------------------------------------------------------------------------
 173 // standard file conversion
 174 // ----------------------------------------------------------------------------
 175
 176 WXDLLEXPORT_DATA(wxMBConvFile) wxConvFile;
 177
 178 // just use the libc conversion for now
 179 size_t wxMBConvFile::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 180 {
 181     return wxMB2WC(buf, psz, n);
 182 }
 183
 184 size_t wxMBConvFile::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 185 {
 186     return wxWC2MB(buf, psz, n);
 187 }
 188
 189 // ----------------------------------------------------------------------------
 190 // standard gdk conversion
 191 // ----------------------------------------------------------------------------
 192
 193 #ifdef __WXGTK12__
 194
 195 WXDLLEXPORT_DATA(wxMBConvGdk) wxConvGdk;
 196
 197 #include <gdk/gdk.h>
 198
 199 size_t wxMBConvGdk::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 200 {
 201   if (buf) {
 202     return gdk_mbstowcs((GdkWChar *)buf, psz, n);
 203   } else {
 204     GdkWChar *nbuf = new GdkWChar[n=strlen(psz)];
 205     size_t len = gdk_mbstowcs(nbuf, psz, n);
 206     delete [] nbuf;
 207     return len;
 208   }
 209 }
 210
 211 size_t wxMBConvGdk::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 212 {
 213   char *mbstr = gdk_wcstombs((GdkWChar *)psz);
 214   size_t len = mbstr ? strlen(mbstr) : 0;
 215   if (buf) {
 216     if (len > n) len = n;
 217     memcpy(buf, psz, len);
 218     if (len < n) buf[len] = 0;
 219   }
 220   return len;
 221 }
 222
 223 #endif // GTK > 1.0
 224
 225 // ----------------------------------------------------------------------------
 226 // UTF-7
 227 // ----------------------------------------------------------------------------
 228
 229 WXDLLEXPORT_DATA(wxMBConvUTF7) wxConvUTF7;
 230
 231 #if 0
 232 static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 233                         "abcdefghijklmnopqrstuvwxyz"
 234                         "0123456789'(),-./:?";
 235 static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
 236 static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 237                         "abcdefghijklmnopqrstuvwxyz"
 238                         "0123456789+/";
 239 #endif
 240
 241 // TODO: write actual implementations of UTF-7 here
 242 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
 243                            const char * WXUNUSED(psz),
 244                            size_t WXUNUSED(n)) const
 245 {
 246   return 0;
 247 }
 248
 249 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
 250                            const wchar_t * WXUNUSED(psz),
 251                            size_t WXUNUSED(n)) const
 252 {
 253   return 0;
 254 }
 255
 256 // ----------------------------------------------------------------------------
 257 // UTF-8
 258 // ----------------------------------------------------------------------------
 259
 260 WXDLLEXPORT_DATA(wxMBConvUTF8) wxConvUTF8;
 261
 262 static wxUint32 utf8_max[]={0x7f,0x7ff,0xffff,0x1fffff,0x3ffffff,0x7fffffff,0xffffffff};
 263
 264 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 265 {
 266   size_t len = 0;
 267
 268   while (*psz && ((!buf) || (len<n))) {
 269     unsigned char cc=*psz++, fc=cc;
 270     unsigned cnt;
 271     for (cnt=0; fc&0x80; cnt++) fc<<=1;
 272     if (!cnt) {
 273       // plain ASCII char
 274       if (buf) *buf++=cc;
 275       len++;
 276     } else {
 277       cnt--;
 278       if (!cnt) {
 279         // invalid UTF-8 sequence
 280         return (size_t)-1;
 281       } else {
 282         unsigned ocnt=cnt-1;
 283         wxUint32 res=cc&(0x3f>>cnt);
 284         while (cnt--) {
 285           cc = *psz++;
 286           if ((cc&0xC0)!=0x80) {
 287             // invalid UTF-8 sequence
 288             return (size_t)-1;
 289           }
 290           res=(res<<6)|(cc&0x3f);
 291         }
 292         if (res<=utf8_max[ocnt]) {
 293           // illegal UTF-8 encoding
 294           return (size_t)-1;
 295         }
 296 #ifdef WC_UTF16
 297         size_t pa = encode_utf16(res, buf);
 298         if (pa == (size_t)-1)
 299           return (size_t)-1;
 300         if (buf) buf+=pa;
 301         len+=pa;
 302 #else
 303         if (buf) *buf++=res;
 304         len++;
 305 #endif
 306       }
 307     }
 308   }
 309   if (buf && (len<n)) *buf = 0;
 310   return len;
 311 }
 312
 313 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 314 {
 315   size_t len = 0;
 316
 317   while (*psz && ((!buf) || (len<n))) {
 318     wxUint32 cc;
 319 #ifdef WC_UTF16
 320     size_t pa = decode_utf16(psz,cc);
 321     psz += (pa == (size_t)-1) ? 1 : pa;
 322 #else
 323     cc=(*psz++)&0x7fffffff;
 324 #endif
 325     unsigned cnt;
 326     for (cnt=0; cc>utf8_max[cnt]; cnt++);
 327     if (!cnt) {
 328       // plain ASCII char
 329       if (buf) *buf++=cc;
 330       len++;
 331     } else {
 332       len+=cnt+1;
 333       if (buf) {
 334         *buf++=(-128>>cnt)|((cc>>(cnt*6))&(0x3f>>cnt));
 335         while (cnt--)
 336           *buf++=0x80|((cc>>(cnt*6))&0x3f);
 337       }
 338     }
 339   }
 340   if (buf && (len<n)) *buf = 0;
 341   return len;
 342 }
 343
 344 // ----------------------------------------------------------------------------
 345 // specified character set
 346 // ----------------------------------------------------------------------------
 347
 348 WXDLLEXPORT_DATA(wxCSConv) wxConvLocal((const wxChar *)NULL);
 349
 350 #include "wx/encconv.h"
 351 #include "wx/fontmap.h"
 352
 353 // TODO: add some tables here
 354 // - perhaps common encodings to common codepages (for Win32)
 355 // - perhaps common encodings to objects ("UTF8" -> wxConvUTF8)
 356 // - move wxEncodingConverter meat in here
 357
 358 #ifdef __WIN32__
 359 #include "wx/msw/registry.h"
 360 // this should work if M$ Internet Exploiter is installed
 361 static long CharsetToCodepage(const wxChar *name)
 362 {
 363   if (!name) return GetACP();
 364   long CP=-1;
 365   wxString cn(name);
 366   do {
 367     wxString path(wxT("MIME\\Database\\Charset\\"));
 368     path += cn;
 369     wxRegKey key(wxRegKey::HKCR,path);
 370
 371     /* two cases: either there's an AliasForCharset string,
 372      * or there are Codepage and InternetEncoding dwords.
 373      * The InternetEncoding gives us the actual encoding,
 374      * the Codepage just says which Windows character set to
 375      * use when displaying the data.
 376      */
 377     if (key.QueryValue(wxT("InternetEncoding"),&CP)) break;
 378     // no encoding, see if it's an alias
 379     if (!key.QueryValue(wxT("AliasForCharset"),cn)) break;
 380   } while (1);
 381   return CP;
 382 }
 383 #endif
 384
 385 class wxCharacterSet
 386 {
 387 public:
 388   const wxChar*cname;
 389   wxCharacterSet(const wxChar*name) : cname(name) {}
 390   virtual ~wxCharacterSet() {}
 391   virtual size_t MB2WC(wchar_t*buf, const char*psz, size_t n) { return (size_t)-1; }
 392   virtual size_t WC2MB(char*buf, const wchar_t*psz, size_t n) { return (size_t)-1; }
 393   virtual bool usable() { return FALSE; }
 394 };
 395
 396 class ID_CharSet : public wxCharacterSet
 397 {
 398 public:
 399   wxMBConv*work;
 400   ID_CharSet(const wxChar*name,wxMBConv*cnv) : wxCharacterSet(name), work(cnv) {}
 401   size_t MB2WC(wchar_t*buf, const char*psz, size_t n)
 402   { return work ? work->MB2WC(buf,psz,n) : (size_t)-1; }
 403   size_t WC2MB(char*buf, const wchar_t*psz, size_t n)
 404   { return work ? work->WC2MB(buf,psz,n) : (size_t)-1; }
 405   bool usable() { return work!=NULL; }
 406 };
 407
 408 #ifdef HAVE_ICONV_H
 409 class IC_CharSet : public wxCharacterSet
 410 {
 411 public:
 412   iconv_t m2w, w2m;
 413   IC_CharSet(const wxChar*name) : wxCharacterSet(name), m2w((iconv_t)-1), w2m((iconv_t)-1) {}
 414   ~IC_CharSet() {
 415     if (m2w!=(iconv_t)-1) iconv_close(m2w);
 416     if (w2m!=(iconv_t)-1) iconv_close(w2m);
 417   }
 418   void LoadM2W() { if (m2w==(iconv_t)-1) m2w=iconv_open(WC_NAME,wxConvLibc.cWX2MB(cname)); }
 419   void LoadW2M() { if (w2m==(iconv_t)-1) w2m=iconv_open(wxConvLibc.cWX2MB(cname),WC_NAME); }
 420   size_t MB2WC(wchar_t*buf, const char*psz, size_t n) {
 421     LoadM2W();
 422     size_t inbuf = strlen(psz);
 423     size_t outbuf = n*SIZEOF_WCHAR_T;
 424     size_t res, cres;
 425     fprintf(stderr,"IC Convert to WC using %s\n",(const char*)wxConvLibc.cWX2MB(cname));
 426     if (buf) {
 427       // have destination buffer, convert there
 428       cres = iconv(m2w,&psz,&inbuf,(char**)&buf,&outbuf);
 429       res = n-(outbuf/SIZEOF_WCHAR_T);
 430       // convert to native endianness
 431       WC_BSWAP(buf, res)
 432     } else {
 433       // no destination buffer... convert using temp buffer
 434       // to calculate destination buffer requirement
 435       wchar_t tbuf[8];
 436       res = 0;
 437       do {
 438         buf = tbuf; outbuf = 8*SIZEOF_WCHAR_T;
 439         cres = iconv(m2w,&psz,&inbuf,(char**)&buf,&outbuf);
 440         res += 8-(outbuf/SIZEOF_WCHAR_T);
 441       } while ((cres==(size_t)-1) && (errno==E2BIG));
 442     }
 443     if (cres==(size_t)-1) return (size_t)-1;
 444     return res;
 445   }
 446   size_t WC2MB(char*buf, const wchar_t*psz, size_t n) {
 447     LoadW2M();
 448 #if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
 449     size_t inbuf = std::wcslen(psz);
 450 #else
 451     size_t inbuf = ::wcslen(psz);
 452 #endif
 453     size_t outbuf = n;
 454     size_t res, cres;
 455     fprintf(stderr,"IC Convert from WC using %s\n",(const char*)wxConvLibc.cWX2MB(cname));
 456 #ifdef WC_NEED_BSWAP
 457     // need to copy to temp buffer to switch endianness
 458     // this absolutely doesn't rock!
 459     // (no, doing WC_BSWAP twice on the original buffer won't help, as it
 460     //  could be in read-only memory, or be accessed in some other thread)
 461     wchar_t*tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
 462     memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
 463     WC_BSWAP(tmpbuf, inbuf)
 464     psz=tmpbuf;
 465 #endif
 466     if (buf) {
 467       // have destination buffer, convert there
 468       cres = iconv(w2m,(const char**)&psz,&inbuf,&buf,&outbuf);
 469       res = n-outbuf;
 470     } else {
 471       // no destination buffer... convert using temp buffer
 472       // to calculate destination buffer requirement
 473       char tbuf[16];
 474       res = 0;
 475       do {
 476         buf = tbuf; outbuf = 16;
 477         cres = iconv(w2m,(const char**)&psz,&inbuf,&buf,&outbuf);
 478         res += 16 - outbuf;
 479       } while ((cres==(size_t)-1) && (errno==E2BIG));
 480     }
 481 #ifdef WC_NEED_BSWAP
 482     free(tmpbuf);
 483 #endif
 484     if (cres==(size_t)-1) return (size_t)-1;
 485     return res;
 486   }
 487   bool usable() { return TRUE; }
 488 };
 489 #endif
 490
 491 #ifdef __WIN32__
 492 class CP_CharSet : public wxCharacterSet
 493 {
 494 public:
 495   long CodePage;
 496   CP_CharSet(const wxChar*name) : wxCharacterSet(name), CodePage(CharsetToCodepage(name)) {}
 497   size_t MB2WC(wchar_t*buf, const char*psz, size_t n) {
 498     size_t len = MultiByteToWideChar(CodePage,0,psz,-1,buf,buf?n:0);
 499     return len?len:(size_t)-1;
 500   }
 501   size_t WC2MB(char*buf, const wchar_t*psz, size_t n) {
 502     size_t len = WideCharToMultiByte(CodePage,0,psz,-1,buf,buf?n:0,NULL,NULL);
 503     return len?len:(size_t)-1;
 504   }
 505   bool usable() { return CodePage!=-1; }
 506 };
 507 #endif
 508
 509 class EC_CharSet : public wxCharacterSet
 510 {
 511 public:
 512   // temporarily just use wxEncodingConverter stuff,
 513   // so that it works while a better implementation is built
 514   wxFontEncoding enc;
 515   wxEncodingConverter m2w, w2m;
 516   EC_CharSet(const wxChar*name) : wxCharacterSet(name), enc(wxFONTENCODING_SYSTEM)
 517   {
 518     if (name) enc = wxTheFontMapper->CharsetToEncoding(name, FALSE);
 519     m2w.Init(enc, wxFONTENCODING_UNICODE);
 520     w2m.Init(wxFONTENCODING_UNICODE, enc);
 521   }
 522   size_t MB2WC(wchar_t*buf, const char*psz, size_t n) {
 523     size_t inbuf = strlen(psz);
 524     fprintf(stderr,"EC Convert to WC using %d\n",enc);
 525     if (buf) m2w.Convert(psz,buf);
 526     return inbuf;
 527   }
 528   size_t WC2MB(char*buf, const wchar_t*psz, size_t n) {
 529 #if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
 530     size_t inbuf = std::wcslen(psz);
 531 #else
 532     size_t inbuf = ::wcslen(psz);
 533 #endif
 534     fprintf(stderr,"EC Convert from WC using %d\n",enc);
 535     if (buf) w2m.Convert(psz,buf);
 536     return inbuf;
 537   }
 538   bool usable() { return (enc!=wxFONTENCODING_SYSTEM) && (enc!=wxFONTENCODING_DEFAULT); }
 539 };
 540
 541 static wxCharacterSet *wxGetCharacterSet(const wxChar *name)
 542 {
 543   wxCharacterSet *cset = NULL;
 544   if (name) {
 545     if (!wxStricmp(name, wxT("UTF8")) || !wxStricmp(name, wxT("UTF-8"))) {
 546       cset = new ID_CharSet(name, &wxConvUTF8);
 547     } else {
 548 #ifdef HAVE_ICONV_H
 549       cset = new IC_CharSet(name); // may not take NULL
 550 #endif
 551     }
 552   }
 553   if (cset && cset->usable()) return cset;
 554   if (cset) delete cset;
 555 #ifdef __WIN32__
 556   cset = new CP_CharSet(name); // may take NULL
 557   if (cset->usable()) return cset;
 558 #endif
 559   if (cset) delete cset;
 560   cset = new EC_CharSet(name);
 561   if (cset->usable()) return cset;
 562   delete cset;
 563   return NULL;
 564 }
 565
 566 wxCSConv::wxCSConv(const wxChar *charset)
 567 {
 568   m_name = (wxChar *) NULL;
 569   m_cset = (wxCharacterSet *) NULL;
 570   m_deferred = TRUE;
 571   SetName(charset);
 572 }
 573
 574 wxCSConv::~wxCSConv()
 575 {
 576   if (m_name) free(m_name);
 577   if (m_cset) delete m_cset;
 578 }
 579
 580 void wxCSConv::SetName(const wxChar *charset)
 581 {
 582   if (charset) {
 583     m_name = wxStrdup(charset);
 584     m_deferred = TRUE;
 585   }
 586 }
 587
 588 void wxCSConv::LoadNow()
 589 {
 590 //  wxPrintf(wxT("Conversion request\n"));
 591   if (m_deferred) {
 592     if (!m_name) {
 593 #ifdef __UNIX__
 594 #if defined(HAVE_LANGINFO_H) && defined(CODESET)
 595       // GNU libc provides current character set this way
 596       char*alang = nl_langinfo(CODESET);
 597       if (alang) SetName(wxConvLibc.cMB2WX(alang));
 598       else
 599 #endif
 600       // if we can't get at the character set directly,
 601       // try to see if it's in the environment variables
 602       // (in most cases this won't work, but I was out of ideas)
 603       {
 604         wxChar *lang = wxGetenv(wxT("LC_ALL"));
 605         if (!lang) lang = wxGetenv(wxT("LC_CTYPE"));
 606         if (!lang) lang = wxGetenv(wxT("LANG"));
 607         wxChar *dot = lang ? wxStrchr(lang, wxT('.')) : (wxChar *)NULL;
 608         if (dot) SetName(dot+1);
 609       }
 610 #endif
 611     }
 612     m_cset = wxGetCharacterSet(m_name);
 613     m_deferred = FALSE;
 614   }
 615 }
 616
 617 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 618 {
 619   ((wxCSConv *)this)->LoadNow(); // discard constness
 620   if (m_cset)
 621     return m_cset->MB2WC(buf, psz, n);
 622
 623   // latin-1 (direct)
 624   size_t len=strlen(psz);
 625   if (buf) {
 626     for (size_t c=0; c<=len; c++)
 627       buf[c] = (unsigned char)(psz[c]);
 628   }
 629   return len;
 630 }
 631
 632 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 633 {
 634   ((wxCSConv *)this)->LoadNow(); // discard constness
 635   if (m_cset)
 636     return m_cset->WC2MB(buf, psz, n);
 637
 638   // latin-1 (direct)
 639 #if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
 640   size_t len=std::wcslen(psz);
 641 #else
 642   size_t len=::wcslen(psz);
 643 #endif
 644   if (buf) {
 645     for (size_t c=0; c<=len; c++)
 646       buf[c] = (psz[c]>0xff) ? '?' : psz[c];
 647   }
 648   return len;
 649 }
 650
 651 #ifdef HAVE_ICONV_H
 652 class IC_CharSetConverter
 653 {
 654 public:
 655   iconv_t cnv;
 656   IC_CharSetConverter(IC_CharSet*from,IC_CharSet*to) {
 657     cnv=iconv_open(wxConvLibc.cWX2MB(to->cname),wxConvLibc.cWX2MB(from->cname));
 658   }
 659   ~IC_CharSetConverter() {
 660     if (cnv!=(iconv_t)-1) iconv_close(cnv);
 661   }
 662   size_t Convert(char*buf, const char*psz, size_t n) {
 663     size_t inbuf = strlen(psz);
 664     size_t outbuf = n;
 665     size_t res = iconv(cnv,&psz,&inbuf,&buf,&outbuf);
 666     if (res==(size_t)-1) return (size_t)-1;
 667     return n-outbuf;
 668   }
 669 };
 670 #endif
 671
 672 class EC_CharSetConverter
 673 {
 674 public:
 675   wxEncodingConverter cnv;
 676   EC_CharSetConverter(EC_CharSet*from,EC_CharSet*to) {
 677     cnv.Init(from->enc,to->enc);
 678   }
 679   size_t Convert(char*buf, const char*psz, size_t n) {
 680     size_t inbuf = strlen(psz);
 681     if (buf) cnv.Convert(psz,buf);
 682     return inbuf;
 683   }
 684 };
 685
 686 #else // !wxUSE_WCHAR_T
 687
 688 // ----------------------------------------------------------------------------
 689 // stand-ins in absence of wchar_t
 690 // ----------------------------------------------------------------------------
 691
 692 WXDLLEXPORT_DATA(wxMBConv) wxConvLibc, wxConvFile;
 693
 694 #endif // wxUSE_WCHAR_T
 695
 696