src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 // For compilers that support precompilation, includes "wx.h".
  24 #include "wx/wxprec.h"
  25
  26 #ifdef __BORLANDC__
  27   #pragma hdrstop
  28 #endif
  29
  30 #ifndef WX_PRECOMP
  31     #include "wx/intl.h"
  32     #include "wx/log.h"
  33 #endif // WX_PRECOMP
  34
  35 #include "wx/strconv.h"
  36
  37 #if wxUSE_WCHAR_T
  38
  39 #ifdef __WINDOWS__
  40     #include "wx/msw/private.h"
  41     #include "wx/msw/missing.h"
  42 #endif
  43
  44 #ifndef __WXWINCE__
  45 #include <errno.h>
  46 #endif
  47
  48 #include <ctype.h>
  49 #include <string.h>
  50 #include <stdlib.h>
  51
  52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  53     #define wxHAVE_WIN32_MB2WC
  54 #endif // __WIN32__ but !__WXMICROWIN__
  55
  56 #ifdef __SALFORDC__
  57     #include <clib.h>
  58 #endif
  59
  60 #ifdef HAVE_ICONV
  61     #include <iconv.h>
  62     #include "wx/thread.h"
  63 #endif
  64
  65 #include "wx/encconv.h"
  66 #include "wx/fontmap.h"
  67 #include "wx/utils.h"
  68
  69 #ifdef __WXMAC__
  70 #ifndef __DARWIN__
  71 #include <ATSUnicode.h>
  72 #include <TextCommon.h>
  73 #include <TextEncodingConverter.h>
  74 #endif
  75
  76 #include  "wx/mac/private.h"  // includes mac headers
  77 #endif
  78
// name of a trace mask; presumably passed to wxLogTrace() elsewhere in this
// file -- TODO confirm, it is not used in the part of the file reviewed here
#define TRACE_STRCONV _T("strconv")

// WC_UTF16 is defined when wchar_t is 2 bytes wide: the conversions below then
// store wide strings as UTF-16 (using surrogate pairs where needed) instead
// of storing one complete code point per wchar_t
#if SIZEOF_WCHAR_T == 2
    #define WC_UTF16
#endif
  84
  85 // ============================================================================
  86 // implementation
  87 // ============================================================================
  88
  89 // ----------------------------------------------------------------------------
  90 // UTF-16 en/decoding to/from UCS-4
  91 // ----------------------------------------------------------------------------
  92
  93
  94 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
  95 {
  96     if (input<=0xffff)
  97     {
  98         if (output)
  99             *output = (wxUint16) input;
 100         return 1;
 101     }
 102     else if (input>=0x110000)
 103     {
 104         return (size_t)-1;
 105     }
 106     else
 107     {
 108         if (output)
 109         {
 110             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 111             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 112         }
 113         return 2;
 114     }
 115 }
 116
 117 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 118 {
 119     if ((*input<0xd800) || (*input>0xdfff))
 120     {
 121         output = *input;
 122         return 1;
 123     }
 124     else if ((input[1]<0xdc00) || (input[1]>0xdfff))
 125     {
 126         output = *input;
 127         return (size_t)-1;
 128     }
 129     else
 130     {
 131         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 132         return 2;
 133     }
 134 }
 135
 136
 137 // ----------------------------------------------------------------------------
 138 // wxMBConv
 139 // ----------------------------------------------------------------------------
 140
// Out-of-line destructor with an intentionally empty body.
wxMBConv::~wxMBConv()
{
    // nothing to do here (the original author's note says that defining it
    // here is probably necessary for linking under Darwin)
}
 145
 146 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 147 {
 148     if ( psz )
 149     {
 150         // calculate the length of the buffer needed first
 151         size_t nLen = MB2WC(NULL, psz, 0);
 152         if ( nLen != (size_t)-1 )
 153         {
 154             // now do the actual conversion
 155             wxWCharBuffer buf(nLen);
 156             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 157             if ( nLen != (size_t)-1 )
 158             {
 159                 return buf;
 160             }
 161         }
 162     }
 163
 164     wxWCharBuffer buf((wchar_t *)NULL);
 165
 166     return buf;
 167 }
 168
 169 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 170 {
 171     if ( pwz )
 172     {
 173         size_t nLen = WC2MB(NULL, pwz, 0);
 174         if ( nLen != (size_t)-1 )
 175         {
 176             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 177             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 178             if ( nLen != (size_t)-1 )
 179             {
 180                 return buf;
 181             }
 182         }
 183     }
 184
 185     wxCharBuffer buf((char *)NULL);
 186
 187     return buf;
 188 }
 189
 190 const wxWCharBuffer
 191 wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
 192 {
 193     // the currently accumulated wide characters
 194     wxWCharBuffer wbuf;
 195
 196     // the current length of wbuf
 197     size_t lenBuf = 0;
 198
 199     // we need to know the representation of L'\0' for this conversion
 200     size_t nulLen;
 201     const char * const nul = GetMBNul(&nulLen);
 202     if ( nulLen == (size_t)-1 || nulLen == 0 )
 203         return wxWCharBuffer();
 204
 205     // make a copy of the input string unless it is already properly
 206     // NUL-terminated
 207     wxCharBuffer bufTmp;
 208
 209     // now we can compute the input size if we were not given it: notice that
 210     // in this case the string must be properly NUL-terminated, of course, as
 211     // otherwise we have no way of knowing how long it is
 212     if ( inLen == (size_t)-1 )
 213     {
 214         // not the most efficient algorithm but it shouldn't matter as normally
 215         // there are not many NULs in the string and so normally memcmp()
 216         // should stop on the first character
 217         const char *p = in;
 218         while ( memcmp(p, nul, nulLen) != 0 )
 219             p++;
 220
 221         inLen = p - in + nulLen;
 222     }
 223     else // we already have the size
 224     {
 225         // check if it's not already NUL-terminated too to avoid the copy
 226         if ( inLen < nulLen || memcmp(in + inLen - nulLen, nul, nulLen) != 0 )
 227         {
 228             // make a copy in order to properly NUL-terminate the string
 229             bufTmp = wxCharBuffer(inLen + nulLen - 1 /* 1 will be added */);
 230             memcpy(bufTmp.data(), in, inLen);
 231             memcpy(bufTmp.data() + inLen, nul, nulLen);
 232         }
 233     }
 234
 235     if ( bufTmp )
 236         in = bufTmp;
 237
 238     for ( const char * const inEnd = in + inLen;; )
 239     {
 240         // try to convert the current chunk if anything left
 241         size_t lenChunk = in < inEnd ? MB2WC(NULL, in, 0) : 0;
 242         if ( lenChunk == 0 )
 243         {
 244             // nothing left in the input string, conversion succeeded
 245             if ( outLen )
 246             {
 247                 // we shouldn't include the last NUL in the result length
 248                 *outLen = lenBuf ? lenBuf - 1 : 0;
 249             }
 250
 251             return wbuf;
 252         }
 253
 254         if ( lenChunk == (size_t)-1 )
 255             break;
 256
 257         const size_t lenBufNew = lenBuf + lenChunk;
 258         if ( !wbuf.extend(lenBufNew) )
 259             break;
 260
 261         lenChunk = MB2WC(wbuf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
 262         if ( lenChunk == (size_t)-1 )
 263             break;
 264
 265         // +! for the embedded NUL (if something follows)
 266         lenBuf = lenBufNew + 1;
 267
 268         // advance the input pointer past the end of this chunk
 269         while ( memcmp(in, nul, nulLen) != 0 )
 270             in++;
 271
 272         in += nulLen; // skipping over its terminator as well
 273     }
 274
 275     // conversion failed
 276     if ( outLen )
 277         *outLen = 0;
 278
 279     return wxWCharBuffer();
 280 }
 281
 282 const wxCharBuffer
 283 wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
 284 {
 285     // the currently accumulated multibyte characters
 286     wxCharBuffer buf;
 287
 288     // the current length of buf
 289     size_t lenBuf = 0;
 290
 291     // make a copy of the input string unless it is already properly
 292     // NUL-terminated
 293     //
 294     // if we don't know its length we have no choice but to assume that it is,
 295     // indeed, properly terminated
 296     wxWCharBuffer bufTmp;
 297     if ( inLen == (size_t)-1 )
 298     {
 299         inLen = wxWcslen(in) + 1;
 300     }
 301     else if ( inLen != 0 && in[inLen - 1] != L'\0' )
 302     {
 303         // make a copy in order to properly NUL-terminate the string
 304         bufTmp = wxWCharBuffer(inLen);
 305         memcpy(bufTmp.data(), in, inLen*sizeof(wchar_t));
 306     }
 307
 308     if ( bufTmp )
 309         in = bufTmp;
 310
 311     for ( const wchar_t * const inEnd = in + inLen;; )
 312     {
 313         // try to convert the current chunk, if anything left
 314         size_t lenChunk = in < inEnd ? WC2MB(NULL, in, 0) : 0;
 315         if ( lenChunk == 0 )
 316         {
 317             // nothing left in the input string, conversion succeeded
 318             if ( outLen )
 319                 *outLen = lenBuf ? lenBuf - 1 : lenBuf;
 320
 321             return buf;
 322         }
 323
 324         if ( lenChunk == (size_t)-1 )
 325             break;
 326
 327         const size_t lenBufNew = lenBuf + lenChunk;
 328         if ( !buf.extend(lenBufNew) )
 329             break;
 330
 331         lenChunk = WC2MB(buf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
 332         if ( lenChunk == (size_t)-1 )
 333             break;
 334
 335         // chunk successfully converted, go to the next one
 336         in += wxWcslen(in) + 1 /* skip NUL too */;
 337         lenBuf = lenBufNew + 1;
 338     }
 339
 340     // conversion failed
 341     if ( outLen )
 342         *outLen = 0;
 343
 344     return wxCharBuffer();
 345 }
 346
 347 // ----------------------------------------------------------------------------
 348 // wxMBConvLibc
 349 // ----------------------------------------------------------------------------
 350
// Multibyte to wide character conversion: all the work is delegated to
// wxMB2WC(), which is called with the same arguments and whose return value
// is passed back unchanged.
size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    return wxMB2WC(buf, psz, n);
}
 355
// Wide character to multibyte conversion: all the work is delegated to
// wxWC2MB(), which is called with the same arguments and whose return value
// is passed back unchanged.
size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    return wxWC2MB(buf, psz, n);
}
 360
 361 // ----------------------------------------------------------------------------
 362 // wxConvBrokenFileNames
 363 // ----------------------------------------------------------------------------
 364
 365 #ifdef __UNIX__
 366
 367 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
 368 {
 369     if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
 370                   || wxStricmp(charset, _T("UTF8")) == 0  )
 371         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 372     else
 373         m_conv = new wxCSConv(charset);
 374 }
 375
 376 #endif // __UNIX__
 377
 378 // ----------------------------------------------------------------------------
 379 // UTF-7
 380 // ----------------------------------------------------------------------------
 381
 382 // Implementation (C) 2004 Fredrik Roubert
 383
//
// BASE64 decoding table
//
// It has one entry for each of the 256 possible byte values: the entry is
// the 6 bit value (0x00..0x3f) of the byte if it is one of the BASE64
// characters 'A'-'Z', 'a'-'z', '0'-'9', '+' and '/', or 0xff for all the
// other bytes (including '=', which is not accepted here).
//
static const unsigned char utf7unb64[] =
{
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
 422
// Convert the UTF-7 encoded string psz to wide characters.
//
// If buf is NULL nothing is stored and only the number of wide characters in
// the result is computed. Otherwise the outer loop stops once n characters
// have been stored and the result is NUL-terminated only if there is still
// room for the NUL (i.e. if less than n characters were stored).
//
// Returns the number of wide characters in the result, not counting the
// trailing NUL, or (size_t)-1 if a '+' is followed neither by '-' nor by
// enough BASE64 characters to decode at least one byte.
size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    size_t len = 0;

    while ( *psz && (!buf || (len < n)) )
    {
        unsigned char cc = *psz++;
        if (cc != '+')
        {
            // plain ASCII char
            if (buf)
                *buf++ = cc;
            len++;
        }
        else if (*psz == '-')
        {
            // encoded plus sign: "+-" stands for '+' itself
            if (buf)
                *buf++ = cc;
            len++;
            psz++;
        }
        else // start of BASE64 encoded string
        {
            // d accumulates the decoded bits, 6 more for each BASE64
            // character, and l is the number of bits in d not consumed yet;
            // lsb tells whether the next decoded byte is the low (true) or
            // the high (false) byte of a 16 bit unit and ok becomes true as
            // soon as at least one byte was decoded
            //
            // NOTE(review): len is not compared with n inside this loop, so a
            // long BASE64 run looks able to store more than n characters in
            // buf -- confirm
            bool lsb, ok;
            unsigned int d, l;
            for ( ok = lsb = false, d = 0, l = 0;
                  (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
                  psz++ )
            {
                d <<= 6;
                d += cc;
                for (l += 6; l >= 8; lsb = !lsb)
                {
                    // extract the next complete byte from the accumulator
                    unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
                    if (lsb)
                    {
                        // low byte: it completes the unit started below
                        if (buf)
                            *buf++ |= c;
                        len ++;
                    }
                    else
                    {
                        // high byte: start a new unit but don't advance (nor
                        // count it) until its low byte is decoded too
                        if (buf)
                            *buf = (wchar_t)(c << 8);
                    }

                    ok = true;
                }
            }

            if ( !ok )
            {
                // in valid UTF7 we should have valid characters after '+'
                return (size_t)-1;
            }

            // the '-' explicitly terminating the BASE64 run is not a part of
            // the text
            if (*psz == '-')
                psz++;
        }
    }

    if ( buf && (len < n) )
        *buf = '\0';

    return len;
}
 490
//
// BASE64 encoding table
//
// Maps each 6 bit value (0..63) to the character representing it.
//
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3',
    '4', '5', '6', '7', '8', '9', '+', '/'
};
 505
//
// UTF-7 encoding table
//
// It gives the class of each of the 128 ASCII characters (16 per row):
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
// wxMBConvUTF7::WC2MB() only writes the characters of class 0 directly, all
// the others are BASE64 encoded.
//
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
 525
// Convert the wide string psz to UTF-7.
//
// If buf is NULL nothing is stored and only the length of the result is
// computed. Otherwise the outer loop stops once n bytes have been stored and
// the result is NUL-terminated only if there is still room for the NUL.
//
// Returns the length of the result in bytes, not counting the trailing NUL,
// or, if WC_UTF16 is not defined, (size_t)-1 if a character greater than
// 0xffff is encountered at the start of a run.
size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        wchar_t cc = *psz++;
        // NOTE(review): cc is used as the table index without being converted
        // to an unsigned type first, which would be a problem for negative
        // values if wchar_t is signed -- confirm
        if (cc < 0x80 && utf7encode[cc] < 1)
        {
            // plain ASCII char
            if (buf)
                *buf++ = (char)cc;
            len++;
        }
#ifndef WC_UTF16
        else if (((wxUint32)cc) > 0xffff)
        {
            // no surrogate pair generation (yet?)
            return (size_t)-1;
        }
#endif
        else
        {
            // everything else is written between '+' and '-'
            if (buf)
                *buf++ = '+';
            len++;
            if (cc != '+')
            {
                // BASE64 encode string: d accumulates the bits of the
                // characters, 8 at a time, and l is the number of bits in d
                // which were not output yet
                //
                // NOTE(review): neither len is compared with n nor, in the
                // builds without WC_UTF16, the characters checked against
                // 0xffff inside this loop -- confirm
                unsigned int lsb, d, l;
                for (d = 0, l = 0; /*nothing*/; psz++)
                {
                    // each character contributes 2 bytes, high one first
                    for (lsb = 0; lsb < 2; lsb ++)
                    {
                        d <<= 8;
                        d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;

                        // output as many complete 6 bit groups as we have
                        for (l += 8; l >= 6; )
                        {
                            l -= 6;
                            if (buf)
                                *buf++ = utf7enb64[(d >> l) % 64];
                            len++;
                        }
                    }
                    // the run continues until the end of the string or until
                    // a character which can be written directly
                    cc = *psz;
                    if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
                        break;
                }
                if (l != 0)
                {
                    // output the remaining bits (l can only be 2 or 4 here)
                    // padded with zeroes to form the last 6 bit group
                    if (buf)
                        *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
                    len++;
                }
            }
            // if cc was '+' nothing was written after it and so the plus
            // sign itself is represented by "+-"
            if (buf)
                *buf++ = '-';
            len++;
        }
    }
    if (buf && (len < n))
        *buf = 0;
    return len;
}
 591
 592 // ----------------------------------------------------------------------------
 593 // UTF-8
 594 // ----------------------------------------------------------------------------
 595
// utf8_max[i] is the greatest value which can be encoded using i + 1 bytes in
// UTF-8 (in its original form allowing up to 6 bytes); as no wxUint32 value
// is expected to be greater than the last element, it stops the loops
// searching for the number of bytes needed
static wxUint32 utf8_max[]=
    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };

// boundaries of the private use area to which the bytes of the invalid
// sequences found in a UTF-8 encoded string are (temporarily) remapped: a
// byte b is represented by wxUnicodePUA + b
const wxUint32 wxUnicodePUA = 0x100000;
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 603
// Convert the UTF-8 string psz to wide characters.
//
// If buf is NULL nothing is stored and only the length of the result is
// computed. Otherwise the outer loop stops once n wide characters have been
// produced and the result is NUL-terminated only if there is still room for
// the NUL.
//
// The bytes which are not a part of a valid sequence are handled according
// to m_options:
//  - with MAP_INVALID_UTF8_TO_PUA each of them is mapped to the character
//    wxUnicodePUA + byte (and, to keep this reversible, the characters in
//    this range appearing in the input are treated as being invalid too)
//  - with MAP_INVALID_UTF8_TO_OCTAL each of them is replaced by a backslash
//    followed by 3 octal digits (and the backslashes present in the input
//    are doubled)
//  - otherwise the conversion fails
//
// Returns the number of wide characters in the result, not counting the
// trailing NUL, or (size_t)-1 if the conversion failed.
size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        // remember the start of the sequence to be able to go back to it if
        // it turns out to be invalid
        const char *opsz = psz;
        bool invalid = false;
        unsigned char cc = *psz++, fc = cc;
        // count the leading 1 bits of the first byte
        unsigned cnt;
        for (cnt = 0; fc & 0x80; cnt++)
            fc <<= 1;
        if (!cnt)
        {
            // plain ASCII char
            if (buf)
                *buf++ = cc;
            len++;

            // escape the escape character for octal escapes
            if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
                    && cc == '\\' && (!buf || len < n))
            {
                if (buf)
                    *buf++ = cc;
                len++;
            }
        }
        else
        {
            // from now on cnt is the number of the continuation bytes which
            // must follow the first one
            cnt--;
            if (!cnt)
            {
                // invalid UTF-8 sequence: a continuation byte can't start it
                invalid = true;
            }
            else
            {
                unsigned ocnt = cnt - 1;
                wxUint32 res = cc & (0x3f >> cnt);
                while (cnt--)
                {
                    cc = *psz;
                    if ((cc & 0xC0) != 0x80)
                    {
                        // invalid UTF-8 sequence
                        invalid = true;
                        break;
                    }
                    psz++;
                    res = (res << 6) | (cc & 0x3f);
                }
                if (invalid || res <= utf8_max[ocnt])
                {
                    // illegal UTF-8 encoding: either the sequence is
                    // truncated or the value could have been encoded using
                    // a shorter one
                    invalid = true;
                }
                else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
                        res >= wxUnicodePUA && res < wxUnicodePUAEnd)
                {
                    // if one of our PUA characters turns up externally
                    // it must also be treated as an illegal sequence
                    // (a bit like you have to escape an escape character)
                    invalid = true;
                }
                else
                {
#ifdef WC_UTF16
                    // cast is ok because wchar_t == wxUint16 if WC_UTF16
                    //
                    // NOTE(review): when a surrogate pair is produced here it
                    // is not checked that 2 units still fit in buf -- confirm
                    size_t pa = encode_utf16(res, (wxUint16 *)buf);
                    if (pa == (size_t)-1)
                    {
                        invalid = true;
                    }
                    else
                    {
                        if (buf)
                            buf += pa;
                        len += pa;
                    }
#else // !WC_UTF16
                    if (buf)
                        *buf++ = (wchar_t)res;
                    len++;
#endif // WC_UTF16/!WC_UTF16
                }
            }
            if (invalid)
            {
                if (m_options & MAP_INVALID_UTF8_TO_PUA)
                {
                    // map all the bytes consumed so far one by one
                    while (opsz < psz && (!buf || len < n))
                    {
#ifdef WC_UTF16
                        // cast is ok because wchar_t == wxUint16 if WC_UTF16
                        size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
                        wxASSERT(pa != (size_t)-1);
                        if (buf)
                            buf += pa;
                        opsz++;
                        len += pa;
#else
                        if (buf)
                            *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
                        opsz++;
                        len++;
#endif
                    }
                }
                else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
                {
                    // replace each of the bytes consumed so far with "\ooo"
                    while (opsz < psz && (!buf || len < n))
                    {
                        // NOTE(review): if buf is not NULL but the escape
                        // doesn't fit nothing is stored while len is still
                        // increased by 4 below -- confirm this is intended
                        if ( buf && len + 3 < n )
                        {
                            unsigned char on = *opsz;
                            *buf++ = L'\\';
                            *buf++ = (wchar_t)( L'0' + on / 0100 );
                            *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
                            *buf++ = (wchar_t)( L'0' + on % 010 );
                        }
                        opsz++;
                        len += 4;
                    }
                }
                else // MAP_INVALID_UTF8_NOT
                {
                    return (size_t)-1;
                }
            }
        }
    }
    if (buf && (len < n))
        *buf = 0;
    return len;
}
 740
 741 static inline bool isoctal(wchar_t wch)
 742 {
 743     return L'0' <= wch && wch <= L'7';
 744 }
 745
// Convert the wide string psz to UTF-8.
//
// If buf is NULL nothing is stored and only the length of the result is
// computed. Otherwise the loop stops once n bytes have been stored and the
// result is NUL-terminated only if there is still room for the NUL.
//
// Depending on m_options, the mappings used for the invalid bytes by MB2WC()
// are undone here: with MAP_INVALID_UTF8_TO_PUA the characters in the range
// wxUnicodePUA..wxUnicodePUAEnd are converted back to single bytes and with
// MAP_INVALID_UTF8_TO_OCTAL a double backslash becomes a single one and a
// backslash followed by 3 octal digits becomes the corresponding byte.
//
// Returns the length of the result in bytes, not counting the trailing NUL.
//
// NOTE(review): len is only compared with n at the start of each iteration,
// so a multibyte sequence looks able to go beyond n bytes -- confirm
size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        wxUint32 cc;
#ifdef WC_UTF16
        // cast is ok for WC_UTF16
        //
        // if decoding fails we skip just one unit, decode_utf16() has stored
        // this unit itself in cc in this case and it is encoded as is below
        size_t pa = decode_utf16((const wxUint16 *)psz, cc);
        psz += (pa == (size_t)-1) ? 1 : pa;
#else
        cc=(*psz++) & 0x7fffffff;
#endif

        if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
                && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
        {
            // restore the original byte
            if (buf)
                *buf++ = (char)(cc - wxUnicodePUA);
            len++;
        }
        else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
                    && cc == L'\\' && psz[0] == L'\\' )
        {
            // escaped backslash: output only one of them
            if (buf)
                *buf++ = (char)cc;
            psz++;
            len++;
        }
        else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
                    cc == L'\\' &&
                        isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
        {
            // octal escape: output the byte it stands for
            if (buf)
            {
                *buf++ = (char) ((psz[0] - L'0')*0100 +
                                 (psz[1] - L'0')*010 +
                                 (psz[2] - L'0'));
            }

            psz += 3;
            len++;
        }
        else
        {
            // find the number of continuation bytes needed for this value
            unsigned cnt;
            for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
            if (!cnt)
            {
                // plain ASCII char
                if (buf)
                    *buf++ = (char) cc;
                len++;
            }

            else
            {
                len += cnt + 1;
                if (buf)
                {
                    // the first byte starts with cnt + 1 bits set followed by
                    // the most significant bits of the value, the remaining
                    // bits are stored 6 per byte in the continuation bytes
                    *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
                    while (cnt--)
                        *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
                }
            }
        }
    }

    if (buf && (len<n))
        *buf = 0;

    return len;
}
 820
 821 // ----------------------------------------------------------------------------
 822 // UTF-16
 823 // ----------------------------------------------------------------------------
 824
// wxMBConvUTF16straight stands for the class using the same byte order as
// this machine (the big endian one if WORDS_BIGENDIAN is defined) and
// wxMBConvUTF16swap for the class using the opposite byte order
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
 832
 833
 834 #ifdef WC_UTF16
 835
 836 // copy 16bit MB to 16bit String
 837 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 838 {
 839     size_t len=0;
 840
 841     while (*(wxUint16*)psz && (!buf || len < n))
 842     {
 843         if (buf)
 844             *buf++ = *(wxUint16*)psz;
 845         len++;
 846
 847         psz += sizeof(wxUint16);
 848     }
 849     if (buf && len<n)   *buf=0;
 850
 851     return len;
 852 }
 853
 854
 855 // copy 16bit String to 16bit MB
 856 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 857 {
 858     size_t len=0;
 859
 860     while (*psz && (!buf || len < n))
 861     {
 862         if (buf)
 863         {
 864             *(wxUint16*)buf = *psz;
 865             buf += sizeof(wxUint16);
 866         }
 867         len += sizeof(wxUint16);
 868         psz++;
 869     }
 870     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 871
 872     return len;
 873 }
 874
 875
 876 // swap 16bit MB to 16bit String
 877 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 878 {
 879     size_t len = 0;
 880
 881     // UTF16 string must be terminated by 2 NULs as single NULs may occur
 882     // inside the string
 883     while ( (psz[0] || psz[1]) && (!buf || len < n) )
 884     {
 885         if ( buf )
 886         {
 887             ((char *)buf)[0] = psz[1];
 888             ((char *)buf)[1] = psz[0];
 889             buf++;
 890         }
 891         len++;
 892         psz += 2;
 893     }
 894
 895     if ( buf && len < n )
 896         *buf = L'\0';
 897
 898     return len;
 899 }
 900
 901
 902 // swap 16bit MB to 16bit String
 903 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 904 {
 905     size_t len = 0;
 906
 907     while ( *psz && (!buf || len < n) )
 908     {
 909         if ( buf )
 910         {
 911             *buf++ = ((char*)psz)[1];
 912             *buf++ = ((char*)psz)[0];
 913         }
 914         len += 2;
 915         psz++;
 916     }
 917
 918     if ( buf && len < n )
 919         *buf = '\0';
 920
 921     return len;
 922 }
 923
 924
 925 #else // WC_UTF16
 926
 927
 928 // copy 16bit MB to 32bit String
 929 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 930 {
 931     size_t len=0;
 932
 933     while (*(wxUint16*)psz && (!buf || len < n))
 934     {
 935         wxUint32 cc;
 936         size_t pa=decode_utf16((wxUint16*)psz, cc);
 937         if (pa == (size_t)-1)
 938             return pa;
 939
 940         if (buf)
 941             *buf++ = (wchar_t)cc;
 942         len++;
 943         psz += pa * sizeof(wxUint16);
 944     }
 945     if (buf && len<n)   *buf=0;
 946
 947     return len;
 948 }
 949
 950
 951 // copy 32bit String to 16bit MB
 952 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 953 {
 954     size_t len=0;
 955
 956     while (*psz && (!buf || len < n))
 957     {
 958         wxUint16 cc[2];
 959         size_t pa=encode_utf16(*psz, cc);
 960
 961         if (pa == (size_t)-1)
 962             return pa;
 963
 964         if (buf)
 965         {
 966             *(wxUint16*)buf = cc[0];
 967             buf += sizeof(wxUint16);
 968             if (pa > 1)
 969             {
 970                 *(wxUint16*)buf = cc[1];
 971                 buf += sizeof(wxUint16);
 972             }
 973         }
 974
 975         len += pa*sizeof(wxUint16);
 976         psz++;
 977     }
 978     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 979
 980     return len;
 981 }
 982
 983
 984 // swap 16bit MB to 32bit String
 985 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 986 {
 987     size_t len=0;
 988
 989     while (*(wxUint16*)psz && (!buf || len < n))
 990     {
 991         wxUint32 cc;
 992         char tmp[4];
 993         tmp[0]=psz[1];  tmp[1]=psz[0];
 994         tmp[2]=psz[3];  tmp[3]=psz[2];
 995
 996         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 997         if (pa == (size_t)-1)
 998             return pa;
 999
1000         if (buf)
1001             *buf++ = (wchar_t)cc;
1002
1003         len++;
1004         psz += pa * sizeof(wxUint16);
1005     }
1006     if (buf && len<n)   *buf=0;
1007
1008     return len;
1009 }
1010
1011
1012 // swap 32bit String to 16bit MB
1013 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1014 {
1015     size_t len=0;
1016
1017     while (*psz && (!buf || len < n))
1018     {
1019         wxUint16 cc[2];
1020         size_t pa=encode_utf16(*psz, cc);
1021
1022         if (pa == (size_t)-1)
1023             return pa;
1024
1025         if (buf)
1026         {
1027             *buf++ = ((char*)cc)[1];
1028             *buf++ = ((char*)cc)[0];
1029             if (pa > 1)
1030             {
1031                 *buf++ = ((char*)cc)[3];
1032                 *buf++ = ((char*)cc)[2];
1033             }
1034         }
1035
1036         len += pa*sizeof(wxUint16);
1037         psz++;
1038     }
1039     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1040
1041     return len;
1042 }
1043
1044 #endif // WC_UTF16
1045
1046
1047 // ----------------------------------------------------------------------------
1048 // UTF-32
1049 // ----------------------------------------------------------------------------
1050
1051 #ifdef WORDS_BIGENDIAN
1052 #define wxMBConvUTF32straight  wxMBConvUTF32BE
1053 #define wxMBConvUTF32swap      wxMBConvUTF32LE
1054 #else
1055 #define wxMBConvUTF32swap      wxMBConvUTF32BE
1056 #define wxMBConvUTF32straight  wxMBConvUTF32LE
1057 #endif
1058
1059
1060 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1061 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1062
1063
1064 #ifdef WC_UTF16
1065
1066 // copy 32bit MB to 16bit String
1067 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1068 {
1069     size_t len=0;
1070
1071     while (*(wxUint32*)psz && (!buf || len < n))
1072     {
1073         wxUint16 cc[2];
1074
1075         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1076         if (pa == (size_t)-1)
1077             return pa;
1078
1079         if (buf)
1080         {
1081             *buf++ = cc[0];
1082             if (pa > 1)
1083                 *buf++ = cc[1];
1084         }
1085         len += pa;
1086         psz += sizeof(wxUint32);
1087     }
1088     if (buf && len<n)   *buf=0;
1089
1090     return len;
1091 }
1092
1093
1094 // copy 16bit String to 32bit MB
1095 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1096 {
1097     size_t len=0;
1098
1099     while (*psz && (!buf || len < n))
1100     {
1101         wxUint32 cc;
1102
1103         // cast is ok for WC_UTF16
1104         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1105         if (pa == (size_t)-1)
1106             return pa;
1107
1108         if (buf)
1109         {
1110             *(wxUint32*)buf = cc;
1111             buf += sizeof(wxUint32);
1112         }
1113         len += sizeof(wxUint32);
1114         psz += pa;
1115     }
1116
1117     if (buf && len<=n-sizeof(wxUint32))
1118         *(wxUint32*)buf=0;
1119
1120     return len;
1121 }
1122
1123
1124
1125 // swap 32bit MB to 16bit String
1126 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1127 {
1128     size_t len=0;
1129
1130     while (*(wxUint32*)psz && (!buf || len < n))
1131     {
1132         char tmp[4];
1133         tmp[0] = psz[3];   tmp[1] = psz[2];
1134         tmp[2] = psz[1];   tmp[3] = psz[0];
1135
1136
1137         wxUint16 cc[2];
1138
1139         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1140         if (pa == (size_t)-1)
1141             return pa;
1142
1143         if (buf)
1144         {
1145             *buf++ = cc[0];
1146             if (pa > 1)
1147                 *buf++ = cc[1];
1148         }
1149         len += pa;
1150         psz += sizeof(wxUint32);
1151     }
1152
1153     if (buf && len<n)
1154         *buf=0;
1155
1156     return len;
1157 }
1158
1159
1160 // swap 16bit String to 32bit MB
1161 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1162 {
1163     size_t len=0;
1164
1165     while (*psz && (!buf || len < n))
1166     {
1167         char cc[4];
1168
1169         // cast is ok for WC_UTF16
1170         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1171         if (pa == (size_t)-1)
1172             return pa;
1173
1174         if (buf)
1175         {
1176             *buf++ = cc[3];
1177             *buf++ = cc[2];
1178             *buf++ = cc[1];
1179             *buf++ = cc[0];
1180         }
1181         len += sizeof(wxUint32);
1182         psz += pa;
1183     }
1184
1185     if (buf && len<=n-sizeof(wxUint32))
1186         *(wxUint32*)buf=0;
1187
1188     return len;
1189 }
1190
1191 #else // WC_UTF16
1192
1193
1194 // copy 32bit MB to 32bit String
1195 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1196 {
1197     size_t len=0;
1198
1199     while (*(wxUint32*)psz && (!buf || len < n))
1200     {
1201         if (buf)
1202             *buf++ = (wchar_t)(*(wxUint32*)psz);
1203         len++;
1204         psz += sizeof(wxUint32);
1205     }
1206
1207     if (buf && len<n)
1208         *buf=0;
1209
1210     return len;
1211 }
1212
1213
1214 // copy 32bit String to 32bit MB
1215 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1216 {
1217     size_t len=0;
1218
1219     while (*psz && (!buf || len < n))
1220     {
1221         if (buf)
1222         {
1223             *(wxUint32*)buf = *psz;
1224             buf += sizeof(wxUint32);
1225         }
1226
1227         len += sizeof(wxUint32);
1228         psz++;
1229     }
1230
1231     if (buf && len<=n-sizeof(wxUint32))
1232         *(wxUint32*)buf=0;
1233
1234     return len;
1235 }
1236
1237
1238 // swap 32bit MB to 32bit String
1239 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1240 {
1241     size_t len=0;
1242
1243     while (*(wxUint32*)psz && (!buf || len < n))
1244     {
1245         if (buf)
1246         {
1247             ((char *)buf)[0] = psz[3];
1248             ((char *)buf)[1] = psz[2];
1249             ((char *)buf)[2] = psz[1];
1250             ((char *)buf)[3] = psz[0];
1251             buf++;
1252         }
1253         len++;
1254         psz += sizeof(wxUint32);
1255     }
1256
1257     if (buf && len<n)
1258         *buf=0;
1259
1260     return len;
1261 }
1262
1263
1264 // swap 32bit String to 32bit MB
1265 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1266 {
1267     size_t len=0;
1268
1269     while (*psz && (!buf || len < n))
1270     {
1271         if (buf)
1272         {
1273             *buf++ = ((char *)psz)[3];
1274             *buf++ = ((char *)psz)[2];
1275             *buf++ = ((char *)psz)[1];
1276             *buf++ = ((char *)psz)[0];
1277         }
1278         len += sizeof(wxUint32);
1279         psz++;
1280     }
1281
1282     if (buf && len<=n-sizeof(wxUint32))
1283         *(wxUint32*)buf=0;
1284
1285     return len;
1286 }
1287
1288
1289 #endif // WC_UTF16
1290
1291
1292 // ============================================================================
1293 // The classes doing conversion using the iconv_xxx() functions
1294 // ============================================================================
1295
1296 #ifdef HAVE_ICONV
1297
1298 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1299 //     E2BIG if output buffer is _exactly_ as big as needed. Such case is
1300 //     (unless there's yet another bug in glibc) the only case when iconv()
1301 //     returns with (size_t)-1 (which means error) and says there are 0 bytes
1302 //     left in the input buffer -- when _real_ error occurs,
1303 //     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1304 //     iconv() failure.
1305 //     [This bug does not appear in glibc 2.2.]
1306 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1307 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1308                                      (errno != E2BIG || bufLeft != 0))
1309 #else
1310 #define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
1311 #endif
1312
1313 #define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1314
1315 #define ICONV_T_INVALID ((iconv_t)-1)
1316
1317 #if SIZEOF_WCHAR_T == 4
1318     #define WC_BSWAP    wxUINT32_SWAP_ALWAYS
1319     #define WC_ENC      wxFONTENCODING_UTF32
1320 #elif SIZEOF_WCHAR_T == 2
1321     #define WC_BSWAP    wxUINT16_SWAP_ALWAYS
1322     #define WC_ENC      wxFONTENCODING_UTF16
1323 #else // sizeof(wchar_t) != 2 nor 4
1324     // does this ever happen?
1325     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1326 #endif
1327
1328 // ----------------------------------------------------------------------------
1329 // wxMBConv_iconv: encapsulates an iconv character set
1330 // ----------------------------------------------------------------------------
1331
1332 class wxMBConv_iconv : public wxMBConv
1333 {
1334 public:
1335     wxMBConv_iconv(const wxChar *name);
1336     virtual ~wxMBConv_iconv();
1337
1338     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1339     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1340
1341     bool IsOk() const
1342         { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1343
1344 protected:
1345     // the iconv handlers used to translate from multibyte to wide char and in
1346     // the other direction
1347     iconv_t m2w,
1348             w2m;
1349 #if wxUSE_THREADS
1350     // guards access to m2w and w2m objects
1351     wxMutex m_iconvMutex;
1352 #endif
1353
1354 private:
1355     virtual const char *GetMBNul(size_t *nulLen) const;
1356
1357     // the name (for iconv_open()) of a wide char charset -- if none is
1358     // available on this machine, it will remain NULL
1359     static wxString ms_wcCharsetName;
1360
1361     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1362     // different endian-ness than the native one
1363     static bool ms_wcNeedsSwap;
1364
1365     // NUL representation
1366     size_t m_nulLen;
1367     char m_nulBuf[8];
1368 };
1369
1370 // make the constructor available for unit testing
1371 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1372 {
1373     wxMBConv_iconv* result = new wxMBConv_iconv( name );
1374     if ( !result->IsOk() )
1375     {
1376         delete result;
1377         return 0;
1378     }
1379     return result;
1380 }
1381
1382 wxString wxMBConv_iconv::ms_wcCharsetName;
1383 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1384
1385 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1386 {
1387     m_nulLen = (size_t)-2;
1388
1389     // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1390     // names for the charsets
1391     const wxCharBuffer cname(wxString(name).ToAscii());
1392
1393     // check for charset that represents wchar_t:
1394     if ( ms_wcCharsetName.empty() )
1395     {
1396         wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1397
1398 #if wxUSE_FONTMAP
1399         const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1400 #else // !wxUSE_FONTMAP
1401         static const wxChar *names[] =
1402         {
1403 #if SIZEOF_WCHAR_T == 4
1404             _T("UCS-4"),
1405 #elif SIZEOF_WCHAR_T = 2
1406             _T("UCS-2"),
1407 #endif
1408             NULL
1409         };
1410 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1411
1412         for ( ; *names && ms_wcCharsetName.empty(); ++names )
1413         {
1414             const wxString nameCS(*names);
1415
1416             // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1417             wxString nameXE(nameCS);
1418             #ifdef WORDS_BIGENDIAN
1419                 nameXE += _T("BE");
1420             #else // little endian
1421                 nameXE += _T("LE");
1422             #endif
1423
1424             wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
1425                        nameXE.c_str());
1426
1427             m2w = iconv_open(nameXE.ToAscii(), cname);
1428             if ( m2w == ICONV_T_INVALID )
1429             {
1430                 // try charset w/o bytesex info (e.g. "UCS4")
1431                 wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
1432                            nameCS.c_str());
1433                 m2w = iconv_open(nameCS.ToAscii(), cname);
1434
1435                 // and check for bytesex ourselves:
1436                 if ( m2w != ICONV_T_INVALID )
1437                 {
1438                     char    buf[2], *bufPtr;
1439                     wchar_t wbuf[2], *wbufPtr;
1440                     size_t  insz, outsz;
1441                     size_t  res;
1442
1443                     buf[0] = 'A';
1444                     buf[1] = 0;
1445                     wbuf[0] = 0;
1446                     insz = 2;
1447                     outsz = SIZEOF_WCHAR_T * 2;
1448                     wbufPtr = wbuf;
1449                     bufPtr = buf;
1450
1451                     res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1452                                 (char**)&wbufPtr, &outsz);
1453
1454                     if (ICONV_FAILED(res, insz))
1455                     {
1456                         wxLogLastError(wxT("iconv"));
1457                         wxLogError(_("Conversion to charset '%s' doesn't work."),
1458                                    nameCS.c_str());
1459                     }
1460                     else // ok, can convert to this encoding, remember it
1461                     {
1462                         ms_wcCharsetName = nameCS;
1463                         ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1464                     }
1465                 }
1466             }
1467             else // use charset not requiring byte swapping
1468             {
1469                 ms_wcCharsetName = nameXE;
1470             }
1471         }
1472
1473         wxLogTrace(TRACE_STRCONV,
1474                    wxT("iconv wchar_t charset is \"%s\"%s"),
1475                    ms_wcCharsetName.empty() ? _T("<none>")
1476                                             : ms_wcCharsetName.c_str(),
1477                    ms_wcNeedsSwap ? _T(" (needs swap)")
1478                                   : _T(""));
1479     }
1480     else // we already have ms_wcCharsetName
1481     {
1482         m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1483     }
1484
1485     if ( ms_wcCharsetName.empty() )
1486     {
1487         w2m = ICONV_T_INVALID;
1488     }
1489     else
1490     {
1491         w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1492         if ( w2m == ICONV_T_INVALID )
1493         {
1494             wxLogTrace(TRACE_STRCONV,
1495                        wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1496                        ms_wcCharsetName.c_str(), cname.data());
1497         }
1498     }
1499 }
1500
1501 wxMBConv_iconv::~wxMBConv_iconv()
1502 {
1503     if ( m2w != ICONV_T_INVALID )
1504         iconv_close(m2w);
1505     if ( w2m != ICONV_T_INVALID )
1506         iconv_close(w2m);
1507 }
1508
1509 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1510 {
1511 #if wxUSE_THREADS
1512     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1513     //     Unfortunately there is a couple of global wxCSConv objects such as
1514     //     wxConvLocal that are used all over wx code, so we have to make sure
1515     //     the handle is used by at most one thread at the time. Otherwise
1516     //     only a few wx classes would be safe to use from non-main threads
1517     //     as MB<->WC conversion would fail "randomly".
1518     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1519 #endif
1520
1521     size_t inbuf = strlen(psz);
1522     size_t outbuf = n * SIZEOF_WCHAR_T;
1523     size_t res, cres;
1524     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1525     wchar_t *bufPtr = buf;
1526     const char *pszPtr = psz;
1527
1528     if (buf)
1529     {
1530         // have destination buffer, convert there
1531         cres = iconv(m2w,
1532                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1533                      (char**)&bufPtr, &outbuf);
1534         res = n - (outbuf / SIZEOF_WCHAR_T);
1535
1536         if (ms_wcNeedsSwap)
1537         {
1538             // convert to native endianness
1539             for ( unsigned i = 0; i < res; i++ )
1540                 buf[n] = WC_BSWAP(buf[i]);
1541         }
1542
1543         // NB: iconv was given only strlen(psz) characters on input, and so
1544         //     it couldn't convert the trailing zero. Let's do it ourselves
1545         //     if there's some room left for it in the output buffer.
1546         if (res < n)
1547             buf[res] = 0;
1548     }
1549     else
1550     {
1551         // no destination buffer... convert using temp buffer
1552         // to calculate destination buffer requirement
1553         wchar_t tbuf[8];
1554         res = 0;
1555         do {
1556             bufPtr = tbuf;
1557             outbuf = 8*SIZEOF_WCHAR_T;
1558
1559             cres = iconv(m2w,
1560                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1561                          (char**)&bufPtr, &outbuf );
1562
1563             res += 8-(outbuf/SIZEOF_WCHAR_T);
1564         } while ((cres==(size_t)-1) && (errno==E2BIG));
1565     }
1566
1567     if (ICONV_FAILED(cres, inbuf))
1568     {
1569         //VS: it is ok if iconv fails, hence trace only
1570         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1571         return (size_t)-1;
1572     }
1573
1574     return res;
1575 }
1576
1577 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1578 {
1579 #if wxUSE_THREADS
1580     // NB: explained in MB2WC
1581     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1582 #endif
1583
1584     size_t inlen = wxWcslen(psz);
1585     size_t inbuf = inlen * SIZEOF_WCHAR_T;
1586     size_t outbuf = n;
1587     size_t res, cres;
1588
1589     wchar_t *tmpbuf = 0;
1590
1591     if (ms_wcNeedsSwap)
1592     {
1593         // need to copy to temp buffer to switch endianness
1594         // (doing WC_BSWAP twice on the original buffer won't help, as it
1595         //  could be in read-only memory, or be accessed in some other thread)
1596         tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1597         for ( size_t i = 0; i < inlen; i++ )
1598             tmpbuf[n] = WC_BSWAP(psz[i]);
1599         tmpbuf[inlen] = L'\0';
1600         psz = tmpbuf;
1601     }
1602
1603     if (buf)
1604     {
1605         // have destination buffer, convert there
1606         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1607
1608         res = n-outbuf;
1609
1610         // NB: iconv was given only wcslen(psz) characters on input, and so
1611         //     it couldn't convert the trailing zero. Let's do it ourselves
1612         //     if there's some room left for it in the output buffer.
1613         if (res < n)
1614             buf[0] = 0;
1615     }
1616     else
1617     {
1618         // no destination buffer... convert using temp buffer
1619         // to calculate destination buffer requirement
1620         char tbuf[16];
1621         res = 0;
1622         do {
1623             buf = tbuf; outbuf = 16;
1624
1625             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1626
1627             res += 16 - outbuf;
1628         } while ((cres==(size_t)-1) && (errno==E2BIG));
1629     }
1630
1631     if (ms_wcNeedsSwap)
1632     {
1633         free(tmpbuf);
1634     }
1635
1636     if (ICONV_FAILED(cres, inbuf))
1637     {
1638         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1639         return (size_t)-1;
1640     }
1641
1642     return res;
1643 }
1644
1645 const char *wxMBConv_iconv::GetMBNul(size_t *nulLen) const
1646 {
1647     if ( m_nulLen == (size_t)-2 )
1648     {
1649         wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1650
1651 #if wxUSE_THREADS
1652         // NB: explained in MB2WC
1653         wxMutexLocker lock(self->m_iconvMutex);
1654 #endif
1655
1656         size_t inLen = 1,
1657                outLen = WXSIZEOF(m_nulBuf);
1658         self->m_nulLen = iconv(w2m, ICONV_CHAR_CAST(L""), &inLen,
1659                                (char **)&self->m_nulBuf, &outLen);
1660     }
1661
1662     *nulLen = m_nulLen;
1663     return m_nulBuf;
1664 }
1665
1666 #endif // HAVE_ICONV
1667
1668
1669 // ============================================================================
1670 // Win32 conversion classes
1671 // ============================================================================
1672
1673 #ifdef wxHAVE_WIN32_MB2WC
1674
1675 // from utils.cpp
1676 #if wxUSE_FONTMAP
1677 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1678 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1679 #endif
1680
1681 class wxMBConv_win32 : public wxMBConv
1682 {
1683 public:
1684     wxMBConv_win32()
1685     {
1686         m_CodePage = CP_ACP;
1687         m_nulLen = (size_t)-2;
1688     }
1689
1690 #if wxUSE_FONTMAP
1691     wxMBConv_win32(const wxChar* name)
1692     {
1693         m_CodePage = wxCharsetToCodepage(name);
1694         m_nulLen = (size_t)-2;
1695     }
1696
1697     wxMBConv_win32(wxFontEncoding encoding)
1698     {
1699         m_CodePage = wxEncodingToCodepage(encoding);
1700         m_nulLen = (size_t)-2;
1701     }
1702 #endif // wxUSE_FONTMAP
1703
1704     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1705     {
1706         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1707         // the behaviour is not compatible with the Unix version (using iconv)
1708         // and break the library itself, e.g. wxTextInputStream::NextChar()
1709         // wouldn't work if reading an incomplete MB char didn't result in an
1710         // error
1711         //
1712         // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1713         // an error (tested under Windows Server 2003) and apparently it is
1714         // done on purpose, i.e. the function accepts any input in this case
1715         // and although I'd prefer to return error on ill-formed output, our
1716         // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1717         // explicitly ill-formed according to RFC 2152) neither so we don't
1718         // even have any fallback here...
1719         //
1720         // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1721         // Win XP or newer and if it is specified on older versions, conversion
1722         // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1723         // fails. So we can only use the flag on newer Windows versions.
1724         // Additionally, the flag is not supported by UTF7, symbol and CJK
1725         // encodings. See here:
1726         //     http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1727         //     http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1728         int flags = 0;
1729         if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
1730              m_CodePage < 50000 &&
1731              IsAtLeastWin2kSP4() )
1732         {
1733             flags = MB_ERR_INVALID_CHARS;
1734         }
1735         else if ( m_CodePage == CP_UTF8 )
1736         {
1737             // Avoid round-trip in the special case of UTF-8 by using our
1738             // own UTF-8 conversion code:
1739             return wxMBConvUTF8().MB2WC(buf, psz, n);
1740         }
1741
1742         const size_t len = ::MultiByteToWideChar
1743                              (
1744                                 m_CodePage,     // code page
1745                                 flags,          // flags: fall on error
1746                                 psz,            // input string
1747                                 -1,             // its length (NUL-terminated)
1748                                 buf,            // output string
1749                                 buf ? n : 0     // size of output buffer
1750                              );
1751         if ( !len )
1752         {
1753             // function totally failed
1754             return (size_t)-1;
1755         }
1756
1757         // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1758         // check if we succeeded, by doing a double trip:
1759         if ( !flags && buf )
1760         {
1761             const size_t mbLen = strlen(psz);
1762             wxCharBuffer mbBuf(mbLen);
1763             if ( ::WideCharToMultiByte
1764                    (
1765                       m_CodePage,
1766                       0,
1767                       buf,
1768                       -1,
1769                       mbBuf.data(),
1770                       mbLen + 1,        // size in bytes, not length
1771                       NULL,
1772                       NULL
1773                    ) == 0 ||
1774                   strcmp(mbBuf, psz) != 0 )
1775             {
1776                 // we didn't obtain the same thing we started from, hence
1777                 // the conversion was lossy and we consider that it failed
1778                 return (size_t)-1;
1779             }
1780         }
1781
1782         // note that it returns count of written chars for buf != NULL and size
1783         // of the needed buffer for buf == NULL so in either case the length of
1784         // the string (which never includes the terminating NUL) is one less
1785         return len - 1;
1786     }
1787
1788     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1789     {
1790         /*
1791             we have a problem here: by default, WideCharToMultiByte() may
1792             replace characters unrepresentable in the target code page with bad
1793             quality approximations such as turning "1/2" symbol (U+00BD) into
1794             "1" for the code pages which don't have it and we, obviously, want
1795             to avoid this at any price
1796
1797             the trouble is that this function does it _silently_, i.e. it won't
1798             even tell us whether it did or not... Win98/2000 and higher provide
1799             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1800             we have to resort to a round trip, i.e. check that converting back
1801             results in the same string -- this is, of course, expensive but
1802             otherwise we simply can't be sure to not garble the data.
1803          */
1804
1805         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1806         // it doesn't work with CJK encodings (which we test for rather roughly
1807         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1808         // supporting it
1809         BOOL usedDef wxDUMMY_INITIALIZE(false);
1810         BOOL *pUsedDef;
1811         int flags;
1812         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1813         {
1814             // it's our lucky day
1815             flags = WC_NO_BEST_FIT_CHARS;
1816             pUsedDef = &usedDef;
1817         }
1818         else // old system or unsupported encoding
1819         {
1820             flags = 0;
1821             pUsedDef = NULL;
1822         }
1823
1824         const size_t len = ::WideCharToMultiByte
1825                              (
1826                                 m_CodePage,     // code page
1827                                 flags,          // either none or no best fit
1828                                 pwz,            // input string
1829                                 -1,             // it is (wide) NUL-terminated
1830                                 buf,            // output buffer
1831                                 buf ? n : 0,    // and its size
1832                                 NULL,           // default "replacement" char
1833                                 pUsedDef        // [out] was it used?
1834                              );
1835
1836         if ( !len )
1837         {
1838             // function totally failed
1839             return (size_t)-1;
1840         }
1841
1842         // if we were really converting, check if we succeeded
1843         if ( buf )
1844         {
1845             if ( flags )
1846             {
1847                 // check if the conversion failed, i.e. if any replacements
1848                 // were done
1849                 if ( usedDef )
1850                     return (size_t)-1;
1851             }
1852             else // we must resort to double tripping...
1853             {
1854                 wxWCharBuffer wcBuf(n);
1855                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1856                         wcscmp(wcBuf, pwz) != 0 )
1857                 {
1858                     // we didn't obtain the same thing we started from, hence
1859                     // the conversion was lossy and we consider that it failed
1860                     return (size_t)-1;
1861                 }
1862             }
1863         }
1864
1865         // see the comment above for the reason of "len - 1"
1866         return len - 1;
1867     }
1868
1869     bool IsOk() const { return m_CodePage != -1; }
1870
1871 private:
1872     static bool CanUseNoBestFit()
1873     {
1874         static int s_isWin98Or2k = -1;
1875
1876         if ( s_isWin98Or2k == -1 )
1877         {
1878             int verMaj, verMin;
1879             switch ( wxGetOsVersion(&verMaj, &verMin) )
1880             {
1881                 case wxWIN95:
1882                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1883                     break;
1884
1885                 case wxWINDOWS_NT:
1886                     s_isWin98Or2k = verMaj >= 5;
1887                     break;
1888
1889                 default:
1890                     // unknown, be conseravtive by default
1891                     s_isWin98Or2k = 0;
1892             }
1893
1894             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1895         }
1896
1897         return s_isWin98Or2k == 1;
1898     }
1899
1900     static bool IsAtLeastWin2kSP4()
1901     {
1902 #ifdef __WXWINCE__
1903         return false;
1904 #else
1905         static int s_isAtLeastWin2kSP4 = -1;
1906
1907         if ( s_isAtLeastWin2kSP4 == -1 )
1908         {
1909             OSVERSIONINFOEX ver;
1910
1911             memset(&ver, 0, sizeof(ver));
1912             ver.dwOSVersionInfoSize = sizeof(ver);
1913             GetVersionEx((OSVERSIONINFO*)&ver);
1914
1915             s_isAtLeastWin2kSP4 =
1916               ((ver.dwMajorVersion > 5) || // Vista+
1917                (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
1918                (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
1919                ver.wServicePackMajor >= 4)) // 2000 SP4+
1920               ? 1 : 0;
1921         }
1922
1923         return s_isAtLeastWin2kSP4 == 1;
1924 #endif
1925     }
1926
1927     virtual const char *GetMBNul(size_t *nulLen) const
1928     {
1929         if ( m_nulLen == (size_t)-2 )
1930         {
1931             wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
1932
1933             self->m_nulLen = ::WideCharToMultiByte
1934                                (
1935                                     m_CodePage,         // code page
1936                                     0,                  // no flags
1937                                     L"",                // input string
1938                                     1,                  // translate just NUL
1939                                     self->m_nulBuf,     // output buffer
1940                                     WXSIZEOF(m_nulBuf), // and its size
1941                                     NULL,               // "replacement" char
1942                                     NULL                // [out] was it used?
1943                                );
1944
1945             if ( m_nulLen == 0 )
1946                 self->m_nulLen = (size_t)-1;
1947         }
1948
1949         *nulLen = m_nulLen;
1950         return m_nulBuf;
1951     }
1952
    // code page passed to WideCharToMultiByte() in GetMBNul()
    long m_CodePage;

    // length of the multibyte NUL as cached by GetMBNul(): (size_t)-2 means
    // that it was not computed yet and (size_t)-1 that computing it failed
    size_t m_nulLen;

    // buffer receiving the multibyte representation of NUL in GetMBNul()
    char m_nulBuf[8];
1956 };
1957
1958 #endif // wxHAVE_WIN32_MB2WC
1959
1960 // ============================================================================
1961 // Cocoa conversion classes
1962 // ============================================================================
1963
1964 #if defined(__WXCOCOA__)
1965
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
1969
1970 #include <CoreFoundation/CFString.h>
1971 #include <CoreFoundation/CFStringEncodingExt.h>
1972
1973 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1974 {
1975     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1976     if ( encoding == wxFONTENCODING_DEFAULT )
1977     {
1978         enc = CFStringGetSystemEncoding();
1979     }
1980     else switch( encoding)
1981     {
1982         case wxFONTENCODING_ISO8859_1 :
1983             enc = kCFStringEncodingISOLatin1 ;
1984             break ;
1985         case wxFONTENCODING_ISO8859_2 :
1986             enc = kCFStringEncodingISOLatin2;
1987             break ;
1988         case wxFONTENCODING_ISO8859_3 :
1989             enc = kCFStringEncodingISOLatin3 ;
1990             break ;
1991         case wxFONTENCODING_ISO8859_4 :
1992             enc = kCFStringEncodingISOLatin4;
1993             break ;
1994         case wxFONTENCODING_ISO8859_5 :
1995             enc = kCFStringEncodingISOLatinCyrillic;
1996             break ;
1997         case wxFONTENCODING_ISO8859_6 :
1998             enc = kCFStringEncodingISOLatinArabic;
1999             break ;
2000         case wxFONTENCODING_ISO8859_7 :
2001             enc = kCFStringEncodingISOLatinGreek;
2002             break ;
2003         case wxFONTENCODING_ISO8859_8 :
2004             enc = kCFStringEncodingISOLatinHebrew;
2005             break ;
2006         case wxFONTENCODING_ISO8859_9 :
2007             enc = kCFStringEncodingISOLatin5;
2008             break ;
2009         case wxFONTENCODING_ISO8859_10 :
2010             enc = kCFStringEncodingISOLatin6;
2011             break ;
2012         case wxFONTENCODING_ISO8859_11 :
2013             enc = kCFStringEncodingISOLatinThai;
2014             break ;
2015         case wxFONTENCODING_ISO8859_13 :
2016             enc = kCFStringEncodingISOLatin7;
2017             break ;
2018         case wxFONTENCODING_ISO8859_14 :
2019             enc = kCFStringEncodingISOLatin8;
2020             break ;
2021         case wxFONTENCODING_ISO8859_15 :
2022             enc = kCFStringEncodingISOLatin9;
2023             break ;
2024
2025         case wxFONTENCODING_KOI8 :
2026             enc = kCFStringEncodingKOI8_R;
2027             break ;
2028         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2029             enc = kCFStringEncodingDOSRussian;
2030             break ;
2031
2032 //      case wxFONTENCODING_BULGARIAN :
2033 //          enc = ;
2034 //          break ;
2035
2036         case wxFONTENCODING_CP437 :
2037             enc =kCFStringEncodingDOSLatinUS ;
2038             break ;
2039         case wxFONTENCODING_CP850 :
2040             enc = kCFStringEncodingDOSLatin1;
2041             break ;
2042         case wxFONTENCODING_CP852 :
2043             enc = kCFStringEncodingDOSLatin2;
2044             break ;
2045         case wxFONTENCODING_CP855 :
2046             enc = kCFStringEncodingDOSCyrillic;
2047             break ;
2048         case wxFONTENCODING_CP866 :
2049             enc =kCFStringEncodingDOSRussian ;
2050             break ;
2051         case wxFONTENCODING_CP874 :
2052             enc = kCFStringEncodingDOSThai;
2053             break ;
2054         case wxFONTENCODING_CP932 :
2055             enc = kCFStringEncodingDOSJapanese;
2056             break ;
2057         case wxFONTENCODING_CP936 :
2058             enc =kCFStringEncodingDOSChineseSimplif ;
2059             break ;
2060         case wxFONTENCODING_CP949 :
2061             enc = kCFStringEncodingDOSKorean;
2062             break ;
2063         case wxFONTENCODING_CP950 :
2064             enc = kCFStringEncodingDOSChineseTrad;
2065             break ;
2066         case wxFONTENCODING_CP1250 :
2067             enc = kCFStringEncodingWindowsLatin2;
2068             break ;
2069         case wxFONTENCODING_CP1251 :
2070             enc =kCFStringEncodingWindowsCyrillic ;
2071             break ;
2072         case wxFONTENCODING_CP1252 :
2073             enc =kCFStringEncodingWindowsLatin1 ;
2074             break ;
2075         case wxFONTENCODING_CP1253 :
2076             enc = kCFStringEncodingWindowsGreek;
2077             break ;
2078         case wxFONTENCODING_CP1254 :
2079             enc = kCFStringEncodingWindowsLatin5;
2080             break ;
2081         case wxFONTENCODING_CP1255 :
2082             enc =kCFStringEncodingWindowsHebrew ;
2083             break ;
2084         case wxFONTENCODING_CP1256 :
2085             enc =kCFStringEncodingWindowsArabic ;
2086             break ;
2087         case wxFONTENCODING_CP1257 :
2088             enc = kCFStringEncodingWindowsBalticRim;
2089             break ;
2090 //   This only really encodes to UTF7 (if that) evidently
2091 //        case wxFONTENCODING_UTF7 :
2092 //            enc = kCFStringEncodingNonLossyASCII ;
2093 //            break ;
2094         case wxFONTENCODING_UTF8 :
2095             enc = kCFStringEncodingUTF8 ;
2096             break ;
2097         case wxFONTENCODING_EUC_JP :
2098             enc = kCFStringEncodingEUC_JP;
2099             break ;
2100         case wxFONTENCODING_UTF16 :
2101             enc = kCFStringEncodingUnicode ;
2102             break ;
2103         case wxFONTENCODING_MACROMAN :
2104             enc = kCFStringEncodingMacRoman ;
2105             break ;
2106         case wxFONTENCODING_MACJAPANESE :
2107             enc = kCFStringEncodingMacJapanese ;
2108             break ;
2109         case wxFONTENCODING_MACCHINESETRAD :
2110             enc = kCFStringEncodingMacChineseTrad ;
2111             break ;
2112         case wxFONTENCODING_MACKOREAN :
2113             enc = kCFStringEncodingMacKorean ;
2114             break ;
2115         case wxFONTENCODING_MACARABIC :
2116             enc = kCFStringEncodingMacArabic ;
2117             break ;
2118         case wxFONTENCODING_MACHEBREW :
2119             enc = kCFStringEncodingMacHebrew ;
2120             break ;
2121         case wxFONTENCODING_MACGREEK :
2122             enc = kCFStringEncodingMacGreek ;
2123             break ;
2124         case wxFONTENCODING_MACCYRILLIC :
2125             enc = kCFStringEncodingMacCyrillic ;
2126             break ;
2127         case wxFONTENCODING_MACDEVANAGARI :
2128             enc = kCFStringEncodingMacDevanagari ;
2129             break ;
2130         case wxFONTENCODING_MACGURMUKHI :
2131             enc = kCFStringEncodingMacGurmukhi ;
2132             break ;
2133         case wxFONTENCODING_MACGUJARATI :
2134             enc = kCFStringEncodingMacGujarati ;
2135             break ;
2136         case wxFONTENCODING_MACORIYA :
2137             enc = kCFStringEncodingMacOriya ;
2138             break ;
2139         case wxFONTENCODING_MACBENGALI :
2140             enc = kCFStringEncodingMacBengali ;
2141             break ;
2142         case wxFONTENCODING_MACTAMIL :
2143             enc = kCFStringEncodingMacTamil ;
2144             break ;
2145         case wxFONTENCODING_MACTELUGU :
2146             enc = kCFStringEncodingMacTelugu ;
2147             break ;
2148         case wxFONTENCODING_MACKANNADA :
2149             enc = kCFStringEncodingMacKannada ;
2150             break ;
2151         case wxFONTENCODING_MACMALAJALAM :
2152             enc = kCFStringEncodingMacMalayalam ;
2153             break ;
2154         case wxFONTENCODING_MACSINHALESE :
2155             enc = kCFStringEncodingMacSinhalese ;
2156             break ;
2157         case wxFONTENCODING_MACBURMESE :
2158             enc = kCFStringEncodingMacBurmese ;
2159             break ;
2160         case wxFONTENCODING_MACKHMER :
2161             enc = kCFStringEncodingMacKhmer ;
2162             break ;
2163         case wxFONTENCODING_MACTHAI :
2164             enc = kCFStringEncodingMacThai ;
2165             break ;
2166         case wxFONTENCODING_MACLAOTIAN :
2167             enc = kCFStringEncodingMacLaotian ;
2168             break ;
2169         case wxFONTENCODING_MACGEORGIAN :
2170             enc = kCFStringEncodingMacGeorgian ;
2171             break ;
2172         case wxFONTENCODING_MACARMENIAN :
2173             enc = kCFStringEncodingMacArmenian ;
2174             break ;
2175         case wxFONTENCODING_MACCHINESESIMP :
2176             enc = kCFStringEncodingMacChineseSimp ;
2177             break ;
2178         case wxFONTENCODING_MACTIBETAN :
2179             enc = kCFStringEncodingMacTibetan ;
2180             break ;
2181         case wxFONTENCODING_MACMONGOLIAN :
2182             enc = kCFStringEncodingMacMongolian ;
2183             break ;
2184         case wxFONTENCODING_MACETHIOPIC :
2185             enc = kCFStringEncodingMacEthiopic ;
2186             break ;
2187         case wxFONTENCODING_MACCENTRALEUR :
2188             enc = kCFStringEncodingMacCentralEurRoman ;
2189             break ;
2190         case wxFONTENCODING_MACVIATNAMESE :
2191             enc = kCFStringEncodingMacVietnamese ;
2192             break ;
2193         case wxFONTENCODING_MACARABICEXT :
2194             enc = kCFStringEncodingMacExtArabic ;
2195             break ;
2196         case wxFONTENCODING_MACSYMBOL :
2197             enc = kCFStringEncodingMacSymbol ;
2198             break ;
2199         case wxFONTENCODING_MACDINGBATS :
2200             enc = kCFStringEncodingMacDingbats ;
2201             break ;
2202         case wxFONTENCODING_MACTURKISH :
2203             enc = kCFStringEncodingMacTurkish ;
2204             break ;
2205         case wxFONTENCODING_MACCROATIAN :
2206             enc = kCFStringEncodingMacCroatian ;
2207             break ;
2208         case wxFONTENCODING_MACICELANDIC :
2209             enc = kCFStringEncodingMacIcelandic ;
2210             break ;
2211         case wxFONTENCODING_MACROMANIAN :
2212             enc = kCFStringEncodingMacRomanian ;
2213             break ;
2214         case wxFONTENCODING_MACCELTIC :
2215             enc = kCFStringEncodingMacCeltic ;
2216             break ;
2217         case wxFONTENCODING_MACGAELIC :
2218             enc = kCFStringEncodingMacGaelic ;
2219             break ;
2220 //      case wxFONTENCODING_MACKEYBOARD :
2221 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2222 //          break ;
2223         default :
2224             // because gcc is picky
2225             break ;
2226     } ;
2227     return enc ;
2228 }
2229
2230 class wxMBConv_cocoa : public wxMBConv
2231 {
2232 public:
2233     wxMBConv_cocoa()
2234     {
2235         Init(CFStringGetSystemEncoding()) ;
2236     }
2237
2238 #if wxUSE_FONTMAP
2239     wxMBConv_cocoa(const wxChar* name)
2240     {
2241         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2242     }
2243 #endif
2244
2245     wxMBConv_cocoa(wxFontEncoding encoding)
2246     {
2247         Init( wxCFStringEncFromFontEnc(encoding) );
2248     }
2249
2250     ~wxMBConv_cocoa()
2251     {
2252     }
2253
2254     void Init( CFStringEncoding encoding)
2255     {
2256         m_encoding = encoding ;
2257     }
2258
2259     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2260     {
2261         wxASSERT(szUnConv);
2262
2263         CFStringRef theString = CFStringCreateWithBytes (
2264                                                 NULL, //the allocator
2265                                                 (const UInt8*)szUnConv,
2266                                                 strlen(szUnConv),
2267                                                 m_encoding,
2268                                                 false //no BOM/external representation
2269                                                 );
2270
2271         wxASSERT(theString);
2272
2273         size_t nOutLength = CFStringGetLength(theString);
2274
2275         if (szOut == NULL)
2276         {
2277             CFRelease(theString);
2278             return nOutLength;
2279         }
2280
2281         CFRange theRange = { 0, nOutSize };
2282
2283 #if SIZEOF_WCHAR_T == 4
2284         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2285 #endif
2286
2287         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2288
2289         CFRelease(theString);
2290
2291         szUniCharBuffer[nOutLength] = '\0' ;
2292
2293 #if SIZEOF_WCHAR_T == 4
2294         wxMBConvUTF16 converter ;
2295         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2296         delete[] szUniCharBuffer;
2297 #endif
2298
2299         return nOutLength;
2300     }
2301
2302     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2303     {
2304         wxASSERT(szUnConv);
2305
2306         size_t nRealOutSize;
2307         size_t nBufSize = wxWcslen(szUnConv);
2308         UniChar* szUniBuffer = (UniChar*) szUnConv;
2309
2310 #if SIZEOF_WCHAR_T == 4
2311         wxMBConvUTF16 converter ;
2312         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2313         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2314         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2315         nBufSize /= sizeof(UniChar);
2316 #endif
2317
2318         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2319                                 NULL, //allocator
2320                                 szUniBuffer,
2321                                 nBufSize,
2322                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2323                             );
2324
2325         wxASSERT(theString);
2326
2327         //Note that CER puts a BOM when converting to unicode
2328         //so we  check and use getchars instead in that case
2329         if (m_encoding == kCFStringEncodingUnicode)
2330         {
2331             if (szOut != NULL)
2332                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2333
2334             nRealOutSize = CFStringGetLength(theString) + 1;
2335         }
2336         else
2337         {
2338             CFStringGetBytes(
2339                 theString,
2340                 CFRangeMake(0, CFStringGetLength(theString)),
2341                 m_encoding,
2342                 0, //what to put in characters that can't be converted -
2343                     //0 tells CFString to return NULL if it meets such a character
2344                 false, //not an external representation
2345                 (UInt8*) szOut,
2346                 nOutSize,
2347                 (CFIndex*) &nRealOutSize
2348                         );
2349         }
2350
2351         CFRelease(theString);
2352
2353 #if SIZEOF_WCHAR_T == 4
2354         delete[] szUniBuffer;
2355 #endif
2356
2357         return  nRealOutSize - 1;
2358     }
2359
2360     bool IsOk() const
2361     {
2362         return m_encoding != kCFStringEncodingInvalidId &&
2363               CFStringIsEncodingAvailable(m_encoding);
2364     }
2365
2366 private:
2367     CFStringEncoding m_encoding ;
2368 };
2369
2370 #endif // defined(__WXCOCOA__)
2371
2372 // ============================================================================
2373 // Mac conversion classes
2374 // ============================================================================
2375
2376 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2377
2378 class wxMBConv_mac : public wxMBConv
2379 {
2380 public:
2381     wxMBConv_mac()
2382     {
2383         Init(CFStringGetSystemEncoding()) ;
2384     }
2385
2386 #if wxUSE_FONTMAP
2387     wxMBConv_mac(const wxChar* name)
2388     {
2389         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2390     }
2391 #endif
2392
2393     wxMBConv_mac(wxFontEncoding encoding)
2394     {
2395         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2396     }
2397
2398     ~wxMBConv_mac()
2399     {
2400         OSStatus status = noErr ;
2401         status = TECDisposeConverter(m_MB2WC_converter);
2402         status = TECDisposeConverter(m_WC2MB_converter);
2403     }
2404
2405
2406     void Init( TextEncodingBase encoding)
2407     {
2408         OSStatus status = noErr ;
2409         m_char_encoding = encoding ;
2410         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2411
2412         status = TECCreateConverter(&m_MB2WC_converter,
2413                                     m_char_encoding,
2414                                     m_unicode_encoding);
2415         status = TECCreateConverter(&m_WC2MB_converter,
2416                                     m_unicode_encoding,
2417                                     m_char_encoding);
2418     }
2419
2420     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2421     {
2422         OSStatus status = noErr ;
2423         ByteCount byteOutLen ;
2424         ByteCount byteInLen = strlen(psz) ;
2425         wchar_t *tbuf = NULL ;
2426         UniChar* ubuf = NULL ;
2427         size_t res = 0 ;
2428
2429         if (buf == NULL)
2430         {
2431             //apple specs say at least 32
2432             n = wxMax( 32 , byteInLen ) ;
2433             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2434         }
2435         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2436 #if SIZEOF_WCHAR_T == 4
2437         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2438 #else
2439         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2440 #endif
2441         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2442           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2443 #if SIZEOF_WCHAR_T == 4
2444         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2445         // is not properly terminated we get random characters at the end
2446         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2447         wxMBConvUTF16 converter ;
2448         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2449         free( ubuf ) ;
2450 #else
2451         res = byteOutLen / sizeof( UniChar ) ;
2452 #endif
2453         if ( buf == NULL )
2454              free(tbuf) ;
2455
2456         if ( buf  && res < n)
2457             buf[res] = 0;
2458
2459         return res ;
2460     }
2461
2462     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2463     {
2464         OSStatus status = noErr ;
2465         ByteCount byteOutLen ;
2466         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2467
2468         char *tbuf = NULL ;
2469
2470         if (buf == NULL)
2471         {
2472             //apple specs say at least 32
2473             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2474             tbuf = (char*) malloc( n ) ;
2475         }
2476
2477         ByteCount byteBufferLen = n ;
2478         UniChar* ubuf = NULL ;
2479 #if SIZEOF_WCHAR_T == 4
2480         wxMBConvUTF16 converter ;
2481         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2482         byteInLen = unicharlen ;
2483         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2484         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2485 #else
2486         ubuf = (UniChar*) psz ;
2487 #endif
2488         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2489             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2490 #if SIZEOF_WCHAR_T == 4
2491         free( ubuf ) ;
2492 #endif
2493         if ( buf == NULL )
2494             free(tbuf) ;
2495
2496         size_t res = byteOutLen ;
2497         if ( buf  && res < n)
2498         {
2499             buf[res] = 0;
2500
2501             //we need to double-trip to verify it didn't insert any ? in place
2502             //of bogus characters
2503             wxWCharBuffer wcBuf(n);
2504             size_t pszlen = wxWcslen(psz);
2505             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2506                         wxWcslen(wcBuf) != pszlen ||
2507                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2508             {
2509                 // we didn't obtain the same thing we started from, hence
2510                 // the conversion was lossy and we consider that it failed
2511                 return (size_t)-1;
2512             }
2513         }
2514
2515         return res ;
2516     }
2517
2518     bool IsOk() const
2519         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2520
2521 private:
2522     TECObjectRef m_MB2WC_converter ;
2523     TECObjectRef m_WC2MB_converter ;
2524
2525     TextEncodingBase m_char_encoding ;
2526     TextEncodingBase m_unicode_encoding ;
2527 };
2528
2529 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2530
2531 // ============================================================================
2532 // wxEncodingConverter based conversion classes
2533 // ============================================================================
2534
2535 #if wxUSE_FONTMAP
2536
2537 class wxMBConv_wxwin : public wxMBConv
2538 {
2539 private:
2540     void Init()
2541     {
2542         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2543                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2544     }
2545
2546 public:
2547     // temporarily just use wxEncodingConverter stuff,
2548     // so that it works while a better implementation is built
2549     wxMBConv_wxwin(const wxChar* name)
2550     {
2551         if (name)
2552             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2553         else
2554             m_enc = wxFONTENCODING_SYSTEM;
2555
2556         Init();
2557     }
2558
2559     wxMBConv_wxwin(wxFontEncoding enc)
2560     {
2561         m_enc = enc;
2562
2563         Init();
2564     }
2565
2566     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2567     {
2568         size_t inbuf = strlen(psz);
2569         if (buf)
2570         {
2571             if (!m2w.Convert(psz,buf))
2572                 return (size_t)-1;
2573         }
2574         return inbuf;
2575     }
2576
2577     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2578     {
2579         const size_t inbuf = wxWcslen(psz);
2580         if (buf)
2581         {
2582             if (!w2m.Convert(psz,buf))
2583                 return (size_t)-1;
2584         }
2585
2586         return inbuf;
2587     }
2588
2589     bool IsOk() const { return m_ok; }
2590
2591 public:
2592     wxFontEncoding m_enc;
2593     wxEncodingConverter m2w, w2m;
2594
2595 private:
2596     virtual const char *GetMBNul(size_t *nulLen) const
2597     {
2598         switch ( m_enc )
2599         {
2600             case wxFONTENCODING_UTF16BE:
2601             case wxFONTENCODING_UTF16LE:
2602                 *nulLen = 2;
2603                 return "\0";
2604
2605             case wxFONTENCODING_UTF32BE:
2606             case wxFONTENCODING_UTF32LE:
2607                 *nulLen = 4;
2608                 return "\0\0\0";
2609
2610             default:
2611                 *nulLen = 1;
2612                 return "";
2613         }
2614     }
2615
2616     // were we initialized successfully?
2617     bool m_ok;
2618
2619     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2620 };
2621
2622 // make the constructors available for unit testing
2623 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2624 {
2625     wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2626     if ( !result->IsOk() )
2627     {
2628         delete result;
2629         return 0;
2630     }
2631     return result;
2632 }
2633
2634 #endif // wxUSE_FONTMAP
2635
2636 // ============================================================================
2637 // wxCSConv implementation
2638 // ============================================================================
2639
2640 void wxCSConv::Init()
2641 {
2642     m_name = NULL;
2643     m_convReal =  NULL;
2644     m_deferred = true;
2645 }
2646
2647 wxCSConv::wxCSConv(const wxChar *charset)
2648 {
2649     Init();
2650
2651     if ( charset )
2652     {
2653         SetName(charset);
2654     }
2655
2656 #if wxUSE_FONTMAP
2657     m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2658 #else
2659     m_encoding = wxFONTENCODING_SYSTEM;
2660 #endif
2661 }
2662
2663 wxCSConv::wxCSConv(wxFontEncoding encoding)
2664 {
2665     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2666     {
2667         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2668
2669         encoding = wxFONTENCODING_SYSTEM;
2670     }
2671
2672     Init();
2673
2674     m_encoding = encoding;
2675 }
2676
2677 wxCSConv::~wxCSConv()
2678 {
2679     Clear();
2680 }
2681
2682 wxCSConv::wxCSConv(const wxCSConv& conv)
2683         : wxMBConv()
2684 {
2685     Init();
2686
2687     SetName(conv.m_name);
2688     m_encoding = conv.m_encoding;
2689 }
2690
2691 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2692 {
2693     Clear();
2694
2695     SetName(conv.m_name);
2696     m_encoding = conv.m_encoding;
2697
2698     return *this;
2699 }
2700
2701 void wxCSConv::Clear()
2702 {
2703     free(m_name);
2704     delete m_convReal;
2705
2706     m_name = NULL;
2707     m_convReal = NULL;
2708 }
2709
2710 void wxCSConv::SetName(const wxChar *charset)
2711 {
2712     if (charset)
2713     {
2714         m_name = wxStrdup(charset);
2715         m_deferred = true;
2716     }
2717 }
2718
2719 #if wxUSE_FONTMAP
2720 #include "wx/hashmap.h"
2721
// Hash map type associating a font encoding with a charset name.
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// Cache used by wxCSConv::DoCreate(): for each encoding already tried it holds
// the charset name for which a working wxMBConv_iconv could be created or an
// empty string if none of the names worked.
static wxEncodingNameCache gs_nameCache;
2726 #endif
2727
2728 wxMBConv *wxCSConv::DoCreate() const
2729 {
2730 #if wxUSE_FONTMAP
2731     wxLogTrace(TRACE_STRCONV,
2732                wxT("creating conversion for %s"),
2733                (m_name ? m_name
2734                        : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2735 #endif // wxUSE_FONTMAP
2736
2737     // check for the special case of ASCII or ISO8859-1 charset: as we have
2738     // special knowledge of it anyhow, we don't need to create a special
2739     // conversion object
2740     if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
2741             m_encoding == wxFONTENCODING_DEFAULT )
2742     {
2743         // don't convert at all
2744         return NULL;
2745     }
2746
2747     // we trust OS to do conversion better than we can so try external
2748     // conversion methods first
2749     //
2750     // the full order is:
2751     //      1. OS conversion (iconv() under Unix or Win32 API)
2752     //      2. hard coded conversions for UTF
2753     //      3. wxEncodingConverter as fall back
2754
2755     // step (1)
2756 #ifdef HAVE_ICONV
2757 #if !wxUSE_FONTMAP
2758     if ( m_name )
2759 #endif // !wxUSE_FONTMAP
2760     {
2761         wxString name(m_name);
2762         wxFontEncoding encoding(m_encoding);
2763
2764         if ( !name.empty() )
2765         {
2766             wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2767             if ( conv->IsOk() )
2768                 return conv;
2769
2770             delete conv;
2771
2772 #if wxUSE_FONTMAP
2773             encoding =
2774                 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2775 #endif // wxUSE_FONTMAP
2776         }
2777 #if wxUSE_FONTMAP
2778         {
2779             const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2780             if ( it != gs_nameCache.end() )
2781             {
2782                 if ( it->second.empty() )
2783                     return NULL;
2784
2785                 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2786                 if ( conv->IsOk() )
2787                     return conv;
2788
2789                 delete conv;
2790             }
2791
2792             const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2793
2794             for ( ; *names; ++names )
2795             {
2796                 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2797                 if ( conv->IsOk() )
2798                 {
2799                     gs_nameCache[encoding] = *names;
2800                     return conv;
2801                 }
2802
2803                 delete conv;
2804             }
2805
2806             gs_nameCache[encoding] = _T(""); // cache the failure
2807         }
2808 #endif // wxUSE_FONTMAP
2809     }
2810 #endif // HAVE_ICONV
2811
2812 #ifdef wxHAVE_WIN32_MB2WC
2813     {
2814 #if wxUSE_FONTMAP
2815         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2816                                       : new wxMBConv_win32(m_encoding);
2817         if ( conv->IsOk() )
2818             return conv;
2819
2820         delete conv;
2821 #else
2822         return NULL;
2823 #endif
2824     }
2825 #endif // wxHAVE_WIN32_MB2WC
2826 #if defined(__WXMAC__)
2827     {
2828         // leave UTF16 and UTF32 to the built-ins of wx
2829         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2830             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2831         {
2832
2833 #if wxUSE_FONTMAP
2834             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2835                                         : new wxMBConv_mac(m_encoding);
2836 #else
2837             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2838 #endif
2839             if ( conv->IsOk() )
2840                  return conv;
2841
2842             delete conv;
2843         }
2844     }
2845 #endif
2846 #if defined(__WXCOCOA__)
2847     {
2848         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2849         {
2850
2851 #if wxUSE_FONTMAP
2852             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2853                                           : new wxMBConv_cocoa(m_encoding);
2854 #else
2855             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2856 #endif
2857             if ( conv->IsOk() )
2858                  return conv;
2859
2860             delete conv;
2861         }
2862     }
2863 #endif
2864     // step (2)
2865     wxFontEncoding enc = m_encoding;
2866 #if wxUSE_FONTMAP
2867     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2868     {
2869         // use "false" to suppress interactive dialogs -- we can be called from
2870         // anywhere and popping up a dialog from here is the last thing we want to
2871         // do
2872         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2873     }
2874 #endif // wxUSE_FONTMAP
2875
2876     switch ( enc )
2877     {
2878         case wxFONTENCODING_UTF7:
2879              return new wxMBConvUTF7;
2880
2881         case wxFONTENCODING_UTF8:
2882              return new wxMBConvUTF8;
2883
2884         case wxFONTENCODING_UTF16BE:
2885              return new wxMBConvUTF16BE;
2886
2887         case wxFONTENCODING_UTF16LE:
2888              return new wxMBConvUTF16LE;
2889
2890         case wxFONTENCODING_UTF32BE:
2891              return new wxMBConvUTF32BE;
2892
2893         case wxFONTENCODING_UTF32LE:
2894              return new wxMBConvUTF32LE;
2895
2896         default:
2897              // nothing to do but put here to suppress gcc warnings
2898              ;
2899     }
2900
2901     // step (3)
2902 #if wxUSE_FONTMAP
2903     {
2904         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2905                                       : new wxMBConv_wxwin(m_encoding);
2906         if ( conv->IsOk() )
2907             return conv;
2908
2909         delete conv;
2910     }
2911 #endif // wxUSE_FONTMAP
2912
2913     // NB: This is a hack to prevent deadlock. What could otherwise happen
2914     //     in Unicode build: wxConvLocal creation ends up being here
2915     //     because of some failure and logs the error. But wxLog will try to
2916     //     attach timestamp, for which it will need wxConvLocal (to convert
2917     //     time to char* and then wchar_t*), but that fails, tries to log
2918     //     error, but wxLog has a (already locked) critical section that
2919     //     guards static buffer.
2920     static bool alreadyLoggingError = false;
2921     if (!alreadyLoggingError)
2922     {
2923         alreadyLoggingError = true;
2924         wxLogError(_("Cannot convert from the charset '%s'!"),
2925                    m_name ? m_name
2926                       :
2927 #if wxUSE_FONTMAP
2928                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2929 #else // !wxUSE_FONTMAP
2930                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2931 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2932               );
2933         alreadyLoggingError = false;
2934     }
2935
2936     return NULL;
2937 }
2938
2939 void wxCSConv::CreateConvIfNeeded() const
2940 {
2941     if ( m_deferred )
2942     {
2943         wxCSConv *self = (wxCSConv *)this; // const_cast
2944
2945 #if wxUSE_INTL
2946         // if we don't have neither the name nor the encoding, use the default
2947         // encoding for this system
2948         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2949         {
2950             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2951         }
2952 #endif // wxUSE_INTL
2953
2954         self->m_convReal = DoCreate();
2955         self->m_deferred = false;
2956     }
2957 }
2958
2959 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2960 {
2961     CreateConvIfNeeded();
2962
2963     if (m_convReal)
2964         return m_convReal->MB2WC(buf, psz, n);
2965
2966     // latin-1 (direct)
2967     size_t len = strlen(psz);
2968
2969     if (buf)
2970     {
2971         for (size_t c = 0; c <= len; c++)
2972             buf[c] = (unsigned char)(psz[c]);
2973     }
2974
2975     return len;
2976 }
2977
2978 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2979 {
2980     CreateConvIfNeeded();
2981
2982     if (m_convReal)
2983         return m_convReal->WC2MB(buf, psz, n);
2984
2985     // latin-1 (direct)
2986     const size_t len = wxWcslen(psz);
2987     if (buf)
2988     {
2989         for (size_t c = 0; c <= len; c++)
2990         {
2991             if (psz[c] > 0xFF)
2992                 return (size_t)-1;
2993             buf[c] = (char)psz[c];
2994         }
2995     }
2996     else
2997     {
2998         for (size_t c = 0; c <= len; c++)
2999         {
3000             if (psz[c] > 0xFF)
3001                 return (size_t)-1;
3002         }
3003     }
3004
3005     return len;
3006 }
3007
3008 const char *wxCSConv::GetMBNul(size_t *nulLen) const
3009 {
3010     CreateConvIfNeeded();
3011
3012     if ( m_convReal )
3013     {
3014         // cast needed just to call private function of m_convReal
3015         return ((wxCSConv *)m_convReal)->GetMBNul(nulLen);
3016     }
3017
3018     *nulLen = 1;
3019     return "";
3020 }
3021
3022 // ----------------------------------------------------------------------------
3023 // globals
3024 // ----------------------------------------------------------------------------
3025
3026 #ifdef __WINDOWS__
3027     static wxMBConv_win32 wxConvLibcObj;
3028 #elif defined(__WXMAC__) && !defined(__MACH__)
3029     static wxMBConv_mac wxConvLibcObj ;
3030 #else
3031     static wxMBConvLibc wxConvLibcObj;
3032 #endif
3033
3034 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
3035 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
3036 static wxMBConvUTF7 wxConvUTF7Obj;
3037 static wxMBConvUTF8 wxConvUTF8Obj;
3038
3039 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
3040 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
3041 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
3042 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
3043 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
3044 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
3045 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
3046 #ifdef __WXOSX__
3047                                     wxConvUTF8Obj;
3048 #else
3049                                     wxConvLibcObj;
3050 #endif
3051
3052
3053 #else // !wxUSE_WCHAR_T
3054
3055 // stand-ins in absence of wchar_t
3056 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
3057                                 wxConvISO8859_1,
3058                                 wxConvLocal,
3059                                 wxConvUTF8;
3060
3061 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T