cvs to svn

[wxWidgets.git] / src / common / strconv.cpp
diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp

index f92260934f525674dfd2de9a4e8f42fdc6a97465..843c84e046ce7c5b007af15ab4c07b76ca207a73 100644 (file)
--- a/src/common/strconv.cpp
+++ b/src/common/strconv.cpp
@@ -15,20 +15,21 @@
  // For compilers that support precompilation, includes "wx.h".
  #include "wx/wxprec.h"
  
+#ifdef __BORLANDC__
+    #pragma hdrstop
+#endif  //__BORLANDC__
+
  #ifndef WX_PRECOMP
      #include "wx/intl.h"
      #include "wx/log.h"
+    #include "wx/utils.h"
+    #include "wx/hashmap.h"
  #endif
  
  #include "wx/strconv.h"
  
  #if wxUSE_WCHAR_T
  
-#ifdef __WINDOWS__
-    #include "wx/msw/private.h"
-    #include "wx/msw/missing.h"
-#endif
-
  #ifndef __WXWINCE__
  #include <errno.h>
  #endif
@@ -38,6 +39,8 @@
  #include <stdlib.h>
  
  #if defined(__WIN32__) && !defined(__WXMICROWIN__)
+    #include "wx/msw/private.h"
+    #include "wx/msw/missing.h"
      #define wxHAVE_WIN32_MB2WC
  #endif
  
@@ -52,7 +55,6 @@
  
  #include "wx/encconv.h"
  #include "wx/fontmap.h"
-#include "wx/utils.h"
  
  #ifdef __WXMAC__
  #ifndef __DARWIN__
@@ -179,7 +181,7 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
      size_t dstWritten = 0;
  
      // the number of NULs terminating this string
-    size_t nulLen wxDUMMY_INITIALIZE(0);
+    size_t nulLen = 0;  // not really needed, but just to avoid warnings
  
      // if we were not given the input size we just have to assume that the
      // string is properly terminated as we have no way of knowing how long it
@@ -422,10 +424,12 @@ wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
  const wxCharBuffer
  wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
  {
-    const size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
+    size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
      if ( dstLen != wxCONV_FAILED )
      {
-        wxCharBuffer buf(dstLen - 1);
+        // special case of empty input: can't allocate 0 size buffer below as
+        // wxCharBuffer insists on NUL-terminating it
+        wxCharBuffer buf(dstLen ? dstLen - 1 : 1);
          if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
          {
              if ( outLen )
@@ -433,11 +437,12 @@ wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
                  *outLen = dstLen;
  
                  const size_t nulLen = GetMBNulLen();
-                if ( !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
+                if ( dstLen >= nulLen &&
+                        !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
                  {
                      // in this case the output is NUL-terminated and we're not
                      // supposed to count NUL
-                    (*outLen) -= nulLen;
+                    *outLen -= nulLen;
                  }
              }
  
@@ -471,11 +476,11 @@ size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
  
  #ifdef __UNIX__
  
-wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
+wxConvBrokenFileNames::wxConvBrokenFileNames(const wxString& charset)
  {
-    if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
-                  || wxStricmp(charset, _T("UTF8")) == 0  )
-        m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
+    if ( wxStricmp(charset, _T("UTF-8")) == 0 ||
+         wxStricmp(charset, _T("UTF8")) == 0  )
+        m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
      else
          m_conv = new wxCSConv(charset);
  }
@@ -1583,7 +1588,7 @@ wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
  class wxMBConv_iconv : public wxMBConv
  {
  public:
-    wxMBConv_iconv(const wxChar *name);
+    wxMBConv_iconv(const char *name);
      virtual ~wxMBConv_iconv();
  
      virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
@@ -1592,9 +1597,13 @@ public:
      // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
      virtual size_t GetMBNulLen() const;
  
+#if wxUSE_UNICODE_UTF8
+    virtual bool IsUTF8() const;
+#endif
+
      virtual wxMBConv *Clone() const
      {
-        wxMBConv_iconv *p = new wxMBConv_iconv(m_name);
+        wxMBConv_iconv *p = new wxMBConv_iconv(m_name.ToAscii());
          p->m_minMBCharWidth = m_minMBCharWidth;
          return p;
      }
@@ -1632,7 +1641,7 @@ private:
  };
  
  // make the constructor available for unit testing
-WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
+WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name )
  {
      wxMBConv_iconv* result = new wxMBConv_iconv( name );
      if ( !result->IsOk() )
@@ -1647,15 +1656,11 @@ WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
  wxString wxMBConv_iconv::ms_wcCharsetName;
  bool wxMBConv_iconv::ms_wcNeedsSwap = false;
  
-wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
+wxMBConv_iconv::wxMBConv_iconv(const char *name)
                : m_name(name)
  {
      m_minMBCharWidth = 0;
  
-    // iconv operates with chars, not wxChars, but luckily it uses only ASCII
-    // names for the charsets
-    const wxCharBuffer cname(wxString(name).ToAscii());
-
      // check for charset that represents wchar_t:
      if ( ms_wcCharsetName.empty() )
      {
@@ -1664,7 +1669,7 @@ wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
  #if wxUSE_FONTMAP
          const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
  #else // !wxUSE_FONTMAP
-        static const wxChar *names[] =
+        static const wxChar *names_static[] =
          {
  #if SIZEOF_WCHAR_T == 4
              _T("UCS-4"),
@@ -1673,6 +1678,7 @@ wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
  #endif
              NULL
          };
+        const wxChar **names = names_static;
  #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
  
          for ( ; *names && ms_wcCharsetName.empty(); ++names )
@@ -1691,13 +1697,13 @@ wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
              wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
                         nameXE.c_str());
  
-            m2w = iconv_open(nameXE.ToAscii(), cname);
+            m2w = iconv_open(nameXE.ToAscii(), name);
              if ( m2w == ICONV_T_INVALID )
              {
                  // try charset w/o bytesex info (e.g. "UCS4")
                  wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
                             nameCS.c_str());
-                m2w = iconv_open(nameCS.ToAscii(), cname);
+                m2w = iconv_open(nameCS.ToAscii(), name);
  
                  // and check for bytesex ourselves:
                  if ( m2w != ICONV_T_INVALID )
@@ -1740,14 +1746,14 @@ wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
  
          wxLogTrace(TRACE_STRCONV,
                     wxT("iconv wchar_t charset is \"%s\"%s"),
-                   ms_wcCharsetName.empty() ? _T("<none>")
-                                            : ms_wcCharsetName.c_str(),
+                   ms_wcCharsetName.empty() ? wxString("<none>")
+                                            : ms_wcCharsetName,
                     ms_wcNeedsSwap ? _T(" (needs swap)")
                                    : _T(""));
      }
      else // we already have ms_wcCharsetName
      {
-        m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
+        m2w = iconv_open(ms_wcCharsetName.ToAscii(), name);
      }
  
      if ( ms_wcCharsetName.empty() )
@@ -1756,12 +1762,12 @@ wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
      }
      else
      {
-        w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
+        w2m = iconv_open(name, ms_wcCharsetName.ToAscii());
          if ( w2m == ICONV_T_INVALID )
          {
              wxLogTrace(TRACE_STRCONV,
                         wxT("\"%s\" -> \"%s\" works but not the converse!?"),
-                       ms_wcCharsetName.c_str(), cname.data());
+                       ms_wcCharsetName.c_str(), name);
          }
      }
  }
@@ -1802,8 +1808,8 @@ size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
      }
  
  #if wxUSE_THREADS
-    // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
-    //     Unfortunately there is a couple of global wxCSConv objects such as
+    // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
+    //     Unfortunately there are a couple of global wxCSConv objects such as
      //     wxConvLocal that are used all over wx code, so we have to make sure
      //     the handle is used by at most one thread at the time. Otherwise
      //     only a few wx classes would be safe to use from non-main threads
@@ -1950,7 +1956,7 @@ size_t wxMBConv_iconv::GetMBNulLen() const
          wxMutexLocker lock(self->m_iconvMutex);
  #endif
  
-        wchar_t *wnul = L"";
+        const wchar_t *wnul = L"";
          char buf[8]; // should be enough for NUL in any encoding
          size_t inLen = sizeof(wchar_t),
                 outLen = WXSIZEOF(buf);
@@ -1969,6 +1975,14 @@ size_t wxMBConv_iconv::GetMBNulLen() const
      return m_minMBCharWidth;
  }
  
+#if wxUSE_UNICODE_UTF8
+bool wxMBConv_iconv::IsUTF8() const
+{
+    return wxStricmp(m_name, "UTF-8") == 0 ||
+           wxStricmp(m_name, "UTF8") == 0;
+}
+#endif
+
  #endif // HAVE_ICONV
  
  
@@ -1980,7 +1994,7 @@ size_t wxMBConv_iconv::GetMBNulLen() const
  
  // from utils.cpp
  #if wxUSE_FONTMAP
-extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
+extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const char *charset);
  extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
  #endif
  
@@ -1994,13 +2008,14 @@ public:
      }
  
      wxMBConv_win32(const wxMBConv_win32& conv)
+        : wxMBConv()
      {
          m_CodePage = conv.m_CodePage;
          m_minMBCharWidth = conv.m_minMBCharWidth;
      }
  
  #if wxUSE_FONTMAP
-    wxMBConv_win32(const wxChar* name)
+    wxMBConv_win32(const char* name)
      {
          m_CodePage = wxCharsetToCodepage(name);
          m_minMBCharWidth = 0;
@@ -2028,12 +2043,12 @@ public:
          //     http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
          if ( m_CodePage == CP_UTF8 )
          {
-            return wxConvUTF8.MB2WC(buf, psz, n);
+            return wxMBConvUTF8().MB2WC(buf, psz, n);
          }
  
          if ( m_CodePage == CP_UTF7 )
          {
-            return wxConvUTF7.MB2WC(buf, psz, n);
+            return wxMBConvUTF7().MB2WC(buf, psz, n);
          }
  
          int flags = 0;
@@ -2223,11 +2238,11 @@ private:
              int verMaj, verMin;
              switch ( wxGetOsVersion(&verMaj, &verMin) )
              {
-                case wxWIN95:
+                case wxOS_WINDOWS_9X:
                      s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
                      break;
  
-                case wxWINDOWS_NT:
+                case wxOS_WINDOWS_NT:
                      s_isWin98Or2k = verMaj >= 5;
                      break;
  
@@ -2579,7 +2594,7 @@ public:
          Init( wxCFStringEncFromFontEnc(encoding) );
      }
  
-    ~wxMBConv_cocoa()
+    virtual ~wxMBConv_cocoa()
      {
      }
  
@@ -2723,7 +2738,7 @@ public:
      }
  
  #if wxUSE_FONTMAP
-    wxMBConv_mac(const wxChar* name)
+    wxMBConv_mac(const char* name)
      {
          Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) );
      }
@@ -2734,30 +2749,43 @@ public:
          Init( wxMacGetSystemEncFromFontEnc(encoding) );
      }
  
-    ~wxMBConv_mac()
+    virtual ~wxMBConv_mac()
      {
          OSStatus status = noErr ;
-        status = TECDisposeConverter(m_MB2WC_converter);
-        status = TECDisposeConverter(m_WC2MB_converter);
+        if (m_MB2WC_converter)
+            status = TECDisposeConverter(m_MB2WC_converter);
+        if (m_WC2MB_converter)
+            status = TECDisposeConverter(m_WC2MB_converter);
      }
  
-
-    void Init( TextEncodingBase encoding)
+    void Init( TextEncodingBase encoding,TextEncodingVariant encodingVariant = kTextEncodingDefaultVariant ,
+            TextEncodingFormat encodingFormat = kTextEncodingDefaultFormat)
      {
-        OSStatus status = noErr ;
-        m_char_encoding = encoding ;
+        m_MB2WC_converter = NULL ;
+        m_WC2MB_converter = NULL ;
+        m_char_encoding = CreateTextEncoding(encoding, encodingVariant, encodingFormat) ;
          m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 0, kUnicode16BitFormat) ;
+    }
  
-        status = TECCreateConverter(&m_MB2WC_converter,
+    virtual void CreateIfNeeded() const
+    {
+        if ( m_MB2WC_converter == NULL && m_WC2MB_converter == NULL )
+        {
+            OSStatus status = noErr ;
+            status = TECCreateConverter(&m_MB2WC_converter,
                                      m_char_encoding,
                                      m_unicode_encoding);
-        status = TECCreateConverter(&m_WC2MB_converter,
+            wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
+            status = TECCreateConverter(&m_WC2MB_converter,
                                      m_unicode_encoding,
                                      m_char_encoding);
+            wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
+        }
      }
  
      size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
      {
+        CreateIfNeeded() ;
          OSStatus status = noErr ;
          ByteCount byteOutLen ;
          ByteCount byteInLen = strlen(psz) + 1;
@@ -2806,6 +2834,7 @@ public:
  
      size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
      {
+        CreateIfNeeded() ;
          OSStatus status = noErr ;
          ByteCount byteOutLen ;
          ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
@@ -2868,16 +2897,192 @@ public:
      virtual wxMBConv *Clone() const { return new wxMBConv_mac(*this); }
  
      bool IsOk() const
-        { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL; }
+    {
+        CreateIfNeeded() ;
+        return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL;
+    }
  
-private:
-    TECObjectRef m_MB2WC_converter;
-    TECObjectRef m_WC2MB_converter;
+protected :
+    mutable TECObjectRef m_MB2WC_converter;
+    mutable TECObjectRef m_WC2MB_converter;
  
      TextEncodingBase m_char_encoding;
      TextEncodingBase m_unicode_encoding;
  };
  
+// MB is decomposed (D) normalized UTF8
+
+class wxMBConv_macUTF8D : public wxMBConv_mac
+{
+public :
+    wxMBConv_macUTF8D()
+    {
+        Init( kTextEncodingUnicodeDefault , kUnicodeNoSubset , kUnicodeUTF8Format ) ;
+        m_uni = NULL;
+        m_uniBack = NULL ;
+    }
+
+    virtual ~wxMBConv_macUTF8D()
+    {
+        if (m_uni!=NULL)
+            DisposeUnicodeToTextInfo(&m_uni);
+        if (m_uniBack!=NULL)
+            DisposeUnicodeToTextInfo(&m_uniBack);
+    }
+
+    size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
+    {
+        CreateIfNeeded() ;
+        OSStatus status = noErr ;
+        ByteCount byteOutLen ;
+        ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
+
+        char *tbuf = NULL ;
+
+        if (buf == NULL)
+        {
+            // Apple specs say at least 32
+            n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
+            tbuf = (char*) malloc( n ) ;
+        }
+
+        ByteCount byteBufferLen = n ;
+        UniChar* ubuf = NULL ;
+
+#if SIZEOF_WCHAR_T == 4
+        wxMBConvUTF16 converter ;
+        size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
+        byteInLen = unicharlen ;
+        ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
+        converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
+#else
+        ubuf = (UniChar*) psz ;
+#endif
+
+        // ubuf is a non-decomposed UniChar buffer
+
+        ByteCount dcubuflen = byteInLen * 2 + 2 ;
+        ByteCount dcubufread , dcubufwritten ;
+        UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
+
+        ConvertFromUnicodeToText( m_uni , byteInLen , ubuf ,
+            kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen  , &dcubufread , &dcubufwritten , dcubuf ) ;
+
+        // we now convert that decomposed buffer into UTF8
+
+        status = TECConvertText(
+            m_WC2MB_converter, (ConstTextPtr) dcubuf, dcubufwritten, &dcubufread,
+            (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
+
+        free( dcubuf );
+
+#if SIZEOF_WCHAR_T == 4
+        free( ubuf ) ;
+#endif
+
+        if ( buf == NULL )
+            free(tbuf) ;
+
+        size_t res = byteOutLen ;
+        if ( buf  && res < n)
+        {
+            buf[res] = 0;
+            // don't test for round-trip fidelity yet, we cannot guarantee it yet
+        }
+
+        return res ;
+    }
+
+    size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
+    {
+        CreateIfNeeded() ;
+        OSStatus status = noErr ;
+        ByteCount byteOutLen ;
+        ByteCount byteInLen = strlen(psz) + 1;
+        wchar_t *tbuf = NULL ;
+        UniChar* ubuf = NULL ;
+        size_t res = 0 ;
+
+        if (buf == NULL)
+        {
+            // Apple specs say at least 32
+            n = wxMax( 32, byteInLen ) ;
+            tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
+        }
+
+        ByteCount byteBufferLen = n * sizeof( UniChar ) ;
+
+#if SIZEOF_WCHAR_T == 4
+        ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
+#else
+        ubuf = (UniChar*) (buf ? buf : tbuf) ;
+#endif
+
+        ByteCount dcubuflen = byteBufferLen * 2 + 2 ;
+        ByteCount dcubufread , dcubufwritten ;
+        UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
+
+        status = TECConvertText(
+                                m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
+                                (TextPtr) dcubuf, dcubuflen, &byteOutLen);
+        // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
+        // is not properly terminated we get random characters at the end
+        dcubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
+
+        // now from the decomposed UniChar to properly composed uniChar
+        ConvertFromUnicodeToText( m_uniBack , byteOutLen , dcubuf ,
+                                  kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen  , &dcubufread , &dcubufwritten , ubuf ) ;
+
+        free( dcubuf );
+        byteOutLen = dcubufwritten ;
+        ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
+
+
+#if SIZEOF_WCHAR_T == 4
+        wxMBConvUTF16 converter ;
+        res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
+        free( ubuf ) ;
+#else
+        res = byteOutLen / sizeof( UniChar ) ;
+#endif
+
+        if ( buf == NULL )
+            free(tbuf) ;
+
+        if ( buf  && res < n)
+            buf[res] = 0;
+
+        return res ;
+    }
+
+    virtual void CreateIfNeeded() const
+    {
+        wxMBConv_mac::CreateIfNeeded() ;
+        if ( m_uni == NULL )
+        {
+            m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
+                kUnicodeNoSubset, kTextEncodingDefaultFormat);
+            m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
+                kUnicodeCanonicalDecompVariant, kTextEncodingDefaultFormat);
+            m_map.mappingVersion = kUnicodeUseLatestMapping;
+
+            OSStatus err = CreateUnicodeToTextInfo(&m_map, &m_uni);
+            wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
+
+            m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
+                                                       kUnicodeNoSubset, kTextEncodingDefaultFormat);
+            m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
+                                                     kUnicodeCanonicalCompVariant, kTextEncodingDefaultFormat);
+            m_map.mappingVersion = kUnicodeUseLatestMapping;
+            err = CreateUnicodeToTextInfo(&m_map, &m_uniBack);
+            wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
+        }
+    }
+protected :
+    mutable UnicodeToTextInfo   m_uni;
+    mutable UnicodeToTextInfo   m_uniBack;
+    mutable UnicodeMapping      m_map;
+};
  #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
  
  // ============================================================================
@@ -2898,7 +3103,7 @@ private:
  public:
      // temporarily just use wxEncodingConverter stuff,
      // so that it works while a better implementation is built
-    wxMBConv_wxwin(const wxChar* name)
+    wxMBConv_wxwin(const char* name)
      {
          if (name)
              m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
@@ -2971,7 +3176,7 @@ private:
  };
  
  // make the constructors available for unit testing
-WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
+WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const char* name )
  {
      wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
      if ( !result->IsOk() )
@@ -2996,13 +3201,13 @@ void wxCSConv::Init()
      m_deferred = true;
  }
  
-wxCSConv::wxCSConv(const wxChar *charset)
+wxCSConv::wxCSConv(const wxString& charset)
  {
      Init();
  
-    if ( charset )
+    if ( !charset.empty() )
      {
-        SetName(charset);
+        SetName(charset.ToAscii());
      }
  
  #if wxUSE_FONTMAP
@@ -3059,17 +3264,16 @@ void wxCSConv::Clear()
      m_convReal = NULL;
  }
  
-void wxCSConv::SetName(const wxChar *charset)
+void wxCSConv::SetName(const char *charset)
  {
      if (charset)
      {
-        m_name = wxStrdup(charset);
+        m_name = strdup(charset);
          m_deferred = true;
      }
  }
  
  #if wxUSE_FONTMAP
-#include "wx/hashmap.h"
  
  WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                       wxEncodingNameCache );
@@ -3083,7 +3287,7 @@ wxMBConv *wxCSConv::DoCreate() const
      wxLogTrace(TRACE_STRCONV,
                 wxT("creating conversion for %s"),
                 (m_name ? m_name
-                       : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
+                       : (const char*)wxFontMapperBase::GetEncodingName(m_encoding).mb_str()));
  #endif // wxUSE_FONTMAP
  
      // check for the special case of ASCII or ISO8859-1 charset: as we have
@@ -3110,12 +3314,13 @@ wxMBConv *wxCSConv::DoCreate() const
      if ( m_name )
  #endif // !wxUSE_FONTMAP
      {
-        wxString name(m_name);
+#if wxUSE_FONTMAP
          wxFontEncoding encoding(m_encoding);
+#endif
  
-        if ( !name.empty() )
+        if ( m_name )
          {
-            wxMBConv_iconv *conv = new wxMBConv_iconv(name);
+            wxMBConv_iconv *conv = new wxMBConv_iconv(m_name);
              if ( conv->IsOk() )
                  return conv;
  
@@ -3123,7 +3328,7 @@ wxMBConv *wxCSConv::DoCreate() const
  
  #if wxUSE_FONTMAP
              encoding =
-                wxFontMapperBase::Get()->CharsetToEncoding(name, false);
+                wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
  #endif // wxUSE_FONTMAP
          }
  #if wxUSE_FONTMAP
@@ -3134,7 +3339,7 @@ wxMBConv *wxCSConv::DoCreate() const
                  if ( it->second.empty() )
                      return NULL;
  
-                wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
+                wxMBConv_iconv *conv = new wxMBConv_iconv(it->second.ToAscii());
                  if ( conv->IsOk() )
                      return conv;
  
@@ -3142,20 +3347,30 @@ wxMBConv *wxCSConv::DoCreate() const
              }
  
              const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
-
-            for ( ; *names; ++names )
+            // CS : in case this does not return valid names (e.g. for MacRoman)
+            // the encoding got a 'failure' entry in the cache all the same,
+            // although it just has to be created using a different method, so
+            // only store failed iconv creation attempts (or perhaps we
+            // shouldn't do this at all?)
+            if ( names[0] != NULL )
              {
-                wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
-                if ( conv->IsOk() )
+                for ( ; *names; ++names )
                  {
-                    gs_nameCache[encoding] = *names;
-                    return conv;
+                    // FIXME-UTF8: wxFontMapperBase::GetAllEncodingNames()
+                    //             will need changes that will obsolete this
+                    wxString name(*names);
+                    wxMBConv_iconv *conv = new wxMBConv_iconv(name.ToAscii());
+                    if ( conv->IsOk() )
+                    {
+                        gs_nameCache[encoding] = *names;
+                        return conv;
+                    }
+
+                    delete conv;
                  }
  
-                delete conv;
+                gs_nameCache[encoding] = _T(""); // cache the failure
              }
-
-            gs_nameCache[encoding] = _T(""); // cache the failure
          }
  #endif // wxUSE_FONTMAP
      }
@@ -3266,10 +3481,10 @@ wxMBConv *wxCSConv::DoCreate() const
      // NB: This is a hack to prevent deadlock. What could otherwise happen
      //     in Unicode build: wxConvLocal creation ends up being here
      //     because of some failure and logs the error. But wxLog will try to
-    //     attach timestamp, for which it will need wxConvLocal (to convert
-    //     time to char* and then wchar_t*), but that fails, tries to log
-    //     error, but wxLog has a (already locked) critical section that
-    //     guards static buffer.
+    //     attach a timestamp, for which it will need wxConvLocal (to convert
+    //     time to char* and then wchar_t*), but that fails, tries to log the
+    //     error, but wxLog has an (already locked) critical section that
+    //     guards the static buffer.
      static bool alreadyLoggingError = false;
      if (!alreadyLoggingError)
      {
@@ -3278,9 +3493,9 @@ wxMBConv *wxCSConv::DoCreate() const
                     m_name ? m_name
                        :
  #if wxUSE_FONTMAP
-                         wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
+                         (const char*)wxFontMapperBase::GetEncodingDescription(m_encoding).ToAscii()
  #else // !wxUSE_FONTMAP
-                         wxString::Format(_("encoding %s"), m_encoding).c_str()
+                         (const char*)wxString::Format(_("encoding %i"), m_encoding).ToAscii()
  #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
                );
  
@@ -3296,20 +3511,60 @@ void wxCSConv::CreateConvIfNeeded() const
      {
          wxCSConv *self = (wxCSConv *)this; // const_cast
  
-#if wxUSE_INTL
          // if we don't have neither the name nor the encoding, use the default
          // encoding for this system
          if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
          {
-            self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
-        }
+#if wxUSE_INTL
+            self->m_encoding = wxLocale::GetSystemEncoding();
+#else
+            // fallback to some reasonable default:
+            self->m_encoding = wxFONTENCODING_ISO8859_1;
  #endif // wxUSE_INTL
+        }
  
          self->m_convReal = DoCreate();
          self->m_deferred = false;
      }
  }
  
+bool wxCSConv::IsOk() const
+{
+    CreateConvIfNeeded();
+
+    // special case: no convReal created for wxFONTENCODING_ISO8859_1
+    if ( m_encoding == wxFONTENCODING_ISO8859_1 )
+        return true; // always ok as we do it ourselves
+
+    // m_convReal->IsOk() is called at its own creation, so we know it must
+    // be ok if m_convReal is non-NULL
+    return m_convReal != NULL;
+}
+
+size_t wxCSConv::ToWChar(wchar_t *dst, size_t dstLen,
+                         const char *src, size_t srcLen) const
+{
+    CreateConvIfNeeded();
+
+    if (m_convReal)
+        return m_convReal->ToWChar(dst, dstLen, src, srcLen);
+
+    // latin-1 (direct)
+    return wxMBConv::ToWChar(dst, dstLen, src, srcLen);
+}
+
+size_t wxCSConv::FromWChar(char *dst, size_t dstLen,
+                           const wchar_t *src, size_t srcLen) const
+{
+    CreateConvIfNeeded();
+
+    if (m_convReal)
+        return m_convReal->FromWChar(dst, dstLen, src, srcLen);
+
+    // latin-1 (direct)
+    return wxMBConv::FromWChar(dst, dstLen, src, srcLen);
+}
+
  size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
  {
      CreateConvIfNeeded();
@@ -3369,42 +3624,122 @@ size_t wxCSConv::GetMBNulLen() const
          return m_convReal->GetMBNulLen();
      }
  
+    // otherwise, we are ISO-8859-1
      return 1;
  }
  
+#if wxUSE_UNICODE_UTF8
+bool wxCSConv::IsUTF8() const
+{
+    CreateConvIfNeeded();
+
+    if ( m_convReal )
+    {
+        return m_convReal->IsUTF8();
+    }
+
+    // otherwise, we are ISO-8859-1
+    return false;
+}
+#endif
+
+
+#if wxUSE_UNICODE
+
+wxWCharBuffer wxSafeConvertMB2WX(const char *s)
+{
+    if ( !s )
+        return wxWCharBuffer();
+
+    wxWCharBuffer wbuf(wxConvLibc.cMB2WX(s));
+    if ( !wbuf )
+        wbuf = wxMBConvUTF8().cMB2WX(s);
+    if ( !wbuf )
+        wbuf = wxConvISO8859_1.cMB2WX(s);
+
+    return wbuf;
+}
+
+wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
+{
+    if ( !ws )
+        return wxCharBuffer();
+
+    wxCharBuffer buf(wxConvLibc.cWX2MB(ws));
+    if ( !buf )
+        buf = wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL).cWX2MB(ws);
+
+    return buf;
+}
+
+#endif // wxUSE_UNICODE
+
  // ----------------------------------------------------------------------------
  // globals
  // ----------------------------------------------------------------------------
  
+// NB: The reason why we create converter objects in this convoluted way,
+//     using a factory function instead of a global variable, is that they
+//     may be used at static initialization time (some of them are used by
+//     wxString ctors and there may be a global wxString object). In other
+//     words, possibly _before_ the converter global object would be
+//     initialized.
+
+#undef wxConvLibc
+#undef wxConvUTF8
+#undef wxConvUTF7
+#undef wxConvLocal
+#undef wxConvISO8859_1
+
+#define WX_DEFINE_GLOBAL_CONV2(klass, impl_klass, name, ctor_args)      \
+    WXDLLIMPEXP_DATA_BASE(klass*) name##Ptr = NULL;                     \
+    WXDLLIMPEXP_BASE klass* wxGet_##name##Ptr()                         \
+    {                                                                   \
+        static impl_klass name##Obj ctor_args;                          \
+        return &name##Obj;                                              \
+    }                                                                   \
+    /* this ensures that all global converter objects are created */    \
+    /* by the time static initialization is done, i.e. before any */    \
+    /* thread is launched: */                                           \
+    static klass* gs_##name##instance = wxGet_##name##Ptr()
+
+#define WX_DEFINE_GLOBAL_CONV(klass, name, ctor_args) \
+    WX_DEFINE_GLOBAL_CONV2(klass, klass, name, ctor_args)
+
  #ifdef __WINDOWS__
-    static wxMBConv_win32 wxConvLibcObj;
+    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_win32, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
  #elif defined(__WXMAC__) && !defined(__MACH__)
-    static wxMBConv_mac wxConvLibcObj ;
+    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_mac, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
  #else
-    static wxMBConvLibc wxConvLibcObj;
+    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConvLibc, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
  #endif
  
-static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
-static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
-static wxMBConvUTF7 wxConvUTF7Obj;
-static wxMBConvUTF8 wxConvUTF8Obj;
-
-WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
-WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
-WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
-WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
-WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
-WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
-WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = &wxConvLocal;
-WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
+WX_DEFINE_GLOBAL_CONV(wxMBConvUTF8, wxConvUTF8, wxEMPTY_PARAMETER_VALUE);
+WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, wxEMPTY_PARAMETER_VALUE);
+
+WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM));
+WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1));
+
+WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = wxGet_wxConvLibcPtr();
+WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = wxGet_wxConvLocalPtr();
+
+#if defined(__WXMAC__) && defined(TARGET_CARBON)
+static wxMBConv_macUTF8D wxConvMacUTF8DObj;
+#endif
+WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName =
  #ifdef __WXOSX__
-                                    wxConvUTF8Obj;
+#if defined(__WXMAC__) && defined(TARGET_CARBON)
+                                    &wxConvMacUTF8DObj;
  #else
-                                    wxConvLibcObj;
+                                    wxGet_wxConvUTF8Ptr();
  #endif
+#else // !__WXOSX__
+                                    wxGet_wxConvLibcPtr();
+#endif // __WXOSX__/!__WXOSX__
  
  #else // !wxUSE_WCHAR_T
  
+// FIXME-UTF8: remove this, wxUSE_WCHAR_T is required now
  // stand-ins in absence of wchar_t
  WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
                                  wxConvISO8859_1,