Add wxUSE_ARCSTREAM so that other archive classes can be used without wxZip

[wxWidgets.git] / src / common / strconv.cpp
diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp

index c93bcc313222a6f54925226419d973d4eb11e21b..5ceede42250f07aab2c5352388833b425d04c2a2 100644 (file)
--- a/src/common/strconv.cpp
+++ b/src/common/strconv.cpp
@@ -70,6 +70,7 @@
  
  #ifdef HAVE_ICONV
      #include <iconv.h>
+    #include "wx/thread.h"
  #endif
  
  #include "wx/encconv.h"
@@ -217,14 +218,18 @@ const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
      return buf;
  }
  
-size_t wxMBConv::MB2WC(wchar_t* szBuffer, const char* szString, 
-                       size_t outsize, size_t nStringLen) const
+const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
  {
+    wxASSERT(pOutSize != NULL);
+
      const char* szEnd = szString + nStringLen + 1;
      const char* szPos = szString;
      const char* szStart = szPos;
  
      size_t nActualLength = 0;
+    size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
+
+    wxWCharBuffer theBuffer(nCurrentSize);
  
      //Convert the string until the length() is reached, continuing the
      //loop every time a null character is reached
@@ -237,18 +242,31 @@ size_t wxMBConv::MB2WC(wchar_t* szBuffer, const char* szString,
  
          //Invalid conversion?
          if( nLen == (size_t)-1 )
-            return nLen;
+        {
+            *pOutSize = 0;
+            theBuffer.data()[0u] = wxT('\0');
+            return theBuffer;
+        }
+
  
          //Increase the actual length (+1 for current null character)
          nActualLength += nLen + 1;
  
-        //Only copy data in if buffer size is big enough
-        if (szBuffer != NULL &&
-            nActualLength <= outsize)
+        //if buffer too small, realloc the buffer
+        if (nActualLength > (nCurrentSize+1))
          {
-            //Convert the current (sub)string
-            if ( MB2WC(&szBuffer[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
-                return (size_t)-1;
+            wxWCharBuffer theNewBuffer(nCurrentSize << 1);
+            memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
+            theBuffer = theNewBuffer;
+            nCurrentSize <<= 1;
+        }
+
+        //Convert the current (sub)string
+        if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
+        {
+            *pOutSize = 0;
+            theBuffer.data()[0u] = wxT('\0');
+            return theBuffer;
          }
  
          //Increment to next (sub)string
@@ -258,17 +276,23 @@ size_t wxMBConv::MB2WC(wchar_t* szBuffer, const char* szString,
          szPos += strlen(szPos) + 1;
      }
  
-    return nActualLength - 1; //success - return actual length
+    //success - return actual length and the buffer
+    *pOutSize = nActualLength;
+    return theBuffer;  
  }
  
-size_t wxMBConv::WC2MB(char* szBuffer, const wchar_t* szString, 
-                       size_t outsize, size_t nStringLen) const
+const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
  {
+    wxASSERT(pOutSize != NULL);
+
      const wchar_t* szEnd = szString + nStringLen + 1;
      const wchar_t* szPos = szString;
      const wchar_t* szStart = szPos;
  
      size_t nActualLength = 0;
+    size_t nCurrentSize = nStringLen << 2; //try * 4 first
+
+    wxCharBuffer theBuffer(nCurrentSize);
  
      //Convert the string until the length() is reached, continuing the
      //loop every time a null character is reached
@@ -281,18 +305,30 @@ size_t wxMBConv::WC2MB(char* szBuffer, const wchar_t* szString,
  
          //Invalid conversion?
          if( nLen == (size_t)-1 )
-            return nLen;
+        {
+            *pOutSize = 0;
+            theBuffer.data()[0u] = wxT('\0');
+            return theBuffer;
+        }
  
          //Increase the actual length (+1 for current null character)
          nActualLength += nLen + 1;
          
-        //Only copy data in if buffer size is big enough
-        if (szBuffer != NULL &&
-            nActualLength <= outsize)
+        //if buffer too small, realloc the buffer
+        if (nActualLength > (nCurrentSize+1))
          {
-            //Convert the current (sub)string
-            if(WC2MB(&szBuffer[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
-                return (size_t)-1;
+            wxCharBuffer theNewBuffer(nCurrentSize << 1);
+            memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
+            theBuffer = theNewBuffer;
+            nCurrentSize <<= 1;
+        }
+
+        //Convert the current (sub)string
+        if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
+        {
+            *pOutSize = 0;
+            theBuffer.data()[0u] = wxT('\0');
+            return theBuffer;
          }
  
          //Increment to next (sub)string
@@ -302,7 +338,9 @@ size_t wxMBConv::WC2MB(char* szBuffer, const wchar_t* szString,
          szPos += wxWcslen(szPos) + 1;
      }
  
-    return nActualLength - 1;  //success - return actual length
+    //success - return actual length and the buffer
+    *pOutSize = nActualLength;
+    return theBuffer;  
  }
  
  // ----------------------------------------------------------------------------
@@ -365,7 +403,6 @@ static const unsigned char utf7unb64[] =
  
  size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
  {
-
      size_t len = 0;
  
      while (*psz && ((!buf) || (len < n)))
@@ -399,7 +436,7 @@ size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
                  d += cc;
                  for (l += 6; l >= 8; lsb = !lsb)
                  {
-                    c = (d >> (l -= 8)) % 256;
+                    c = (unsigned char)((d >> (l -= 8)) % 256);
                      if (lsb)
                      {
                          if (buf)
@@ -408,7 +445,7 @@ size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
                      }
                      else
                          if (buf)
-                            *buf = c << 8;
+                            *buf = (wchar_t)(c << 8);
                  }
              }
              if (*psz == '-')
@@ -455,8 +492,7 @@ static const unsigned char utf7encode[128] =
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
  };
  
-size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t 
-*psz, size_t n) const
+size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
  {
  
  
@@ -473,12 +509,8 @@ size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t
              len++;
          }
  #ifndef WC_UTF16
-#ifdef __VMS
-       else if (cc > 0xffff)
-#else
-       else if (cc > ((const wchar_t)0xffff))
-#endif
-        {
+        else if (((wxUint32)cc) > 0xffff)
+           {
              // no surrogate pair generation (yet?)
              return (size_t)-1;
          }
@@ -1115,12 +1147,13 @@ size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
  
  #ifdef HAVE_ICONV
  
-// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
-//     if output buffer is _exactly_ as big as needed. Such case is (unless there's
-//     yet another bug in glibc) the only case when iconv() returns with (size_t)-1
-//     (which means error) and says there are 0 bytes left in the input buffer --
-//     when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
-//     this alternative test for iconv() failure.
+// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
+//     E2BIG if output buffer is _exactly_ as big as needed. Such case is
+//     (unless there's yet another bug in glibc) the only case when iconv()
+//     returns with (size_t)-1 (which means error) and says there are 0 bytes
+//     left in the input buffer -- when _real_ error occurs,
+//     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
+//     iconv() failure.
  //     [This bug does not appear in glibc 2.2.]
  #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
  #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
@@ -1152,6 +1185,10 @@ protected:
      // the other direction
      iconv_t m2w,
              w2m;
+#if wxUSE_THREADS
+    // guards access to m2w and w2m objects
+    wxMutex m_iconvMutex;
+#endif
  
  private:
      // the name (for iconv_open()) of a wide char charset -- if none is
@@ -1263,6 +1300,16 @@ wxMBConv_iconv::~wxMBConv_iconv()
  
  size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
  {
+#if wxUSE_THREADS
+    // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
+    //     Unfortunately there are a couple of global wxCSConv objects such as
+    //     wxConvLocal that are used all over wx code, so we have to make sure
+    //     the handle is used by at most one thread at a time. Otherwise
+    //     only a few wx classes would be safe to use from non-main threads
+    //     as MB<->WC conversion would fail "randomly".
+    wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
+#endif
+ 
      size_t inbuf = strlen(psz);
      size_t outbuf = n * SIZEOF_WCHAR_T;
      size_t res, cres;
@@ -1320,6 +1367,11 @@ size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
  
  size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
  {
+#if wxUSE_THREADS
+    // NB: explained in MB2WC
+    wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
+#endif
+    
      size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
      size_t outbuf = n;
      size_t res, cres;
@@ -1423,10 +1475,20 @@ public:
          // and break the library itself, e.g. wxTextInputStream::NextChar()
          // wouldn't work if reading an incomplete MB char didn't result in an
          // error
+        //
+        // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
+        // an error (tested under Windows Server 2003) and apparently it is
+        // done on purpose, i.e. the function accepts any input in this case
+        // and although I'd prefer to return error on ill-formed output, our
+        // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
+        // explicitly ill-formed according to RFC 2152) either, so we don't
+        // even have any fallback here...
+        int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
+
          const size_t len = ::MultiByteToWideChar
                               (
                                  m_CodePage,     // code page
-                                MB_ERR_INVALID_CHARS, // flags: fall on error
+                                flags,          // flags: fall on error
                                  psz,            // input string
                                  -1,             // its length (NUL-terminated)
                                  buf,            // output string
@@ -2024,7 +2086,7 @@ public:
          if (buf == NULL)
          {
              //apple specs say at least 32
-            n = 32 ;
+            n = wxMax( 32 , byteInLen ) ;
              tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
          }
          ByteCount byteBufferLen = n * sizeof( UniChar ) ;
@@ -2065,7 +2127,7 @@ public:
          if (buf == NULL)
          {
              //apple specs say at least 32
-            n = 32;
+            n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
              tbuf = (char*) malloc( n ) ;
          }
  
@@ -2162,7 +2224,10 @@ public:
      {
          size_t inbuf = strlen(psz);
          if (buf)
-            m2w.Convert(psz,buf);
+        {
+            if (!m2w.Convert(psz,buf))
+                return (size_t)-1;
+        }
          return inbuf;
      }
  
@@ -2170,7 +2235,10 @@ public:
      {
          const size_t inbuf = wxWcslen(psz);
          if (buf)
-            w2m.Convert(psz,buf);
+        {
+            if (!w2m.Convert(psz,buf))
+                return (size_t)-1;
+        }
  
          return inbuf;
      }