Make EquivalentEncodings 3-dimensional array const, moving 864 bytes from data segmen...

[wxWidgets.git] / src / common / strconv.cpp
diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp

index 00ab2fc8f51348f0d442a13e651583455fc91037..01e0dc358c37bbf375ff837743ef4df34cc1da92 100644 (file)
--- a/src/common/strconv.cpp
+++ b/src/common/strconv.cpp
@@ -86,6 +86,15 @@
  // implementation
  // ============================================================================
  
+// helper: returns true if at least one of the n bytes starting at p is not NUL
+static bool NotAllNULs(const char *p, size_t n)
+{
+    while ( n && *p++ == '\0' ) // consume NUL bytes, but at most n of them
+        n--;
+
+    return n != 0; // non-zero only if the scan stopped at a non-NUL byte
+}
+
  // ----------------------------------------------------------------------------
  // UTF-16 en/decoding to/from UCS-4
  // ----------------------------------------------------------------------------
@@ -138,185 +147,127 @@ static size_t decode_utf16(const wxUint16* input, wxUint32& output)
  // wxMBConv
  // ----------------------------------------------------------------------------
  
-wxMBConv::~wxMBConv()
-{
-    // nothing to do here (necessary for Darwin linking probably)
-}
-
-const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
-{
-    if ( psz )
-    {
-        // calculate the length of the buffer needed first
-        size_t nLen = MB2WC(NULL, psz, 0);
-        if ( nLen != (size_t)-1 )
-        {
-            // now do the actual conversion
-            wxWCharBuffer buf(nLen);
-            nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
-            if ( nLen != (size_t)-1 )
-            {
-                return buf;
-            }
-        }
-    }
-
-    wxWCharBuffer buf((wchar_t *)NULL);
-
-    return buf;
-}
-
-const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
-{
-    if ( pwz )
-    {
-        size_t nLen = WC2MB(NULL, pwz, 0);
-        if ( nLen != (size_t)-1 )
-        {
-            wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
-            nLen = WC2MB(buf.data(), pwz, nLen + 4);
-            if ( nLen != (size_t)-1 )
-            {
-                return buf;
-            }
-        }
-    }
-
-    wxCharBuffer buf((char *)NULL);
-
-    return buf;
-}
-
-// helper of cMB2WC(): check if n bytes at this location are all NUL
-static bool NotAllNULs(const char *p, size_t n)
+size_t
+wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
+                  const char *src, size_t srcLen) const
  {
-    while ( n && *p++ == '\0' )
-        n--;
+    // although new conversion classes are supposed to implement this function
+    // directly, the existing ones only implement the old MB2WC() and so, to
+    // avoid having to rewrite all conversion classes at once, we provide a
+    // default (but not efficient) implementation of this one in terms of the
+    // old function by copying the input to ensure that it's NUL-terminated and
+    // then using MB2WC() to convert it
  
-    return n != 0;
-}
-
-const wxWCharBuffer
-wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
-{
-    // the currently accumulated wide characters
-    wxWCharBuffer wbuf;
-
-    // the current length of wbuf
-    size_t lenBuf = 0;
+    // the number of chars [which would be] written to dst [if it were not NULL]
+    size_t dstWritten = 0;
  
      // the number of NULs terminating this string
-    size_t nulLen   wxDUMMY_INITIALIZE(0);
-
-    // make a copy of the input string unless it is already properly
-    // NUL-terminated
-    wxCharBuffer bufTmp;
+    size_t nulLen wxDUMMY_INITIALIZE(0);
  
      // if we were not given the input size we just have to assume that the
      // string is properly terminated as we have no way of knowing how long it
      // is anyhow, but if we do have the size check whether there are enough
      // NULs at the end
-    if ( inLen != (size_t)-1 )
+    wxCharBuffer bufTmp;
+    const char *srcEnd;
+    if ( srcLen != (size_t)-1 )
      {
          // we need to know how to find the end of this string
-        nulLen = GetMinMBCharWidth();
-        if ( nulLen == (size_t)-1 )
-            return wbuf;
+        nulLen = GetMBNulLen();
+        if ( nulLen == wxCONV_FAILED )
+            return wxCONV_FAILED;
  
          // if there are enough NULs we can avoid the copy
-        if ( inLen < nulLen || NotAllNULs(in + inLen - nulLen, nulLen) )
+        if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
          {
              // make a copy in order to properly NUL-terminate the string
-            bufTmp = wxCharBuffer(inLen + nulLen - 1 /* 1 will be added */);
+            bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
              char * const p = bufTmp.data();
-            memcpy(p, in, inLen);
-            for ( char *s = p + inLen; s < p + inLen + nulLen; s++ )
+            memcpy(p, src, srcLen);
+            for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
                  *s = '\0';
+
+            src = bufTmp;
          }
-    }
  
-    if ( bufTmp )
-        in = bufTmp;
+        srcEnd = src + srcLen;
+    }
+    else // quit after the first loop iteration
+    {
+        srcEnd = NULL;
+    }
  
-    size_t lenChunk;
-    for ( const char * const inEnd = in + inLen;; )
+    for ( ;; )
      {
          // try to convert the current chunk
-        lenChunk = MB2WC(NULL, in, 0);
+        size_t lenChunk = MB2WC(NULL, src, 0);
          if ( lenChunk == 0 )
          {
              // nothing left in the input string, conversion succeeded
              break;
          }
  
-        if ( lenChunk == (size_t)-1 )
-            break;
+        if ( lenChunk == wxCONV_FAILED )
+            return wxCONV_FAILED;
  
          // if we already have a previous chunk, leave the NUL separating it
          // from this one
-        if ( lenBuf )
-            lenBuf++;
-
-        const size_t lenBufNew = lenBuf + lenChunk;
-        if ( !wbuf.extend(lenBufNew) )
+        if ( dstWritten )
          {
-            lenChunk = (size_t)-1;
-            break;
+            dstWritten++;
+            if ( dst )
+                dst++;
          }
  
-        lenChunk = MB2WC(wbuf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
-        if ( lenChunk == (size_t)-1 )
-            break;
+        dstWritten += lenChunk;
  
-        lenBuf = lenBufNew;
+        if ( dst )
+        {
+            if ( dstWritten > dstLen )
+                return wxCONV_FAILED;
  
-        if ( inLen == (size_t)-1 )
+            lenChunk = MB2WC(dst, src, lenChunk + 1 /* for NUL */);
+            if ( lenChunk == wxCONV_FAILED )
+                return wxCONV_FAILED;
+
+            dst += lenChunk;
+        }
+
+        if ( !srcEnd )
          {
-            // convert only one chunk in this case, as we suppose that the
-            // string is NUL-terminated and so inEnd is not used at all
+            // we convert the entire string in this case, as we suppose that the
+            // string is NUL-terminated and so srcEnd is not used at all
              break;
          }
  
          // advance the input pointer past the end of this chunk
-        while ( NotAllNULs(in, nulLen) )
+        while ( NotAllNULs(src, nulLen) )
          {
              // notice that we must skip over multiple bytes here as we suppose
              // that if NUL takes 2 or 4 bytes, then all the other characters do
              // too and so if advanced by a single byte we might erroneously
              // detect sequences of NUL bytes in the middle of the input
-            in += nulLen;
+            src += nulLen;
          }
  
-        in += nulLen; // skipping over its terminator as well
+        src += nulLen; // skipping over its terminator as well
  
          // note that ">=" (and not just "==") is needed here as the terminator
          // we skipped just above could be inside or just after the buffer
          // delimited by inEnd
-        if ( in >= inEnd )
+        if ( src >= srcEnd )
              break;
      }
  
-    if ( lenChunk == (size_t)-1 )
-    {
-        // conversion failed
-        lenBuf = 0;
-        wbuf.reset();
-    }
-
-    if ( outLen )
-        *outLen = lenBuf;
-
-    return wbuf;
+    return dstWritten;
  }
  
-const wxCharBuffer
-wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
+size_t
+wxMBConv::FromWChar(char *dst, size_t dstLen,
+                    const wchar_t *src, size_t srcLen) const
  {
-    // the currently accumulated multibyte characters
-    wxCharBuffer buf;
-
-    // the current length of buf
-    size_t lenBuf = 0;
+    // the number of chars [which would be] written to dst [if it were not NULL]
+    size_t dstWritten = 0;
  
      // make a copy of the input string unless it is already properly
      // NUL-terminated
@@ -324,50 +275,151 @@ wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
      // if we don't know its length we have no choice but to assume that it is,
      // indeed, properly terminated
      wxWCharBuffer bufTmp;
-    if ( inLen == (size_t)-1 )
+    if ( srcLen == (size_t)-1 )
      {
-        inLen = wxWcslen(in) + 1;
+        srcLen = wxWcslen(src) + 1;
      }
-    else if ( inLen != 0 && in[inLen - 1] != L'\0' )
+    else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
      {
          // make a copy in order to properly NUL-terminate the string
-        bufTmp = wxWCharBuffer(inLen);
-        memcpy(bufTmp.data(), in, inLen*sizeof(wchar_t));
+        bufTmp = wxWCharBuffer(srcLen);
+        memcpy(bufTmp.data(), src, srcLen*sizeof(wchar_t));
+        src = bufTmp;
      }
  
-    if ( bufTmp )
-        in = bufTmp;
+    const size_t lenNul = GetMBNulLen();
+    for ( const wchar_t * const srcEnd = src + srcLen;
+          src < srcEnd;
+          src += wxWcslen(src) + 1 /* skip L'\0' too */ )
+    {
+        // try to convert the current chunk
+        size_t lenChunk = WC2MB(NULL, src, 0);
+
+        if ( lenChunk == wxCONV_FAILED )
+            return wxCONV_FAILED;
  
-    for ( const wchar_t * const inEnd = in + inLen;; )
+        lenChunk += lenNul;
+        dstWritten += lenChunk;
+
+        if ( dst )
+        {
+            if ( dstWritten > dstLen )
+                return wxCONV_FAILED;
+
+            if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
+                return wxCONV_FAILED;
+
+            dst += lenChunk;
+        }
+    }
+
+    return dstWritten;
+}
+
+size_t wxMBConv::MB2WC(wchar_t *out, const char *in, size_t outLen) const
+{
+    size_t rc = ToWChar(out, outLen, in);
+    if ( rc != wxCONV_FAILED )
      {
-        // try to convert the current chunk, if anything left
-        size_t lenChunk = in < inEnd ? WC2MB(NULL, in, 0) : 0;
-        if ( lenChunk == 0 )
+        // ToWChar() returns the buffer length, i.e. including the trailing
+        // NUL, while this method doesn't take it into account
+        rc--;
+    }
+
+    return rc;
+}
+
+size_t wxMBConv::WC2MB(char *out, const wchar_t *in, size_t outLen) const
+{   // old-style conversion function expressed in terms of FromWChar()
+    size_t rc = FromWChar(out, outLen, in); // srcLen uses its declared default
+    if ( rc != wxCONV_FAILED )
+    {
+        rc -= GetMBNulLen(); // FromWChar() counts the trailing NUL(s), we don't
+    }
+
+    return rc; // length without the terminator, or wxCONV_FAILED unchanged
+}
+
+wxMBConv::~wxMBConv()
+{
+    // intentionally empty (defining it here is probably needed for Darwin linking)
+}
+
+const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
+{
+    if ( psz )
+    {
+        // calculate the length of the buffer needed first
+        const size_t nLen = MB2WC(NULL, psz, 0);
+        if ( nLen != wxCONV_FAILED )
          {
-            // nothing left in the input string, conversion succeeded
-            if ( outLen )
-                *outLen = lenBuf ? lenBuf - 1 : lenBuf;
+            // now do the actual conversion
+            wxWCharBuffer buf(nLen /* +1 added implicitly */);
  
-            return buf;
+            // +1 for the trailing NULL
+            if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
+                return buf;
          }
+    }
  
-        if ( lenChunk == (size_t)-1 )
-            break;
+    return wxWCharBuffer();
+}
  
-        const size_t lenBufNew = lenBuf + lenChunk;
-        if ( !buf.extend(lenBufNew) )
-            break;
+const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
+{
+    if ( pwz )
+    {
+        const size_t nLen = WC2MB(NULL, pwz, 0);
+        if ( nLen != wxCONV_FAILED )
+        {
+            // extra space for trailing NUL(s)
+            static const size_t extraLen = GetMaxMBNulLen();
  
-        lenChunk = WC2MB(buf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
-        if ( lenChunk == (size_t)-1 )
-            break;
+            wxCharBuffer buf(nLen + extraLen - 1);
+            if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
+                return buf;
+        }
+    }
  
-        // chunk successfully converted, go to the next one
-        in += wxWcslen(in) + 1 /* skip NUL too */;
-        lenBuf = lenBufNew + 1;
+    return wxCharBuffer();
+}
+
+const wxWCharBuffer
+wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
+{   // converts in[0..inLen) to wide chars; *outLen (optional) gets the length
+    const size_t dstLen = ToWChar(NULL, 0, in, inLen); // 1st pass: size only
+    if ( dstLen != wxCONV_FAILED )
+    {
+        wxWCharBuffer wbuf(dstLen);
+        if ( ToWChar(wbuf.data(), dstLen, in, inLen) != wxCONV_FAILED )
+        {   // a plain truth test here would treat wxCONV_FAILED as success
+            if ( outLen )
+                *outLen = dstLen;
+            return wbuf;
+        }
+    }
+    // either pass failed: report a zero length and return an empty buffer
+    if ( outLen )
+        *outLen = 0;
+
+    return wxWCharBuffer();
+}
+
+const wxCharBuffer
+wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
+{
+    const size_t dstLen = FromWChar(NULL, 0, in, inLen);
+    if ( dstLen != wxCONV_FAILED )
+    {
+        wxCharBuffer buf(dstLen);
+        if ( FromWChar(buf.data(), dstLen, in, inLen) )
+        {
+            if ( outLen )
+                *outLen = dstLen;
+            return buf;
+        }
      }
  
-    // conversion failed
      if ( outLen )
          *outLen = 0;
  
@@ -1368,6 +1420,10 @@ public:
      virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
      virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
  
+    // classify this encoding as explained in wxMBConv::GetMBNulLen()
+    // comment
+    virtual size_t GetMBNulLen() const;
+
      bool IsOk() const
          { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
  
@@ -1382,10 +1438,6 @@ protected:
  #endif
  
  private:
-    // classify this encoding as explained in wxMBConv::GetMinMBCharWidth()
-    // comment
-    virtual size_t GetMinMBCharWidth() const;
-
      // the name (for iconv_open()) of a wide char charset -- if none is
      // available on this machine, it will remain NULL
      static wxString ms_wcCharsetName;
@@ -1394,7 +1446,7 @@ private:
      // different endian-ness than the native one
      static bool ms_wcNeedsSwap;
  
-    // cached result of GetMinMBCharWidth(); set to 0 meaning "unknown"
+    // cached result of GetMBNulLen(); set to 0 meaning "unknown"
      // initially
      size_t m_minMBCharWidth;
  };
@@ -1540,6 +1592,31 @@ wxMBConv_iconv::~wxMBConv_iconv()
  
  size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
  {
+    // find the string length: notice that this must be done differently for
+    // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
+    size_t inbuf;
+    const size_t nulLen = GetMBNulLen();
+    switch ( nulLen )
+    {
+        default:
+            return (size_t)-1;
+
+        case 1:
+            inbuf = strlen(psz); // arguably more optimized than our version
+            break;
+
+        case 2:
+        case 4:
+            // for UTF-16/32 not only do we need to have 2/4 consecutive NULs but
+            // they also have to start at a character boundary and not span two
+            // adjacent characters
+            const char *p;
+            for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
+                ;
+            inbuf = p - psz;
+            break;
+    }
+
  #if wxUSE_THREADS
      // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
      //     Unfortunately there is a couple of global wxCSConv objects such as
@@ -1548,9 +1625,9 @@ size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
      //     only a few wx classes would be safe to use from non-main threads
      //     as MB<->WC conversion would fail "randomly".
      wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
-#endif
+#endif // wxUSE_THREADS
+
  
-    size_t inbuf = strlen(psz);
      size_t outbuf = n * SIZEOF_WCHAR_T;
      size_t res, cres;
      // VS: Use these instead of psz, buf because iconv() modifies its arguments:
@@ -1572,9 +1649,7 @@ size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
                  buf[n] = WC_BSWAP(buf[i]);
          }
  
-        // NB: iconv was given only strlen(psz) characters on input, and so
-        //     it couldn't convert the trailing zero. Let's do it ourselves
-        //     if there's some room left for it in the output buffer.
+        // NUL-terminate the string if there is any space left
          if (res < n)
              buf[res] = 0;
      }
@@ -1674,7 +1749,7 @@ size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
      return res;
  }
  
-size_t wxMBConv_iconv::GetMinMBCharWidth() const
+size_t wxMBConv_iconv::GetMBNulLen() const
  {
      if ( m_minMBCharWidth == 0 )
      {
@@ -1907,6 +1982,44 @@ public:
          return len - 1;
      }
  
+    virtual size_t GetMBNulLen() const
+    {   // length in bytes of NUL in m_CodePage, computed once and then cached
+        if ( m_minMBCharWidth == 0 ) // 0 means "not determined yet"
+        {
+            int len = ::WideCharToMultiByte
+                        (
+                            m_CodePage,     // code page
+                            0,              // no flags
+                            L"",            // input string
+                            1,              // translate just the NUL
+                            NULL,           // output buffer
+                            0,              // and its size
+                            NULL,           // no replacement char
+                            NULL            // [out] don't care if it was used
+                        );
+            // the method is const, so cast constness away to cache the result
+            wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
+            switch ( len )
+            {
+                default:
+                    wxLogDebug(_T("Unexpected NUL length %d"), len);
+                    // fall through
+
+                case 0: // also reached from default: remember the failure
+                    self->m_minMBCharWidth = (size_t)-1;
+                    break;
+
+                case 1:
+                case 2:
+                case 4: // the only NUL lengths accepted as valid
+                    self->m_minMBCharWidth = len;
+                    break;
+            }
+        }
+
+        return m_minMBCharWidth; // 1, 2 or 4 on success, (size_t)-1 otherwise
+    }
+
      bool IsOk() const { return m_CodePage != -1; }
  
  private:
@@ -1965,48 +2078,11 @@ private:
  #endif
      }
  
-    virtual size_t GetMinMBCharWidth() const
-    {
-        if ( m_minMBCharWidth == 0 )
-        {
-            int len = ::WideCharToMultiByte
-                        (
-                            m_CodePage,     // code page
-                            0,              // no flags
-                            L"",            // input string
-                            1,              // translate just the NUL
-                            NULL,           // output buffer
-                            0,              // and its size
-                            NULL,           // no replacement char
-                            NULL            // [out] don't care if it was used
-                        );
-
-            wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
-            switch ( len )
-            {
-                default:
-                    wxLogDebug(_T("Unexpected NUL length %d"), len);
-                    // fall through
-
-                case 0:
-                    self->m_minMBCharWidth = (size_t)-1;
-                    break;
-
-                case 1:
-                case 2:
-                case 4:
-                    self->m_minMBCharWidth = len;
-                    break;
-            }
-        }
-
-        return m_minMBCharWidth;
-    }
  
      // the code page we're working with
      long m_CodePage;
  
-    // cached result of GetMinMBCharWidth(), set to 0 initially meaning
+    // cached result of GetMBNulLen(), set to 0 initially meaning
      // "unknown"
      size_t m_minMBCharWidth;
  };
@@ -2642,14 +2718,7 @@ public:
          return inbuf;
      }
  
-    bool IsOk() const { return m_ok; }
-
-public:
-    wxFontEncoding m_enc;
-    wxEncodingConverter m2w, w2m;
-
-private:
-    virtual size_t GetMinMBCharWidth() const
+    virtual size_t GetMBNulLen() const
      {
          switch ( m_enc )
          {
@@ -2666,6 +2735,13 @@ private:
          }
      }
  
+    bool IsOk() const { return m_ok; }
+
+public:
+    wxFontEncoding m_enc;
+    wxEncodingConverter m2w, w2m;
+
+private:
      // were we initialized successfully?
      bool m_ok;
  
@@ -3058,14 +3134,13 @@ size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
      return len;
  }
  
-size_t wxCSConv::GetMinMBCharWidth() const
+size_t wxCSConv::GetMBNulLen() const
  {
      CreateConvIfNeeded();
  
      if ( m_convReal )
      {
-        // cast needed just to call private function of m_convReal
-        return ((wxCSConv *)m_convReal)->GetMinMBCharWidth();
+        return m_convReal->GetMBNulLen();
      }
  
      return 1;