fixing overrelease and out-of-bounds write, fixes #13725

[wxWidgets.git] / src / common / stringimpl.cpp
diff --git a/src/common/stringimpl.cpp b/src/common/stringimpl.cpp

index f16811fa7a41c97d977f135ca43245d55b99d502..498793722e8f171a109b642db979fbcc0ff57ec6 100644 (file)
--- a/src/common/stringimpl.cpp
+++ b/src/common/stringimpl.cpp
@@ -1,5 +1,5 @@
  /////////////////////////////////////////////////////////////////////////////
-// Name:        src/common/string.cpp
+// Name:        src/common/stringimpl.cpp
  // Purpose:     wxString class
  // Author:      Vadim Zeitlin, Ryan Norton
  // Modified by:
@@ -30,6 +30,7 @@
  
  #ifndef WX_PRECOMP
      #include "wx/stringimpl.h"
+    #include "wx/wxcrt.h"
  #endif
  
  #include <ctype.h>
@@ -41,10 +42,6 @@
  #include <string.h>
  #include <stdlib.h>
  
-#ifdef __SALFORDC__
-    #include <clib.h>
-#endif
-
  // allocating extra space for each string consumes more memory but speeds up
  // the concatenation operations (nLen is the current string's length)
  // NB: EXTRA_ALLOC must be >= 0!
@@ -56,12 +53,10 @@
      #define wxStringMemcpy   memcpy
      #define wxStringMemcmp   memcmp
      #define wxStringMemchr   memchr
-    #define wxStringStrlen   strlen
  #else
      #define wxStringMemcpy   wxTmemcpy
      #define wxStringMemcmp   wxTmemcmp
      #define wxStringMemchr   wxTmemchr
-    #define wxStringStrlen   wxStrlen
  #endif
  
  
@@ -80,7 +75,11 @@ const size_t wxStringImpl::npos = (size_t) -1;
  
  #if wxUSE_STL_BASED_WXSTRING
  
-extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = _T("");
+// FIXME-UTF8: get rid of this, have only one wxEmptyString
+#if wxUSE_UNICODE_UTF8
+const wxStringCharType WXDLLIMPEXP_BASE *wxEmptyStringImpl = "";
+#endif
+const wxChar WXDLLIMPEXP_BASE *wxEmptyString = wxT("");
  
  #else
  
@@ -90,11 +89,17 @@ extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = _T("");
  static const struct
  {
    wxStringData data;
-  wxChar dummy;
+  wxStringCharType dummy;
  } g_strEmpty = { {-1, 0, 0}, wxT('\0') };
  
  // empty C style string: points to 'string data' byte of g_strEmpty
-extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = &g_strEmpty.dummy;
+#if wxUSE_UNICODE_UTF8
+// FIXME-UTF8: get rid of this, have only one wxEmptyString
+const wxStringCharType WXDLLIMPEXP_BASE *wxEmptyStringImpl = &g_strEmpty.dummy;
+const wxChar WXDLLIMPEXP_BASE *wxEmptyString = wxT("");
+#else
+const wxStringCharType WXDLLIMPEXP_BASE *wxEmptyString = &g_strEmpty.dummy;
+#endif
  
  #endif
  
@@ -106,20 +111,27 @@ extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = &g_strEmpty.dummy;
  // ----------------------------------------------------------------------------
  
  // this small class is used to gather statistics for performance tuning
+
+// uncomment this to enable gathering of some statistics about wxString
+// efficiency
  //#define WXSTRING_STATISTICS
+
  #ifdef  WXSTRING_STATISTICS
    class Averager
    {
    public:
-    Averager(const wxChar *sz) { m_sz = sz; m_nTotal = m_nCount = 0; }
+    Averager(const wxStringCharType *sz) { m_sz = sz; m_nTotal = m_nCount = 0; }
     ~Averager()
-   { wxPrintf("wxString: average %s = %f\n", m_sz, ((float)m_nTotal)/m_nCount); }
+    {
+        wxPrintf("wxString %s: total = %lu, average = %f\n",
+                 m_sz, m_nTotal, ((float)m_nTotal)/m_nCount);
+    }
  
      void Add(size_t n) { m_nTotal += n; m_nCount++; }
  
    private:
-    size_t m_nCount, m_nTotal;
-    const wxChar *m_sz;
+    unsigned long m_nCount, m_nTotal;
+    const wxStringCharType *m_sz;
    } g_averageLength("allocation size"),
      g_averageSummandLength("summand length"),
      g_averageConcatHit("hit probability in concat"),
@@ -147,13 +159,14 @@ void wxStringData::Free()
  // ===========================================================================
  
  // takes nLength elements of psz starting at nPos
-void wxStringImpl::InitWith(const wxChar *psz, size_t nPos, size_t nLength)
+void wxStringImpl::InitWith(const wxStringCharType *psz,
+                            size_t nPos, size_t nLength)
  {
    Init();
  
    // if the length is not given, assume the string to be NUL terminated
    if ( nLength == npos ) {
-    wxASSERT_MSG( nPos <= wxStrlen(psz), _T("index out of bounds") );
+    wxASSERT_MSG( nPos <= wxStrlen(psz), wxT("index out of bounds") );
  
      nLength = wxStrlen(psz + nPos);
    }
@@ -163,24 +176,22 @@ void wxStringImpl::InitWith(const wxChar *psz, size_t nPos, size_t nLength)
    if ( nLength > 0 ) {
      // trailing '\0' is written in AllocBuffer()
      if ( !AllocBuffer(nLength) ) {
-      wxFAIL_MSG( _T("out of memory in wxStringImpl::InitWith") );
+      wxFAIL_MSG( wxT("out of memory in wxStringImpl::InitWith") );
        return;
      }
      wxStringMemcpy(m_pchData, psz + nPos, nLength);
    }
  }
  
-// poor man's iterators are "void *" pointers
-wxStringImpl::wxStringImpl(const void *pStart, const void *pEnd)
+wxStringImpl::wxStringImpl(const_iterator first, const_iterator last)
  {
-  if ( pEnd >= pStart )
+  if ( last >= first )
    {
-    InitWith((const wxChar *)pStart, 0,
-             (const wxChar *)pEnd - (const wxChar *)pStart);
+    InitWith(first.GetPtr(), 0, last - first);
    }
    else
    {
-    wxFAIL_MSG( _T("pStart is not before pEnd") );
+    wxFAIL_MSG( wxT("first must be before last") );
      Init();
    }
  }
@@ -203,8 +214,8 @@ bool wxStringImpl::AllocBuffer(size_t nLen)
    wxASSERT( nLen >  0 );
  
    // make sure that we don't overflow
-  wxASSERT( nLen < (INT_MAX / sizeof(wxChar)) -
-                   (sizeof(wxStringData) + EXTRA_ALLOC + 1) );
+  wxCHECK( nLen < (INT_MAX / sizeof(wxStringCharType)) -
+                  (sizeof(wxStringData) + EXTRA_ALLOC + 1), false );
  
    STATISTICS_ADD(Length, nLen);
  
@@ -212,7 +223,7 @@ bool wxStringImpl::AllocBuffer(size_t nLen)
    // 1) one extra character for '\0' termination
    // 2) sizeof(wxStringData) for housekeeping info
    wxStringData* pData = (wxStringData*)
-    malloc(sizeof(wxStringData) + (nLen + EXTRA_ALLOC + 1)*sizeof(wxChar));
+    malloc(sizeof(wxStringData) + (nLen + EXTRA_ALLOC + 1)*sizeof(wxStringCharType));
  
    if ( pData == NULL ) {
      // allocation failures are handled by the caller
@@ -271,7 +282,8 @@ bool wxStringImpl::AllocBeforeWrite(size_t nLen)
        nLen += EXTRA_ALLOC;
  
        pData = (wxStringData*)
-          realloc(pData, sizeof(wxStringData) + (nLen + 1)*sizeof(wxChar));
+          realloc(pData,
+                  sizeof(wxStringData) + (nLen + 1)*sizeof(wxStringCharType));
  
        if ( pData == NULL ) {
          // allocation failures are handled by the caller
@@ -299,7 +311,8 @@ wxStringImpl& wxStringImpl::append(size_t n, wxStringCharType ch)
      size_type len = length();
  
      if ( !Alloc(len + n) || !CopyBeforeWrite() ) {
-      wxFAIL_MSG( _T("out of memory in wxStringImpl::append") );
+      wxFAIL_MSG( wxT("out of memory in wxStringImpl::append") );
+      return *this;
      }
      GetStringData()->nDataLength = len + n;
      m_pchData[len + n] = '\0';
@@ -329,10 +342,12 @@ bool wxStringImpl::Alloc(size_t nLen)
    wxStringData *pData = GetStringData();
    if ( pData->nAllocLength <= nLen ) {
      if ( pData->IsEmpty() ) {
+      STATISTICS_ADD(Length, nLen);
+
        nLen += EXTRA_ALLOC;
  
        pData = (wxStringData *)
-                malloc(sizeof(wxStringData) + (nLen + 1)*sizeof(wxChar));
+             malloc(sizeof(wxStringData) + (nLen + 1)*sizeof(wxStringCharType));
  
        if ( pData == NULL ) {
          // allocation failure handled by caller
@@ -353,14 +368,14 @@ bool wxStringImpl::Alloc(size_t nLen)
          return false;
        }
        // +1 to copy the terminator, too
-      memcpy(m_pchData, pData->data(), (nOldLen+1)*sizeof(wxChar));
+      memcpy(m_pchData, pData->data(), (nOldLen+1)*sizeof(wxStringCharType));
        GetStringData()->nDataLength = nOldLen;
      }
      else {
        nLen += EXTRA_ALLOC;
  
        pData = (wxStringData *)
-        realloc(pData, sizeof(wxStringData) + (nLen + 1)*sizeof(wxChar));
+        realloc(pData, sizeof(wxStringData) + (nLen + 1)*sizeof(wxStringCharType));
  
        if ( pData == NULL ) {
          // allocation failure handled by caller
@@ -380,14 +395,14 @@ bool wxStringImpl::Alloc(size_t nLen)
  
  wxStringImpl::iterator wxStringImpl::begin()
  {
-    if (length() > 0)
+    if ( !empty() )
          CopyBeforeWrite();
      return m_pchData;
  }
  
  wxStringImpl::iterator wxStringImpl::end()
  {
-    if (length() > 0)
+    if ( !empty() )
          CopyBeforeWrite();
      return m_pchData + length();
  }
@@ -412,7 +427,8 @@ wxStringImpl& wxStringImpl::erase(size_t nStart, size_t nLen)
      return *this;
  }
  
-wxStringImpl& wxStringImpl::insert(size_t nPos, const wxChar *sz, size_t n)
+wxStringImpl& wxStringImpl::insert(size_t nPos,
+                                   const wxStringCharType *sz, size_t n)
  {
      wxASSERT( nPos <= length() );
  
@@ -420,12 +436,13 @@ wxStringImpl& wxStringImpl::insert(size_t nPos, const wxChar *sz, size_t n)
      if ( n == 0 ) return *this;
  
      if ( !Alloc(length() + n) || !CopyBeforeWrite() ) {
-        wxFAIL_MSG( _T("out of memory in wxStringImpl::insert") );
+        wxFAIL_MSG( wxT("out of memory in wxStringImpl::insert") );
+        return *this;
      }
  
      memmove(m_pchData + nPos + n, m_pchData + nPos,
-            (length() - nPos) * sizeof(wxChar));
-    memcpy(m_pchData + nPos, sz, n * sizeof(wxChar));
+            (length() - nPos) * sizeof(wxStringCharType));
+    memcpy(m_pchData + nPos, sz, n * sizeof(wxStringCharType));
      GetStringData()->nDataLength = length() + n;
      m_pchData[length()] = '\0';
  
@@ -487,7 +504,8 @@ size_t wxStringImpl::find(const wxStringImpl& str, size_t nStart) const
      return p - c_str() + nLenOther <= nLen ? p - c_str() : npos;
  }
  
-size_t wxStringImpl::find(const wxChar* sz, size_t nStart, size_t n) const
+size_t wxStringImpl::find(const wxStringCharType* sz,
+                          size_t nStart, size_t n) const
  {
      return find(wxStringImpl(sz, n), nStart);
  }
@@ -510,7 +528,7 @@ size_t wxStringImpl::rfind(const wxStringImpl& str, size_t nStart) const
      if ( length() >= str.length() )
      {
          // avoids a corner case later
-        if ( length() == 0 && str.length() == 0 )
+        if ( empty() && str.empty() )
              return 0;
  
          // "top" is the point where search starts from
@@ -534,7 +552,8 @@ size_t wxStringImpl::rfind(const wxStringImpl& str, size_t nStart) const
      return npos;
  }
  
-size_t wxStringImpl::rfind(const wxChar* sz, size_t nStart, size_t n) const
+size_t wxStringImpl::rfind(const wxStringCharType* sz,
+                           size_t nStart, size_t n) const
  {
      return rfind(wxStringImpl(sz, n), nStart);
  }
@@ -562,54 +581,45 @@ size_t wxStringImpl::rfind(wxStringCharType ch, size_t nStart) const
  }
  
  wxStringImpl& wxStringImpl::replace(size_t nStart, size_t nLen,
-                                    const wxChar *sz)
-{
-  wxASSERT_MSG( nStart <= length(),
-                _T("index out of bounds in wxStringImpl::replace") );
-  size_t strLen = length() - nStart;
-  nLen = strLen < nLen ? strLen : nLen;
-
-  wxStringImpl strTmp;
-  strTmp.reserve(length()); // micro optimisation to avoid multiple mem allocs
-
-  //This is kind of inefficient, but its pretty good considering...
-  //we don't want to use character access operators here because on STL
-  //it will freeze the reference count of strTmp, which means a deep copy
-  //at the end when swap is called
-  //
-  //Also, we can't use append with the full character pointer and must
-  //do it manually because this string can contain null characters
-  for(size_t i1 = 0; i1 < nStart; ++i1)
-      strTmp.append(1, this->c_str()[i1]);
-
-  //its safe to do the full version here because
-  //sz must be a normal c string
-  strTmp.append(sz);
-
-  for(size_t i2 = nStart + nLen; i2 < length(); ++i2)
-      strTmp.append(1, this->c_str()[i2]);
-
-  swap(strTmp);
-  return *this;
-}
-
-wxStringImpl& wxStringImpl::replace(size_t nStart, size_t nLen,
-                                    size_t nCount, wxStringCharType ch)
+                                    const wxStringCharType *sz, size_t nCount)
  {
-  return replace(nStart, nLen, wxStringImpl(nCount, ch).c_str());
-}
+    // check and adjust parameters
+    const size_t lenOld = length();
  
-wxStringImpl& wxStringImpl::replace(size_t nStart, size_t nLen,
-                                    const wxStringImpl& str,
-                                    size_t nStart2, size_t nLen2)
-{
-  return replace(nStart, nLen, str.substr(nStart2, nLen2));
-}
+    wxASSERT_MSG( nStart <= lenOld,
+                  wxT("index out of bounds in wxStringImpl::replace") );
+    size_t nEnd = nStart + nLen;
+    if ( nLen > lenOld - nStart )
+    {
+        // nLen may be out of range, as it can be npos, so just clamp it down
+        nLen = lenOld - nStart;
+        nEnd = lenOld;
+    }
  
-wxStringImpl& wxStringImpl::replace(size_t nStart, size_t nLen,
-                                    const wxChar* sz, size_t nCount)
-{
-  return replace(nStart, nLen, wxStringImpl(sz, nCount).c_str());
+    if ( nCount == npos )
+        nCount = wxStrlen(sz);
+
+    // build the new string from 3 pieces: part of this string before nStart,
+    // the new substring and the part of this string after nStart+nLen
+    wxStringImpl tmp;
+    const size_t lenNew = lenOld + nCount - nLen;
+    if ( lenNew )
+    {
+        tmp.AllocBuffer(lenOld + nCount - nLen);
+
+        wxStringCharType *dst = tmp.m_pchData;
+        memcpy(dst, m_pchData, nStart*sizeof(wxStringCharType));
+        dst += nStart;
+
+        memcpy(dst, sz, nCount*sizeof(wxStringCharType));
+        dst += nCount;
+
+        memcpy(dst, m_pchData + nEnd, (lenOld - nEnd)*sizeof(wxStringCharType));
+    }
+
+    // and replace this string contents with the new one
+    swap(tmp);
+    return *this;
  }
  
  wxStringImpl wxStringImpl::substr(size_t nStart, size_t nLen) const
@@ -643,24 +653,25 @@ wxStringImpl& wxStringImpl::operator=(const wxStringImpl& stringSrc)
  // assigns a single character
  wxStringImpl& wxStringImpl::operator=(wxStringCharType ch)
  {
-  wxChar c(ch);
+  wxStringCharType c(ch);
    if ( !AssignCopy(1, &c) ) {
-    wxFAIL_MSG( _T("out of memory in wxStringImpl::operator=(wxChar)") );
+    wxFAIL_MSG( wxT("out of memory in wxStringImpl::operator=(wxStringCharType)") );
    }
    return *this;
  }
  
  // assigns C string
-wxStringImpl& wxStringImpl::operator=(const wxChar *psz)
+wxStringImpl& wxStringImpl::operator=(const wxStringCharType *psz)
  {
    if ( !AssignCopy(wxStrlen(psz), psz) ) {
-    wxFAIL_MSG( _T("out of memory in wxStringImpl::operator=(const wxChar *)") );
+    wxFAIL_MSG( wxT("out of memory in wxStringImpl::operator=(const wxStringCharType *)") );
    }
    return *this;
  }
  
  // helper function: does real copy
-bool wxStringImpl::AssignCopy(size_t nSrcLen, const wxChar *pszSrcData)
+bool wxStringImpl::AssignCopy(size_t nSrcLen,
+                              const wxStringCharType *pszSrcData)
  {
    if ( nSrcLen == 0 ) {
      Reinit();
@@ -670,7 +681,11 @@ bool wxStringImpl::AssignCopy(size_t nSrcLen, const wxChar *pszSrcData)
        // allocation failure handled by caller
        return false;
      }
-    memcpy(m_pchData, pszSrcData, nSrcLen*sizeof(wxChar));
+
+    // use memmove() and not memcpy() here as we might be copying from our own
+    // buffer in case of assignment such as "s = s.c_str()" (see #11294)
+    memmove(m_pchData, pszSrcData, nSrcLen*sizeof(wxStringCharType));
+
      GetStringData()->nDataLength = nSrcLen;
      m_pchData[nSrcLen] = wxT('\0');
    }
@@ -682,7 +697,8 @@ bool wxStringImpl::AssignCopy(size_t nSrcLen, const wxChar *pszSrcData)
  // ---------------------------------------------------------------------------
  
  // add something to this string
-bool wxStringImpl::ConcatSelf(size_t nSrcLen, const wxChar *pszSrcData,
+bool wxStringImpl::ConcatSelf(size_t nSrcLen,
+                              const wxStringCharType *pszSrcData,
                                size_t nMaxLen)
  {
    STATISTICS_ADD(SummandLength, nSrcLen);
@@ -693,6 +709,17 @@ bool wxStringImpl::ConcatSelf(size_t nSrcLen, const wxChar *pszSrcData,
    if ( nSrcLen > 0 ) {
      wxStringData *pData = GetStringData();
      size_t nLen = pData->nDataLength;
+
+    // take special care when appending part of this string to itself: the code
+    // below may reallocate our buffer, which would invalidate the pszSrcData
+    // pointer, so we first copy the data into a temporary string in this case
+    // (but only in this case, to avoid an unnecessary copy)
+    if ( pszSrcData >= m_pchData && pszSrcData < m_pchData + nLen )
+    {
+        wxStringImpl tmp(pszSrcData, nSrcLen);
+        return ConcatSelf(nSrcLen, tmp.m_pchData, nSrcLen);
+    }
+
      size_t nNewLen = nLen + nSrcLen;
  
      // alloc new buffer if current is too small
@@ -705,7 +732,7 @@ bool wxStringImpl::ConcatSelf(size_t nSrcLen, const wxChar *pszSrcData,
            // allocation failure handled by caller
            return false;
        }
-      memcpy(m_pchData, pOldData->data(), nLen*sizeof(wxChar));
+      memcpy(m_pchData, pOldData->data(), nLen*sizeof(wxStringCharType));
        pOldData->Unlock();
      }
      else if ( nNewLen > pData->nAllocLength ) {
@@ -728,7 +755,7 @@ bool wxStringImpl::ConcatSelf(size_t nSrcLen, const wxChar *pszSrcData,
      wxASSERT( nNewLen <= GetStringData()->nAllocLength );
  
      // fast concatenation - all is done in our buffer
-    memcpy(m_pchData + nLen, pszSrcData, nSrcLen*sizeof(wxChar));
+    memcpy(m_pchData + nLen, pszSrcData, nSrcLen*sizeof(wxStringCharType));
  
      m_pchData[nNewLen] = wxT('\0');          // put terminating '\0'
      GetStringData()->nDataLength = nNewLen; // and fix the length
@@ -737,9 +764,8 @@ bool wxStringImpl::ConcatSelf(size_t nSrcLen, const wxChar *pszSrcData,
    return true;
  }
  
-#if !wxUSE_UNICODE_UTF8
  // get the pointer to writable buffer of (at least) nLen bytes
-wxChar *wxStringImpl::DoGetWriteBuf(size_t nLen)
+wxStringCharType *wxStringImpl::DoGetWriteBuf(size_t nLen)
  {
    if ( !AllocBeforeWrite(nLen) ) {
      // allocation failure handled by caller
@@ -762,13 +788,12 @@ void wxStringImpl::DoUngetWriteBuf(size_t nLen)
  {
    wxStringData * const pData = GetStringData();
  
-  wxASSERT_MSG( nLen < pData->nAllocLength, _T("buffer overrun") );
+  wxASSERT_MSG( nLen < pData->nAllocLength, wxT("buffer overrun") );
  
    // the strings we store are always NUL-terminated
-  pData->data()[nLen] = _T('\0');
+  pData->data()[nLen] = wxT('\0');
    pData->nDataLength = nLen;
    pData->Validate(true);
  }
-#endif // !wxUSE_UNICODE_UTF8
  
  #endif // !wxUSE_STL_BASED_WXSTRING