Revised #ifndef WX_PRECOMP headers, added missing #include wx/wxcrtvararg.h

[wxWidgets.git] / src / common / string.cpp
diff --git a/src/common/string.cpp b/src/common/string.cpp

index e401661db80bd78982742e51d23610cd84ab584c..595caf71257daa6259c51b4c006b22304f15a879 100644 (file)
--- a/src/common/string.cpp
+++ b/src/common/string.cpp
@@ -35,10 +35,6 @@
  #include <string.h>
  #include <stdlib.h>
  
-#ifdef __SALFORDC__
-    #include <clib.h>
-#endif
-
  #include "wx/hashmap.h"
  
  // string handling functions used by wxString:
@@ -62,6 +58,105 @@
  //According to STL _must_ be a -1 size_t
  const size_t wxString::npos = (size_t) -1;
  
+#if wxUSE_STRING_POS_CACHE
+
+#ifdef wxHAS_COMPILER_TLS
+
+wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
+
+#else // !wxHAS_COMPILER_TLS
+
+struct wxStrCacheInitializer // instantiated once, as the static gs_stringCacheInit
+{
+    wxStrCacheInitializer() // runs when that static object is constructed
+    {
+        // calling this function triggers s_cache initialization in it, and
+        // from now on it becomes safe to call from multiple threads
+        wxString::GetCache();
+    }
+};
+
+/*
+wxString::Cache& wxString::GetCache()
+{
+    static wxTLS_TYPE(Cache) s_cache;
+
+    return wxTLS_VALUE(s_cache);
+}
+*/
+
+static wxStrCacheInitializer gs_stringCacheInit;
+
+#endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
+
+// gdb seems to be unable to display thread-local variables correctly, at least
+// not my 6.4.98 version under amd64, so provide this debugging helper to do it
+#ifdef __WXDEBUG__
+
+struct wxStrCacheDumper // __WXDEBUG__-only helper: prints every cache slot to stdout
+{
+    static void ShowAll() // one line per slot, wxString::Cache::SIZE lines in total
+    {
+        puts("*** wxString cache dump:");
+        for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
+        {
+            const wxString::Cache::Element&
+                c = wxString::GetCacheBegin()[n];
+
+            printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
+                   n,
+                   n == wxString::LastUsedCacheElement() ? " [*]" : "", // flag the last used slot
+                   (const void *)c.str, // %p is only defined for pointers to void
+                   (unsigned long)c.pos,
+                   (unsigned long)c.impl,
+                   (long)c.len);
+        }
+    }
+};
+
+void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); } // free-function entry point to the dump
+
+#endif // __WXDEBUG__
+
+#ifdef wxPROFILE_STRING_CACHE
+
+wxString::CacheStats wxString::ms_cacheStats;
+
+struct wxStrCacheStatsDumper // prints wxString::ms_cacheStats to stdout from its destructor
+{
+    ~wxStrCacheStatsDumper() // runs when the static s_showCacheStats object is destroyed
+    {
+        const wxString::CacheStats& stats = wxString::ms_cacheStats;
+
+        if ( stats.postot ) // also guards the divisions by postot below
+        {
+            puts("*** wxString cache statistics:");
+            printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
+                   stats.postot);
+            printf("\tHits %u (of which %u not used) or %.2f%%\n",
+                   stats.poshits,
+                   stats.mishits,
+                   100.*float(stats.poshits - stats.mishits)/stats.postot);
+            printf("\tAverage position requested: %.2f\n",
+                   float(stats.sumpos) / stats.postot);
+            printf("\tAverage offset after cached hint: %.2f\n",
+                   float(stats.sumofs) / stats.postot);
+        }
+
+        if ( stats.lentot ) // likewise guards the division by lentot
+        {
+            printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
+                   stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
+        }
+    }
+};
+
+static wxStrCacheStatsDumper s_showCacheStats;
+
+#endif // wxPROFILE_STRING_CACHE
+
+#endif // wxUSE_STRING_POS_CACHE
+
  // ----------------------------------------------------------------------------
  // global functions
  // ----------------------------------------------------------------------------
@@ -127,22 +222,30 @@ void wxString::PosLenToImpl(size_t pos, size_t len,
                              size_t *implPos, size_t *implLen) const
  {
      if ( pos == npos )
+    {
          *implPos = npos;
-    else
+    }
+    else // have valid start position
      {
-        const_iterator i = begin() + pos;
-        *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
+        const const_iterator b = GetIterForNthChar(pos); // iterator at character index pos
+        *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin(); // offset in m_impl units
          if ( len == npos )
+        {
              *implLen = npos;
-        else
+        }
+        else // have valid length too
          {
-            // too large length is interpreted as "to the end of the string"
-            // FIXME-UTF8: verify this is the case in std::string, assert
-            // otherwise
-            if ( pos + len > length() )
-                len = length() - pos;
-
-            *implLen = (i + len).impl() - i.impl();
+            // a length running past the end of the string is clamped to the
+            // end, as std::string does: stop at e, never step beyond it
+            const const_iterator e(end());
+            const_iterator i(b);
+            while ( len && i < e ) // strict bound: i == e must not be incremented
+            {
+                ++i;
+                --len;
+            }
+
+            *implLen = i.impl() - b.impl(); // distance between the two impl iterators
          }
      }
  }
@@ -373,6 +476,9 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
          // UTF-8 sequence and psz may be invalid:
          if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
          {
+            // we must pass the real string length to SubstrBufFromMB ctor
+            if ( nLength == npos )
+                nLength = psz ? strlen(psz) : 0;
              return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
          }
          // else: do the roundtrip through wchar_t*
@@ -1238,35 +1344,59 @@ size_t wxString::Replace(const wxString& strOld,
      wxCHECK_MSG( !strOld.empty(), 0,
                   _T("wxString::Replace(): invalid parameter") );
  
+    wxSTRING_INVALIDATE_CACHE();
+
      size_t uiCount = 0;   // count of replacements made
  
-    size_t uiOldLen = strOld.length();
-    size_t uiNewLen = strNew.length();
+    // optimize the special common case: replacement of one character by
+    // another one (in UTF-8 case we can only do this for ASCII characters)
+    //
+    // benchmarks show that this special version is around 3 times faster
+    // (depending on the proportion of matching characters and UTF-8/wchar_t
+    // build)
+    if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
+    {
+        const wxStringCharType chOld = strOld.m_impl[0],
+                               chNew = strNew.m_impl[0];
+
+        // this loop is the simplified version of the one below
+        for ( size_t pos = 0; ; )
+        {
+            pos = m_impl.find(chOld, pos);
+            if ( pos == npos )
+                break;
+
+            m_impl[pos++] = chNew;
  
-    size_t dwPos = 0;
+            uiCount++;
  
-    while ( (*this)[dwPos] != wxT('\0') )
+            if ( !bReplaceAll )
+                break;
+        }
+    }
+    else // general case
      {
-        //DO NOT USE STRSTR HERE
-        //this string can contain embedded null characters,
-        //so strstr will function incorrectly
-        dwPos = find(strOld, dwPos);
-        if ( dwPos == npos )
-            break;                  // exit the loop
-        else
+        const size_t uiOldLen = strOld.m_impl.length();
+        const size_t uiNewLen = strNew.m_impl.length();
+
+        for ( size_t pos = 0; ; )
          {
-            //replace this occurance of the old string with the new one
-            replace(dwPos, uiOldLen, strNew, uiNewLen);
+            pos = m_impl.find(strOld.m_impl, pos);
+            if ( pos == npos )
+                break;
  
-            //move up pos past the string that was replaced
-            dwPos += uiNewLen;
+            // replace this occurrence of the old string with the new one
+            m_impl.replace(pos, uiOldLen, strNew.m_impl);
  
-            //increase replace count
-            ++uiCount;
+            // move up pos past the string that was replaced
+            pos += uiNewLen;
  
-            // stop now?
+            // increase replace count
+            uiCount++;
+
+            // stop after the first one?
              if ( !bReplaceAll )
-                break;                  // exit the loop
+                break;
          }
      }
  
@@ -1342,13 +1472,27 @@ wxString& wxString::MakeLower()
    return *this;
  }
  
+wxString& wxString::MakeCapitalized()
+{
+    // upper-case the first character, lower-case all the following ones
+    const iterator last = end();
+    iterator cur = begin();
+    if ( cur == last )
+        return *this; // empty string: nothing to change
+
+    *cur = (wxChar)wxToupper(*cur);
+    while ( ++cur != last )
+        *cur = (wxChar)wxTolower(*cur);
+    return *this;
+}
+
  // ---------------------------------------------------------------------------
  // trimming and padding
  // ---------------------------------------------------------------------------
  
  // some compilers (VC++ 6.0 not to name them) return true for a call to
  // isspace('\xEA') in the C locale which seems to be broken to me, but we have
-// to live with this by checking that the character is a 7 bit one - even if 
+// to live with this by checking that the character is a 7 bit one - even if
  // this may fail to detect some spaces (I don't know if Unicode doesn't have
  // space-like symbols somewhere except in the first 128 chars), it is arguably
  // still better than trimming away accented letters
@@ -1444,61 +1588,62 @@ int wxString::Find(wxUniChar ch, bool bFromEnd) const
      #define DO_IF_NOT_WINCE(x)
  #endif
  
-#define WX_STRING_TO_INT_TYPE(val, base, func)                              \
-    wxCHECK_MSG( val, false, _T("NULL output pointer") );                   \
+#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
+    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
      wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                              \
      DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                              \
      const wxStringCharType *start = wx_str();                               \
      wxStringCharType *end;                                                  \
-    *val = func(start, &end, base);                                         \
+    T val = func(start, &end, base);                                        \
                                                                              \
      /* return true only if scan was stopped by the terminating NUL and */   \
      /* if the string was not empty to start with and no under/overflow */   \
      /* occurred: */                                                         \
-    return !*end && (end != start)                                          \
-        DO_IF_NOT_WINCE( && (errno != ERANGE) )
+    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
+        return false;                                                       \
+    *out = val;                                                             \
+    return true
  
-bool wxString::ToLong(long *val, int base) const
+bool wxString::ToLong(long *pVal, int base) const
  {
-    WX_STRING_TO_INT_TYPE(val, base, wxStrtol);
+    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
  }
  
-bool wxString::ToULong(unsigned long *val, int base) const
+bool wxString::ToULong(unsigned long *pVal, int base) const
  {
-    WX_STRING_TO_INT_TYPE(val, base, wxStrtoul);
+    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
  }
  
-bool wxString::ToLongLong(wxLongLong_t *val, int base) const
+bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
  {
-    WX_STRING_TO_INT_TYPE(val, base, wxStrtoll);
+    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
  }
  
-bool wxString::ToULongLong(wxULongLong_t *val, int base) const
+bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
  {
-    WX_STRING_TO_INT_TYPE(val, base, wxStrtoull);
+    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
  }
  
-bool wxString::ToDouble(double *val) const
+bool wxString::ToDouble(double *pVal) const
  {
-    wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
+    wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
  
-#ifndef __WXWINCE__
-    errno = 0;
-#endif
+    DO_IF_NOT_WINCE( errno = 0; )
  
      const wxChar *start = c_str();
      wxChar *end;
-    *val = wxStrtod(start, &end);
+    double val = wxStrtod(start, &end);
  
      // return true only if scan was stopped by the terminating NUL and if the
      // string was not empty to start with and no under/overflow occurred
-    return !*end && (end != start)
-#ifndef __WXWINCE__
-        && (errno != ERANGE)
-#endif
-    ;
+    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
+        return false;
+
+    *pVal = val;
+
+    return true;
  }
  
  // ---------------------------------------------------------------------------
@@ -1620,7 +1765,7 @@ int wxString::DoPrintfUtf8(const char *format, ...)
      an undersized buffer and no other errno are defined we treat those two
      as meaning hard errors and everything else gets the old behavior which
      is to keep looping and increasing buffer size until the function succeeds.
- 
+
      In practice it's impossible to determine before compilation which behavior
      may be used.  The vswprintf function may have vsnprintf-like behavior or
      vice-versa.  Behavior detected on one release can theoretically change
@@ -1730,7 +1875,7 @@ static int DoStringPrintfV(wxString& str,
          else if ( len >= size )
          {
  #if wxUSE_WXVSNPRINTF
-            // we know that our own implementation of wxVsnprintf() returns 
+            // we know that our own implementation of wxVsnprintf() returns
              // size+1 when there's not enough space but that's not the size
              // of the required buffer!
              size *= 2;      // so we just double the current size of the buffer
@@ -1945,13 +2090,6 @@ int wxString::Freq(wxUniChar ch) const
      return count;
  }
  
-// convert to upper case, return the copy of the string
-wxString wxString::Upper() const
-{ wxString s(*this); return s.MakeUpper(); }
-
-// convert to lower case, return the copy of the string
-wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }
-
  // ----------------------------------------------------------------------------
  // wxUTF8StringBuffer
  // ----------------------------------------------------------------------------
@@ -1980,3 +2118,19 @@ wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
      wbuf.SetLength(wlen);
  }
  #endif // wxUSE_UNICODE_WCHAR
+
+// ----------------------------------------------------------------------------
+// wxCharTypeBuffer<T>
+// ----------------------------------------------------------------------------
+
+#ifndef __VMS_BROKEN_TEMPLATES // the template<> prefix is left out when this macro is defined
+template<>
+#endif // !__VMS_BROKEN_TEMPLATES
+wxCharTypeBuffer<char>::Data
+wxCharTypeBuffer<char>::NullData(NULL); // static member definition for the char instantiation
+
+#ifndef __VMS_BROKEN_TEMPLATES // same conditional prefix as for the char definition
+template<>
+#endif // !__VMS_BROKEN_TEMPLATES
+wxCharTypeBuffer<wchar_t>::Data
+wxCharTypeBuffer<wchar_t>::NullData(NULL); // static member definition for the wchar_t instantiation