make wxSocketImplUnix inherit from wxFDIOHandler as they're used for almost the same...

[wxWidgets.git] / src / common / string.cpp
diff --git a/src/common/string.cpp b/src/common/string.cpp

index e401661db80bd78982742e51d23610cd84ab584c..d7a7c7d9f3612d4fe5034ca246896d346c881b1e 100644 (file)
--- a/src/common/string.cpp
+++ b/src/common/string.cpp
@@ -35,10 +35,6 @@
  #include <string.h>
  #include <stdlib.h>
  
-#ifdef __SALFORDC__
-    #include <clib.h>
-#endif
-
  #include "wx/hashmap.h"
  
  // string handling functions used by wxString:
@@ -54,6 +50,18 @@
      #define wxStringStrlen   wxStrlen
  #endif
  
+// ----------------------------------------------------------------------------
+// global variables
+// ----------------------------------------------------------------------------
+
+namespace wxPrivate
+{
+// file-local buffer data object constructed from a NULL pointer
+static UntypedBufferData s_untypedNullData(NULL);
+// constant pointer to it (presumably declared in a header - TODO confirm)
+UntypedBufferData * const untypedNullDataPtr = &s_untypedNullData;
+
+} // namespace wxPrivate
  
  // ---------------------------------------------------------------------------
  // static class variables definition
@@ -62,6 +70,105 @@
  //According to STL _must_ be a -1 size_t
  const size_t wxString::npos = (size_t) -1;
  
+#if wxUSE_STRING_POS_CACHE
+
+#ifdef wxHAS_COMPILER_TLS
+
+wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
+
+#else // !wxHAS_COMPILER_TLS
+
+struct wxStrCacheInitializer // instantiated once as gs_stringCacheInit below
+{
+    wxStrCacheInitializer()
+    {
+        // the first call to GetCache() initializes the s_cache it uses, so
+        // after doing it here it is safe to call it from multiple threads
+        wxString::GetCache();
+    }
+};
+
+/*
+wxString::Cache& wxString::GetCache()
+{
+    static wxTLS_TYPE(Cache) s_cache;
+
+    return wxTLS_VALUE(s_cache);
+}
+*/
+
+static wxStrCacheInitializer gs_stringCacheInit;
+
+#endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
+
+// gdb seems to be unable to display thread-local variables correctly, at least
+// not my 6.4.98 version under amd64, so provide this debugging helper to do it
+#ifdef __WXDEBUG__
+
+struct wxStrCacheDumper
+{
+    // print all wxString::Cache::SIZE cache elements to stdout, one per line,
+    // marking with "[*]" the one at the LastUsedCacheElement() index
+    static void ShowAll()
+    {
+        puts("*** wxString cache dump:");
+        for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
+        {
+            const wxString::Cache::Element& c = wxString::GetCacheBegin()[n];
+            printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
+                   n,
+                   n == wxString::LastUsedCacheElement() ? " [*]" : "",
+                   (const void *)c.str,     // %p needs a void pointer
+                   (unsigned long)c.pos,
+                   (unsigned long)c.impl,
+                   (long)c.len);
+        }
+    }
+};
+
+void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); } // debugging helper
+
+#endif // __WXDEBUG__
+
+#ifdef wxPROFILE_STRING_CACHE
+
+wxString::CacheStats wxString::ms_cacheStats;
+
+struct wxStrCacheStatsDumper // dtor dumps wxString::ms_cacheStats to stdout
+{
+    ~wxStrCacheStatsDumper()
+    {
+        const wxString::CacheStats& stats = wxString::ms_cacheStats;
+        // PosToImpl() statistics; the check also avoids dividing by zero below
+        if ( stats.postot )
+        {
+            puts("*** wxString cache statistics:");
+            printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
+                   stats.postot);
+            printf("\tHits %u (of which %u not used) or %.2f%%\n",
+                   stats.poshits,
+                   stats.mishits,
+                   100.*float(stats.poshits - stats.mishits)/stats.postot);
+            printf("\tAverage position requested: %.2f\n",
+                   float(stats.sumpos) / stats.postot);
+            printf("\tAverage offset after cached hint: %.2f\n",
+                   float(stats.sumofs) / stats.postot);
+        }
+        // length() statistics, printed only if at least one call was counted
+        if ( stats.lentot )
+        {
+            printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
+                   stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
+        }
+    }
+};
+
+static wxStrCacheStatsDumper s_showCacheStats;
+
+#endif // wxPROFILE_STRING_CACHE
+
+#endif // wxUSE_STRING_POS_CACHE
+
  // ----------------------------------------------------------------------------
  // global functions
  // ----------------------------------------------------------------------------
@@ -73,7 +180,13 @@ const size_t wxString::npos = (size_t) -1;
  wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
  {
  #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
-    return os << (const char *)str.AsCharBuf();
+    const wxCharBuffer buf(str.AsCharBuf());
+    if ( !buf )
+        os.clear(wxSTD ios_base::failbit);
+    else
+        os << buf.data();
+
+    return os;
  #else
      return os << str.AsInternal();
  #endif
@@ -127,22 +240,30 @@ void wxString::PosLenToImpl(size_t pos, size_t len,
                              size_t *implPos, size_t *implLen) const
  {
      if ( pos == npos )
+    {
          *implPos = npos;
-    else
+    }
+    else // have valid start position
      {
-        const_iterator i = begin() + pos;
-        *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
+        const const_iterator b = GetIterForNthChar(pos);
+        *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
          if ( len == npos )
+        {
              *implLen = npos;
-        else
+        }
+        else // have valid length too
          {
-            // too large length is interpreted as "to the end of the string"
-            // FIXME-UTF8: verify this is the case in std::string, assert
-            // otherwise
-            if ( pos + len > length() )
-                len = length() - pos;
-
-            *implLen = (i + len).impl() - i.impl();
+            // we need to handle the case of length specifying a substring
+            // going beyond the end of the string, just as std::string does
+            const const_iterator e(end());
+            const_iterator i(b);
+            while ( len && i <= e )
+            {
+                ++i;
+                --len;
+            }
+
+            *implLen = i.impl() - b.impl();
          }
      }
  }
@@ -373,6 +494,9 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
          // UTF-8 sequence and psz may be invalid:
          if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
          {
+            // we must pass the real string length to SubstrBufFromMB ctor
+            if ( nLength == npos )
+                nLength = psz ? strlen(psz) : 0;
              return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
          }
          // else: do the roundtrip through wchar_t*
@@ -1170,7 +1294,7 @@ wxString wxString::Right(size_t nCount) const
    return dest;
  }
  
-// get all characters after the last occurence of ch
+// get all characters after the last occurrence of ch
  // (returns the whole string if ch not found)
  wxString wxString::AfterLast(wxUniChar ch) const
  {
@@ -1179,7 +1303,7 @@ wxString wxString::AfterLast(wxUniChar ch) const
    if ( iPos == wxNOT_FOUND )
      str = *this;
    else
-    str = wx_str() + iPos + 1;
+    str.assign(*this, iPos + 1, npos);
  
    return str;
  }
@@ -1197,16 +1321,17 @@ wxString wxString::Left(size_t nCount) const
    return dest;
  }
  
-// get all characters before the first occurence of ch
+// get all characters before the first occurrence of ch
  // (returns the whole string if ch not found)
  wxString wxString::BeforeFirst(wxUniChar ch) const
  {
    int iPos = Find(ch);
-  if ( iPos == wxNOT_FOUND ) iPos = length();
+  if ( iPos == wxNOT_FOUND )
+      iPos = length();
    return wxString(*this, 0, iPos);
  }
  
-/// get all characters before the last occurence of ch
+/// get all characters before the last occurrence of ch
  /// (returns empty string if ch not found)
  wxString wxString::BeforeLast(wxUniChar ch) const
  {
@@ -1218,19 +1343,19 @@ wxString wxString::BeforeLast(wxUniChar ch) const
    return str;
  }
  
-/// get all characters after the first occurence of ch
+/// get all characters after the first occurrence of ch
  /// (returns empty string if ch not found)
  wxString wxString::AfterFirst(wxUniChar ch) const
  {
    wxString str;
    int iPos = Find(ch);
    if ( iPos != wxNOT_FOUND )
-    str = wx_str() + iPos + 1;
+      str.assign(*this, iPos + 1, npos);
  
    return str;
  }
  
-// replace first (or all) occurences of some substring with another one
+// replace first (or all) occurrences of some substring with another one
  size_t wxString::Replace(const wxString& strOld,
                           const wxString& strNew, bool bReplaceAll)
  {
@@ -1238,35 +1363,59 @@ size_t wxString::Replace(const wxString& strOld,
      wxCHECK_MSG( !strOld.empty(), 0,
                   _T("wxString::Replace(): invalid parameter") );
  
+    wxSTRING_INVALIDATE_CACHE();
+
      size_t uiCount = 0;   // count of replacements made
  
-    size_t uiOldLen = strOld.length();
-    size_t uiNewLen = strNew.length();
+    // optimize the special common case: replacement of one character by
+    // another one (in UTF-8 case we can only do this for ASCII characters)
+    //
+    // benchmarks show that this special version is around 3 times faster
+    // (depending on the proportion of matching characters and UTF-8/wchar_t
+    // build)
+    if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
+    {
+        const wxStringCharType chOld = strOld.m_impl[0],
+                               chNew = strNew.m_impl[0];
  
-    size_t dwPos = 0;
+        // this loop is the simplified version of the one below
+        for ( size_t pos = 0; ; )
+        {
+            pos = m_impl.find(chOld, pos);
+            if ( pos == npos )
+                break;
  
-    while ( (*this)[dwPos] != wxT('\0') )
+            m_impl[pos++] = chNew;
+
+            uiCount++;
+
+            if ( !bReplaceAll )
+                break;
+        }
+    }
+    else // general case
      {
-        //DO NOT USE STRSTR HERE
-        //this string can contain embedded null characters,
-        //so strstr will function incorrectly
-        dwPos = find(strOld, dwPos);
-        if ( dwPos == npos )
-            break;                  // exit the loop
-        else
+        const size_t uiOldLen = strOld.m_impl.length();
+        const size_t uiNewLen = strNew.m_impl.length();
+
+        for ( size_t pos = 0; ; )
          {
-            //replace this occurance of the old string with the new one
-            replace(dwPos, uiOldLen, strNew, uiNewLen);
+            pos = m_impl.find(strOld.m_impl, pos);
+            if ( pos == npos )
+                break;
+
+            // replace this occurrence of the old string with the new one
+            m_impl.replace(pos, uiOldLen, strNew.m_impl);
  
-            //move up pos past the string that was replaced
-            dwPos += uiNewLen;
+            // move up pos past the string that was replaced
+            pos += uiNewLen;
  
-            //increase replace count
-            ++uiCount;
+            // increase replace count
+            uiCount++;
  
-            // stop now?
+            // stop after the first one?
              if ( !bReplaceAll )
-                break;                  // exit the loop
+                break;
          }
      }
  
@@ -1342,13 +1491,27 @@ wxString& wxString::MakeLower()
    return *this;
  }
  
+wxString& wxString::MakeCapitalized()
+{
+    // upper-case the first character, lower-case all the following ones
+    const iterator last = end();
+    iterator cur = begin();
+    if ( cur == last )
+        return *this;
+    *cur = (wxChar)wxToupper(*cur);
+    while ( ++cur != last )
+        *cur = (wxChar)wxTolower(*cur);
+
+    return *this;
+}
+
  // ---------------------------------------------------------------------------
  // trimming and padding
  // ---------------------------------------------------------------------------
  
  // some compilers (VC++ 6.0 not to name them) return true for a call to
  // isspace('\xEA') in the C locale which seems to be broken to me, but we have
-// to live with this by checking that the character is a 7 bit one - even if 
+// to live with this by checking that the character is a 7 bit one - even if
  // this may fail to detect some spaces (I don't know if Unicode doesn't have
  // space-like symbols somewhere except in the first 128 chars), it is arguably
  // still better than trimming away accented letters
@@ -1444,61 +1607,62 @@ int wxString::Find(wxUniChar ch, bool bFromEnd) const
      #define DO_IF_NOT_WINCE(x)
  #endif
  
-#define WX_STRING_TO_INT_TYPE(val, base, func)                              \
-    wxCHECK_MSG( val, false, _T("NULL output pointer") );                   \
+#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
+    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
      wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                              \
      DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                              \
      const wxStringCharType *start = wx_str();                               \
      wxStringCharType *end;                                                  \
-    *val = func(start, &end, base);                                         \
+    T val = func(start, &end, base);                                        \
                                                                              \
      /* return true only if scan was stopped by the terminating NUL and */   \
      /* if the string was not empty to start with and no under/overflow */   \
      /* occurred: */                                                         \
-    return !*end && (end != start)                                          \
-        DO_IF_NOT_WINCE( && (errno != ERANGE) )
+    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
+        return false;                                                       \
+    *out = val;                                                             \
+    return true
  
-bool wxString::ToLong(long *val, int base) const
+bool wxString::ToLong(long *pVal, int base) const
  {
-    WX_STRING_TO_INT_TYPE(val, base, wxStrtol);
+    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
  }
  
-bool wxString::ToULong(unsigned long *val, int base) const
+bool wxString::ToULong(unsigned long *pVal, int base) const
  {
-    WX_STRING_TO_INT_TYPE(val, base, wxStrtoul);
+    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
  }
  
-bool wxString::ToLongLong(wxLongLong_t *val, int base) const
+bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
  {
-    WX_STRING_TO_INT_TYPE(val, base, wxStrtoll);
+    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
  }
  
-bool wxString::ToULongLong(wxULongLong_t *val, int base) const
+bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
  {
-    WX_STRING_TO_INT_TYPE(val, base, wxStrtoull);
+    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
  }
  
-bool wxString::ToDouble(double *val) const
+bool wxString::ToDouble(double *pVal) const
  {
-    wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
+    wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
  
-#ifndef __WXWINCE__
-    errno = 0;
-#endif
+    DO_IF_NOT_WINCE( errno = 0; )
  
      const wxChar *start = c_str();
      wxChar *end;
-    *val = wxStrtod(start, &end);
+    double val = wxStrtod(start, &end);
  
      // return true only if scan was stopped by the terminating NUL and if the
      // string was not empty to start with and no under/overflow occurred
-    return !*end && (end != start)
-#ifndef __WXWINCE__
-        && (errno != ERANGE)
-#endif
-    ;
+    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
+        return false;
+
+    *pVal = val;
+
+    return true;
  }
  
  // ---------------------------------------------------------------------------
@@ -1620,7 +1784,7 @@ int wxString::DoPrintfUtf8(const char *format, ...)
      an undersized buffer and no other errno are defined we treat those two
      as meaning hard errors and everything else gets the old behavior which
      is to keep looping and increasing buffer size until the function succeeds.
- 
+
      In practice it's impossible to determine before compilation which behavior
      may be used.  The vswprintf function may have vsnprintf-like behavior or
      vice-versa.  Behavior detected on one release can theoretically change
@@ -1730,7 +1894,7 @@ static int DoStringPrintfV(wxString& str,
          else if ( len >= size )
          {
  #if wxUSE_WXVSNPRINTF
-            // we know that our own implementation of wxVsnprintf() returns 
+            // we know that our own implementation of wxVsnprintf() returns
              // size+1 when there's not enough space but that's not the size
              // of the required buffer!
              size *= 2;      // so we just double the current size of the buffer
@@ -1945,38 +2109,3 @@ int wxString::Freq(wxUniChar ch) const
      return count;
  }
  
-// convert to upper case, return the copy of the string
-wxString wxString::Upper() const
-{ wxString s(*this); return s.MakeUpper(); }
-
-// convert to lower case, return the copy of the string
-wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }
-
-// ----------------------------------------------------------------------------
-// wxUTF8StringBuffer
-// ----------------------------------------------------------------------------
-
-#if wxUSE_UNICODE_WCHAR
-wxUTF8StringBuffer::~wxUTF8StringBuffer()
-{
-    wxMBConvStrictUTF8 conv;
-    size_t wlen = conv.ToWChar(NULL, 0, m_buf);
-    wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
-
-    wxStringInternalBuffer wbuf(m_str, wlen);
-    conv.ToWChar(wbuf, wlen, m_buf);
-}
-
-wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
-{
-    wxCHECK_RET(m_lenSet, "length not set");
-
-    wxMBConvStrictUTF8 conv;
-    size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
-    wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
-
-    wxStringInternalBufferLength wbuf(m_str, wlen);
-    conv.ToWChar(wbuf, wlen, m_buf, m_len);
-    wbuf.SetLength(wlen);
-}
-#endif // wxUSE_UNICODE_WCHAR