X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/dd79ca2fd85daac3dd2238043193a5af8b623a85..72516be4ffebfc82f210ea00f02d49c43580a4d7:/src/common/string.cpp diff --git a/src/common/string.cpp b/src/common/string.cpp index 16f764f44a..f3d684f1a4 100644 --- a/src/common/string.cpp +++ b/src/common/string.cpp @@ -36,6 +36,8 @@ #include #include "wx/hashmap.h" +#include "wx/vector.h" +#include "wx/xlocale.h" // string handling functions used by wxString: #if wxUSE_UNICODE_UTF8 @@ -50,6 +52,18 @@ #define wxStringStrlen wxStrlen #endif +// ---------------------------------------------------------------------------- +// global variables +// ---------------------------------------------------------------------------- + +namespace wxPrivate +{ + +static UntypedBufferData s_untypedNullData(NULL); + +UntypedBufferData * const untypedNullDataPtr = &s_untypedNullData; + +} // namespace wxPrivate // --------------------------------------------------------------------------- // static class variables definition @@ -58,6 +72,105 @@ //According to STL _must_ be a -1 size_t const size_t wxString::npos = (size_t) -1; +#if wxUSE_STRING_POS_CACHE + +#ifdef wxHAS_COMPILER_TLS + +wxTLS_TYPE(wxString::Cache) wxString::ms_cache; + +#else // !wxHAS_COMPILER_TLS + +struct wxStrCacheInitializer +{ + wxStrCacheInitializer() + { + // calling this function triggers s_cache initialization in it, and + // from now on it becomes safe to call from multiple threads + wxString::GetCache(); + } +}; + +/* +wxString::Cache& wxString::GetCache() +{ + static wxTLS_TYPE(Cache) s_cache; + + return wxTLS_VALUE(s_cache); +} +*/ + +static wxStrCacheInitializer gs_stringCacheInit; + +#endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS + +// gdb seems to be unable to display thread-local variables correctly, at least +// not my 6.4.98 version under amd64, so provide this debugging helper to do it +#if wxDEBUG_LEVEL >= 2 + +struct wxStrCacheDumper +{ + static void ShowAll() + { + puts("*** wxString cache dump:"); + for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ ) + { + const wxString::Cache::Element& + c = wxString::GetCacheBegin()[n]; + + printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n", + n, + n == wxString::LastUsedCacheElement() ? " [*]" : "", + c.str, + (unsigned long)c.pos, + (unsigned long)c.impl, + (long)c.len); + } + } +}; + +void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); } + +#endif // wxDEBUG_LEVEL >= 2 + +#ifdef wxPROFILE_STRING_CACHE + +wxString::CacheStats wxString::ms_cacheStats; + +struct wxStrCacheStatsDumper +{ + ~wxStrCacheStatsDumper() + { + const wxString::CacheStats& stats = wxString::ms_cacheStats; + + if ( stats.postot ) + { + puts("*** wxString cache statistics:"); + printf("\tTotal non-trivial calls to PosToImpl(): %u\n", + stats.postot); + printf("\tHits %u (of which %u not used) or %.2f%%\n", + stats.poshits, + stats.mishits, + 100.*float(stats.poshits - stats.mishits)/stats.postot); + printf("\tAverage position requested: %.2f\n", + float(stats.sumpos) / stats.postot); + printf("\tAverage offset after cached hint: %.2f\n", + float(stats.sumofs) / stats.postot); + } + + if ( stats.lentot ) + { + printf("\tNumber of calls to length(): %u, hits=%.2f%%\n", + stats.lentot, 100.*float(stats.lenhits)/stats.lentot); + } + } +}; + +static wxStrCacheStatsDumper s_showCacheStats; + +#endif // wxPROFILE_STRING_CACHE + +#endif // wxUSE_STRING_POS_CACHE + // ---------------------------------------------------------------------------- // global functions // ---------------------------------------------------------------------------- @@ -69,7 +182,13 @@ const size_t wxString::npos = (size_t) -1; wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str) { #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8 - return os << (const char *)str.AsCharBuf(); + const wxCharBuffer buf(str.AsCharBuf()); + if ( !buf ) + os.clear(wxSTD ios_base::failbit); + else + os << buf.data(); + + return os; #else return os << str.AsInternal(); #endif @@ -123,22 +242,30 @@ void wxString::PosLenToImpl(size_t pos, size_t len, size_t *implPos, size_t *implLen) const { if ( pos == npos ) + { *implPos = npos; - else + } + else // have valid start position { - const_iterator i = begin() + pos; - *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin(); + const const_iterator b = GetIterForNthChar(pos); + *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin(); if ( len == npos ) + { *implLen = npos; - else + } + else // have valid length too { - // too large length is interpreted as "to the end of the string" - // FIXME-UTF8: verify this is the case in std::string, assert - // otherwise - if ( pos + len > length() ) - len = length() - pos; - - *implLen = (i + len).impl() - i.impl(); + // we need to handle the case of length specifying a substring + // going beyond the end of the string, just as std::string does + const const_iterator e(end()); + const_iterator i(b); + while ( len && i <= e ) + { + ++i; + --len; + } + + *implLen = i.impl() - b.impl(); } } } @@ -990,6 +1117,7 @@ size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart, int wxString::CmpNoCase(const wxString& s) const { +#if wxUSE_UNICODE_UTF8 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added const_iterator i1 = begin(); @@ -1013,6 +1141,9 @@ int wxString::CmpNoCase(const wxString& s) const else if ( len1 > len2 ) return 1; return 0; +#else // wxUSE_UNICODE_WCHAR or ANSI + return wxStricmp(m_impl.c_str(), s.m_impl.c_str()); +#endif } @@ -1169,7 +1300,7 @@ wxString wxString::Right(size_t nCount) const return dest; } -// get all characters after the last occurence of ch +// get all characters after the last occurrence of ch // (returns the whole string if ch not found) wxString wxString::AfterLast(wxUniChar ch) const { @@ -1178,7 +1309,7 @@ wxString wxString::AfterLast(wxUniChar ch) const if ( iPos == wxNOT_FOUND ) str = *this; else - str = wx_str() + iPos + 1; + str.assign(*this, iPos + 1, npos); return str; } @@ -1196,16 +1327,17 @@ wxString wxString::Left(size_t nCount) const return dest; } -// get all characters before the first occurence of ch +// get all characters before the first occurrence of ch // (returns the whole string if ch not found) wxString wxString::BeforeFirst(wxUniChar ch) const { int iPos = Find(ch); - if ( iPos == wxNOT_FOUND ) iPos = length(); + if ( iPos == wxNOT_FOUND ) + iPos = length(); return wxString(*this, 0, iPos); } -/// get all characters before the last occurence of ch +/// get all characters before the last occurrence of ch /// (returns empty string if ch not found) wxString wxString::BeforeLast(wxUniChar ch) const { @@ -1217,19 +1349,19 @@ wxString wxString::BeforeLast(wxUniChar ch) const return str; } -/// get all characters after the first occurence of ch +/// get all characters after the first occurrence of ch /// (returns empty string if ch not found) wxString wxString::AfterFirst(wxUniChar ch) const { wxString str; int iPos = Find(ch); if ( iPos != wxNOT_FOUND ) - str = wx_str() + iPos + 1; + str.assign(*this, iPos + 1, npos); return str; } -// replace first (or all) occurences of some substring with another one +// replace first (or all) occurrences of some substring with another one size_t wxString::Replace(const wxString& strOld, const wxString& strNew, bool bReplaceAll) { @@ -1237,6 +1369,8 @@ size_t wxString::Replace(const wxString& strOld, wxCHECK_MSG( !strOld.empty(), 0, _T("wxString::Replace(): invalid parameter") ); + wxSTRING_INVALIDATE_CACHE(); + size_t uiCount = 0; // count of replacements made // optimize the special common case: replacement of one character by @@ -1265,30 +1399,62 @@ size_t wxString::Replace(const wxString& strOld, break; } } - else // general case + else if ( !bReplaceAll) + { + size_t pos = m_impl.find(strOld, 0); + if ( pos != npos ) + { + m_impl.replace(pos, strOld.m_impl.length(), strNew.m_impl); + uiCount = 1; + } + } + else // replace all occurrences { const size_t uiOldLen = strOld.m_impl.length(); const size_t uiNewLen = strNew.m_impl.length(); - for ( size_t pos = 0; ; ) + // first scan the string to find all positions at which the replacement + // should be made + wxVector replacePositions; + + size_t pos; + for ( pos = m_impl.find(strOld.m_impl, 0); + pos != npos; + pos = m_impl.find(strOld.m_impl, pos + uiOldLen)) { - pos = m_impl.find(strOld.m_impl, pos); - if ( pos == npos ) - break; + replacePositions.push_back(pos); + ++uiCount; + } - // replace this occurrence of the old string with the new one - m_impl.replace(pos, uiOldLen, strNew.m_impl); + if ( !uiCount ) + return 0; - // move up pos past the string that was replaced - pos += uiNewLen; + // allocate enough memory for the whole new string + wxString tmp; + tmp.m_impl.reserve(m_impl.length() + uiCount*(uiNewLen - uiOldLen)); - // increase replace count - uiCount++; + // copy this string to tmp doing replacements on the fly + size_t replNum = 0; + for ( pos = 0; replNum < uiCount; replNum++ ) + { + const size_t nextReplPos = replacePositions[replNum]; - // stop after the first one? - if ( !bReplaceAll ) - break; + if ( pos != nextReplPos ) + { + tmp.m_impl.append(m_impl, pos, nextReplPos - pos); + } + + tmp.m_impl.append(strNew.m_impl); + pos = nextReplPos + uiOldLen; } + + if ( pos != m_impl.length() ) + { + // append the rest of the string unchanged + tmp.m_impl.append(m_impl, pos, m_impl.length() - pos); + } + + swap(tmp); } return uiCount; @@ -1479,64 +1645,105 @@ int wxString::Find(wxUniChar ch, bool bFromEnd) const #define DO_IF_NOT_WINCE(x) #endif -#define WX_STRING_TO_INT_TYPE(out, base, func, T) \ - wxCHECK_MSG( out, false, _T("NULL output pointer") ); \ - wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); \ - \ +#define WX_STRING_TO_X_TYPE_START \ + wxCHECK_MSG( pVal, false, _T("NULL output pointer") ); \ DO_IF_NOT_WINCE( errno = 0; ) \ - \ const wxStringCharType *start = wx_str(); \ - wxStringCharType *end; \ - T val = func(start, &end, base); \ - \ + wxStringCharType *end; + +#define WX_STRING_TO_X_TYPE_END \ /* return true only if scan was stopped by the terminating NUL and */ \ /* if the string was not empty to start with and no under/overflow */ \ /* occurred: */ \ if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \ return false; \ - *out = val; \ - return true + *pVal = val; \ + return true; bool wxString::ToLong(long *pVal, int base) const { - WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long); + wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); + + WX_STRING_TO_X_TYPE_START + long val = wxStrtol(start, &end, base); + WX_STRING_TO_X_TYPE_END } bool wxString::ToULong(unsigned long *pVal, int base) const { - WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long); + wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); + + WX_STRING_TO_X_TYPE_START + unsigned long val = wxStrtoul(start, &end, base); + WX_STRING_TO_X_TYPE_END } bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const { - WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t); + wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); + + WX_STRING_TO_X_TYPE_START + wxLongLong_t val = wxStrtoll(start, &end, base); + WX_STRING_TO_X_TYPE_END } bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const { - WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t); + wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); + + WX_STRING_TO_X_TYPE_START + wxULongLong_t val = wxStrtoull(start, &end, base); + WX_STRING_TO_X_TYPE_END } bool wxString::ToDouble(double *pVal) const { - wxCHECK_MSG( pVal, false, _T("NULL output pointer") ); + WX_STRING_TO_X_TYPE_START + double val = wxStrtod(start, &end); + WX_STRING_TO_X_TYPE_END +} - DO_IF_NOT_WINCE( errno = 0; ) +#if wxUSE_XLOCALE - const wxChar *start = c_str(); - wxChar *end; - double val = wxStrtod(start, &end); +bool wxString::ToCLong(long *pVal, int base) const +{ + wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); - // return true only if scan was stopped by the terminating NUL and if the - // string was not empty to start with and no under/overflow occurred - if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) - return false; + WX_STRING_TO_X_TYPE_START +#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE + long val = wxStrtol_lA(start, &end, base, wxCLocale); +#else + long val = wxStrtol_l(start, &end, base, wxCLocale); +#endif + WX_STRING_TO_X_TYPE_END +} - *pVal = val; +bool wxString::ToCULong(unsigned long *pVal, int base) const +{ + wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); - return true; + WX_STRING_TO_X_TYPE_START +#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE + unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale); +#else + unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale); +#endif + WX_STRING_TO_X_TYPE_END +} + +bool wxString::ToCDouble(double *pVal) const +{ + WX_STRING_TO_X_TYPE_START +#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE + double val = wxStrtod_lA(start, &end, wxCLocale); +#else + double val = wxStrtod_l(start, &end, wxCLocale); +#endif + WX_STRING_TO_X_TYPE_END } +#endif // wxUSE_XLOCALE + // --------------------------------------------------------------------------- // formatted output // --------------------------------------------------------------------------- @@ -1981,31 +2188,3 @@ int wxString::Freq(wxUniChar ch) const return count; } -// ---------------------------------------------------------------------------- -// wxUTF8StringBuffer -// ---------------------------------------------------------------------------- - -#if wxUSE_UNICODE_WCHAR -wxUTF8StringBuffer::~wxUTF8StringBuffer() -{ - wxMBConvStrictUTF8 conv; - size_t wlen = conv.ToWChar(NULL, 0, m_buf); - wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" ); - - wxStringInternalBuffer wbuf(m_str, wlen); - conv.ToWChar(wbuf, wlen, m_buf); -} - -wxUTF8StringBufferLength::~wxUTF8StringBufferLength() -{ - wxCHECK_RET(m_lenSet, "length not set"); - - wxMBConvStrictUTF8 conv; - size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len); - wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" ); - - wxStringInternalBufferLength wbuf(m_str, wlen); - conv.ToWChar(wbuf, wlen, m_buf, m_len); - wbuf.SetLength(wlen); -} -#endif // wxUSE_UNICODE_WCHAR