X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/a9a854d73ee5951ce800060fd2fbc87447f67725..8995db52f311ef389652b0ac3aff39bde723c5ac:/src/common/string.cpp diff --git a/src/common/string.cpp b/src/common/string.cpp index 5db2ba07a6..54a7281a58 100644 --- a/src/common/string.cpp +++ b/src/common/string.cpp @@ -24,6 +24,8 @@ #ifndef WX_PRECOMP #include "wx/string.h" #include "wx/wxcrtvararg.h" + #include "wx/intl.h" + #include "wx/log.h" #endif #include @@ -35,11 +37,17 @@ #include #include -#ifdef __SALFORDC__ - #include -#endif - #include "wx/hashmap.h" +#include "wx/vector.h" +#include "wx/xlocale.h" + +#ifdef __WXMSW__ + #include "wx/msw/wrapwin.h" +#endif // __WXMSW__ + +#if wxUSE_STD_IOSTREAM + #include +#endif // string handling functions used by wxString: #if wxUSE_UNICODE_UTF8 @@ -54,6 +62,22 @@ #define wxStringStrlen wxStrlen #endif +// define a function declared in wx/buffer.h here as we don't have buffer.cpp +// and don't want to add it just because of this simple function +namespace wxPrivate +{ + +// wxXXXBuffer classes can be (implicitly) used during global statics +// initialization so wrap the status UntypedBufferData variable in a function +// to make it safe to access it even before all global statics are initialized +UntypedBufferData *GetUntypedNullData() +{ + static UntypedBufferData s_untypedNullData(NULL, 0); + + return &s_untypedNullData; +} + +} // namespace wxPrivate // --------------------------------------------------------------------------- // static class variables definition @@ -62,6 +86,105 @@ //According to STL _must_ be a -1 size_t const size_t wxString::npos = (size_t) -1; +#if wxUSE_STRING_POS_CACHE + +#ifdef wxHAS_COMPILER_TLS + +wxTLS_TYPE(wxString::Cache) wxString::ms_cache; + +#else // !wxHAS_COMPILER_TLS + +struct wxStrCacheInitializer +{ + wxStrCacheInitializer() + { + // calling this function triggers s_cache initialization in it, and + // from now on it becomes safe to call from multiple threads + wxString::GetCache(); + } +}; + +/* +wxString::Cache& wxString::GetCache() +{ + static wxTLS_TYPE(Cache) s_cache; + + return wxTLS_VALUE(s_cache); +} +*/ + +static wxStrCacheInitializer gs_stringCacheInit; + +#endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS + +// gdb seems to be unable to display thread-local variables correctly, at least +// not my 6.4.98 version under amd64, so provide this debugging helper to do it +#if wxDEBUG_LEVEL >= 2 + +struct wxStrCacheDumper +{ + static void ShowAll() + { + puts("*** wxString cache dump:"); + for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ ) + { + const wxString::Cache::Element& + c = wxString::GetCacheBegin()[n]; + + printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n", + n, + n == wxString::LastUsedCacheElement() ? " [*]" : "", + c.str, + (unsigned long)c.pos, + (unsigned long)c.impl, + (long)c.len); + } + } +}; + +void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); } + +#endif // wxDEBUG_LEVEL >= 2 + +#ifdef wxPROFILE_STRING_CACHE + +wxString::CacheStats wxString::ms_cacheStats; + +struct wxStrCacheStatsDumper +{ + ~wxStrCacheStatsDumper() + { + const wxString::CacheStats& stats = wxString::ms_cacheStats; + + if ( stats.postot ) + { + puts("*** wxString cache statistics:"); + printf("\tTotal non-trivial calls to PosToImpl(): %u\n", + stats.postot); + printf("\tHits %u (of which %u not used) or %.2f%%\n", + stats.poshits, + stats.mishits, + 100.*float(stats.poshits - stats.mishits)/stats.postot); + printf("\tAverage position requested: %.2f\n", + float(stats.sumpos) / stats.postot); + printf("\tAverage offset after cached hint: %.2f\n", + float(stats.sumofs) / stats.postot); + } + + if ( stats.lentot ) + { + printf("\tNumber of calls to length(): %u, hits=%.2f%%\n", + stats.lentot, 100.*float(stats.lenhits)/stats.lentot); + } + } +}; + +static wxStrCacheStatsDumper s_showCacheStats; + +#endif // wxPROFILE_STRING_CACHE + +#endif // wxUSE_STRING_POS_CACHE + // ---------------------------------------------------------------------------- // global functions // ---------------------------------------------------------------------------- @@ -73,7 +196,13 @@ const size_t wxString::npos = (size_t) -1; wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str) { #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8 - return os << (const char *)str.AsCharBuf(); + const wxScopedCharBuffer buf(str.AsCharBuf()); + if ( !buf ) + os.clear(wxSTD ios_base::failbit); + else + os << buf.data(); + + return os; #else return os << str.AsInternal(); #endif @@ -84,13 +213,13 @@ wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str) return os << str.c_str(); } -wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str) +wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedCharBuffer& str) { return os << str.data(); } #ifndef __BORLANDC__ -wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str) +wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedWCharBuffer& str) { return os << str.data(); } @@ -108,7 +237,7 @@ wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str) return wos << str.AsWChar(); } -wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str) +wxSTD wostream& operator<<(wxSTD wostream& wos, const wxScopedWCharBuffer& str) { return wos << str.data(); } @@ -127,22 +256,30 @@ void wxString::PosLenToImpl(size_t pos, size_t len, size_t *implPos, size_t *implLen) const { if ( pos == npos ) + { *implPos = npos; - else + } + else // have valid start position { - const_iterator i = begin() + pos; - *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin(); + const const_iterator b = GetIterForNthChar(pos); + *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin(); if ( len == npos ) + { *implLen = npos; - else + } + else // have valid length too { - // too large length is interpreted as "to the end of the string" - // FIXME-UTF8: verify this is the case in std::string, assert - // otherwise - if ( pos + len > length() ) - len = length() - pos; - - *implLen = (i + len).impl() - i.impl(); + // we need to handle the case of length specifying a substring + // going beyond the end of the string, just as std::string does + const const_iterator e(end()); + const_iterator i(b); + while ( len && i <= e ) + { + ++i; + --len; + } + + *implLen = i.impl() - b.impl(); } } } @@ -238,95 +375,6 @@ wxString::~wxString() } #endif -#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY -const char* wxCStrData::AsChar() const -{ -#if wxUSE_UNICODE_UTF8 - if ( wxLocaleIsUtf8 ) - return AsInternal(); -#endif - // under non-UTF8 locales, we have to convert the internal UTF-8 - // representation using wxConvLibc and cache the result - - wxString *str = wxConstCast(m_str, wxString); - - // convert the string: - // - // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we - // have it) but it's unfortunately not obvious to implement - // because we don't know how big buffer do we need for the - // given string length (in case of multibyte encodings, e.g. - // ISO-2022-JP or UTF-8 when internal representation is wchar_t) - // - // One idea would be to store more than just m_convertedToChar - // in wxString: then we could record the length of the string - // which was converted the last time and try to reuse the same - // buffer if the current length is not greater than it (this - // could still fail because string could have been modified in - // place but it would work most of the time, so we'd do it and - // only allocate the new buffer if in-place conversion returned - // an error). We could also store a bit saying if the string - // was modified since the last conversion (and update it in all - // operation modifying the string, of course) to avoid unneeded - // consequential conversions. But both of these ideas require - // adding more fields to wxString and require profiling results - // to be sure that we really gain enough from them to justify - // doing it. - wxCharBuffer buf(str->mb_str()); - - // if it failed, return empty string and not NULL to avoid crashes in code - // written with either wxWidgets 2 wxString or std::string behaviour in - // mind: neither of them ever returns NULL and so we shouldn't neither - if ( !buf ) - return ""; - - if ( str->m_convertedToChar && - strlen(buf) == strlen(str->m_convertedToChar) ) - { - // keep the same buffer for as long as possible, so that several calls - // to c_str() in a row still work: - strcpy(str->m_convertedToChar, buf); - } - else - { - str->m_convertedToChar = buf.release(); - } - - // and keep it: - return str->m_convertedToChar + m_offset; -} -#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY - -#if !wxUSE_UNICODE_WCHAR -const wchar_t* wxCStrData::AsWChar() const -{ - wxString *str = wxConstCast(m_str, wxString); - - // convert the string: - wxWCharBuffer buf(str->wc_str()); - - // notice that here, unlike above in AsChar(), conversion can't fail as our - // internal UTF-8 is always well-formed -- or the string was corrupted and - // all bets are off anyhow - - // FIXME-UTF8: do the conversion in-place in the existing buffer - if ( str->m_convertedToWChar && - wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) ) - { - // keep the same buffer for as long as possible, so that several calls - // to c_str() in a row still work: - memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf)); - } - else - { - str->m_convertedToWChar = buf.release(); - } - - // and keep it: - return str->m_convertedToWChar + m_offset; -} -#endif // !wxUSE_UNICODE_WCHAR - // =========================================================================== // wxString class core // =========================================================================== @@ -342,15 +390,15 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, { // anything to do? if ( !psz || nLength == 0 ) - return SubstrBufFromMB(L"", 0); + return SubstrBufFromMB(wxWCharBuffer(L""), 0); if ( nLength == npos ) nLength = wxNO_LEN; size_t wcLen; - wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen)); + wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen)); if ( !wcLen ) - return SubstrBufFromMB(_T(""), 0); + return SubstrBufFromMB(wxWCharBuffer(L""), 0); else return SubstrBufFromMB(wcBuf, wcLen); } @@ -363,7 +411,7 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, { // anything to do? if ( !psz || nLength == 0 ) - return SubstrBufFromMB("", 0); + return SubstrBufFromMB(wxCharBuffer(""), 0); // if psz is already in UTF-8, we don't have to do the roundtrip to // wchar_t* and back: @@ -373,7 +421,11 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, // UTF-8 sequence and psz may be invalid: if ( wxStringOperations::IsValidUtf8String(psz, nLength) ) { - return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength); + // we must pass the real string length to SubstrBufFromMB ctor + if ( nLength == npos ) + nLength = psz ? strlen(psz) : 0; + return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz, nLength), + nLength); } // else: do the roundtrip through wchar_t* } @@ -383,14 +435,14 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, // first convert to wide string: size_t wcLen; - wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen)); + wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen)); if ( !wcLen ) - return SubstrBufFromMB("", 0); + return SubstrBufFromMB(wxCharBuffer(""), 0); // and then to UTF-8: SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8())); // widechar -> UTF-8 conversion isn't supposed to ever fail: - wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") ); + wxASSERT_MSG( buf.data, wxT("conversion to UTF-8 failed") ); return buf; } @@ -403,71 +455,111 @@ wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLengt { // anything to do? if ( !pwz || nLength == 0 ) - return SubstrBufFromWC("", 0); + return SubstrBufFromWC(wxCharBuffer(""), 0); if ( nLength == npos ) nLength = wxNO_LEN; size_t mbLen; - wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen)); + wxScopedCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen)); if ( !mbLen ) - return SubstrBufFromWC("", 0); + return SubstrBufFromWC(wxCharBuffer(""), 0); else return SubstrBufFromWC(mbBuf, mbLen); } #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE +// This std::string::c_str()-like method returns a wide char pointer to string +// contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return +// a pointer to the internal representation. Otherwise a conversion is required +// and it returns a temporary buffer. +// +// However for compatibility with c_str() and to avoid breaking existing code +// doing +// +// for ( const wchar_t *p = s.wc_str(); *p; p++ ) +// ... use *p... +// +// we actually need to ensure that the returned buffer is _not_ temporary and +// so we use wxString::m_convertedToWChar to store the returned data +#if !wxUSE_UNICODE_WCHAR -#if wxUSE_UNICODE_WCHAR - -//Convert wxString in Unicode mode to a multi-byte string -const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const +const wchar_t *wxString::AsWChar(const wxMBConv& conv) const { - return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL); -} + const char * const strMB = m_impl.c_str(); + const size_t lenMB = m_impl.length(); -#elif wxUSE_UNICODE_UTF8 + // find out the size of the buffer needed + const size_t lenWC = conv.ToWChar(NULL, 0, strMB, lenMB); + if ( lenWC == wxCONV_FAILED ) + return NULL; -const wxWCharBuffer wxString::wc_str() const -{ - return wxMBConvStrictUTF8().cMB2WC - ( - m_impl.c_str(), - m_impl.length() + 1, // size, not length - NULL - ); + // keep the same buffer if the string size didn't change: this is not only + // an optimization but also ensure that code which modifies string + // character by character (without changing its length) can continue to use + // the pointer returned by a previous wc_str() call even after changing the + // string + + // TODO-UTF8: we could check for ">" instead of "!=" here as this would + // allow to save on buffer reallocations but at the cost of + // consuming (even) more memory, we should benchmark this to + // determine if it's worth doing + if ( !m_convertedToWChar.m_str || lenWC != m_convertedToWChar.m_len ) + { + if ( !const_cast(this)->m_convertedToWChar.Extend(lenWC) ) + return NULL; + } + + // finally do convert + m_convertedToWChar.m_str[lenWC] = L'\0'; + if ( conv.ToWChar(m_convertedToWChar.m_str, lenWC, + strMB, lenMB) == wxCONV_FAILED ) + return NULL; + + return m_convertedToWChar.m_str; } -const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const +#endif // !wxUSE_UNICODE_WCHAR + + +// Same thing for mb_str() which returns a normal char pointer to string +// contents: this always requires converting it to the specified encoding in +// non-ANSI build except if we need to convert to UTF-8 and this is what we +// already use internally. +#if wxUSE_UNICODE + +const char *wxString::AsChar(const wxMBConv& conv) const { +#if wxUSE_UNICODE_UTF8 if ( conv.IsUTF8() ) - return wxCharBuffer::CreateNonOwned(m_impl.c_str()); + return m_impl.c_str(); - // FIXME-UTF8: use wc_str() here once we have buffers with length + const wchar_t * const strWC = AsWChar(wxMBConvStrictUTF8()); + const size_t lenWC = m_convertedToWChar.m_len; +#else // wxUSE_UNICODE_WCHAR + const wchar_t * const strWC = m_impl.c_str(); + const size_t lenWC = m_impl.length(); +#endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR - size_t wcLen; - wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC - ( - m_impl.c_str(), - m_impl.length() + 1, // size - &wcLen - )); - if ( !wcLen ) - return wxCharBuffer(""); + const size_t lenMB = conv.FromWChar(NULL, 0, strWC, lenWC); + if ( lenMB == wxCONV_FAILED ) + return NULL; - return conv.cWC2MB(wcBuf, wcLen+1, NULL); -} + if ( !m_convertedToChar.m_str || lenMB != m_convertedToChar.m_len ) + { + if ( !const_cast(this)->m_convertedToChar.Extend(lenMB) ) + return NULL; + } -#else // ANSI + m_convertedToChar.m_str[lenMB] = '\0'; + if ( conv.FromWChar(m_convertedToChar.m_str, lenMB, + strWC, lenWC) == wxCONV_FAILED ) + return NULL; -//Converts this string to a wide character string if unicode -//mode is not enabled and wxUSE_WCHAR_T is enabled -const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const -{ - return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL); + return m_convertedToChar.m_str; } -#endif // Unicode/ANSI +#endif // wxUSE_UNICODE // shrink to minimal size (releasing extra memory) bool wxString::Shrink() @@ -558,7 +650,7 @@ wxString operator+(const wxString& str, const char *psz) wxString s; if ( !s.Alloc(strlen(psz) + str.length()) ) { - wxFAIL_MSG( _T("out of memory in wxString::operator+") ); + wxFAIL_MSG( wxT("out of memory in wxString::operator+") ); } s += str; s += psz; @@ -574,7 +666,7 @@ wxString operator+(const wxString& str, const wchar_t *pwz) wxString s; if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) { - wxFAIL_MSG( _T("out of memory in wxString::operator+") ); + wxFAIL_MSG( wxT("out of memory in wxString::operator+") ); } s += str; s += pwz; @@ -590,7 +682,7 @@ wxString operator+(const char *psz, const wxString& str) wxString s; if ( !s.Alloc(strlen(psz) + str.length()) ) { - wxFAIL_MSG( _T("out of memory in wxString::operator+") ); + wxFAIL_MSG( wxT("out of memory in wxString::operator+") ); } s = psz; s += str; @@ -606,7 +698,7 @@ wxString operator+(const wchar_t *pwz, const wxString& str) wxString s; if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) { - wxFAIL_MSG( _T("out of memory in wxString::operator+") ); + wxFAIL_MSG( wxT("out of memory in wxString::operator+") ); } s = pwz; s += str; @@ -824,7 +916,7 @@ size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const { - wxASSERT_MSG( nStart <= length(), _T("invalid index") ); + wxASSERT_MSG( nStart <= length(), wxT("invalid index") ); size_t idx = nStart; for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i ) @@ -838,7 +930,7 @@ size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const { - wxASSERT_MSG( nStart <= length(), _T("invalid index") ); + wxASSERT_MSG( nStart <= length(), wxT("invalid index") ); size_t idx = nStart; for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i ) @@ -871,7 +963,7 @@ size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const } else { - wxASSERT_MSG( nStart <= len, _T("invalid index") ); + wxASSERT_MSG( nStart <= len, wxT("invalid index") ); } size_t idx = nStart; @@ -895,7 +987,7 @@ size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) con } else { - wxASSERT_MSG( nStart <= len, _T("invalid index") ); + wxASSERT_MSG( nStart <= len, wxT("invalid index") ); } size_t idx = nStart; @@ -911,7 +1003,7 @@ size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) con size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const { - wxASSERT_MSG( nStart <= length(), _T("invalid index") ); + wxASSERT_MSG( nStart <= length(), wxT("invalid index") ); size_t idx = nStart; for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i ) @@ -933,7 +1025,7 @@ size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const } else { - wxASSERT_MSG( nStart <= len, _T("invalid index") ); + wxASSERT_MSG( nStart <= len, wxT("invalid index") ); } size_t idx = nStart; @@ -991,8 +1083,65 @@ size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart, int wxString::CmpNoCase(const wxString& s) const { - // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added +#if !wxUSE_UNICODE_UTF8 + // We compare NUL-delimited chunks of the strings inside the loop. We will + // do as many iterations as there are embedded NULs in the string, i.e. + // usually we will run it just once. + + typedef const wxStringImpl::value_type *pchar_type; + const pchar_type thisBegin = m_impl.c_str(); + const pchar_type thatBegin = s.m_impl.c_str(); + + const pchar_type thisEnd = thisBegin + m_impl.length(); + const pchar_type thatEnd = thatBegin + s.m_impl.length(); + + pchar_type thisCur = thisBegin; + pchar_type thatCur = thatBegin; + + int rc; + for ( ;; ) + { + // Compare until the next NUL, if the strings differ this is the final + // result. + rc = wxStricmp(thisCur, thatCur); + if ( rc ) + break; + + const size_t lenChunk = wxStrlen(thisCur); + thisCur += lenChunk; + thatCur += lenChunk; + + // Skip all the NULs as wxStricmp() doesn't handle them. + for ( ; !*thisCur; thisCur++, thatCur++ ) + { + // Check if we exhausted either of the strings. + if ( thisCur == thisEnd ) + { + // This one is exhausted, is the other one too? + return thatCur == thatEnd ? 0 : -1; + } + + if ( thatCur == thatEnd ) + { + // Because of the test above we know that this one is not + // exhausted yet so it's greater than the other one that is. + return 1; + } + + if ( *thatCur ) + { + // Anything non-NUL is greater than NUL. + return -1; + } + } + } + return rc; +#else // wxUSE_UNICODE_UTF8 + // CRT functions can't be used for case-insensitive comparison of UTF-8 + // strings so do it in the naive, simple and inefficient way. + + // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added const_iterator i1 = begin(); const_iterator end1 = end(); const_iterator i2 = s.begin(); @@ -1014,6 +1163,7 @@ int wxString::CmpNoCase(const wxString& s) const else if ( len1 > len2 ) return 1; return 0; +#endif // !wxUSE_UNICODE_UTF8/wxUSE_UNICODE_UTF8 } @@ -1040,7 +1190,7 @@ wxString wxString::FromAscii(const char *ascii, size_t len) { unsigned char c = (unsigned char)*ascii++; wxASSERT_MSG( c < 0x80, - _T("Non-ASCII value passed to FromAscii().") ); + wxT("Non-ASCII value passed to FromAscii().") ); *dest++ = (wchar_t)c; } @@ -1060,13 +1210,13 @@ wxString wxString::FromAscii(char ascii) unsigned char c = (unsigned char)ascii; - wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") ); + wxASSERT_MSG( c < 0x80, wxT("Non-ASCII value passed to FromAscii().") ); // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value return wxString(wxUniChar((wchar_t)c)); } -const wxCharBuffer wxString::ToAscii() const +const wxScopedCharBuffer wxString::ToAscii() const { // this will allocate enough space for the terminating NUL too wxCharBuffer buffer(length()); @@ -1115,7 +1265,7 @@ wxString wxString::Mid(size_t nFirst, size_t nCount) const wxString dest(*this, nFirst, nCount); if ( dest.length() != nCount ) { - wxFAIL_MSG( _T("out of memory in wxString::Mid") ); + wxFAIL_MSG( wxT("out of memory in wxString::Mid") ); } return dest; @@ -1165,12 +1315,12 @@ wxString wxString::Right(size_t nCount) const wxString dest(*this, length() - nCount, nCount); if ( dest.length() != nCount ) { - wxFAIL_MSG( _T("out of memory in wxString::Right") ); + wxFAIL_MSG( wxT("out of memory in wxString::Right") ); } return dest; } -// get all characters after the last occurence of ch +// get all characters after the last occurrence of ch // (returns the whole string if ch not found) wxString wxString::AfterLast(wxUniChar ch) const { @@ -1179,7 +1329,7 @@ wxString wxString::AfterLast(wxUniChar ch) const if ( iPos == wxNOT_FOUND ) str = *this; else - str = wx_str() + iPos + 1; + str.assign(*this, iPos + 1, npos); return str; } @@ -1192,21 +1342,22 @@ wxString wxString::Left(size_t nCount) const wxString dest(*this, 0, nCount); if ( dest.length() != nCount ) { - wxFAIL_MSG( _T("out of memory in wxString::Left") ); + wxFAIL_MSG( wxT("out of memory in wxString::Left") ); } return dest; } -// get all characters before the first occurence of ch +// get all characters before the first occurrence of ch // (returns the whole string if ch not found) wxString wxString::BeforeFirst(wxUniChar ch) const { int iPos = Find(ch); - if ( iPos == wxNOT_FOUND ) iPos = length(); + if ( iPos == wxNOT_FOUND ) + iPos = length(); return wxString(*this, 0, iPos); } -/// get all characters before the last occurence of ch +/// get all characters before the last occurrence of ch /// (returns empty string if ch not found) wxString wxString::BeforeLast(wxUniChar ch) const { @@ -1218,56 +1369,112 @@ wxString wxString::BeforeLast(wxUniChar ch) const return str; } -/// get all characters after the first occurence of ch +/// get all characters after the first occurrence of ch /// (returns empty string if ch not found) wxString wxString::AfterFirst(wxUniChar ch) const { wxString str; int iPos = Find(ch); if ( iPos != wxNOT_FOUND ) - str = wx_str() + iPos + 1; + str.assign(*this, iPos + 1, npos); return str; } -// replace first (or all) occurences of some substring with another one +// replace first (or all) occurrences of some substring with another one size_t wxString::Replace(const wxString& strOld, const wxString& strNew, bool bReplaceAll) { // if we tried to replace an empty string we'd enter an infinite loop below wxCHECK_MSG( !strOld.empty(), 0, - _T("wxString::Replace(): invalid parameter") ); + wxT("wxString::Replace(): invalid parameter") ); + + wxSTRING_INVALIDATE_CACHE(); size_t uiCount = 0; // count of replacements made - size_t uiOldLen = strOld.length(); - size_t uiNewLen = strNew.length(); + // optimize the special common case: replacement of one character by + // another one (in UTF-8 case we can only do this for ASCII characters) + // + // benchmarks show that this special version is around 3 times faster + // (depending on the proportion of matching characters and UTF-8/wchar_t + // build) + if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 ) + { + const wxStringCharType chOld = strOld.m_impl[0], + chNew = strNew.m_impl[0]; - size_t dwPos = 0; + // this loop is the simplified version of the one below + for ( size_t pos = 0; ; ) + { + pos = m_impl.find(chOld, pos); + if ( pos == npos ) + break; + + m_impl[pos++] = chNew; + + uiCount++; - while ( (*this)[dwPos] != wxT('\0') ) + if ( !bReplaceAll ) + break; + } + } + else if ( !bReplaceAll) { - //DO NOT USE STRSTR HERE - //this string can contain embedded null characters, - //so strstr will function incorrectly - dwPos = find(strOld, dwPos); - if ( dwPos == npos ) - break; // exit the loop - else + size_t pos = m_impl.find(strOld, 0); + if ( pos != npos ) { - //replace this occurance of the old string with the new one - replace(dwPos, uiOldLen, strNew, uiNewLen); + m_impl.replace(pos, strOld.m_impl.length(), strNew.m_impl); + uiCount = 1; + } + } + else // replace all occurrences + { + const size_t uiOldLen = strOld.m_impl.length(); + const size_t uiNewLen = strNew.m_impl.length(); - //move up pos past the string that was replaced - dwPos += uiNewLen; + // first scan the string to find all positions at which the replacement + // should be made + wxVector replacePositions; - //increase replace count + size_t pos; + for ( pos = m_impl.find(strOld.m_impl, 0); + pos != npos; + pos = m_impl.find(strOld.m_impl, pos + uiOldLen)) + { + replacePositions.push_back(pos); ++uiCount; + } - // stop now? - if ( !bReplaceAll ) - break; // exit the loop + if ( !uiCount ) + return 0; + + // allocate enough memory for the whole new string + wxString tmp; + tmp.m_impl.reserve(m_impl.length() + uiCount*(uiNewLen - uiOldLen)); + + // copy this string to tmp doing replacements on the fly + size_t replNum = 0; + for ( pos = 0; replNum < uiCount; replNum++ ) + { + const size_t nextReplPos = replacePositions[replNum]; + + if ( pos != nextReplPos ) + { + tmp.m_impl.append(m_impl, pos, nextReplPos - pos); + } + + tmp.m_impl.append(strNew.m_impl); + pos = nextReplPos + uiOldLen; } + + if ( pos != m_impl.length() ) + { + // append the rest of the string unchanged + tmp.m_impl.append(m_impl, pos, m_impl.length() - pos); + } + + swap(tmp); } return uiCount; @@ -1302,7 +1509,7 @@ bool wxString::IsNumber() const const_iterator i = begin(); - if ( *i == _T('-') || *i == _T('+') ) + if ( *i == wxT('-') || *i == wxT('+') ) ++i; for ( ; i != end(); ++i ) @@ -1342,14 +1549,28 @@ wxString& wxString::MakeLower() return *this; } +wxString& wxString::MakeCapitalized() +{ + const iterator en = end(); + iterator it = begin(); + if ( it != en ) + { + *it = (wxChar)wxToupper(*it); + for ( ++it; it != en; ++it ) + *it = (wxChar)wxTolower(*it); + } + + return *this; +} + // --------------------------------------------------------------------------- // trimming and padding // --------------------------------------------------------------------------- // some compilers (VC++ 6.0 not to name them) return true for a call to -// isspace('ê') in the C locale which seems to be broken to me, but we have to -// live with this by checking that the character is a 7 bit one - even if this -// may fail to detect some spaces (I don't know if Unicode doesn't have +// isspace('\xEA') in the C locale which seems to be broken to me, but we have +// to live with this by checking that the character is a 7 bit one - even if +// this may fail to detect some spaces (I don't know if Unicode doesn't have // space-like symbols somewhere except in the first 128 chars), it is arguably // still better than trimming away accented letters inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); } @@ -1444,61 +1665,183 @@ int wxString::Find(wxUniChar ch, bool bFromEnd) const #define DO_IF_NOT_WINCE(x) #endif -#define WX_STRING_TO_INT_TYPE(val, base, func) \ - wxCHECK_MSG( val, false, _T("NULL output pointer") ); \ - wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); \ - \ +#define WX_STRING_TO_X_TYPE_START \ + wxCHECK_MSG( pVal, false, wxT("NULL output pointer") ); \ DO_IF_NOT_WINCE( errno = 0; ) \ - \ const wxStringCharType *start = wx_str(); \ - wxStringCharType *end; \ - *val = func(start, &end, base); \ - \ - /* return true only if scan was stopped by the terminating NUL and */ \ - /* if the string was not empty to start with and no under/overflow */ \ - /* occurred: */ \ - return !*end && (end != start) \ - DO_IF_NOT_WINCE( && (errno != ERANGE) ) + wxStringCharType *end; + +// notice that we return false without modifying the output parameter at all if +// nothing could be parsed but we do modify it and return false then if we did +// parse something successfully but not the entire string +#define WX_STRING_TO_X_TYPE_END \ + if ( end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \ + return false; \ + *pVal = val; \ + return !*end; + +bool wxString::ToLong(long *pVal, int base) const +{ + wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") ); + + WX_STRING_TO_X_TYPE_START + long val = wxStrtol(start, &end, base); + WX_STRING_TO_X_TYPE_END +} -bool wxString::ToLong(long *val, int base) const +bool wxString::ToULong(unsigned long *pVal, int base) const { - WX_STRING_TO_INT_TYPE(val, base, wxStrtol); + wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") ); + + WX_STRING_TO_X_TYPE_START + unsigned long val = wxStrtoul(start, &end, base); + WX_STRING_TO_X_TYPE_END } -bool wxString::ToULong(unsigned long *val, int base) const +bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const { - WX_STRING_TO_INT_TYPE(val, base, wxStrtoul); + wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") ); + + WX_STRING_TO_X_TYPE_START + wxLongLong_t val = wxStrtoll(start, &end, base); + WX_STRING_TO_X_TYPE_END } -bool wxString::ToLongLong(wxLongLong_t *val, int base) const +bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const { - WX_STRING_TO_INT_TYPE(val, base, wxStrtoll); + wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") ); + + WX_STRING_TO_X_TYPE_START + wxULongLong_t val = wxStrtoull(start, &end, base); + WX_STRING_TO_X_TYPE_END } -bool wxString::ToULongLong(wxULongLong_t *val, int base) const +bool wxString::ToDouble(double *pVal) const { - WX_STRING_TO_INT_TYPE(val, base, wxStrtoull); + WX_STRING_TO_X_TYPE_START + double val = wxStrtod(start, &end); + WX_STRING_TO_X_TYPE_END } -bool wxString::ToDouble(double *val) const +#if wxUSE_XLOCALE + +bool wxString::ToCLong(long *pVal, int base) const { - wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") ); + wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") ); -#ifndef __WXWINCE__ - errno = 0; + WX_STRING_TO_X_TYPE_START +#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT) + long val = wxStrtol_lA(start, &end, base, wxCLocale); +#else + long val = wxStrtol_l(start, &end, base, wxCLocale); #endif + WX_STRING_TO_X_TYPE_END +} - const wxChar *start = c_str(); - wxChar *end; - *val = wxStrtod(start, &end); +bool wxString::ToCULong(unsigned long *pVal, int base) const +{ + wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") ); - // return true only if scan was stopped by the terminating NUL and if the - // string was not empty to start with and no under/overflow occurred - return !*end && (end != start) -#ifndef __WXWINCE__ - && (errno != ERANGE) + WX_STRING_TO_X_TYPE_START +#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT) + unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale); +#else + unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale); +#endif + WX_STRING_TO_X_TYPE_END +} + +bool wxString::ToCDouble(double *pVal) const +{ + WX_STRING_TO_X_TYPE_START +#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT) + double val = wxStrtod_lA(start, &end, wxCLocale); +#else + double val = wxStrtod_l(start, &end, wxCLocale); #endif - ; + WX_STRING_TO_X_TYPE_END +} + +#else // wxUSE_XLOCALE + +// Provide implementation of these functions even when wxUSE_XLOCALE is +// disabled, we still need them in wxWidgets internal code. + +// For integers we just assume the current locale uses the same number +// representation as the C one as there is nothing else we can do. +bool wxString::ToCLong(long *pVal, int base) const +{ + return ToLong(pVal, base); +} + +bool wxString::ToCULong(unsigned long *pVal, int base) const +{ + return ToULong(pVal, base); +} + +// For floating point numbers we have to handle the problem of the decimal +// point which is different in different locales. +bool wxString::ToCDouble(double *pVal) const +{ + // Create a copy of this string using the decimal point instead of whatever + // separator the current locale uses. +#if wxUSE_INTL + wxString sep = wxLocale::GetInfo(wxLOCALE_DECIMAL_POINT, + wxLOCALE_CAT_NUMBER); + if ( sep == "." ) + { + // We can avoid an unnecessary string copy in this case. + return ToDouble(pVal); + } +#else // !wxUSE_INTL + // We don't know what the current separator is so it might even be a point + // already, try to parse the string as a double: + if ( ToDouble(pVal) ) + { + // It must have been the point, nothing else to do. + return true; + } + + // Try to guess the separator, using the most common alternative value. + wxString sep(","); +#endif // wxUSE_INTL/!wxUSE_INTL + wxString cstr(*this); + cstr.Replace(".", sep); + + return cstr.ToDouble(pVal); +} + +#endif // wxUSE_XLOCALE/!wxUSE_XLOCALE + +// ---------------------------------------------------------------------------- +// number to string conversion +// ---------------------------------------------------------------------------- + +/* static */ +wxString wxString::FromCDouble(double val) +{ +#if wxUSE_STD_IOSTREAM && wxUSE_STD_STRING + // We assume that we can use the ostream and not wstream for numbers. + wxSTD ostringstream os; + os << val; + return os.str(); +#else // wxUSE_STD_IOSTREAM + // Can't use iostream locale support, fall back to the manual method + // instead. + wxString s = FromDouble(val); +#if wxUSE_INTL + wxString sep = wxLocale::GetInfo(wxLOCALE_DECIMAL_POINT, + wxLOCALE_CAT_NUMBER); +#else // !wxUSE_INTL + // As above, this is the most common alternative value. Notice that here it + // doesn't matter if we guess wrongly and the current separator is already + // ".": we'll just waste a call to Replace() in this case. + wxString sep(","); +#endif // wxUSE_INTL/!wxUSE_INTL + + s.Replace(sep, "."); + return s; +#endif // wxUSE_STD_IOSTREAM/!wxUSE_STD_IOSTREAM } // --------------------------------------------------------------------------- @@ -1620,7 +1963,7 @@ int wxString::DoPrintfUtf8(const char *format, ...) an undersized buffer and no other errno are defined we treat those two as meaning hard errors and everything else gets the old behavior which is to keep looping and increasing buffer size until the function succeeds. - + In practice it's impossible to determine before compilation which behavior may be used. The vswprintf function may have vsnprintf-like behavior or vice-versa. Behavior detected on one release can theoretically change @@ -1669,11 +2012,6 @@ static int DoStringPrintfV(wxString& str, if ( !buf ) { // out of memory - - // in UTF-8 build, leaving uninitialized junk in the buffer - // could result in invalid non-empty UTF-8 string, so just - // reset the string to empty on failure: - buf[0] = '\0'; return -1; } @@ -1694,7 +2032,7 @@ static int DoStringPrintfV(wxString& str, // always do it manually // FIXME: This really seems to be the wrong and would be an off-by-one // bug except the code above allocates an extra character. - buf[size] = _T('\0'); + buf[size] = wxT('\0'); // vsnprintf() may return either -1 (traditional Unix behaviour) or the // total number of characters which would have been written if the @@ -1730,7 +2068,7 @@ static int DoStringPrintfV(wxString& str, else if ( len >= size ) { #if wxUSE_WXVSNPRINTF - // we know that our own implementation of wxVsnprintf() returns + // we know that our own implementation of wxVsnprintf() returns // size+1 when there's not enough space but that's not the size // of the required buffer! size *= 2; // so we just double the current size of the buffer @@ -1800,31 +2138,31 @@ bool wxString::Matches(const wxString& mask) const wxString pattern; pattern.reserve(wxStrlen(pszMask)); - pattern += _T('^'); + pattern += wxT('^'); while ( *pszMask ) { switch ( *pszMask ) { - case _T('?'): - pattern += _T('.'); + case wxT('?'): + pattern += wxT('.'); break; - case _T('*'): - pattern += _T(".*"); + case wxT('*'): + pattern += wxT(".*"); break; - case _T('^'): - case _T('.'): - case _T('$'): - case _T('('): - case _T(')'): - case _T('|'): - case _T('+'): - case _T('\\'): + case wxT('^'): + case wxT('.'): + case wxT('$'): + case wxT('('): + case wxT(')'): + case wxT('|'): + case wxT('+'): + case wxT('\\'): // these characters are special in a RE, quote them // (however note that we don't quote '[' and ']' to allow // using them for Unix shell like matching) - pattern += _T('\\'); + pattern += wxT('\\'); // fall through default: @@ -1833,7 +2171,7 @@ bool wxString::Matches(const wxString& mask) const pszMask++; } - pattern += _T('$'); + pattern += wxT('$'); // and now use it return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str()); @@ -1842,8 +2180,8 @@ bool wxString::Matches(const wxString& mask) const // FIXME-UTF8: implement using iterators, remove #if #if wxUSE_UNICODE_UTF8 - wxWCharBuffer maskBuf = mask.wc_str(); - wxWCharBuffer txtBuf = wc_str(); + const wxScopedWCharBuffer maskBuf = mask.wc_str(); + const wxScopedWCharBuffer txtBuf = wc_str(); const wxChar *pszMask = maskBuf.data(); const wxChar *pszTxt = txtBuf.data(); #else @@ -1945,38 +2283,3 @@ int wxString::Freq(wxUniChar ch) const return count; } -// convert to upper case, return the copy of the string -wxString wxString::Upper() const -{ wxString s(*this); return s.MakeUpper(); } - -// convert to lower case, return the copy of the string -wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); } - -// ---------------------------------------------------------------------------- -// wxUTF8StringBuffer -// ---------------------------------------------------------------------------- - -#if wxUSE_UNICODE_WCHAR -wxUTF8StringBuffer::~wxUTF8StringBuffer() -{ - wxMBConvStrictUTF8 conv; - size_t wlen = conv.ToWChar(NULL, 0, m_buf); - wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" ); - - wxStringInternalBuffer wbuf(m_str, wlen); - conv.ToWChar(wbuf, wlen, m_buf); -} - -wxUTF8StringBufferLength::~wxUTF8StringBufferLength() -{ - wxCHECK_RET(m_lenSet, "length not set"); - - wxMBConvStrictUTF8 conv; - size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len); - wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" ); - - wxStringInternalBufferLength wbuf(m_str, wlen); - conv.ToWChar(wbuf, wlen, m_buf, m_len); - wbuf.SetLength(wlen); -} -#endif // wxUSE_UNICODE_WCHAR