X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/a33c7045d1b9ecc78a617bf0a568ca64a53c814b..e01a788ee05125b4fefa01a5b11f4088915cb54e:/src/common/string.cpp diff --git a/src/common/string.cpp b/src/common/string.cpp index b9456ecee4..9e9ba528fc 100644 --- a/src/common/string.cpp +++ b/src/common/string.cpp @@ -24,6 +24,7 @@ #ifndef WX_PRECOMP #include "wx/string.h" #include "wx/wxcrtvararg.h" + #include "wx/log.h" #endif #include @@ -35,11 +36,13 @@ #include #include -#ifdef __SALFORDC__ - #include -#endif - #include "wx/hashmap.h" +#include "wx/vector.h" +#include "wx/xlocale.h" + +#ifdef __WXMSW__ + #include "wx/msw/wrapwin.h" +#endif // __WXMSW__ // string handling functions used by wxString: #if wxUSE_UNICODE_UTF8 @@ -54,6 +57,18 @@ #define wxStringStrlen wxStrlen #endif +// ---------------------------------------------------------------------------- +// global variables +// ---------------------------------------------------------------------------- + +namespace wxPrivate +{ + +static UntypedBufferData s_untypedNullData(NULL, 0); + +UntypedBufferData * const untypedNullDataPtr = &s_untypedNullData; + +} // namespace wxPrivate // --------------------------------------------------------------------------- // static class variables definition @@ -62,6 +77,105 @@ //According to STL _must_ be a -1 size_t const size_t wxString::npos = (size_t) -1; +#if wxUSE_STRING_POS_CACHE + +#ifdef wxHAS_COMPILER_TLS + +wxTLS_TYPE(wxString::Cache) wxString::ms_cache; + +#else // !wxHAS_COMPILER_TLS + +struct wxStrCacheInitializer +{ + wxStrCacheInitializer() + { + // calling this function triggers s_cache initialization in it, and + // from now on it becomes safe to call from multiple threads + wxString::GetCache(); + } +}; + +/* +wxString::Cache& wxString::GetCache() +{ + static wxTLS_TYPE(Cache) s_cache; + + return wxTLS_VALUE(s_cache); +} +*/ + +static wxStrCacheInitializer gs_stringCacheInit; + +#endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS + +// gdb seems to 
be unable to display thread-local variables correctly, at least +// not my 6.4.98 version under amd64, so provide this debugging helper to do it +#if wxDEBUG_LEVEL >= 2 + +struct wxStrCacheDumper +{ + static void ShowAll() + { + puts("*** wxString cache dump:"); + for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ ) + { + const wxString::Cache::Element& + c = wxString::GetCacheBegin()[n]; + + printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n", + n, + n == wxString::LastUsedCacheElement() ? " [*]" : "", + c.str, + (unsigned long)c.pos, + (unsigned long)c.impl, + (long)c.len); + } + } +}; + +void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); } + +#endif // wxDEBUG_LEVEL >= 2 + +#ifdef wxPROFILE_STRING_CACHE + +wxString::CacheStats wxString::ms_cacheStats; + +struct wxStrCacheStatsDumper +{ + ~wxStrCacheStatsDumper() + { + const wxString::CacheStats& stats = wxString::ms_cacheStats; + + if ( stats.postot ) + { + puts("*** wxString cache statistics:"); + printf("\tTotal non-trivial calls to PosToImpl(): %u\n", + stats.postot); + printf("\tHits %u (of which %u not used) or %.2f%%\n", + stats.poshits, + stats.mishits, + 100.*float(stats.poshits - stats.mishits)/stats.postot); + printf("\tAverage position requested: %.2f\n", + float(stats.sumpos) / stats.postot); + printf("\tAverage offset after cached hint: %.2f\n", + float(stats.sumofs) / stats.postot); + } + + if ( stats.lentot ) + { + printf("\tNumber of calls to length(): %u, hits=%.2f%%\n", + stats.lentot, 100.*float(stats.lenhits)/stats.lentot); + } + } +}; + +static wxStrCacheStatsDumper s_showCacheStats; + +#endif // wxPROFILE_STRING_CACHE + +#endif // wxUSE_STRING_POS_CACHE + // ---------------------------------------------------------------------------- // global functions // ---------------------------------------------------------------------------- @@ -72,11 +186,16 @@ const size_t wxString::npos = (size_t) -1; wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str) { -// FIXME-UTF8: always, not 
only if wxUSE_UNICODE -#if wxUSE_UNICODE && !defined(__BORLANDC__) - return os << (const wchar_t*)str.AsWCharBuf(); +#if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8 + const wxScopedCharBuffer buf(str.AsCharBuf()); + if ( !buf ) + os.clear(wxSTD ios_base::failbit); + else + os << buf.data(); + + return os; #else - return os << (const char*)str.AsCharBuf(); + return os << str.AsInternal(); #endif } @@ -85,18 +204,37 @@ wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str) return os << str.c_str(); } -wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str) +wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedCharBuffer& str) { return os << str.data(); } #ifndef __BORLANDC__ -wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str) +wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedWCharBuffer& str) { return os << str.data(); } #endif +#if wxUSE_UNICODE && defined(HAVE_WOSTREAM) + +wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str) +{ + return wos << str.wc_str(); +} + +wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str) +{ + return wos << str.AsWChar(); +} + +wxSTD wostream& operator<<(wxSTD wostream& wos, const wxScopedWCharBuffer& str) +{ + return wos << str.data(); +} + +#endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM) + #endif // wxUSE_STD_IOSTREAM // =========================================================================== @@ -109,22 +247,30 @@ void wxString::PosLenToImpl(size_t pos, size_t len, size_t *implPos, size_t *implLen) const { if ( pos == npos ) + { *implPos = npos; - else + } + else // have valid start position { - const_iterator i = begin() + pos; - *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin(); + const const_iterator b = GetIterForNthChar(pos); + *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin(); if ( len == npos ) + { *implLen = npos; - else + } + else // have valid length too { - // too large length is interpreted as "to 
the end of the string" - // FIXME-UTF8: verify this is the case in std::string, assert - // otherwise - if ( pos + len > length() ) - len = length() - pos; - - *implLen = (i + len).impl() - i.impl(); + // we need to handle the case of length specifying a substring + // going beyond the end of the string, just as std::string does + const const_iterator e(end()); + const_iterator i(b); + while ( len && i <= e ) + { + ++i; + --len; + } + + *implLen = i.impl() - b.impl(); } } } @@ -220,65 +366,6 @@ wxString::~wxString() } #endif -#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY -const char* wxCStrData::AsChar() const -{ -#if wxUSE_UNICODE_UTF8 - if ( wxLocaleIsUtf8 ) - return AsInternal(); -#endif - // under non-UTF8 locales, we have to convert the internal UTF-8 - // representation using wxConvLibc and cache the result - - wxString *str = wxConstCast(m_str, wxString); - - // convert the string: - wxCharBuffer buf(str->mb_str()); - - // FIXME-UTF8: do the conversion in-place in the existing buffer - if ( str->m_convertedToChar && - strlen(buf) == strlen(str->m_convertedToChar) ) - { - // keep the same buffer for as long as possible, so that several calls - // to c_str() in a row still work: - strcpy(str->m_convertedToChar, buf); - } - else - { - str->m_convertedToChar = buf.release(); - } - - // and keep it: - return str->m_convertedToChar + m_offset; -} -#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY - -#if !wxUSE_UNICODE_WCHAR -const wchar_t* wxCStrData::AsWChar() const -{ - wxString *str = wxConstCast(m_str, wxString); - - // convert the string: - wxWCharBuffer buf(str->wc_str()); - - // FIXME-UTF8: do the conversion in-place in the existing buffer - if ( str->m_convertedToWChar && - wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) ) - { - // keep the same buffer for as long as possible, so that several calls - // to c_str() in a row still work: - memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf)); - } - else - { - str->m_convertedToWChar = 
buf.release(); - } - - // and keep it: - return str->m_convertedToWChar + m_offset; -} -#endif // !wxUSE_UNICODE_WCHAR - // =========================================================================== // wxString class core // =========================================================================== @@ -294,15 +381,15 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, { // anything to do? if ( !psz || nLength == 0 ) - return SubstrBufFromMB(L"", 0); + return SubstrBufFromMB(wxWCharBuffer(L""), 0); if ( nLength == npos ) nLength = wxNO_LEN; size_t wcLen; - wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen)); + wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen)); if ( !wcLen ) - return SubstrBufFromMB(_T(""), 0); + return SubstrBufFromMB(wxWCharBuffer(L""), 0); else return SubstrBufFromMB(wcBuf, wcLen); } @@ -315,7 +402,7 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, { // anything to do? if ( !psz || nLength == 0 ) - return SubstrBufFromMB("", 0); + return SubstrBufFromMB(wxCharBuffer(""), 0); // if psz is already in UTF-8, we don't have to do the roundtrip to // wchar_t* and back: @@ -325,7 +412,11 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, // UTF-8 sequence and psz may be invalid: if ( wxStringOperations::IsValidUtf8String(psz, nLength) ) { - return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength); + // we must pass the real string length to SubstrBufFromMB ctor + if ( nLength == npos ) + nLength = psz ? 
strlen(psz) : 0; + return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz, nLength), + nLength); } // else: do the roundtrip through wchar_t* } @@ -335,14 +426,14 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, // first convert to wide string: size_t wcLen; - wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen)); + wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen)); if ( !wcLen ) - return SubstrBufFromMB("", 0); + return SubstrBufFromMB(wxCharBuffer(""), 0); // and then to UTF-8: - SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvUTF8())); + SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8())); // widechar -> UTF-8 conversion isn't supposed to ever fail: - wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") ); + wxASSERT_MSG( buf.data, wxT("conversion to UTF-8 failed") ); return buf; } @@ -355,66 +446,111 @@ wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLengt { // anything to do? if ( !pwz || nLength == 0 ) - return SubstrBufFromWC("", 0); + return SubstrBufFromWC(wxCharBuffer(""), 0); if ( nLength == npos ) nLength = wxNO_LEN; size_t mbLen; - wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen)); + wxScopedCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen)); if ( !mbLen ) - return SubstrBufFromWC("", 0); + return SubstrBufFromWC(wxCharBuffer(""), 0); else return SubstrBufFromWC(mbBuf, mbLen); } #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE +// This std::string::c_str()-like method returns a wide char pointer to string +// contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return +// a pointer to the internal representation. Otherwise a conversion is required +// and it returns a temporary buffer. +// +// However for compatibility with c_str() and to avoid breaking existing code +// doing +// +// for ( const wchar_t *p = s.wc_str(); *p; p++ ) +// ... use *p... 
+// +// we actually need to ensure that the returned buffer is _not_ temporary and +// so we use wxString::m_convertedToWChar to store the returned data +#if !wxUSE_UNICODE_WCHAR -#if wxUSE_UNICODE_WCHAR - -//Convert wxString in Unicode mode to a multi-byte string -const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const +const wchar_t *wxString::AsWChar(const wxMBConv& conv) const { - return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL); -} + const char * const strMB = m_impl.c_str(); + const size_t lenMB = m_impl.length(); -#elif wxUSE_UNICODE_UTF8 + // find out the size of the buffer needed + const size_t lenWC = conv.ToWChar(NULL, 0, strMB, lenMB); + if ( lenWC == wxCONV_FAILED ) + return NULL; -const wxWCharBuffer wxString::wc_str() const -{ - return wxMBConvUTF8().cMB2WC(m_impl.c_str(), - m_impl.length() + 1 /* size, not length */, - NULL); + // keep the same buffer if the string size didn't change: this is not only + // an optimization but also ensure that code which modifies string + // character by character (without changing its length) can continue to use + // the pointer returned by a previous wc_str() call even after changing the + // string + + // TODO-UTF8: we could check for ">" instead of "!=" here as this would + // allow to save on buffer reallocations but at the cost of + // consuming (even) more memory, we should benchmark this to + // determine if it's worth doing + if ( !m_convertedToWChar.m_str || lenWC != m_convertedToWChar.m_len ) + { + if ( !const_cast(this)->m_convertedToWChar.Extend(lenWC) ) + return NULL; + } + + // finally do convert + m_convertedToWChar.m_str[lenWC] = L'\0'; + if ( conv.ToWChar(m_convertedToWChar.m_str, lenWC, + strMB, lenMB) == wxCONV_FAILED ) + return NULL; + + return m_convertedToWChar.m_str; } -const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const +#endif // !wxUSE_UNICODE_WCHAR + + +// Same thing for mb_str() which returns a normal char pointer to string +// contents: this 
always requires converting it to the specified encoding in +// non-ANSI build except if we need to convert to UTF-8 and this is what we +// already use internally. +#if wxUSE_UNICODE + +const char *wxString::AsChar(const wxMBConv& conv) const { +#if wxUSE_UNICODE_UTF8 if ( conv.IsUTF8() ) - return wxCharBuffer::CreateNonOwned(m_impl.c_str()); + return m_impl.c_str(); - // FIXME-UTF8: use wc_str() here once we have buffers with length + const wchar_t * const strWC = AsWChar(wxMBConvStrictUTF8()); + const size_t lenWC = m_convertedToWChar.m_len; +#else // wxUSE_UNICODE_WCHAR + const wchar_t * const strWC = m_impl.c_str(); + const size_t lenWC = m_impl.length(); +#endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR - size_t wcLen; - wxWCharBuffer wcBuf( - wxMBConvUTF8().cMB2WC(m_impl.c_str(), - m_impl.length() + 1 /* size, not length */, - &wcLen)); - if ( !wcLen ) - return wxCharBuffer(""); + const size_t lenMB = conv.FromWChar(NULL, 0, strWC, lenWC); + if ( lenMB == wxCONV_FAILED ) + return NULL; - return conv.cWC2MB(wcBuf, wcLen, NULL); -} + if ( !m_convertedToChar.m_str || lenMB != m_convertedToChar.m_len ) + { + if ( !const_cast(this)->m_convertedToChar.Extend(lenMB) ) + return NULL; + } -#else // ANSI + m_convertedToChar.m_str[lenMB] = '\0'; + if ( conv.FromWChar(m_convertedToChar.m_str, lenMB, + strWC, lenWC) == wxCONV_FAILED ) + return NULL; -//Converts this string to a wide character string if unicode -//mode is not enabled and wxUSE_WCHAR_T is enabled -const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const -{ - return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL); + return m_convertedToChar.m_str; } -#endif // Unicode/ANSI +#endif // wxUSE_UNICODE // shrink to minimal size (releasing extra memory) bool wxString::Shrink() @@ -505,7 +641,7 @@ wxString operator+(const wxString& str, const char *psz) wxString s; if ( !s.Alloc(strlen(psz) + str.length()) ) { - wxFAIL_MSG( _T("out of memory in wxString::operator+") ); + wxFAIL_MSG( 
wxT("out of memory in wxString::operator+") ); } s += str; s += psz; @@ -521,7 +657,7 @@ wxString operator+(const wxString& str, const wchar_t *pwz) wxString s; if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) { - wxFAIL_MSG( _T("out of memory in wxString::operator+") ); + wxFAIL_MSG( wxT("out of memory in wxString::operator+") ); } s += str; s += pwz; @@ -537,7 +673,7 @@ wxString operator+(const char *psz, const wxString& str) wxString s; if ( !s.Alloc(strlen(psz) + str.length()) ) { - wxFAIL_MSG( _T("out of memory in wxString::operator+") ); + wxFAIL_MSG( wxT("out of memory in wxString::operator+") ); } s = psz; s += str; @@ -553,7 +689,7 @@ wxString operator+(const wchar_t *pwz, const wxString& str) wxString s; if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) { - wxFAIL_MSG( _T("out of memory in wxString::operator+") ); + wxFAIL_MSG( wxT("out of memory in wxString::operator+") ); } s = pwz; s += str; @@ -771,7 +907,7 @@ size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const { - wxASSERT_MSG( nStart <= length(), _T("invalid index") ); + wxASSERT_MSG( nStart <= length(), wxT("invalid index") ); size_t idx = nStart; for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i ) @@ -785,7 +921,7 @@ size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const { - wxASSERT_MSG( nStart <= length(), _T("invalid index") ); + wxASSERT_MSG( nStart <= length(), wxT("invalid index") ); size_t idx = nStart; for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i ) @@ -818,7 +954,7 @@ size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const } else { - wxASSERT_MSG( nStart <= len, _T("invalid index") ); + wxASSERT_MSG( nStart <= len, wxT("invalid index") ); } size_t idx = nStart; @@ -842,7 +978,7 @@ size_t wxString::find_last_not_of(const wxChar* 
sz, size_t nStart, size_t n) con } else { - wxASSERT_MSG( nStart <= len, _T("invalid index") ); + wxASSERT_MSG( nStart <= len, wxT("invalid index") ); } size_t idx = nStart; @@ -858,7 +994,7 @@ size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) con size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const { - wxASSERT_MSG( nStart <= length(), _T("invalid index") ); + wxASSERT_MSG( nStart <= length(), wxT("invalid index") ); size_t idx = nStart; for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i ) @@ -880,7 +1016,7 @@ size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const } else { - wxASSERT_MSG( nStart <= len, _T("invalid index") ); + wxASSERT_MSG( nStart <= len, wxT("invalid index") ); } size_t idx = nStart; @@ -938,15 +1074,42 @@ size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart, int wxString::CmpNoCase(const wxString& s) const { - // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added +#if defined(__WXMSW__) && !wxUSE_UNICODE_UTF8 + // prefer to use CompareString() if available as it's more efficient than + // doing it manually or even using wxStricmp() (see #10375) + switch ( ::CompareString(LOCALE_USER_DEFAULT, NORM_IGNORECASE, + m_impl.c_str(), m_impl.length(), + s.m_impl.c_str(), s.m_impl.length()) ) + { + case CSTR_LESS_THAN: + return -1; + + case CSTR_EQUAL: + return 0; - size_t idx = 0; + case CSTR_GREATER_THAN: + return 1; + + default: + wxFAIL_MSG( "unexpected CompareString() return value" ); + // fall through + + case 0: + wxLogLastError("CompareString"); + // use generic code below + } +#endif // __WXMSW__ && !wxUSE_UNICODE_UTF8 + + // do the comparison manually: notice that we can't use wxStricmp() as it + // doesn't handle embedded NULs + + // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added const_iterator i1 = begin(); const_iterator end1 = end(); const_iterator i2 = s.begin(); const_iterator end2 = s.end(); - for ( ; i1 != end1 && i2 != end2; ++idx, 
++i1, ++i2 ) + for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 ) { wxUniChar lower1 = (wxChar)wxTolower(*i1); wxUniChar lower2 = (wxChar)wxTolower(*i2); @@ -973,48 +1136,48 @@ int wxString::CmpNoCase(const wxString& s) const #endif #endif -wxString wxString::FromAscii(const char *ascii) +wxString wxString::FromAscii(const char *ascii, size_t len) { - if (!ascii) + if (!ascii || len == 0) return wxEmptyString; - size_t len = strlen(ascii); wxString res; - if ( len ) { - wxImplStringBuffer buf(res, len); + wxStringInternalBuffer buf(res, len); wxStringCharType *dest = buf; - for ( ;; ) + for ( ; len > 0; --len ) { unsigned char c = (unsigned char)*ascii++; wxASSERT_MSG( c < 0x80, - _T("Non-ASCII value passed to FromAscii().") ); + wxT("Non-ASCII value passed to FromAscii().") ); *dest++ = (wchar_t)c; - - if ( c == '\0' ) - break; } } return res; } -wxString wxString::FromAscii(const char ascii) +wxString wxString::FromAscii(const char *ascii) +{ + return FromAscii(ascii, wxStrlen(ascii)); +} + +wxString wxString::FromAscii(char ascii) { // What do we do with '\0' ? 
unsigned char c = (unsigned char)ascii; - wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") ); + wxASSERT_MSG( c < 0x80, wxT("Non-ASCII value passed to FromAscii().") ); // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value return wxString(wxUniChar((wchar_t)c)); } -const wxCharBuffer wxString::ToAscii() const +const wxScopedCharBuffer wxString::ToAscii() const { // this will allocate enough space for the terminating NUL too wxCharBuffer buffer(length()); @@ -1063,7 +1226,7 @@ wxString wxString::Mid(size_t nFirst, size_t nCount) const wxString dest(*this, nFirst, nCount); if ( dest.length() != nCount ) { - wxFAIL_MSG( _T("out of memory in wxString::Mid") ); + wxFAIL_MSG( wxT("out of memory in wxString::Mid") ); } return dest; @@ -1113,12 +1276,12 @@ wxString wxString::Right(size_t nCount) const wxString dest(*this, length() - nCount, nCount); if ( dest.length() != nCount ) { - wxFAIL_MSG( _T("out of memory in wxString::Right") ); + wxFAIL_MSG( wxT("out of memory in wxString::Right") ); } return dest; } -// get all characters after the last occurence of ch +// get all characters after the last occurrence of ch // (returns the whole string if ch not found) wxString wxString::AfterLast(wxUniChar ch) const { @@ -1127,7 +1290,7 @@ wxString wxString::AfterLast(wxUniChar ch) const if ( iPos == wxNOT_FOUND ) str = *this; else - str = wx_str() + iPos + 1; + str.assign(*this, iPos + 1, npos); return str; } @@ -1140,21 +1303,22 @@ wxString wxString::Left(size_t nCount) const wxString dest(*this, 0, nCount); if ( dest.length() != nCount ) { - wxFAIL_MSG( _T("out of memory in wxString::Left") ); + wxFAIL_MSG( wxT("out of memory in wxString::Left") ); } return dest; } -// get all characters before the first occurence of ch +// get all characters before the first occurrence of ch // (returns the whole string if ch not found) wxString wxString::BeforeFirst(wxUniChar ch) const { int iPos = Find(ch); - if ( iPos == wxNOT_FOUND ) iPos = 
length(); + if ( iPos == wxNOT_FOUND ) + iPos = length(); return wxString(*this, 0, iPos); } -/// get all characters before the last occurence of ch +/// get all characters before the last occurrence of ch /// (returns empty string if ch not found) wxString wxString::BeforeLast(wxUniChar ch) const { @@ -1166,56 +1330,112 @@ wxString wxString::BeforeLast(wxUniChar ch) const return str; } -/// get all characters after the first occurence of ch +/// get all characters after the first occurrence of ch /// (returns empty string if ch not found) wxString wxString::AfterFirst(wxUniChar ch) const { wxString str; int iPos = Find(ch); if ( iPos != wxNOT_FOUND ) - str = wx_str() + iPos + 1; + str.assign(*this, iPos + 1, npos); return str; } -// replace first (or all) occurences of some substring with another one +// replace first (or all) occurrences of some substring with another one size_t wxString::Replace(const wxString& strOld, const wxString& strNew, bool bReplaceAll) { // if we tried to replace an empty string we'd enter an infinite loop below wxCHECK_MSG( !strOld.empty(), 0, - _T("wxString::Replace(): invalid parameter") ); + wxT("wxString::Replace(): invalid parameter") ); + + wxSTRING_INVALIDATE_CACHE(); size_t uiCount = 0; // count of replacements made - size_t uiOldLen = strOld.length(); - size_t uiNewLen = strNew.length(); + // optimize the special common case: replacement of one character by + // another one (in UTF-8 case we can only do this for ASCII characters) + // + // benchmarks show that this special version is around 3 times faster + // (depending on the proportion of matching characters and UTF-8/wchar_t + // build) + if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 ) + { + const wxStringCharType chOld = strOld.m_impl[0], + chNew = strNew.m_impl[0]; + + // this loop is the simplified version of the one below + for ( size_t pos = 0; ; ) + { + pos = m_impl.find(chOld, pos); + if ( pos == npos ) + break; + + m_impl[pos++] = chNew; - size_t 
dwPos = 0; + uiCount++; - while ( (*this)[dwPos] != wxT('\0') ) + if ( !bReplaceAll ) + break; + } + } + else if ( !bReplaceAll) { - //DO NOT USE STRSTR HERE - //this string can contain embedded null characters, - //so strstr will function incorrectly - dwPos = find(strOld, dwPos); - if ( dwPos == npos ) - break; // exit the loop - else + size_t pos = m_impl.find(strOld, 0); + if ( pos != npos ) { - //replace this occurance of the old string with the new one - replace(dwPos, uiOldLen, strNew, uiNewLen); + m_impl.replace(pos, strOld.m_impl.length(), strNew.m_impl); + uiCount = 1; + } + } + else // replace all occurrences + { + const size_t uiOldLen = strOld.m_impl.length(); + const size_t uiNewLen = strNew.m_impl.length(); - //move up pos past the string that was replaced - dwPos += uiNewLen; + // first scan the string to find all positions at which the replacement + // should be made + wxVector replacePositions; - //increase replace count + size_t pos; + for ( pos = m_impl.find(strOld.m_impl, 0); + pos != npos; + pos = m_impl.find(strOld.m_impl, pos + uiOldLen)) + { + replacePositions.push_back(pos); ++uiCount; + } - // stop now? 
- if ( !bReplaceAll ) - break; // exit the loop + if ( !uiCount ) + return 0; + + // allocate enough memory for the whole new string + wxString tmp; + tmp.m_impl.reserve(m_impl.length() + uiCount*(uiNewLen - uiOldLen)); + + // copy this string to tmp doing replacements on the fly + size_t replNum = 0; + for ( pos = 0; replNum < uiCount; replNum++ ) + { + const size_t nextReplPos = replacePositions[replNum]; + + if ( pos != nextReplPos ) + { + tmp.m_impl.append(m_impl, pos, nextReplPos - pos); + } + + tmp.m_impl.append(strNew.m_impl); + pos = nextReplPos + uiOldLen; + } + + if ( pos != m_impl.length() ) + { + // append the rest of the string unchanged + tmp.m_impl.append(m_impl, pos, m_impl.length() - pos); } + + swap(tmp); } return uiCount; @@ -1250,7 +1470,7 @@ bool wxString::IsNumber() const const_iterator i = begin(); - if ( *i == _T('-') || *i == _T('+') ) + if ( *i == wxT('-') || *i == wxT('+') ) ++i; for ( ; i != end(); ++i ) @@ -1290,14 +1510,28 @@ wxString& wxString::MakeLower() return *this; } +wxString& wxString::MakeCapitalized() +{ + const iterator en = end(); + iterator it = begin(); + if ( it != en ) + { + *it = (wxChar)wxToupper(*it); + for ( ++it; it != en; ++it ) + *it = (wxChar)wxTolower(*it); + } + + return *this; +} + // --------------------------------------------------------------------------- // trimming and padding // --------------------------------------------------------------------------- // some compilers (VC++ 6.0 not to name them) return true for a call to -// isspace('ê') in the C locale which seems to be broken to me, but we have to -// live with this by checking that the character is a 7 bit one - even if this -// may fail to detect some spaces (I don't know if Unicode doesn't have +// isspace('\xEA') in the C locale which seems to be broken to me, but we have +// to live with this by checking that the character is a 7 bit one - even if +// this may fail to detect some spaces (I don't know if Unicode doesn't have // space-like 
symbols somewhere except in the first 128 chars), it is arguably // still better than trimming away accented letters inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); } @@ -1318,7 +1552,7 @@ wxString& wxString::Trim(bool bFromRight) // find last non-space character reverse_iterator psz = rbegin(); while ( (psz != rend()) && wxSafeIsspace(*psz) ) - psz++; + ++psz; // truncate at trailing space start erase(psz.base(), end()); @@ -1328,7 +1562,7 @@ wxString& wxString::Trim(bool bFromRight) // find first non-space character iterator psz = begin(); while ( (psz != end()) && wxSafeIsspace(*psz) ) - psz++; + ++psz; // fix up data and length erase(begin(), psz); @@ -1386,72 +1620,111 @@ int wxString::Find(wxUniChar ch, bool bFromEnd) const // it out. Note that number extraction works correctly on UTF-8 strings, so // we can use wxStringCharType and wx_str() for maximum efficiency. -template -bool wxStringToIntType(const wxStringCharType *start, - T *val, - int base, - T (*func)(const wxStringCharType*, wxStringCharType**, int)) -{ - wxCHECK_MSG( val, false, _T("NULL output pointer") ); - wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); - #ifndef __WXWINCE__ - errno = 0; + #define DO_IF_NOT_WINCE(x) x +#else + #define DO_IF_NOT_WINCE(x) #endif +#define WX_STRING_TO_X_TYPE_START \ + wxCHECK_MSG( pVal, false, wxT("NULL output pointer") ); \ + DO_IF_NOT_WINCE( errno = 0; ) \ + const wxStringCharType *start = wx_str(); \ wxStringCharType *end; - *val = (*func)(start, &end, base); - // return true only if scan was stopped by the terminating NUL and if the - // string was not empty to start with and no under/overflow occurred - return !*end && (end != start) -#ifndef __WXWINCE__ - && (errno != ERANGE) -#endif - ; +// notice that we return false without modifying the output parameter at all if +// nothing could be parsed but we do modify it and return false then if we did +// parse something successfully but not the entire string 
+#define WX_STRING_TO_X_TYPE_END \ + if ( end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \ + return false; \ + *pVal = val; \ + return !*end; + +bool wxString::ToLong(long *pVal, int base) const +{ + wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") ); + + WX_STRING_TO_X_TYPE_START + long val = wxStrtol(start, &end, base); + WX_STRING_TO_X_TYPE_END } -bool wxString::ToLong(long *val, int base) const +bool wxString::ToULong(unsigned long *pVal, int base) const { - return wxStringToIntType(wx_str(), val, base, wxStrtol); + wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") ); + + WX_STRING_TO_X_TYPE_START + unsigned long val = wxStrtoul(start, &end, base); + WX_STRING_TO_X_TYPE_END } -bool wxString::ToULong(unsigned long *val, int base) const +bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const { - return wxStringToIntType(wx_str(), val, base, wxStrtoul); + wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") ); + + WX_STRING_TO_X_TYPE_START + wxLongLong_t val = wxStrtoll(start, &end, base); + WX_STRING_TO_X_TYPE_END } -bool wxString::ToLongLong(wxLongLong_t *val, int base) const +bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const { - return wxStringToIntType(wx_str(), val, base, wxStrtoll); + wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") ); + + WX_STRING_TO_X_TYPE_START + wxULongLong_t val = wxStrtoull(start, &end, base); + WX_STRING_TO_X_TYPE_END } -bool wxString::ToULongLong(wxULongLong_t *val, int base) const +bool wxString::ToDouble(double *pVal) const { - return wxStringToIntType(wx_str(), val, base, wxStrtoull); + WX_STRING_TO_X_TYPE_START + double val = wxStrtod(start, &end); + WX_STRING_TO_X_TYPE_END } -bool wxString::ToDouble(double *val) const +#if wxUSE_XLOCALE + +bool wxString::ToCLong(long *pVal, int base) const { - wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") ); + wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid 
base") ); -#ifndef __WXWINCE__ - errno = 0; + WX_STRING_TO_X_TYPE_START +#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT) + long val = wxStrtol_lA(start, &end, base, wxCLocale); +#else + long val = wxStrtol_l(start, &end, base, wxCLocale); #endif + WX_STRING_TO_X_TYPE_END +} - const wxChar *start = c_str(); - wxChar *end; - *val = wxStrtod(start, &end); +bool wxString::ToCULong(unsigned long *pVal, int base) const +{ + wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") ); - // return true only if scan was stopped by the terminating NUL and if the - // string was not empty to start with and no under/overflow occurred - return !*end && (end != start) -#ifndef __WXWINCE__ - && (errno != ERANGE) + WX_STRING_TO_X_TYPE_START +#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT) + unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale); +#else + unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale); #endif - ; + WX_STRING_TO_X_TYPE_END } +bool wxString::ToCDouble(double *pVal) const +{ + WX_STRING_TO_X_TYPE_START +#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT) + double val = wxStrtod_lA(start, &end, wxCLocale); +#else + double val = wxStrtod_l(start, &end, wxCLocale); +#endif + WX_STRING_TO_X_TYPE_END +} + +#endif // wxUSE_XLOCALE + // --------------------------------------------------------------------------- // formatted output // --------------------------------------------------------------------------- @@ -1541,6 +1814,60 @@ int wxString::DoPrintfUtf8(const char *format, ...) } #endif // wxUSE_UNICODE_UTF8 +/* + Uses wxVsnprintf and places the result into the this string. + + In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build + it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in + the ISO C99 (and thus SUSv3) standard the return value for the case of + an undersized buffer is inconsistent. 
For conforming vsnprintf + implementations the function must return the number of characters that + would have been printed had the buffer been large enough. For conforming + vswprintf implementations the function must return a negative number + and set errno. + + What vswprintf sets errno to is undefined but Darwin seems to set it to + EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of + those are defined in the standard and backed up by several conformance + statements. Note that ENOMEM mentioned in the manual page does not + apply to swprintf, only wprintf and fwprintf. + + Official manual page: + http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html + + Some conformance statements (AIX, Solaris): + http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3 + http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10 + + Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since + EILSEQ and EINVAL are specifically defined to mean the error is other than + an undersized buffer and no other errno are defined we treat those two + as meaning hard errors and everything else gets the old behavior which + is to keep looping and increasing buffer size until the function succeeds. + + In practice it's impossible to determine before compilation which behavior + may be used. The vswprintf function may have vsnprintf-like behavior or + vice-versa. Behavior detected on one release can theoretically change + with an updated release. Not to mention that configure testing for it + would require the test to be run on the host system, not the build system + which makes cross compilation difficult. Therefore, we make no assumptions + about behavior and try our best to handle every known case, including the + case where wxVsnprintf returns a negative number and fails to set errno. + + There is yet one more non-standard implementation and that is our own. 
+ Fortunately, that can be detected at compile-time. + + On top of all that, ISO C99 explicitly defines snprintf to write a null + character to the last position of the specified buffer. That would be at + at the given buffer size minus 1. It is supposed to do this even if it + turns out that the buffer is sized too small. + + Darwin (tested on 10.5) follows the C99 behavior exactly. + + Glibc 2.6 almost follows the C99 behavior except vswprintf never sets + errno even when it fails. However, it only seems to ever fail due + to an undersized buffer. +*/ #if wxUSE_UNICODE_UTF8 template #else @@ -1578,13 +1905,20 @@ static int DoStringPrintfV(wxString& str, // only a copy va_list argptrcopy; wxVaCopy(argptrcopy, argptr); + +#ifndef __WXWINCE__ + // Set errno to 0 to make it determinate if wxVsnprintf fails to set it. + errno = 0; +#endif int len = wxVsnprintf(buf, size, format, argptrcopy); va_end(argptrcopy); // some implementations of vsnprintf() don't NUL terminate // the string if there is not enough space for it so // always do it manually - buf[size] = _T('\0'); + // FIXME: This really seems to be the wrong and would be an off-by-one + // bug except the code above allocates an extra character. + buf[size] = wxT('\0'); // vsnprintf() may return either -1 (traditional Unix behaviour) or the // total number of characters which would have been written if the @@ -1605,19 +1939,33 @@ static int DoStringPrintfV(wxString& str, // assume it only returns error if there is not enough space, but // as we don't know how much we need, double the current size of // the buffer - size *= 2; +#ifndef __WXWINCE__ + if( (errno == EILSEQ) || (errno == EINVAL) ) + // If errno was set to one of the two well-known hard errors + // then fail immediately to avoid an infinite loop. 
+ return -1; + else +#endif // __WXWINCE__ + // still not enough, as we don't know how much we need, double the + // current size of the buffer + size *= 2; #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF } else if ( len >= size ) { #if wxUSE_WXVSNPRINTF - // we know that our own implementation of wxVsnprintf() returns + // we know that our own implementation of wxVsnprintf() returns // size+1 when there's not enough space but that's not the size // of the required buffer! size *= 2; // so we just double the current size of the buffer #else // some vsnprintf() implementations NUL-terminate the buffer and // some don't in len == size case, to be safe always add 1 + // FIXME: I don't quite understand this comment. The vsnprintf + // function is specifically defined to return the number of + // characters printed not including the null terminator. + // So OF COURSE you need to add 1 to get the right buffer size. + // The following line is definitely correct, no question. size = len + 1; #endif } @@ -1639,7 +1987,7 @@ int wxString::PrintfV(const wxString& format, va_list argptr) #if wxUSE_STL_BASED_WXSTRING typedef wxStringTypeBuffer Utf8Buffer; #else - typedef wxImplStringBuffer Utf8Buffer; + typedef wxStringInternalBuffer Utf8Buffer; #endif #endif @@ -1676,31 +2024,31 @@ bool wxString::Matches(const wxString& mask) const wxString pattern; pattern.reserve(wxStrlen(pszMask)); - pattern += _T('^'); + pattern += wxT('^'); while ( *pszMask ) { switch ( *pszMask ) { - case _T('?'): - pattern += _T('.'); + case wxT('?'): + pattern += wxT('.'); break; - case _T('*'): - pattern += _T(".*"); + case wxT('*'): + pattern += wxT(".*"); break; - case _T('^'): - case _T('.'): - case _T('$'): - case _T('('): - case _T(')'): - case _T('|'): - case _T('+'): - case _T('\\'): + case wxT('^'): + case wxT('.'): + case wxT('$'): + case wxT('('): + case wxT(')'): + case wxT('|'): + case wxT('+'): + case wxT('\\'): // these characters are special in a RE, quote them // (however note that we 
don't quote '[' and ']' to allow // using them for Unix shell like matching) - pattern += _T('\\'); + pattern += wxT('\\'); // fall through default: @@ -1709,7 +2057,7 @@ bool wxString::Matches(const wxString& mask) const pszMask++; } - pattern += _T('$'); + pattern += wxT('$'); // and now use it return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str()); @@ -1718,8 +2066,8 @@ bool wxString::Matches(const wxString& mask) const // FIXME-UTF8: implement using iterators, remove #if #if wxUSE_UNICODE_UTF8 - wxWCharBuffer maskBuf = mask.wc_str(); - wxWCharBuffer txtBuf = wc_str(); + const wxScopedWCharBuffer maskBuf = mask.wc_str(); + const wxScopedWCharBuffer txtBuf = wc_str(); const wxChar *pszMask = maskBuf.data(); const wxChar *pszTxt = txtBuf.data(); #else @@ -1821,9 +2169,3 @@ int wxString::Freq(wxUniChar ch) const return count; } -// convert to upper case, return the copy of the string -wxString wxString::Upper() const -{ wxString s(*this); return s.MakeUpper(); } - -// convert to lower case, return the copy of the string -wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }