X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/8a540c887cd30960c77e6c171ecabe30273321a8..f13a38a5fae79b1416c97e9ffde8b51016924512:/src/common/string.cpp diff --git a/src/common/string.cpp b/src/common/string.cpp index 14e90941a3..25f604519c 100644 --- a/src/common/string.cpp +++ b/src/common/string.cpp @@ -10,13 +10,6 @@ // Licence: wxWindows licence ///////////////////////////////////////////////////////////////////////////// -/* - * About ref counting: - * 1) all empty strings use g_strEmpty, nRefs = -1 (set in Init()) - * 2) AllocBuffer() sets nRefs to 1, Lock() increments it by one - * 3) Unlock() decrements nRefs and frees memory if it goes to 0 - */ - // =========================================================================== // headers, declarations, constants // =========================================================================== @@ -45,6 +38,7 @@ #include #endif +#include "wx/hashmap.h" // string handling functions used by wxString: #if wxUSE_UNICODE_UTF8 @@ -79,9 +73,9 @@ wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str) { // FIXME-UTF8: always, not only if wxUSE_UNICODE #if wxUSE_UNICODE && !defined(__BORLANDC__) - return os << str.AsWChar(); + return os << (const wchar_t*)str.AsWCharBuf(); #else - return os << str.AsChar(); + return os << (const char*)str.AsCharBuf(); #endif } @@ -108,18 +102,191 @@ wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str) // wxString class core // =========================================================================== +#if wxUSE_UNICODE_UTF8 + +void wxString::PosLenToImpl(size_t pos, size_t len, + size_t *implPos, size_t *implLen) const +{ + if ( pos == npos ) + *implPos = npos; + else + { + const_iterator i = begin() + pos; + *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin(); + if ( len == npos ) + *implLen = npos; + else + { + // too large length is interpreted as "to the end of the string" + // FIXME-UTF8: verify this is the case in std::string, assert + // otherwise + if ( pos + len > length() ) + len = length() - pos; + + *implLen = (i + len).impl() - i.impl(); + } + } +} + +#endif // wxUSE_UNICODE_UTF8 + +// ---------------------------------------------------------------------------- +// wxCStrData converted strings caching +// ---------------------------------------------------------------------------- + +// FIXME-UTF8: temporarily disabled because it doesn't work with global +// string objects; re-enable after fixing this bug and benchmarking +// performance to see if using a hash is a good idea at all +#if 0 + +// For backward compatibility reasons, it must be possible to assign the value +// returned by wxString::c_str() to a char* or wchar_t* variable and work with +// it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick, +// because the memory would be freed immediately, but it has to be valid as long +// as the string is not modified, so that code like this still works: +// +// const wxChar *s = str.c_str(); +// while ( s ) { ... } + +// FIXME-UTF8: not thread safe! +// FIXME-UTF8: we currently clear the cached conversion only when the string is +// destroyed, but we should do it when the string is modified, to +// keep memory usage down +// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we +// invalidated the cache on every change, we could keep the previous +// conversion +// FIXME-UTF8: add tracing of usage of these two methods - new code is supposed +// to use mb_str() or wc_str() instead of (const [w]char*)c_str() + +template +static inline void DeleteStringFromConversionCache(T& hash, const wxString *s) +{ + typename T::iterator i = hash.find(wxConstCast(s, wxString)); + if ( i != hash.end() ) + { + free(i->second); + hash.erase(i); + } +} + +#if wxUSE_UNICODE +// NB: non-STL implementation doesn't compile with "const wxString*" key type, +// so we have to use wxString* here and const-cast when used +WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual, + wxStringCharConversionCache); +static wxStringCharConversionCache gs_stringsCharCache; + +const char* wxCStrData::AsChar() const +{ + // remove previously cache value, if any (see FIXMEs above): + DeleteStringFromConversionCache(gs_stringsCharCache, m_str); + + // convert the string and keep it: + const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] = + m_str->mb_str().release(); + + return s + m_offset; +} +#endif // wxUSE_UNICODE + +#if !wxUSE_UNICODE_WCHAR +WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual, + wxStringWCharConversionCache); +static wxStringWCharConversionCache gs_stringsWCharCache; + +const wchar_t* wxCStrData::AsWChar() const +{ + // remove previously cache value, if any (see FIXMEs above): + DeleteStringFromConversionCache(gs_stringsWCharCache, m_str); + + // convert the string and keep it: + const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] = + m_str->wc_str().release(); + + return s + m_offset; +} +#endif // !wxUSE_UNICODE_WCHAR + +wxString::~wxString() +{ +#if wxUSE_UNICODE + // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8 + DeleteStringFromConversionCache(gs_stringsCharCache, this); +#endif +#if !wxUSE_UNICODE_WCHAR + DeleteStringFromConversionCache(gs_stringsWCharCache, this); +#endif +} +#endif + +#if wxUSE_UNICODE +const char* wxCStrData::AsChar() const +{ + wxString *str = wxConstCast(m_str, wxString); + + // convert the string: + wxCharBuffer buf(str->mb_str()); + + // FIXME-UTF8: do the conversion in-place in the existing buffer + if ( str->m_convertedToChar && + strlen(buf) == strlen(str->m_convertedToChar) ) + { + // keep the same buffer for as long as possible, so that several calls + // to c_str() in a row still work: + strcpy(str->m_convertedToChar, buf); + } + else + { + str->m_convertedToChar = buf.release(); + } + + // and keep it: + return str->m_convertedToChar + m_offset; +} +#endif // wxUSE_UNICODE + +#if !wxUSE_UNICODE_WCHAR +const wchar_t* wxCStrData::AsWChar() const +{ + wxString *str = wxConstCast(m_str, wxString); + + // convert the string: + wxWCharBuffer buf(str->wc_str()); + + // FIXME-UTF8: do the conversion in-place in the existing buffer + if ( str->m_convertedToWChar && + wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) ) + { + // keep the same buffer for as long as possible, so that several calls + // to c_str() in a row still work: + memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf)); + } + else + { + str->m_convertedToWChar = buf.release(); + } + + // and keep it: + return str->m_convertedToWChar + m_offset; +} +#endif // !wxUSE_UNICODE_WCHAR + +// =========================================================================== +// wxString class core +// =========================================================================== + // --------------------------------------------------------------------------- // construction and conversion // --------------------------------------------------------------------------- -#if wxUSE_UNICODE +#if wxUSE_UNICODE_WCHAR /* static */ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, const wxMBConv& conv) { // anything to do? if ( !psz || nLength == 0 ) - return SubstrBufFromMB(); + return SubstrBufFromMB(L"", 0); if ( nLength == npos ) nLength = wxNO_LEN; @@ -127,18 +294,51 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, size_t wcLen; wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen)); if ( !wcLen ) - return SubstrBufFromMB(); + return SubstrBufFromMB(_T(""), 0); else return SubstrBufFromMB(wcBuf, wcLen); } -#else +#endif // wxUSE_UNICODE_WCHAR + +#if wxUSE_UNICODE_UTF8 +/* static */ +wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, + const wxMBConv& conv) +{ + // FIXME-UTF8: return as-is without copying under UTF8 locale, return + // converted string under other locales - needs wxCharBuffer + // changes + + // anything to do? + if ( !psz || nLength == 0 ) + return SubstrBufFromMB("", 0); + + if ( nLength == npos ) + nLength = wxNO_LEN; + + // first convert to wide string: + size_t wcLen; + wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen)); + if ( !wcLen ) + return SubstrBufFromMB("", 0); + + // and then to UTF-8: + SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxConvUTF8)); + // widechar -> UTF-8 conversion isn't supposed to ever fail: + wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") ); + + return buf; +} +#endif // wxUSE_UNICODE_UTF8 + +#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE /* static */ wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength, const wxMBConv& conv) { // anything to do? if ( !pwz || nLength == 0 ) - return SubstrBufFromWC(); + return SubstrBufFromWC("", 0); if ( nLength == npos ) nLength = wxNO_LEN; @@ -146,34 +346,56 @@ wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLengt size_t mbLen; wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen)); if ( !mbLen ) - return SubstrBufFromWC(); + return SubstrBufFromWC("", 0); else return SubstrBufFromWC(mbBuf, mbLen); } -#endif +#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE -#if wxUSE_UNICODE +#if wxUSE_UNICODE_WCHAR //Convert wxString in Unicode mode to a multi-byte string const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const { - return conv.cWC2MB(c_str(), length() + 1 /* size, not length */, NULL); + return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL); } -#else // ANSI +#elif wxUSE_UNICODE_UTF8 + +const wxWCharBuffer wxString::wc_str() const +{ + return wxConvUTF8.cMB2WC(m_impl.c_str(), + m_impl.length() + 1 /* size, not length */, + NULL); +} + +const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const +{ + // FIXME-UTF8: optimize the case when conv==wxConvUTF8 or wxConvLibc + // under UTF8 locale + // FIXME-UTF8: use wc_str() here once we have buffers with length + + size_t wcLen; + wxWCharBuffer wcBuf( + wxConvUTF8.cMB2WC(m_impl.c_str(), + m_impl.length() + 1 /* size, not length */, + &wcLen)); + if ( !wcLen ) + return wxCharBuffer(""); -#if wxUSE_WCHAR_T + return conv.cWC2MB(wcBuf, wcLen, NULL); +} + +#else // ANSI //Converts this string to a wide character string if unicode //mode is not enabled and wxUSE_WCHAR_T is enabled const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const { - return conv.cMB2WC(c_str(), length() + 1 /* size, not length */, NULL); + return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL); } -#endif // wxUSE_WCHAR_T - #endif // Unicode/ANSI // shrink to minimal size (releasing extra memory) @@ -186,7 +408,7 @@ bool wxString::Shrink() // deprecated compatibility code: #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8 -wxChar *wxString::GetWriteBuf(size_t nLen) +wxStringCharType *wxString::GetWriteBuf(size_t nLen) { return DoGetWriteBuf(nLen); } @@ -854,7 +1076,8 @@ bool wxString::EndsWith(const wxChar *suffix, wxString *rest) const wxASSERT_MSG( suffix, _T("invalid parameter in wxString::EndssWith") ); int start = length() - wxStrlen(suffix); - if ( start < 0 || wxStrcmp(wx_str() + start, suffix) != 0 ) + + if ( start < 0 || compare(start, npos, suffix) != 0 ) return false; if ( rest ) @@ -985,34 +1208,43 @@ size_t wxString::Replace(const wxString& strOld, bool wxString::IsAscii() const { - const wxChar *s = (const wxChar*) *this; - while(*s){ - if(!isascii(*s)) return(false); - s++; - } - return(true); + for ( const_iterator i = begin(); i != end(); ++i ) + { + if ( !(*i).IsAscii() ) + return false; + } + + return true; } bool wxString::IsWord() const { - const wxChar *s = (const wxChar*) *this; - while(*s){ - if(!wxIsalpha(*s)) return(false); - s++; - } - return(true); + for ( const_iterator i = begin(); i != end(); ++i ) + { + if ( !wxIsalpha(*i) ) + return false; + } + + return true; } bool wxString::IsNumber() const { - const wxChar *s = (const wxChar*) *this; - if (wxStrlen(s)) - if ((s[0] == wxT('-')) || (s[0] == wxT('+'))) s++; - while(*s){ - if(!wxIsdigit(*s)) return(false); - s++; - } - return(true); + if ( empty() ) + return true; + + const_iterator i = begin(); + + if ( *i == _T('-') || *i == _T('+') ) + ++i; + + for ( ; i != end(); ++i ) + { + if ( !wxIsdigit(*i) ) + return false; + } + + return true; } wxString wxString::Strip(stripType w) const @@ -1175,26 +1407,12 @@ bool wxString::ToULong(unsigned long *val, int base) const bool wxString::ToLongLong(wxLongLong_t *val, int base) const { -#ifdef wxHAS_STRTOLL return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoll); -#else - // TODO: implement this ourselves - wxUnusedVar(val); - wxUnusedVar(base); - return false; -#endif // wxHAS_STRTOLL } bool wxString::ToULongLong(wxULongLong_t *val, int base) const { -#ifdef wxHAS_STRTOLL return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoull); -#else - // TODO: implement this ourselves - wxUnusedVar(val); - wxUnusedVar(base); - return false; -#endif } bool wxString::ToDouble(double *val) const @@ -1292,7 +1510,7 @@ int wxString::PrintfV(const wxString& format, va_list argptr) // only a copy va_list argptrcopy; wxVaCopy(argptrcopy, argptr); - int len = wxVsnprintf(buf, size, format, argptrcopy); + int len = wxVsnprintf(buf, size, (const wxChar*)/*FIXME-UTF8*/format, argptrcopy); va_end(argptrcopy); // some implementations of vsnprintf() don't NUL terminate @@ -1510,4 +1728,3 @@ wxString wxString::Upper() const // convert to lower case, return the copy of the string wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); } -