X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/4b6a582bef796b43ad4cf0a96bd40bfd631460c6..a43a9e5521440dbb28037646ed4a07125c8823a9:/src/common/string.cpp?ds=sidebyside diff --git a/src/common/string.cpp b/src/common/string.cpp index f3d684f1a4..511068e101 100644 --- a/src/common/string.cpp +++ b/src/common/string.cpp @@ -24,6 +24,7 @@ #ifndef WX_PRECOMP #include "wx/string.h" #include "wx/wxcrtvararg.h" + #include "wx/log.h" #endif #include @@ -39,6 +40,10 @@ #include "wx/vector.h" #include "wx/xlocale.h" +#ifdef __WXMSW__ + #include "wx/msw/wrapwin.h" +#endif // __WXMSW__ + // string handling functions used by wxString: #if wxUSE_UNICODE_UTF8 #define wxStringMemcpy memcpy @@ -59,7 +64,7 @@ namespace wxPrivate { -static UntypedBufferData s_untypedNullData(NULL); +static UntypedBufferData s_untypedNullData(NULL, 0); UntypedBufferData * const untypedNullDataPtr = &s_untypedNullData; @@ -182,7 +187,7 @@ static wxStrCacheStatsDumper s_showCacheStats; wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str) { #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8 - const wxCharBuffer buf(str.AsCharBuf()); + const wxScopedCharBuffer buf(str.AsCharBuf()); if ( !buf ) os.clear(wxSTD ios_base::failbit); else @@ -199,13 +204,13 @@ wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str) return os << str.c_str(); } -wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str) +wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedCharBuffer& str) { return os << str.data(); } #ifndef __BORLANDC__ -wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str) +wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedWCharBuffer& str) { return os << str.data(); } @@ -223,7 +228,7 @@ wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str) return wos << str.AsWChar(); } -wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str) +wxSTD wostream& operator<<(wxSTD wostream& wos, const wxScopedWCharBuffer& str) { return wos << str.data(); } @@ -361,95 +366,6 @@ wxString::~wxString() } #endif -#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY -const char* wxCStrData::AsChar() const -{ -#if wxUSE_UNICODE_UTF8 - if ( wxLocaleIsUtf8 ) - return AsInternal(); -#endif - // under non-UTF8 locales, we have to convert the internal UTF-8 - // representation using wxConvLibc and cache the result - - wxString *str = wxConstCast(m_str, wxString); - - // convert the string: - // - // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we - // have it) but it's unfortunately not obvious to implement - // because we don't know how big buffer do we need for the - // given string length (in case of multibyte encodings, e.g. - // ISO-2022-JP or UTF-8 when internal representation is wchar_t) - // - // One idea would be to store more than just m_convertedToChar - // in wxString: then we could record the length of the string - // which was converted the last time and try to reuse the same - // buffer if the current length is not greater than it (this - // could still fail because string could have been modified in - // place but it would work most of the time, so we'd do it and - // only allocate the new buffer if in-place conversion returned - // an error). We could also store a bit saying if the string - // was modified since the last conversion (and update it in all - // operation modifying the string, of course) to avoid unneeded - // consequential conversions. But both of these ideas require - // adding more fields to wxString and require profiling results - // to be sure that we really gain enough from them to justify - // doing it. - wxCharBuffer buf(str->mb_str()); - - // if it failed, return empty string and not NULL to avoid crashes in code - // written with either wxWidgets 2 wxString or std::string behaviour in - // mind: neither of them ever returns NULL and so we shouldn't neither - if ( !buf ) - return ""; - - if ( str->m_convertedToChar && - strlen(buf) == strlen(str->m_convertedToChar) ) - { - // keep the same buffer for as long as possible, so that several calls - // to c_str() in a row still work: - strcpy(str->m_convertedToChar, buf); - } - else - { - str->m_convertedToChar = buf.release(); - } - - // and keep it: - return str->m_convertedToChar + m_offset; -} -#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY - -#if !wxUSE_UNICODE_WCHAR -const wchar_t* wxCStrData::AsWChar() const -{ - wxString *str = wxConstCast(m_str, wxString); - - // convert the string: - wxWCharBuffer buf(str->wc_str()); - - // notice that here, unlike above in AsChar(), conversion can't fail as our - // internal UTF-8 is always well-formed -- or the string was corrupted and - // all bets are off anyhow - - // FIXME-UTF8: do the conversion in-place in the existing buffer - if ( str->m_convertedToWChar && - wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) ) - { - // keep the same buffer for as long as possible, so that several calls - // to c_str() in a row still work: - memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf)); - } - else - { - str->m_convertedToWChar = buf.release(); - } - - // and keep it: - return str->m_convertedToWChar + m_offset; -} -#endif // !wxUSE_UNICODE_WCHAR - // =========================================================================== // wxString class core // =========================================================================== @@ -465,15 +381,15 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, { // anything to do? if ( !psz || nLength == 0 ) - return SubstrBufFromMB(L"", 0); + return SubstrBufFromMB(wxWCharBuffer(L""), 0); if ( nLength == npos ) nLength = wxNO_LEN; size_t wcLen; - wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen)); + wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen)); if ( !wcLen ) - return SubstrBufFromMB(_T(""), 0); + return SubstrBufFromMB(wxWCharBuffer(L""), 0); else return SubstrBufFromMB(wcBuf, wcLen); } @@ -486,7 +402,7 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, { // anything to do? if ( !psz || nLength == 0 ) - return SubstrBufFromMB("", 0); + return SubstrBufFromMB(wxCharBuffer(""), 0); // if psz is already in UTF-8, we don't have to do the roundtrip to // wchar_t* and back: @@ -499,7 +415,8 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, // we must pass the real string length to SubstrBufFromMB ctor if ( nLength == npos ) nLength = psz ? strlen(psz) : 0; - return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength); + return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz, nLength), + nLength); } // else: do the roundtrip through wchar_t* } @@ -509,9 +426,9 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, // first convert to wide string: size_t wcLen; - wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen)); + wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen)); if ( !wcLen ) - return SubstrBufFromMB("", 0); + return SubstrBufFromMB(wxCharBuffer(""), 0); // and then to UTF-8: SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8())); @@ -529,71 +446,111 @@ wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLengt { // anything to do? if ( !pwz || nLength == 0 ) - return SubstrBufFromWC("", 0); + return SubstrBufFromWC(wxCharBuffer(""), 0); if ( nLength == npos ) nLength = wxNO_LEN; size_t mbLen; - wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen)); + wxScopedCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen)); if ( !mbLen ) - return SubstrBufFromWC("", 0); + return SubstrBufFromWC(wxCharBuffer(""), 0); else return SubstrBufFromWC(mbBuf, mbLen); } #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE +// This std::string::c_str()-like method returns a wide char pointer to string +// contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return +// a pointer to the internal representation. Otherwise a conversion is required +// and it returns a temporary buffer. +// +// However for compatibility with c_str() and to avoid breaking existing code +// doing +// +// for ( const wchar_t *p = s.wc_str(); *p; p++ ) +// ... use *p... +// +// we actually need to ensure that the returned buffer is _not_ temporary and +// so we use wxString::m_convertedToWChar to store the returned data +#if !wxUSE_UNICODE_WCHAR -#if wxUSE_UNICODE_WCHAR - -//Convert wxString in Unicode mode to a multi-byte string -const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const +const wchar_t *wxString::AsWChar(const wxMBConv& conv) const { - return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL); -} + const char * const strMB = m_impl.c_str(); + const size_t lenMB = m_impl.length(); -#elif wxUSE_UNICODE_UTF8 + // find out the size of the buffer needed + const size_t lenWC = conv.ToWChar(NULL, 0, strMB, lenMB); + if ( lenWC == wxCONV_FAILED ) + return NULL; -const wxWCharBuffer wxString::wc_str() const -{ - return wxMBConvStrictUTF8().cMB2WC - ( - m_impl.c_str(), - m_impl.length() + 1, // size, not length - NULL - ); + // keep the same buffer if the string size didn't change: this is not only + // an optimization but also ensure that code which modifies string + // character by character (without changing its length) can continue to use + // the pointer returned by a previous wc_str() call even after changing the + // string + + // TODO-UTF8: we could check for ">" instead of "!=" here as this would + // allow to save on buffer reallocations but at the cost of + // consuming (even) more memory, we should benchmark this to + // determine if it's worth doing + if ( !m_convertedToWChar.m_str || lenWC != m_convertedToWChar.m_len ) + { + if ( !const_cast(this)->m_convertedToWChar.Extend(lenWC) ) + return NULL; + } + + // finally do convert + m_convertedToWChar.m_str[lenWC] = L'\0'; + if ( conv.ToWChar(m_convertedToWChar.m_str, lenWC, + strMB, lenMB) == wxCONV_FAILED ) + return NULL; + + return m_convertedToWChar.m_str; } -const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const +#endif // !wxUSE_UNICODE_WCHAR + + +// Same thing for mb_str() which returns a normal char pointer to string +// contents: this always requires converting it to the specified encoding in +// non-ANSI build except if we need to convert to UTF-8 and this is what we +// already use internally. +#if wxUSE_UNICODE + +const char *wxString::AsChar(const wxMBConv& conv) const { +#if wxUSE_UNICODE_UTF8 if ( conv.IsUTF8() ) - return wxCharBuffer::CreateNonOwned(m_impl.c_str()); + return m_impl.c_str(); - // FIXME-UTF8: use wc_str() here once we have buffers with length + const wchar_t * const strWC = AsWChar(wxMBConvStrictUTF8()); + const size_t lenWC = m_convertedToWChar.m_len; +#else // wxUSE_UNICODE_WCHAR + const wchar_t * const strWC = m_impl.c_str(); + const size_t lenWC = m_impl.length(); +#endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR - size_t wcLen; - wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC - ( - m_impl.c_str(), - m_impl.length() + 1, // size - &wcLen - )); - if ( !wcLen ) - return wxCharBuffer(""); + const size_t lenMB = conv.FromWChar(NULL, 0, strWC, lenWC); + if ( lenMB == wxCONV_FAILED ) + return NULL; - return conv.cWC2MB(wcBuf, wcLen+1, NULL); -} + if ( !m_convertedToChar.m_str || lenMB != m_convertedToChar.m_len ) + { + if ( !const_cast(this)->m_convertedToChar.Extend(lenMB) ) + return NULL; + } -#else // ANSI + m_convertedToChar.m_str[lenMB] = '\0'; + if ( conv.FromWChar(m_convertedToChar.m_str, lenMB, + strWC, lenWC) == wxCONV_FAILED ) + return NULL; -//Converts this string to a wide character string if unicode -//mode is not enabled and wxUSE_WCHAR_T is enabled -const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const -{ - return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL); + return m_convertedToChar.m_str; } -#endif // Unicode/ANSI +#endif // wxUSE_UNICODE // shrink to minimal size (releasing extra memory) bool wxString::Shrink() @@ -1117,9 +1074,36 @@ size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart, int wxString::CmpNoCase(const wxString& s) const { -#if wxUSE_UNICODE_UTF8 - // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added +#if defined(__WXMSW__) && !wxUSE_UNICODE_UTF8 + // prefer to use CompareString() if available as it's more efficient than + // doing it manual or even using wxStricmp() (see #10375) + switch ( ::CompareString(LOCALE_USER_DEFAULT, NORM_IGNORECASE, + m_impl.c_str(), m_impl.length(), + s.m_impl.c_str(), s.m_impl.length()) ) + { + case CSTR_LESS_THAN: + return -1; + + case CSTR_EQUAL: + return 0; + + case CSTR_GREATER_THAN: + return 1; + default: + wxFAIL_MSG( "unexpected CompareString() return value" ); + // fall through + + case 0: + wxLogLastError("CompareString"); + // use generic code below + } +#endif // __WXMSW__ && !wxUSE_UNICODE_UTF8 + + // do the comparison manually: notice that we can't use wxStricmp() as it + // doesn't handle embedded NULs + + // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added const_iterator i1 = begin(); const_iterator end1 = end(); const_iterator i2 = s.begin(); @@ -1141,9 +1125,6 @@ int wxString::CmpNoCase(const wxString& s) const else if ( len1 > len2 ) return 1; return 0; -#else // wxUSE_UNICODE_WCHAR or ANSI - return wxStricmp(m_impl.c_str(), s.m_impl.c_str()); -#endif } @@ -1196,7 +1177,7 @@ wxString wxString::FromAscii(char ascii) return wxString(wxUniChar((wchar_t)c)); } -const wxCharBuffer wxString::ToAscii() const +const wxScopedCharBuffer wxString::ToAscii() const { // this will allocate enough space for the terminating NUL too wxCharBuffer buffer(length()); @@ -1710,7 +1691,7 @@ bool wxString::ToCLong(long *pVal, int base) const wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); WX_STRING_TO_X_TYPE_START -#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE +#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT) long val = wxStrtol_lA(start, &end, base, wxCLocale); #else long val = wxStrtol_l(start, &end, base, wxCLocale); @@ -1723,7 +1704,7 @@ bool wxString::ToCULong(unsigned long *pVal, int base) const wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); WX_STRING_TO_X_TYPE_START -#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE +#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT) unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale); #else unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale); @@ -1734,7 +1715,7 @@ bool wxString::ToCULong(unsigned long *pVal, int base) const bool wxString::ToCDouble(double *pVal) const { WX_STRING_TO_X_TYPE_START -#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE +#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT) double val = wxStrtod_lA(start, &end, wxCLocale); #else double val = wxStrtod_l(start, &end, wxCLocale); @@ -2085,8 +2066,8 @@ bool wxString::Matches(const wxString& mask) const // FIXME-UTF8: implement using iterators, remove #if #if wxUSE_UNICODE_UTF8 - wxWCharBuffer maskBuf = mask.wc_str(); - wxWCharBuffer txtBuf = wc_str(); + const wxScopedWCharBuffer maskBuf = mask.wc_str(); + const wxScopedWCharBuffer txtBuf = wc_str(); const wxChar *pszMask = maskBuf.data(); const wxChar *pszTxt = txtBuf.data(); #else