X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/ba7e7253c67aa0b098d8a00e6d2052db75775cda..bc9666c99939f6f1a6fc11fd10fa2db3deb747ae:/src/common/string.cpp diff --git a/src/common/string.cpp b/src/common/string.cpp index 762410411d..f3e8c0b260 100644 --- a/src/common/string.cpp +++ b/src/common/string.cpp @@ -57,16 +57,20 @@ #define wxStringStrlen wxStrlen #endif -// ---------------------------------------------------------------------------- -// global variables -// ---------------------------------------------------------------------------- - +// define a function declared in wx/buffer.h here as we don't have buffer.cpp +// and don't want to add it just because of this simple function namespace wxPrivate { -static UntypedBufferData s_untypedNullData(NULL, 0); +// wxXXXBuffer classes can be (implicitly) used during global statics +// initialization so wrap the status UntypedBufferData variable in a function +// to make it safe to access it even before all global statics are initialized +UntypedBufferData *GetUntypedNullData() +{ + static UntypedBufferData s_untypedNullData(NULL, 0); -UntypedBufferData * const untypedNullDataPtr = &s_untypedNullData; + return &s_untypedNullData; +} } // namespace wxPrivate @@ -366,95 +370,6 @@ wxString::~wxString() } #endif -#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY -const char* wxCStrData::AsChar() const -{ -#if wxUSE_UNICODE_UTF8 - if ( wxLocaleIsUtf8 ) - return AsInternal(); -#endif - // under non-UTF8 locales, we have to convert the internal UTF-8 - // representation using wxConvLibc and cache the result - - wxString *str = wxConstCast(m_str, wxString); - - // convert the string: - // - // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we - // have it) but it's unfortunately not obvious to implement - // because we don't know how big buffer do we need for the - // given string length (in case of multibyte encodings, e.g. 
- // ISO-2022-JP or UTF-8 when internal representation is wchar_t) - // - // One idea would be to store more than just m_convertedToChar - // in wxString: then we could record the length of the string - // which was converted the last time and try to reuse the same - // buffer if the current length is not greater than it (this - // could still fail because string could have been modified in - // place but it would work most of the time, so we'd do it and - // only allocate the new buffer if in-place conversion returned - // an error). We could also store a bit saying if the string - // was modified since the last conversion (and update it in all - // operation modifying the string, of course) to avoid unneeded - // consequential conversions. But both of these ideas require - // adding more fields to wxString and require profiling results - // to be sure that we really gain enough from them to justify - // doing it. - wxScopedCharBuffer buf(str->mb_str()); - - // if it failed, return empty string and not NULL to avoid crashes in code - // written with either wxWidgets 2 wxString or std::string behaviour in - // mind: neither of them ever returns NULL and so we shouldn't neither - if ( !buf ) - return ""; - - if ( str->m_convertedToChar && - strlen(buf) == strlen(str->m_convertedToChar) ) - { - // keep the same buffer for as long as possible, so that several calls - // to c_str() in a row still work: - strcpy(str->m_convertedToChar, buf); - } - else - { - str->m_convertedToChar = buf.release(); - } - - // and keep it: - return str->m_convertedToChar + m_offset; -} -#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY - -#if !wxUSE_UNICODE_WCHAR -const wchar_t* wxCStrData::AsWChar() const -{ - wxString *str = wxConstCast(m_str, wxString); - - // convert the string: - wxScopedWCharBuffer buf(str->wc_str()); - - // notice that here, unlike above in AsChar(), conversion can't fail as our - // internal UTF-8 is always well-formed -- or the string was corrupted and - // all 
bets are off anyhow - - // FIXME-UTF8: do the conversion in-place in the existing buffer - if ( str->m_convertedToWChar && - wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) ) - { - // keep the same buffer for as long as possible, so that several calls - // to c_str() in a row still work: - memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf)); - } - else - { - str->m_convertedToWChar = buf.release(); - } - - // and keep it: - return str->m_convertedToWChar + m_offset; -} -#endif // !wxUSE_UNICODE_WCHAR - // =========================================================================== // wxString class core // =========================================================================== @@ -522,7 +437,7 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, // and then to UTF-8: SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8())); // widechar -> UTF-8 conversion isn't supposed to ever fail: - wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") ); + wxASSERT_MSG( buf.data, wxT("conversion to UTF-8 failed") ); return buf; } @@ -549,61 +464,97 @@ wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLengt } #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE +// This std::string::c_str()-like method returns a wide char pointer to string +// contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return +// a pointer to the internal representation. Otherwise a conversion is required +// and it returns a temporary buffer. +// +// However for compatibility with c_str() and to avoid breaking existing code +// doing +// +// for ( const wchar_t *p = s.wc_str(); *p; p++ ) +// ... use *p... 
+// +// we actually need to ensure that the returned buffer is _not_ temporary and +// so we use wxString::m_convertedToWChar to store the returned data +#if !wxUSE_UNICODE_WCHAR -#if wxUSE_UNICODE_WCHAR - -//Convert wxString in Unicode mode to a multi-byte string -const wxScopedCharBuffer wxString::mb_str(const wxMBConv& conv) const +const wchar_t *wxString::AsWChar(const wxMBConv& conv) const { - // NB: Length passed to cWC2MB() doesn't include terminating NUL, it's - // added by it automatically. If we passed length()+1 here, it would - // create a buffer with 2 trailing NULs of length one greater than - // expected. - return conv.cWC2MB(wx_str(), length(), NULL); -} + const char * const strMB = m_impl.c_str(); + const size_t lenMB = m_impl.length(); -#elif wxUSE_UNICODE_UTF8 + // find out the size of the buffer needed + const size_t lenWC = conv.ToWChar(NULL, 0, strMB, lenMB); + if ( lenWC == wxCONV_FAILED ) + return NULL; -const wxScopedWCharBuffer wxString::wc_str() const -{ - // NB: Length passed to cMB2WC() doesn't include terminating NUL, it's - // added by it automatically. If we passed length()+1 here, it would - // create a buffer with 2 trailing NULs of length one greater than - // expected. 
- return wxMBConvStrictUTF8().cMB2WC - ( - m_impl.c_str(), - m_impl.length(), - NULL - ); + // keep the same buffer if the string size didn't change: this is not only + // an optimization but also ensures that code which modifies string + // character by character (without changing its length) can continue to use + // the pointer returned by a previous wc_str() call even after changing the + // string + + // TODO-UTF8: we could check for ">" instead of "!=" here as this would + // allow to save on buffer reallocations but at the cost of + // consuming (even) more memory, we should benchmark this to + // determine if it's worth doing + if ( !m_convertedToWChar.m_str || lenWC != m_convertedToWChar.m_len ) + { + if ( !const_cast<wxString *>(this)->m_convertedToWChar.Extend(lenWC) ) + return NULL; + } + + // finally do convert + m_convertedToWChar.m_str[lenWC] = L'\0'; + if ( conv.ToWChar(m_convertedToWChar.m_str, lenWC, + strMB, lenMB) == wxCONV_FAILED ) + return NULL; + + return m_convertedToWChar.m_str; } -const wxScopedCharBuffer wxString::mb_str(const wxMBConv& conv) const +#endif // !wxUSE_UNICODE_WCHAR + + +// Same thing for mb_str() which returns a normal char pointer to string +// contents: this always requires converting it to the specified encoding in +// non-ANSI build except if we need to convert to UTF-8 and this is what we +// already use internally. 
+#if wxUSE_UNICODE + +const char *wxString::AsChar(const wxMBConv& conv) const { +#if wxUSE_UNICODE_UTF8 if ( conv.IsUTF8() ) - return wxScopedCharBuffer::CreateNonOwned(m_impl.c_str(), m_impl.length()); + return m_impl.c_str(); - wxScopedWCharBuffer wcBuf(wc_str()); - if ( !wcBuf.length() ) - return wxCharBuffer(""); + const wchar_t * const strWC = AsWChar(wxMBConvStrictUTF8()); + const size_t lenWC = m_convertedToWChar.m_len; +#else // wxUSE_UNICODE_WCHAR + const wchar_t * const strWC = m_impl.c_str(); + const size_t lenWC = m_impl.length(); +#endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR - return conv.cWC2MB(wcBuf.data(), wcBuf.length(), NULL); -} + const size_t lenMB = conv.FromWChar(NULL, 0, strWC, lenWC); + if ( lenMB == wxCONV_FAILED ) + return NULL; -#else // ANSI + if ( !m_convertedToChar.m_str || lenMB != m_convertedToChar.m_len ) + { + if ( !const_cast<wxString *>(this)->m_convertedToChar.Extend(lenMB) ) + return NULL; + } -//Converts this string to a wide character string if unicode -//mode is not enabled and wxUSE_WCHAR_T is enabled -const wxScopedWCharBuffer wxString::wc_str(const wxMBConv& conv) const -{ - // NB: Length passed to cMB2WC() doesn't include terminating NUL, it's - // added by it automatically. If we passed length()+1 here, it would - // create a buffer with 2 trailing NULs of length one greater than - // expected. 
- return conv.cMB2WC(wx_str(), length(), NULL); + m_convertedToChar.m_str[lenMB] = '\0'; + if ( conv.FromWChar(m_convertedToChar.m_str, lenMB, + strWC, lenWC) == wxCONV_FAILED ) + return NULL; + + return m_convertedToChar.m_str; } -#endif // Unicode/ANSI +#endif // wxUSE_UNICODE // shrink to minimal size (releasing extra memory) bool wxString::Shrink() @@ -694,7 +645,7 @@ wxString operator+(const wxString& str, const char *psz) wxString s; if ( !s.Alloc(strlen(psz) + str.length()) ) { - wxFAIL_MSG( _T("out of memory in wxString::operator+") ); + wxFAIL_MSG( wxT("out of memory in wxString::operator+") ); } s += str; s += psz; @@ -710,7 +661,7 @@ wxString operator+(const wxString& str, const wchar_t *pwz) wxString s; if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) { - wxFAIL_MSG( _T("out of memory in wxString::operator+") ); + wxFAIL_MSG( wxT("out of memory in wxString::operator+") ); } s += str; s += pwz; @@ -726,7 +677,7 @@ wxString operator+(const char *psz, const wxString& str) wxString s; if ( !s.Alloc(strlen(psz) + str.length()) ) { - wxFAIL_MSG( _T("out of memory in wxString::operator+") ); + wxFAIL_MSG( wxT("out of memory in wxString::operator+") ); } s = psz; s += str; @@ -742,7 +693,7 @@ wxString operator+(const wchar_t *pwz, const wxString& str) wxString s; if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) { - wxFAIL_MSG( _T("out of memory in wxString::operator+") ); + wxFAIL_MSG( wxT("out of memory in wxString::operator+") ); } s = pwz; s += str; @@ -960,7 +911,7 @@ size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const { - wxASSERT_MSG( nStart <= length(), _T("invalid index") ); + wxASSERT_MSG( nStart <= length(), wxT("invalid index") ); size_t idx = nStart; for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i ) @@ -974,7 +925,7 @@ size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const size_t wxString::find_first_not_of(const 
wxChar* sz, size_t nStart, size_t n) const { - wxASSERT_MSG( nStart <= length(), _T("invalid index") ); + wxASSERT_MSG( nStart <= length(), wxT("invalid index") ); size_t idx = nStart; for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i ) @@ -1007,7 +958,7 @@ size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const } else { - wxASSERT_MSG( nStart <= len, _T("invalid index") ); + wxASSERT_MSG( nStart <= len, wxT("invalid index") ); } size_t idx = nStart; @@ -1031,7 +982,7 @@ size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) con } else { - wxASSERT_MSG( nStart <= len, _T("invalid index") ); + wxASSERT_MSG( nStart <= len, wxT("invalid index") ); } size_t idx = nStart; @@ -1047,7 +998,7 @@ size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) con size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const { - wxASSERT_MSG( nStart <= length(), _T("invalid index") ); + wxASSERT_MSG( nStart <= length(), wxT("invalid index") ); size_t idx = nStart; for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i ) @@ -1069,7 +1020,7 @@ size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const } else { - wxASSERT_MSG( nStart <= len, _T("invalid index") ); + wxASSERT_MSG( nStart <= len, wxT("invalid index") ); } size_t idx = nStart; @@ -1128,9 +1079,15 @@ size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart, int wxString::CmpNoCase(const wxString& s) const { #if defined(__WXMSW__) && !wxUSE_UNICODE_UTF8 - // prefer to use CompareString() if available as it's more efficient than - // doing it manual or even using wxStricmp() (see #10375) - switch ( ::CompareString(LOCALE_USER_DEFAULT, NORM_IGNORECASE, + // Prefer to use CompareString() if available as it's more efficient than + // doing it manually or even using wxStricmp() (see #10375) + // + // Also note that not using SORT_STRINGSORT may result in not having a + // strict weak ordering (e.g. 
s1 < s2 and s2 < s3 but s3 < s1) and so break + // algorithms such as std::sort that rely on it. It's also more consistent + // with the fall back version below. + switch ( ::CompareString(LOCALE_USER_DEFAULT, + NORM_IGNORECASE | SORT_STRINGSORT, m_impl.c_str(), m_impl.length(), s.m_impl.c_str(), s.m_impl.length()) ) { @@ -1204,7 +1161,7 @@ wxString wxString::FromAscii(const char *ascii, size_t len) { unsigned char c = (unsigned char)*ascii++; wxASSERT_MSG( c < 0x80, - _T("Non-ASCII value passed to FromAscii().") ); + wxT("Non-ASCII value passed to FromAscii().") ); *dest++ = (wchar_t)c; } @@ -1224,7 +1181,7 @@ wxString wxString::FromAscii(char ascii) unsigned char c = (unsigned char)ascii; - wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") ); + wxASSERT_MSG( c < 0x80, wxT("Non-ASCII value passed to FromAscii().") ); // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value return wxString(wxUniChar((wchar_t)c)); @@ -1279,7 +1236,7 @@ wxString wxString::Mid(size_t nFirst, size_t nCount) const wxString dest(*this, nFirst, nCount); if ( dest.length() != nCount ) { - wxFAIL_MSG( _T("out of memory in wxString::Mid") ); + wxFAIL_MSG( wxT("out of memory in wxString::Mid") ); } return dest; @@ -1329,7 +1286,7 @@ wxString wxString::Right(size_t nCount) const wxString dest(*this, length() - nCount, nCount); if ( dest.length() != nCount ) { - wxFAIL_MSG( _T("out of memory in wxString::Right") ); + wxFAIL_MSG( wxT("out of memory in wxString::Right") ); } return dest; } @@ -1356,7 +1313,7 @@ wxString wxString::Left(size_t nCount) const wxString dest(*this, 0, nCount); if ( dest.length() != nCount ) { - wxFAIL_MSG( _T("out of memory in wxString::Left") ); + wxFAIL_MSG( wxT("out of memory in wxString::Left") ); } return dest; } @@ -1401,7 +1358,7 @@ size_t wxString::Replace(const wxString& strOld, { // if we tried to replace an empty string we'd enter an infinite loop below wxCHECK_MSG( !strOld.empty(), 0, - _T("wxString::Replace(): invalid 
parameter") ); + wxT("wxString::Replace(): invalid parameter") ); wxSTRING_INVALIDATE_CACHE(); @@ -1523,7 +1480,7 @@ bool wxString::IsNumber() const const_iterator i = begin(); - if ( *i == _T('-') || *i == _T('+') ) + if ( *i == wxT('-') || *i == wxT('+') ) ++i; for ( ; i != end(); ++i ) @@ -1680,23 +1637,23 @@ int wxString::Find(wxUniChar ch, bool bFromEnd) const #endif #define WX_STRING_TO_X_TYPE_START \ - wxCHECK_MSG( pVal, false, _T("NULL output pointer") ); \ + wxCHECK_MSG( pVal, false, wxT("NULL output pointer") ); \ DO_IF_NOT_WINCE( errno = 0; ) \ const wxStringCharType *start = wx_str(); \ wxStringCharType *end; +// notice that we return false without modifying the output parameter at all if +// nothing could be parsed but we do modify it and return false then if we did +// parse something successfully but not the entire string #define WX_STRING_TO_X_TYPE_END \ - /* return true only if scan was stopped by the terminating NUL and */ \ - /* if the string was not empty to start with and no under/overflow */ \ - /* occurred: */ \ - if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \ + if ( end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \ return false; \ *pVal = val; \ - return true; + return !*end; bool wxString::ToLong(long *pVal, int base) const { - wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); + wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") ); WX_STRING_TO_X_TYPE_START long val = wxStrtol(start, &end, base); @@ -1705,7 +1662,7 @@ bool wxString::ToLong(long *pVal, int base) const bool wxString::ToULong(unsigned long *pVal, int base) const { - wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); + wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") ); WX_STRING_TO_X_TYPE_START unsigned long val = wxStrtoul(start, &end, base); @@ -1714,7 +1671,7 @@ bool wxString::ToULong(unsigned long *pVal, int base) const bool wxString::ToLongLong(wxLongLong_t *pVal, int 
base) const { - wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); + wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") ); WX_STRING_TO_X_TYPE_START wxLongLong_t val = wxStrtoll(start, &end, base); @@ -1723,7 +1680,7 @@ bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const { - wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); + wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") ); WX_STRING_TO_X_TYPE_START wxULongLong_t val = wxStrtoull(start, &end, base); @@ -1741,10 +1698,10 @@ bool wxString::ToDouble(double *pVal) const bool wxString::ToCLong(long *pVal, int base) const { - wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); + wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") ); WX_STRING_TO_X_TYPE_START -#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE +#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT) long val = wxStrtol_lA(start, &end, base, wxCLocale); #else long val = wxStrtol_l(start, &end, base, wxCLocale); @@ -1754,10 +1711,10 @@ bool wxString::ToCLong(long *pVal, int base) const bool wxString::ToCULong(unsigned long *pVal, int base) const { - wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); + wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") ); WX_STRING_TO_X_TYPE_START -#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE +#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT) unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale); #else unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale); @@ -1768,7 +1725,7 @@ bool wxString::ToCULong(unsigned long *pVal, int base) const bool wxString::ToCDouble(double *pVal) const { WX_STRING_TO_X_TYPE_START -#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE +#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT) double val = 
wxStrtod_lA(start, &end, wxCLocale); #else double val = wxStrtod_l(start, &end, wxCLocale); @@ -1971,7 +1928,7 @@ static int DoStringPrintfV(wxString& str, // always do it manually // FIXME: This really seems to be the wrong and would be an off-by-one // bug except the code above allocates an extra character. - buf[size] = _T('\0'); + buf[size] = wxT('\0'); // vsnprintf() may return either -1 (traditional Unix behaviour) or the // total number of characters which would have been written if the @@ -2077,31 +2034,31 @@ bool wxString::Matches(const wxString& mask) const wxString pattern; pattern.reserve(wxStrlen(pszMask)); - pattern += _T('^'); + pattern += wxT('^'); while ( *pszMask ) { switch ( *pszMask ) { - case _T('?'): - pattern += _T('.'); + case wxT('?'): + pattern += wxT('.'); break; - case _T('*'): - pattern += _T(".*"); + case wxT('*'): + pattern += wxT(".*"); break; - case _T('^'): - case _T('.'): - case _T('$'): - case _T('('): - case _T(')'): - case _T('|'): - case _T('+'): - case _T('\\'): + case wxT('^'): + case wxT('.'): + case wxT('$'): + case wxT('('): + case wxT(')'): + case wxT('|'): + case wxT('+'): + case wxT('\\'): // these characters are special in a RE, quote them // (however note that we don't quote '[' and ']' to allow // using them for Unix shell like matching) - pattern += _T('\\'); + pattern += wxT('\\'); // fall through default: @@ -2110,7 +2067,7 @@ bool wxString::Matches(const wxString& mask) const pszMask++; } - pattern += _T('$'); + pattern += wxT('$'); // and now use it return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());