X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/8116a0c55e7e657f36fed29035dc92d721b92928..193d0c93106d21067107c262874763d4c0921707:/src/common/string.cpp diff --git a/src/common/string.cpp b/src/common/string.cpp index 564f3ea4f1..717ce0b52c 100644 --- a/src/common/string.cpp +++ b/src/common/string.cpp @@ -10,13 +10,6 @@ // Licence: wxWindows licence ///////////////////////////////////////////////////////////////////////////// -/* - * About ref counting: - * 1) all empty strings use g_strEmpty, nRefs = -1 (set in Init()) - * 2) AllocBuffer() sets nRefs to 1, Lock() increments it by one - * 3) Unlock() decrements nRefs and frees memory if it goes to 0 - */ - // =========================================================================== // headers, declarations, constants // =========================================================================== @@ -30,6 +23,7 @@ #ifndef WX_PRECOMP #include "wx/string.h" + #include "wx/wxcrtvararg.h" #endif #include @@ -80,9 +74,9 @@ wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str) { // FIXME-UTF8: always, not only if wxUSE_UNICODE #if wxUSE_UNICODE && !defined(__BORLANDC__) - return os << str.AsWChar(); + return os << (const wchar_t*)str.AsWCharBuf(); #else - return os << str.AsChar(); + return os << (const char*)str.AsCharBuf(); #endif } @@ -105,6 +99,38 @@ wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str) #endif // wxUSE_STD_IOSTREAM +// =========================================================================== +// wxString class core +// =========================================================================== + +#if wxUSE_UNICODE_UTF8 + +void wxString::PosLenToImpl(size_t pos, size_t len, + size_t *implPos, size_t *implLen) const +{ + if ( pos == npos ) + *implPos = npos; + else + { + const_iterator i = begin() + pos; + *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin(); + if ( len == npos ) + *implLen = npos; + else + { + // too large length is interpreted as "to the end of the string" + // FIXME-UTF8: verify this is the case in std::string, assert + // otherwise + if ( pos + len > length() ) + len = length() - pos; + + *implLen = (i + len).impl() - i.impl(); + } + } +} + +#endif // wxUSE_UNICODE_UTF8 + // ---------------------------------------------------------------------------- // wxCStrData converted strings caching // ---------------------------------------------------------------------------- @@ -198,8 +224,24 @@ wxString::~wxString() const char* wxCStrData::AsChar() const { wxString *str = wxConstCast(m_str, wxString); - // convert the string and keep it: - str->m_convertedToChar = str->mb_str().release(); + + // convert the string: + wxCharBuffer buf(str->mb_str()); + + // FIXME-UTF8: do the conversion in-place in the existing buffer + if ( str->m_convertedToChar && + strlen(buf) == strlen(str->m_convertedToChar) ) + { + // keep the same buffer for as long as possible, so that several calls + // to c_str() in a row still work: + strcpy(str->m_convertedToChar, buf); + } + else + { + str->m_convertedToChar = buf.release(); + } + + // and keep it: return str->m_convertedToChar + m_offset; } #endif // wxUSE_UNICODE @@ -208,8 +250,24 @@ const char* wxCStrData::AsChar() const const wchar_t* wxCStrData::AsWChar() const { wxString *str = wxConstCast(m_str, wxString); - // convert the string and keep it: - str->m_convertedToWChar = str->wc_str().release(); + + // convert the string: + wxWCharBuffer buf(str->wc_str()); + + // FIXME-UTF8: do the conversion in-place in the existing buffer + if ( str->m_convertedToWChar && + wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) ) + { + // keep the same buffer for as long as possible, so that several calls + // to c_str() in a row still work: + memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf)); + } + else + { + str->m_convertedToWChar = buf.release(); + } + + // and keep it: return str->m_convertedToWChar + m_offset; } #endif // !wxUSE_UNICODE_WCHAR @@ -222,14 +280,14 @@ const wchar_t* wxCStrData::AsWChar() const // construction and conversion // --------------------------------------------------------------------------- -#if wxUSE_UNICODE +#if wxUSE_UNICODE_WCHAR /* static */ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, const wxMBConv& conv) { // anything to do? if ( !psz || nLength == 0 ) - return SubstrBufFromMB(); + return SubstrBufFromMB(L"", 0); if ( nLength == npos ) nLength = wxNO_LEN; @@ -237,18 +295,51 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, size_t wcLen; wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen)); if ( !wcLen ) - return SubstrBufFromMB(); + return SubstrBufFromMB(_T(""), 0); else return SubstrBufFromMB(wcBuf, wcLen); } -#else +#endif // wxUSE_UNICODE_WCHAR + +#if wxUSE_UNICODE_UTF8 +/* static */ +wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, + const wxMBConv& conv) +{ + // FIXME-UTF8: return as-is without copying under UTF8 locale, return + // converted string under other locales - needs wxCharBuffer + // changes + + // anything to do? + if ( !psz || nLength == 0 ) + return SubstrBufFromMB("", 0); + + if ( nLength == npos ) + nLength = wxNO_LEN; + + // first convert to wide string: + size_t wcLen; + wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen)); + if ( !wcLen ) + return SubstrBufFromMB("", 0); + + // and then to UTF-8: + SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxConvUTF8)); + // widechar -> UTF-8 conversion isn't supposed to ever fail: + wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") ); + + return buf; +} +#endif // wxUSE_UNICODE_UTF8 + +#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE /* static */ wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength, const wxMBConv& conv) { // anything to do? if ( !pwz || nLength == 0 ) - return SubstrBufFromWC(); + return SubstrBufFromWC("", 0); if ( nLength == npos ) nLength = wxNO_LEN; @@ -256,34 +347,56 @@ wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLengt size_t mbLen; wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen)); if ( !mbLen ) - return SubstrBufFromWC(); + return SubstrBufFromWC("", 0); else return SubstrBufFromWC(mbBuf, mbLen); } -#endif +#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE -#if wxUSE_UNICODE +#if wxUSE_UNICODE_WCHAR //Convert wxString in Unicode mode to a multi-byte string const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const { - return conv.cWC2MB(c_str(), length() + 1 /* size, not length */, NULL); + return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL); } -#else // ANSI +#elif wxUSE_UNICODE_UTF8 -#if wxUSE_WCHAR_T +const wxWCharBuffer wxString::wc_str() const +{ + return wxConvUTF8.cMB2WC(m_impl.c_str(), + m_impl.length() + 1 /* size, not length */, + NULL); +} + +const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const +{ + // FIXME-UTF8: optimize the case when conv==wxConvUTF8 or wxConvLibc + // under UTF8 locale + // FIXME-UTF8: use wc_str() here once we have buffers with length + + size_t wcLen; + wxWCharBuffer wcBuf( + wxConvUTF8.cMB2WC(m_impl.c_str(), + m_impl.length() + 1 /* size, not length */, + &wcLen)); + if ( !wcLen ) + return wxCharBuffer(""); + + return conv.cWC2MB(wcBuf, wcLen, NULL); +} + +#else // ANSI //Converts this string to a wide character string if unicode //mode is not enabled and wxUSE_WCHAR_T is enabled const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const { - return conv.cMB2WC(c_str(), length() + 1 /* size, not length */, NULL); + return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL); } -#endif // wxUSE_WCHAR_T - #endif // Unicode/ANSI // shrink to minimal size (releasing extra memory) @@ -296,7 +409,7 @@ bool wxString::Shrink() // deprecated compatibility code: #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8 -wxChar *wxString::GetWriteBuf(size_t nLen) +wxStringCharType *wxString::GetWriteBuf(size_t nLen) { return DoGetWriteBuf(nLen); } @@ -842,19 +955,24 @@ wxString wxString::FromAscii(const char *ascii) if (!ascii) return wxEmptyString; - size_t len = strlen( ascii ); + size_t len = strlen(ascii); wxString res; if ( len ) { - wxStringBuffer buf(res, len); - - wchar_t *dest = buf; + wxImplStringBuffer buf(res, len); + wxStringCharType *dest = buf; for ( ;; ) { - if ( (*dest++ = (wchar_t)(unsigned char)*ascii++) == L'\0' ) - break; + unsigned char c = (unsigned char)*ascii++; + wxASSERT_MSG( c < 0x80, + _T("Non-ASCII value passed to FromAscii().") ); + + *dest++ = (wchar_t)c; + + if ( c == '\0' ) + break; } } @@ -865,35 +983,36 @@ wxString wxString::FromAscii(const char ascii) { // What do we do with '\0' ? - wxString res; - res += (wchar_t)(unsigned char) ascii; + unsigned char c = (unsigned char)ascii; - return res; + wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") ); + + // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value + return wxString(wxUniChar((wchar_t)c)); } const wxCharBuffer wxString::ToAscii() const { // this will allocate enough space for the terminating NUL too wxCharBuffer buffer(length()); - - char *dest = buffer.data(); - const wchar_t *pwc = c_str(); - for ( ;; ) + for ( const_iterator i = begin(); i != end(); ++i ) { - *dest++ = (char)(*pwc > SCHAR_MAX ? wxT('_') : *pwc); + wxUniChar c(*i); + // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?') + *dest++ = c.IsAscii() ? (char)c : '_'; // the output string can't have embedded NULs anyhow, so we can safely // stop at first of them even if we do have any - if ( !*pwc++ ) + if ( !c ) break; } return buffer; } -#endif // Unicode +#endif // wxUSE_UNICODE // extract string of length nCount starting at nFirst wxString wxString::Mid(size_t nFirst, size_t nCount) const @@ -964,7 +1083,8 @@ bool wxString::EndsWith(const wxChar *suffix, wxString *rest) const wxASSERT_MSG( suffix, _T("invalid parameter in wxString::EndssWith") ); int start = length() - wxStrlen(suffix); - if ( start < 0 || wxStrcmp(wx_str() + start, suffix) != 0 ) + + if ( start < 0 || compare(start, npos, suffix) != 0 ) return false; if ( rest ) @@ -1095,34 +1215,43 @@ size_t wxString::Replace(const wxString& strOld, bool wxString::IsAscii() const { - const wxChar *s = (const wxChar*) *this; - while(*s){ - if(!isascii(*s)) return(false); - s++; - } - return(true); + for ( const_iterator i = begin(); i != end(); ++i ) + { + if ( !(*i).IsAscii() ) + return false; + } + + return true; } bool wxString::IsWord() const { - const wxChar *s = (const wxChar*) *this; - while(*s){ - if(!wxIsalpha(*s)) return(false); - s++; - } - return(true); + for ( const_iterator i = begin(); i != end(); ++i ) + { + if ( !wxIsalpha(*i) ) + return false; + } + + return true; } bool wxString::IsNumber() const { - const wxChar *s = (const wxChar*) *this; - if (wxStrlen(s)) - if ((s[0] == wxT('-')) || (s[0] == wxT('+'))) s++; - while(*s){ - if(!wxIsdigit(*s)) return(false); - s++; - } - return(true); + if ( empty() ) + return true; + + const_iterator i = begin(); + + if ( *i == _T('-') || *i == _T('+') ) + ++i; + + for ( ; i != end(); ++i ) + { + if ( !wxIsdigit(*i) ) + return false; + } + + return true; } wxString wxString::Strip(stripType w) const @@ -1285,26 +1414,12 @@ bool wxString::ToULong(unsigned long *val, int base) const bool wxString::ToLongLong(wxLongLong_t *val, int base) const { -#ifdef wxHAS_STRTOLL return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoll); -#else - // TODO: implement this ourselves - wxUnusedVar(val); - wxUnusedVar(base); - return false; -#endif // wxHAS_STRTOLL } bool wxString::ToULongLong(wxULongLong_t *val, int base) const { -#ifdef wxHAS_STRTOLL return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoull); -#else - // TODO: implement this ourselves - wxUnusedVar(val); - wxUnusedVar(base); - return false; -#endif } bool wxString::ToDouble(double *val) const @@ -1334,9 +1449,9 @@ bool wxString::ToDouble(double *val) const /* static */ #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN -wxString wxStringPrintfMixinBase::DoFormat(const wxChar *format, ...) +wxString wxStringPrintfMixinBase::DoFormat(const wxString& format, ...) #else -wxString wxString::DoFormat(const wxChar *format, ...) +wxString wxString::DoFormat(const wxString& format, ...) #endif { va_list argptr; @@ -1359,9 +1474,9 @@ wxString wxString::FormatV(const wxString& format, va_list argptr) } #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN -int wxStringPrintfMixinBase::DoPrintf(const wxChar *format, ...) +int wxStringPrintfMixinBase::DoPrintf(const wxString& format, ...) #else -int wxString::DoPrintf(const wxChar *format, ...) +int wxString::DoPrintf(const wxString& format, ...) #endif { va_list argptr; @@ -1383,14 +1498,27 @@ int wxString::DoPrintf(const wxChar *format, ...) return iLen; } -int wxString::PrintfV(const wxString& format, va_list argptr) +#if wxUSE_UNICODE_UTF8 +template +#else +// we only need one version in non-UTF8 builds and at least two Windows +// compilers have problems with this function template, so use just one +// normal function here +#endif +static int DoStringPrintfV(wxString& str, + const wxString& format, va_list argptr) { int size = 1024; for ( ;; ) { - wxStringBuffer tmp(*this, size + 1); +#if wxUSE_UNICODE_UTF8 + BufferType tmp(str, size + 1); + typename BufferType::CharType *buf = tmp; +#else + wxStringBuffer tmp(str, size + 1); wxChar *buf = tmp; +#endif if ( !buf ) { @@ -1446,9 +1574,37 @@ int wxString::PrintfV(const wxString& format, va_list argptr) } // we could have overshot - Shrink(); + str.Shrink(); + + return str.length(); +} + +int wxString::PrintfV(const wxString& format, va_list argptr) +{ + va_list argcopy; + wxVaCopy(argcopy, argptr); + +#if wxUSE_UNICODE_UTF8 + #if wxUSE_STL_BASED_WXSTRING + typedef wxStringTypeBuffer Utf8Buffer; + #else + typedef wxImplStringBuffer Utf8Buffer; + #endif +#endif - return length(); +#if wxUSE_UTF8_LOCALE_ONLY + return DoStringPrintfV(*this, format, argcopy); +#else + #if wxUSE_UNICODE_UTF8 + if ( wxLocaleIsUtf8 ) + return DoStringPrintfV(*this, format, argcopy); + else + // wxChar* version + return DoStringPrintfV(*this, format, argcopy); + #else + return DoStringPrintfV(*this, format, argcopy); + #endif // UTF8/WCHAR +#endif } // ----------------------------------------------------------------------------