X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/111d99489d509bc96819877b88596474f3253859..8c36c49496660f0779584c6a318c7416c519f84d:/src/common/string.cpp diff --git a/src/common/string.cpp b/src/common/string.cpp index daa4901016..ff87c095c7 100644 --- a/src/common/string.cpp +++ b/src/common/string.cpp @@ -233,9 +233,35 @@ const char* wxCStrData::AsChar() const wxString *str = wxConstCast(m_str, wxString); // convert the string: + // + // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we + // have it) but it's unfortunately not obvious to implement + // because we don't know how big buffer do we need for the + // given string length (in case of multibyte encodings, e.g. + // ISO-2022-JP or UTF-8 when internal representation is wchar_t) + // + // One idea would be to store more than just m_convertedToChar + // in wxString: then we could record the length of the string + // which was converted the last time and try to reuse the same + // buffer if the current length is not greater than it (this + // could still fail because string could have been modified in + // place but it would work most of the time, so we'd do it and + // only allocate the new buffer if in-place conversion returned + // an error). We could also store a bit saying if the string + // was modified since the last conversion (and update it in all + // operation modifying the string, of course) to avoid unneeded + // consequential conversions. But both of these ideas require + // adding more fields to wxString and require profiling results + // to be sure that we really gain enough from them to justify + // doing it. wxCharBuffer buf(str->mb_str()); - // FIXME-UTF8: do the conversion in-place in the existing buffer + // if it failed, return empty string and not NULL to avoid crashes in code + // written with either wxWidgets 2 wxString or std::string behaviour in + // mind: neither of them ever returns NULL and so we shouldn't neither + if ( !buf ) + return ""; + if ( str->m_convertedToChar && strlen(buf) == strlen(str->m_convertedToChar) ) { @@ -261,6 +287,10 @@ const wchar_t* wxCStrData::AsWChar() const // convert the string: wxWCharBuffer buf(str->wc_str()); + // notice that here, unlike above in AsChar(), conversion can't fail as our + // internal UTF-8 is always well-formed -- or the string was corrupted and + // all bets are off anyhow + // FIXME-UTF8: do the conversion in-place in the existing buffer if ( str->m_convertedToWChar && wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) ) @@ -340,7 +370,7 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, return SubstrBufFromMB("", 0); // and then to UTF-8: - SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxConvUTF8)); + SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8())); // widechar -> UTF-8 conversion isn't supposed to ever fail: wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") ); @@ -382,9 +412,12 @@ const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const const wxWCharBuffer wxString::wc_str() const { - return wxConvUTF8.cMB2WC(m_impl.c_str(), - m_impl.length() + 1 /* size, not length */, - NULL); + return wxMBConvStrictUTF8().cMB2WC + ( + m_impl.c_str(), + m_impl.length() + 1, // size, not length + NULL + ); } const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const @@ -395,14 +428,16 @@ const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const // FIXME-UTF8: use wc_str() here once we have buffers with length size_t wcLen; - wxWCharBuffer wcBuf( - wxConvUTF8.cMB2WC(m_impl.c_str(), - m_impl.length() + 1 /* size, not length */, - &wcLen)); + wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC + ( + m_impl.c_str(), + m_impl.length() + 1, // size + &wcLen + )); if ( !wcLen ) return wxCharBuffer(""); - return conv.cWC2MB(wcBuf, wcLen, NULL); + return conv.cWC2MB(wcBuf, wcLen+1, NULL); } #else // ANSI @@ -565,6 +600,12 @@ wxString operator+(const wchar_t *pwz, const wxString& str) // string comparison // --------------------------------------------------------------------------- +bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const +{ + return (length() == 1) && (compareWithCase ? GetChar(0u) == c + : wxToupper(GetChar(0u)) == wxToupper(c)); +} + #ifdef HAVE_STD_STRING_COMPARE // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with @@ -967,36 +1008,36 @@ int wxString::CmpNoCase(const wxString& s) const #endif #endif -wxString wxString::FromAscii(const char *ascii) +wxString wxString::FromAscii(const char *ascii, size_t len) { - if (!ascii) + if (!ascii || len == 0) return wxEmptyString; - size_t len = strlen(ascii); wxString res; - if ( len ) { - wxImplStringBuffer buf(res, len); + wxStringInternalBuffer buf(res, len); wxStringCharType *dest = buf; - for ( ;; ) + for ( ; len > 0; --len ) { unsigned char c = (unsigned char)*ascii++; wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") ); *dest++ = (wchar_t)c; - - if ( c == '\0' ) - break; } } return res; } -wxString wxString::FromAscii(const char ascii) +wxString wxString::FromAscii(const char *ascii) +{ + return FromAscii(ascii, wxStrlen(ascii)); +} + +wxString wxString::FromAscii(char ascii) { // What do we do with '\0' ? @@ -1065,28 +1106,15 @@ wxString wxString::Mid(size_t nFirst, size_t nCount) const // check that the string starts with prefix and return the rest of the string // in the provided pointer if it is not NULL, otherwise return false -bool wxString::StartsWith(const wxChar *prefix, wxString *rest) const +bool wxString::StartsWith(const wxString& prefix, wxString *rest) const { - wxASSERT_MSG( prefix, _T("invalid parameter in wxString::StartsWith") ); - - // first check if the beginning of the string matches the prefix: note - // that we don't have to check that we don't run out of this string as - // when we reach the terminating NUL, either prefix string ends too (and - // then it's ok) or we break out of the loop because there is no match - const wxChar *p = c_str(); - while ( *prefix ) - { - if ( *prefix++ != *p++ ) - { - // no match - return false; - } - } + if ( compare(0, prefix.length(), prefix) != 0 ) + return false; if ( rest ) { // put the rest of the string into provided pointer - *rest = p; + rest->assign(*this, prefix.length(), npos); } return true; @@ -1095,11 +1123,9 @@ bool wxString::StartsWith(const wxChar *prefix, wxString *rest) const // check that the string ends with suffix and return the rest of it in the // provided pointer if it is not NULL, otherwise return false -bool wxString::EndsWith(const wxChar *suffix, wxString *rest) const +bool wxString::EndsWith(const wxString& suffix, wxString *rest) const { - wxASSERT_MSG( suffix, _T("invalid parameter in wxString::EndssWith") ); - - int start = length() - wxStrlen(suffix); + int start = length() - suffix.length(); if ( start < 0 || compare(start, npos, suffix) != 0 ) return false; @@ -1391,52 +1417,50 @@ int wxString::Find(wxUniChar ch, bool bFromEnd) const // conversion to numbers // ---------------------------------------------------------------------------- -// the implementation of all the functions below is exactly the same so factor -// it out - -template -bool wxStringToIntType(const wxChar *start, - T *val, - int base, - F func) -{ - wxCHECK_MSG( val, false, _T("NULL output pointer") ); - wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); +// The implementation of all the functions below is exactly the same so factor +// it out. Note that number extraction works correctly on UTF-8 strings, so +// we can use wxStringCharType and wx_str() for maximum efficiency. #ifndef __WXWINCE__ - errno = 0; + #define DO_IF_NOT_WINCE(x) x +#else + #define DO_IF_NOT_WINCE(x) #endif - wxChar *end; - *val = (*func)(start, &end, base); - - // return true only if scan was stopped by the terminating NUL and if the - // string was not empty to start with and no under/overflow occurred - return !*end && (end != start) -#ifndef __WXWINCE__ - && (errno != ERANGE) -#endif - ; -} +#define WX_STRING_TO_INT_TYPE(val, base, func) \ + wxCHECK_MSG( val, false, _T("NULL output pointer") ); \ + wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); \ + \ + DO_IF_NOT_WINCE( errno = 0; ) \ + \ + const wxStringCharType *start = wx_str(); \ + wxStringCharType *end; \ + *val = func(start, &end, base); \ + \ + /* return true only if scan was stopped by the terminating NUL and */ \ + /* if the string was not empty to start with and no under/overflow */ \ + /* occurred: */ \ + return !*end && (end != start) \ + DO_IF_NOT_WINCE( && (errno != ERANGE) ) bool wxString::ToLong(long *val, int base) const { - return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtol); + WX_STRING_TO_INT_TYPE(val, base, wxStrtol); } bool wxString::ToULong(unsigned long *val, int base) const { - return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoul); + WX_STRING_TO_INT_TYPE(val, base, wxStrtoul); } bool wxString::ToLongLong(wxLongLong_t *val, int base) const { - return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoll); + WX_STRING_TO_INT_TYPE(val, base, wxStrtoll); } bool wxString::ToULongLong(wxULongLong_t *val, int base) const { - return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoull); + WX_STRING_TO_INT_TYPE(val, base, wxStrtoull); } bool wxString::ToDouble(double *val) const @@ -1467,7 +1491,7 @@ bool wxString::ToDouble(double *val) const #if !wxUSE_UTF8_LOCALE_ONLY /* static */ #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN -wxString wxStringPrintfMixinBase::DoFormat(const wxChar *format, ...) +wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...) #else wxString wxString::DoFormatWchar(const wxChar *format, ...) #endif @@ -1574,6 +1598,11 @@ static int DoStringPrintfV(wxString& str, if ( !buf ) { // out of memory + + // in UTF-8 build, leaving uninitialized junk in the buffer + // could result in invalid non-empty UTF-8 string, so just + // reset the string to empty on failure: + buf[0] = '\0'; return -1; } @@ -1594,14 +1623,20 @@ static int DoStringPrintfV(wxString& str, // buffer were large enough (newer standards such as Unix98) if ( len < 0 ) { + // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or + // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF + // is true if *both* of them use our own implementation, + // otherwise we can't be sure #if wxUSE_WXVSNPRINTF // we know that our own implementation of wxVsnprintf() returns -1 // only for a format error - thus there's something wrong with // the user's format string + buf[0] = '\0'; return -1; -#else // assume that system version only returns error if not enough space - // still not enough, as we don't know how much we need, double the - // current size of the buffer +#else // possibly using system version + // assume it only returns error if there is not enough space, but + // as we don't know how much we need, double the current size of + // the buffer size *= 2; #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF } @@ -1632,28 +1667,25 @@ static int DoStringPrintfV(wxString& str, int wxString::PrintfV(const wxString& format, va_list argptr) { - va_list argcopy; - wxVaCopy(argcopy, argptr); - #if wxUSE_UNICODE_UTF8 #if wxUSE_STL_BASED_WXSTRING typedef wxStringTypeBuffer Utf8Buffer; #else - typedef wxImplStringBuffer Utf8Buffer; + typedef wxStringInternalBuffer Utf8Buffer; #endif #endif #if wxUSE_UTF8_LOCALE_ONLY - return DoStringPrintfV(*this, format, argcopy); + return DoStringPrintfV(*this, format, argptr); #else #if wxUSE_UNICODE_UTF8 if ( wxLocaleIsUtf8 ) - return DoStringPrintfV(*this, format, argcopy); + return DoStringPrintfV(*this, format, argptr); else // wxChar* version - return DoStringPrintfV(*this, format, argcopy); + return DoStringPrintfV(*this, format, argptr); #else - return DoStringPrintfV(*this, format, argcopy); + return DoStringPrintfV(*this, format, argptr); #endif // UTF8/WCHAR #endif } @@ -1827,3 +1859,32 @@ wxString wxString::Upper() const // convert to lower case, return the copy of the string wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); } + +// ---------------------------------------------------------------------------- +// wxUTF8StringBuffer +// ---------------------------------------------------------------------------- + +#if wxUSE_UNICODE_WCHAR +wxUTF8StringBuffer::~wxUTF8StringBuffer() +{ + wxMBConvStrictUTF8 conv; + size_t wlen = conv.ToWChar(NULL, 0, m_buf); + wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" ); + + wxStringInternalBuffer wbuf(m_str, wlen); + conv.ToWChar(wbuf, wlen, m_buf); +} + +wxUTF8StringBufferLength::~wxUTF8StringBufferLength() +{ + wxCHECK_RET(m_lenSet, "length not set"); + + wxMBConvStrictUTF8 conv; + size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len); + wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" ); + + wxStringInternalBufferLength wbuf(m_str, wlen); + conv.ToWChar(wbuf, wlen, m_buf, m_len); + wbuf.SetLength(wlen); +} +#endif // wxUSE_UNICODE_WCHAR