X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/467175ab3f2177aa824ceb7b67934fd54ce4e8e0..1c0338c5615c351371f704b326634f58d0bf7da9:/src/common/string.cpp diff --git a/src/common/string.cpp b/src/common/string.cpp index 9fc84b0878..d36f85bd5a 100644 --- a/src/common/string.cpp +++ b/src/common/string.cpp @@ -23,6 +23,7 @@ #ifndef WX_PRECOMP #include "wx/string.h" + #include "wx/wxcrtvararg.h" #endif #include @@ -219,9 +220,16 @@ wxString::~wxString() } #endif -#if wxUSE_UNICODE +#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY const char* wxCStrData::AsChar() const { +#if wxUSE_UNICODE_UTF8 + if ( wxLocaleIsUtf8 ) + return AsInternal(); +#endif + // under non-UTF8 locales, we have to convert the internal UTF-8 + // representation using wxConvLibc and cache the result + wxString *str = wxConstCast(m_str, wxString); // convert the string: @@ -243,7 +251,7 @@ const char* wxCStrData::AsChar() const // and keep it: return str->m_convertedToChar + m_offset; } -#endif // wxUSE_UNICODE +#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY #if !wxUSE_UNICODE_WCHAR const wchar_t* wxCStrData::AsWChar() const @@ -305,14 +313,23 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, const wxMBConv& conv) { - // FIXME-UTF8: return as-is without copying under UTF8 locale, return - // converted string under other locales - needs wxCharBuffer - // changes - // anything to do? if ( !psz || nLength == 0 ) return SubstrBufFromMB("", 0); + // if psz is already in UTF-8, we don't have to do the roundtrip to + // wchar_t* and back: + if ( conv.IsUTF8() ) + { + // we need to validate the input because UTF8 iterators assume valid + // UTF-8 sequence and psz may be invalid: + if ( wxStringOperations::IsValidUtf8String(psz, nLength) ) + { + return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength); + } + // else: do the roundtrip through wchar_t* + } + if ( nLength == npos ) nLength = wxNO_LEN; @@ -323,7 +340,7 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, return SubstrBufFromMB("", 0); // and then to UTF-8: - SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxConvUTF8)); + SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8())); // widechar -> UTF-8 conversion isn't supposed to ever fail: wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") ); @@ -365,22 +382,28 @@ const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const const wxWCharBuffer wxString::wc_str() const { - return wxConvUTF8.cMB2WC(m_impl.c_str(), - m_impl.length() + 1 /* size, not length */, - NULL); + return wxMBConvStrictUTF8().cMB2WC + ( + m_impl.c_str(), + m_impl.length() + 1, // size, not length + NULL + ); } const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const { - // FIXME-UTF8: optimize the case when conv==wxConvUTF8 or wxConvLibc - // under UTF8 locale + if ( conv.IsUTF8() ) + return wxCharBuffer::CreateNonOwned(m_impl.c_str()); + // FIXME-UTF8: use wc_str() here once we have buffers with length size_t wcLen; - wxWCharBuffer wcBuf( - wxConvUTF8.cMB2WC(m_impl.c_str(), - m_impl.length() + 1 /* size, not length */, - &wcLen)); + wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC + ( + m_impl.c_str(), + m_impl.length() + 1, // size + &wcLen + )); if ( !wcLen ) return wxCharBuffer(""); @@ -408,7 +431,7 @@ bool wxString::Shrink() // deprecated compatibility code: #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8 -wxChar *wxString::GetWriteBuf(size_t nLen) +wxStringCharType *wxString::GetWriteBuf(size_t nLen) { return DoGetWriteBuf(nLen); } @@ -547,6 +570,12 @@ wxString operator+(const wchar_t *pwz, const wxString& str) // string comparison // --------------------------------------------------------------------------- +bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const +{ + return (length() == 1) && (compareWithCase ? GetChar(0u) == c + : wxToupper(GetChar(0u)) == wxToupper(c)); +} + #ifdef HAVE_STD_STRING_COMPARE // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with @@ -949,63 +978,69 @@ int wxString::CmpNoCase(const wxString& s) const #endif #endif -wxString wxString::FromAscii(const char *ascii) +wxString wxString::FromAscii(const char *ascii, size_t len) { - if (!ascii) + if (!ascii || len == 0) return wxEmptyString; - size_t len = strlen( ascii ); wxString res; - if ( len ) { - wxStringBuffer buf(res, len); + wxImplStringBuffer buf(res, len); + wxStringCharType *dest = buf; - wchar_t *dest = buf; - - for ( ;; ) + for ( ; len > 0; --len ) { - if ( (*dest++ = (wchar_t)(unsigned char)*ascii++) == L'\0' ) - break; + unsigned char c = (unsigned char)*ascii++; + wxASSERT_MSG( c < 0x80, + _T("Non-ASCII value passed to FromAscii().") ); + + *dest++ = (wchar_t)c; } } return res; } +wxString wxString::FromAscii(const char *ascii) +{ + return FromAscii(ascii, wxStrlen(ascii)); +} + wxString wxString::FromAscii(const char ascii) { // What do we do with '\0' ? - wxString res; - res += (wchar_t)(unsigned char) ascii; + unsigned char c = (unsigned char)ascii; - return res; + wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") ); + + // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value + return wxString(wxUniChar((wchar_t)c)); } const wxCharBuffer wxString::ToAscii() const { // this will allocate enough space for the terminating NUL too wxCharBuffer buffer(length()); - - char *dest = buffer.data(); - const wchar_t *pwc = c_str(); - for ( ;; ) + for ( const_iterator i = begin(); i != end(); ++i ) { - *dest++ = (char)(*pwc > SCHAR_MAX ? wxT('_') : *pwc); + wxUniChar c(*i); + // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?') + *dest++ = c.IsAscii() ? (char)c : '_'; // the output string can't have embedded NULs anyhow, so we can safely // stop at first of them even if we do have any - if ( !*pwc++ ) + if ( !c ) break; } return buffer; } -#endif // Unicode +#endif // wxUSE_UNICODE // extract string of length nCount starting at nFirst wxString wxString::Mid(size_t nFirst, size_t nCount) const @@ -1041,28 +1076,15 @@ wxString wxString::Mid(size_t nFirst, size_t nCount) const // check that the string starts with prefix and return the rest of the string // in the provided pointer if it is not NULL, otherwise return false -bool wxString::StartsWith(const wxChar *prefix, wxString *rest) const +bool wxString::StartsWith(const wxString& prefix, wxString *rest) const { - wxASSERT_MSG( prefix, _T("invalid parameter in wxString::StartsWith") ); - - // first check if the beginning of the string matches the prefix: note - // that we don't have to check that we don't run out of this string as - // when we reach the terminating NUL, either prefix string ends too (and - // then it's ok) or we break out of the loop because there is no match - const wxChar *p = c_str(); - while ( *prefix ) - { - if ( *prefix++ != *p++ ) - { - // no match - return false; - } - } + if ( compare(0, prefix.length(), prefix) != 0 ) + return false; if ( rest ) { // put the rest of the string into provided pointer - *rest = p; + rest->assign(*this, prefix.length(), npos); } return true; @@ -1071,11 +1093,9 @@ bool wxString::StartsWith(const wxChar *prefix, wxString *rest) const // check that the string ends with suffix and return the rest of it in the // provided pointer if it is not NULL, otherwise return false -bool wxString::EndsWith(const wxChar *suffix, wxString *rest) const +bool wxString::EndsWith(const wxString& suffix, wxString *rest) const { - wxASSERT_MSG( suffix, _T("invalid parameter in wxString::EndssWith") ); - - int start = length() - wxStrlen(suffix); + int start = length() - suffix.length(); if ( start < 0 || compare(start, npos, suffix) != 0 ) return false; @@ -1367,14 +1387,15 @@ int wxString::Find(wxUniChar ch, bool bFromEnd) const // conversion to numbers // ---------------------------------------------------------------------------- -// the implementation of all the functions below is exactly the same so factor -// it out +// The implementation of all the functions below is exactly the same so factor +// it out. Note that number extraction works correctly on UTF-8 strings, so +// we can use wxStringCharType and wx_str() for maximum efficiency. -template -bool wxStringToIntType(const wxChar *start, +template +bool wxStringToIntType(const wxStringCharType *start, T *val, int base, - F func) + T (*func)(const wxStringCharType*, wxStringCharType**, int)) { wxCHECK_MSG( val, false, _T("NULL output pointer") ); wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); @@ -1383,7 +1404,7 @@ bool wxStringToIntType(const wxChar *start, errno = 0; #endif - wxChar *end; + wxStringCharType *end; *val = (*func)(start, &end, base); // return true only if scan was stopped by the terminating NUL and if the @@ -1397,22 +1418,22 @@ bool wxStringToIntType(const wxChar *start, bool wxString::ToLong(long *val, int base) const { - return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtol); + return wxStringToIntType(wx_str(), val, base, wxStrtol); } bool wxString::ToULong(unsigned long *val, int base) const { - return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoul); + return wxStringToIntType(wx_str(), val, base, wxStrtoul); } bool wxString::ToLongLong(wxLongLong_t *val, int base) const { - return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoll); + return wxStringToIntType(wx_str(), val, base, wxStrtoll); } bool wxString::ToULongLong(wxULongLong_t *val, int base) const { - return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoull); + return wxStringToIntType(wx_str(), val, base, wxStrtoull); } bool wxString::ToDouble(double *val) const @@ -1440,11 +1461,12 @@ bool wxString::ToDouble(double *val) const // formatted output // --------------------------------------------------------------------------- +#if !wxUSE_UTF8_LOCALE_ONLY /* static */ #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN -wxString wxStringPrintfMixinBase::DoFormat(const wxChar *format, ...) +wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...) #else -wxString wxString::DoFormat(const wxChar *format, ...) +wxString wxString::DoFormatWchar(const wxChar *format, ...) #endif { va_list argptr; @@ -1457,6 +1479,23 @@ wxString wxString::DoFormat(const wxChar *format, ...) return s; } +#endif // !wxUSE_UTF8_LOCALE_ONLY + +#if wxUSE_UNICODE_UTF8 +/* static */ +wxString wxString::DoFormatUtf8(const char *format, ...) +{ + va_list argptr; + va_start(argptr, format); + + wxString s; + s.PrintfV(format, argptr); + + va_end(argptr); + + return s; +} +#endif // wxUSE_UNICODE_UTF8 /* static */ wxString wxString::FormatV(const wxString& format, va_list argptr) @@ -1466,10 +1505,11 @@ wxString wxString::FormatV(const wxString& format, va_list argptr) return s; } +#if !wxUSE_UTF8_LOCALE_ONLY #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN -int wxStringPrintfMixinBase::DoPrintf(const wxChar *format, ...) +int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...) #else -int wxString::DoPrintf(const wxChar *format, ...) +int wxString::DoPrintfWchar(const wxChar *format, ...) #endif { va_list argptr; @@ -1490,19 +1530,52 @@ int wxString::DoPrintf(const wxChar *format, ...) return iLen; } +#endif // !wxUSE_UTF8_LOCALE_ONLY -int wxString::PrintfV(const wxString& format, va_list argptr) +#if wxUSE_UNICODE_UTF8 +int wxString::DoPrintfUtf8(const char *format, ...) +{ + va_list argptr; + va_start(argptr, format); + + int iLen = PrintfV(format, argptr); + + va_end(argptr); + + return iLen; +} +#endif // wxUSE_UNICODE_UTF8 + +#if wxUSE_UNICODE_UTF8 +template +#else +// we only need one version in non-UTF8 builds and at least two Windows +// compilers have problems with this function template, so use just one +// normal function here +#endif +static int DoStringPrintfV(wxString& str, + const wxString& format, va_list argptr) { int size = 1024; for ( ;; ) { - wxStringBuffer tmp(*this, size + 1); +#if wxUSE_UNICODE_UTF8 + BufferType tmp(str, size + 1); + typename BufferType::CharType *buf = tmp; +#else + wxStringBuffer tmp(str, size + 1); wxChar *buf = tmp; +#endif if ( !buf ) { // out of memory + + // in UTF-8 build, leaving uninitialized junk in the buffer + // could result in invalid non-empty UTF-8 string, so just + // reset the string to empty on failure: + buf[0] = '\0'; return -1; } @@ -1510,7 +1583,7 @@ int wxString::PrintfV(const wxString& format, va_list argptr) // only a copy va_list argptrcopy; wxVaCopy(argptrcopy, argptr); - int len = wxVsnprintf(buf, size, (const wxChar*)/*FIXME-UTF8*/format, argptrcopy); + int len = wxVsnprintf(buf, size, format, argptrcopy); va_end(argptrcopy); // some implementations of vsnprintf() don't NUL terminate @@ -1523,14 +1596,20 @@ int wxString::PrintfV(const wxString& format, va_list argptr) // buffer were large enough (newer standards such as Unix98) if ( len < 0 ) { + // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or + // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF + // is true if *both* of them use our own implementation, + // otherwise we can't be sure #if wxUSE_WXVSNPRINTF // we know that our own implementation of wxVsnprintf() returns -1 // only for a format error - thus there's something wrong with // the user's format string + buf[0] = '\0'; return -1; -#else // assume that system version only returns error if not enough space - // still not enough, as we don't know how much we need, double the - // current size of the buffer +#else // possibly using system version + // assume it only returns error if there is not enough space, but + // as we don't know how much we need, double the current size of + // the buffer size *= 2; #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF } @@ -1554,9 +1633,34 @@ int wxString::PrintfV(const wxString& format, va_list argptr) } // we could have overshot - Shrink(); + str.Shrink(); - return length(); + return str.length(); +} + +int wxString::PrintfV(const wxString& format, va_list argptr) +{ +#if wxUSE_UNICODE_UTF8 + #if wxUSE_STL_BASED_WXSTRING + typedef wxStringTypeBuffer Utf8Buffer; + #else + typedef wxImplStringBuffer Utf8Buffer; + #endif +#endif + +#if wxUSE_UTF8_LOCALE_ONLY + return DoStringPrintfV(*this, format, argptr); +#else + #if wxUSE_UNICODE_UTF8 + if ( wxLocaleIsUtf8 ) + return DoStringPrintfV(*this, format, argptr); + else + // wxChar* version + return DoStringPrintfV(*this, format, argptr); + #else + return DoStringPrintfV(*this, format, argptr); + #endif // UTF8/WCHAR +#endif } // ----------------------------------------------------------------------------