From: Vadim Zeitlin Date: Sat, 19 Jul 2008 19:59:59 +0000 (+0000) Subject: check that string passed to FromUTF8() is valid even in release build, this is safer... X-Git-Url: https://git.saurik.com/wxWidgets.git/commitdiff_plain/cc209a518f106f39fde26f8bc8dd3d9d53dc5cc5 check that string passed to FromUTF8() is valid even in release build, this is safer; add a separate FromUTF8Unchecked() which can be used for maximal efficiency git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@54721 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- diff --git a/include/wx/cocoa/string.h b/include/wx/cocoa/string.h index 75e782dbe2..e578e0381e 100644 --- a/include/wx/cocoa/string.h +++ b/include/wx/cocoa/string.h @@ -44,7 +44,7 @@ inline NSString* wxInitNSStringWithWxString(NSString *nsstring, const wxString & inline wxString wxStringWithNSString(NSString *nsstring) { #if wxUSE_UNICODE - return wxString::FromUTF8([nsstring UTF8String]); + return wxString::FromUTF8Unchecked([nsstring UTF8String]); #else return wxString([nsstring lossyCString]); #endif // wxUSE_UNICODE diff --git a/include/wx/gtk/private.h b/include/wx/gtk/private.h index 1ca5e884d9..868dfe72eb 100644 --- a/include/wx/gtk/private.h +++ b/include/wx/gtk/private.h @@ -32,7 +32,7 @@ extern const gchar *wx_pango_version_check(int major, int minor, int micro); #define wxGTK_CONV_FONT(s, font) wxGTK_CONV((s)) #define wxGTK_CONV_SYS(s) wxGTK_CONV((s)) - #define wxGTK_CONV_BACK(s) wxString::FromUTF8(s) + #define wxGTK_CONV_BACK(s) wxString::FromUTF8Unchecked(s) #define wxGTK_CONV_BACK_ENC(s, enc) wxGTK_CONV_BACK(s) #define wxGTK_CONV_BACK_FONT(s, font) wxGTK_CONV_BACK(s) #define wxGTK_CONV_BACK_SYS(s) wxGTK_CONV_BACK(s) diff --git a/include/wx/string.h b/include/wx/string.h index 91f25b154e..118232f1db 100644 --- a/include/wx/string.h +++ b/include/wx/string.h @@ -1268,7 +1268,7 @@ public: // conversion to/from UTF-8: #if wxUSE_UNICODE_UTF8 - static wxString FromUTF8(const char *utf8) + static wxString FromUTF8Unchecked(const char *utf8) { if ( !utf8 ) return wxEmptyString; @@ -1276,16 +1276,35 @@ public: wxASSERT( wxStringOperations::IsValidUtf8String(utf8) ); return FromImpl(wxStringImpl(utf8)); } - static wxString FromUTF8(const char *utf8, size_t len) + static wxString FromUTF8Unchecked(const char *utf8, size_t len) { if ( !utf8 ) return wxEmptyString; if ( len == npos ) - return FromUTF8(utf8); + return FromUTF8Unchecked(utf8); wxASSERT( wxStringOperations::IsValidUtf8String(utf8, len) ); return FromImpl(wxStringImpl(utf8, len)); } + + static wxString FromUTF8(const char *utf8) + { + if ( !utf8 || !wxStringOperations::IsValidUtf8String(utf8) ) + return ""; + + return FromImpl(wxStringImpl(utf8)); + } + static wxString FromUTF8(const char *utf8, size_t len) + { + if ( len == npos ) + return FromUTF8(utf8); + + if ( !utf8 || !wxStringOperations::IsValidUtf8String(utf8, len) ) + return ""; + + return FromImpl(wxStringImpl(utf8, len)); + } + const char* utf8_str() const { return wx_str(); } const char* ToUTF8() const { return wx_str(); } @@ -1293,10 +1312,15 @@ public: // internal UTF-8 representation size_t utf8_length() const { return m_impl.length(); } #elif wxUSE_UNICODE_WCHAR - static wxString FromUTF8(const char *utf8) - { return wxString(utf8, wxMBConvUTF8()); } - static wxString FromUTF8(const char *utf8, size_t len) + static wxString FromUTF8(const char *utf8, size_t len = npos) { return wxString(utf8, wxMBConvUTF8(), len); } + static wxString FromUTF8Unchecked(const char *utf8, size_t len = npos) + { + const wxString s(utf8, wxMBConvUTF8(), len); + wxASSERT_MSG( !utf8 || !*utf8 || !s.empty(), + "string must be valid UTF-8" ); + return s; + } const wxCharBuffer utf8_str() const { return mb_str(wxMBConvUTF8()); } const wxCharBuffer ToUTF8() const { return utf8_str(); } #else // ANSI diff --git a/interface/wx/string.h b/interface/wx/string.h index 844fe0ea9a..b6cf403dc8 100644 --- a/interface/wx/string.h +++ b/interface/wx/string.h @@ -623,14 +623,39 @@ public: //@{ /** Converts C string encoded in UTF-8 to wxString. - Note that this method assumes that @a s is a valid UTF-8 sequence and - doesn't do any validation in release builds, it's validity is only checked in - debug builds. + + If @a s is not a valid UTF-8 string, an empty string is returned. + + Notice that when using UTF-8 wxWidgets build there is a more efficient + alternative to this function called FromUTF8Unchecked() which, unlike + this one, doesn't check that the input string is valid. + + @since 2.8.4 */ static wxString FromUTF8(const char* s); static wxString FromUTF8(const char* s, size_t len); //@} + //@{ + /** + Converts C string encoded in UTF-8 to wxString without checking its + validity. + + This method assumes that @a s is a valid UTF-8 sequence and doesn't do + any validation (although an assert failure is triggered in debug builds + if the string is invalid). Only use it if you are absolutely sure that + @a s is a correct UTF-8 string (e.g. because it comes from another + library using UTF-8) and if the performance matters, otherwise use + slower (in UTF-8 build) but safer FromUTF8(). Passing a bad UTF-8 + string to this function will result in creating a corrupted wxString + and all the subsequent operations on it will be undefined. + + @since 2.8.9 + */ + static wxString FromUTF8Unchecked(const char* s); + static wxString FromUTF8Unchecked(const char* s, size_t len); + //@} + /** Returns the character at position @a n (read-only). */ diff --git a/src/xml/xml.cpp b/src/xml/xml.cpp index 4941e6104b..278adff98f 100644 --- a/src/xml/xml.cpp +++ b/src/xml/xml.cpp @@ -485,7 +485,7 @@ static wxString CharToString(wxMBConv *conv, #endif // !wxUSE_UNICODE wxUnusedVar(conv); - return wxString::FromUTF8(s, len); + return wxString::FromUTF8Unchecked(s, len); } // returns true if the given string contains only whitespaces