From cc209a518f106f39fde26f8bc8dd3d9d53dc5cc5 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Sat, 19 Jul 2008 19:59:59 +0000
Subject: [PATCH] check that string passed to FromUTF8() is valid even in
 release build, this is safer; add a separate FromUTF8Unchecked() which can be
 used for maximal efficiency

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@54721 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
---
 include/wx/cocoa/string.h |  2 +-
 include/wx/gtk/private.h  |  2 +-
 include/wx/string.h       | 36 ++++++++++++++++++++++++++++++------
 interface/wx/string.h     | 31 ++++++++++++++++++++++++++++---
 src/xml/xml.cpp           |  2 +-
 5 files changed, 61 insertions(+), 12 deletions(-)

diff --git a/include/wx/cocoa/string.h b/include/wx/cocoa/string.h
index 75e782dbe2..e578e0381e 100644
--- a/include/wx/cocoa/string.h
+++ b/include/wx/cocoa/string.h
@@ -44,7 +44,7 @@ inline NSString* wxInitNSStringWithWxString(NSString *nsstring, const wxString &
 inline wxString wxStringWithNSString(NSString *nsstring)
 {
 #if wxUSE_UNICODE
-    return wxString::FromUTF8([nsstring UTF8String]);
+    return wxString::FromUTF8Unchecked([nsstring UTF8String]);
 #else
     return wxString([nsstring lossyCString]);
 #endif // wxUSE_UNICODE
diff --git a/include/wx/gtk/private.h b/include/wx/gtk/private.h
index 1ca5e884d9..868dfe72eb 100644
--- a/include/wx/gtk/private.h
+++ b/include/wx/gtk/private.h
@@ -32,7 +32,7 @@ extern const gchar *wx_pango_version_check(int major, int minor, int micro);
     #define wxGTK_CONV_FONT(s, font) wxGTK_CONV((s))
     #define wxGTK_CONV_SYS(s) wxGTK_CONV((s))
 
-    #define wxGTK_CONV_BACK(s) wxString::FromUTF8(s)
+    #define wxGTK_CONV_BACK(s) wxString::FromUTF8Unchecked(s)
     #define wxGTK_CONV_BACK_ENC(s, enc) wxGTK_CONV_BACK(s)
     #define wxGTK_CONV_BACK_FONT(s, font) wxGTK_CONV_BACK(s)
     #define wxGTK_CONV_BACK_SYS(s) wxGTK_CONV_BACK(s)
diff --git a/include/wx/string.h b/include/wx/string.h
index 91f25b154e..118232f1db 100644
--- a/include/wx/string.h
+++ b/include/wx/string.h
@@ -1268,7 +1268,7 @@ public:
 
     // conversion to/from UTF-8:
 #if wxUSE_UNICODE_UTF8
-    static wxString FromUTF8(const char *utf8)
+    static wxString FromUTF8Unchecked(const char *utf8)
     {
       if ( !utf8 )
           return wxEmptyString;
@@ -1276,16 +1276,35 @@ public:
       wxASSERT( wxStringOperations::IsValidUtf8String(utf8) );
       return FromImpl(wxStringImpl(utf8));
     }
-    static wxString FromUTF8(const char *utf8, size_t len)
+    static wxString FromUTF8Unchecked(const char *utf8, size_t len)
     {
       if ( !utf8 )
           return wxEmptyString;
       if ( len == npos )
-          return FromUTF8(utf8);
+          return FromUTF8Unchecked(utf8);
 
       wxASSERT( wxStringOperations::IsValidUtf8String(utf8, len) );
       return FromImpl(wxStringImpl(utf8, len));
     }
+
+    static wxString FromUTF8(const char *utf8)
+    {
+        if ( !utf8 || !wxStringOperations::IsValidUtf8String(utf8) )
+            return "";
+
+        return FromImpl(wxStringImpl(utf8));
+    }
+    static wxString FromUTF8(const char *utf8, size_t len)
+    {
+        if ( len == npos )
+            return FromUTF8(utf8);
+
+        if ( !utf8 || !wxStringOperations::IsValidUtf8String(utf8, len) )
+            return "";
+
+        return FromImpl(wxStringImpl(utf8, len));
+    }
+
     const char* utf8_str() const { return wx_str(); }
     const char* ToUTF8() const { return wx_str(); }
 
@@ -1293,10 +1312,15 @@ public:
     // internal UTF-8 representation
     size_t utf8_length() const { return m_impl.length(); }
 #elif wxUSE_UNICODE_WCHAR
-    static wxString FromUTF8(const char *utf8)
-      { return wxString(utf8, wxMBConvUTF8()); }
-    static wxString FromUTF8(const char *utf8, size_t len)
+    static wxString FromUTF8(const char *utf8, size_t len = npos)
       { return wxString(utf8, wxMBConvUTF8(), len); }
+    static wxString FromUTF8Unchecked(const char *utf8, size_t len = npos)
+    {
+        const wxString s(utf8, wxMBConvUTF8(), len);
+        wxASSERT_MSG( !utf8 || !*utf8 || !s.empty(),
+                      "string must be valid UTF-8" );
+        return s;
+    }
     const wxCharBuffer utf8_str() const { return mb_str(wxMBConvUTF8()); }
     const wxCharBuffer ToUTF8() const { return utf8_str(); }
 #else // ANSI
diff --git a/interface/wx/string.h b/interface/wx/string.h
index 844fe0ea9a..b6cf403dc8 100644
--- a/interface/wx/string.h
+++ b/interface/wx/string.h
@@ -623,14 +623,39 @@ public:
     //@{
     /**
         Converts C string encoded in UTF-8 to wxString.
-        Note that this method assumes that @a s is a valid UTF-8 sequence and
-        doesn't do any validation in release builds, it's validity is only checked in
-        debug builds.
+
+        If @a s is not a valid UTF-8 string, an empty string is returned.
+
+        Note that in the UTF-8 build of wxWidgets there is a more efficient
+        alternative to this function, FromUTF8Unchecked(), which, unlike
+        this one, doesn't check that the input string is valid.
+
+        @since 2.8.4
     */
     static wxString FromUTF8(const char* s);
     static wxString FromUTF8(const char* s, size_t len);
     //@}
 
+    //@{
+    /**
+        Converts C string encoded in UTF-8 to wxString without checking its
+        validity.
+
+        This method assumes that @a s is a valid UTF-8 sequence and doesn't do
+        any validation (although an assert failure is triggered in debug builds
+        if the string is invalid). Only use it if you are absolutely sure that
+        @a s is a correct UTF-8 string (e.g. because it comes from another
+        library using UTF-8) and if performance matters; otherwise use the
+        slower (in UTF-8 build) but safer FromUTF8(). Passing a bad UTF-8
+        string to this function creates a corrupted wxString, and the
+        behaviour of all subsequent operations on it is undefined.
+
+        @since 2.8.9
+    */
+    static wxString FromUTF8Unchecked(const char* s);
+    static wxString FromUTF8Unchecked(const char* s, size_t len);
+    //@}
+
     /**
         Returns the character at position @a n (read-only).
     */
diff --git a/src/xml/xml.cpp b/src/xml/xml.cpp
index 4941e6104b..278adff98f 100644
--- a/src/xml/xml.cpp
+++ b/src/xml/xml.cpp
@@ -485,7 +485,7 @@ static wxString CharToString(wxMBConv *conv,
 #endif // !wxUSE_UNICODE
 
     wxUnusedVar(conv);
-    return wxString::FromUTF8(s, len);
+    return wxString::FromUTF8Unchecked(s, len);
 }
 
 // returns true if the given string contains only whitespaces
-- 
2.47.2