check that string passed to FromUTF8() is valid even in release build, this is safer...

author Vadim Zeitlin <vadim@wxwidgets.org>

Sat, 19 Jul 2008 19:59:59 +0000 (19:59 +0000)

committer Vadim Zeitlin <vadim@wxwidgets.org>

Sat, 19 Jul 2008 19:59:59 +0000 (19:59 +0000)
author Vadim Zeitlin <vadim@wxwidgets.org>
Sat, 19 Jul 2008 19:59:59 +0000 (19:59 +0000)
committer Vadim Zeitlin <vadim@wxwidgets.org>
Sat, 19 Jul 2008 19:59:59 +0000 (19:59 +0000)
diff --git a/include/wx/cocoa/string.h b/include/wx/cocoa/string.h

index 75e782dbe24920ac361ce1c5fdd3a7feb35e90fc..e578e0381e85e0837d1fb22b2316c886cba4479a 100644 (file)
--- a/include/wx/cocoa/string.h
+++ b/include/wx/cocoa/string.h
@@ -44,7 +44,7 @@ inline NSString* wxInitNSStringWithWxString(NSString *nsstring, const wxString &
  inline wxString wxStringWithNSString(NSString *nsstring)
  {
  #if wxUSE_UNICODE
-    return wxString::FromUTF8([nsstring UTF8String]);
+    return wxString::FromUTF8Unchecked([nsstring UTF8String]);
  #else
      return wxString([nsstring lossyCString]);
  #endif // wxUSE_UNICODE
diff --git a/include/wx/gtk/private.h b/include/wx/gtk/private.h

index 1ca5e884d9b284c3660d7cf2a0d163cc61291531..868dfe72eb1344f8eba136dd503118d0dfd3c9d2 100644 (file)
--- a/include/wx/gtk/private.h
+++ b/include/wx/gtk/private.h
@@ -32,7 +32,7 @@ extern const gchar *wx_pango_version_check(int major, int minor, int micro);
      #define wxGTK_CONV_FONT(s, font) wxGTK_CONV((s))
      #define wxGTK_CONV_SYS(s) wxGTK_CONV((s))
  
-    #define wxGTK_CONV_BACK(s) wxString::FromUTF8(s)
+    #define wxGTK_CONV_BACK(s) wxString::FromUTF8Unchecked(s)
      #define wxGTK_CONV_BACK_ENC(s, enc) wxGTK_CONV_BACK(s)
      #define wxGTK_CONV_BACK_FONT(s, font) wxGTK_CONV_BACK(s)
      #define wxGTK_CONV_BACK_SYS(s) wxGTK_CONV_BACK(s)
diff --git a/include/wx/string.h b/include/wx/string.h

index 91f25b154e01a9864c52d15c2f21ae48383384e2..118232f1dbb2a4d7a4a9da6f996288d068a534b1 100644 (file)
--- a/include/wx/string.h
+++ b/include/wx/string.h
@@ -1268,7 +1268,7 @@ public:
  
      // conversion to/from UTF-8:
  #if wxUSE_UNICODE_UTF8
-    static wxString FromUTF8(const char *utf8)
+    static wxString FromUTF8Unchecked(const char *utf8)
      {
        if ( !utf8 )
            return wxEmptyString;
@@ -1276,16 +1276,35 @@ public:
        wxASSERT( wxStringOperations::IsValidUtf8String(utf8) );
        return FromImpl(wxStringImpl(utf8));
      }
-    static wxString FromUTF8(const char *utf8, size_t len)
+    static wxString FromUTF8Unchecked(const char *utf8, size_t len)
      {
        if ( !utf8 )
            return wxEmptyString;
        if ( len == npos )
-          return FromUTF8(utf8);
+          return FromUTF8Unchecked(utf8);
  
        wxASSERT( wxStringOperations::IsValidUtf8String(utf8, len) );
        return FromImpl(wxStringImpl(utf8, len));
      }
+
+    static wxString FromUTF8(const char *utf8)
+    {
+        if ( !utf8 || !wxStringOperations::IsValidUtf8String(utf8) )
+            return "";
+
+        return FromImpl(wxStringImpl(utf8));
+    }
+    static wxString FromUTF8(const char *utf8, size_t len)
+    {
+        if ( len == npos )
+            return FromUTF8(utf8);
+
+        if ( !utf8 || !wxStringOperations::IsValidUtf8String(utf8, len) )
+            return "";
+
+        return FromImpl(wxStringImpl(utf8, len));
+    }
+
      const char* utf8_str() const { return wx_str(); }
      const char* ToUTF8() const { return wx_str(); }
  
@@ -1293,10 +1312,15 @@ public:
      // internal UTF-8 representation
      size_t utf8_length() const { return m_impl.length(); }
  #elif wxUSE_UNICODE_WCHAR
-    static wxString FromUTF8(const char *utf8)
-      { return wxString(utf8, wxMBConvUTF8()); }
-    static wxString FromUTF8(const char *utf8, size_t len)
+    static wxString FromUTF8(const char *utf8, size_t len = npos)
        { return wxString(utf8, wxMBConvUTF8(), len); }
+    static wxString FromUTF8Unchecked(const char *utf8, size_t len = npos)
+    {
+        const wxString s(utf8, wxMBConvUTF8(), len);
+        wxASSERT_MSG( !utf8 || !*utf8 || !s.empty(),
+                      "string must be valid UTF-8" );
+        return s;
+    }
      const wxCharBuffer utf8_str() const { return mb_str(wxMBConvUTF8()); }
      const wxCharBuffer ToUTF8() const { return utf8_str(); }
  #else // ANSI
diff --git a/interface/wx/string.h b/interface/wx/string.h

index 844fe0ea9ad4e17e5c8352b3d1b5da92fe12c04b..b6cf403dc8d5a5a140ac3b1f8833cff599c29447 100644 (file)
--- a/interface/wx/string.h
+++ b/interface/wx/string.h
@@ -623,14 +623,39 @@ public:
      //@{
      /**
          Converts C string encoded in UTF-8 to wxString.
-        Note that this method assumes that @a s is a valid UTF-8 sequence and
-        doesn't do any validation in release builds, it's validity is only checked in
-        debug builds.
+
+        If @a s is not a valid UTF-8 string, an empty string is returned.
+
+        Notice that when using the UTF-8 build of wxWidgets there is a more
+        efficient alternative to this function called FromUTF8Unchecked()
+        which, unlike this one, doesn't check that the input string is valid.
+
+        @since 2.8.4
      */
      static wxString FromUTF8(const char* s);
      static wxString FromUTF8(const char* s, size_t len);
      //@}
  
+    //@{
+    /**
+        Converts C string encoded in UTF-8 to wxString without checking its
+        validity.
+
+        This method assumes that @a s is a valid UTF-8 sequence and doesn't do
+        any validation (although an assert failure is triggered in debug builds
+        if the string is invalid). Only use it if you are absolutely sure that
+        @a s is a correct UTF-8 string (e.g. because it comes from another
+        library using UTF-8) and if performance matters; otherwise use the
+        slower (in UTF-8 build) but safer FromUTF8(). Passing a bad UTF-8
+        string to this function will result in creating a corrupted wxString
+        and all the subsequent operations on it will be undefined.
+
+        @since 2.8.9
+    */
+    static wxString FromUTF8Unchecked(const char* s);
+    static wxString FromUTF8Unchecked(const char* s, size_t len);
+    //@}
+
      /**
          Returns the character at position @a n (read-only).
      */
diff --git a/src/xml/xml.cpp b/src/xml/xml.cpp

index 4941e6104b0dbb349e1a21fb49eafb61e6d9a40b..278adff98fe598f0f63a3234306ca7d0c8869425 100644 (file)
--- a/src/xml/xml.cpp
+++ b/src/xml/xml.cpp
@@ -485,7 +485,7 @@ static wxString CharToString(wxMBConv *conv,
  #endif // !wxUSE_UNICODE
  
      wxUnusedVar(conv);
-    return wxString::FromUTF8(s, len);
+    return wxString::FromUTF8Unchecked(s, len);
  }
  
  // returns true if the given string contains only whitespaces
author	Vadim Zeitlin <vadim@wxwidgets.org>
	Sat, 19 Jul 2008 19:59:59 +0000 (19:59 +0000)
committer	Vadim Zeitlin <vadim@wxwidgets.org>
	Sat, 19 Jul 2008 19:59:59 +0000 (19:59 +0000)
include/wx/cocoa/string.h		patch | blob | blame | history
include/wx/gtk/private.h		patch | blob | blame | history
include/wx/string.h		patch | blob | blame | history
interface/wx/string.h		patch | blob | blame | history
src/xml/xml.cpp		patch | blob | blame | history