Make code reading BMP files more robust.

[wxWidgets.git] / src / common / stringops.cpp
diff --git a/src/common/stringops.cpp b/src/common/stringops.cpp
index ae182c6cfa09f4079c3c8092baf20d80afdd0cc2..ff44bcd182d97ae9bc32752e92ac564da417d332 100644
--- a/src/common/stringops.cpp
+++ b/src/common/stringops.cpp
@@ -131,7 +131,16 @@ bool wxStringOperationsUtf8::IsValidUtf8String(const char *str, size_t len)
              if ( !(b >= 0x80 && b <= 0xBF ) )
                  return false;
          }
-        else if ( b <= 0xEF ) // E1..EF
+        else if ( b == 0xED )
+        {
+            b = *(++c);
+            if ( !(b >= 0x80 && b <= 0x9F ) )
+                return false;
+            b = *(++c);
+            if ( !(b >= 0x80 && b <= 0xBF ) )
+                return false;
+        }
+        else if ( b <= 0xEF ) // E1..EC EE..EF
          {
              for ( int i = 0; i < 2; ++i )
              {
@@ -182,19 +191,11 @@ bool wxStringOperationsUtf8::IsValidUtf8String(const char *str, size_t len)
      return true;
  }
  
-#ifdef __WXDEBUG__
-bool wxStringOperationsUtf8::IsValidUtf8LeadByte(unsigned char c)
-{
-    return (c <= 0x7F) || (c >= 0xC2 && c <= 0xF4);
-}
-#endif
-
-
  // NB: this is in this file and not unichar.cpp to keep all UTF-8 encoding
  //     code in single place
  wxUniChar::Utf8CharBuffer wxUniChar::AsUTF8() const
  {
-    Utf8CharBuffer buf;
+    Utf8CharBuffer buf = { "" }; // init to avoid g++ 4.1 warning with -O2
      char *out = buf.data;
  
      value_type code = GetValue();
@@ -241,7 +242,7 @@ wxUniChar::Utf8CharBuffer wxUniChar::AsUTF8() const
      }
      else
      {
-        wxFAIL_MSG( _T("trying to encode undefined Unicode character") );
+        wxFAIL_MSG( wxT("trying to encode undefined Unicode character") );
          out[0] = 0;
      }
  
@@ -253,9 +254,8 @@ wxStringOperationsUtf8::DecodeNonAsciiChar(wxStringImpl::const_iterator i)
  {
      wxASSERT( IsValidUtf8LeadByte(*i) );
  
-    wxUniChar::value_type code = 0;
      size_t len = GetUtf8CharLength(*i);
-    wxASSERT_MSG( len <= 4, _T("invalid UTF-8 sequence length") );
+    wxASSERT_MSG( len <= 4, wxT("invalid UTF-8 sequence length") );
  
      //    Char. number range   |        UTF-8 octet sequence
      //       (hexadecimal)     |              (binary)
@@ -271,7 +271,7 @@ wxStringOperationsUtf8::DecodeNonAsciiChar(wxStringImpl::const_iterator i)
  
      // mask to extract lead byte's value ('x' bits above), by sequence's length:
      static const unsigned char s_leadValueMask[4] =  { 0x7F, 0x1F, 0x0F, 0x07 };
-#ifdef __WXDEBUG__
+#if wxDEBUG_LEVEL
      // mask and value of lead byte's most significant bits, by length:
      static const unsigned char s_leadMarkerMask[4] = { 0x80, 0xE0, 0xF0, 0xF8 };
      static const unsigned char s_leadMarkerVal[4] =  { 0x00, 0xC0, 0xE0, 0xF0 };
@@ -280,15 +280,15 @@ wxStringOperationsUtf8::DecodeNonAsciiChar(wxStringImpl::const_iterator i)
      // extract the lead byte's value bits:
      wxASSERT_MSG( ((unsigned char)*i & s_leadMarkerMask[len-1]) ==
                    s_leadMarkerVal[len-1],
-                  _T("invalid UTF-8 lead byte") );
-    code = (unsigned char)*i & s_leadValueMask[len-1];
+                  wxT("invalid UTF-8 lead byte") );
+    wxUniChar::value_type code = (unsigned char)*i & s_leadValueMask[len-1];
  
      // all remaining bytes, if any, are handled in the same way regardless of
      // sequence's length:
      for ( ++i ; len > 1; --len, ++i )
      {
          wxASSERT_MSG( ((unsigned char)*i & 0xC0) == 0x80,
-                      _T("invalid UTF-8 byte") );
+                      wxT("invalid UTF-8 byte") );
  
          code <<= 6;
          code |= (unsigned char)*i & 0x3F;