fixing the usage of hishape

[wxWidgets.git] / src / common / stringops.cpp
diff --git a/src/common/stringops.cpp b/src/common/stringops.cpp

index ac0455da53cd8b75d8f6333f044be29f9ea7972d..ae182c6cfa09f4079c3c8092baf20d80afdd0cc2 100644 (file)
--- a/src/common/stringops.cpp
+++ b/src/common/stringops.cpp
@@ -34,7 +34,7 @@
  // UTF-8 sequences lengths
  // ---------------------------------------------------------------------------
  
  // UTF-8 sequences lengths
  // ---------------------------------------------------------------------------
  
-unsigned char wxStringOperationsUtf8::ms_utf8IterTable[256] = {
+const unsigned char wxStringOperationsUtf8::ms_utf8IterTable[256] = {
      // single-byte sequences (ASCII):
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 00..0F
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 10..1F
      // single-byte sequences (ASCII):
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 00..0F
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 10..1F
@@ -87,17 +87,26 @@ unsigned char wxStringOperationsUtf8::ms_utf8IterTable[256] = {
  // U+100000..U+10FFFF |  F4      |  80..8F  |  80..BF  |  80..BF  |
  // -------------------+----------+----------+----------+----------+
  
  // U+100000..U+10FFFF |  F4      |  80..8F  |  80..BF  |  80..BF  |
  // -------------------+----------+----------+----------+----------+
  
-bool wxStringOperationsUtf8::IsValidUtf8String(const char *str)
+bool wxStringOperationsUtf8::IsValidUtf8String(const char *str, size_t len)
  {
      if ( !str )
          return true; // empty string is UTF8 string
  
      const unsigned char *c = (const unsigned char*)str;
  {
      if ( !str )
          return true; // empty string is UTF8 string
  
      const unsigned char *c = (const unsigned char*)str;
+    const unsigned char * const end = (len == wxStringImpl::npos) ? NULL : c + len;
  
  
-    for ( ; *c; ++c )
+    for ( ; c != end && *c; ++c )
      {
          unsigned char b = *c;
  
      {
          unsigned char b = *c;
  
+        if ( end != NULL )
+        {
+            // if the string is not NUL-terminated, verify that enough bytes
+            // are left in it for the current character's encoding:
+            if ( c + ms_utf8IterTable[*c] > end )
+                return false;
+        }
+
          if ( b <= 0x7F ) // 00..7F
              continue;
  
          if ( b <= 0x7F ) // 00..7F
              continue;
  
@@ -181,13 +190,14 @@ bool wxStringOperationsUtf8::IsValidUtf8LeadByte(unsigned char c)
  #endif
  
  
  #endif
  
  
-wxStringOperationsUtf8::Utf8CharBuffer
-wxStringOperationsUtf8::EncodeChar(const wxUniChar& ch)
+// NB: this is defined in this file rather than in unichar.cpp to keep all
+//     UTF-8 encoding code in a single place
+wxUniChar::Utf8CharBuffer wxUniChar::AsUTF8() const
  {
      Utf8CharBuffer buf;
      char *out = buf.data;
  
  {
      Utf8CharBuffer buf;
      char *out = buf.data;
  
-    wxUniChar::value_type code = ch.GetValue();
+    value_type code = GetValue();
  
      //    Char. number range   |        UTF-8 octet sequence
      //       (hexadecimal)     |              (binary)
  
      //    Char. number range   |        UTF-8 octet sequence
      //       (hexadecimal)     |              (binary)
@@ -239,7 +249,7 @@ wxStringOperationsUtf8::EncodeChar(const wxUniChar& ch)
  }
  
  wxUniChar
  }
  
  wxUniChar
-wxStringOperationsUtf8::DecodeChar(wxStringImpl::const_iterator i)
+wxStringOperationsUtf8::DecodeNonAsciiChar(wxStringImpl::const_iterator i)
  {
      wxASSERT( IsValidUtf8LeadByte(*i) );
  
  {
      wxASSERT( IsValidUtf8LeadByte(*i) );