git.saurik.com Git - wxWidgets.git/blobdiff - include/wx/stringops.h
fixed wxHTML parsing to run in O(n) even in UTF8 build
[wxWidgets.git] / include / wx / stringops.h
index 6451275bb1d1a7460d36e8d61e490970a586d34c..2e36900adc7ecb90908c2c1ce709857ff1a7fcea 100644 (file)
@@ -15,6 +15,7 @@
 #include "wx/chartype.h"
 #include "wx/stringimpl.h"
 #include "wx/unichar.h"
+#include "wx/buffer.h"
 
 // This header contains wxStringOperations "namespace" class that implements
 // elementary operations on string data as static methods; wxString methods and
@@ -62,13 +63,14 @@ struct WXDLLIMPEXP_BASE wxStringOperationsWchar
 struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
 {
     // checks correctness of UTF-8 sequence
-    static bool IsValidUtf8String(const char *c);
+    static bool IsValidUtf8String(const char *c,
+                                  size_t len = wxStringImpl::npos);
 #ifdef __WXDEBUG__
     static bool IsValidUtf8LeadByte(unsigned char c);
 #endif
 
     // table of offsets to skip forward when iterating over UTF-8 sequence
-    static unsigned char ms_utf8IterTable[256];
+    static const unsigned char ms_utf8IterTable[256];
 
 
     template<typename Iterator>
@@ -138,15 +140,10 @@ struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
         return dist;
     }
 
-    // buffer for single UTF-8 character
-    struct Utf8CharBuffer
-    {
-        char data[5];
-        operator const char*() const { return data; }
-    };
-
     // encodes the character as UTF-8:
-    static Utf8CharBuffer EncodeChar(const wxUniChar& ch);
+    typedef wxUniChar::Utf8CharBuffer Utf8CharBuffer;
+    static Utf8CharBuffer EncodeChar(const wxUniChar& ch)
+        { return ch.AsUTF8(); }
 
     // returns n copies of ch encoded in UTF-8 string
     static wxCharBuffer EncodeNChars(size_t n, const wxUniChar& ch);
@@ -159,7 +156,15 @@ struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
     }
 
     // decodes single UTF-8 character from UTF-8 string
-    static wxUniChar DecodeChar(wxStringImpl::const_iterator i);
+    static wxUniChar DecodeChar(wxStringImpl::const_iterator i)
+    {
+        if ( (unsigned char)*i < 0x80 )
+            return (int)*i;
+        return DecodeNonAsciiChar(i);
+    }
+
+private:
+    static wxUniChar DecodeNonAsciiChar(wxStringImpl::const_iterator i);
 };
 #endif // wxUSE_UNICODE_UTF8