git.saurik.com Git - wxWidgets.git/commitdiff
optimized wxMBConvStrictUTF8::ToWChar() for ASCII characters
author Václav Slavík <vslavik@fastmail.fm>
Tue, 28 Aug 2007 10:40:40 +0000 (10:40 +0000)
committer Václav Slavík <vslavik@fastmail.fm>
Tue, 28 Aug 2007 10:40:40 +0000 (10:40 +0000)
git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@48427 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775

src/common/strconv.cpp

index eeaa071f3101c348f9ca430390622019f19f5e87..609f44b9d171c4655dd421b6cca8c934c78340f2 100644 (file)
@@ -778,58 +778,73 @@ wxMBConvStrictUTF8::ToWChar(wchar_t *dst, size_t dstLen,
             return written;
         }
 
-        unsigned char c = *p;
-        unsigned len = tableUtf8Lengths[c];
-        if ( !len )
+        if ( out && !dstLen-- )
             break;
 
-        if ( srcLen < len ) // the test works for wxNO_LEN too
-            break;
+        wxUint32 code;
+        unsigned char c = *p;
 
-        if ( srcLen != wxNO_LEN )
-            srcLen -= len;
+        if ( c < 0x80 )
+        {
+            if ( srcLen == 0 ) // the test works for wxNO_LEN too
+                break;
 
-        if ( out && !dstLen-- )
-            break;
+            if ( srcLen != wxNO_LEN )
+                srcLen--;
 
+            code = c;
+        }
+        else
+        {
+            unsigned len = tableUtf8Lengths[c];
+            if ( !len )
+                break;
 
-        //   Char. number range   |        UTF-8 octet sequence
-        //      (hexadecimal)     |              (binary)
-        //  ----------------------+---------------------------------------------
-        //  0000 0000 - 0000 007F | 0xxxxxxx
-        //  0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
-        //  0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
-        //  0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-        //
-        //  Code point value is stored in bits marked with 'x', lowest-order bit
-        //  of the value on the right side in the diagram above.
-        //                                                       (from RFC 3629)
+            if ( srcLen < len ) // the test works for wxNO_LEN too
+                break;
 
-        // mask to extract lead byte's value ('x' bits above), by sequence length:
-        static const unsigned char leadValueMask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
+            if ( srcLen != wxNO_LEN )
+                srcLen -= len;
 
-        // mask and value of lead byte's most significant bits, by length:
-        static const unsigned char leadMarkerMask[] = { 0x80, 0xE0, 0xF0, 0xF8 };
-        static const unsigned char leadMarkerVal[] = { 0x00, 0xC0, 0xE0, 0xF0 };
+            //   Char. number range   |        UTF-8 octet sequence
+            //      (hexadecimal)     |              (binary)
+            //  ----------------------+----------------------------------------
+            //  0000 0000 - 0000 007F | 0xxxxxxx
+            //  0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
+            //  0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
+            //  0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+            //
+            //  Code point value is stored in bits marked with 'x',
+            //  lowest-order bit of the value on the right side in the diagram
+            //  above.                                         (from RFC 3629)
 
-        len--; // it's more convenient to work with 0-based length here
+            // mask to extract lead byte's value ('x' bits above), by sequence
+            // length:
+            static const unsigned char leadValueMask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
 
-        // extract the lead byte's value bits:
-        if ( (c & leadMarkerMask[len]) != leadMarkerVal[len] )
-            break;
+            // mask and value of lead byte's most significant bits, by length:
+            static const unsigned char leadMarkerMask[] = { 0x80, 0xE0, 0xF0, 0xF8 };
+            static const unsigned char leadMarkerVal[] = { 0x00, 0xC0, 0xE0, 0xF0 };
 
-        wxUint32 code = c & leadValueMask[len];
+            len--; // it's more convenient to work with 0-based length here
 
-        // all remaining bytes, if any, are handled in the same way regardless of
-        // sequence's length:
-        for ( ; len; --len )
-        {
-            c = *++p;
-            if ( (c & 0xC0) != 0x80 )
-                return wxCONV_FAILED;
+            // extract the lead byte's value bits:
+            if ( (c & leadMarkerMask[len]) != leadMarkerVal[len] )
+                break;
 
-            code <<= 6;
-            code |= c & 0x3F;
+            code = c & leadValueMask[len];
+
+            // all remaining bytes, if any, are handled in the same way
+            // regardless of sequence's length:
+            for ( ; len; --len )
+            {
+                c = *++p;
+                if ( (c & 0xC0) != 0x80 )
+                    return wxCONV_FAILED;
+
+                code <<= 6;
+                code |= c & 0x3F;
+            }
         }
 
 #ifdef WC_UTF16