X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/467175ab3f2177aa824ceb7b67934fd54ce4e8e0..0bbe61b8c18a1795189f0cf73cc61c14a0fb846d:/src/common/stringops.cpp?ds=sidebyside diff --git a/src/common/stringops.cpp b/src/common/stringops.cpp index ac0455da53..ae182c6cfa 100644 --- a/src/common/stringops.cpp +++ b/src/common/stringops.cpp @@ -34,7 +34,7 @@ // UTF-8 sequences lengths // --------------------------------------------------------------------------- -unsigned char wxStringOperationsUtf8::ms_utf8IterTable[256] = { +const unsigned char wxStringOperationsUtf8::ms_utf8IterTable[256] = { // single-byte sequences (ASCII): 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00..0F 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10..1F @@ -87,17 +87,26 @@ unsigned char wxStringOperationsUtf8::ms_utf8IterTable[256] = { // U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF | // -------------------+----------+----------+----------+----------+ -bool wxStringOperationsUtf8::IsValidUtf8String(const char *str) +bool wxStringOperationsUtf8::IsValidUtf8String(const char *str, size_t len) { if ( !str ) return true; // empty string is UTF8 string const unsigned char *c = (const unsigned char*)str; + const unsigned char * const end = (len == wxStringImpl::npos) ? NULL : c + len; - for ( ; *c; ++c ) + for ( ; c != end && *c; ++c ) { unsigned char b = *c; + if ( end != NULL ) + { + // if the string is not NULL-terminated, verify we have enough + // bytes in it left for current character's encoding: + if ( c + ms_utf8IterTable[*c] > end ) + return false; + } + if ( b <= 0x7F ) // 00..7F continue; @@ -181,13 +190,14 @@ bool wxStringOperationsUtf8::IsValidUtf8LeadByte(unsigned char c) #endif -wxStringOperationsUtf8::Utf8CharBuffer -wxStringOperationsUtf8::EncodeChar(const wxUniChar& ch) +// NB: this is in this file and not unichar.cpp to keep all UTF-8 encoding +// code in single place +wxUniChar::Utf8CharBuffer wxUniChar::AsUTF8() const { Utf8CharBuffer buf; char *out = buf.data; - wxUniChar::value_type code = ch.GetValue(); + value_type code = GetValue(); // Char. number range | UTF-8 octet sequence // (hexadecimal) | (binary) @@ -239,7 +249,7 @@ wxStringOperationsUtf8::EncodeChar(const wxUniChar& ch) } wxUniChar -wxStringOperationsUtf8::DecodeChar(wxStringImpl::const_iterator i) +wxStringOperationsUtf8::DecodeNonAsciiChar(wxStringImpl::const_iterator i) { wxASSERT( IsValidUtf8LeadByte(*i) );