// UTF-8 sequence lengths
// ---------------------------------------------------------------------------
-unsigned char wxStringOperationsUtf8::ms_utf8IterTable[256] = {
+const unsigned char wxStringOperationsUtf8::ms_utf8IterTable[256] = {
// single-byte sequences (ASCII):
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00..0F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10..1F
if ( !(b >= 0x80 && b <= 0xBF ) )
return false;
}
- else if ( b <= 0xEF ) // E1..EF
+ else if ( b == 0xED )
+ {
+ b = *(++c);
+ if ( !(b >= 0x80 && b <= 0x9F ) )
+ return false;
+ b = *(++c);
+ if ( !(b >= 0x80 && b <= 0xBF ) )
+ return false;
+ }
+ else if ( b <= 0xEF ) // E1..EC EE..EF
{
for ( int i = 0; i < 2; ++i )
{
#endif
-wxStringOperationsUtf8::Utf8CharBuffer
-wxStringOperationsUtf8::EncodeChar(const wxUniChar& ch)
+// NB: this is in this file and not unichar.cpp to keep all UTF-8 encoding
+// code in a single place
+wxUniChar::Utf8CharBuffer wxUniChar::AsUTF8() const
{
- Utf8CharBuffer buf;
+ Utf8CharBuffer buf = { "" }; // init to avoid g++ 4.1 warning with -O2
char *out = buf.data;
- wxUniChar::value_type code = ch.GetValue();
+ value_type code = GetValue();
// Char. number range | UTF-8 octet sequence
// (hexadecimal) | (binary)
}
wxUniChar
-wxStringOperationsUtf8::DecodeChar(wxStringImpl::const_iterator i)
+wxStringOperationsUtf8::DecodeNonAsciiChar(wxStringImpl::const_iterator i)
{
wxASSERT( IsValidUtf8LeadByte(*i) );