X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/b64f93b67f1fa4f93c0f1d2f2500197b3a32ec9f..9142a5ec3f9e9232d47ef09fe4a3c96f39816dc4:/src/common/strconv.cpp diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index fb90c2b03b..a433128011 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -5,7 +5,6 @@ // Ryan Norton, Fredrik Roubert (UTF7) // Modified by: // Created: 29/01/98 -// RCS-ID: $Id$ // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik // (c) 2000-2003 Vadim Zeitlin // (c) 2004 Ryan Norton, Fredrik Roubert @@ -460,7 +459,6 @@ wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const // because we want the buffer to always be NUL-terminated, even if the // input isn't (as otherwise the caller has no way to know its length) wxWCharBuffer wbuf(dstLen); - wbuf.data()[dstLen] = L'\0'; if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED ) { if ( outLen ) @@ -995,7 +993,7 @@ wxMBConvStrictUTF8::ToWChar(wchar_t *dst, size_t dstLen, for ( const char *p = src; ; p++ ) { - if ( !(srcLen == wxNO_LEN ? *p : srcLen) ) + if ( (srcLen == wxNO_LEN ? !*p : !srcLen) ) { // all done successfully, just add the trailing NULL if we are not // using explicit length @@ -1115,7 +1113,7 @@ wxMBConvStrictUTF8::FromWChar(char *dst, size_t dstLen, for ( const wchar_t *wp = src; ; wp++ ) { - if ( !(srcLen == wxNO_LEN ? *wp : srcLen) ) + if ( (srcLen == wxNO_LEN ? !*wp : !srcLen) ) { // all done successfully, just add the trailing NULL if we are not // using explicit length @@ -1145,6 +1143,8 @@ wxMBConvStrictUTF8::FromWChar(char *dst, size_t dstLen, { // skip the next char too as we decoded a surrogate wp++; + if ( srcLen != wxNO_LEN ) + srcLen--; } #else // wchar_t is UTF-32 code = *wp & 0x7fffffff; @@ -1230,7 +1230,10 @@ size_t wxMBConvUTF8::ToWChar(wchar_t *buf, size_t n, size_t len = 0; - while ((srcLen == wxNO_LEN ? 
*psz : srcLen--) && ((!buf) || (len < n))) + // The length can be either given explicitly or computed implicitly for the + // NUL-terminated strings. + const bool isNulTerminated = srcLen == wxNO_LEN; + while ((isNulTerminated ? *psz : srcLen--) && ((!buf) || (len < n))) { const char *opsz = psz; bool invalid = false; @@ -1364,10 +1367,17 @@ size_t wxMBConvUTF8::ToWChar(wchar_t *buf, size_t n, } } - if (srcLen == wxNO_LEN && buf && (len < n)) - *buf = 0; + if ( isNulTerminated ) + { + // Add the trailing NUL in this case if we have a large enough buffer. + if ( buf && (len < n) ) + *buf = 0; - return len + 1; + // And count it in any case. + len++; + } + + return len; } static inline bool isoctal(wchar_t wch) @@ -1383,7 +1393,10 @@ size_t wxMBConvUTF8::FromWChar(char *buf, size_t n, size_t len = 0; - while ((srcLen == wxNO_LEN ? *psz : srcLen--) && ((!buf) || (len < n))) + // The length can be either given explicitly or computed implicitly for the + // NUL-terminated strings. + const bool isNulTerminated = srcLen == wxNO_LEN; + while ((isNulTerminated ? *psz : srcLen--) && ((!buf) || (len < n))) { wxUint32 cc; @@ -1451,10 +1464,17 @@ size_t wxMBConvUTF8::FromWChar(char *buf, size_t n, } } - if (srcLen == wxNO_LEN && buf && (len < n)) - *buf = 0; + if ( isNulTerminated ) + { + // Add the trailing NUL in this case if we have a large enough buffer. + if ( buf && (len < n) ) + *buf = 0; + + // And count it in any case. 
+ len++; + } - return len + 1; + return len; } // ============================================================================ @@ -1645,7 +1665,7 @@ wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen, wxUint16 *outBuff = reinterpret_cast(dst); for ( size_t n = 0; n < srcLen; n++ ) { - wxUint16 cc[2]; + wxUint16 cc[2] = { 0 }; const size_t numChars = encode_utf16(*src++, cc); if ( numChars == wxCONV_FAILED ) return wxCONV_FAILED; @@ -1728,7 +1748,7 @@ wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen, wxUint16 *outBuff = reinterpret_cast(dst); for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ ) { - wxUint16 cc[2]; + wxUint16 cc[2] = { 0 }; const size_t numChars = encode_utf16(*src, cc); if ( numChars == wxCONV_FAILED ) return wxCONV_FAILED; @@ -1812,7 +1832,7 @@ wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen, size_t outLen = 0; for ( size_t n = 0; n < inLen; n++ ) { - wxUint16 cc[2]; + wxUint16 cc[2] = { 0 }; const size_t numChars = encode_utf16(*inBuff++, cc); if ( numChars == wxCONV_FAILED ) return wxCONV_FAILED; @@ -1890,7 +1910,7 @@ wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen, size_t outLen = 0; for ( size_t n = 0; n < inLen; n++, inBuff++ ) { - wxUint16 cc[2]; + wxUint16 cc[2] = { 0 }; const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc); if ( numChars == wxCONV_FAILED ) return wxCONV_FAILED; @@ -2183,7 +2203,7 @@ wxMBConv_iconv::wxMBConv_iconv(const char *name) { #if SIZEOF_WCHAR_T == 4 wxT("UCS-4"), -#elif SIZEOF_WCHAR_T = 2 +#elif SIZEOF_WCHAR_T == 2 wxT("UCS-2"), #endif NULL @@ -2945,7 +2965,41 @@ void wxCSConv::Init() { m_name = NULL; m_convReal = NULL; - m_deferred = true; +} + +void wxCSConv::SetEncoding(wxFontEncoding encoding) +{ + switch ( encoding ) + { + case wxFONTENCODING_MAX: + case wxFONTENCODING_SYSTEM: + if ( m_name ) + { + // It's ok to not have encoding value if we have a name for it. + m_encoding = wxFONTENCODING_SYSTEM; + } + else // No name neither. 
+ { + // Fall back to the system default encoding in this case (not + // sure how much sense this makes, but this is how the old + // code used to behave). +#if wxUSE_INTL + m_encoding = wxLocale::GetSystemEncoding(); + if ( m_encoding == wxFONTENCODING_SYSTEM ) +#endif // wxUSE_INTL + m_encoding = wxFONTENCODING_ISO8859_1; + } + break; + + case wxFONTENCODING_DEFAULT: + // wxFONTENCODING_DEFAULT is the same as US-ASCII in this context + m_encoding = wxFONTENCODING_ISO8859_1; + break; + + default: + // Just use the provided encoding. + m_encoding = encoding; + } } wxCSConv::wxCSConv(const wxString& charset) @@ -2958,20 +3012,12 @@ wxCSConv::wxCSConv(const wxString& charset) } #if wxUSE_FONTMAP - m_encoding = wxFontMapperBase::GetEncodingFromName(charset); - if ( m_encoding == wxFONTENCODING_MAX ) - { - // set to unknown/invalid value - m_encoding = wxFONTENCODING_SYSTEM; - } - else if ( m_encoding == wxFONTENCODING_DEFAULT ) - { - // wxFONTENCODING_DEFAULT is same as US-ASCII in this context - m_encoding = wxFONTENCODING_ISO8859_1; - } + SetEncoding(wxFontMapperBase::GetEncodingFromName(charset)); #else - m_encoding = wxFONTENCODING_SYSTEM; + SetEncoding(wxFONTENCODING_SYSTEM); #endif + + m_convReal = DoCreate(); } wxCSConv::wxCSConv(wxFontEncoding encoding) @@ -2985,7 +3031,9 @@ wxCSConv::wxCSConv(wxFontEncoding encoding) Init(); - m_encoding = encoding; + SetEncoding(encoding); + + m_convReal = DoCreate(); } wxCSConv::~wxCSConv() @@ -2999,7 +3047,9 @@ wxCSConv::wxCSConv(const wxCSConv& conv) Init(); SetName(conv.m_name); - m_encoding = conv.m_encoding; + SetEncoding(conv.m_encoding); + + m_convReal = DoCreate(); } wxCSConv& wxCSConv::operator=(const wxCSConv& conv) @@ -3007,7 +3057,9 @@ wxCSConv& wxCSConv::operator=(const wxCSConv& conv) Clear(); SetName(conv.m_name); - m_encoding = conv.m_encoding; + SetEncoding(conv.m_encoding); + + m_convReal = DoCreate(); return *this; } @@ -3015,18 +3067,15 @@ wxCSConv& wxCSConv::operator=(const wxCSConv& conv) void
wxCSConv::Clear() { free(m_name); - wxDELETE(m_convReal); - m_name = NULL; + + wxDELETE(m_convReal); } void wxCSConv::SetName(const char *charset) { - if (charset) - { + if ( charset ) m_name = wxStrdup(charset); - m_deferred = true; - } } #if wxUSE_FONTMAP @@ -3049,8 +3098,7 @@ wxMBConv *wxCSConv::DoCreate() const // check for the special case of ASCII or ISO8859-1 charset: as we have // special knowledge of it anyhow, we don't need to create a special // conversion object - if ( m_encoding == wxFONTENCODING_ISO8859_1 || - m_encoding == wxFONTENCODING_DEFAULT ) + if ( m_encoding == wxFONTENCODING_ISO8859_1 ) { // don't convert at all return NULL; @@ -3225,33 +3273,8 @@ wxMBConv *wxCSConv::DoCreate() const return NULL; } -void wxCSConv::CreateConvIfNeeded() const -{ - if ( m_deferred ) - { - wxCSConv *self = const_cast(this); - - // if we don't have neither the name nor the encoding, use the default - // encoding for this system - if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM ) - { -#if wxUSE_INTL - self->m_encoding = wxLocale::GetSystemEncoding(); -#else - // fallback to some reasonable default: - self->m_encoding = wxFONTENCODING_ISO8859_1; -#endif // wxUSE_INTL - } - - self->m_convReal = DoCreate(); - self->m_deferred = false; - } -} - bool wxCSConv::IsOk() const { - CreateConvIfNeeded(); - // special case: no convReal created for wxFONTENCODING_ISO8859_1 if ( m_encoding == wxFONTENCODING_ISO8859_1 ) return true; // always ok as we do it ourselves @@ -3264,8 +3287,6 @@ bool wxCSConv::IsOk() const size_t wxCSConv::ToWChar(wchar_t *dst, size_t dstLen, const char *src, size_t srcLen) const { - CreateConvIfNeeded(); - if (m_convReal) return m_convReal->ToWChar(dst, dstLen, src, srcLen); @@ -3288,8 +3309,6 @@ size_t wxCSConv::ToWChar(wchar_t *dst, size_t dstLen, size_t wxCSConv::FromWChar(char *dst, size_t dstLen, const wchar_t *src, size_t srcLen) const { - CreateConvIfNeeded(); - if (m_convReal) return m_convReal->FromWChar(dst, dstLen, src, srcLen); @@ 
-3325,12 +3344,8 @@ size_t wxCSConv::FromWChar(char *dst, size_t dstLen, size_t wxCSConv::GetMBNulLen() const { - CreateConvIfNeeded(); - if ( m_convReal ) - { return m_convReal->GetMBNulLen(); - } // otherwise, we are ISO-8859-1 return 1; @@ -3339,12 +3354,8 @@ size_t wxCSConv::GetMBNulLen() const #if wxUSE_UNICODE_UTF8 bool wxCSConv::IsUTF8() const { - CreateConvIfNeeded(); - if ( m_convReal ) - { return m_convReal->IsUTF8(); - } // otherwise, we are ISO-8859-1 return false; @@ -3443,8 +3454,9 @@ WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = wxGet_wxConvLibcPtr(); WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = wxGet_wxConvLocalPtr(); #ifdef __DARWIN__ -// The xnu kernel always communicates file paths in decomposed UTF-8. -// WARNING: Are we sure that CFString's conversion will cause decomposition? +// It is important to use this conversion object under Darwin as it ensures +// that Unicode strings are (re)composed correctly even though the xnu kernel uses +// the decomposed form internally (at least for file names). static wxMBConv_cf wxConvMacUTF8DObj(wxFONTENCODING_UTF8); #endif