X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/17a1ebd101f0653e69736416a2a28d0ada423141..86948c99a6f0fd177b09b3db0ef702739ec62a27:/src/common/strconv.cpp diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index 63de22405c..7b0e78a69d 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Name: strconv.cpp +// Name: src/common/strconv.cpp // Purpose: Unicode conversion classes // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik, // Ryan Norton, Fredrik Roubert (UTF7) @@ -78,6 +78,10 @@ #define TRACE_STRCONV _T("strconv") +#if SIZEOF_WCHAR_T == 2 + #define WC_UTF16 +#endif + // ============================================================================ // implementation // ============================================================================ @@ -523,7 +527,7 @@ size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const { // BASE64 encode string unsigned int lsb, d, l; - for (d = 0, l = 0;; psz++) + for (d = 0, l = 0; /*nothing*/; psz++) { for (lsb = 0; lsb < 2; lsb ++) { @@ -653,7 +657,7 @@ size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const } #else // !WC_UTF16 if (buf) - *buf++ = res; + *buf++ = (wchar_t)res; len++; #endif // WC_UTF16/!WC_UTF16 } @@ -674,7 +678,7 @@ size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const len += pa; #else if (buf) - *buf++ = wxUnicodePUA + (unsigned char)*opsz; + *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz); opsz++; len++; #endif @@ -902,7 +906,7 @@ size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) con return pa; if (buf) - *buf++ = cc; + *buf++ = (wchar_t)cc; len++; psz += pa * sizeof(wxUint16); } @@ -962,7 +966,7 @@ size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const return pa; if (buf) - *buf++ = cc; + *buf++ = (wchar_t)cc; len++; psz += pa * sizeof(wxUint16); @@ -1163,7 +1167,7 @@ size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) con while (*(wxUint32*)psz && (!buf || len < n)) { if (buf) - *buf++ = *(wxUint32*)psz; + *buf++ = (wchar_t)(*(wxUint32*)psz); len++; psz += sizeof(wxUint32); } @@ -1349,6 +1353,8 @@ wxMBConv_iconv::wxMBConv_iconv(const wxChar *name) // check for charset that represents wchar_t: if ( ms_wcCharsetName.empty() ) { + wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:")); + #if wxUSE_FONTMAP const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC); #else // !wxUSE_FONTMAP @@ -1363,7 +1369,7 @@ wxMBConv_iconv::wxMBConv_iconv(const wxChar *name) }; #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP - for ( ; *names; ++names ) + for ( ; *names && ms_wcCharsetName.empty(); ++names ) { const wxString nameCS(*names); @@ -1375,10 +1381,15 @@ wxMBConv_iconv::wxMBConv_iconv(const wxChar *name) nameXE += _T("LE"); #endif + wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""), + nameXE.c_str()); + m2w = iconv_open(nameXE.ToAscii(), cname); if ( m2w == ICONV_T_INVALID ) { // try charset w/o bytesex info (e.g. "UCS4") + wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""), + nameCS.c_str()); m2w = iconv_open(nameCS.ToAscii(), cname); // and check for bytesex ourselves: @@ -1584,7 +1595,6 @@ size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const if (ICONV_FAILED(cres, inbuf)) { - //VS: it is ok if iconv fails, hence trace only wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); return (size_t)-1; } @@ -1642,7 +1652,28 @@ public: // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is // explicitly ill-formed according to RFC 2152) neither so we don't // even have any fallback here... - int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS; + // + // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or + // Win XP or newer and if it is specified on older versions, conversion + // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS) + // fails. So we can only use the flag on newer Windows versions. + // Additionally, the flag is not supported by UTF7, symbol and CJK + // encodings. See here: + // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx + // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp + int flags = 0; + if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL && + m_CodePage < 50000 && + IsAtLeastWin2kSP4() ) + { + flags = MB_ERR_INVALID_CHARS; + } + else if ( m_CodePage == CP_UTF8 ) + { + // Avoid round-trip in the special case of UTF-8 by using our + // own UTF-8 conversion code: + return wxMBConvUTF8().MB2WC(buf, psz, n); + } const size_t len = ::MultiByteToWideChar ( @@ -1653,11 +1684,40 @@ public: buf, // output string buf ? n : 0 // size of output buffer ); + if ( !len ) + { + // function totally failed + return (size_t)-1; + } + + // if we were really converting and didn't use MB_ERR_INVALID_CHARS, + // check if we succeeded, by doing a double trip: + if ( !flags && buf ) + { + wxCharBuffer mbBuf(n); + if ( ::WideCharToMultiByte + ( + m_CodePage, + 0, + buf, + -1, + mbBuf.data(), + n, + NULL, + NULL + ) == 0 || + strcmp(mbBuf, psz) != 0 ) + { + // we didn't obtain the same thing we started from, hence + // the conversion was lossy and we consider that it failed + return (size_t)-1; + } + } // note that it returns count of written chars for buf != NULL and size // of the needed buffer for buf == NULL so in either case the length of // the string (which never includes the terminating NUL) is one less - return len ? len - 1 : (size_t)-1; + return len - 1; } size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const @@ -1772,6 +1832,33 @@ private: return s_isWin98Or2k == 1; } + static bool IsAtLeastWin2kSP4() + { +#ifdef __WXWINCE__ + return false; +#else + static int s_isAtLeastWin2kSP4 = -1; + + if ( s_isAtLeastWin2kSP4 == -1 ) + { + OSVERSIONINFOEX ver; + + memset(&ver, 0, sizeof(ver)); + ver.dwOSVersionInfoSize = sizeof(ver); + GetVersionEx((OSVERSIONINFO*)&ver); + + s_isAtLeastWin2kSP4 = + ((ver.dwMajorVersion > 5) || // Vista+ + (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003 + (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 && + ver.wServicePackMajor >= 4)) // 2000 SP4+ + ? 1 : 0; + } + + return s_isAtLeastWin2kSP4 == 1; +#endif + } + long m_CodePage; }; @@ -2452,7 +2539,11 @@ wxCSConv::wxCSConv(const wxChar *charset) SetName(charset); } +#if wxUSE_FONTMAP + m_encoding = wxFontMapperBase::GetEncodingFromName(charset); +#else m_encoding = wxFONTENCODING_SYSTEM; +#endif } wxCSConv::wxCSConv(wxFontEncoding encoding) @@ -2532,7 +2623,8 @@ wxMBConv *wxCSConv::DoCreate() const // check for the special case of ASCII or ISO8859-1 charset: as we have // special knowledge of it anyhow, we don't need to create a special // conversion object - if ( m_encoding == wxFONTENCODING_ISO8859_1 ) + if ( m_encoding == wxFONTENCODING_ISO8859_1 || + m_encoding == wxFONTENCODING_DEFAULT ) { // don't convert at all return NULL; @@ -2839,5 +2931,3 @@ WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc, wxConvUTF8; #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T - -