X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/422e411e954e0232d9765665a7640f095ca3c9d5..86948c99a6f0fd177b09b3db0ef702739ec62a27:/src/common/strconv.cpp diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index 74e3a3be0b..7b0e78a69d 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Name: strconv.cpp +// Name: src/common/strconv.cpp // Purpose: Unicode conversion classes // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik, // Ryan Norton, Fredrik Roubert (UTF7) @@ -20,10 +20,6 @@ // headers // ---------------------------------------------------------------------------- -#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA) - #pragma implementation "strconv.h" -#endif - // For compilers that support precompilation, includes "wx.h". #include "wx/wxprec.h" @@ -82,6 +78,10 @@ #define TRACE_STRCONV _T("strconv") +#if SIZEOF_WCHAR_T == 2 + #define WC_UTF16 +#endif + // ============================================================================ // implementation // ============================================================================ @@ -527,7 +527,7 @@ size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const { // BASE64 encode string unsigned int lsb, d, l; - for (d = 0, l = 0;; psz++) + for (d = 0, l = 0; /*nothing*/; psz++) { for (lsb = 0; lsb < 2; lsb ++) { @@ -657,7 +657,7 @@ size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const } #else // !WC_UTF16 if (buf) - *buf++ = res; + *buf++ = (wchar_t)res; len++; #endif // WC_UTF16/!WC_UTF16 } @@ -678,7 +678,7 @@ size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const len += pa; #else if (buf) - *buf++ = wxUnicodePUA + (unsigned char)*opsz; + *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz); opsz++; len++; #endif @@ -690,11 +690,11 @@ size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const 
{ if ( buf && len + 3 < n ) { - unsigned char n = *opsz; + unsigned char on = *opsz; *buf++ = L'\\'; - *buf++ = (wchar_t)( L'0' + n / 0100 ); - *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 ); - *buf++ = (wchar_t)( L'0' + n % 010 ); + *buf++ = (wchar_t)( L'0' + on / 0100 ); + *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 ); + *buf++ = (wchar_t)( L'0' + on % 010 ); } opsz++; len += 4; @@ -906,7 +906,7 @@ size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) con return pa; if (buf) - *buf++ = cc; + *buf++ = (wchar_t)cc; len++; psz += pa * sizeof(wxUint16); } @@ -966,7 +966,7 @@ size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const return pa; if (buf) - *buf++ = cc; + *buf++ = (wchar_t)cc; len++; psz += pa * sizeof(wxUint16); @@ -1167,7 +1167,7 @@ size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) con while (*(wxUint32*)psz && (!buf || len < n)) { if (buf) - *buf++ = *(wxUint32*)psz; + *buf++ = (wchar_t)(*(wxUint32*)psz); len++; psz += sizeof(wxUint32); } @@ -1353,6 +1353,8 @@ wxMBConv_iconv::wxMBConv_iconv(const wxChar *name) // check for charset that represents wchar_t: if ( ms_wcCharsetName.empty() ) { + wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:")); + #if wxUSE_FONTMAP const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC); #else // !wxUSE_FONTMAP @@ -1367,23 +1369,28 @@ wxMBConv_iconv::wxMBConv_iconv(const wxChar *name) }; #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP - for ( ; *names; ++names ) + for ( ; *names && ms_wcCharsetName.empty(); ++names ) { - const wxString name(*names); + const wxString nameCS(*names); // first try charset with explicit bytesex info (e.g. 
"UCS-4LE"): - wxString nameXE(name); + wxString nameXE(nameCS); #ifdef WORDS_BIGENDIAN nameXE += _T("BE"); #else // little endian nameXE += _T("LE"); #endif + wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""), + nameXE.c_str()); + m2w = iconv_open(nameXE.ToAscii(), cname); if ( m2w == ICONV_T_INVALID ) { // try charset w/o bytesex info (e.g. "UCS4") - m2w = iconv_open(name.ToAscii(), cname); + wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""), + nameCS.c_str()); + m2w = iconv_open(nameCS.ToAscii(), cname); // and check for bytesex ourselves: if ( m2w != ICONV_T_INVALID ) @@ -1408,11 +1415,11 @@ wxMBConv_iconv::wxMBConv_iconv(const wxChar *name) { wxLogLastError(wxT("iconv")); wxLogError(_("Conversion to charset '%s' doesn't work."), - name.c_str()); + nameCS.c_str()); } else // ok, can convert to this encoding, remember it { - ms_wcCharsetName = name; + ms_wcCharsetName = nameCS; ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0]; } } @@ -1425,7 +1432,7 @@ wxMBConv_iconv::wxMBConv_iconv(const wxChar *name) wxLogTrace(TRACE_STRCONV, wxT("iconv wchar_t charset is \"%s\"%s"), - ms_wcCharsetName.empty() ? "" + ms_wcCharsetName.empty() ? _T("") : ms_wcCharsetName.c_str(), ms_wcNeedsSwap ? 
_T(" (needs swap)") : _T("")); @@ -1489,8 +1496,8 @@ size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const if (ms_wcNeedsSwap) { // convert to native endianness - for ( unsigned n = 0; n < res; n++ ) - buf[n] = WC_BSWAP(buf[n]); + for ( unsigned i = 0; i < res; i++ ) + buf[i] = WC_BSWAP(buf[i]); } // NB: iconv was given only strlen(psz) characters on input, and so @@ -1534,7 +1541,8 @@ size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex); #endif - size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T; + size_t inlen = wxWcslen(psz); + size_t inbuf = inlen * SIZEOF_WCHAR_T; size_t outbuf = n; size_t res, cres; @@ -1546,9 +1554,9 @@ size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const // (doing WC_BSWAP twice on the original buffer won't help, as it // could be in read-only memory, or be accessed in some other thread) tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T); - for ( size_t n = 0; n < inbuf; n++ ) - tmpbuf[n] = WC_BSWAP(psz[n]); - tmpbuf[inbuf] = L'\0'; + for ( size_t i = 0; i < inlen; i++ ) + tmpbuf[i] = WC_BSWAP(psz[i]); + tmpbuf[inlen] = L'\0'; psz = tmpbuf; } @@ -1587,7 +1595,6 @@ size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const if (ICONV_FAILED(cres, inbuf)) { - //VS: it is ok if iconv fails, hence trace only wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); return (size_t)-1; } @@ -1645,7 +1652,28 @@ public: // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is // explicitly ill-formed according to RFC 2152) neither so we don't // even have any fallback here... - int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS; + // + // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or + // Win XP or newer and if it is specified on older versions, conversion + // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS) + // fails. 
So we can only use the flag on newer Windows versions. + // Additionally, the flag is not supported by UTF7, symbol and CJK + // encodings. See here: + // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx + // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp + int flags = 0; + if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL && + m_CodePage < 50000 && + IsAtLeastWin2kSP4() ) + { + flags = MB_ERR_INVALID_CHARS; + } + else if ( m_CodePage == CP_UTF8 ) + { + // Avoid round-trip in the special case of UTF-8 by using our + // own UTF-8 conversion code: + return wxMBConvUTF8().MB2WC(buf, psz, n); + } const size_t len = ::MultiByteToWideChar ( @@ -1656,11 +1684,40 @@ public: buf, // output string buf ? n : 0 // size of output buffer ); + if ( !len ) + { + // function totally failed + return (size_t)-1; + } + + // if we were really converting and didn't use MB_ERR_INVALID_CHARS, + // check if we succeeded, by doing a double trip: + if ( !flags && buf ) + { + wxCharBuffer mbBuf(n); + if ( ::WideCharToMultiByte + ( + m_CodePage, + 0, + buf, + -1, + mbBuf.data(), + n, + NULL, + NULL + ) == 0 || + strcmp(mbBuf, psz) != 0 ) + { + // we didn't obtain the same thing we started from, hence + // the conversion was lossy and we consider that it failed + return (size_t)-1; + } + } // note that it returns count of written chars for buf != NULL and size // of the needed buffer for buf == NULL so in either case the length of // the string (which never includes the terminating NUL) is one less - return len ? 
len - 1 : (size_t)-1; + return len - 1; } size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const @@ -1775,6 +1832,33 @@ private: return s_isWin98Or2k == 1; } + static bool IsAtLeastWin2kSP4() + { +#ifdef __WXWINCE__ + return false; +#else + static int s_isAtLeastWin2kSP4 = -1; + + if ( s_isAtLeastWin2kSP4 == -1 ) + { + OSVERSIONINFOEX ver; + + memset(&ver, 0, sizeof(ver)); + ver.dwOSVersionInfoSize = sizeof(ver); + GetVersionEx((OSVERSIONINFO*)&ver); + + s_isAtLeastWin2kSP4 = + ((ver.dwMajorVersion > 5) || // Vista+ + (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003 + (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 && + ver.wServicePackMajor >= 4)) // 2000 SP4+ + ? 1 : 0; + } + + return s_isAtLeastWin2kSP4 == 1; +#endif + } + long m_CodePage; }; @@ -2455,7 +2539,11 @@ wxCSConv::wxCSConv(const wxChar *charset) SetName(charset); } +#if wxUSE_FONTMAP + m_encoding = wxFontMapperBase::GetEncodingFromName(charset); +#else m_encoding = wxFONTENCODING_SYSTEM; +#endif } wxCSConv::wxCSConv(wxFontEncoding encoding) @@ -2535,7 +2623,8 @@ wxMBConv *wxCSConv::DoCreate() const // check for the special case of ASCII or ISO8859-1 charset: as we have // special knowledge of it anyhow, we don't need to create a special // conversion object - if ( m_encoding == wxFONTENCODING_ISO8859_1 ) + if ( m_encoding == wxFONTENCODING_ISO8859_1 || + m_encoding == wxFONTENCODING_DEFAULT ) { // don't convert at all return NULL; @@ -2842,5 +2931,3 @@ WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc, wxConvUTF8; #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T - -