/////////////////////////////////////////////////////////////////////////////
-// Name: strconv.cpp
+// Name: src/common/strconv.cpp
// Purpose: Unicode conversion classes
// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
// Ryan Norton, Fredrik Roubert (UTF7)
#define TRACE_STRCONV _T("strconv") // NOTE(review): presumably the trace mask name used by this file's debug output -- confirm against its users
+#if SIZEOF_WCHAR_T == 2 // only when wchar_t is 2 bytes wide
+ #define WC_UTF16 // tested by the "WC_UTF16/!WC_UTF16" conditional sections of this file; presumably means wchar_t strings hold UTF-16 -- confirm
+#endif // SIZEOF_WCHAR_T == 2
+
// ============================================================================
// implementation
// ============================================================================
{
// BASE64 encode string
unsigned int lsb, d, l;
- for (d = 0, l = 0;; psz++)
+ for (d = 0, l = 0; /*nothing*/; psz++)
{
for (lsb = 0; lsb < 2; lsb ++)
{
}
#else // !WC_UTF16
if (buf)
- *buf++ = res;
+ *buf++ = (wchar_t)res;
len++;
#endif // WC_UTF16/!WC_UTF16
}
len += pa;
#else
if (buf)
- *buf++ = wxUnicodePUA + (unsigned char)*opsz;
+ *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
opsz++;
len++;
#endif
return pa;
if (buf)
- *buf++ = cc;
+ *buf++ = (wchar_t)cc;
len++;
psz += pa * sizeof(wxUint16);
}
return pa;
if (buf)
- *buf++ = cc;
+ *buf++ = (wchar_t)cc;
len++;
psz += pa * sizeof(wxUint16);
while (*(wxUint32*)psz && (!buf || len < n))
{
if (buf)
- *buf++ = *(wxUint32*)psz;
+ *buf++ = (wchar_t)(*(wxUint32*)psz);
len++;
psz += sizeof(wxUint32);
}
};
#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
- for ( ; *names; ++names )
+ for ( ; *names && ms_wcCharsetName.empty(); ++names )
{
const wxString nameCS(*names);
// own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
// explicitly ill-formed according to RFC 2152) neither so we don't
// even have any fallback here...
- int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
+ //
+ // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4, Win XP
+ // or newer; if it is specified on older versions, conversion from
+ // CP_UTF8 (which only accepts 0 or MB_ERR_INVALID_CHARS as flags)
+ // fails. So we can only use the flag on newer Windows versions.
+ // Additionally, the flag is not supported by UTF7, symbol and CJK
+ // encodings. See here:
+ // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
+ // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
+ int flags = 0;
+ if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
+ m_CodePage < 50000 &&
+ IsAtLeastWin2kSP4() )
+ {
+ flags = MB_ERR_INVALID_CHARS;
+ }
+ else if ( m_CodePage == CP_UTF8 )
+ {
+ // Avoid round-trip in the special case of UTF-8 by using our
+ // own UTF-8 conversion code:
+ return wxMBConvUTF8().MB2WC(buf, psz, n);
+ }
const size_t len = ::MultiByteToWideChar
(
buf, // output string
buf ? n : 0 // size of output buffer
);
+ if ( !len )
+ {
+ // function totally failed
+ return (size_t)-1;
+ }
+
+ // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
+ // check whether we succeeded by doing a round trip:
+ if ( !flags && buf )
+ {
+ wxCharBuffer mbBuf(n);
+ if ( ::WideCharToMultiByte
+ (
+ m_CodePage,
+ 0,
+ buf,
+ -1,
+ mbBuf.data(),
+ n,
+ NULL,
+ NULL
+ ) == 0 ||
+ strcmp(mbBuf, psz) != 0 )
+ {
+ // we didn't obtain the same thing we started from, hence
+ // the conversion was lossy and we consider that it failed
+ return (size_t)-1;
+ }
+ }
// note that it returns count of written chars for buf != NULL and size
// of the needed buffer for buf == NULL so in either case the length of
// the string (which never includes the terminating NUL) is one less
- return len ? len - 1 : (size_t)-1;
+ return len - 1;
}
size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
return s_isWin98Or2k == 1;
}
+ static bool IsAtLeastWin2kSP4() // reports whether the running Windows is 2000 SP4, XP/2003 or anything newer
+ {
+#ifdef __WXWINCE__ // Windows CE builds never query the OS version
+ return false;
+#else
+ static int s_isAtLeastWin2kSP4 = -1; // cached answer: -1 = not computed yet, 0 = no, 1 = yes
+
+ if ( s_isAtLeastWin2kSP4 == -1 ) // first call only: query the OS once and remember the result
+ {
+ OSVERSIONINFOEX ver;
+
+ memset(&ver, 0, sizeof(ver)); // zero-fill so every field has a defined value before the query
+ ver.dwOSVersionInfoSize = sizeof(ver); // size of the extended (EX) structure is stored before the call
+ GetVersionEx((OSVERSIONINFO*)&ver); // NOTE(review): return value is not checked; presumably a failed call leaves the zeroed fields, which yields 0 below -- confirm
+
+ s_isAtLeastWin2kSP4 =
+ ((ver.dwMajorVersion > 5) || // Vista+
+ (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
+ (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
+ ver.wServicePackMajor >= 4)) // 2000 SP4+
+ ? 1 : 0;
+ }
+
+ return s_isAtLeastWin2kSP4 == 1; // convert the cached tri-state value to bool
+#endif
+ }
+
long m_CodePage;
};
SetName(charset);
}
+#if wxUSE_FONTMAP
+ m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
+#else
m_encoding = wxFONTENCODING_SYSTEM;
+#endif
}
wxCSConv::wxCSConv(wxFontEncoding encoding)
// check for the special case of ASCII or ISO8859-1 charset: as we have
// special knowledge of it anyhow, we don't need to create a special
// conversion object
- if ( m_encoding == wxFONTENCODING_ISO8859_1 )
+ if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
+ m_encoding == wxFONTENCODING_DEFAULT )
{
// don't convert at all
return NULL;
wxConvUTF8;
#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
-
-