X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/5c250a10332dc17263c66deb629b5fa8c4320f8a..739e35e4fba5df59532ef7e62bcfc8ebbcd72bb5:/src/common/strconv.cpp diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index 6363e4001c..395c2535ee 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -42,6 +42,10 @@ #include "wx/msw/private.h" #endif +#ifdef __WINDOWS__ + #include "wx/msw/missing.h" +#endif + #ifndef __WXWINCE__ #include #endif @@ -68,11 +72,12 @@ #include "wx/encconv.h" #include "wx/fontmap.h" +#include "wx/utils.h" #ifdef __WXMAC__ -#include "ATSUnicode.h" -#include "TextCommon.h" -#include "TextEncodingConverter.h" +#include +#include +#include #include "wx/mac/private.h" // includes mac headers #endif @@ -176,9 +181,11 @@ const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const { // now do the actual conversion wxWCharBuffer buf(nLen); - MB2WC(buf.data(), psz, nLen + 1); // with the trailing NUL - - return buf; + nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL + if ( nLen != (size_t)-1 ) + { + return buf; + } } } @@ -195,9 +202,11 @@ const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const if ( nLen != (size_t)-1 ) { wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero - WC2MB(buf.data(), pwz, nLen + 4); - - return buf; + nLen = WC2MB(buf.data(), pwz, nLen + 4); + if ( nLen != (size_t)-1 ) + { + return buf; + } } } @@ -950,7 +959,7 @@ wxMBConv_iconv::wxMBConv_iconv(const wxChar *name) { ms_wcCharsetName = NULL; - // VS: we must not output an error here, since wxWindows will safely + // VS: we must not output an error here, since wxWidgets will safely // fall back to using wxEncodingConverter. wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name); //wxLogError( @@ -1112,8 +1121,10 @@ size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const #ifdef wxHAVE_WIN32_MB2WC // from utils.cpp +#if wxUSE_FONTMAP extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset); extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding); +#endif class wxMBConv_win32 : public wxMBConv { @@ -1123,6 +1134,7 @@ public: m_CodePage = CP_ACP; } +#if wxUSE_FONTMAP wxMBConv_win32(const wxChar* name) { m_CodePage = wxCharsetToCodepage(name); @@ -1132,13 +1144,19 @@ public: { m_CodePage = wxEncodingToCodepage(encoding); } +#endif size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const { + // note that we have to use MB_ERR_INVALID_CHARS flag as it without it + // the behaviour is not compatible with the Unix version (using iconv) + // and break the library itself, e.g. wxTextInputStream::NextChar() + // wouldn't work if reading an incomplete MB char didn't result in an + // error const size_t len = ::MultiByteToWideChar ( m_CodePage, // code page - 0, // flags (none) + MB_ERR_INVALID_CHARS, // flags: fall on error psz, // input string -1, // its length (NUL-terminated) buf, // output string @@ -1151,28 +1169,118 @@ public: return len ? len - 1 : (size_t)-1; } - size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const - { + size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const + { + /* + we have a problem here: by default, WideCharToMultiByte() may + replace characters unrepresentable in the target code page with bad + quality approximations such as turning "1/2" symbol (U+00BD) into + "1" for the code pages which don't have it and we, obviously, want + to avoid this at any price + + the trouble is that this function does it _silently_, i.e. it won't + even tell us whether it did or not... Win98/2000 and higher provide + WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and + we have to resort to a round trip, i.e. check that converting back + results in the same string -- this is, of course, expensive but + otherwise we simply can't be sure to not garble the data. + */ + + // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN + // it doesn't work with CJK encodings (which we test for rather roughly + // here...) nor with UTF-7/8 nor, of course, with Windows versions not + // supporting it + BOOL usedDef wxDUMMY_INITIALIZE(false), + *pUsedDef; + int flags; + if ( CanUseNoBestFit() && m_CodePage < 50000 ) + { + // it's our lucky day + flags = WC_NO_BEST_FIT_CHARS; + pUsedDef = &usedDef; + } + else // old system or unsupported encoding + { + flags = 0; + pUsedDef = NULL; + } + const size_t len = ::WideCharToMultiByte ( m_CodePage, // code page - 0, // flags (none) - psz, // input string + flags, // either none or no best fit + pwz, // input string -1, // it is (wide) NUL-terminated buf, // output buffer buf ? n : 0, // and its size NULL, // default "replacement" char - NULL // [out] was it used? + pUsedDef // [out] was it used? ); + if ( !len ) + { + // function totally failed + return (size_t)-1; + } + + // if we were really converting, check if we succeeded + if ( buf ) + { + if ( flags ) + { + // check if the conversion failed, i.e. if any replacements + // were done + if ( usedDef ) + return (size_t)-1; + } + else // we must resort to double tripping... + { + wxWCharBuffer wcBuf(n); + if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 || + wcscmp(wcBuf, pwz) != 0 ) + { + // we didn't obtain the same thing we started from, hence + // the conversion was lossy and we consider that it failed + return (size_t)-1; + } + } + } + // see the comment above for the reason of "len - 1" - return len ? len - 1 : (size_t)-1; + return len - 1; } - bool IsOk() const - { return m_CodePage != -1; } + bool IsOk() const { return m_CodePage != -1; } + +private: + static bool CanUseNoBestFit() + { + static int s_isWin98Or2k = -1; + + if ( s_isWin98Or2k == -1 ) + { + int verMaj, verMin; + switch ( wxGetOsVersion(&verMaj, &verMin) ) + { + case wxWIN95: + s_isWin98Or2k = verMaj >= 4 && verMin >= 10; + break; + + case wxWINDOWS_NT: + s_isWin98Or2k = verMaj >= 5; + break; + + default: + // unknown, be conseravtive by default + s_isWin98Or2k = 0; + } + + wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") ); + } + + return s_isWin98Or2k == 1; + } -public: long m_CodePage; }; @@ -1194,104 +1302,127 @@ public: wxMBConv_mac(const wxChar* name) { - Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, FALSE) ) ) ; + Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ; } wxMBConv_mac(wxFontEncoding encoding) { - Init( wxMacGetSystemEncFromFontEnc(encoding) ); - } - - ~wxMBConv_mac() - { - OSStatus status = noErr ; - status = TECDisposeConverter(m_MB2WC_converter); - status = TECDisposeConverter(m_WC2MB_converter); - } - - - void Init( TextEncodingBase encoding) - { - OSStatus status = noErr ; - m_char_encoding = encoding ; -#if SIZEOF_WCHAR_T == 4 - m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode32BitFormat) ; -#else - m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ; -#endif - status = TECCreateConverter(&m_MB2WC_converter, - m_char_encoding, - m_unicode_encoding); - status = TECCreateConverter(&m_WC2MB_converter, - m_unicode_encoding, - m_char_encoding); - } - + Init( wxMacGetSystemEncFromFontEnc(encoding) ); + } + + ~wxMBConv_mac() + { + OSStatus status = noErr ; + status = TECDisposeConverter(m_MB2WC_converter); + status = TECDisposeConverter(m_WC2MB_converter); + } + + + void Init( TextEncodingBase encoding) + { + OSStatus status = noErr ; + m_char_encoding = encoding ; + m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ; + + status = TECCreateConverter(&m_MB2WC_converter, + m_char_encoding, + m_unicode_encoding); + status = TECCreateConverter(&m_WC2MB_converter, + m_unicode_encoding, + m_char_encoding); + } + size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const { - OSStatus status = noErr ; - ByteCount byteOutLen ; - ByteCount byteInLen = strlen(psz) ; - wchar_t *tbuf = NULL ; - - if (buf == NULL) - { - n = byteInLen ; - tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ; - } - - ByteCount byteBufferLen = n * SIZEOF_WCHAR_T ; - status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen, - (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen); - - if ( buf == NULL ) - free(tbuf) ; - - size_t res = byteOutLen / SIZEOF_WCHAR_T ; + OSStatus status = noErr ; + ByteCount byteOutLen ; + ByteCount byteInLen = strlen(psz) ; + wchar_t *tbuf = NULL ; + UniChar* ubuf = NULL ; + size_t res = 0 ; + + if (buf == NULL) + { + n = byteInLen ; + tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ; + } + ByteCount byteBufferLen = n * sizeof( UniChar ) ; +#if SIZEOF_WCHAR_T == 4 + ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ; +#else + ubuf = (UniChar*) (buf ? buf : tbuf) ; +#endif + status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen, + (TextPtr) ubuf , byteBufferLen, &byteOutLen); +#if SIZEOF_WCHAR_T == 4 + // we have to terminate here, because n might be larger for the trailing zero, and if UniChar + // is not properly terminated we get random characters at the end + ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ; + wxMBConvUTF16BE converter ; + res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ; + free( ubuf ) ; +#else + res = byteOutLen / sizeof( UniChar ) ; +#endif + if ( buf == NULL ) + free(tbuf) ; + if ( buf && res < n) buf[res] = 0; - return res ; + return res ; } size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const - { - OSStatus status = noErr ; - ByteCount byteOutLen ; - ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ; - - char *tbuf = NULL ; - - if (buf == NULL) - { - // worst case - n = byteInLen * 2 ; - tbuf = (char*) malloc( n ) ; - } - - ByteCount byteBufferLen = n ; - status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) psz , byteInLen, &byteInLen, - (TextPtr) ( buf ? buf : tbuf ) , byteBufferLen, &byteOutLen); - - if ( buf == NULL ) - free(tbuf) ; - - size_t res = byteOutLen ; + { + OSStatus status = noErr ; + ByteCount byteOutLen ; + ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ; + + char *tbuf = NULL ; + + if (buf == NULL) + { + // worst case + n = byteInLen * 2 ; + tbuf = (char*) malloc( n ) ; + } + + ByteCount byteBufferLen = n ; + UniChar* ubuf = NULL ; +#if SIZEOF_WCHAR_T == 4 + wxMBConvUTF16BE converter ; + size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ; + byteInLen = unicharlen ; + ubuf = (UniChar*) malloc( byteInLen + 2 ) ; + converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ; +#else + ubuf = (UniChar*) psz ; +#endif + status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen, + (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen); +#if SIZEOF_WCHAR_T == 4 + free( ubuf ) ; +#endif + if ( buf == NULL ) + free(tbuf) ; + + size_t res = byteOutLen ; if ( buf && res < n) buf[res] = 0; - return res ; + return res ; } bool IsOk() const { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; } private: - TECObjectRef m_MB2WC_converter ; - TECObjectRef m_WC2MB_converter ; - - TextEncodingBase m_char_encoding ; - TextEncodingBase m_unicode_encoding ; + TECObjectRef m_MB2WC_converter ; + TECObjectRef m_WC2MB_converter ; + + TextEncodingBase m_char_encoding ; + TextEncodingBase m_unicode_encoding ; }; #endif // defined(__WXMAC__) && defined(TARGET_CARBON) @@ -1483,26 +1614,30 @@ wxMBConv *wxCSConv::DoCreate() const #ifdef wxHAVE_WIN32_MB2WC { +#if wxUSE_FONTMAP wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name) : new wxMBConv_win32(m_encoding); if ( conv->IsOk() ) return conv; delete conv; +#else + return NULL; +#endif } #endif // wxHAVE_WIN32_MB2WC -#if defined(__WXMAC__) +#if defined(__WXMAC__) { - if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) ) - { - - wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name) - : new wxMBConv_mac(m_encoding); - if ( conv->IsOk() ) - return conv; + if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) ) + { - delete conv; - } + wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name) + : new wxMBConv_mac(m_encoding); + if ( conv->IsOk() ) + return conv; + + delete conv; + } } #endif // step (2) @@ -1655,6 +1790,8 @@ size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const #ifdef __WINDOWS__ static wxMBConv_win32 wxConvLibcObj; +#elif defined(__WXMAC__) && !defined(__MACH__) + static wxMBConv_mac wxConvLibcObj ; #else static wxMBConvLibc wxConvLibcObj; #endif