X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/35d764b08c37a42299e79859d16351ffde43af73..ab5fe83396c13f9fbf01630c52adf4df7607cbfe:/src/common/strconv.cpp diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index 8d27a4dd24..bd39f333b0 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -1,11 +1,11 @@ ///////////////////////////////////////////////////////////////////////////// // Name: strconv.cpp // Purpose: Unicode conversion classes -// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin +// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik // Modified by: // Created: 29/01/98 // RCS-ID: $Id$ -// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin +// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik // Licence: wxWindows license ///////////////////////////////////////////////////////////////////////////// @@ -37,6 +37,25 @@ #include #include + +#include "wx/debug.h" +#include "wx/strconv.h" +#include "wx/intl.h" +#include "wx/log.h" + +// ---------------------------------------------------------------------------- +// globals +// ---------------------------------------------------------------------------- + +WXDLLEXPORT_DATA(wxMBConv *) wxConvCurrent = &wxConvLibc; + + +// ============================================================================ +// implementation +// ============================================================================ + +#if wxUSE_WCHAR_T + #ifdef __SALFORDC__ #include #endif @@ -49,21 +68,8 @@ #include #endif -#include "wx/debug.h" -#include "wx/strconv.h" -#include "wx/intl.h" -#include "wx/log.h" - -#if defined(WORDS_BIGENDIAN) || defined(__STDC_ISO_10646__) -#define BSWAP_UCS4(str, len) -#define BSWAP_UCS2(str, len) -#else #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c wxConvUTF8) // - move wxEncodingConverter meat in here -#ifdef __WIN32__ +#if defined(__WIN32__) && !defined(__WXMICROWIN__) + +#if wxUSE_GUI + +// VZ: the new version of wxCharsetToCodepage() is more politically correct +// and should work on other Windows versions as well but the old version is +// still needed for !wxUSE_FONTMAP || !wxUSE_GUI case + +extern long wxEncodingToCodepage(wxFontEncoding encoding) +{ + // translate encoding into the Windows CHARSET + wxNativeEncodingInfo natveEncInfo; + if ( !wxGetNativeFontEncoding(encoding, &natveEncInfo) ) + return -1; + + // translate CHARSET to code page + CHARSETINFO csetInfo; + if ( !::TranslateCharsetInfo((DWORD *)(DWORD)natveEncInfo.charset, + &csetInfo, + TCI_SRCCHARSET) ) + { + wxLogLastError(_T("TranslateCharsetInfo(TCI_SRCCHARSET)")); + + return -1; + } + + return csetInfo.ciACP; +} + +#if wxUSE_FONTMAP + +extern long wxCharsetToCodepage(const wxChar *name) +{ + // first get the font encoding for this charset + if ( !name ) + return -1; + + wxFontEncoding enc = wxTheFontMapper->CharsetToEncoding(name, FALSE); + if ( enc == wxFONTENCODING_SYSTEM ) + return -1; + + // the use the helper function + return wxEncodingToCodepage(enc); +} + +#endif // wxUSE_FONTMAP + +#endif // wxUSE_GUI + +// include old wxCharsetToCodepage() by OK if needed +#if !wxUSE_GUI || !wxUSE_FONTMAP + #include "wx/msw/registry.h" -// this should work if M$ Internet Exploiter is installed -static long CharsetToCodepage(const wxChar *name) + +// this should work if Internet Exploiter is installed +extern long wxCharsetToCodepage(const wxChar *name) { if (!name) return GetACP(); @@ -435,7 +492,10 @@ static long CharsetToCodepage(const wxChar *name) return CP; } -#endif + +#endif // !wxUSE_GUI || !wxUSE_FONTMAP + +#endif // Win32 class wxCharacterSet { @@ -475,6 +535,9 @@ public: #ifdef HAVE_ICONV_H +bool g_wcNeedsSwap = FALSE; +static const char *g_wcCharset = NULL; + // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG // if output buffer is _exactly_ as big as needed. Such case is (unless there's // yet another bug in glibc) the only case when iconv() returns with (size_t)-1 @@ -495,11 +558,75 @@ public: IC_CharSet(const wxChar *name) : wxCharacterSet(name) { - m2w = iconv_open(WC_NAME, wxConvLibc.cWX2MB(cname)); - w2m = iconv_open(wxConvLibc.cWX2MB(cname), WC_NAME); - } + // check for charset that represents wchar_t: + if (g_wcCharset == NULL) + { + g_wcNeedsSwap = FALSE; + + // try charset with explicit bytesex info (e.g. "UCS-4LE"): + g_wcCharset = WC_NAME_BEST; + m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name)); + + if (m2w == (iconv_t)-1) + { + // try charset w/o bytesex info (e.g. "UCS4") + // and check for bytesex ourselves: + g_wcCharset = WC_NAME; + m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name)); - ~IC_CharSet() + // last bet, try if it knows WCHAR_T pseudo-charset + if (m2w == (iconv_t)-1) + { + g_wcCharset = "WCHAR_T"; + m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name)); + } + + if (m2w != (iconv_t)-1) + { + char buf[2], *bufPtr; + wchar_t wbuf[2], *wbufPtr; + size_t insz, outsz; + size_t res; + + buf[0] = 'A'; + buf[1] = 0; + wbuf[0] = 0; + insz = 2; + outsz = SIZEOF_WCHAR_T * 2; + wbufPtr = wbuf; + bufPtr = buf; + + #ifdef WX_ICONV_TAKES_CHAR + res = iconv(m2w, (char**)&bufPtr, &insz, (char**)&wbufPtr, &outsz); + #else + res = iconv(m2w, (const char**)&bufPtr, &insz, (char**)&wbufPtr, &outsz); + #endif + if (ICONV_FAILED(res, insz)) + { + g_wcCharset = NULL; + wxLogLastError(wxT("iconv")); + wxLogError(_("Convertion to charset '%s' doesn't work."), name); + } + else + { + g_wcNeedsSwap = (wbuf[0] != (wchar_t)buf[0]); + } + } + else + { + g_wcCharset = NULL; + wxLogError(_("Don't know how to convert to/from charset '%s'."), name); + } + } + wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), g_wcCharset, g_wcNeedsSwap); + } + else + m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name)); + + w2m = iconv_open(wxConvLibc.cWX2MB(name), g_wcCharset); + } + + ~IC_CharSet() { if ( m2w != (iconv_t)-1 ) iconv_close(m2w); @@ -525,10 +652,12 @@ public: cres = iconv(m2w, &pszPtr, &inbuf, (char**)&bufPtr, &outbuf); #endif res = n - (outbuf / SIZEOF_WCHAR_T); - // convert to native endianness -#ifdef WC_NEED_BSWAP - WC_BSWAP(buf /* _not_ bufPtr */, res) -#endif + + if (g_wcNeedsSwap) + { + // convert to native endianness + WC_BSWAP(buf /* _not_ bufPtr */, res) + } } else { @@ -548,7 +677,11 @@ public: } if (ICONV_FAILED(cres, inbuf)) + { + //VS: it is ok if iconv fails, hence trace only + wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); return (size_t)-1; + } return res; } @@ -562,17 +695,21 @@ public: #endif size_t outbuf = n; size_t res, cres; + + wchar_t *tmpbuf = 0; + + if (g_wcNeedsSwap) + { + // need to copy to temp buffer to switch endianness + // this absolutely doesn't rock! + // (no, doing WC_BSWAP twice on the original buffer won't help, as it + // could be in read-only memory, or be accessed in some other thread) + tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T); + memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T); + WC_BSWAP(tmpbuf, inbuf) + psz=tmpbuf; + } -#ifdef WC_NEED_BSWAP - // need to copy to temp buffer to switch endianness - // this absolutely doesn't rock! - // (no, doing WC_BSWAP twice on the original buffer won't help, as it - // could be in read-only memory, or be accessed in some other thread) - wchar_t *tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T); - memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T); - WC_BSWAP(tmpbuf, inbuf) - psz=tmpbuf; -#endif if (buf) { // have destination buffer, convert there @@ -599,11 +736,18 @@ public: res += 16 - outbuf; } while ((cres==(size_t)-1) && (errno==E2BIG)); } -#ifdef WC_NEED_BSWAP - free(tmpbuf); -#endif + + if (g_wcNeedsSwap) + { + free(tmpbuf); + } + if (ICONV_FAILED(cres, inbuf)) + { + //VS: it is ok if iconv fails, hence trace only + wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); return (size_t)-1; + } return res; } @@ -611,51 +755,56 @@ public: bool usable() { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); } -public: +protected: iconv_t m2w, w2m; }; #endif -#ifdef __WIN32__ +#if defined(__WIN32__) && !defined(__WXMICROWIN__) class CP_CharSet : public wxCharacterSet { public: - CP_CharSet(const wxChar*name) - : wxCharacterSet(name), CodePage(CharsetToCodepage(name)) {} + CP_CharSet(const wxChar* name) + : wxCharacterSet(name) + { + m_CodePage = wxCharsetToCodepage(name); + } size_t MB2WC(wchar_t *buf, const char *psz, size_t n) { size_t len = - MultiByteToWideChar(CodePage, 0, psz, -1, buf, buf ? n : 0); - //VS: returns # of written chars for buf!=NULL and *size* + MultiByteToWideChar(m_CodePage, 0, psz, -1, buf, buf ? n : 0); + //VS: returns # of written chars for buf!=NULL and *size* // needed buffer for buf==NULL return len ? (buf ? len : len-1) : (size_t)-1; } size_t WC2MB(char *buf, const wchar_t *psz, size_t n) { - size_t len = WideCharToMultiByte(CodePage, 0, psz, -1, buf, + size_t len = WideCharToMultiByte(m_CodePage, 0, psz, -1, buf, buf ? n : 0, NULL, NULL); - //VS: returns # of written chars for buf!=NULL and *size* + //VS: returns # of written chars for buf!=NULL and *size* // needed buffer for buf==NULL return len ? (buf ? len : len-1) : (size_t)-1; } bool usable() - { return CodePage != -1; } + { return m_CodePage != -1; } public: - long CodePage; + long m_CodePage; }; -#endif +#endif // __WIN32__ + +#if wxUSE_FONTMAP class EC_CharSet : public wxCharacterSet { public: // temporarily just use wxEncodingConverter stuff, // so that it works while a better implementation is built - EC_CharSet(const wxChar*name) : wxCharacterSet(name), - enc(wxFONTENCODING_SYSTEM) + EC_CharSet(const wxChar* name) : wxCharacterSet(name), + enc(wxFONTENCODING_SYSTEM) { if (name) enc = wxTheFontMapper->CharsetToEncoding(name, FALSE); @@ -673,7 +822,8 @@ public: size_t WC2MB(char *buf, const wchar_t *psz, size_t n) { -#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530) +#if ( defined(__BORLANDC__) && (__BORLANDC__ > 0x530) ) \ + || ( defined(__MWERKS__) && defined(__WXMSW__) ) size_t inbuf = std::wcslen(psz); #else size_t inbuf = ::wcslen(psz); @@ -692,6 +842,8 @@ public: wxEncodingConverter m2w, w2m; }; +#endif // wxUSE_FONTMAP + static wxCharacterSet *wxGetCharacterSet(const wxChar *name) { wxCharacterSet *cset = NULL; @@ -709,14 +861,16 @@ static wxCharacterSet *wxGetCharacterSet(const wxChar *name) } } - if (cset && cset->usable()) return cset; + if (cset && cset->usable()) + return cset; + if (cset) { delete cset; cset = NULL; } -#ifdef __WIN32__ +#if defined(__WIN32__) && !defined(__WXMICROWIN__) cset = new CP_CharSet(name); // may take NULL if (cset->usable()) return cset; @@ -724,9 +878,11 @@ static wxCharacterSet *wxGetCharacterSet(const wxChar *name) delete cset; #endif // __WIN32__ +#if wxUSE_FONTMAP cset = new EC_CharSet(name); if (cset->usable()) return cset; +#endif // wxUSE_FONTMAP delete cset; wxLogError(_("Unknown encoding '%s'!"), name); @@ -768,7 +924,8 @@ void wxCSConv::LoadNow() SetName(name); } - m_cset = wxGetCharacterSet(m_name); + // wxGetCharacterSet() complains about NULL name + m_cset = m_name ? wxGetCharacterSet(m_name) : NULL; m_deferred = FALSE; } } @@ -800,7 +957,8 @@ size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const return m_cset->WC2MB(buf, psz, n); // latin-1 (direct) -#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530) +#if ( defined(__BORLANDC__) && (__BORLANDC__ > 0x530) ) \ + || ( defined(__MWERKS__) && defined(__WXMSW__) ) size_t len=std::wcslen(psz); #else size_t len=::wcslen(psz); @@ -854,10 +1012,10 @@ public: class EC_CharSetConverter { public: - EC_CharSetConverter(EC_CharSet*from,EC_CharSet*to) + EC_CharSetConverter(EC_CharSet* from,EC_CharSet* to) { cnv.Init(from->enc,to->enc); } - size_t Convert(char*buf, const char*psz, size_t n) + size_t Convert(char* buf, const char* psz, size_t n) { size_t inbuf = strlen(psz); if (buf) cnv.Convert(psz,buf);