X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/dccce9eae10e099d791cc055dd02d3dda731778e..4aaef122cbbd5bbe0e70b824e320458e2329dd13:/src/common/strconv.cpp?ds=inline diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index 33b934ef9f..bd39f333b0 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -1,11 +1,11 @@ ///////////////////////////////////////////////////////////////////////////// // Name: strconv.cpp // Purpose: Unicode conversion classes -// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin +// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik // Modified by: // Created: 29/01/98 // RCS-ID: $Id$ -// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin +// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik // Licence: wxWindows license ///////////////////////////////////////////////////////////////////////////// @@ -37,6 +37,25 @@ #include #include + +#include "wx/debug.h" +#include "wx/strconv.h" +#include "wx/intl.h" +#include "wx/log.h" + +// ---------------------------------------------------------------------------- +// globals +// ---------------------------------------------------------------------------- + +WXDLLEXPORT_DATA(wxMBConv *) wxConvCurrent = &wxConvLibc; + + +// ============================================================================ +// implementation +// ============================================================================ + +#if wxUSE_WCHAR_T + #ifdef __SALFORDC__ #include #endif @@ -44,54 +63,46 @@ #ifdef HAVE_ICONV_H #include #endif -#ifdef HAVE_LANGINFO_H - #include -#endif #ifdef __WXMSW__ #include #endif -#include "wx/debug.h" -#include "wx/strconv.h" -#include "wx/intl.h" -#include "wx/log.h" - -#if defined(WORDS_BIGENDIAN) || defined(__STDC_ISO_10646__) -#define BSWAP_UCS4(str, len) -#define BSWAP_UCS2(str, len) -#else #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c0xdfff)) { @@ -351,7 +362,7 @@ size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const { wxUint32 cc; #ifdef WC_UTF16 - size_t pa = decode_utf16(psz,cc); + size_t pa = decode_utf16(psz, cc); psz += (pa == (size_t)-1) ? 1 : pa; #else cc=(*psz++) & 0x7fffffff; @@ -396,10 +407,62 @@ WXDLLEXPORT_DATA(wxCSConv) wxConvLocal((const wxChar *)NULL); // - perhaps common encodings to objects ("UTF8" -> wxConvUTF8) // - move wxEncodingConverter meat in here -#ifdef __WIN32__ +#if defined(__WIN32__) && !defined(__WXMICROWIN__) + +#if wxUSE_GUI + +// VZ: the new version of wxCharsetToCodepage() is more politically correct +// and should work on other Windows versions as well but the old version is +// still needed for !wxUSE_FONTMAP || !wxUSE_GUI case + +extern long wxEncodingToCodepage(wxFontEncoding encoding) +{ + // translate encoding into the Windows CHARSET + wxNativeEncodingInfo natveEncInfo; + if ( !wxGetNativeFontEncoding(encoding, &natveEncInfo) ) + return -1; + + // translate CHARSET to code page + CHARSETINFO csetInfo; + if ( !::TranslateCharsetInfo((DWORD *)(DWORD)natveEncInfo.charset, + &csetInfo, + TCI_SRCCHARSET) ) + { + wxLogLastError(_T("TranslateCharsetInfo(TCI_SRCCHARSET)")); + + return -1; + } + + return csetInfo.ciACP; +} + +#if wxUSE_FONTMAP + +extern long wxCharsetToCodepage(const wxChar *name) +{ + // first get the font encoding for this charset + if ( !name ) + return -1; + + wxFontEncoding enc = wxTheFontMapper->CharsetToEncoding(name, FALSE); + if ( enc == wxFONTENCODING_SYSTEM ) + return -1; + + // the use the helper function + return wxEncodingToCodepage(enc); +} + +#endif // wxUSE_FONTMAP + +#endif // wxUSE_GUI + +// include old wxCharsetToCodepage() by OK if needed +#if !wxUSE_GUI || !wxUSE_FONTMAP + #include "wx/msw/registry.h" -// this should work if M$ Internet Exploiter is installed -static long CharsetToCodepage(const wxChar *name) + +// this should work if Internet Exploiter is installed +extern long wxCharsetToCodepage(const wxChar *name) { if (!name) return GetACP(); @@ -412,7 +475,7 @@ static long CharsetToCodepage(const wxChar *name) path += cn; wxRegKey key(wxRegKey::HKCR, path); - if (!key.Exists()) continue; + if (!key.Exists()) break; // two cases: either there's an AliasForCharset string, // or there are Codepage and InternetEncoding dwords. @@ -429,7 +492,10 @@ static long CharsetToCodepage(const wxChar *name) return CP; } -#endif + +#endif // !wxUSE_GUI || !wxUSE_FONTMAP + +#endif // Win32 class wxCharacterSet { @@ -469,6 +535,9 @@ public: #ifdef HAVE_ICONV_H +bool g_wcNeedsSwap = FALSE; +static const char *g_wcCharset = NULL; + // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG // if output buffer is _exactly_ as big as needed. Such case is (unless there's // yet another bug in glibc) the only case when iconv() returns with (size_t)-1 @@ -489,11 +558,75 @@ public: IC_CharSet(const wxChar *name) : wxCharacterSet(name) { - m2w = iconv_open(WC_NAME, wxConvLibc.cWX2MB(cname)); - w2m = iconv_open(wxConvLibc.cWX2MB(cname), WC_NAME); - } + // check for charset that represents wchar_t: + if (g_wcCharset == NULL) + { + g_wcNeedsSwap = FALSE; - ~IC_CharSet() + // try charset with explicit bytesex info (e.g. "UCS-4LE"): + g_wcCharset = WC_NAME_BEST; + m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name)); + + if (m2w == (iconv_t)-1) + { + // try charset w/o bytesex info (e.g. "UCS4") + // and check for bytesex ourselves: + g_wcCharset = WC_NAME; + m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name)); + + // last bet, try if it knows WCHAR_T pseudo-charset + if (m2w == (iconv_t)-1) + { + g_wcCharset = "WCHAR_T"; + m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name)); + } + + if (m2w != (iconv_t)-1) + { + char buf[2], *bufPtr; + wchar_t wbuf[2], *wbufPtr; + size_t insz, outsz; + size_t res; + + buf[0] = 'A'; + buf[1] = 0; + wbuf[0] = 0; + insz = 2; + outsz = SIZEOF_WCHAR_T * 2; + wbufPtr = wbuf; + bufPtr = buf; + + #ifdef WX_ICONV_TAKES_CHAR + res = iconv(m2w, (char**)&bufPtr, &insz, (char**)&wbufPtr, &outsz); + #else + res = iconv(m2w, (const char**)&bufPtr, &insz, (char**)&wbufPtr, &outsz); + #endif + if (ICONV_FAILED(res, insz)) + { + g_wcCharset = NULL; + wxLogLastError(wxT("iconv")); + wxLogError(_("Convertion to charset '%s' doesn't work."), name); + } + else + { + g_wcNeedsSwap = (wbuf[0] != (wchar_t)buf[0]); + } + } + else + { + g_wcCharset = NULL; + wxLogError(_("Don't know how to convert to/from charset '%s'."), name); + } + } + wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), g_wcCharset, g_wcNeedsSwap); + } + else + m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name)); + + w2m = iconv_open(wxConvLibc.cWX2MB(name), g_wcCharset); + } + + ~IC_CharSet() { if ( m2w != (iconv_t)-1 ) iconv_close(m2w); @@ -519,10 +652,12 @@ public: cres = iconv(m2w, &pszPtr, &inbuf, (char**)&bufPtr, &outbuf); #endif res = n - (outbuf / SIZEOF_WCHAR_T); - // convert to native endianness -#ifdef WC_NEED_BSWAP - WC_BSWAP(buf /* _not_ bufPtr */, res) -#endif + + if (g_wcNeedsSwap) + { + // convert to native endianness + WC_BSWAP(buf /* _not_ bufPtr */, res) + } } else { @@ -542,7 +677,11 @@ public: } if (ICONV_FAILED(cres, inbuf)) + { + //VS: it is ok if iconv fails, hence trace only + wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); return (size_t)-1; + } return res; } @@ -556,17 +695,21 @@ public: #endif size_t outbuf = n; size_t res, cres; + + wchar_t *tmpbuf = 0; + + if (g_wcNeedsSwap) + { + // need to copy to temp buffer to switch endianness + // this absolutely doesn't rock! + // (no, doing WC_BSWAP twice on the original buffer won't help, as it + // could be in read-only memory, or be accessed in some other thread) + tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T); + memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T); + WC_BSWAP(tmpbuf, inbuf) + psz=tmpbuf; + } -#ifdef WC_NEED_BSWAP - // need to copy to temp buffer to switch endianness - // this absolutely doesn't rock! - // (no, doing WC_BSWAP twice on the original buffer won't help, as it - // could be in read-only memory, or be accessed in some other thread) - wchar_t *tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T); - memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T); - WC_BSWAP(tmpbuf, inbuf) - psz=tmpbuf; -#endif if (buf) { // have destination buffer, convert there @@ -593,11 +736,18 @@ public: res += 16 - outbuf; } while ((cres==(size_t)-1) && (errno==E2BIG)); } -#ifdef WC_NEED_BSWAP - free(tmpbuf); -#endif + + if (g_wcNeedsSwap) + { + free(tmpbuf); + } + if (ICONV_FAILED(cres, inbuf)) + { + //VS: it is ok if iconv fails, hence trace only + wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); return (size_t)-1; + } return res; } @@ -605,47 +755,56 @@ public: bool usable() { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); } -public: +protected: iconv_t m2w, w2m; }; #endif -#ifdef __WIN32__ +#if defined(__WIN32__) && !defined(__WXMICROWIN__) class CP_CharSet : public wxCharacterSet { public: - CP_CharSet(const wxChar*name) - : wxCharacterSet(name), CodePage(CharsetToCodepage(name)) {} + CP_CharSet(const wxChar* name) + : wxCharacterSet(name) + { + m_CodePage = wxCharsetToCodepage(name); + } size_t MB2WC(wchar_t *buf, const char *psz, size_t n) { size_t len = - MultiByteToWideChar(CodePage, 0, psz, -1, buf, buf ? n : 0); - return len ? len : (size_t)-1; + MultiByteToWideChar(m_CodePage, 0, psz, -1, buf, buf ? n : 0); + //VS: returns # of written chars for buf!=NULL and *size* + // needed buffer for buf==NULL + return len ? (buf ? len : len-1) : (size_t)-1; } size_t WC2MB(char *buf, const wchar_t *psz, size_t n) { - size_t len = WideCharToMultiByte(CodePage, 0, psz, -1, buf, + size_t len = WideCharToMultiByte(m_CodePage, 0, psz, -1, buf, buf ? n : 0, NULL, NULL); - return len ? len : (size_t)-1; + //VS: returns # of written chars for buf!=NULL and *size* + // needed buffer for buf==NULL + return len ? (buf ? len : len-1) : (size_t)-1; } bool usable() - { return CodePage != -1; } + { return m_CodePage != -1; } public: - long CodePage; + long m_CodePage; }; -#endif +#endif // __WIN32__ + +#if wxUSE_FONTMAP class EC_CharSet : public wxCharacterSet { public: // temporarily just use wxEncodingConverter stuff, // so that it works while a better implementation is built - EC_CharSet(const wxChar*name) : wxCharacterSet(name), - enc(wxFONTENCODING_SYSTEM) + EC_CharSet(const wxChar* name) : wxCharacterSet(name), + enc(wxFONTENCODING_SYSTEM) { if (name) enc = wxTheFontMapper->CharsetToEncoding(name, FALSE); @@ -663,7 +822,8 @@ public: size_t WC2MB(char *buf, const wchar_t *psz, size_t n) { -#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530) +#if ( defined(__BORLANDC__) && (__BORLANDC__ > 0x530) ) \ + || ( defined(__MWERKS__) && defined(__WXMSW__) ) size_t inbuf = std::wcslen(psz); #else size_t inbuf = ::wcslen(psz); @@ -682,6 +842,8 @@ public: wxEncodingConverter m2w, w2m; }; +#endif // wxUSE_FONTMAP + static wxCharacterSet *wxGetCharacterSet(const wxChar *name) { wxCharacterSet *cset = NULL; @@ -699,14 +861,16 @@ static wxCharacterSet *wxGetCharacterSet(const wxChar *name) } } - if (cset && cset->usable()) return cset; + if (cset && cset->usable()) + return cset; + if (cset) { delete cset; cset = NULL; } -#ifdef __WIN32__ +#if defined(__WIN32__) && !defined(__WXMICROWIN__) cset = new CP_CharSet(name); // may take NULL if (cset->usable()) return cset; @@ -714,9 +878,11 @@ static wxCharacterSet *wxGetCharacterSet(const wxChar *name) delete cset; #endif // __WIN32__ +#if wxUSE_FONTMAP cset = new EC_CharSet(name); if (cset->usable()) return cset; +#endif // wxUSE_FONTMAP delete cset; wxLogError(_("Unknown encoding '%s'!"), name); @@ -727,6 +893,8 @@ wxCSConv::wxCSConv(const wxChar *charset) { m_name = (wxChar *)NULL; m_cset = (wxCharacterSet *) NULL; + m_deferred = TRUE; + SetName(charset); } @@ -756,7 +924,8 @@ void wxCSConv::LoadNow() SetName(name); } - m_cset = wxGetCharacterSet(m_name); + // wxGetCharacterSet() complains about NULL name + m_cset = m_name ? wxGetCharacterSet(m_name) : NULL; m_deferred = FALSE; } } @@ -788,7 +957,8 @@ size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const return m_cset->WC2MB(buf, psz, n); // latin-1 (direct) -#if defined(__BORLANDC__) && (__BORLANDC__ > 0x530) +#if ( defined(__BORLANDC__) && (__BORLANDC__ > 0x530) ) \ + || ( defined(__MWERKS__) && defined(__WXMSW__) ) size_t len=std::wcslen(psz); #else size_t len=::wcslen(psz); @@ -842,10 +1012,10 @@ public: class EC_CharSetConverter { public: - EC_CharSetConverter(EC_CharSet*from,EC_CharSet*to) + EC_CharSetConverter(EC_CharSet* from,EC_CharSet* to) { cnv.Init(from->enc,to->enc); } - size_t Convert(char*buf, const char*psz, size_t n) + size_t Convert(char* buf, const char* psz, size_t n) { size_t inbuf = strlen(psz); if (buf) cnv.Convert(psz,buf);