X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/82713003dc446ad0353127abe66a1ecc66f4df67..1971d23c57b3c582f32d05704914ad902ec76fa5:/src/common/strconv.cpp?ds=sidebyside diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index f60201d7d3..82656dd1d2 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -1,11 +1,11 @@ ///////////////////////////////////////////////////////////////////////////// // Name: strconv.cpp // Purpose: Unicode conversion classes -// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin +// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik // Modified by: // Created: 29/01/98 // RCS-ID: $Id$ -// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin +// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik // Licence: wxWindows license ///////////////////////////////////////////////////////////////////////////// @@ -37,6 +37,25 @@ #include #include + +#include "wx/debug.h" +#include "wx/strconv.h" +#include "wx/intl.h" +#include "wx/log.h" + +// ---------------------------------------------------------------------------- +// globals +// ---------------------------------------------------------------------------- + +WXDLLEXPORT_DATA(wxMBConv *) wxConvCurrent = &wxConvLibc; + + +// ============================================================================ +// implementation +// ============================================================================ + +#if wxUSE_WCHAR_T + #ifdef __SALFORDC__ #include #endif @@ -44,54 +63,46 @@ #ifdef HAVE_ICONV_H #include #endif -#ifdef HAVE_LANGINFO_H - #include -#endif #ifdef __WXMSW__ #include #endif -#include "wx/debug.h" -#include "wx/strconv.h" -#include "wx/intl.h" -#include "wx/log.h" - -#if defined(WORDS_BIGENDIAN) || defined(__STDC_ISO_10646__) -#define BSWAP_UCS4(str, len) -#define BSWAP_UCS2(str, len) -#else #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c0xdfff)) { @@ -351,7 +362,7 @@ size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const { wxUint32 cc; #ifdef WC_UTF16 - size_t pa = decode_utf16(psz,cc); + size_t pa = decode_utf16(psz, cc); psz += (pa == (size_t)-1) ? 1 : pa; #else cc=(*psz++) & 0x7fffffff; @@ -396,7 +407,7 @@ WXDLLEXPORT_DATA(wxCSConv) wxConvLocal((const wxChar *)NULL); // - perhaps common encodings to objects ("UTF8" -> wxConvUTF8) // - move wxEncodingConverter meat in here -#ifdef __WIN32__ +#if defined(__WIN32__) && !defined(__WXMICROWIN__) #include "wx/msw/registry.h" // this should work if M$ Internet Exploiter is installed static long CharsetToCodepage(const wxChar *name) @@ -412,7 +423,7 @@ static long CharsetToCodepage(const wxChar *name) path += cn; wxRegKey key(wxRegKey::HKCR, path); - if (!key.Exists()) continue; + if (!key.Exists()) break; // two cases: either there's an AliasForCharset string, // or there are Codepage and InternetEncoding dwords. @@ -469,6 +480,9 @@ public: #ifdef HAVE_ICONV_H +bool g_wcNeedsSwap = FALSE; +static const char *g_wcCharset = NULL; + // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG // if output buffer is _exactly_ as big as needed. Such case is (unless there's // yet another bug in glibc) the only case when iconv() returns with (size_t)-1 @@ -489,11 +503,75 @@ public: IC_CharSet(const wxChar *name) : wxCharacterSet(name) { - m2w = iconv_open(WC_NAME, wxConvLibc.cWX2MB(cname)); - w2m = iconv_open(wxConvLibc.cWX2MB(cname), WC_NAME); - } + // check for charset that represents wchar_t: + if (g_wcCharset == NULL) + { + g_wcNeedsSwap = FALSE; - ~IC_CharSet() + // try charset with explicit bytesex info (e.g. "UCS-4LE"): + g_wcCharset = WC_NAME_BEST; + m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name)); + + if (m2w == (iconv_t)-1) + { + // try charset w/o bytesex info (e.g. "UCS4") + // and check for bytesex ourselves: + g_wcCharset = WC_NAME; + m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name)); + + // last bet, try if it knows WCHAR_T pseudo-charset + if (m2w == (iconv_t)-1) + { + g_wcCharset = "WCHAR_T"; + m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name)); + } + + if (m2w != (iconv_t)-1) + { + char buf[2], *bufPtr; + wchar_t wbuf[2], *wbufPtr; + size_t insz, outsz; + size_t res; + + buf[0] = 'A'; + buf[1] = 0; + wbuf[0] = 0; + insz = 2; + outsz = SIZEOF_WCHAR_T * 2; + wbufPtr = wbuf; + bufPtr = buf; + + #ifdef WX_ICONV_TAKES_CHAR + res = iconv(m2w, (char**)&bufPtr, &insz, (char**)&wbufPtr, &outsz); + #else + res = iconv(m2w, (const char**)&bufPtr, &insz, (char**)&wbufPtr, &outsz); + #endif + if (ICONV_FAILED(res, insz)) + { + g_wcCharset = NULL; + wxLogLastError(wxT("iconv")); + wxLogError(_("Convertion to charset '%s' doesn't work."), name); + } + else + { + g_wcNeedsSwap = (wbuf[0] != (wchar_t)buf[0]); + } + } + else + { + g_wcCharset = NULL; + wxLogError(_("Don't know how to convert to/from charset '%s'."), name); + } + } + wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), g_wcCharset, g_wcNeedsSwap); + } + else + m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name)); + + w2m = iconv_open(wxConvLibc.cWX2MB(name), g_wcCharset); + } + + ~IC_CharSet() { if ( m2w != (iconv_t)-1 ) iconv_close(m2w); @@ -519,10 +597,12 @@ public: cres = iconv(m2w, &pszPtr, &inbuf, (char**)&bufPtr, &outbuf); #endif res = n - (outbuf / SIZEOF_WCHAR_T); - // convert to native endianness -#ifdef WC_NEED_BSWAP - WC_BSWAP(buf /* _not_ bufPtr */, res) -#endif + + if (g_wcNeedsSwap) + { + // convert to native endianness + WC_BSWAP(buf /* _not_ bufPtr */, res) + } } else { @@ -542,7 +622,11 @@ public: } if (ICONV_FAILED(cres, inbuf)) + { + //VS: it is ok if iconv fails, hence trace only + wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); return (size_t)-1; + } return res; } @@ -556,17 +640,21 @@ public: #endif size_t outbuf = n; size_t res, cres; + + wchar_t *tmpbuf; + + if (g_wcNeedsSwap) + { + // need to copy to temp buffer to switch endianness + // this absolutely doesn't rock! + // (no, doing WC_BSWAP twice on the original buffer won't help, as it + // could be in read-only memory, or be accessed in some other thread) + tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T); + memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T); + WC_BSWAP(tmpbuf, inbuf) + psz=tmpbuf; + } -#ifdef WC_NEED_BSWAP - // need to copy to temp buffer to switch endianness - // this absolutely doesn't rock! - // (no, doing WC_BSWAP twice on the original buffer won't help, as it - // could be in read-only memory, or be accessed in some other thread) - wchar_t *tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T); - memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T); - WC_BSWAP(tmpbuf, inbuf) - psz=tmpbuf; -#endif if (buf) { // have destination buffer, convert there @@ -593,11 +681,18 @@ public: res += 16 - outbuf; } while ((cres==(size_t)-1) && (errno==E2BIG)); } -#ifdef WC_NEED_BSWAP - free(tmpbuf); -#endif + + if (g_wcNeedsSwap) + { + free(tmpbuf); + } + if (ICONV_FAILED(cres, inbuf)) + { + //VS: it is ok if iconv fails, hence trace only + wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); return (size_t)-1; + } return res; } @@ -605,12 +700,12 @@ public: bool usable() { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); } -public: +protected: iconv_t m2w, w2m; }; #endif -#ifdef __WIN32__ +#if defined(__WIN32__) && !defined(__WXMICROWIN__) class CP_CharSet : public wxCharacterSet { public: @@ -621,14 +716,18 @@ public: { size_t len = MultiByteToWideChar(CodePage, 0, psz, -1, buf, buf ? n : 0); - return len ? len : (size_t)-1; + //VS: returns # of written chars for buf!=NULL and *size* + // needed buffer for buf==NULL + return len ? (buf ? len : len-1) : (size_t)-1; } size_t WC2MB(char *buf, const wchar_t *psz, size_t n) { size_t len = WideCharToMultiByte(CodePage, 0, psz, -1, buf, buf ? n : 0, NULL, NULL); - return len ? len : (size_t)-1; + //VS: returns # of written chars for buf!=NULL and *size* + // needed buffer for buf==NULL + return len ? (buf ? len : len-1) : (size_t)-1; } bool usable() @@ -637,7 +736,9 @@ public: public: long CodePage; }; -#endif +#endif // __WIN32__ + +#if wxUSE_FONTMAP class EC_CharSet : public wxCharacterSet { @@ -682,6 +783,8 @@ public: wxEncodingConverter m2w, w2m; }; +#endif // wxUSE_FONTMAP + static wxCharacterSet *wxGetCharacterSet(const wxChar *name) { wxCharacterSet *cset = NULL; @@ -699,14 +802,16 @@ static wxCharacterSet *wxGetCharacterSet(const wxChar *name) } } - if (cset && cset->usable()) return cset; + if (cset && cset->usable()) + return cset; + if (cset) { delete cset; cset = NULL; } -#ifdef __WIN32__ +#if defined(__WIN32__) && !defined(__WXMICROWIN__) cset = new CP_CharSet(name); // may take NULL if (cset->usable()) return cset; @@ -714,9 +819,11 @@ static wxCharacterSet *wxGetCharacterSet(const wxChar *name) delete cset; #endif // __WIN32__ +#if wxUSE_FONTMAP cset = new EC_CharSet(name); if (cset->usable()) return cset; +#endif // wxUSE_FONTMAP delete cset; wxLogError(_("Unknown encoding '%s'!"), name); @@ -758,7 +865,8 @@ void wxCSConv::LoadNow() SetName(name); } - m_cset = wxGetCharacterSet(m_name); + // wxGetCharacterSet() complains about NULL name + m_cset = m_name ? wxGetCharacterSet(m_name) : NULL; m_deferred = FALSE; } } @@ -844,10 +952,10 @@ public: class EC_CharSetConverter { public: - EC_CharSetConverter(EC_CharSet*from,EC_CharSet*to) + EC_CharSetConverter(EC_CharSet* from,EC_CharSet* to) { cnv.Init(from->enc,to->enc); } - size_t Convert(char*buf, const char*psz, size_t n) + size_t Convert(char* buf, const char* psz, size_t n) { size_t inbuf = strlen(psz); if (buf) cnv.Convert(psz,buf);