X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/bab1e7222184a58dc2507be38395d836cff51fcf..f2616db56739e4962f683f59ac9e207756efe23c:/src/common/strconv.cpp diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index c129faee32..c0dbf63459 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -1,11 +1,11 @@ ///////////////////////////////////////////////////////////////////////////// // Name: strconv.cpp // Purpose: Unicode conversion classes -// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin +// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik // Modified by: // Created: 29/01/98 // RCS-ID: $Id$ -// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin +// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik // Licence: wxWindows license ///////////////////////////////////////////////////////////////////////////// @@ -54,27 +54,35 @@ #include "wx/intl.h" #include "wx/log.h" -#if defined(WORDS_BIGENDIAN) || defined(__STDC_ISO_10646__) -#define BSWAP_UCS4(str, len) -#define BSWAP_UCS2(str, len) -#else #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); } -#define BSWAP_UCS2(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); } -#define WC_NEED_BSWAP -#endif -#define BSWAP_UTF32(str, len) BSWAP_UCS4(str, len) -#define BSWAP_UTF16(str, len) BSWAP_UCS2(str, len) +#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); } + +// under Unix SIZEOF_WCHAR_T is defined by configure, but under other platforms +// it might be not defined - assume the most common value +#ifndef SIZEOF_WCHAR_T + #define SIZEOF_WCHAR_T 2 +#endif // !defined(SIZEOF_WCHAR_T) #if SIZEOF_WCHAR_T == 4 -#define WC_NAME "UCS4" -#define WC_BSWAP BSWAP_UCS4 + #define WC_NAME "UCS4" + #define WC_BSWAP BSWAP_UCS4 + #ifdef WORDS_BIGENDIAN + #define WC_NAME_BEST "UCS-4BE" + #else + #define WC_NAME_BEST "UCS-4LE" + #endif #elif SIZEOF_WCHAR_T == 2 -#define WC_NAME "UTF16" -#define WC_BSWAP BSWAP_UTF16 -#define WC_UTF16 + #define WC_NAME "UTF16" + #define WC_BSWAP BSWAP_UTF16 + #define WC_UTF16 + #ifdef WORDS_BIGENDIAN + #define WC_NAME_BEST "UTF-16BE" + #else + #define WC_NAME_BEST "UTF-16LE" + #endif #else // sizeof(wchar_t) != 2 nor 4 -// I don't know what to do about this -#error "Please report your platform details to wx-users mailing list" + // I don't know what to do about this + #error "Weird sizeof(wchar_t): please report your platform details to wx-users mailing list" #endif // ---------------------------------------------------------------------------- @@ -91,7 +99,7 @@ WXDLLEXPORT_DATA(wxMBConv *) wxConvCurrent = &wxConvLibc; #ifdef WC_UTF16 -static size_t encode_utf16(wxUint32 input,wxUint16*output) +static size_t encode_utf16(wxUint32 input, wchar_t *output) { if (input<=0xffff) { @@ -113,7 +121,7 @@ static size_t encode_utf16(wxUint32 input,wxUint16*output) } } -static size_t decode_utf16(wxUint16*input,wxUint32&output) +static size_t decode_utf16(const wchar_t* input, wxUint32& output) { if ((*input<0xd800) || (*input>0xdfff)) { @@ -351,7 +359,7 @@ size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const { wxUint32 cc; #ifdef WC_UTF16 - size_t pa = decode_utf16(psz,cc); + size_t pa = decode_utf16(psz, cc); psz += (pa == (size_t)-1) ? 1 : pa; #else cc=(*psz++) & 0x7fffffff; @@ -396,7 +404,7 @@ WXDLLEXPORT_DATA(wxCSConv) wxConvLocal((const wxChar *)NULL); // - perhaps common encodings to objects ("UTF8" -> wxConvUTF8) // - move wxEncodingConverter meat in here -#ifdef __WIN32__ +#if defined(__WIN32__) && !defined(__WXMICROWIN__) #include "wx/msw/registry.h" // this should work if M$ Internet Exploiter is installed static long CharsetToCodepage(const wxChar *name) @@ -412,7 +420,7 @@ static long CharsetToCodepage(const wxChar *name) path += cn; wxRegKey key(wxRegKey::HKCR, path); - if (!key.Exists()) continue; + if (!key.Exists()) break; // two cases: either there's an AliasForCharset string, // or there are Codepage and InternetEncoding dwords. @@ -469,6 +477,9 @@ public: #ifdef HAVE_ICONV_H +bool g_wcNeedsSwap = FALSE; +static const char *g_wcCharset = NULL; + // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG // if output buffer is _exactly_ as big as needed. Such case is (unless there's // yet another bug in glibc) the only case when iconv() returns with (size_t)-1 @@ -489,11 +500,75 @@ public: IC_CharSet(const wxChar *name) : wxCharacterSet(name) { - m2w = iconv_open(WC_NAME, wxConvLibc.cWX2MB(cname)); - w2m = iconv_open(wxConvLibc.cWX2MB(cname), WC_NAME); - } + // check for charset that represents wchar_t: + if (g_wcCharset == NULL) + { + g_wcNeedsSwap = FALSE; - ~IC_CharSet() + // try charset with explicit bytesex info (e.g. "UCS-4LE"): + g_wcCharset = WC_NAME_BEST; + m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name)); + + if (m2w == (iconv_t)-1) + { + // try charset w/o bytesex info (e.g. "UCS4") + // and check for bytesex ourselves: + g_wcCharset = WC_NAME; + m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name)); + + // last bet, try if it knows WCHAR_T pseudo-charset + if (m2w == (iconv_t)-1) + { + g_wcCharset = "WCHAR_T"; + m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name)); + } + + if (m2w != (iconv_t)-1) + { + char buf[2], *bufPtr; + wchar_t wbuf[2], *wbufPtr; + size_t insz, outsz; + size_t res; + + buf[0] = 'A'; + buf[1] = 0; + wbuf[0] = 0; + insz = 2; + outsz = SIZEOF_WCHAR_T * 2; + wbufPtr = wbuf; + bufPtr = buf; + + #ifdef WX_ICONV_TAKES_CHAR + res = iconv(m2w, (char**)&bufPtr, &insz, (char**)&wbufPtr, &outsz); + #else + res = iconv(m2w, (const char**)&bufPtr, &insz, (char**)&wbufPtr, &outsz); + #endif + if (ICONV_FAILED(res, insz)) + { + g_wcCharset = NULL; + wxLogLastError(wxT("iconv")); + wxLogError(_("Convertion to charset '%s' doesn't work."), name); + } + else + { + g_wcNeedsSwap = (wbuf[0] != (wchar_t)buf[0]); + } + } + else + { + g_wcCharset = NULL; + wxLogError(_("Don't know how to convert to/from charset '%s'."), name); + } + } + wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), g_wcCharset, g_wcNeedsSwap); + } + else + m2w = iconv_open(g_wcCharset, wxConvLibc.cWX2MB(name)); + + w2m = iconv_open(wxConvLibc.cWX2MB(name), g_wcCharset); + } + + ~IC_CharSet() { if ( m2w != (iconv_t)-1 ) iconv_close(m2w); @@ -519,10 +594,12 @@ public: cres = iconv(m2w, &pszPtr, &inbuf, (char**)&bufPtr, &outbuf); #endif res = n - (outbuf / SIZEOF_WCHAR_T); - // convert to native endianness -#ifdef WC_NEED_BSWAP - WC_BSWAP(buf /* _not_ bufPtr */, res) -#endif + + if (g_wcNeedsSwap) + { + // convert to native endianness + WC_BSWAP(buf /* _not_ bufPtr */, res) + } } else { @@ -542,7 +619,11 @@ public: } if (ICONV_FAILED(cres, inbuf)) + { + //VS: it is ok if iconv fails, hence trace only + wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); return (size_t)-1; + } return res; } @@ -556,17 +637,21 @@ public: #endif size_t outbuf = n; size_t res, cres; + + wchar_t *tmpbuf; + + if (g_wcNeedsSwap) + { + // need to copy to temp buffer to switch endianness + // this absolutely doesn't rock! + // (no, doing WC_BSWAP twice on the original buffer won't help, as it + // could be in read-only memory, or be accessed in some other thread) + tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T); + memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T); + WC_BSWAP(tmpbuf, inbuf) + psz=tmpbuf; + } -#ifdef WC_NEED_BSWAP - // need to copy to temp buffer to switch endianness - // this absolutely doesn't rock! - // (no, doing WC_BSWAP twice on the original buffer won't help, as it - // could be in read-only memory, or be accessed in some other thread) - wchar_t *tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T); - memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T); - WC_BSWAP(tmpbuf, inbuf) - psz=tmpbuf; -#endif if (buf) { // have destination buffer, convert there @@ -593,11 +678,18 @@ public: res += 16 - outbuf; } while ((cres==(size_t)-1) && (errno==E2BIG)); } -#ifdef WC_NEED_BSWAP - free(tmpbuf); -#endif + + if (g_wcNeedsSwap) + { + free(tmpbuf); + } + if (ICONV_FAILED(cres, inbuf)) + { + //VS: it is ok if iconv fails, hence trace only + wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); return (size_t)-1; + } return res; } @@ -605,12 +697,12 @@ public: bool usable() { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); } -public: +protected: iconv_t m2w, w2m; }; #endif -#ifdef __WIN32__ +#if defined(__WIN32__) && !defined(__WXMICROWIN__) class CP_CharSet : public wxCharacterSet { public: @@ -621,14 +713,18 @@ public: { size_t len = MultiByteToWideChar(CodePage, 0, psz, -1, buf, buf ? n : 0); - return len ? len : (size_t)-1; + //VS: returns # of written chars for buf!=NULL and *size* + // needed buffer for buf==NULL + return len ? (buf ? len : len-1) : (size_t)-1; } size_t WC2MB(char *buf, const wchar_t *psz, size_t n) { size_t len = WideCharToMultiByte(CodePage, 0, psz, -1, buf, buf ? n : 0, NULL, NULL); - return len ? len : (size_t)-1; + //VS: returns # of written chars for buf!=NULL and *size* + // needed buffer for buf==NULL + return len ? (buf ? len : len-1) : (size_t)-1; } bool usable() @@ -637,7 +733,9 @@ public: public: long CodePage; }; -#endif +#endif // __WIN32__ + +#if wxUSE_FONTMAP class EC_CharSet : public wxCharacterSet { @@ -682,6 +780,8 @@ public: wxEncodingConverter m2w, w2m; }; +#endif // wxUSE_FONTMAP + static wxCharacterSet *wxGetCharacterSet(const wxChar *name) { wxCharacterSet *cset = NULL; @@ -699,14 +799,16 @@ static wxCharacterSet *wxGetCharacterSet(const wxChar *name) } } - if (cset && cset->usable()) return cset; + if (cset && cset->usable()) + return cset; + if (cset) { delete cset; cset = NULL; } -#ifdef __WIN32__ +#if defined(__WIN32__) && !defined(__WXMICROWIN__) cset = new CP_CharSet(name); // may take NULL if (cset->usable()) return cset; @@ -714,9 +816,11 @@ static wxCharacterSet *wxGetCharacterSet(const wxChar *name) delete cset; #endif // __WIN32__ +#if wxUSE_FONTMAP cset = new EC_CharSet(name); if (cset->usable()) return cset; +#endif // wxUSE_FONTMAP delete cset; wxLogError(_("Unknown encoding '%s'!"), name); @@ -758,7 +862,8 @@ void wxCSConv::LoadNow() SetName(name); } - m_cset = wxGetCharacterSet(m_name); + // wxGetCharacterSet() complains about NULL name + m_cset = m_name ? wxGetCharacterSet(m_name) : NULL; m_deferred = FALSE; } } @@ -844,10 +949,10 @@ public: class EC_CharSetConverter { public: - EC_CharSetConverter(EC_CharSet*from,EC_CharSet*to) + EC_CharSetConverter(EC_CharSet* from,EC_CharSet* to) { cnv.Init(from->enc,to->enc); } - size_t Convert(char*buf, const char*psz, size_t n) + size_t Convert(char* buf, const char* psz, size_t n) { size_t inbuf = strlen(psz); if (buf) cnv.Convert(psz,buf);