X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/914955aaa034862c3b9b827463cde26455d06c79..6b91d1134092fb05310c3e72393cc2720c552e9e:/src/common/strconv.cpp?ds=sidebyside diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index 20ff6f49ef..ed4d3d889e 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -40,11 +40,8 @@ #if wxUSE_WCHAR_T -#ifdef __WXMSW__ - #include "wx/msw/private.h" -#endif - #ifdef __WINDOWS__ + #include "wx/msw/private.h" #include "wx/msw/missing.h" #endif @@ -78,12 +75,17 @@ #include "wx/utils.h" #ifdef __WXMAC__ +#ifndef __DARWIN__ #include #include #include +#endif #include "wx/mac/private.h" // includes mac headers #endif + +#define TRACE_STRCONV _T("strconv") + // ---------------------------------------------------------------------------- // macros // ---------------------------------------------------------------------------- @@ -270,9 +272,9 @@ const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, si } //Increment to next (sub)string - //Note that we have to use strlen here instead of nLen - //here because XX2XX gives us the size of the output buffer, - //not neccessarly the length of the string + //Note that we have to use strlen instead of nLen here + //because XX2XX gives us the size of the output buffer, + //which is not necessarily the length of the string szPos += strlen(szPos) + 1; } @@ -332,9 +334,9 @@ const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, } //Increment to next (sub)string - //Note that we have to use wxWcslen here instead of nLen - //here because XX2XX gives us the size of the output buffer, - //not neccessarly the length of the string + //Note that we have to use wxWcslen instead of nLen here + //because XX2XX gives us the size of the output buffer, + //which is not necessarily the length of the string szPos += wxWcslen(szPos) + 1; } @@ -360,39 +362,16 @@ size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const #ifdef __UNIX__ // ---------------------------------------------------------------------------- -// wxConvBrokenFileNames +// wxConvBrokenFileNames // ---------------------------------------------------------------------------- -wxConvBrokenFileNames::wxConvBrokenFileNames() +wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset) { - // decide which conversion to use for the file names - - // (1) this variable exists for the sole purpose of specifying the encoding - // of the filenames for GTK+ programs, so use it if it is set - wxString encName(wxGetenv(_T("G_FILENAME_ENCODING"))); - encName.MakeUpper(); - if ( !encName.empty() && encName != _T("UTF-8") && encName != _T("UTF8") ) - { - m_conv = new wxCSConv(encName); - } - else // no G_FILENAME_ENCODING - { - if ( encName.empty() ) - encName = wxLocale::GetSystemEncodingName().Upper(); - - // (2) if a non default locale is set, assume that the user wants his - // filenames in this locale too - if ( !encName.empty() && encName != _T("UTF-8") && encName != _T("UTF8") ) - { - wxSetEnv(_T("G_FILENAME_ENCODING"), encName); - m_conv = new wxMBConvLibc; - } - else - { - // (3) finally use UTF-8 by default - m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL); - } - } + if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0 + || wxStricmp(charset, _T("UTF8")) == 0 ) + m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL); + else + m_conv = new wxCSConv(charset); } size_t @@ -647,6 +626,15 @@ size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const if (buf) *buf++ = cc; len++; + + // escape the escape character for octal escapes + if ((m_options & MAP_INVALID_UTF8_TO_OCTAL) + && cc == '\\' && (!buf || len < n)) + { + if (buf) + *buf++ = cc; + len++; + } } else { @@ -784,6 +772,14 @@ size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const *buf++ = (char)(cc - wxUnicodePUA); len++; } + else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) + && cc == L'\\' && psz[0] == L'\\' ) + { + if (buf) + *buf++ = (char)cc; + psz++; + len++; + } else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) && cc == L'\\' && isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) ) @@ -1353,15 +1349,26 @@ private: static bool ms_wcNeedsSwap; }; +// make the constructor available for unit testing +WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name ) +{ + wxMBConv_iconv* result = new wxMBConv_iconv( name ); + if ( !result->IsOk() ) + { + delete result; + return 0; + } + return result; +} + const char *wxMBConv_iconv::ms_wcCharsetName = NULL; bool wxMBConv_iconv::ms_wcNeedsSwap = false; wxMBConv_iconv::wxMBConv_iconv(const wxChar *name) { - // Do it the hard way - char cname[100]; - for (size_t i = 0; i < wxStrlen(name)+1; i++) - cname[i] = (char) name[i]; + // iconv operates with chars, not wxChars, but luckily it uses only ASCII + // names for the charsets + const wxCharBuffer cname(wxString(name).ToAscii()); // check for charset that represents wchar_t: if (ms_wcCharsetName == NULL) @@ -1421,11 +1428,12 @@ wxMBConv_iconv::wxMBConv_iconv(const wxChar *name) // VS: we must not output an error here, since wxWidgets will safely // fall back to using wxEncodingConverter. - wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name); - //wxLogError( + wxLogTrace(TRACE_STRCONV, wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name); } } - wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap); + wxLogTrace(TRACE_STRCONV, + wxT("wchar_t charset is '%s', needs swap: %i"), + ms_wcCharsetName ? ms_wcCharsetName : "", ms_wcNeedsSwap); } else // we already have ms_wcCharsetName { @@ -1511,7 +1519,7 @@ size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const if (ICONV_FAILED(cres, inbuf)) { //VS: it is ok if iconv fails, hence trace only - wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); + wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); return (size_t)-1; } @@ -1579,7 +1587,7 @@ size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const if (ICONV_FAILED(cres, inbuf)) { //VS: it is ok if iconv fails, hence trace only - wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); + wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); return (size_t)-1; } @@ -2122,7 +2130,7 @@ public: UniChar* szUniBuffer = (UniChar*) szUnConv; #if SIZEOF_WCHAR_T == 4 - wxMBConvUTF16BE converter ; + wxMBConvUTF16 converter ; nBufSize = converter.WC2MB( NULL , szUnConv , 0 ); szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ; converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ; @@ -2258,7 +2266,7 @@ public: // we have to terminate here, because n might be larger for the trailing zero, and if UniChar // is not properly terminated we get random characters at the end ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ; - wxMBConvUTF16BE converter ; + wxMBConvUTF16 converter ; res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ; free( ubuf ) ; #else @@ -2291,7 +2299,7 @@ public: ByteCount byteBufferLen = n ; UniChar* ubuf = NULL ; #if SIZEOF_WCHAR_T == 4 - wxMBConvUTF16BE converter ; + wxMBConvUTF16 converter ; size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ; byteInLen = unicharlen ; ubuf = (UniChar*) malloc( byteInLen + 2 ) ; @@ -2412,6 +2420,18 @@ public: DECLARE_NO_COPY_CLASS(wxMBConv_wxwin) }; +// make the constructors available for unit testing +WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name ) +{ + wxMBConv_wxwin* result = new wxMBConv_wxwin( name ); + if ( !result->IsOk() ) + { + delete result; + return 0; + } + return result; +} + #endif // wxUSE_FONTMAP // ============================================================================ @@ -2493,8 +2513,24 @@ void wxCSConv::SetName(const wxChar *charset) } } +#if wxUSE_FONTMAP +#include "wx/hashmap.h" + +WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual, + wxEncodingNameCache ); + +static wxEncodingNameCache gs_nameCache; +#endif + wxMBConv *wxCSConv::DoCreate() const { +#if wxUSE_FONTMAP + wxLogTrace(TRACE_STRCONV, + wxT("creating conversion for %s"), + (m_name ? m_name + : wxFontMapperBase::GetEncodingName(m_encoding).c_str())); +#endif // wxUSE_FONTMAP + // check for the special case of ASCII or ISO8859-1 charset: as we have // special knowledge of it anyhow, we don't need to create a special // conversion object @@ -2519,17 +2555,53 @@ wxMBConv *wxCSConv::DoCreate() const #endif // !wxUSE_FONTMAP { wxString name(m_name); + wxFontEncoding encoding(m_encoding); + + if ( !name.empty() ) + { + wxMBConv_iconv *conv = new wxMBConv_iconv(name); + if ( conv->IsOk() ) + return conv; + + delete conv; #if wxUSE_FONTMAP - if ( name.empty() ) - name = wxFontMapperBase::Get()->GetEncodingName(m_encoding); + encoding = + wxFontMapperBase::Get()->CharsetToEncoding(name, false); #endif // wxUSE_FONTMAP + } +#if wxUSE_FONTMAP + { + const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding); + if ( it != gs_nameCache.end() ) + { + if ( it->second.empty() ) + return NULL; - wxMBConv_iconv *conv = new wxMBConv_iconv(name); - if ( conv->IsOk() ) - return conv; + wxMBConv_iconv *conv = new wxMBConv_iconv(it->second); + if ( conv->IsOk() ) + return conv; - delete conv; + delete conv; + } + + const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding); + + for ( ; *names; ++names ) + { + wxMBConv_iconv *conv = new wxMBConv_iconv(*names); + if ( conv->IsOk() ) + { + gs_nameCache[encoding] = *names; + return conv; + } + + delete conv; + } + + gs_nameCache[encoding] = _T(""); // cache the failure + } +#endif // wxUSE_FONTMAP } #endif // HAVE_ICONV