X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/a78c43f158671390e8a129605c9ad088aac9e572..1e510b1e2d0270caf227c3fc0cf34ae2e7dd6794:/src/common/strconv.cpp diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index 92b18ad615..9d093143fe 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -15,20 +15,21 @@ // For compilers that support precompilation, includes "wx.h". #include "wx/wxprec.h" +#ifdef __BORLANDC__ + #pragma hdrstop +#endif //__BORLANDC__ + #ifndef WX_PRECOMP #include "wx/intl.h" #include "wx/log.h" + #include "wx/utils.h" + #include "wx/hashmap.h" #endif #include "wx/strconv.h" #if wxUSE_WCHAR_T -#ifdef __WINDOWS__ - #include "wx/msw/private.h" - #include "wx/msw/missing.h" -#endif - #ifndef __WXWINCE__ #include #endif @@ -38,6 +39,8 @@ #include #if defined(__WIN32__) && !defined(__WXMICROWIN__) + #include "wx/msw/private.h" + #include "wx/msw/missing.h" #define wxHAVE_WIN32_MB2WC #endif @@ -52,7 +55,6 @@ #include "wx/encconv.h" #include "wx/fontmap.h" -#include "wx/utils.h" #ifdef __WXMAC__ #ifndef __DARWIN__ @@ -422,10 +424,12 @@ wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const { - const size_t dstLen = FromWChar(NULL, 0, inBuff, inLen); + size_t dstLen = FromWChar(NULL, 0, inBuff, inLen); if ( dstLen != wxCONV_FAILED ) { - wxCharBuffer buf(dstLen - 1); + // special case of empty input: can't allocate 0 size buffer below as + // wxCharBuffer insists on NUL-terminating it + wxCharBuffer buf(dstLen ? dstLen - 1 : 1); if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED ) { if ( outLen ) @@ -433,11 +437,12 @@ wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const *outLen = dstLen; const size_t nulLen = GetMBNulLen(); - if ( !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) ) + if ( dstLen >= nulLen && + !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) ) { // in this case the output is NUL-terminated and we're not // supposed to count NUL - (*outLen) -= nulLen; + *outLen -= nulLen; } } @@ -1664,7 +1669,7 @@ wxMBConv_iconv::wxMBConv_iconv(const wxChar *name) #if wxUSE_FONTMAP const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC); #else // !wxUSE_FONTMAP - static const wxChar *names[] = + static const wxChar *names_static[] = { #if SIZEOF_WCHAR_T == 4 _T("UCS-4"), @@ -1673,6 +1678,7 @@ wxMBConv_iconv::wxMBConv_iconv(const wxChar *name) #endif NULL }; + const wxChar **names = names_static; #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP for ( ; *names && ms_wcCharsetName.empty(); ++names ) @@ -1802,8 +1808,8 @@ size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const } #if wxUSE_THREADS - // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle. - // Unfortunately there is a couple of global wxCSConv objects such as + // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle. + // Unfortunately there are a couple of global wxCSConv objects such as // wxConvLocal that are used all over wx code, so we have to make sure // the handle is used by at most one thread at the time. Otherwise // only a few wx classes would be safe to use from non-main threads @@ -1994,6 +2000,7 @@ public: } wxMBConv_win32(const wxMBConv_win32& conv) + : wxMBConv() { m_CodePage = conv.m_CodePage; m_minMBCharWidth = conv.m_minMBCharWidth; @@ -2223,11 +2230,11 @@ private: int verMaj, verMin; switch ( wxGetOsVersion(&verMaj, &verMin) ) { - case wxWIN95: + case wxOS_WINDOWS_9X: s_isWin98Or2k = verMaj >= 4 && verMin >= 10; break; - case wxWINDOWS_NT: + case wxOS_WINDOWS_NT: s_isWin98Or2k = verMaj >= 5; break; @@ -2579,7 +2586,7 @@ public: Init( wxCFStringEncFromFontEnc(encoding) ); } - ~wxMBConv_cocoa() + virtual ~wxMBConv_cocoa() { } @@ -2734,30 +2741,43 @@ public: Init( wxMacGetSystemEncFromFontEnc(encoding) ); } - ~wxMBConv_mac() + virtual ~wxMBConv_mac() { OSStatus status = noErr ; - status = TECDisposeConverter(m_MB2WC_converter); - status = TECDisposeConverter(m_WC2MB_converter); + if (m_MB2WC_converter) + status = TECDisposeConverter(m_MB2WC_converter); + if (m_WC2MB_converter) + status = TECDisposeConverter(m_WC2MB_converter); } - - void Init( TextEncodingBase encoding) + void Init( TextEncodingBase encoding,TextEncodingVariant encodingVariant = kTextEncodingDefaultVariant , + TextEncodingFormat encodingFormat = kTextEncodingDefaultFormat) { - OSStatus status = noErr ; - m_char_encoding = encoding ; + m_MB2WC_converter = NULL ; + m_WC2MB_converter = NULL ; + m_char_encoding = CreateTextEncoding(encoding, encodingVariant, encodingFormat) ; m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 0, kUnicode16BitFormat) ; + } - status = TECCreateConverter(&m_MB2WC_converter, + virtual void CreateIfNeeded() const + { + if ( m_MB2WC_converter == NULL && m_WC2MB_converter == NULL ) + { + OSStatus status = noErr ; + status = TECCreateConverter(&m_MB2WC_converter, m_char_encoding, m_unicode_encoding); - status = TECCreateConverter(&m_WC2MB_converter, + wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ; + status = TECCreateConverter(&m_WC2MB_converter, m_unicode_encoding, m_char_encoding); + wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ; + } } size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const { + CreateIfNeeded() ; OSStatus status = noErr ; ByteCount byteOutLen ; ByteCount byteInLen = strlen(psz) + 1; @@ -2806,6 +2826,7 @@ public: size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const { + CreateIfNeeded() ; OSStatus status = noErr ; ByteCount byteOutLen ; ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ; @@ -2868,16 +2889,192 @@ public: virtual wxMBConv *Clone() const { return new wxMBConv_mac(*this); } bool IsOk() const - { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL; } + { + CreateIfNeeded() ; + return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL; + } -private: - TECObjectRef m_MB2WC_converter; - TECObjectRef m_WC2MB_converter; +protected : + mutable TECObjectRef m_MB2WC_converter; + mutable TECObjectRef m_WC2MB_converter; TextEncodingBase m_char_encoding; TextEncodingBase m_unicode_encoding; }; +// MB is decomposed (D) normalized UTF8 + +class wxMBConv_macUTF8D : public wxMBConv_mac +{ +public : + wxMBConv_macUTF8D() + { + Init( kTextEncodingUnicodeDefault , kUnicodeNoSubset , kUnicodeUTF8Format ) ; + m_uni = NULL; + m_uniBack = NULL ; + } + + virtual ~wxMBConv_macUTF8D() + { + if (m_uni!=NULL) + DisposeUnicodeToTextInfo(&m_uni); + if (m_uniBack!=NULL) + DisposeUnicodeToTextInfo(&m_uniBack); + } + + size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const + { + CreateIfNeeded() ; + OSStatus status = noErr ; + ByteCount byteOutLen ; + ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ; + + char *tbuf = NULL ; + + if (buf == NULL) + { + // Apple specs say at least 32 + n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T ); + tbuf = (char*) malloc( n ) ; + } + + ByteCount byteBufferLen = n ; + UniChar* ubuf = NULL ; + +#if SIZEOF_WCHAR_T == 4 + wxMBConvUTF16 converter ; + size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ; + byteInLen = unicharlen ; + ubuf = (UniChar*) malloc( byteInLen + 2 ) ; + converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ; +#else + ubuf = (UniChar*) psz ; +#endif + + // ubuf is a non-decomposed UniChar buffer + + ByteCount dcubuflen = byteInLen * 2 + 2 ; + ByteCount dcubufread , dcubufwritten ; + UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ; + + ConvertFromUnicodeToText( m_uni , byteInLen , ubuf , + kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , dcubuf ) ; + + // we now convert that decomposed buffer into UTF8 + + status = TECConvertText( + m_WC2MB_converter, (ConstTextPtr) dcubuf, dcubufwritten, &dcubufread, + (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen); + + free( dcubuf ); + +#if SIZEOF_WCHAR_T == 4 + free( ubuf ) ; +#endif + + if ( buf == NULL ) + free(tbuf) ; + + size_t res = byteOutLen ; + if ( buf && res < n) + { + buf[res] = 0; + // don't test for round-trip fidelity yet, we cannot guarantee it yet + } + + return res ; + } + + size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const + { + CreateIfNeeded() ; + OSStatus status = noErr ; + ByteCount byteOutLen ; + ByteCount byteInLen = strlen(psz) + 1; + wchar_t *tbuf = NULL ; + UniChar* ubuf = NULL ; + size_t res = 0 ; + + if (buf == NULL) + { + // Apple specs say at least 32 + n = wxMax( 32, byteInLen ) ; + tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ; + } + + ByteCount byteBufferLen = n * sizeof( UniChar ) ; + +#if SIZEOF_WCHAR_T == 4 + ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ; +#else + ubuf = (UniChar*) (buf ? buf : tbuf) ; +#endif + + ByteCount dcubuflen = byteBufferLen * 2 + 2 ; + ByteCount dcubufread , dcubufwritten ; + UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ; + + status = TECConvertText( + m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen, + (TextPtr) dcubuf, dcubuflen, &byteOutLen); + // we have to terminate here, because n might be larger for the trailing zero, and if UniChar + // is not properly terminated we get random characters at the end + dcubuf[byteOutLen / sizeof( UniChar ) ] = 0 ; + + // now from the decomposed UniChar to properly composed uniChar + ConvertFromUnicodeToText( m_uniBack , byteOutLen , dcubuf , + kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , ubuf ) ; + + free( dcubuf ); + byteOutLen = dcubufwritten ; + ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ; + + +#if SIZEOF_WCHAR_T == 4 + wxMBConvUTF16 converter ; + res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ; + free( ubuf ) ; +#else + res = byteOutLen / sizeof( UniChar ) ; +#endif + + if ( buf == NULL ) + free(tbuf) ; + + if ( buf && res < n) + buf[res] = 0; + + return res ; + } + + virtual void CreateIfNeeded() const + { + wxMBConv_mac::CreateIfNeeded() ; + if ( m_uni == NULL ) + { + m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault, + kUnicodeNoSubset, kTextEncodingDefaultFormat); + m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault, + kUnicodeCanonicalDecompVariant, kTextEncodingDefaultFormat); + m_map.mappingVersion = kUnicodeUseLatestMapping; + + OSStatus err = CreateUnicodeToTextInfo(&m_map, &m_uni); + wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ; + + m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault, + kUnicodeNoSubset, kTextEncodingDefaultFormat); + m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault, + kUnicodeCanonicalCompVariant, kTextEncodingDefaultFormat); + m_map.mappingVersion = kUnicodeUseLatestMapping; + err = CreateUnicodeToTextInfo(&m_map, &m_uniBack); + wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ; + } + } +protected : + mutable UnicodeToTextInfo m_uni; + mutable UnicodeToTextInfo m_uniBack; + mutable UnicodeMapping m_map; +}; #endif // defined(__WXMAC__) && defined(TARGET_CARBON) // ============================================================================ @@ -3069,7 +3266,6 @@ void wxCSConv::SetName(const wxChar *charset) } #if wxUSE_FONTMAP -#include "wx/hashmap.h" WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual, wxEncodingNameCache ); @@ -3083,7 +3279,7 @@ wxMBConv *wxCSConv::DoCreate() const wxLogTrace(TRACE_STRCONV, wxT("creating conversion for %s"), (m_name ? m_name - : wxFontMapperBase::GetEncodingName(m_encoding).c_str())); + : (const wxChar*)wxFontMapperBase::GetEncodingName(m_encoding).c_str())); #endif // wxUSE_FONTMAP // check for the special case of ASCII or ISO8859-1 charset: as we have @@ -3111,7 +3307,9 @@ wxMBConv *wxCSConv::DoCreate() const #endif // !wxUSE_FONTMAP { wxString name(m_name); +#if wxUSE_FONTMAP wxFontEncoding encoding(m_encoding); +#endif if ( !name.empty() ) { @@ -3142,20 +3340,26 @@ wxMBConv *wxCSConv::DoCreate() const } const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding); - - for ( ; *names; ++names ) + // CS : in case this does not return valid names (eg for MacRoman) encoding + // got a 'failure' entry in the cache all the same, although it just has to + // be created using a different method, so only store failed iconv creation + // attempts (or perhaps we shoulnd't do this at all ?) + if ( names[0] != NULL ) { - wxMBConv_iconv *conv = new wxMBConv_iconv(*names); - if ( conv->IsOk() ) + for ( ; *names; ++names ) { - gs_nameCache[encoding] = *names; - return conv; + wxMBConv_iconv *conv = new wxMBConv_iconv(*names); + if ( conv->IsOk() ) + { + gs_nameCache[encoding] = *names; + return conv; + } + + delete conv; } - delete conv; + gs_nameCache[encoding] = _T(""); // cache the failure } - - gs_nameCache[encoding] = _T(""); // cache the failure } #endif // wxUSE_FONTMAP } @@ -3266,10 +3470,10 @@ wxMBConv *wxCSConv::DoCreate() const // NB: This is a hack to prevent deadlock. What could otherwise happen // in Unicode build: wxConvLocal creation ends up being here // because of some failure and logs the error. But wxLog will try to - // attach timestamp, for which it will need wxConvLocal (to convert - // time to char* and then wchar_t*), but that fails, tries to log - // error, but wxLog has a (already locked) critical section that - // guards static buffer. + // attach a timestamp, for which it will need wxConvLocal (to convert + // time to char* and then wchar_t*), but that fails, tries to log the + // error, but wxLog has an (already locked) critical section that + // guards the static buffer. static bool alreadyLoggingError = false; if (!alreadyLoggingError) { @@ -3278,9 +3482,9 @@ wxMBConv *wxCSConv::DoCreate() const m_name ? m_name : #if wxUSE_FONTMAP - wxFontMapperBase::GetEncodingDescription(m_encoding).c_str() + (const wxChar*)wxFontMapperBase::GetEncodingDescription(m_encoding).c_str() #else // !wxUSE_FONTMAP - wxString::Format(_("encoding %s"), m_encoding).c_str() + (const wxChar*)wxString::Format(_("encoding %i"), m_encoding).c_str() #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP ); @@ -3296,20 +3500,60 @@ void wxCSConv::CreateConvIfNeeded() const { wxCSConv *self = (wxCSConv *)this; // const_cast -#if wxUSE_INTL // if we don't have neither the name nor the encoding, use the default // encoding for this system if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM ) { - self->m_name = wxStrdup(wxLocale::GetSystemEncodingName()); - } +#if wxUSE_INTL + self->m_encoding = wxLocale::GetSystemEncoding(); +#else + // fallback to some reasonable default: + self->m_encoding = wxFONTENCODING_ISO8859_1; #endif // wxUSE_INTL + } self->m_convReal = DoCreate(); self->m_deferred = false; } } +bool wxCSConv::IsOk() const +{ + CreateConvIfNeeded(); + + // special case: no convReal created for wxFONTENCODING_ISO8859_1 + if ( m_encoding == wxFONTENCODING_ISO8859_1 ) + return true; // always ok as we do it ourselves + + // m_convReal->IsOk() is called at its own creation, so we know it must + // be ok if m_convReal is non-NULL + return m_convReal != NULL; +} + +size_t wxCSConv::ToWChar(wchar_t *dst, size_t dstLen, + const char *src, size_t srcLen) const +{ + CreateConvIfNeeded(); + + if (m_convReal) + return m_convReal->ToWChar(dst, dstLen, src, srcLen); + + // latin-1 (direct) + return wxMBConv::ToWChar(dst, dstLen, src, srcLen); +} + +size_t wxCSConv::FromWChar(char *dst, size_t dstLen, + const wchar_t *src, size_t srcLen) const +{ + CreateConvIfNeeded(); + + if (m_convReal) + return m_convReal->FromWChar(dst, dstLen, src, srcLen); + + // latin-1 (direct) + return wxMBConv::FromWChar(dst, dstLen, src, srcLen); +} + size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const { CreateConvIfNeeded(); @@ -3388,7 +3632,9 @@ static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM); static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1); static wxMBConvUTF7 wxConvUTF7Obj; static wxMBConvUTF8 wxConvUTF8Obj; - +#if defined(__WXMAC__) && defined(TARGET_CARBON) +static wxMBConv_macUTF8D wxConvMacUTF8DObj; +#endif WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj; WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj; WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj; @@ -3398,10 +3644,44 @@ WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj; WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = &wxConvLocal; WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = & #ifdef __WXOSX__ - wxConvUTF8Obj; +#if defined(__WXMAC__) && defined(TARGET_CARBON) + wxConvMacUTF8DObj; #else - wxConvLibcObj; + wxConvUTF8Obj; #endif +#else // !__WXOSX__ + wxConvLibcObj; +#endif // __WXOSX__/!__WXOSX__ + +#if wxUSE_UNICODE + +wxWCharBuffer wxSafeConvertMB2WX(const char *s) +{ + if ( !s ) + return wxWCharBuffer(); + + wxWCharBuffer wbuf(wxConvLibc.cMB2WX(s)); + if ( !wbuf ) + wbuf = wxConvUTF8.cMB2WX(s); + if ( !wbuf ) + wbuf = wxConvISO8859_1.cMB2WX(s); + + return wbuf; +} + +wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws) +{ + if ( !ws ) + return wxCharBuffer(); + + wxCharBuffer buf(wxConvLibc.cWX2MB(ws)); + if ( !buf ) + buf = wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL).cWX2MB(ws); + + return buf; +} + +#endif // wxUSE_UNICODE #else // !wxUSE_WCHAR_T