From 2c17722ec8a15d93ef9ec4a4ff390a0db9a34d9d Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Tue, 1 May 2007 14:11:06 +0000 Subject: [PATCH] fix wxStringOutputStream::Write() in Unicode build when the output overlaps a boundary between UTF-8 characters (closes bug 1701426) git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@45733 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- docs/changes.txt | 2 ++ include/wx/sstream.h | 8 +++++++ src/common/sstream.cpp | 49 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 56 insertions(+), 3 deletions(-) diff --git a/docs/changes.txt b/docs/changes.txt index d254faba40..80f192640e 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -151,6 +151,8 @@ wxX11: All: - Fix bug in wxFileConfig when recreating a group (Steven Van Ingelgem) +- Fix wxStringOutputStream::Write() in Unicode build when the argument + overlaps a UTF-8 character boundary - Account for lines without newline at the end in wxExecute() All (Unix): diff --git a/include/wx/sstream.h b/include/wx/sstream.h index 02f878681b..f8e75790ab 100644 --- a/include/wx/sstream.h +++ b/include/wx/sstream.h @@ -61,6 +61,9 @@ public: // The stream will write data either to the provided string or to an // internal string which can be retrieved using GetString() wxStringOutputStream(wxString *pString = NULL) +#if wxUSE_UNICODE_WCHAR + : m_unconv(0) +#endif // wxUSE_UNICODE_WCHAR { m_str = pString ? 
pString : &m_strInternal; m_pos = m_str->length() / sizeof(wxChar); @@ -90,6 +93,11 @@ private: wxMBConv m_conv; #endif +#if wxUSE_UNICODE_WCHAR + // unconverted data from the last call to OnSysWrite() + wxMemoryBuffer m_unconv; +#endif // wxUSE_UNICODE_WCHAR + DECLARE_NO_COPY_CLASS(wxStringOutputStream) }; diff --git a/src/common/sstream.cpp b/src/common/sstream.cpp index a16ea1b24a..f92916a667 100644 --- a/src/common/sstream.cpp +++ b/src/common/sstream.cpp @@ -153,9 +153,52 @@ size_t wxStringOutputStream::OnSysWrite(const void *buffer, size_t size) { const char *p = wx_static_cast(const char *, buffer); - // append the input buffer (may not be null terminated - thus - // the literal length - m_str->Append(wxString(p, m_conv, size)); +#if wxUSE_UNICODE_WCHAR + // the part of the string we have here may be incomplete, i.e. it can stop + // in the middle of a UTF-8 character and so converting it would fail; if + // this is the case, accumulate the part which we failed to convert until + // we get the rest (and also take into account the part which we might have + // left unconverted before) + const char *src; + size_t srcLen; + if ( m_unconv.GetDataLen() ) + { + // append the new data to the data remaining since the last time + m_unconv.AppendData(p, size); + src = m_unconv; + srcLen = m_unconv.GetDataLen(); + } + else // no unconverted data left, avoid extra copy + { + src = p; + srcLen = size; + } + + wxWCharBuffer wbuf(m_conv.cMB2WC(src, srcLen, NULL /* out len */)); + if ( wbuf ) + { + // conversion succeeded, clear the unconverted buffer + m_unconv = wxMemoryBuffer(0); + + *m_str += wbuf; + } + else // conversion failed + { + // remember unconverted data if there had been none before (otherwise + // we've already got it in the buffer) + if ( src == p ) + m_unconv.AppendData(src, srcLen); + + // pretend that we wrote the data anyhow, otherwise the caller would + // believe there was an error and this might not be the case, but do + // not update m_pos as m_str 
hasn't changed + return size; + } +#else // !wxUSE_UNICODE_WCHAR + // no recoding necessary, the data is supposed to already be in UTF-8 (if + // supported) or ASCII otherwise + m_str->append(p, size); +#endif // wxUSE_UNICODE_WCHAR/!wxUSE_UNICODE_WCHAR // update position m_pos += size; -- 2.45.2