X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/852dcba5286b2b423d7cb21799d11df7efcbd0e6..bf9ce2abdf465110ca83469c1b336b056ae8ec72:/src/common/strconv.cpp diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index 5d19539fc5..31dc602f35 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -160,11 +160,15 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen, const char *src, size_t srcLen) const { // although new conversion classes are supposed to implement this function - // directly, the existins ones only implement the old MB2WC() and so, to + // directly, the existing ones only implement the old MB2WC() and so, to // avoid to have to rewrite all conversion classes at once, we provide a // default (but not efficient) implementation of this one in terms of the // old function by copying the input to ensure that it's NUL-terminated and // then using MB2WC() to convert it + // + // moreover, some conversion classes simply can't implement ToWChar() + // directly, the primary example is wxConvLibc: mbstowcs() only handles + // NUL-terminated strings // the number of chars [which would be] written to dst [if it were not NULL] size_t dstWritten = 0; @@ -205,6 +209,21 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen, srcEnd = NULL; } + // the idea of this code is straightforward: it converts a NUL-terminated + // chunk of the string during each iteration and updates the output buffer + // with the result + // + // all the complications come from the fact that this function, for + // historical reasons, must behave in 2 subtly different ways when it's + // called for the entire NUL-terminated string and when it's called with + // a fixed number of characters: in the former case (srcEnd == NULL) we + // must count all characters we convert, NUL or not; but in the latter we + // do not count the trailing NUL -- but still count all the NULs inside the + // string + // + // so for the (simple) former case we just always count the trailing NUL, 
+ // but for the latter we need to wait until we see if there is going to be + // another loop iteration and only count it then for ( ;; ) { // try to convert the current chunk @@ -257,9 +276,19 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen, // note that ">=" (and not just "==") is needed here as the terminator // we skipped just above could be inside or just after the buffer - // delimited by inEnd + // delimited by srcEnd if ( src >= srcEnd ) break; + + // if we got here then this wasn't the last chunk in this string and + // hence we must count an extra char for L'\0' even when converting a + // fixed number of characters + if ( srcEnd ) + { + dstWritten++; + if ( dst ) + dst++; + } } return dstWritten; @@ -536,7 +565,7 @@ static const unsigned char utf7unb64[] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; size_t wxMBConvUTF7::ToWChar(wchar_t *dst, size_t dstLen, @@ -580,11 +609,19 @@ size_t wxMBConvUTF7::ToWChar(wchar_t *dst, size_t dstLen, const unsigned char dc = utf7unb64[cc]; if ( dc == 0xff ) { - // end of encoded part, check that nothing was left: the bit - // field cycles through 0,6,4,2 sequence so check that we're at - // the end of it - if ( state.bit != 2 ) + // end of encoded part, check that nothing was left: there can + // be up to 4 bits of 0 padding but nothing else (we also need + // to check isLSB as we count bits modulo 8 while a valid UTF-7 + // encoded sequence must contain an integral number of UTF-16 + // characters) + if ( state.isLSB || state.bit > 4 || + (state.accum & ((1 << state.bit) - 1)) ) + { + if ( !len ) + state = stateOrig; + return wxCONV_FAILED; + } state.ToDirect(); @@ -637,7 +674,15 @@ size_t wxMBConvUTF7::ToWChar(wchar_t *dst, size_t dstLen, len++; src++; } - else + else if ( utf7unb64[(unsigned)*src] == 0xff ) + { + // 
empty encoded chunks are not allowed + if ( !len ) + state = stateOrig; + + return wxCONV_FAILED; + } + else // base-64 encoded chunk follows { state.ToShifted(); } @@ -2845,6 +2890,16 @@ wxCSConv::wxCSConv(const wxString& charset) #if wxUSE_FONTMAP m_encoding = wxFontMapperBase::GetEncodingFromName(charset); + if ( m_encoding == wxFONTENCODING_MAX ) + { + // set to unknown/invalid value + m_encoding = wxFONTENCODING_SYSTEM; + } + else if ( m_encoding == wxFONTENCODING_DEFAULT ) + { + // wxFONTENCODING_DEFAULT is same as US-ASCII in this context + m_encoding = wxFONTENCODING_ISO8859_1; + } #else m_encoding = wxFONTENCODING_SYSTEM; #endif