X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/f6a02087565a52d5c2a2ef7652d56a6a06bfa304..6d9b6716925a786d2455033ccb8775569001b631:/src/common/strconv.cpp diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index 54ac2c8489..31dc602f35 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -160,11 +160,15 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen, const char *src, size_t srcLen) const { // although new conversion classes are supposed to implement this function - // directly, the existins ones only implement the old MB2WC() and so, to + // directly, the existing ones only implement the old MB2WC() and so, to // avoid to have to rewrite all conversion classes at once, we provide a // default (but not efficient) implementation of this one in terms of the // old function by copying the input to ensure that it's NUL-terminated and // then using MB2WC() to convert it + // + // moreover, some conversion classes simply can't implement ToWChar() + // directly, the primary example is wxConvLibc: mbstowcs() only handles + // NUL-terminated strings // the number of chars [which would be] written to dst [if it were not NULL] size_t dstWritten = 0; @@ -205,6 +209,21 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen, srcEnd = NULL; } + // the idea of this code is straightforward: it converts a NUL-terminated + // chunk of the string during each iteration and updates the output buffer + // with the result + // + // all the complication come from the fact that this function, for + // historical reasons, must behave in 2 subtly different ways when it's + // called with a fixed number of characters and when it's called for the + // entire NUL-terminated string: in the former case (srcEnd == NULL) we + // must count all characters we convert, NUL or not; but in the latter we + // do not count the trailing NUL -- but still count all the NULs inside the + // string + // + // so for the (simple) former case we just always count the trailing NUL, + // but for the latter we need to wait until we see if there is going to be + // another loop iteration and only count it then for ( ;; ) { // try to convert the current chunk @@ -257,9 +276,19 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen, // note that ">=" (and not just "==") is needed here as the terminator // we skipped just above could be inside or just after the buffer - // delimited by inEnd + // delimited by srcEnd if ( src >= srcEnd ) break; + + // if we got here then this wasn't the last chunk in this string and + // hence we must count an extra char for L'\0' even when converting a + // fixed number of characters + if ( srcEnd ) + { + dstWritten++; + if ( dst ) + dst++; + } } return dstWritten; @@ -536,14 +565,14 @@ static const unsigned char utf7unb64[] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; size_t wxMBConvUTF7::ToWChar(wchar_t *dst, size_t dstLen, const char *src, size_t srcLen) const { DecoderState stateOrig, - *statePtr; + *statePtr; if ( srcLen == wxNO_LEN ) { // convert the entire string, up to and including the trailing NUL @@ -580,7 +609,20 @@ size_t wxMBConvUTF7::ToWChar(wchar_t *dst, size_t dstLen, const unsigned char dc = utf7unb64[cc]; if ( dc == 0xff ) { - // end of encoded part + // end of encoded part, check that nothing was left: there can + // be up to 4 bits of 0 padding but nothing else (we also need + // to check isLSB as we count bits modulo 8 while a valid UTF-7 + // encoded sequence must contain an integral number of UTF-16 + // characters) + if ( state.isLSB || state.bit > 4 || + (state.accum & ((1 << state.bit) - 1)) ) + { + if ( !len ) + state = stateOrig; + + return wxCONV_FAILED; + } + state.ToDirect(); // re-parse this character normally below unless it's '-' which @@ -624,9 +666,6 @@ size_t wxMBConvUTF7::ToWChar(wchar_t *dst, size_t dstLen, // start of an encoded segment? if ( cc == '+' ) { - if ( src == srcEnd ) - return wxCONV_FAILED; // can't have '+' at the end - if ( *src == '-' ) { // just the encoded plus sign, don't switch to shifted mode @@ -635,7 +674,15 @@ size_t wxMBConvUTF7::ToWChar(wchar_t *dst, size_t dstLen, len++; src++; } - else + else if ( utf7unb64[(unsigned)*src] == 0xff ) + { + // empty encoded chunks are not allowed + if ( !len ) + state = stateOrig; + + return wxCONV_FAILED; + } + else // base-64 encoded chunk follows { state.ToShifted(); } @@ -2843,6 +2890,16 @@ wxCSConv::wxCSConv(const wxString& charset) #if wxUSE_FONTMAP m_encoding = wxFontMapperBase::GetEncodingFromName(charset); + if ( m_encoding == wxFONTENCODING_MAX ) + { + // set to unknown/invalid value + m_encoding = wxFONTENCODING_SYSTEM; + } + else if ( m_encoding == wxFONTENCODING_DEFAULT ) + { + // wxFONTENCODING_DEFAULT is same as US-ASCII in this context + m_encoding = wxFONTENCODING_ISO8859_1; + } #else m_encoding = wxFONTENCODING_SYSTEM; #endif