X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/40711af81b4a412892045837997211d597d98ff2..8ddbb137813bb2452a833c735da951d43238b806:/src/common/strconv.cpp diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index ed4d3d889e..01e0dc358c 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Name: strconv.cpp +// Name: src/common/strconv.cpp // Purpose: Unicode conversion classes // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik, // Ryan Norton, Fredrik Roubert (UTF7) @@ -20,10 +20,6 @@ // headers // ---------------------------------------------------------------------------- -#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA) - #pragma implementation "strconv.h" -#endif - // For compilers that support precompilation, includes "wx.h". #include "wx/wxprec.h" @@ -57,10 +53,6 @@ #define wxHAVE_WIN32_MB2WC #endif // __WIN32__ but !__WXMICROWIN__ -// ---------------------------------------------------------------------------- -// headers -// ---------------------------------------------------------------------------- - #ifdef __SALFORDC__ #include #endif @@ -86,39 +78,23 @@ #define TRACE_STRCONV _T("strconv") -// ---------------------------------------------------------------------------- -// macros -// ---------------------------------------------------------------------------- - -#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c dstLen ) + return wxCONV_FAILED; + + lenChunk = MB2WC(dst, src, lenChunk + 1 /* for NUL */); + if ( lenChunk == wxCONV_FAILED ) + return wxCONV_FAILED; + + dst += lenChunk; } - } - wxCharBuffer buf((char *)NULL); + if ( !srcEnd ) + { + // we convert the entire string in this cas, as we suppose that the + // string is NUL-terminated and so srcEnd is not used at all + break; + } - return buf; -} + // advance the input pointer past the end of this chunk + while ( NotAllNULs(src, nulLen) ) + { + // notice that we must skip over multiple bytes here as we suppose + // that if NUL takes 2 or 4 bytes, then all the other characters do + // too and so if advanced by a single byte we might erroneously + // detect sequences of NUL bytes in the middle of the input + src += nulLen; + } -const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const -{ - wxASSERT(pOutSize != NULL); + src += nulLen; // skipping over its terminator as well - const char* szEnd = szString + nStringLen + 1; - const char* szPos = szString; - const char* szStart = szPos; + // note that ">=" (and not just "==") is needed here as the terminator + // we skipped just above could be inside or just after the buffer + // delimited by inEnd + if ( src >= srcEnd ) + break; + } - size_t nActualLength = 0; - size_t nCurrentSize = nStringLen; //try normal size first (should never resize?) + return dstWritten; +} - wxWCharBuffer theBuffer(nCurrentSize); +size_t +wxMBConv::FromWChar(char *dst, size_t dstLen, + const wchar_t *src, size_t srcLen) const +{ + // the number of chars [which would be] written to dst [if it were not NULL] + size_t dstWritten = 0; - //Convert the string until the length() is reached, continuing the - //loop every time a null character is reached - while(szPos != szEnd) + // make a copy of the input string unless it is already properly + // NUL-terminated + // + // if we don't know its length we have no choice but to assume that it is, + // indeed, properly terminated + wxWCharBuffer bufTmp; + if ( srcLen == (size_t)-1 ) { - wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true - - //Get the length of the current (sub)string - size_t nLen = MB2WC(NULL, szPos, 0); + srcLen = wxWcslen(src) + 1; + } + else if ( srcLen != 0 && src[srcLen - 1] != L'\0' ) + { + // make a copy in order to properly NUL-terminate the string + bufTmp = wxWCharBuffer(srcLen); + memcpy(bufTmp.data(), src, srcLen*sizeof(wchar_t)); + src = bufTmp; + } - //Invalid conversion? - if( nLen == (size_t)-1 ) - { - *pOutSize = 0; - theBuffer.data()[0u] = wxT('\0'); - return theBuffer; - } + const size_t lenNul = GetMBNulLen(); + for ( const wchar_t * const srcEnd = src + srcLen; + src < srcEnd; + src += wxWcslen(src) + 1 /* skip L'\0' too */ ) + { + // try to convert the current chunk + size_t lenChunk = WC2MB(NULL, src, 0); + if ( lenChunk == wxCONV_FAILED ) + return wxCONV_FAILED; - //Increase the actual length (+1 for current null character) - nActualLength += nLen + 1; + lenChunk += lenNul; + dstWritten += lenChunk; - //if buffer too big, realloc the buffer - if (nActualLength > (nCurrentSize+1)) + if ( dst ) { - wxWCharBuffer theNewBuffer(nCurrentSize << 1); - memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t)); - theBuffer = theNewBuffer; - nCurrentSize <<= 1; - } + if ( dstWritten > dstLen ) + return wxCONV_FAILED; - //Convert the current (sub)string - if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 ) - { - *pOutSize = 0; - theBuffer.data()[0u] = wxT('\0'); - return theBuffer; - } + if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED ) + return wxCONV_FAILED; - //Increment to next (sub)string - //Note that we have to use strlen instead of nLen here - //because XX2XX gives us the size of the output buffer, - //which is not necessarily the length of the string - szPos += strlen(szPos) + 1; + dst += lenChunk; + } } - //success - return actual length and the buffer - *pOutSize = nActualLength; - return theBuffer; + return dstWritten; } -const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const +size_t wxMBConv::MB2WC(wchar_t *out, const char *in, size_t outLen) const { - wxASSERT(pOutSize != NULL); + size_t rc = ToWChar(out, outLen, in); + if ( rc != wxCONV_FAILED ) + { + // ToWChar() returns the buffer length, i.e. including the trailing + // NUL, while this method doesn't take it into account + rc--; + } - const wchar_t* szEnd = szString + nStringLen + 1; - const wchar_t* szPos = szString; - const wchar_t* szStart = szPos; + return rc; +} + +size_t wxMBConv::WC2MB(char *out, const wchar_t *in, size_t outLen) const +{ + size_t rc = FromWChar(out, outLen, in); + if ( rc != wxCONV_FAILED ) + { + rc -= GetMBNulLen(); + } - size_t nActualLength = 0; - size_t nCurrentSize = nStringLen << 2; //try * 4 first + return rc; +} - wxCharBuffer theBuffer(nCurrentSize); +wxMBConv::~wxMBConv() +{ + // nothing to do here (necessary for Darwin linking probably) +} - //Convert the string until the length() is reached, continuing the - //loop every time a null character is reached - while(szPos != szEnd) +const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const +{ + if ( psz ) { - wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true + // calculate the length of the buffer needed first + const size_t nLen = MB2WC(NULL, psz, 0); + if ( nLen != wxCONV_FAILED ) + { + // now do the actual conversion + wxWCharBuffer buf(nLen /* +1 added implicitly */); - //Get the length of the current (sub)string - size_t nLen = WC2MB(NULL, szPos, 0); + // +1 for the trailing NULL + if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED ) + return buf; + } + } - //Invalid conversion? - if( nLen == (size_t)-1 ) + return wxWCharBuffer(); +} + +const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const +{ + if ( pwz ) + { + const size_t nLen = WC2MB(NULL, pwz, 0); + if ( nLen != wxCONV_FAILED ) { - *pOutSize = 0; - theBuffer.data()[0u] = wxT('\0'); - return theBuffer; + // extra space for trailing NUL(s) + static const size_t extraLen = GetMaxMBNulLen(); + + wxCharBuffer buf(nLen + extraLen - 1); + if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED ) + return buf; } + } - //Increase the actual length (+1 for current null character) - nActualLength += nLen + 1; + return wxCharBuffer(); +} - //if buffer too big, realloc the buffer - if (nActualLength > (nCurrentSize+1)) +const wxWCharBuffer +wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const +{ + const size_t dstLen = ToWChar(NULL, 0, in, inLen); + if ( dstLen != wxCONV_FAILED ) + { + wxWCharBuffer wbuf(dstLen); + if ( ToWChar(wbuf.data(), dstLen, in, inLen) ) { - wxCharBuffer theNewBuffer(nCurrentSize << 1); - memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize); - theBuffer = theNewBuffer; - nCurrentSize <<= 1; + if ( outLen ) + *outLen = dstLen; + return wbuf; } + } - //Convert the current (sub)string - if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 ) + if ( outLen ) + *outLen = 0; + + return wxWCharBuffer(); +} + +const wxCharBuffer +wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const +{ + const size_t dstLen = FromWChar(NULL, 0, in, inLen); + if ( dstLen != wxCONV_FAILED ) + { + wxCharBuffer buf(dstLen); + if ( FromWChar(buf.data(), dstLen, in, inLen) ) { - *pOutSize = 0; - theBuffer.data()[0u] = wxT('\0'); - return theBuffer; + if ( outLen ) + *outLen = dstLen; + return buf; } - - //Increment to next (sub)string - //Note that we have to use wxWcslen instead of nLen here - //because XX2XX gives us the size of the output buffer, - //which is not necessarily the length of the string - szPos += wxWcslen(szPos) + 1; } - //success - return actual length and the buffer - *pOutSize = nActualLength; - return theBuffer; + if ( outLen ) + *outLen = 0; + + return wxCharBuffer(); } // ---------------------------------------------------------------------------- @@ -359,12 +440,12 @@ size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const return wxWC2MB(buf, psz, n); } -#ifdef __UNIX__ - // ---------------------------------------------------------------------------- // wxConvBrokenFileNames // ---------------------------------------------------------------------------- +#ifdef __UNIX__ + wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset) { if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0 @@ -374,23 +455,7 @@ wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset) m_conv = new wxCSConv(charset); } -size_t -wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf, - const char *psz, - size_t outputSize) const -{ - return m_conv->MB2WC( outputBuf, psz, outputSize ); -} - -size_t -wxConvBrokenFileNames::WC2MB(char *outputBuf, - const wchar_t *psz, - size_t outputSize) const -{ - return m_conv->WC2MB( outputBuf, psz, outputSize ); -} - -#endif +#endif // __UNIX__ // ---------------------------------------------------------------------------- // UTF-7 @@ -441,7 +506,7 @@ size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const { size_t len = 0; - while (*psz && ((!buf) || (len < n))) + while ( *psz && (!buf || (len < n)) ) { unsigned char cc = *psz++; if (cc != '+') @@ -459,20 +524,19 @@ size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const len++; psz++; } - else + else // start of BASE64 encoded string { - // BASE64 encoded string - bool lsb; - unsigned char c; + bool lsb, ok; unsigned int d, l; - for (lsb = false, d = 0, l = 0; - (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++) + for ( ok = lsb = false, d = 0, l = 0; + (cc = utf7unb64[(unsigned char)*psz]) != 0xff; + psz++ ) { d <<= 6; d += cc; for (l += 6; l >= 8; lsb = !lsb) { - c = (unsigned char)((d >> (l -= 8)) % 256); + unsigned char c = (unsigned char)((d >> (l -= 8)) % 256); if (lsb) { if (buf) @@ -480,16 +544,29 @@ size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const len ++; } else + { if (buf) *buf = (wchar_t)(c << 8); + } + + ok = true; } } + + if ( !ok ) + { + // in valid UTF7 we should have valid characters after '+' + return (size_t)-1; + } + if (*psz == '-') psz++; } } - if (buf && (len < n)) - *buf = 0; + + if ( buf && (len < n) ) + *buf = '\0'; + return len; } @@ -530,8 +607,6 @@ static const unsigned char utf7encode[128] = size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const { - - size_t len = 0; while (*psz && ((!buf) || (len < n))) @@ -560,7 +635,7 @@ size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const { // BASE64 encode string unsigned int lsb, d, l; - for (d = 0, l = 0;; psz++) + for (d = 0, l = 0; /*nothing*/; psz++) { for (lsb = 0; lsb < 2; lsb ++) { @@ -690,7 +765,7 @@ size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const } #else // !WC_UTF16 if (buf) - *buf++ = res; + *buf++ = (wchar_t)res; len++; #endif // WC_UTF16/!WC_UTF16 } @@ -711,7 +786,7 @@ size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const len += pa; #else if (buf) - *buf++ = wxUnicodePUA + (unsigned char)*opsz; + *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz); opsz++; len++; #endif @@ -723,11 +798,11 @@ size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const { if ( buf && len + 3 < n ) { - unsigned char n = *opsz; + unsigned char on = *opsz; *buf++ = L'\\'; - *buf++ = (wchar_t)( L'0' + n / 0100 ); - *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 ); - *buf++ = (wchar_t)( L'0' + n % 010 ); + *buf++ = (wchar_t)( L'0' + on / 0100 ); + *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 ); + *buf++ = (wchar_t)( L'0' + on % 010 ); } opsz++; len += 4; @@ -883,20 +958,24 @@ size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) con // swap 16bit MB to 16bit String size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const { - size_t len=0; + size_t len = 0; - while (*(wxUint16*)psz && (!buf || len < n)) + // UTF16 string must be terminated by 2 NULs as single NULs may occur + // inside the string + while ( (psz[0] || psz[1]) && (!buf || len < n) ) { - if (buf) + if ( buf ) { ((char *)buf)[0] = psz[1]; ((char *)buf)[1] = psz[0]; buf++; } len++; - psz += sizeof(wxUint16); + psz += 2; } - if (buf && len", ms_wcNeedsSwap); + wxT("iconv wchar_t charset is \"%s\"%s"), + ms_wcCharsetName.empty() ? _T("") + : ms_wcCharsetName.c_str(), + ms_wcNeedsSwap ? _T(" (needs swap)") + : _T("")); } else // we already have ms_wcCharsetName { - m2w = iconv_open(ms_wcCharsetName, cname); + m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname); } - // NB: don't ever pass NULL to iconv_open(), it may crash! - if ( ms_wcCharsetName ) + if ( ms_wcCharsetName.empty() ) { - w2m = iconv_open( cname, ms_wcCharsetName); + w2m = ICONV_T_INVALID; } else { - w2m = (iconv_t)-1; + w2m = iconv_open(cname, ms_wcCharsetName.ToAscii()); + if ( w2m == ICONV_T_INVALID ) + { + wxLogTrace(TRACE_STRCONV, + wxT("\"%s\" -> \"%s\" works but not the converse!?"), + ms_wcCharsetName.c_str(), cname.data()); + } } } wxMBConv_iconv::~wxMBConv_iconv() { - if ( m2w != (iconv_t)-1 ) + if ( m2w != ICONV_T_INVALID ) iconv_close(m2w); - if ( w2m != (iconv_t)-1 ) + if ( w2m != ICONV_T_INVALID ) iconv_close(w2m); } size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const { + // find the string length: notice that must be done differently for + // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs + size_t inbuf; + const size_t nulLen = GetMBNulLen(); + switch ( nulLen ) + { + default: + return (size_t)-1; + + case 1: + inbuf = strlen(psz); // arguably more optimized than our version + break; + + case 2: + case 4: + // for UTF-16/32 not only we need to have 2/4 consecutive NULs but + // they also have to start at character boundary and not span two + // adjacent characters + const char *p; + for ( p = psz; NotAllNULs(p, nulLen); p += nulLen ) + ; + inbuf = p - psz; + break; + } + #if wxUSE_THREADS // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle. // Unfortunately there is a couple of global wxCSConv objects such as @@ -1469,9 +1625,9 @@ size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const // only a few wx classes would be safe to use from non-main threads // as MB<->WC conversion would fail "randomly". wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex); -#endif +#endif // wxUSE_THREADS + - size_t inbuf = strlen(psz); size_t outbuf = n * SIZEOF_WCHAR_T; size_t res, cres; // VS: Use these instead of psz, buf because iconv() modifies its arguments: @@ -1489,12 +1645,11 @@ size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const if (ms_wcNeedsSwap) { // convert to native endianness - WC_BSWAP(buf /* _not_ bufPtr */, res) + for ( unsigned i = 0; i < res; i++ ) + buf[n] = WC_BSWAP(buf[i]); } - // NB: iconv was given only strlen(psz) characters on input, and so - // it couldn't convert the trailing zero. Let's do it ourselves - // if there's some room left for it in the output buffer. + // NUL-terminate the string if there is any space left if (res < n) buf[res] = 0; } @@ -1533,7 +1688,8 @@ size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex); #endif - size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T; + size_t inlen = wxWcslen(psz); + size_t inbuf = inlen * SIZEOF_WCHAR_T; size_t outbuf = n; size_t res, cres; @@ -1542,13 +1698,13 @@ size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const if (ms_wcNeedsSwap) { // need to copy to temp buffer to switch endianness - // this absolutely doesn't rock! - // (no, doing WC_BSWAP twice on the original buffer won't help, as it + // (doing WC_BSWAP twice on the original buffer won't help, as it // could be in read-only memory, or be accessed in some other thread) - tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T); - memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T); - WC_BSWAP(tmpbuf, inbuf) - psz=tmpbuf; + tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T); + for ( size_t i = 0; i < inlen; i++ ) + tmpbuf[n] = WC_BSWAP(psz[i]); + tmpbuf[inlen] = L'\0'; + psz = tmpbuf; } if (buf) @@ -1586,7 +1742,6 @@ size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const if (ICONV_FAILED(cres, inbuf)) { - //VS: it is ok if iconv fails, hence trace only wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); return (size_t)-1; } @@ -1594,6 +1749,36 @@ size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const return res; } +size_t wxMBConv_iconv::GetMBNulLen() const +{ + if ( m_minMBCharWidth == 0 ) + { + wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv); + +#if wxUSE_THREADS + // NB: explained in MB2WC + wxMutexLocker lock(self->m_iconvMutex); +#endif + + wchar_t *wnul = L""; + char buf[8]; // should be enough for NUL in any encoding + size_t inLen = sizeof(wchar_t), + outLen = WXSIZEOF(buf); + char *in = (char *)wnul; + char *out = buf; + if ( iconv(w2m, ICONV_CHAR_CAST(&in), &inLen, &out, &outLen) == (size_t)-1 ) + { + self->m_minMBCharWidth = (size_t)-1; + } + else // ok + { + self->m_minMBCharWidth = out - buf; + } + } + + return m_minMBCharWidth; +} + #endif // HAVE_ICONV @@ -1615,19 +1800,22 @@ public: wxMBConv_win32() { m_CodePage = CP_ACP; + m_minMBCharWidth = 0; } #if wxUSE_FONTMAP wxMBConv_win32(const wxChar* name) { m_CodePage = wxCharsetToCodepage(name); + m_minMBCharWidth = 0; } wxMBConv_win32(wxFontEncoding encoding) { m_CodePage = wxEncodingToCodepage(encoding); + m_minMBCharWidth = 0; } -#endif +#endif // wxUSE_FONTMAP size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const { @@ -1644,7 +1832,28 @@ public: // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is // explicitly ill-formed according to RFC 2152) neither so we don't // even have any fallback here... - int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS; + // + // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or + // Win XP or newer and if it is specified on older versions, conversion + // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS) + // fails. So we can only use the flag on newer Windows versions. + // Additionally, the flag is not supported by UTF7, symbol and CJK + // encodings. See here: + // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx + // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp + int flags = 0; + if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL && + m_CodePage < 50000 && + IsAtLeastWin2kSP4() ) + { + flags = MB_ERR_INVALID_CHARS; + } + else if ( m_CodePage == CP_UTF8 ) + { + // Avoid round-trip in the special case of UTF-8 by using our + // own UTF-8 conversion code: + return wxMBConvUTF8().MB2WC(buf, psz, n); + } const size_t len = ::MultiByteToWideChar ( @@ -1655,11 +1864,41 @@ public: buf, // output string buf ? n : 0 // size of output buffer ); + if ( !len ) + { + // function totally failed + return (size_t)-1; + } + + // if we were really converting and didn't use MB_ERR_INVALID_CHARS, + // check if we succeeded, by doing a double trip: + if ( !flags && buf ) + { + const size_t mbLen = strlen(psz); + wxCharBuffer mbBuf(mbLen); + if ( ::WideCharToMultiByte + ( + m_CodePage, + 0, + buf, + -1, + mbBuf.data(), + mbLen + 1, // size in bytes, not length + NULL, + NULL + ) == 0 || + strcmp(mbBuf, psz) != 0 ) + { + // we didn't obtain the same thing we started from, hence + // the conversion was lossy and we consider that it failed + return (size_t)-1; + } + } // note that it returns count of written chars for buf != NULL and size // of the needed buffer for buf == NULL so in either case the length of // the string (which never includes the terminating NUL) is one less - return len ? len - 1 : (size_t)-1; + return len - 1; } size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const @@ -1743,6 +1982,44 @@ public: return len - 1; } + virtual size_t GetMBNulLen() const + { + if ( m_minMBCharWidth == 0 ) + { + int len = ::WideCharToMultiByte + ( + m_CodePage, // code page + 0, // no flags + L"", // input string + 1, // translate just the NUL + NULL, // output buffer + 0, // and its size + NULL, // no replacement char + NULL // [out] don't care if it was used + ); + + wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32); + switch ( len ) + { + default: + wxLogDebug(_T("Unexpected NUL length %d"), len); + // fall through + + case 0: + self->m_minMBCharWidth = (size_t)-1; + break; + + case 1: + case 2: + case 4: + self->m_minMBCharWidth = len; + break; + } + } + + return m_minMBCharWidth; + } + bool IsOk() const { return m_CodePage != -1; } private: @@ -1774,7 +2051,40 @@ private: return s_isWin98Or2k == 1; } + static bool IsAtLeastWin2kSP4() + { +#ifdef __WXWINCE__ + return false; +#else + static int s_isAtLeastWin2kSP4 = -1; + + if ( s_isAtLeastWin2kSP4 == -1 ) + { + OSVERSIONINFOEX ver; + + memset(&ver, 0, sizeof(ver)); + ver.dwOSVersionInfoSize = sizeof(ver); + GetVersionEx((OSVERSIONINFO*)&ver); + + s_isAtLeastWin2kSP4 = + ((ver.dwMajorVersion > 5) || // Vista+ + (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003 + (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 && + ver.wServicePackMajor >= 4)) // 2000 SP4+ + ? 1 : 0; + } + + return s_isAtLeastWin2kSP4 == 1; +#endif + } + + + // the code page we're working with long m_CodePage; + + // cached result of GetMBNulLen(), set to 0 initially meaning + // "unknown" + size_t m_minMBCharWidth; }; #endif // wxHAVE_WIN32_MB2WC @@ -2408,12 +2718,30 @@ public: return inbuf; } + virtual size_t GetMBNulLen() const + { + switch ( m_enc ) + { + case wxFONTENCODING_UTF16BE: + case wxFONTENCODING_UTF16LE: + return 2; + + case wxFONTENCODING_UTF32BE: + case wxFONTENCODING_UTF32LE: + return 4; + + default: + return 1; + } + } + bool IsOk() const { return m_ok; } public: wxFontEncoding m_enc; wxEncodingConverter m2w, w2m; +private: // were we initialized successfully? bool m_ok; @@ -2454,7 +2782,11 @@ wxCSConv::wxCSConv(const wxChar *charset) SetName(charset); } +#if wxUSE_FONTMAP + m_encoding = wxFontMapperBase::GetEncodingFromName(charset); +#else m_encoding = wxFONTENCODING_SYSTEM; +#endif } wxCSConv::wxCSConv(wxFontEncoding encoding) @@ -2534,7 +2866,8 @@ wxMBConv *wxCSConv::DoCreate() const // check for the special case of ASCII or ISO8859-1 charset: as we have // special knowledge of it anyhow, we don't need to create a special // conversion object - if ( m_encoding == wxFONTENCODING_ISO8859_1 ) + if ( m_encoding == wxFONTENCODING_ISO8859_1 || + m_encoding == wxFONTENCODING_DEFAULT ) { // don't convert at all return NULL; @@ -2801,6 +3134,18 @@ size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const return len; } +size_t wxCSConv::GetMBNulLen() const +{ + CreateConvIfNeeded(); + + if ( m_convReal ) + { + return m_convReal->GetMBNulLen(); + } + + return 1; +} + // ---------------------------------------------------------------------------- // globals // ---------------------------------------------------------------------------- @@ -2841,5 +3186,3 @@ WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc, wxConvUTF8; #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T - -