X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/7c8fad40bf740be73d48c8d6346c07f6b0238f76..26364344e58ae9c384965ff25c6920a75c55184e:/src/common/strconv.cpp diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index c93bcc3132..5ceede4225 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -70,6 +70,7 @@ #ifdef HAVE_ICONV #include + #include "wx/thread.h" #endif #include "wx/encconv.h" @@ -217,14 +218,18 @@ const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const return buf; } -size_t wxMBConv::MB2WC(wchar_t* szBuffer, const char* szString, - size_t outsize, size_t nStringLen) const +const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const { + wxASSERT(pOutSize != NULL); + const char* szEnd = szString + nStringLen + 1; const char* szPos = szString; const char* szStart = szPos; size_t nActualLength = 0; + size_t nCurrentSize = nStringLen; //try normal size first (should never resize?) + + wxWCharBuffer theBuffer(nCurrentSize); //Convert the string until the length() is reached, continuing the //loop every time a null character is reached @@ -237,18 +242,31 @@ size_t wxMBConv::MB2WC(wchar_t* szBuffer, const char* szString, //Invalid conversion? if( nLen == (size_t)-1 ) - return nLen; + { + *pOutSize = 0; + theBuffer.data()[0u] = wxT('\0'); + return theBuffer; + } + //Increase the actual length (+1 for current null character) nActualLength += nLen + 1; - //Only copy data in if buffer size is big enough - if (szBuffer != NULL && - nActualLength <= outsize) + //if buffer too big, realloc the buffer + if (nActualLength > (nCurrentSize+1)) { - //Convert the current (sub)string - if ( MB2WC(&szBuffer[szPos - szStart], szPos, nLen + 1) == (size_t)-1 ) - return (size_t)-1; + wxWCharBuffer theNewBuffer(nCurrentSize << 1); + memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t)); + theBuffer = theNewBuffer; + nCurrentSize <<= 1; + } + + //Convert the current (sub)string + if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 ) + { + *pOutSize = 0; + theBuffer.data()[0u] = wxT('\0'); + return theBuffer; } //Increment to next (sub)string @@ -258,17 +276,23 @@ size_t wxMBConv::MB2WC(wchar_t* szBuffer, const char* szString, szPos += strlen(szPos) + 1; } - return nActualLength - 1; //success - return actual length + //success - return actual length and the buffer + *pOutSize = nActualLength; + return theBuffer; } -size_t wxMBConv::WC2MB(char* szBuffer, const wchar_t* szString, - size_t outsize, size_t nStringLen) const +const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const { + wxASSERT(pOutSize != NULL); + const wchar_t* szEnd = szString + nStringLen + 1; const wchar_t* szPos = szString; const wchar_t* szStart = szPos; size_t nActualLength = 0; + size_t nCurrentSize = nStringLen << 2; //try * 4 first + + wxCharBuffer theBuffer(nCurrentSize); //Convert the string until the length() is reached, continuing the //loop every time a null character is reached @@ -281,18 +305,30 @@ size_t wxMBConv::WC2MB(char* szBuffer, const wchar_t* szString, //Invalid conversion? if( nLen == (size_t)-1 ) - return nLen; + { + *pOutSize = 0; + theBuffer.data()[0u] = wxT('\0'); + return theBuffer; + } //Increase the actual length (+1 for current null character) nActualLength += nLen + 1; - //Only copy data in if buffer size is big enough - if (szBuffer != NULL && - nActualLength <= outsize) + //if buffer too big, realloc the buffer + if (nActualLength > (nCurrentSize+1)) { - //Convert the current (sub)string - if(WC2MB(&szBuffer[szPos - szStart], szPos, nLen + 1) == (size_t)-1 ) - return (size_t)-1; + wxCharBuffer theNewBuffer(nCurrentSize << 1); + memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize); + theBuffer = theNewBuffer; + nCurrentSize <<= 1; + } + + //Convert the current (sub)string + if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 ) + { + *pOutSize = 0; + theBuffer.data()[0u] = wxT('\0'); + return theBuffer; } //Increment to next (sub)string @@ -302,7 +338,9 @@ size_t wxMBConv::WC2MB(char* szBuffer, const wchar_t* szString, szPos += wxWcslen(szPos) + 1; } - return nActualLength - 1; //success - return actual length + //success - return actual length and the buffer + *pOutSize = nActualLength; + return theBuffer; } // ---------------------------------------------------------------------------- @@ -365,7 +403,6 @@ static const unsigned char utf7unb64[] = size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const { - size_t len = 0; while (*psz && ((!buf) || (len < n))) @@ -399,7 +436,7 @@ size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const d += cc; for (l += 6; l >= 8; lsb = !lsb) { - c = (d >> (l -= 8)) % 256; + c = (unsigned char)((d >> (l -= 8)) % 256); if (lsb) { if (buf) @@ -408,7 +445,7 @@ size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const } else if (buf) - *buf = c << 8; + *buf = (wchar_t)(c << 8); } } if (*psz == '-') @@ -455,8 +492,7 @@ static const unsigned char utf7encode[128] = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3 }; -size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t -*psz, size_t n) const +size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const { @@ -473,12 +509,8 @@ size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t len++; } #ifndef WC_UTF16 -#ifdef __VMS - else if (cc > 0xffff) -#else - else if (cc > ((const wchar_t)0xffff)) -#endif - { + else if (((wxUint32)cc) > 0xffff) + { // no surrogate pair generation (yet?) return (size_t)-1; } @@ -1115,12 +1147,13 @@ size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const #ifdef HAVE_ICONV -// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG -// if output buffer is _exactly_ as big as needed. Such case is (unless there's -// yet another bug in glibc) the only case when iconv() returns with (size_t)-1 -// (which means error) and says there are 0 bytes left in the input buffer -- -// when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence, -// this alternative test for iconv() failure. +// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with +// E2BIG if output buffer is _exactly_ as big as needed. Such case is +// (unless there's yet another bug in glibc) the only case when iconv() +// returns with (size_t)-1 (which means error) and says there are 0 bytes +// left in the input buffer -- when _real_ error occurs, +// bytes-left-in-input buffer is non-zero. Hence, this alternative test for +// iconv() failure. // [This bug does not appear in glibc 2.2.] #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \ @@ -1152,6 +1185,10 @@ protected: // the other direction iconv_t m2w, w2m; +#if wxUSE_THREADS + // guards access to m2w and w2m objects + wxMutex m_iconvMutex; +#endif private: // the name (for iconv_open()) of a wide char charset -- if none is @@ -1263,6 +1300,16 @@ wxMBConv_iconv::~wxMBConv_iconv() size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const { +#if wxUSE_THREADS + // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle. + // Unfortunately there is a couple of global wxCSConv objects such as + // wxConvLocal that are used all over wx code, so we have to make sure + // the handle is used by at most one thread at the time. Otherwise + // only a few wx classes would be safe to use from non-main threads + // as MB<->WC conversion would fail "randomly". + wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex); +#endif + size_t inbuf = strlen(psz); size_t outbuf = n * SIZEOF_WCHAR_T; size_t res, cres; @@ -1320,6 +1367,11 @@ size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const { +#if wxUSE_THREADS + // NB: explained in MB2WC + wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex); +#endif + size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T; size_t outbuf = n; size_t res, cres; @@ -1423,10 +1475,20 @@ public: // and break the library itself, e.g. wxTextInputStream::NextChar() // wouldn't work if reading an incomplete MB char didn't result in an // error + // + // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in + // an error (tested under Windows Server 2003) and apparently it is + // done on purpose, i.e. the function accepts any input in this case + // and although I'd prefer to return error on ill-formed output, our + // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is + // explicitly ill-formed according to RFC 2152) neither so we don't + // even have any fallback here... + int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS; + const size_t len = ::MultiByteToWideChar ( m_CodePage, // code page - MB_ERR_INVALID_CHARS, // flags: fall on error + flags, // flags: fall on error psz, // input string -1, // its length (NUL-terminated) buf, // output string @@ -2024,7 +2086,7 @@ public: if (buf == NULL) { //apple specs say at least 32 - n = 32 ; + n = wxMax( 32 , byteInLen ) ; tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ; } ByteCount byteBufferLen = n * sizeof( UniChar ) ; @@ -2065,7 +2127,7 @@ public: if (buf == NULL) { //apple specs say at least 32 - n = 32; + n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T ); tbuf = (char*) malloc( n ) ; } @@ -2162,7 +2224,10 @@ public: { size_t inbuf = strlen(psz); if (buf) - m2w.Convert(psz,buf); + { + if (!m2w.Convert(psz,buf)) + return (size_t)-1; + } return inbuf; } @@ -2170,7 +2235,10 @@ public: { const size_t inbuf = wxWcslen(psz); if (buf) - w2m.Convert(psz,buf); + { + if (!w2m.Convert(psz,buf)) + return (size_t)-1; + } return inbuf; }