// implementation
// ============================================================================
+// helper used by ToWChar() and wxMBConv_iconv::MB2WC(): returns true if at
+// least one of the n bytes starting at p is not NUL and false if all of them
+// are NUL (which also means that it returns false for n == 0)
+static bool NotAllNULs(const char *p, size_t n)
+{
+    for ( const char * const end = p + n; p != end; ++p )
+    {
+        if ( *p != '\0' )
+            return true;
+    }
+
+    return false;
+}
+
// ----------------------------------------------------------------------------
// UTF-16 en/decoding to/from UCS-4
// ----------------------------------------------------------------------------
// wxMBConv
// ----------------------------------------------------------------------------
-wxMBConv::~wxMBConv()
+// Default implementation of ToWChar() written in terms of the old MB2WC().
+//
+// Converts the srcLen bytes at src (or, if srcLen is (size_t)-1, a string
+// which is assumed to be properly terminated) chunk by chunk, a chunk being
+// the text up to the next terminator: MB2WC() is called once to measure the
+// chunk and, if dst is not NULL, a second time to actually convert it.
+//
+// Returns the total length of the converted chunks plus one for each NUL
+// separating two consecutive chunks, or wxCONV_FAILED if GetMBNulLen() or
+// MB2WC() fails or if dst is not NULL and the result doesn't fit in dstLen.
+size_t
+wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
+ const char *src, size_t srcLen) const
{
- // nothing to do here (necessary for Darwin linking probably)
-}
+ // although new conversion classes are supposed to implement this function
+ // directly, the existing ones only implement the old MB2WC() and so, to
+ // avoid to have to rewrite all conversion classes at once, we provide a
+ // default (but not efficient) implementation of this one in terms of the
+ // old function by copying the input to ensure that it's NUL-terminated and
+ // then using MB2WC() to convert it
+
+ // the number of chars [which would be] written to dst [if it were not NULL]
+ size_t dstWritten = 0;
+
+ // the number of NULs terminating this string
+ size_t nulLen wxDUMMY_INITIALIZE(0);
+
+ // if we were not given the input size we just have to assume that the
+ // string is properly terminated as we have no way of knowing how long it
+ // is anyhow, but if we do have the size check whether there are enough
+ // NULs at the end
+ wxCharBuffer bufTmp;
+ const char *srcEnd;
+ if ( srcLen != (size_t)-1 )
+ {
+ // we need to know how to find the end of this string
+ nulLen = GetMBNulLen();
+ if ( nulLen == wxCONV_FAILED )
+ return wxCONV_FAILED;
+
+ // if there are enough NULs we can avoid the copy
+ if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
+ {
+ // make a copy in order to properly NUL-terminate the string
+ bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
+ char * const p = bufTmp.data();
+ memcpy(p, src, srcLen);
+ for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
+ *s = '\0';
+
+ src = bufTmp;
+ }
-const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
-{
- if ( psz )
+ srcEnd = src + srcLen;
+ }
+ else // quit after the first loop iteration
{
- // calculate the length of the buffer needed first
- size_t nLen = MB2WC(NULL, psz, 0);
- if ( nLen != (size_t)-1 )
+ srcEnd = NULL;
+ }
+
+ for ( ;; )
+ {
+ // try to convert the current chunk
+ size_t lenChunk = MB2WC(NULL, src, 0);
+ if ( lenChunk == 0 )
{
- // now do the actual conversion
- wxWCharBuffer buf(nLen);
- nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
- if ( nLen != (size_t)-1 )
- {
- return buf;
- }
+ // nothing left in the input string, conversion succeeded
+ //
+ // NOTE(review): presumably an empty chunk in the middle of the
+ // input (two consecutive terminators) also yields 0 here and so
+ // stops the conversion early -- confirm that this is intended
+ break;
}
- }
- wxWCharBuffer buf((wchar_t *)NULL);
+ if ( lenChunk == wxCONV_FAILED )
+ return wxCONV_FAILED;
- return buf;
-}
+ // if we already have a previous chunk, leave the NUL separating it
+ // from this one
+ if ( dstWritten )
+ {
+ dstWritten++;
+ if ( dst )
+ dst++;
+ }
-const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
-{
- if ( pwz )
- {
- size_t nLen = WC2MB(NULL, pwz, 0);
- if ( nLen != (size_t)-1 )
+ dstWritten += lenChunk;
+
+ if ( dst )
{
- wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
- nLen = WC2MB(buf.data(), pwz, nLen + 4);
- if ( nLen != (size_t)-1 )
- {
- return buf;
- }
+ if ( dstWritten > dstLen )
+ return wxCONV_FAILED;
+
+ lenChunk = MB2WC(dst, src, lenChunk + 1 /* for NUL */);
+ if ( lenChunk == wxCONV_FAILED )
+ return wxCONV_FAILED;
+
+ dst += lenChunk;
}
- }
- wxCharBuffer buf((char *)NULL);
+ if ( !srcEnd )
+ {
+ // we convert the entire string in this case, as we suppose that the
+ // string is NUL-terminated and so srcEnd is not used at all
+ break;
+ }
- return buf;
-}
+ // advance the input pointer past the end of this chunk
+ while ( NotAllNULs(src, nulLen) )
+ {
+ // notice that we must skip over multiple bytes here as we suppose
+ // that if NUL takes 2 or 4 bytes, then all the other characters do
+ // too and so if advanced by a single byte we might erroneously
+ // detect sequences of NUL bytes in the middle of the input
+ src += nulLen;
+ }
-const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
-{
- wxASSERT(pOutSize != NULL);
+ src += nulLen; // skipping over its terminator as well
- const char* szEnd = szString + nStringLen + 1;
- const char* szPos = szString;
- const char* szStart = szPos;
+ // note that ">=" (and not just "==") is needed here as the terminator
+ // we skipped just above could be inside or just after the buffer
+ // delimited by srcEnd
+ if ( src >= srcEnd )
+ break;
+ }
- size_t nActualLength = 0;
- size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
+ return dstWritten;
+}
+
+// Default implementation of FromWChar() written in terms of the old WC2MB().
+//
+// Converts the srcLen wide characters at src (or, if srcLen is (size_t)-1, a
+// NUL-terminated string including its terminating NUL) chunk by chunk, a
+// chunk being the text up to and including the next L'\0'.
+//
+// Returns the number of bytes written to dst (or which would be written if
+// dst is NULL), counting GetMBNulLen() bytes for the terminator of each
+// chunk, or wxCONV_FAILED if GetMBNulLen() or WC2MB() fails or if dst is not
+// NULL and the result doesn't fit in dstLen.
+size_t
+wxMBConv::FromWChar(char *dst, size_t dstLen,
+ const wchar_t *src, size_t srcLen) const
+{
+ // the number of chars [which would be] written to dst [if it were not NULL]
+ size_t dstWritten = 0;
- wxWCharBuffer theBuffer(nCurrentSize);
+ // make a copy of the input string unless it is already properly
+ // NUL-terminated
+ //
+ // if we don't know its length we have no choice but to assume that it is,
+ // indeed, properly terminated
+ wxWCharBuffer bufTmp;
+ if ( srcLen == (size_t)-1 )
+ {
+ srcLen = wxWcslen(src) + 1;
+ }
+ else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
+ {
+ // make a copy in order to properly NUL-terminate the string
+ bufTmp = wxWCharBuffer(srcLen);
+ memcpy(bufTmp.data(), src, srcLen*sizeof(wchar_t));
+ src = bufTmp;
+ }
- //Convert the string until the length() is reached, continuing the
- //loop every time a null character is reached
- while(szPos != szEnd)
+ const size_t lenNul = GetMBNulLen();
+
+ // without knowing the terminator length the chunk sizes computed below
+ // would be wrong, so fail in the same way as ToWChar() does in this case
+ if ( lenNul == wxCONV_FAILED )
+ return wxCONV_FAILED;
+
+ for ( const wchar_t * const srcEnd = src + srcLen;
+ src < srcEnd;
+ src += wxWcslen(src) + 1 /* skip L'\0' too */ )
{
- wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
+ // try to convert the current chunk
+ size_t lenChunk = WC2MB(NULL, src, 0);
- //Get the length of the current (sub)string
- size_t nLen = MB2WC(NULL, szPos, 0);
+ if ( lenChunk == wxCONV_FAILED )
+ return wxCONV_FAILED;
- //Invalid conversion?
- if( nLen == (size_t)-1 )
- {
- *pOutSize = 0;
- theBuffer.data()[0u] = wxT('\0');
- return theBuffer;
- }
+ // account for the terminator of this chunk too
+ lenChunk += lenNul;
+ dstWritten += lenChunk;
+ if ( dst )
+ {
+ if ( dstWritten > dstLen )
+ return wxCONV_FAILED;
- //Increase the actual length (+1 for current null character)
- nActualLength += nLen + 1;
+ if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
+ return wxCONV_FAILED;
- //if buffer too big, realloc the buffer
- if (nActualLength > (nCurrentSize+1))
- {
- wxWCharBuffer theNewBuffer(nCurrentSize << 1);
- memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
- theBuffer = theNewBuffer;
- nCurrentSize <<= 1;
+ dst += lenChunk;
}
+ }
- //Convert the current (sub)string
- if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
- {
- *pOutSize = 0;
- theBuffer.data()[0u] = wxT('\0');
- return theBuffer;
- }
+ return dstWritten;
+}
- //Increment to next (sub)string
- //Note that we have to use strlen instead of nLen here
- //because XX2XX gives us the size of the output buffer,
- //which is not necessarily the length of the string
- szPos += strlen(szPos) + 1;
+// Default implementation of the old-style MB2WC() in terms of ToWChar():
+// forwards to it without specifying the input length and returns its result
+// decremented by one, or wxCONV_FAILED unchanged if the conversion failed.
+size_t wxMBConv::MB2WC(wchar_t *out, const char *in, size_t outLen) const
+{
+ size_t rc = ToWChar(out, outLen, in);
+ if ( rc != wxCONV_FAILED )
+ {
+ // ToWChar() returns the buffer length, i.e. including the trailing
+ // NUL, while this method doesn't take it into account
+ //
+ // NOTE(review): if ToWChar() ever returns 0 the decrement below wraps
+ // around to (size_t)-1 -- confirm that all ToWChar() implementations
+ // really do count the trailing NUL
+ rc--;
}
- //success - return actual length and the buffer
- *pOutSize = nActualLength;
- return theBuffer;
+ return rc;
}
-const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
+// Default implementation of the old-style WC2MB() in terms of FromWChar():
+// forwards to it without specifying the input length and returns its result
+// less GetMBNulLen(), or wxCONV_FAILED unchanged if the conversion failed.
+size_t wxMBConv::WC2MB(char *out, const wchar_t *in, size_t outLen) const
{
- wxASSERT(pOutSize != NULL);
-
- const wchar_t* szEnd = szString + nStringLen + 1;
- const wchar_t* szPos = szString;
- const wchar_t* szStart = szPos;
+ size_t rc = FromWChar(out, outLen, in);
+ if ( rc != wxCONV_FAILED )
+ {
+ // FromWChar() counts the terminator of each chunk it converts, so
+ // subtract the length of one terminator from its result
+ rc -= GetMBNulLen();
+ }
- size_t nActualLength = 0;
- size_t nCurrentSize = nStringLen << 2; //try * 4 first
+ return rc;
+}
- wxCharBuffer theBuffer(nCurrentSize);
+// out-of-line definition of the (empty) destructor
+wxMBConv::~wxMBConv()
+{
+ // nothing to do here (necessary for Darwin linking probably)
+}
- //Convert the string until the length() is reached, continuing the
- //loop every time a null character is reached
- while(szPos != szEnd)
+// Converts the string psz to wide characters using MB2WC() and returns the
+// result in a newly allocated buffer; a default-constructed wxWCharBuffer is
+// returned if psz is NULL or if the conversion fails.
+const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
+{
+ if ( psz )
{
- wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
+ // calculate the length of the buffer needed first
+ const size_t nLen = MB2WC(NULL, psz, 0);
+ if ( nLen != wxCONV_FAILED )
+ {
+ // now do the actual conversion
+ wxWCharBuffer buf(nLen /* +1 added implicitly */);
- //Get the length of the current (sub)string
- size_t nLen = WC2MB(NULL, szPos, 0);
+ // +1 for the trailing NULL
+ if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
+ return buf;
+ }
+ }
- //Invalid conversion?
- if( nLen == (size_t)-1 )
+ return wxWCharBuffer();
+}
+
+// Converts the wide string pwz to multibyte using WC2MB() and returns the
+// result in a newly allocated buffer; a default-constructed wxCharBuffer is
+// returned if pwz is NULL or if the conversion fails.
+const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
+{
+ if ( pwz )
+ {
+ // first call only computes the length of the converted string
+ const size_t nLen = WC2MB(NULL, pwz, 0);
+ if ( nLen != wxCONV_FAILED )
{
- *pOutSize = 0;
- theBuffer.data()[0u] = wxT('\0');
- return theBuffer;
+ // extra space for trailing NUL(s)
+ static const size_t extraLen = GetMaxMBNulLen();
+
+ // the buffer is created with nLen + extraLen - 1 as its length
+ // while WC2MB() is allowed to write up to nLen + extraLen bytes
+ wxCharBuffer buf(nLen + extraLen - 1);
+ if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
+ return buf;
}
+ }
- //Increase the actual length (+1 for current null character)
- nActualLength += nLen + 1;
+ return wxCharBuffer();
+}
- //if buffer too big, realloc the buffer
- if (nActualLength > (nCurrentSize+1))
+// Converts the inLen bytes at in to wide characters using ToWChar() and
+// returns the result in a newly allocated buffer.
+//
+// If outLen is not NULL, it is filled with the length of the result as
+// returned by ToWChar() on success or with 0 on failure, in which case a
+// default-constructed wxWCharBuffer is returned.
+const wxWCharBuffer
+wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
+{
+ // first call only computes the required length
+ const size_t dstLen = ToWChar(NULL, 0, in, inLen);
+ if ( dstLen != wxCONV_FAILED )
+ {
+ wxWCharBuffer wbuf(dstLen);
+
+ // wxCONV_FAILED is not 0, so the result must be compared with it
+ // explicitly, testing it for being non-zero would treat a failure of
+ // the actual conversion as success
+ if ( ToWChar(wbuf.data(), dstLen, in, inLen) != wxCONV_FAILED )
{
- wxCharBuffer theNewBuffer(nCurrentSize << 1);
- memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
- theBuffer = theNewBuffer;
- nCurrentSize <<= 1;
+ if ( outLen )
+ *outLen = dstLen;
+ return wbuf;
}
+ }
+
+ if ( outLen )
+ *outLen = 0;
- //Convert the current (sub)string
- if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
+ return wxWCharBuffer();
+}
+
+// Converts the inLen wide characters at in to multibyte using FromWChar()
+// and returns the result in a newly allocated buffer.
+//
+// If outLen is not NULL, it is filled with the length of the result as
+// returned by FromWChar() on success or with 0 on failure, in which case a
+// default-constructed wxCharBuffer is returned.
+const wxCharBuffer
+wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
+{
+ // first call only computes the required length
+ const size_t dstLen = FromWChar(NULL, 0, in, inLen);
+ if ( dstLen != wxCONV_FAILED )
+ {
+ wxCharBuffer buf(dstLen);
+
+ // wxCONV_FAILED is not 0, so the result must be compared with it
+ // explicitly, testing it for being non-zero would treat a failure of
+ // the actual conversion as success
+ if ( FromWChar(buf.data(), dstLen, in, inLen) != wxCONV_FAILED )
{
- *pOutSize = 0;
- theBuffer.data()[0u] = wxT('\0');
- return theBuffer;
+ if ( outLen )
+ *outLen = dstLen;
+ return buf;
}
-
- //Increment to next (sub)string
- //Note that we have to use wxWcslen instead of nLen here
- //because XX2XX gives us the size of the output buffer,
- //which is not necessarily the length of the string
- szPos += wxWcslen(szPos) + 1;
}
- //success - return actual length and the buffer
- *pOutSize = nActualLength;
- return theBuffer;
+ if ( outLen )
+ *outLen = 0;
+
+ return wxCharBuffer();
}
// ----------------------------------------------------------------------------
return wxWC2MB(buf, psz, n);
}
-#ifdef __UNIX__
-
// ----------------------------------------------------------------------------
// wxConvBrokenFileNames
// ----------------------------------------------------------------------------
+#ifdef __UNIX__
+
wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
{
if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
m_conv = new wxCSConv(charset);
}
-size_t
-wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
- const char *psz,
- size_t outputSize) const
-{
- return m_conv->MB2WC( outputBuf, psz, outputSize );
-}
-
-size_t
-wxConvBrokenFileNames::WC2MB(char *outputBuf,
- const wchar_t *psz,
- size_t outputSize) const
-{
- return m_conv->WC2MB( outputBuf, psz, outputSize );
-}
-
-#endif
+#endif // __UNIX__
// ----------------------------------------------------------------------------
// UTF-7
size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
-
-
size_t len = 0;
while (*psz && ((!buf) || (len < n)))
// swap 16bit MB to 16bit String
+// Writes each element of psz to buf (if it is not NULL) as 2 bytes in swapped
+// order, without writing more than n bytes of text, and returns the number of
+// bytes the converted text takes, not counting the terminator.
size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
- size_t len=0;
+ size_t len = 0;
- while (*psz && (!buf || len < n))
+ while ( *psz && (!buf || len < n) )
{
- if (buf)
+ if ( buf )
{
*buf++ = ((char*)psz)[1];
*buf++ = ((char*)psz)[0];
}
- len += sizeof(wxUint16);
+ len += 2;
psz++;
}
- if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
+
+ // NUL-terminate the output if there is any space left: the terminator of
+ // a 16 bit encoding is 2 bytes long, so write both of them if they fit
+ // instead of leaving the second one uninitialized
+ if ( buf )
+ {
+ for ( size_t i = 0; i < 2 && len + i < n; i++ )
+ buf[i] = '\0';
+ }
return len;
}
virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
+ // classify this encoding as explained in wxMBConv::GetMBNulLen()
+ // comment
+ virtual size_t GetMBNulLen() const;
+
bool IsOk() const
{ return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
// true if the wide char encoding we use (i.e. ms_wcCharsetName) has
// different endian-ness than the native one
static bool ms_wcNeedsSwap;
+
+ // cached result of GetMBNulLen(); set to 0 meaning "unknown"
+ // initially
+ size_t m_minMBCharWidth;
};
// make the constructor available for unit testing
wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
{
+ m_minMBCharWidth = 0;
+
// iconv operates with chars, not wxChars, but luckily it uses only ASCII
// names for the charsets
const wxCharBuffer cname(wxString(name).ToAscii());
size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
+ // find the string length: notice that must be done differently for
+ // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
+ size_t inbuf;
+ const size_t nulLen = GetMBNulLen();
+ switch ( nulLen )
+ {
+ default:
+ return (size_t)-1;
+
+ case 1:
+ inbuf = strlen(psz); // arguably more optimized than our version
+ break;
+
+ case 2:
+ case 4:
+ // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
+ // they also have to start at character boundary and not span two
+ // adjacent characters
+ const char *p;
+ for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
+ ;
+ inbuf = p - psz;
+ break;
+ }
+
#if wxUSE_THREADS
// NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
// Unfortunately there is a couple of global wxCSConv objects such as
// only a few wx classes would be safe to use from non-main threads
// as MB<->WC conversion would fail "randomly".
wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
-#endif
+#endif // wxUSE_THREADS
+
- size_t inbuf = strlen(psz);
size_t outbuf = n * SIZEOF_WCHAR_T;
size_t res, cres;
// VS: Use these instead of psz, buf because iconv() modifies its arguments:
buf[n] = WC_BSWAP(buf[i]);
}
- // NB: iconv was given only strlen(psz) characters on input, and so
- // it couldn't convert the trailing zero. Let's do it ourselves
- // if there's some room left for it in the output buffer.
+ // NUL-terminate the string if there is any space left
if (res < n)
buf[res] = 0;
}
return res;
}
+// Returns the number of bytes iconv() produces when converting L'\0' to the
+// multibyte encoding, or (size_t)-1 if this conversion fails.
+//
+// The value is computed when m_minMBCharWidth is still 0 (i.e. unknown) and
+// is then cached in it, which requires casting away the constness of this.
+size_t wxMBConv_iconv::GetMBNulLen() const
+{
+    if ( m_minMBCharWidth == 0 )
+    {
+        wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
+
+#if wxUSE_THREADS
+        // NB: explained in MB2WC
+        wxMutexLocker lock(self->m_iconvMutex);
+#endif
+
+        // string literals are const and can't be bound to a pointer to
+        // non-const without a cast
+        const wchar_t * const wnul = L"";
+        char buf[8]; // should be enough for NUL in any encoding
+        size_t inLen = sizeof(wchar_t),
+               outLen = WXSIZEOF(buf);
+
+        // iconv() takes a pointer to non-const input but only reads from it
+        char *in = (char *)wnul;
+        char *out = buf;
+        if ( iconv(w2m, ICONV_CHAR_CAST(&in), &inLen, &out, &outLen) == (size_t)-1 )
+        {
+            self->m_minMBCharWidth = (size_t)-1;
+        }
+        else // ok
+        {
+            // iconv() advanced out past the bytes it has written
+            self->m_minMBCharWidth = out - buf;
+        }
+    }
+
+    return m_minMBCharWidth;
+}
+
#endif // HAVE_ICONV
wxMBConv_win32()
{
m_CodePage = CP_ACP;
+ // 0 means that the result of GetMBNulLen() hasn't been computed yet
+ m_minMBCharWidth = 0;
}
#if wxUSE_FONTMAP
wxMBConv_win32(const wxChar* name)
{
m_CodePage = wxCharsetToCodepage(name);
+ // 0 means that the result of GetMBNulLen() hasn't been computed yet
+ m_minMBCharWidth = 0;
}
wxMBConv_win32(wxFontEncoding encoding)
{
m_CodePage = wxEncodingToCodepage(encoding);
+ // 0 means that the result of GetMBNulLen() hasn't been computed yet
+ m_minMBCharWidth = 0;
}
-#endif
+#endif // wxUSE_FONTMAP
size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
return len - 1;
}
+    // returns the length of NUL in the encoding of m_CodePage as reported by
+    // WideCharToMultiByte(), or (size_t)-1 if it is not 1, 2 or 4; the value
+    // is computed only while m_minMBCharWidth is 0 and cached in it
+    virtual size_t GetMBNulLen() const
+    {
+        // reuse the cached value if we already have it
+        if ( m_minMBCharWidth != 0 )
+            return m_minMBCharWidth;
+
+        // ask how many bytes are needed for translating just the NUL: no
+        // flags, no output buffer, no replacement character and no interest
+        // in whether it was used
+        const int nulBytes =
+            ::WideCharToMultiByte(m_CodePage, 0, L"", 1, NULL, 0, NULL, NULL);
+
+        size_t width;
+        if ( nulBytes == 1 || nulBytes == 2 || nulBytes == 4 )
+        {
+            width = nulBytes;
+        }
+        else // failure (0) or a length we can't deal with
+        {
+            if ( nulBytes != 0 )
+                wxLogDebug(_T("Unexpected NUL length %d"), nulBytes);
+
+            width = (size_t)-1;
+        }
+
+        // this method is const, so cast the constness away to cache the value
+        wxConstCast(this, wxMBConv_win32)->m_minMBCharWidth = width;
+
+        return m_minMBCharWidth;
+    }
+
bool IsOk() const { return m_CodePage != -1; }
private:
#endif
}
+
+ // the code page we're working with
long m_CodePage;
+
+ // cached result of GetMBNulLen(), set to 0 initially meaning
+ // "unknown"
+ size_t m_minMBCharWidth;
};
#endif // wxHAVE_WIN32_MB2WC
return inbuf;
}
+    // returns the length of NUL in the encoding m_enc: 2 for the UTF-16
+    // encodings, 4 for the UTF-32 ones and 1 for all the others
+    virtual size_t GetMBNulLen() const
+    {
+        if ( m_enc == wxFONTENCODING_UTF16BE ||
+                m_enc == wxFONTENCODING_UTF16LE )
+        {
+            return 2;
+        }
+
+        if ( m_enc == wxFONTENCODING_UTF32BE ||
+                m_enc == wxFONTENCODING_UTF32LE )
+        {
+            return 4;
+        }
+
+        return 1;
+    }
+
bool IsOk() const { return m_ok; }
public:
wxFontEncoding m_enc;
wxEncodingConverter m2w, w2m;
+private:
// were we initialized successfully?
bool m_ok;
return len;
}
+// returns the length of NUL as reported by the real conversion object if
+// there is one after calling CreateConvIfNeeded(), or 1 otherwise
+size_t wxCSConv::GetMBNulLen() const
+{
+    CreateConvIfNeeded();
+
+    return m_convReal ? m_convReal->GetMBNulLen() : 1;
+}
+
// ----------------------------------------------------------------------------
// globals
// ----------------------------------------------------------------------------