// implementation
// ============================================================================
+// helper of ToWChar() and wxMBConv_iconv::MB2WC(): returns true if at least one of the n bytes starting at p is not NUL
+static bool NotAllNULs(const char *p, size_t n)
+{
+ while ( n && *p++ == '\0' ) // stop at the first non-NUL byte or after n bytes
+ n--;
+
+ return n != 0; // n only reaches 0 if all the bytes were NUL (or if n was 0 initially)
+}
+
// ----------------------------------------------------------------------------
// UTF-16 en/decoding to/from UCS-4
// ----------------------------------------------------------------------------
// wxMBConv
// ----------------------------------------------------------------------------
-wxMBConv::~wxMBConv()
-{
- // nothing to do here (necessary for Darwin linking probably)
-}
-
-const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
-{
- if ( psz )
- {
- // calculate the length of the buffer needed first
- size_t nLen = MB2WC(NULL, psz, 0);
- if ( nLen != (size_t)-1 )
- {
- // now do the actual conversion
- wxWCharBuffer buf(nLen);
- nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
- if ( nLen != (size_t)-1 )
- {
- return buf;
- }
- }
- }
-
- wxWCharBuffer buf((wchar_t *)NULL);
-
- return buf;
-}
-
-const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
-{
- if ( pwz )
- {
- size_t nLen = WC2MB(NULL, pwz, 0);
- if ( nLen != (size_t)-1 )
- {
- wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
- nLen = WC2MB(buf.data(), pwz, nLen + 4);
- if ( nLen != (size_t)-1 )
- {
- return buf;
- }
- }
- }
-
- wxCharBuffer buf((char *)NULL);
-
- return buf;
-}
-
-// helper of cMB2WC(): check if n bytes at this location are all NUL
-static bool NotAllNULs(const char *p, size_t n)
+size_t
+wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
+ const char *src, size_t srcLen) const
{
- while ( n && *p++ == '\0' )
- n--;
+ // although new conversion classes are supposed to implement this function
+ // directly, the existing ones only implement the old MB2WC() and so, to
+ // avoid having to rewrite all conversion classes at once, we provide a
+ // default (but not efficient) implementation of this one in terms of the
+ // old function by copying the input to ensure that it's NUL-terminated and
+ // then using MB2WC() to convert it
- return n != 0;
-}
-
-const wxWCharBuffer
-wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
-{
- // the currently accumulated wide characters
- wxWCharBuffer wbuf;
-
- // the current length of wbuf
- size_t lenBuf = 0;
+ // the number of chars [which would be] written to dst [if it were not NULL]
+ size_t dstWritten = 0;
// the number of NULs terminating this string
- size_t nulLen wxDUMMY_INITIALIZE(0);
-
- // make a copy of the input string unless it is already properly
- // NUL-terminated
- wxCharBuffer bufTmp;
+ size_t nulLen wxDUMMY_INITIALIZE(0);
// if we were not given the input size we just have to assume that the
// string is properly terminated as we have no way of knowing how long it
// is anyhow, but if we do have the size check whether there are enough
// NULs at the end
- if ( inLen != (size_t)-1 )
+ wxCharBuffer bufTmp;
+ const char *srcEnd;
+ if ( srcLen != (size_t)-1 )
{
// we need to know how to find the end of this string
- nulLen = GetMinMBCharWidth();
- if ( nulLen == (size_t)-1 )
- return wbuf;
+ nulLen = GetMBNulLen();
+ if ( nulLen == wxCONV_FAILED )
+ return wxCONV_FAILED;
// if there are enough NULs we can avoid the copy
- if ( inLen < nulLen || NotAllNULs(in + inLen - nulLen, nulLen) )
+ if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
{
// make a copy in order to properly NUL-terminate the string
- bufTmp = wxCharBuffer(inLen + nulLen - 1 /* 1 will be added */);
+ bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
char * const p = bufTmp.data();
- memcpy(p, in, inLen);
- for ( char *s = p + inLen; s < p + inLen + nulLen; s++ )
+ memcpy(p, src, srcLen);
+ for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
*s = '\0';
+
+ src = bufTmp;
}
- }
- if ( bufTmp )
- in = bufTmp;
+ srcEnd = src + srcLen;
+ }
+ else // quit after the first loop iteration
+ {
+ srcEnd = NULL;
+ }
- size_t lenChunk;
- for ( const char * const inEnd = in + inLen;; )
+ for ( ;; )
{
// try to convert the current chunk
- lenChunk = MB2WC(NULL, in, 0);
+ size_t lenChunk = MB2WC(NULL, src, 0);
if ( lenChunk == 0 )
{
// nothing left in the input string, conversion succeeded
break;
}
- if ( lenChunk == (size_t)-1 )
- break;
+ if ( lenChunk == wxCONV_FAILED )
+ return wxCONV_FAILED;
// if we already have a previous chunk, leave the NUL separating it
// from this one
- if ( lenBuf )
- lenBuf++;
-
- const size_t lenBufNew = lenBuf + lenChunk;
- if ( !wbuf.extend(lenBufNew) )
+ if ( dstWritten )
{
- lenChunk = (size_t)-1;
- break;
+ dstWritten++;
+ if ( dst )
+ dst++;
}
- lenChunk = MB2WC(wbuf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
- if ( lenChunk == (size_t)-1 )
- break;
+ dstWritten += lenChunk;
- lenBuf = lenBufNew;
+ if ( dst )
+ {
+ if ( dstWritten > dstLen )
+ return wxCONV_FAILED;
- if ( inLen == (size_t)-1 )
+ lenChunk = MB2WC(dst, src, lenChunk + 1 /* for NUL */);
+ if ( lenChunk == wxCONV_FAILED )
+ return wxCONV_FAILED;
+
+ dst += lenChunk;
+ }
+
+ if ( !srcEnd )
{
- // convert only one chunk in this case, as we suppose that the
- // string is NUL-terminated and so inEnd is not used at all
+ // we convert the entire string in this case, as we suppose that the
+ // string is NUL-terminated and so srcEnd is not used at all
break;
}
// advance the input pointer past the end of this chunk
- while ( NotAllNULs(in, nulLen) )
+ while ( NotAllNULs(src, nulLen) )
{
// notice that we must skip over multiple bytes here as we suppose
// that if NUL takes 2 or 4 bytes, then all the other characters do
// too and so if advanced by a single byte we might erroneously
// detect sequences of NUL bytes in the middle of the input
- in += nulLen;
+ src += nulLen;
}
- in += nulLen; // skipping over its terminator as well
+ src += nulLen; // skipping over its terminator as well
// note that ">=" (and not just "==") is needed here as the terminator
// we skipped just above could be inside or just after the buffer
// delimited by inEnd
- if ( in >= inEnd )
+ if ( src >= srcEnd )
break;
}
- if ( lenChunk == (size_t)-1 )
- {
- // conversion failed
- lenBuf = 0;
- wbuf.reset();
- }
-
- if ( outLen )
- *outLen = lenBuf;
-
- return wbuf;
+ return dstWritten;
}
-const wxCharBuffer
-wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
+size_t
+wxMBConv::FromWChar(char *dst, size_t dstLen,
+ const wchar_t *src, size_t srcLen) const
{
- // the currently accumulated multibyte characters
- wxCharBuffer buf;
-
- // the current length of buf
- size_t lenBuf = 0;
+ // the number of chars [which would be] written to dst [if it were not NULL]
+ size_t dstWritten = 0;
// make a copy of the input string unless it is already properly
// NUL-terminated
// if we don't know its length we have no choice but to assume that it is,
// indeed, properly terminated
wxWCharBuffer bufTmp;
- if ( inLen == (size_t)-1 )
+ if ( srcLen == (size_t)-1 )
{
- inLen = wxWcslen(in) + 1;
+ srcLen = wxWcslen(src) + 1;
}
- else if ( inLen != 0 && in[inLen - 1] != L'\0' )
+ else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
{
// make a copy in order to properly NUL-terminate the string
- bufTmp = wxWCharBuffer(inLen);
- memcpy(bufTmp.data(), in, inLen*sizeof(wchar_t));
+ bufTmp = wxWCharBuffer(srcLen);
+ memcpy(bufTmp.data(), src, srcLen*sizeof(wchar_t));
+ src = bufTmp;
}
- if ( bufTmp )
- in = bufTmp;
+ const size_t lenNul = GetMBNulLen();
+ for ( const wchar_t * const srcEnd = src + srcLen;
+ src < srcEnd;
+ src += wxWcslen(src) + 1 /* skip L'\0' too */ )
+ {
+ // try to convert the current chunk
+ size_t lenChunk = WC2MB(NULL, src, 0);
+
+ if ( lenChunk == wxCONV_FAILED )
+ return wxCONV_FAILED;
- for ( const wchar_t * const inEnd = in + inLen;; )
+ lenChunk += lenNul;
+ dstWritten += lenChunk;
+
+ if ( dst )
+ {
+ if ( dstWritten > dstLen )
+ return wxCONV_FAILED;
+
+ if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
+ return wxCONV_FAILED;
+
+ dst += lenChunk;
+ }
+ }
+
+ return dstWritten;
+}
+
+size_t wxMBConv::MB2WC(wchar_t *out, const char *in, size_t outLen) const
+{
+ size_t rc = ToWChar(out, outLen, in);
+ if ( rc != wxCONV_FAILED )
{
- // try to convert the current chunk, if anything left
- size_t lenChunk = in < inEnd ? WC2MB(NULL, in, 0) : 0;
- if ( lenChunk == 0 )
+ // ToWChar() returns the buffer length, i.e. including the trailing
+ // NUL, while this method doesn't take it into account
+ rc--;
+ }
+
+ return rc;
+}
+
+size_t wxMBConv::WC2MB(char *out, const wchar_t *in, size_t outLen) const // old-style API implemented in terms of FromWChar()
+{
+ size_t rc = FromWChar(out, outLen, in);
+ if ( rc != wxCONV_FAILED ) // failures are returned to the caller unchanged
+ {
+ rc -= GetMBNulLen(); // FromWChar() counts the trailing NUL(s) in its result while this method doesn't
+ }
+
+ return rc;
+}
+
+wxMBConv::~wxMBConv()
+{
+ // intentionally empty: there is nothing to clean up here, the destructor is only defined out of line (probably necessary for linking on Darwin)
+}
+
+const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
+{
+ if ( psz )
+ {
+ // calculate the length of the buffer needed first
+ const size_t nLen = MB2WC(NULL, psz, 0);
+ if ( nLen != wxCONV_FAILED )
{
- // nothing left in the input string, conversion succeeded
- if ( outLen )
- *outLen = lenBuf ? lenBuf - 1 : lenBuf;
+ // now do the actual conversion
+ wxWCharBuffer buf(nLen /* +1 added implicitly */);
- return buf;
+ // +1 for the trailing NULL
+ if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
+ return buf;
}
+ }
- if ( lenChunk == (size_t)-1 )
- break;
+ return wxWCharBuffer();
+}
- const size_t lenBufNew = lenBuf + lenChunk;
- if ( !buf.extend(lenBufNew) )
- break;
+const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
+{
+ if ( pwz )
+ {
+ const size_t nLen = WC2MB(NULL, pwz, 0);
+ if ( nLen != wxCONV_FAILED )
+ {
+ // extra space for trailing NUL(s)
+ static const size_t extraLen = GetMaxMBNulLen();
- lenChunk = WC2MB(buf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
- if ( lenChunk == (size_t)-1 )
- break;
+ wxCharBuffer buf(nLen + extraLen - 1);
+ if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
+ return buf;
+ }
+ }
- // chunk successfully converted, go to the next one
- in += wxWcslen(in) + 1 /* skip NUL too */;
- lenBuf = lenBufNew + 1;
+ return wxCharBuffer();
+}
+
+// Convert the multibyte buffer "in" of length inLen to wide characters using
+// ToWChar(). If inLen is (size_t)-1 the input is assumed to be properly
+// NUL-terminated.
+//
+// On success returns the buffer containing the converted text and, if outLen
+// is not NULL, stores in it the length returned by ToWChar(). On failure
+// returns an empty buffer and sets *outLen (if given) to 0.
+const wxWCharBuffer
+wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
+{
+    // first pass: with a NULL destination ToWChar() only computes the length
+    const size_t dstLen = ToWChar(NULL, 0, in, inLen);
+    if ( dstLen != wxCONV_FAILED )
+    {
+        wxWCharBuffer wbuf(dstLen);
+
+        // the result must be compared with wxCONV_FAILED explicitly: as
+        // wxCONV_FAILED is not 0, testing the return value as a boolean would
+        // treat a failed conversion as a success (and a successful conversion
+        // of an empty input, which yields 0, as a failure)
+        if ( ToWChar(wbuf.data(), dstLen, in, inLen) != wxCONV_FAILED )
+        {
+            if ( outLen )
+                *outLen = dstLen;
+            return wbuf;
+        }
+    }
+
+    // conversion failed
+    if ( outLen )
+        *outLen = 0;
+
+    return wxWCharBuffer();
+}
+
+const wxCharBuffer
+wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
+{
+ const size_t dstLen = FromWChar(NULL, 0, in, inLen);
+ if ( dstLen != wxCONV_FAILED )
+ {
+ wxCharBuffer buf(dstLen);
+ if ( FromWChar(buf.data(), dstLen, in, inLen) )
+ {
+ if ( outLen )
+ *outLen = dstLen;
+ return buf;
+ }
}
- // conversion failed
if ( outLen )
*outLen = 0;
virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
+ // classify this encoding as explained in wxMBConv::GetMBNulLen()
+ // comment
+ virtual size_t GetMBNulLen() const;
+
bool IsOk() const
{ return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
#endif
private:
- // classify this encoding as explained in wxMBConv::GetMinMBCharWidth()
- // comment
- virtual size_t GetMinMBCharWidth() const;
-
// the name (for iconv_open()) of a wide char charset -- if none is
// available on this machine, it will remain NULL
static wxString ms_wcCharsetName;
// different endian-ness than the native one
static bool ms_wcNeedsSwap;
- // cached result of GetMinMBCharWidth(); set to 0 meaning "unknown"
+ // cached result of GetMBNulLen(); set to 0 meaning "unknown"
// initially
size_t m_minMBCharWidth;
};
size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
+ // find the string length: notice that this must be done differently for
+ // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
+ size_t inbuf;
+ const size_t nulLen = GetMBNulLen();
+ switch ( nulLen )
+ {
+ default:
+ return (size_t)-1;
+
+ case 1:
+ inbuf = strlen(psz); // arguably more optimized than our version
+ break;
+
+ case 2:
+ case 4:
+ // for UTF-16/32 not only do we need to have 2/4 consecutive NULs but
+ // they also have to start at a character boundary and not span two
+ // adjacent characters
+ const char *p;
+ for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
+ ;
+ inbuf = p - psz;
+ break;
+ }
+
#if wxUSE_THREADS
// NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
// Unfortunately there is a couple of global wxCSConv objects such as
// only a few wx classes would be safe to use from non-main threads
// as MB<->WC conversion would fail "randomly".
wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
-#endif
+#endif // wxUSE_THREADS
+
- size_t inbuf = strlen(psz);
size_t outbuf = n * SIZEOF_WCHAR_T;
size_t res, cres;
// VS: Use these instead of psz, buf because iconv() modifies its arguments:
buf[n] = WC_BSWAP(buf[i]);
}
- // NB: iconv was given only strlen(psz) characters on input, and so
- // it couldn't convert the trailing zero. Let's do it ourselves
- // if there's some room left for it in the output buffer.
+ // NUL-terminate the string if there is any space left
if (res < n)
buf[res] = 0;
}
return res;
}
-size_t wxMBConv_iconv::GetMinMBCharWidth() const
+size_t wxMBConv_iconv::GetMBNulLen() const
{
if ( m_minMBCharWidth == 0 )
{
return len - 1;
}
+ virtual size_t GetMBNulLen() const // length in bytes of the NUL terminator in this code page (1, 2 or 4) or (size_t)-1 if unknown; the result is cached
+ {
+ if ( m_minMBCharWidth == 0 ) // 0 means that the value hasn't been computed yet
+ {
+ int len = ::WideCharToMultiByte
+ (
+ m_CodePage, // code page
+ 0, // no flags
+ L"", // input string
+ 1, // translate just the NUL
+ NULL, // output buffer
+ 0, // and its size
+ NULL, // no replacement char
+ NULL // [out] don't care if it was used
+ );
+
+ wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32); // this method is const, so cast constness away to update the cache
+ switch ( len )
+ {
+ default:
+ wxLogDebug(_T("Unexpected NUL length %d"), len);
+ // fall through
+
+ case 0:
+ self->m_minMBCharWidth = (size_t)-1; // remember the failure: this is not 0, so the call above is not repeated
+ break;
+
+ case 1:
+ case 2:
+ case 4:
+ self->m_minMBCharWidth = len;
+ break;
+ }
+ }
+
+ return m_minMBCharWidth;
+ }
+
bool IsOk() const { return m_CodePage != -1; }
private:
#endif
}
- virtual size_t GetMinMBCharWidth() const
- {
- if ( m_minMBCharWidth == 0 )
- {
- int len = ::WideCharToMultiByte
- (
- m_CodePage, // code page
- 0, // no flags
- L"", // input string
- 1, // translate just the NUL
- NULL, // output buffer
- 0, // and its size
- NULL, // no replacement char
- NULL // [out] don't care if it was used
- );
-
- wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
- switch ( len )
- {
- default:
- wxLogDebug(_T("Unexpected NUL length %d"), len);
- // fall through
-
- case 0:
- self->m_minMBCharWidth = (size_t)-1;
- break;
-
- case 1:
- case 2:
- case 4:
- self->m_minMBCharWidth = len;
- break;
- }
- }
-
- return m_minMBCharWidth;
- }
// the code page we're working with
long m_CodePage;
- // cached result of GetMinMBCharWidth(), set to 0 initially meaning
+ // cached result of GetMBNulLen(), set to 0 initially meaning
// "unknown"
size_t m_minMBCharWidth;
};
return inbuf;
}
- bool IsOk() const { return m_ok; }
-
-public:
- wxFontEncoding m_enc;
- wxEncodingConverter m2w, w2m;
-
-private:
- virtual size_t GetMinMBCharWidth() const
+ virtual size_t GetMBNulLen() const
{
switch ( m_enc )
{
}
}
+ bool IsOk() const { return m_ok; }
+
+public:
+ wxFontEncoding m_enc;
+ wxEncodingConverter m2w, w2m;
+
+private:
// were we initialized successfully?
bool m_ok;
return len;
}
-size_t wxCSConv::GetMinMBCharWidth() const
+size_t wxCSConv::GetMBNulLen() const
{
CreateConvIfNeeded();
if ( m_convReal )
{
- // cast needed just to call private function of m_convReal
- return ((wxCSConv *)m_convReal)->GetMinMBCharWidth();
+ return m_convReal->GetMBNulLen();
}
return 1;