#define wxHAVE_WIN32_MB2WC
#endif
-#ifdef __SALFORDC__
- #include <clib.h>
-#endif
-
#ifdef HAVE_ICONV
#include <iconv.h>
#include "wx/thread.h"
const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
if ( dstLen != wxCONV_FAILED )
{
- wxWCharBuffer wbuf(dstLen - 1);
+ // notice that we allocate space for dstLen+1 wide characters here
+ // because we want the buffer to always be NUL-terminated, even if the
+ // input isn't (as otherwise the caller has no way to know its length)
+ wxWCharBuffer wbuf(dstLen);
+ wbuf.data()[dstLen - 1] = L'\0';
if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
{
if ( outLen )
size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
if ( dstLen != wxCONV_FAILED )
{
- // special case of empty input: can't allocate 0 size buffer below as
- // wxCharBuffer insists on NUL-terminating it
- wxCharBuffer buf(dstLen ? dstLen - 1 : 1);
+ const size_t nulLen = GetMBNulLen();
+
+ // as above, ensure that the buffer is always NUL-terminated, even if
+ // the input is not
+ wxCharBuffer buf(dstLen + nulLen - 1);
+ memset(buf.data() + dstLen, 0, nulLen);
if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
{
if ( outLen )
{
*outLen = dstLen;
- const size_t nulLen = GetMBNulLen();
if ( dstLen >= nulLen &&
!NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
{
// UTF-8
// ----------------------------------------------------------------------------
-static wxUint32 utf8_max[]=
+static const wxUint32 utf8_max[]=
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
// boundaries of the private use area we use to (temporarily) remap invalid
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
// this table gives the length of the UTF-8 encoding from its first character:
-unsigned char tableUtf8Lengths[256] = {
+const unsigned char tableUtf8Lengths[256] = {
// single-byte sequences (ASCII):
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00..0F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10..1F
return written;
}
- unsigned char c = *p;
- unsigned len = tableUtf8Lengths[c];
- if ( !len )
+ if ( out && !dstLen-- )
break;
- if ( srcLen < len ) // the test works for wxNO_LEN too
- break;
+ wxUint32 code;
+ unsigned char c = *p;
- if ( srcLen != wxNO_LEN )
- srcLen -= len;
+ if ( c < 0x80 )
+ {
+ if ( srcLen == 0 ) // the test works for wxNO_LEN too
+ break;
- if ( out && !dstLen-- )
- break;
+ if ( srcLen != wxNO_LEN )
+ srcLen--;
+ code = c;
+ }
+ else
+ {
+ unsigned len = tableUtf8Lengths[c];
+ if ( !len )
+ break;
- // Char. number range | UTF-8 octet sequence
- // (hexadecimal) | (binary)
- // ----------------------+---------------------------------------------
- // 0000 0000 - 0000 007F | 0xxxxxxx
- // 0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
- // 0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
- // 0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
- //
- // Code point value is stored in bits marked with 'x', lowest-order bit
- // of the value on the right side in the diagram above.
- // (from RFC 3629)
+ if ( srcLen < len ) // the test works for wxNO_LEN too
+ break;
- // mask to extract lead byte's value ('x' bits above), by sequence length:
- static const unsigned char leadValueMask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
+ if ( srcLen != wxNO_LEN )
+ srcLen -= len;
- // mask and value of lead byte's most significant bits, by length:
- static const unsigned char leadMarkerMask[] = { 0x80, 0xE0, 0xF0, 0xF8 };
- static const unsigned char leadMarkerVal[] = { 0x00, 0xC0, 0xE0, 0xF0 };
+ // Char. number range | UTF-8 octet sequence
+ // (hexadecimal) | (binary)
+ // ----------------------+----------------------------------------
+ // 0000 0000 - 0000 007F | 0xxxxxxx
+ // 0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
+ // 0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
+ // 0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ //
+ // Code point value is stored in bits marked with 'x',
+ // lowest-order bit of the value on the right side in the diagram
+ // above. (from RFC 3629)
- len--; // it's more convenient to work with 0-based length here
+ // mask to extract lead byte's value ('x' bits above), by sequence
+ // length:
+ static const unsigned char leadValueMask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
- // extract the lead byte's value bits:
- if ( (c & leadMarkerMask[len]) != leadMarkerVal[len] )
- break;
+ // mask and value of lead byte's most significant bits, by length:
+ static const unsigned char leadMarkerMask[] = { 0x80, 0xE0, 0xF0, 0xF8 };
+ static const unsigned char leadMarkerVal[] = { 0x00, 0xC0, 0xE0, 0xF0 };
- wxUint32 code = c & leadValueMask[len];
+ len--; // it's more convenient to work with 0-based length here
- // all remaining bytes, if any, are handled in the same way regardless of
- // sequence's length:
- for ( ; len; --len )
- {
- c = *++p;
- if ( (c & 0xC0) != 0x80 )
- return wxCONV_FAILED;
+ // extract the lead byte's value bits:
+ if ( (c & leadMarkerMask[len]) != leadMarkerVal[len] )
+ break;
+
+ code = c & leadValueMask[len];
+
+ // all remaining bytes, if any, are handled in the same way
+ // regardless of sequence's length:
+ for ( ; len; --len )
+ {
+ c = *++p;
+ if ( (c & 0xC0) != 0x80 )
+ return wxCONV_FAILED;
- code <<= 6;
- code |= c & 0x3F;
+ code <<= 6;
+ code |= c & 0x3F;
+ }
}
#ifdef WC_UTF16
return wxCONV_FAILED;
}
-size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
+size_t wxMBConvUTF8::ToWChar(wchar_t *buf, size_t n,
+ const char *psz, size_t srcLen) const
{
if ( m_options == MAP_INVALID_UTF8_NOT )
- return wxMBConvStrictUTF8::MB2WC(buf, psz, n);
+ return wxMBConvStrictUTF8::ToWChar(buf, n, psz, srcLen);
size_t len = 0;
- while (*psz && ((!buf) || (len < n)))
+ while ((srcLen == wxNO_LEN ? *psz : srcLen--) && ((!buf) || (len < n)))
{
const char *opsz = psz;
bool invalid = false;
}
}
- if (buf && (len < n))
+ if (srcLen == wxNO_LEN && buf && (len < n))
*buf = 0;
- return len;
+ return len + 1;
}
static inline bool isoctal(wchar_t wch)
return L'0' <= wch && wch <= L'7';
}
-size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
+size_t wxMBConvUTF8::FromWChar(char *buf, size_t n,
+ const wchar_t *psz, size_t srcLen) const
{
if ( m_options == MAP_INVALID_UTF8_NOT )
- return wxMBConvStrictUTF8::WC2MB(buf, psz, n);
+ return wxMBConvStrictUTF8::FromWChar(buf, n, psz, srcLen);
size_t len = 0;
- while (*psz && ((!buf) || (len < n)))
+ while ((srcLen == wxNO_LEN ? *psz : srcLen--) && ((!buf) || (len < n)))
{
wxUint32 cc;
}
}
- if (buf && (len < n))
+ if (srcLen == wxNO_LEN && buf && (len < n))
*buf = 0;
- return len;
+ return len + 1;
}
// ============================================================================
if ( m2w != ICONV_T_INVALID )
{
char buf[2], *bufPtr;
- wchar_t wbuf[2], *wbufPtr;
+ wchar_t wbuf[2];
size_t insz, outsz;
size_t res;
wbuf[0] = 0;
insz = 2;
outsz = SIZEOF_WCHAR_T * 2;
- wbufPtr = wbuf;
+ char* wbufPtr = (char*)wbuf;
bufPtr = buf;
res = iconv(
m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
- (char**)&wbufPtr, &outsz);
+ &wbufPtr, &outsz);
if (ICONV_FAILED(res, insz))
{
size_t outbuf = n * SIZEOF_WCHAR_T;
size_t res, cres;
- // VS: Use these instead of psz, buf because iconv() modifies its arguments:
- wchar_t *bufPtr = buf;
const char *pszPtr = psz;
if (buf)
{
+ char* bufPtr = (char*)buf;
+
// have destination buffer, convert there
cres = iconv(m2w,
ICONV_CHAR_CAST(&pszPtr), &inbuf,
- (char**)&bufPtr, &outbuf);
+ &bufPtr, &outbuf);
res = n - (outbuf / SIZEOF_WCHAR_T);
if (ms_wcNeedsSwap)
do
{
- bufPtr = tbuf;
+ char* bufPtr = (char*)tbuf;
outbuf = 8 * SIZEOF_WCHAR_T;
cres = iconv(m2w,
ICONV_CHAR_CAST(&pszPtr), &inbuf,
- (char**)&bufPtr, &outbuf );
+ &bufPtr, &outbuf );
res += 8 - (outbuf / SIZEOF_WCHAR_T);
}
#endif
size_t inlen = wxWcslen(psz);
- size_t inbuf = inlen * SIZEOF_WCHAR_T;
- size_t outbuf = n;
+ size_t inbuflen = inlen * SIZEOF_WCHAR_T;
+ size_t outbuflen = n;
size_t res, cres;
wchar_t *tmpbuf = 0;
// need to copy to temp buffer to switch endianness
// (doing WC_BSWAP twice on the original buffer won't help, as it
// could be in read-only memory, or be accessed in some other thread)
- tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
+ tmpbuf = (wchar_t *)malloc(inbuflen + SIZEOF_WCHAR_T);
for ( size_t i = 0; i < inlen; i++ )
tmpbuf[n] = WC_BSWAP(psz[i]);
psz = tmpbuf;
}
+ char* inbuf = (char*)psz;
if (buf)
{
// have destination buffer, convert there
- cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
+ cres = iconv(w2m, ICONV_CHAR_CAST(&inbuf), &inbuflen, &buf, &outbuflen);
- res = n - outbuf;
+ res = n - outbuflen;
// NB: iconv was given only wcslen(psz) characters on input, and so
// it couldn't convert the trailing zero. Let's do it ourselves
do
{
buf = tbuf;
- outbuf = 16;
+ outbuflen = 16;
- cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
+ cres = iconv(w2m, ICONV_CHAR_CAST(&inbuf), &inbuflen, &buf, &outbuflen);
- res += 16 - outbuf;
+ res += 16 - outbuflen;
}
while ((cres == (size_t)-1) && (errno == E2BIG));
}
free(tmpbuf);
}
- if (ICONV_FAILED(cres, inbuf))
+ if (ICONV_FAILED(cres, inbuflen))
{
wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
return wxCONV_FAILED;
return wxCONV_FAILED;
}
- // if we were really converting, check if we succeeded
- if ( buf )
+ // we did something, check if we really succeeded
+ if ( flags )
{
- if ( flags )
+ // check if the conversion failed, i.e. if any replacements
+ // were done
+ if ( usedDef )
+ return wxCONV_FAILED;
+ }
+ else // we must resort to double tripping...
+ {
+ // first we need to ensure that we really have the MB data: this is
+ // not the case if we're called with NULL buffer, in which case we
+ // need to do the conversion yet again
+ wxCharBuffer bufDef;
+ if ( !buf )
{
- // check if the conversion failed, i.e. if any replacements
- // were done
- if ( usedDef )
+ bufDef = wxCharBuffer(len);
+ buf = bufDef.data();
+ if ( !::WideCharToMultiByte(m_CodePage, flags, pwz, -1,
+ buf, len, NULL, NULL) )
return wxCONV_FAILED;
}
- else // we must resort to double tripping...
+
+ if ( !n )
+ n = wcslen(pwz);
+ wxWCharBuffer wcBuf(n);
+ if ( MB2WC(wcBuf.data(), buf, n + 1) == wxCONV_FAILED ||
+ wcscmp(wcBuf, pwz) != 0 )
{
- wxWCharBuffer wcBuf(n);
- if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
- wcscmp(wcBuf, pwz) != 0 )
- {
- // we didn't obtain the same thing we started from, hence
- // the conversion was lossy and we consider that it failed
- return wxCONV_FAILED;
- }
+ // we didn't obtain the same thing we started from, hence
+ // the conversion was lossy and we consider that it failed
+ return wxCONV_FAILED;
}
}
WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConvLibc, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
#endif
-WX_DEFINE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8, wxEMPTY_PARAMETER_VALUE);
-WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, wxEMPTY_PARAMETER_VALUE);
+// NB: we can't use wxEMPTY_PARAMETER_VALUE as final argument here because it's
+// passed to WX_DEFINE_GLOBAL_CONV2 after a macro expansion and so still
+// provokes an error message about "not enough macro parameters"; and we
+// can't use "()" here as the name##Obj declaration would be parsed as a
+// function declaration then, so use a semicolon and live with an extra
+// empty statement (and hope that no compiler warns about this)
+WX_DEFINE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8, ;);
+WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, ;);
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM));
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1));