#define TRACE_STRCONV _T("strconv")
+// WC_UTF16 is defined only if sizeof(wchar_t) == 2; otherwise wchar_t is
+// assumed to be 4 bytes wide (i.e. to hold UTF-32)
#if SIZEOF_WCHAR_T == 2
#define WC_UTF16
#endif
}
// ----------------------------------------------------------------------------
-// UTF-16 en/decoding to/from UCS-4
+// UTF-16 en/decoding to/from UCS-4 with surrogates handling
// ----------------------------------------------------------------------------
-
static size_t encode_utf16(wxUint32 input, wxUint16 *output)
{
if (input<=0xffff)
}
else if (input>=0x110000)
{
- return (size_t)-1;
+ return wxCONV_FAILED;
}
else
{
else if ((input[1]<0xdc00) || (input[1]>0xdfff))
{
output = *input;
- return (size_t)-1;
+ return wxCONV_FAILED;
}
else
{
}
}
+#ifdef WC_UTF16
+
+// Decodes the next character from a UTF-16 encoded wchar_t buffer and returns
+// it as a UTF-32 value, advancing *pSrc past the code unit(s) consumed by
+// decode_utf16().
+//
+// If an invalid sequence is found, *pSrc is set to NULL instead: the caller
+// must check for this before using the returned value.
+//
+// Only available when wchar_t is 16 bits wide (WC_UTF16).
+static wxUint32 wxDecodeSurrogate(const wchar_t **pSrc)
+{
+    // defensive initialization so that we never return an indeterminate
+    // value, even if decode_utf16() were to leave its output untouched
+    wxUint32 out = 0;
+
+    // the cast is ok because wchar_t has the same size as wxUint16 if
+    // WC_UTF16 is defined; it is needed because wchar_t is a distinct type
+    // and decode_utf16() works on wxUint16 units
+    const size_t n =
+        decode_utf16(wx_reinterpret_cast(const wxUint16 *, *pSrc), out);
+    if ( n == wxCONV_FAILED )
+        *pSrc = NULL;
+    else
+        *pSrc += n;
+
+    return out;
+}
+
+#endif // WC_UTF16
// ----------------------------------------------------------------------------
// wxMBConv
// NULs at the end
wxCharBuffer bufTmp;
const char *srcEnd;
- if ( srcLen != (size_t)-1 )
+ if ( srcLen != wxNO_LEN )
{
// we need to know how to find the end of this string
nulLen = GetMBNulLen();
{
// try to convert the current chunk
size_t lenChunk = MB2WC(NULL, src, 0);
- if ( lenChunk == 0 )
- {
- // nothing left in the input string, conversion succeeded;
- // but still account for the trailing NULL
- dstWritten++;
- break;
- }
-
if ( lenChunk == wxCONV_FAILED )
return wxCONV_FAILED;
- lenChunk++; // for trailing NUL
+ lenChunk++; // for the L'\0' at the end of this chunk
dstWritten += lenChunk;
+ if ( lenChunk == 1 )
+ {
+ // nothing left in the input string, conversion succeeded
+ break;
+ }
+
if ( dst )
{
if ( dstWritten > dstLen )
if ( !srcEnd )
{
- // we convert the entire string in this case, as we suppose that the
- // string is NUL-terminated and so srcEnd is not used at all
+ // we convert just one chunk in this case as this is the entire
+ // string anyhow
break;
}
// if we don't know its length we have no choice but to assume that it is,
// indeed, properly terminated
wxWCharBuffer bufTmp;
- if ( srcLen == (size_t)-1 )
+ if ( srcLen == wxNO_LEN )
{
srcLen = wxWcslen(src) + 1;
}
{
// make a copy in order to properly NUL-terminate the string
bufTmp = wxWCharBuffer(srcLen);
- memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
+ memcpy(bufTmp.data(), src, srcLen*sizeof(wchar_t));
src = bufTmp;
}
return dstWritten;
}
-size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
+size_t wxMBConv::MB2WC(wchar_t *out, const char *in, size_t outLen) const
{
- size_t rc = ToWChar(outBuff, outLen, inBuff);
- if ( rc != (size_t)wxCONV_FAILED )
+ size_t rc = ToWChar(out, outLen, in);
+ if ( rc != wxCONV_FAILED )
{
// ToWChar() returns the buffer length, i.e. including the trailing
// NUL, while this method doesn't take it into account
return rc;
}
-size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
+size_t wxMBConv::WC2MB(char *out, const wchar_t *in, size_t outLen) const
{
- size_t rc = FromWChar(outBuff, outLen, inBuff);
- if ( rc != (size_t)wxCONV_FAILED )
+ size_t rc = FromWChar(out, outLen, in);
+ if ( rc != wxCONV_FAILED )
{
rc -= GetMBNulLen();
}
{
// calculate the length of the buffer needed first
const size_t nLen = MB2WC(NULL, psz, 0);
- if ( nLen != (size_t)wxCONV_FAILED )
+ if ( nLen != wxCONV_FAILED )
{
// now do the actual conversion
wxWCharBuffer buf(nLen /* +1 added implicitly */);
if ( pwz )
{
const size_t nLen = WC2MB(NULL, pwz, 0);
- if ( nLen != (size_t)wxCONV_FAILED )
+ if ( nLen != wxCONV_FAILED )
{
// extra space for trailing NUL(s)
static const size_t extraLen = GetMaxMBNulLen();
}
const wxWCharBuffer
-wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
+wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
{
- const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
- if ( dstLen != (size_t)wxCONV_FAILED )
+ const size_t dstLen = ToWChar(NULL, 0, in, inLen);
+ if ( dstLen != wxCONV_FAILED )
{
wxWCharBuffer wbuf(dstLen - 1);
- if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) )
+ if ( ToWChar(wbuf.data(), dstLen, in, inLen) != wxCONV_FAILED )
{
if ( outLen )
- *outLen = dstLen - 1;
+ {
+ *outLen = dstLen;
+ if ( wbuf[dstLen - 1] == L'\0' )
+ (*outLen)--;
+ }
+
return wbuf;
}
}
}
const wxCharBuffer
-wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
+wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
{
- const size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
- if ( dstLen != (size_t)wxCONV_FAILED )
+ const size_t dstLen = FromWChar(NULL, 0, in, inLen);
+ if ( dstLen != wxCONV_FAILED )
{
wxCharBuffer buf(dstLen - 1);
- if ( FromWChar(buf.data(), dstLen, inBuff, inLen) )
+ if ( FromWChar(buf.data(), dstLen, in, inLen) != wxCONV_FAILED )
{
if ( outLen )
- *outLen = dstLen - 1;
+ {
+ *outLen = dstLen;
+
+ const size_t nulLen = GetMBNulLen();
+ if ( !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
+ {
+ // in this case the output is NUL-terminated and we're not
+ // supposed to count NUL
+ (*outLen) -= nulLen;
+ }
+ }
return buf;
}
if ( !ok )
{
// in valid UTF7 we should have valid characters after '+'
- return (size_t)-1;
+ return wxCONV_FAILED;
}
if (*psz == '-')
// plain ASCII char
if (buf)
*buf++ = (char)cc;
-
len++;
}
#ifndef WC_UTF16
else if (((wxUint32)cc) > 0xffff)
{
// no surrogate pair generation (yet?)
- return (size_t)-1;
+ return wxCONV_FAILED;
}
#endif
else
len++;
}
}
-
if (buf)
*buf++ = '-';
len++;
}
}
-
if (buf && (len < n))
*buf = 0;
-
return len;
}
unsigned cnt;
for (cnt = 0; fc & 0x80; cnt++)
fc <<= 1;
-
if (!cnt)
{
// plain ASCII char
invalid = true;
break;
}
-
psz++;
res = (res << 6) | (cc & 0x3f);
}
#ifdef WC_UTF16
// cast is ok because wchar_t == wxUuint16 if WC_UTF16
size_t pa = encode_utf16(res, (wxUint16 *)buf);
- if (pa == (size_t)-1)
+ if (pa == wxCONV_FAILED)
{
invalid = true;
}
#ifdef WC_UTF16
// cast is ok because wchar_t == wxUuint16 if WC_UTF16
size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
- wxASSERT(pa != (size_t)-1);
+ wxASSERT(pa != wxCONV_FAILED);
if (buf)
buf += pa;
opsz++;
*buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
*buf++ = (wchar_t)( L'0' + on % 010 );
}
-
opsz++;
len += 4;
}
}
else // MAP_INVALID_UTF8_NOT
{
- return (size_t)-1;
+ return wxCONV_FAILED;
}
}
}
}
-
if (buf && (len < n))
*buf = 0;
-
return len;
}
while (*psz && ((!buf) || (len < n)))
{
wxUint32 cc;
-
#ifdef WC_UTF16
// cast is ok for WC_UTF16
size_t pa = decode_utf16((const wxUint16 *)psz, cc);
- psz += (pa == (size_t)-1) ? 1 : pa;
+ psz += (pa == wxCONV_FAILED) ? 1 : pa;
#else
- cc = (*psz++) & 0x7fffffff;
+ cc=(*psz++) & 0x7fffffff;
#endif
if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
{
if (buf)
{
- *buf++ = (char) ((psz[0] - L'0') * 0100 +
- (psz[1] - L'0') * 010 +
+ *buf++ = (char) ((psz[0] - L'0')*0100 +
+ (psz[1] - L'0')*010 +
(psz[2] - L'0'));
}
else
{
unsigned cnt;
- for (cnt = 0; cc > utf8_max[cnt]; cnt++)
- {
- }
-
+ for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
if (!cnt)
{
// plain ASCII char
}
}
- if (buf && (len < n))
+ if (buf && (len<n))
*buf = 0;
return len;
}
-// ----------------------------------------------------------------------------
+// ============================================================================
// UTF-16
-// ----------------------------------------------------------------------------
+// ============================================================================
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF16straight wxMBConvUTF16BE
#define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
+// Returns the length of the UTF-16 input in bytes.
+//
+// If srcLen is wxNO_LEN, the input is scanned for its terminating 16 bit
+// zero unit and the returned length includes this terminator. Otherwise the
+// given length is returned unchanged, provided it is a multiple of
+// BYTES_PER_CHAR, and wxCONV_FAILED is returned if it is not.
+/* static */
+size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
+{
+    if ( srcLen != wxNO_LEN )
+    {
+        // an explicit length must cover an entire number of UTF-16 units
+        if ( srcLen % BYTES_PER_CHAR )
+            return wxCONV_FAILED;
+
+        return srcLen;
+    }
+
+    // no length given: count the 16 bit units up to and including the
+    // terminating zero one
+    const wxUint16 *unit = wx_reinterpret_cast(const wxUint16 *, src);
+    size_t numUnits = 1;
+    while ( *unit++ )
+        numUnits++;
+
+    return numUnits*BYTES_PER_CHAR;
+}
+
+// case when in-memory representation is UTF-16 too
#ifdef WC_UTF16
-// copy 16bit MB to 16bit String
-size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
+// ----------------------------------------------------------------------------
+// conversions without endianness change
+// ----------------------------------------------------------------------------
+
+size_t
+wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
+ const char *src, size_t srcLen) const
{
- size_t len = 0;
+ // set up the scene for using memcpy() (which is presumably more efficient
+ // than copying the bytes one by one)
+ srcLen = GetLength(src, srcLen);
+ if ( srcLen == wxNO_LEN )
+ return wxCONV_FAILED;
- while (*(wxUint16*)psz && (!buf || len < n))
+ const size_t inLen = srcLen/BYTES_PER_CHAR;
+ if ( dst )
{
- if (buf)
- *buf++ = *(wxUint16*)psz;
- len++;
+ if ( dstLen < inLen )
+ return wxCONV_FAILED;
- psz += sizeof(wxUint16);
+ memcpy(dst, src, srcLen);
}
- if (buf && len < n)
- *buf = 0;
-
- return len;
+ return inLen;
}
-
-// copy 16bit String to 16bit MB
-size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
+size_t
+wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
+ const wchar_t *src, size_t srcLen) const
{
- size_t len = 0;
+ if ( srcLen == wxNO_LEN )
+ srcLen = wxWcslen(src) + 1;
- while (*psz && (!buf || len < n))
+ srcLen *= BYTES_PER_CHAR;
+
+ if ( dst )
{
- if (buf)
- {
- *(wxUint16*)buf = *psz;
- buf += sizeof(wxUint16);
- }
+ if ( dstLen < srcLen )
+ return wxCONV_FAILED;
- len += sizeof(wxUint16);
- psz++;
+ memcpy(dst, src, srcLen);
}
- if (buf && len <= n - sizeof(wxUint16))
- *(wxUint16*)buf = 0;
-
- return len;
+ return srcLen;
}
+// ----------------------------------------------------------------------------
+// endian-reversing conversions
+// ----------------------------------------------------------------------------
-// swap 16bit MB to 16bit String
-size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
+size_t
+wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
+ const char *src, size_t srcLen) const
{
- size_t len = 0;
+ srcLen = GetLength(src, srcLen);
+ if ( srcLen == wxNO_LEN )
+ return wxCONV_FAILED;
- // UTF16 string must be terminated by 2 NULs as single NULs may occur
- // inside the string
- while ( (psz[0] || psz[1]) && (!buf || len < n) )
+ srcLen /= BYTES_PER_CHAR;
+
+ if ( dst )
{
- if ( buf )
+ if ( dstLen < srcLen )
+ return wxCONV_FAILED;
+
+ const wxUint16 *in = wx_reinterpret_cast(const wxUint16 *, src);
+ for ( size_t n = 0; n < srcLen; n++, in++ )
{
- ((char *)buf)[0] = psz[1];
- ((char *)buf)[1] = psz[0];
- buf++;
+ *dst++ = wxUINT16_SWAP_ALWAYS(*in);
}
- len++;
- psz += 2;
}
- if ( buf && len < n )
- *buf = L'\0';
-
- return len;
+ return srcLen;
}
-
-// swap 16bit MB to 16bit String
-size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
+size_t
+wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
+ const wchar_t *src, size_t srcLen) const
{
- size_t len = 0;
+ if ( srcLen == wxNO_LEN )
+ srcLen = wxWcslen(src) + 1;
- while ( *psz && (!buf || len < n) )
+ srcLen *= BYTES_PER_CHAR;
+
+ if ( dst )
{
- if ( buf )
+ if ( dstLen < srcLen )
+ return wxCONV_FAILED;
+
+ wxUint16 *out = wx_reinterpret_cast(wxUint16 *, dst);
+ for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
{
- *buf++ = ((char*)psz)[1];
- *buf++ = ((char*)psz)[0];
+ *out++ = wxUINT16_SWAP_ALWAYS(*src);
}
-
- len += 2;
- psz++;
}
- if ( buf && len < n - 1 )
- {
- buf[0] =
- buf[1] = '\0';
- }
-
- return len;
+ return srcLen;
}
+#else // !WC_UTF16: wchar_t is UTF-32
-#else // WC_UTF16
-
+// ----------------------------------------------------------------------------
+// conversions without endianness change
+// ----------------------------------------------------------------------------
-// copy 16bit MB to 32bit String
size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
- size_t len = 0;
+ size_t len=0;
while (*(wxUint16*)psz && (!buf || len < n))
{
wxUint32 cc;
- size_t pa = decode_utf16((wxUint16*)psz, cc);
- if (pa == (size_t)-1)
+ size_t pa=decode_utf16((wxUint16*)psz, cc);
+ if (pa == wxCONV_FAILED)
return pa;
if (buf)
len++;
psz += pa * sizeof(wxUint16);
}
-
- if (buf && len < n)
- *buf = 0;
+ if (buf && len<n) *buf=0;
return len;
}
while (*psz && (!buf || len < n))
{
wxUint16 cc[2];
- size_t pa = encode_utf16(*psz, cc);
+ size_t pa=encode_utf16(*psz, cc);
- if (pa == (size_t)-1)
+ if (pa == wxCONV_FAILED)
return pa;
if (buf)
len += pa*sizeof(wxUint16);
psz++;
}
-
- if (buf && len <= n - sizeof(wxUint16))
- *(wxUint16*)buf = 0;
+ if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
return len;
}
+// ----------------------------------------------------------------------------
+// endian-reversing conversions
+// ----------------------------------------------------------------------------
// swap 16bit MB to 32bit String
size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
wxUint32 cc;
char tmp[4];
+ tmp[0]=psz[1]; tmp[1]=psz[0];
+ tmp[2]=psz[3]; tmp[3]=psz[2];
- tmp[0] = psz[1];
- tmp[1] = psz[0];
- tmp[2] = psz[3];
- tmp[3] = psz[2];
-
- size_t pa = decode_utf16((wxUint16*)tmp, cc);
- if (pa == (size_t)-1)
+ size_t pa=decode_utf16((wxUint16*)tmp, cc);
+ if (pa == wxCONV_FAILED)
return pa;
if (buf)
len++;
psz += pa * sizeof(wxUint16);
}
-
- if (buf && len < n)
- *buf = 0;
+ if (buf && len<n) *buf=0;
return len;
}
// swap 32bit String to 16bit MB
size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
- size_t len = 0;
+ size_t len=0;
while (*psz && (!buf || len < n))
{
wxUint16 cc[2];
- size_t pa = encode_utf16(*psz, cc);
+ size_t pa=encode_utf16(*psz, cc);
- if (pa == (size_t)-1)
+ if (pa == wxCONV_FAILED)
return pa;
if (buf)
}
}
- len += pa * sizeof(wxUint16);
+ len += pa*sizeof(wxUint16);
psz++;
}
-
- if (buf && len <= n - sizeof(wxUint16))
- *(wxUint16*)buf = 0;
+ if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
return len;
}
-#endif // WC_UTF16
+#endif // WC_UTF16/!WC_UTF16
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
#ifdef WORDS_BIGENDIAN
-#define wxMBConvUTF32straight wxMBConvUTF32BE
-#define wxMBConvUTF32swap wxMBConvUTF32LE
+ #define wxMBConvUTF32straight wxMBConvUTF32BE
+ #define wxMBConvUTF32swap wxMBConvUTF32LE
#else
-#define wxMBConvUTF32swap wxMBConvUTF32BE
-#define wxMBConvUTF32straight wxMBConvUTF32LE
+ #define wxMBConvUTF32swap wxMBConvUTF32BE
+ #define wxMBConvUTF32straight wxMBConvUTF32LE
#endif
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
+// Returns the length of the UTF-32 input in bytes.
+//
+// If srcLen is wxNO_LEN, the input is scanned for its terminating 32 bit
+// zero unit and the returned length includes this terminator. Otherwise the
+// given length is returned unchanged, provided it is a multiple of
+// BYTES_PER_CHAR, and wxCONV_FAILED is returned if it is not.
+/* static */
+size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
+{
+    if ( srcLen != wxNO_LEN )
+    {
+        // an explicit length must cover an entire number of UTF-32 units
+        if ( srcLen % BYTES_PER_CHAR )
+            return wxCONV_FAILED;
+
+        return srcLen;
+    }
+
+    // no length given: count the 32 bit units up to and including the
+    // terminating zero one
+    const wxUint32 *unit = wx_reinterpret_cast(const wxUint32 *, src);
+    size_t numUnits = 1;
+    while ( *unit++ )
+        numUnits++;
+
+    return numUnits*BYTES_PER_CHAR;
+}
+
+// case when in-memory representation is UTF-16
#ifdef WC_UTF16
-// copy 32bit MB to 16bit String
-size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
+// ----------------------------------------------------------------------------
+// conversions without endianness change
+// ----------------------------------------------------------------------------
+
+size_t
+wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
+ const char *src, size_t srcLen) const
{
- size_t len = 0;
+ srcLen = GetLength(src, srcLen);
+ if ( srcLen == wxNO_LEN )
+ return wxCONV_FAILED;
- while (*(wxUint32*)psz && (!buf || len < n))
+ const wxUint32 *in = wx_reinterpret_cast(const wxUint32 *, src);
+ const size_t inLen = srcLen/BYTES_PER_CHAR;
+ size_t outLen = 0;
+ for ( size_t n = 0; n < inLen; n++ )
{
wxUint16 cc[2];
+ const size_t numChars = encode_utf16(*in++, cc);
+ if ( numChars == wxCONV_FAILED )
+ return wxCONV_FAILED;
- size_t pa = encode_utf16(*(wxUint32*)psz, cc);
- if (pa == (size_t)-1)
- return pa;
-
- if (buf)
+ outLen += numChars;
+ if ( dst )
{
- *buf++ = cc[0];
- if (pa > 1)
- *buf++ = cc[1];
- }
+ if ( outLen > dstLen )
+ return wxCONV_FAILED;
- len += pa;
- psz += sizeof(wxUint32);
+ *dst++ = cc[0];
+ if ( numChars == 2 )
+ {
+ // second character of a surrogate
+ *dst++ = cc[1];
+ }
+ }
}
- if (buf && len < n)
- *buf = 0;
-
- return len;
+ return outLen;
}
-
-// copy 16bit String to 32bit MB
-size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
+size_t
+wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
+ const wchar_t *src, size_t srcLen) const
{
- size_t len = 0;
+ if ( srcLen == wxNO_LEN )
+ srcLen = wxWcslen(src) + 1;
- while (*psz && (!buf || len < n))
+ if ( !dst )
{
- wxUint32 cc;
+ // optimization: return maximal space which could be needed for this
+ // string instead of the exact amount which could be less if there are
+ // any surrogates in the input
+ //
+ // we consider that surrogates are rare enough to make it worthwhile to
+ // avoid running the loop below at the cost of slightly extra memory
+ // consumption
+ return srcLen*BYTES_PER_CHAR;
+ }
- // cast is ok for WC_UTF16
- size_t pa = decode_utf16((const wxUint16 *)psz, cc);
- if (pa == (size_t)-1)
- return pa;
+ wxUint32 *out = wx_reinterpret_cast(wxUint32 *, dst);
+ size_t outLen = 0;
+ for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
+ {
+ const wxUint32 ch = wxDecodeSurrogate(&src);
+ if ( !src )
+ return wxCONV_FAILED;
- if (buf)
- {
- *(wxUint32*)buf = cc;
- buf += sizeof(wxUint32);
- }
+ outLen += BYTES_PER_CHAR;
- len += sizeof(wxUint32);
- psz += pa;
- }
+ if ( outLen > dstLen )
+ return wxCONV_FAILED;
- if (buf && len <= n - sizeof(wxUint32))
- *(wxUint32*)buf = 0;
+ *out++ = ch;
+ }
- return len;
+ return outLen;
}
+// ----------------------------------------------------------------------------
+// endian-reversing conversions
+// ----------------------------------------------------------------------------
-// swap 32bit MB to 16bit String
-size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
+size_t
+wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
+ const char *src, size_t srcLen) const
{
- size_t len = 0;
+ srcLen = GetLength(src, srcLen);
+ if ( srcLen == wxNO_LEN )
+ return wxCONV_FAILED;
- while (*(wxUint32*)psz && (!buf || len < n))
+ const wxUint32 *in = wx_reinterpret_cast(const wxUint32 *, src);
+ const size_t inLen = srcLen/BYTES_PER_CHAR;
+ size_t outLen = 0;
+ for ( size_t n = 0; n < inLen; n++, in++ )
{
- char tmp[4];
- tmp[0] = psz[3];
- tmp[1] = psz[2];
- tmp[2] = psz[1];
- tmp[3] = psz[0];
-
wxUint16 cc[2];
+ const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*in), cc);
+ if ( numChars == wxCONV_FAILED )
+ return wxCONV_FAILED;
- size_t pa = encode_utf16(*(wxUint32*)tmp, cc);
- if (pa == (size_t)-1)
- return pa;
-
- if (buf)
+ outLen += numChars;
+ if ( dst )
{
- *buf++ = cc[0];
- if (pa > 1)
- *buf++ = cc[1];
- }
+ if ( outLen > dstLen )
+ return wxCONV_FAILED;
- len += pa;
- psz += sizeof(wxUint32);
+ *dst++ = cc[0];
+ if ( numChars == 2 )
+ {
+ // second character of a surrogate
+ *dst++ = cc[1];
+ }
+ }
}
- if (buf && len < n)
- *buf = 0;
-
- return len;
+ return outLen;
}
-
-// swap 16bit String to 32bit MB
-size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
+size_t
+wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
+ const wchar_t *src, size_t srcLen) const
{
- size_t len = 0;
+ if ( srcLen == wxNO_LEN )
+ srcLen = wxWcslen(src) + 1;
- while (*psz && (!buf || len < n))
+ if ( !dst )
{
- char cc[4];
+ // optimization: return maximal space which could be needed for this
+ // string instead of the exact amount which could be less if there are
+ // any surrogates in the input
+ //
+ // we consider that surrogates are rare enough to make it worthwhile to
+ // avoid running the loop below at the cost of slightly extra memory
+ // consumption
+ return srcLen*BYTES_PER_CHAR;
+ }
- // cast is ok for WC_UTF16
- size_t pa = decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
- if (pa == (size_t)-1)
- return pa;
+ wxUint32 *out = wx_reinterpret_cast(wxUint32 *, dst);
+ size_t outLen = 0;
+ for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
+ {
+ const wxUint32 ch = wxDecodeSurrogate(&src);
+ if ( !src )
+ return wxCONV_FAILED;
- if (buf)
- {
- *buf++ = cc[3];
- *buf++ = cc[2];
- *buf++ = cc[1];
- *buf++ = cc[0];
- }
+ outLen += BYTES_PER_CHAR;
- len += sizeof(wxUint32);
- psz += pa;
- }
+ if ( outLen > dstLen )
+ return wxCONV_FAILED;
- if (buf && len <= n - sizeof(wxUint32))
- *(wxUint32*)buf = 0;
+ *out++ = wxUINT32_SWAP_ALWAYS(ch);
+ }
- return len;
+ return outLen;
}
-#else // WC_UTF16
-
+#else // !WC_UTF16: wchar_t is UTF-32
// copy 32bit MB to 32bit String
size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
psz += sizeof(wxUint32);
}
- if (buf && len < n)
- *buf = 0;
+ if (buf && len<n)
+ *buf=0;
return len;
}
// copy 32bit String to 32bit MB
size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
- size_t len = 0;
+ size_t len=0;
while (*psz && (!buf || len < n))
{
psz++;
}
- if (buf && len <= n - sizeof(wxUint32))
- *(wxUint32*)buf = 0;
+ if (buf && len<=n-sizeof(wxUint32))
+ *(wxUint32*)buf=0;
return len;
}
// swap 32bit MB to 32bit String
size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
- size_t len = 0;
+ size_t len=0;
while (*(wxUint32*)psz && (!buf || len < n))
{
((char *)buf)[3] = psz[0];
buf++;
}
-
len++;
psz += sizeof(wxUint32);
}
- if (buf && len < n)
- *buf = 0;
+ if (buf && len<n)
+ *buf=0;
return len;
}
// swap 32bit String to 32bit MB
size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
- size_t len = 0;
+ size_t len=0;
while (*psz && (!buf || len < n))
{
*buf++ = ((char *)psz)[1];
*buf++ = ((char *)psz)[0];
}
-
len += sizeof(wxUint32);
psz++;
}
- if (buf && len <= n - sizeof(wxUint32))
- *(wxUint32*)buf = 0;
+ if (buf && len<=n-sizeof(wxUint32))
+ *(wxUint32*)buf=0;
return len;
}
-#endif // WC_UTF16
+#endif // WC_UTF16/!WC_UTF16
// ============================================================================
// the other direction
iconv_t m2w,
w2m;
-
#if wxUSE_THREADS
// guards access to m2w and w2m objects
wxMutex m_iconvMutex;
delete result;
return 0;
}
-
return result;
}
switch ( nulLen )
{
default:
- return (size_t)-1;
+ return wxCONV_FAILED;
case 1:
inbuf = strlen(psz); // arguably more optimized than our version
wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
#endif // wxUSE_THREADS
+
size_t outbuf = n * SIZEOF_WCHAR_T;
size_t res, cres;
// VS: Use these instead of psz, buf because iconv() modifies its arguments:
// to calculate destination buffer requirement
wchar_t tbuf[8];
res = 0;
-
- do
- {
+ do {
bufPtr = tbuf;
- outbuf = 8 * SIZEOF_WCHAR_T;
+ outbuf = 8*SIZEOF_WCHAR_T;
cres = iconv(m2w,
ICONV_CHAR_CAST(&pszPtr), &inbuf,
(char**)&bufPtr, &outbuf );
- res += 8 - (outbuf / SIZEOF_WCHAR_T);
- }
- while ((cres == (size_t)-1) && (errno == E2BIG));
+ res += 8-(outbuf/SIZEOF_WCHAR_T);
+ } while ((cres==(size_t)-1) && (errno==E2BIG));
}
if (ICONV_FAILED(cres, inbuf))
{
//VS: it is ok if iconv fails, hence trace only
wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
- return (size_t)-1;
+ return wxCONV_FAILED;
}
return res;
tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
for ( size_t i = 0; i < inlen; i++ )
tmpbuf[n] = WC_BSWAP(psz[i]);
-
tmpbuf[inlen] = L'\0';
psz = tmpbuf;
}
// have destination buffer, convert there
cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
- res = n - outbuf;
+ res = n-outbuf;
// NB: iconv was given only wcslen(psz) characters on input, and so
// it couldn't convert the trailing zero. Let's do it ourselves
// to calculate destination buffer requirement
char tbuf[16];
res = 0;
- do
- {
- buf = tbuf;
- outbuf = 16;
+ do {
+ buf = tbuf; outbuf = 16;
cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
res += 16 - outbuf;
- }
- while ((cres == (size_t)-1) && (errno == E2BIG));
+ } while ((cres==(size_t)-1) && (errno==E2BIG));
}
if (ms_wcNeedsSwap)
if (ICONV_FAILED(cres, inbuf))
{
wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
- return (size_t)-1;
+ return wxCONV_FAILED;
}
return res;
char buf[8]; // should be enough for NUL in any encoding
size_t inLen = sizeof(wchar_t),
outLen = WXSIZEOF(buf);
- char *inBuff = (char *)wnul;
- char *outBuff = buf;
- if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
+ char *in = (char *)wnul;
+ char *out = buf;
+ if ( iconv(w2m, ICONV_CHAR_CAST(&in), &inLen, &out, &outLen) == (size_t)-1 )
{
self->m_minMBCharWidth = (size_t)-1;
}
else // ok
{
- self->m_minMBCharWidth = outBuff - buf;
+ self->m_minMBCharWidth = out - buf;
}
}
if ( !len )
{
// function totally failed
- return (size_t)-1;
+ return wxCONV_FAILED;
}
// if we were really converting and didn't use MB_ERR_INVALID_CHARS,
{
// we didn't obtain the same thing we started from, hence
// the conversion was lossy and we consider that it failed
- return (size_t)-1;
+ return wxCONV_FAILED;
}
}
if ( !len )
{
// function totally failed
- return (size_t)-1;
+ return wxCONV_FAILED;
}
// if we were really converting, check if we succeeded
// check if the conversion failed, i.e. if any replacements
// were done
if ( usedDef )
- return (size_t)-1;
+ return wxCONV_FAILED;
}
else // we must resort to double tripping...
{
wxWCharBuffer wcBuf(n);
- if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
+ if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
wcscmp(wcBuf, pwz) != 0 )
{
// we didn't obtain the same thing we started from, hence
// the conversion was lossy and we consider that it failed
- return (size_t)-1;
+ return wxCONV_FAILED;
}
}
}
{
default:
wxLogDebug(_T("Unexpected NUL length %d"), len);
- self->m_minMBCharWidth = (size_t)-1;
- break;
+ // fall through
case 0:
self->m_minMBCharWidth = (size_t)-1;
break;
default:
- // unknown, be conservative by default
+ // unknown, be conservative by default
s_isWin98Or2k = 0;
- break;
}
wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
#if defined(__WXCOCOA__)
-// RN: There is no UTF-32 support in either Core Foundation or Cocoa.
-// Strangely enough, internally Core Foundation uses
+// RN: There is no UTF-32 support in either Core Foundation or
+// Cocoa. Strangely enough, Core Foundation uses
// UTF 32 internally quite a bit - its just not public (yet).
#include <CoreFoundation/CFString.h>
CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
{
CFStringEncoding enc = kCFStringEncodingInvalidId ;
-
- switch (encoding)
+ if ( encoding == wxFONTENCODING_DEFAULT )
+ {
+ enc = CFStringGetSystemEncoding();
+ }
+ else switch( encoding)
{
- case wxFONTENCODING_DEFAULT :
- enc = CFStringGetSystemEncoding();
- break ;
-
case wxFONTENCODING_ISO8859_1 :
enc = kCFStringEncodingISOLatin1 ;
break ;
// break ;
case wxFONTENCODING_CP437 :
- enc = kCFStringEncodingDOSLatinUS ;
+ enc =kCFStringEncodingDOSLatinUS ;
break ;
case wxFONTENCODING_CP850 :
enc = kCFStringEncodingDOSLatin1;
enc = kCFStringEncodingDOSCyrillic;
break ;
case wxFONTENCODING_CP866 :
- enc = kCFStringEncodingDOSRussian ;
+ enc =kCFStringEncodingDOSRussian ;
break ;
case wxFONTENCODING_CP874 :
enc = kCFStringEncodingDOSThai;
enc = kCFStringEncodingDOSJapanese;
break ;
case wxFONTENCODING_CP936 :
- enc = kCFStringEncodingDOSChineseSimplif ;
+ enc =kCFStringEncodingDOSChineseSimplif ;
break ;
case wxFONTENCODING_CP949 :
enc = kCFStringEncodingDOSKorean;
enc = kCFStringEncodingWindowsLatin2;
break ;
case wxFONTENCODING_CP1251 :
- enc = kCFStringEncodingWindowsCyrillic ;
+ enc =kCFStringEncodingWindowsCyrillic ;
break ;
case wxFONTENCODING_CP1252 :
- enc = kCFStringEncodingWindowsLatin1 ;
+ enc =kCFStringEncodingWindowsLatin1 ;
break ;
case wxFONTENCODING_CP1253 :
enc = kCFStringEncodingWindowsGreek;
enc = kCFStringEncodingWindowsLatin5;
break ;
case wxFONTENCODING_CP1255 :
- enc = kCFStringEncodingWindowsHebrew ;
+ enc =kCFStringEncodingWindowsHebrew ;
break ;
case wxFONTENCODING_CP1256 :
- enc = kCFStringEncodingWindowsArabic ;
+ enc =kCFStringEncodingWindowsArabic ;
break ;
case wxFONTENCODING_CP1257 :
enc = kCFStringEncodingWindowsBalticRim;
// case wxFONTENCODING_MACKEYBOARD :
// enc = kCFStringEncodingMacKeyboardGlyphs ;
// break ;
-
default :
// because gcc is picky
break ;
- }
-
+ } ;
return enc ;
}
#if wxUSE_FONTMAP
wxMBConv_mac(const wxChar* name)
{
- Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
+ Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
}
#endif
{
OSStatus status = noErr ;
m_char_encoding = encoding ;
- m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 0, kUnicode16BitFormat) ;
+ m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
status = TECCreateConverter(&m_MB2WC_converter,
m_char_encoding,
n = wxMax( 32 , byteInLen ) ;
tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
}
-
ByteCount byteBufferLen = n * sizeof( UniChar ) ;
-
#if SIZEOF_WCHAR_T == 4
ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
#else
#else
res = byteOutLen / sizeof( UniChar ) ;
#endif
-
if ( buf == NULL )
free(tbuf) ;
ByteCount byteBufferLen = n ;
UniChar* ubuf = NULL ;
-
#if SIZEOF_WCHAR_T == 4
wxMBConvUTF16 converter ;
size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
#else
ubuf = (UniChar*) psz ;
#endif
-
- status = TECConvertText(
- m_WC2MB_converter, (ConstTextPtr) ubuf, byteInLen, &byteInLen,
- (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
-
+ status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
+ (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
#if SIZEOF_WCHAR_T == 4
free( ubuf ) ;
#endif
-
if ( buf == NULL )
free(tbuf) ;
//of bogus characters
wxWCharBuffer wcBuf(n);
size_t pszlen = wxWcslen(psz);
- if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
+ if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
wxWcslen(wcBuf) != pszlen ||
memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
{
// we didn't obtain the same thing we started from, hence
// the conversion was lossy and we consider that it failed
- return (size_t)-1;
+ return wxCONV_FAILED;
}
}
virtual wxMBConv *Clone() const { return new wxMBConv_mac(*this); }
bool IsOk() const
- { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL; }
+ { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL ; }
private:
- TECObjectRef m_MB2WC_converter;
- TECObjectRef m_WC2MB_converter;
+ TECObjectRef m_MB2WC_converter ;
+ TECObjectRef m_WC2MB_converter ;
- TextEncodingBase m_char_encoding;
- TextEncodingBase m_unicode_encoding;
+ TextEncodingBase m_char_encoding ;
+ TextEncodingBase m_unicode_encoding ;
};
#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
size_t inbuf = strlen(psz);
if (buf)
{
- if (!m2w.Convert(psz, buf))
- return (size_t)-1;
+ if (!m2w.Convert(psz,buf))
+ return wxCONV_FAILED;
}
return inbuf;
}
const size_t inbuf = wxWcslen(psz);
if (buf)
{
- if (!w2m.Convert(psz, buf))
- return (size_t)-1;
+ if (!w2m.Convert(psz,buf))
+ return wxCONV_FAILED;
}
return inbuf;
delete result;
return 0;
}
-
return result;
}
#endif
}
#endif // wxHAVE_WIN32_MB2WC
-
#if defined(__WXMAC__)
{
// leave UTF16 and UTF32 to the built-ins of wx
if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
{
+
#if wxUSE_FONTMAP
wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
: new wxMBConv_mac(m_encoding);
}
}
#endif
-
#if defined(__WXCOCOA__)
{
if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
{
+
#if wxUSE_FONTMAP
wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
: new wxMBConv_cocoa(m_encoding);
#else
wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
#endif
-
if ( conv->IsOk() )
return conv;
default:
// nothing to do but put here to suppress gcc warnings
- break;
+ ;
}
// step (3)
wxString::Format(_("encoding %s"), m_encoding).c_str()
#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
);
-
alreadyLoggingError = false;
}
for (size_t c = 0; c <= len; c++)
{
if (psz[c] > 0xFF)
- return (size_t)-1;
-
+ return wxCONV_FAILED;
buf[c] = (char)psz[c];
}
}
for (size_t c = 0; c <= len; c++)
{
if (psz[c] > 0xFF)
- return (size_t)-1;
+ return wxCONV_FAILED;
}
}