// Licence: wxWindows licence
/////////////////////////////////////////////////////////////////////////////
-// ============================================================================
-// declarations
-// ============================================================================
-
-// ----------------------------------------------------------------------------
-// headers
-// ----------------------------------------------------------------------------
-
// For compilers that support precompilation, includes "wx.h".
#include "wx/wxprec.h"
-#ifdef __BORLANDC__
- #pragma hdrstop
-#endif
-
#ifndef WX_PRECOMP
+ #ifdef __WXMSW__
+ #include "wx/msw/missing.h"
+ #endif
#include "wx/intl.h"
#include "wx/log.h"
-#endif // WX_PRECOMP
+ #include "wx/utils.h"
+ #include "wx/hashmap.h"
+#endif
#include "wx/strconv.h"
#ifdef __WINDOWS__
#include "wx/msw/private.h"
- #include "wx/msw/missing.h"
#endif
#ifndef __WXWINCE__
#if defined(__WIN32__) && !defined(__WXMICROWIN__)
#define wxHAVE_WIN32_MB2WC
-#endif // __WIN32__ but !__WXMICROWIN__
+#endif
#ifdef __SALFORDC__
#include <clib.h>
#include "wx/encconv.h"
#include "wx/fontmap.h"
-#include "wx/utils.h"
#ifdef __WXMAC__
#ifndef __DARWIN__
#include <TextEncodingConverter.h>
#endif
-#include "wx/mac/private.h" // includes mac headers
+// includes Mac headers
+#include "wx/mac/private.h"
#endif
+
#define TRACE_STRCONV _T("strconv")
+// WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
+// be 4 bytes
#if SIZEOF_WCHAR_T == 2
#define WC_UTF16
#endif
+
// ============================================================================
// implementation
// ============================================================================
}
// ----------------------------------------------------------------------------
-// UTF-16 en/decoding to/from UCS-4
+// UTF-16 en/decoding to/from UCS-4 with surrogates handling
// ----------------------------------------------------------------------------
-
static size_t encode_utf16(wxUint32 input, wxUint16 *output) // encodes one code point as UTF-16: returns the number of 16-bit units it takes (1 or 2) or a failure value; output may be NULL to only count
{
- if (input<=0xffff)
+ if (input <= 0xffff) // fits in a single unit and is stored as is (values in 0xd800..0xdfff are not rejected here)
{
if (output)
*output = (wxUint16) input;
+
return 1;
}
- else if (input>=0x110000)
+ else if (input >= 0x110000) // too big to be expressed as a surrogate pair
{
- return (size_t)-1;
+ return wxCONV_FAILED;
}
else
{
if (output)
{
- *output++ = (wxUint16) ((input >> 10)+0xd7c0);
- *output = (wxUint16) ((input&0x3ff)+0xdc00);
+ *output++ = (wxUint16) ((input >> 10) + 0xd7c0); // lead surrogate: 0xd7c0 is 0xd800 - (0x10000 >> 10)
+ *output = (wxUint16) ((input & 0x3ff) + 0xdc00); // trail surrogate: carries the low 10 bits of input
}
+
return 2;
}
}
static size_t decode_utf16(const wxUint16* input, wxUint32& output)
{
- if ((*input<0xd800) || (*input>0xdfff))
+ if ((*input < 0xd800) || (*input > 0xdfff))
{
output = *input;
return 1;
}
- else if ((input[1]<0xdc00) || (input[1]>0xdfff))
+ else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
{
output = *input;
- return (size_t)-1;
+ return wxCONV_FAILED;
}
else
{
}
}
+#ifdef WC_UTF16 // the element type of the UTF-16 buffers passed to wxDecodeSurrogate()
+ typedef wchar_t wxDecodeSurrogate_t;
+#else // !WC_UTF16
+ typedef wxUint16 wxDecodeSurrogate_t;
+#endif // WC_UTF16/!WC_UTF16
+
+// returns the next UTF-32 character decoded from the UTF-16 buffer *pSrc and
+// advances *pSrc past the 1 or 2 units which were used for it
+//
+// if decode_utf16() fails, *pSrc is set to NULL instead: the caller must
+// check for this before using the pointer or the returned value
+static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
+{
+ wxUint32 out;
+ const size_t
+ n = decode_utf16(wx_reinterpret_cast(const wxUint16 *, *pSrc), out); // looks at (*pSrc)[1] too if the first unit is in 0xd800..0xdfff
+ if ( n == wxCONV_FAILED )
+ *pSrc = NULL;
+ else
+ *pSrc += n;
+
+ return out;
+}
// ----------------------------------------------------------------------------
// wxMBConv
size_t dstWritten = 0;
// the number of NULs terminating this string
- size_t nulLen wxDUMMY_INITIALIZE(0);
+ size_t nulLen = 0; // not really needed, but just to avoid warnings
// if we were not given the input size we just have to assume that the
// string is properly terminated as we have no way of knowing how long it
// NULs at the end
wxCharBuffer bufTmp;
const char *srcEnd;
- if ( srcLen != (size_t)-1 )
+ if ( srcLen != wxNO_LEN )
{
// we need to know how to find the end of this string
nulLen = GetMBNulLen();
{
// try to convert the current chunk
size_t lenChunk = MB2WC(NULL, src, 0);
- if ( lenChunk == 0 )
- {
- // nothing left in the input string, conversion succeeded;
- // but still account for the trailing NULL
- dstWritten++;
- break;
- }
-
if ( lenChunk == wxCONV_FAILED )
return wxCONV_FAILED;
- lenChunk++; // for trailing NUL
+ lenChunk++; // for the L'\0' at the end of this chunk
dstWritten += lenChunk;
+ if ( lenChunk == 1 )
+ {
+ // nothing left in the input string, conversion succeeded
+ break;
+ }
+
if ( dst )
{
if ( dstWritten > dstLen )
if ( !srcEnd )
{
- // we convert the entire string in this case, as we suppose that the
- // string is NUL-terminated and so srcEnd is not used at all
+ // we convert just one chunk in this case as this is the entire
+ // string anyhow
break;
}
// if we don't know its length we have no choice but to assume that it is,
// indeed, properly terminated
wxWCharBuffer bufTmp;
- if ( srcLen == (size_t)-1 )
+ if ( srcLen == wxNO_LEN )
{
srcLen = wxWcslen(src) + 1;
}
size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
{
size_t rc = ToWChar(outBuff, outLen, inBuff);
- if ( rc != (size_t)wxCONV_FAILED )
+ if ( rc != wxCONV_FAILED )
{
// ToWChar() returns the buffer length, i.e. including the trailing
// NUL, while this method doesn't take it into account
size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
{
size_t rc = FromWChar(outBuff, outLen, inBuff);
- if ( rc != (size_t)wxCONV_FAILED )
+ if ( rc != wxCONV_FAILED )
{
rc -= GetMBNulLen();
}
{
// calculate the length of the buffer needed first
const size_t nLen = MB2WC(NULL, psz, 0);
- if ( nLen != (size_t)wxCONV_FAILED )
+ if ( nLen != wxCONV_FAILED )
{
// now do the actual conversion
wxWCharBuffer buf(nLen /* +1 added implicitly */);
if ( pwz )
{
const size_t nLen = WC2MB(NULL, pwz, 0);
- if ( nLen != (size_t)wxCONV_FAILED )
+ if ( nLen != wxCONV_FAILED )
{
// extra space for trailing NUL(s)
static const size_t extraLen = GetMaxMBNulLen();
wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
{
const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
- if ( dstLen != (size_t)wxCONV_FAILED )
+ if ( dstLen != wxCONV_FAILED )
{
wxWCharBuffer wbuf(dstLen - 1);
- if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) )
+ if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
{
if ( outLen )
- *outLen = dstLen - 1;
+ {
+ *outLen = dstLen;
+ if ( wbuf[dstLen - 1] == L'\0' )
+ (*outLen)--;
+ }
+
return wbuf;
}
}
const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
{
- const size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
- if ( dstLen != (size_t)wxCONV_FAILED )
+ size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
+ if ( dstLen != wxCONV_FAILED )
{
- wxCharBuffer buf(dstLen - 1);
- if ( FromWChar(buf.data(), dstLen, inBuff, inLen) )
+ // special case of empty input: can't allocate 0 size buffer below as
+ // wxCharBuffer insists on NUL-terminating it
+ wxCharBuffer buf(dstLen ? dstLen - 1 : 1);
+ if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
{
if ( outLen )
- *outLen = dstLen - 1;
+ {
+ *outLen = dstLen;
+
+ const size_t nulLen = GetMBNulLen();
+ if ( dstLen >= nulLen &&
+ !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
+ {
+ // in this case the output is NUL-terminated and we're not
+ // supposed to count NUL
+ *outLen -= nulLen;
+ }
+ }
return buf;
}
if ( !ok )
{
// in valid UTF7 we should have valid characters after '+'
- return (size_t)-1;
+ return wxCONV_FAILED;
}
if (*psz == '-')
else if (((wxUint32)cc) > 0xffff)
{
// no surrogate pair generation (yet?)
- return (size_t)-1;
+ return wxCONV_FAILED;
}
#endif
else
{
if (buf)
*buf++ = '+';
+
len++;
if (cc != '+')
{
len++;
}
}
+
cc = *psz;
if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
break;
}
+
if (l != 0)
{
if (buf)
*buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
+
len++;
}
}
psz++;
res = (res << 6) | (cc & 0x3f);
}
+
if (invalid || res <= utf8_max[ocnt])
{
// illegal UTF-8 encoding
#ifdef WC_UTF16
// cast is ok because wchar_t == wxUint16 if WC_UTF16
size_t pa = encode_utf16(res, (wxUint16 *)buf);
- if (pa == (size_t)-1)
+ if (pa == wxCONV_FAILED)
{
invalid = true;
}
#endif // WC_UTF16/!WC_UTF16
}
}
+
if (invalid)
{
if (m_options & MAP_INVALID_UTF8_TO_PUA)
#ifdef WC_UTF16
// cast is ok because wchar_t == wxUint16 if WC_UTF16
size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
- wxASSERT(pa != (size_t)-1);
+ wxASSERT(pa != wxCONV_FAILED);
if (buf)
buf += pa;
opsz++;
}
else // MAP_INVALID_UTF8_NOT
{
- return (size_t)-1;
+ return wxCONV_FAILED;
}
}
}
#ifdef WC_UTF16
// cast is ok for WC_UTF16
size_t pa = decode_utf16((const wxUint16 *)psz, cc);
- psz += (pa == (size_t)-1) ? 1 : pa;
+ psz += (pa == wxCONV_FAILED) ? 1 : pa;
#else
cc = (*psz++) & 0x7fffffff;
#endif
*buf++ = (char) cc;
len++;
}
-
else
{
len += cnt + 1;
return len;
}
-// ----------------------------------------------------------------------------
+// ============================================================================
// UTF-16
-// ----------------------------------------------------------------------------
+// ============================================================================
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF16straight wxMBConvUTF16BE
#define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
+/* static */ // returns the length of the UTF-16 input in bytes, or wxCONV_FAILED if srcLen is not a multiple of BYTES_PER_CHAR
+size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
+{
+ if ( srcLen == wxNO_LEN )
+ {
+ // count the 16-bit units of the input, including the terminating NUL one
+ const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
+ for ( srcLen = 1; *inBuff++; srcLen++ ) // starts at 1 to account for the terminator itself
+ ;
+ srcLen *= BYTES_PER_CHAR; // convert the number of units to the number of bytes
+ }
+ else // we already have the length
+ {
+ // we can only convert an entire number of UTF-16 characters
+ if ( srcLen % BYTES_PER_CHAR )
+ return wxCONV_FAILED;
+ }
+
+ return srcLen;
+}
+
+// case when in-memory representation is UTF-16 too
#ifdef WC_UTF16
-// copy 16bit MB to 16bit String
-size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
+// ----------------------------------------------------------------------------
+// conversions without endianness change
+// ----------------------------------------------------------------------------
+
+size_t // returns the number of wchar_t needed for (and, if dst is not NULL, copied from) the input, or wxCONV_FAILED
+wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen, // dstLen is in wchar_t units
+ const char *src, size_t srcLen) const // srcLen is in bytes or wxNO_LEN for a NUL-terminated input
{
- size_t len = 0;
+ // set up the scene for using memcpy() (which is presumably more efficient
+ // than copying the bytes one by one)
+ srcLen = GetLength(src, srcLen);
+ if ( srcLen == wxNO_LEN ) // NOTE(review): GetLength() reports errors as wxCONV_FAILED, this relies on it being equal to wxNO_LEN -- confirm
+ return wxCONV_FAILED;
- while (*(wxUint16*)psz && (!buf || len < n))
+ const size_t inLen = srcLen / BYTES_PER_CHAR; // input length in 16-bit units
+ if ( dst )
{
- if (buf)
- *buf++ = *(wxUint16*)psz;
- len++;
+ if ( dstLen < inLen ) // the output buffer is too small
+ return wxCONV_FAILED;
- psz += sizeof(wxUint16);
+ memcpy(dst, src, srcLen); // srcLen is in bytes here, as memcpy() expects
}
- if (buf && len < n)
- *buf = 0;
-
- return len;
+ return inLen;
}
-
-// copy 16bit String to 16bit MB
-size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
+size_t // returns the number of bytes needed for (and, if dst is not NULL, copied from) the input, or wxCONV_FAILED
+wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen, // dstLen is in bytes
+ const wchar_t *src, size_t srcLen) const // srcLen is in wchar_t units or wxNO_LEN for a NUL-terminated input
{
- size_t len = 0;
+ if ( srcLen == wxNO_LEN )
+ srcLen = wxWcslen(src) + 1; // + 1 to convert the terminating NUL as well
+
+ srcLen *= BYTES_PER_CHAR; // from now on srcLen is in bytes
- while (*psz && (!buf || len < n))
+ if ( dst )
{
- if (buf)
- {
- *(wxUint16*)buf = *psz;
- buf += sizeof(wxUint16);
- }
+ if ( dstLen < srcLen ) // the output buffer is too small
+ return wxCONV_FAILED;
- len += sizeof(wxUint16);
- psz++;
+ memcpy(dst, src, srcLen);
}
- if (buf && len <= n - sizeof(wxUint16))
- *(wxUint16*)buf = 0;
-
- return len;
+ return srcLen;
}
+// ----------------------------------------------------------------------------
+// endian-reversing conversions
+// ----------------------------------------------------------------------------
-// swap 16bit MB to 16bit String
-size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
+size_t // returns the number of wchar_t needed for (and, if dst is not NULL, written from) the input, or wxCONV_FAILED
+wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen, // dstLen is in wchar_t units
+ const char *src, size_t srcLen) const // srcLen is in bytes or wxNO_LEN for a NUL-terminated input
{
- size_t len = 0;
+ srcLen = GetLength(src, srcLen);
+ if ( srcLen == wxNO_LEN ) // NOTE(review): GetLength() reports errors as wxCONV_FAILED, this relies on it being equal to wxNO_LEN -- confirm
+ return wxCONV_FAILED;
+
+ srcLen /= BYTES_PER_CHAR; // from now on srcLen is in 16-bit units
- // UTF16 string must be terminated by 2 NULs as single NULs may occur
- // inside the string
- while ( (psz[0] || psz[1]) && (!buf || len < n) )
+ if ( dst )
{
- if ( buf )
+ if ( dstLen < srcLen ) // the output buffer is too small
+ return wxCONV_FAILED;
+
+ const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
+ for ( size_t n = 0; n < srcLen; n++, inBuff++ )
{
- ((char *)buf)[0] = psz[1];
- ((char *)buf)[1] = psz[0];
- buf++;
+ *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff); // exchange the two bytes of each unit
}
- len++;
- psz += 2;
}
- if ( buf && len < n )
- *buf = L'\0';
-
- return len;
+ return srcLen;
}
-
-// swap 16bit MB to 16bit String
-size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
+size_t // returns the number of bytes needed for (and, if dst is not NULL, written from) the input, or wxCONV_FAILED
+wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen, // dstLen is in bytes
+ const wchar_t *src, size_t srcLen) const // srcLen is in wchar_t units or wxNO_LEN for a NUL-terminated input
{
- size_t len = 0;
+ if ( srcLen == wxNO_LEN )
+ srcLen = wxWcslen(src) + 1; // + 1 to convert the terminating NUL as well
+
+ srcLen *= BYTES_PER_CHAR; // from now on srcLen is in bytes
- while ( *psz && (!buf || len < n) )
+ if ( dst )
{
- if ( buf )
+ if ( dstLen < srcLen ) // the output buffer is too small
+ return wxCONV_FAILED;
+
+ wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
+ for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ ) // n counts bytes while src advances by one unit
{
- *buf++ = ((char*)psz)[1];
- *buf++ = ((char*)psz)[0];
+ *outBuff++ = wxUINT16_SWAP_ALWAYS(*src); // exchange the two bytes of each unit
}
-
- len += 2;
- psz++;
- }
-
- if ( buf && len < n - 1 )
- {
- buf[0] =
- buf[1] = '\0';
}
- return len;
+ return srcLen;
}
+#else // !WC_UTF16: wchar_t is UTF-32
-#else // WC_UTF16
-
+// ----------------------------------------------------------------------------
+// conversions without endianness change
+// ----------------------------------------------------------------------------
-// copy 16bit MB to 32bit String
-size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
+// converts UTF-16 input in native byte order to UTF-32 wchar_t
+//
+// srcLen is in bytes or wxNO_LEN for a NUL-terminated input, dstLen is in
+// wchar_t units; returns the number of wchar_t written or, if dst is NULL, an
+// upper bound for it, and wxCONV_FAILED if the input is invalid or dst is too
+// small
+size_t
+wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
+ const char *src, size_t srcLen) const
{
- size_t len = 0;
+ srcLen = GetLength(src, srcLen);
+ if ( srcLen == wxCONV_FAILED ) // this is what GetLength() returns on error
+ return wxCONV_FAILED;
- while (*(wxUint16*)psz && (!buf || len < n))
+ const size_t inLen = srcLen / BYTES_PER_CHAR;
+ if ( !dst )
{
- wxUint32 cc;
- size_t pa = decode_utf16((wxUint16*)psz, cc);
- if (pa == (size_t)-1)
- return pa;
+ // optimization: return maximal space which could be needed for this
+ // string even if the real size could be smaller if the buffer contains
+ // any surrogates
+ return inLen;
+ }
- if (buf)
- *buf++ = (wchar_t)cc;
- len++;
- psz += pa * sizeof(wxUint16);
+ size_t outLen = 0;
+ const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
+ for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
+ {
+ // the decoder examines the following unit whenever the current one is
+ // in the surrogate range, so such a unit can't be the last one of the
+ // input: fail instead of reading beyond the end of the buffer
+ if ( inBuff + 1 == inEnd && *inBuff >= 0xd800 && *inBuff <= 0xdfff )
+ return wxCONV_FAILED;
+
+ const wxUint32 ch = wxDecodeSurrogate(&inBuff);
+ if ( !inBuff )
+ return wxCONV_FAILED;
+
+ if ( ++outLen > dstLen )
+ return wxCONV_FAILED;
+
+ *dst++ = ch;
}
- if (buf && len < n)
- *buf = 0;
- return len;
+ return outLen;
}
-
-// copy 32bit String to 16bit MB
-size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
+size_t // returns the number of bytes of the UTF-16 output (written only if dst is not NULL), or wxCONV_FAILED
+wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen, // dstLen is in bytes
+ const wchar_t *src, size_t srcLen) const // srcLen is in wchar_t units or wxNO_LEN for a NUL-terminated input
{
- size_t len=0;
+ if ( srcLen == wxNO_LEN )
+ srcLen = wxWcslen(src) + 1; // + 1 to convert the terminating NUL as well
- while (*psz && (!buf || len < n))
+ size_t outLen = 0;
+ wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst); // NULL if dst is NULL, i.e. when only computing the size
+ for ( size_t n = 0; n < srcLen; n++ )
{
wxUint16 cc[2];
- size_t pa = encode_utf16(*psz, cc);
-
- if (pa == (size_t)-1)
- return pa;
+ const size_t numChars = encode_utf16(*src++, cc); // 1 or 2 units, or wxCONV_FAILED for a value which can't be encoded
+ if ( numChars == wxCONV_FAILED )
+ return wxCONV_FAILED;
- if (buf)
+ outLen += numChars * BYTES_PER_CHAR;
+ if ( outBuff )
{
- *(wxUint16*)buf = cc[0];
- buf += sizeof(wxUint16);
- if (pa > 1)
+ if ( outLen > dstLen ) // checked before writing the unit(s) of this character
+ return wxCONV_FAILED;
+
+ *outBuff++ = cc[0];
+ if ( numChars == 2 )
{
- *(wxUint16*)buf = cc[1];
- buf += sizeof(wxUint16);
+ // second character of a surrogate
+ *outBuff++ = cc[1];
}
}
-
- len += pa*sizeof(wxUint16);
- psz++;
}
- if (buf && len <= n - sizeof(wxUint16))
- *(wxUint16*)buf = 0;
-
- return len;
+ return outLen;
}
+// ----------------------------------------------------------------------------
+// endian-reversing conversions
+// ----------------------------------------------------------------------------
-// swap 16bit MB to 32bit String
-size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
+// converts UTF-16 input in the byte order opposite to the native one to
+// UTF-32 wchar_t
+//
+// srcLen is in bytes or wxNO_LEN for a NUL-terminated input, dstLen is in
+// wchar_t units; returns the number of wchar_t written or, if dst is NULL, an
+// upper bound for it, and wxCONV_FAILED if the input is invalid or dst is too
+// small
+size_t
+wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
+ const char *src, size_t srcLen) const
{
- size_t len=0;
+ srcLen = GetLength(src, srcLen);
+ if ( srcLen == wxCONV_FAILED ) // this is what GetLength() returns on error
+ return wxCONV_FAILED;
- while (*(wxUint16*)psz && (!buf || len < n))
+ const size_t inLen = srcLen / BYTES_PER_CHAR;
+ if ( !dst )
{
- wxUint32 cc;
- char tmp[4];
+ // optimization: return maximal space which could be needed for this
+ // string even if the real size could be smaller if the buffer contains
+ // any surrogates
+ return inLen;
+ }
- tmp[0] = psz[1];
- tmp[1] = psz[0];
- tmp[2] = psz[3];
- tmp[3] = psz[2];
+ size_t outLen = 0;
+ const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
+ for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
+ {
+ wxUint32 ch;
+ wxUint16 tmp[2];
- size_t pa = decode_utf16((wxUint16*)tmp, cc);
- if (pa == (size_t)-1)
- return pa;
+ tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
+ inBuff++;
+
+ if ( inBuff < inEnd )
+ {
+ // normal case: there is a next unit which may be the second half
+ // of a surrogate pair
+ tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
+ }
+ else // end of input, don't read beyond it
+ {
+ // 0 is not a valid second half of a surrogate pair, so decoding
+ // still fails, as it should, if the last unit is a surrogate
+ tmp[1] = 0;
+ }
- if (buf)
- *buf++ = (wchar_t)cc;
+ const size_t numChars = decode_utf16(tmp, ch);
+ if ( numChars == wxCONV_FAILED )
+ return wxCONV_FAILED;
- len++;
- psz += pa * sizeof(wxUint16);
+ if ( numChars == 2 )
+ inBuff++;
+
+ if ( ++outLen > dstLen )
+ return wxCONV_FAILED;
+
+ *dst++ = ch;
}
- if (buf && len < n)
- *buf = 0;
- return len;
+ return outLen;
}
-
-// swap 32bit String to 16bit MB
-size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
+size_t // returns the number of bytes of the byte-swapped UTF-16 output (written only if dst is not NULL), or wxCONV_FAILED
+wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen, // dstLen is in bytes
+ const wchar_t *src, size_t srcLen) const // srcLen is in wchar_t units or wxNO_LEN for a NUL-terminated input
{
- size_t len = 0;
+ if ( srcLen == wxNO_LEN )
+ srcLen = wxWcslen(src) + 1; // + 1 to convert the terminating NUL as well
- while (*psz && (!buf || len < n))
+ size_t outLen = 0;
+ wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst); // NULL if dst is NULL, i.e. when only computing the size
+ for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
{
wxUint16 cc[2];
- size_t pa = encode_utf16(*psz, cc);
-
- if (pa == (size_t)-1)
- return pa;
+ const size_t numChars = encode_utf16(*src, cc); // 1 or 2 units, or wxCONV_FAILED for a value which can't be encoded
+ if ( numChars == wxCONV_FAILED )
+ return wxCONV_FAILED;
- if (buf)
+ outLen += numChars * BYTES_PER_CHAR;
+ if ( outBuff )
{
- *buf++ = ((char*)cc)[1];
- *buf++ = ((char*)cc)[0];
- if (pa > 1)
+ if ( outLen > dstLen ) // checked before writing the unit(s) of this character
+ return wxCONV_FAILED;
+
+ *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
+ if ( numChars == 2 )
{
- *buf++ = ((char*)cc)[3];
- *buf++ = ((char*)cc)[2];
+ // second character of a surrogate
+ *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
}
}
-
- len += pa * sizeof(wxUint16);
- psz++;
}
- if (buf && len <= n - sizeof(wxUint16))
- *(wxUint16*)buf = 0;
-
- return len;
+ return outLen;
}
-#endif // WC_UTF16
+#endif // WC_UTF16/!WC_UTF16
-// ----------------------------------------------------------------------------
+// ============================================================================
// UTF-32
-// ----------------------------------------------------------------------------
+// ============================================================================
#ifdef WORDS_BIGENDIAN
-#define wxMBConvUTF32straight wxMBConvUTF32BE
-#define wxMBConvUTF32swap wxMBConvUTF32LE
+ #define wxMBConvUTF32straight wxMBConvUTF32BE
+ #define wxMBConvUTF32swap wxMBConvUTF32LE
#else
-#define wxMBConvUTF32swap wxMBConvUTF32BE
-#define wxMBConvUTF32straight wxMBConvUTF32LE
+ #define wxMBConvUTF32swap wxMBConvUTF32BE
+ #define wxMBConvUTF32straight wxMBConvUTF32LE
#endif
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
+/* static */ // returns the length of the UTF-32 input in bytes, or wxCONV_FAILED if srcLen is not a multiple of BYTES_PER_CHAR
+size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
+{
+ if ( srcLen == wxNO_LEN )
+ {
+ // count the 32-bit units of the input, including the terminating NUL one
+ const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
+ for ( srcLen = 1; *inBuff++; srcLen++ ) // starts at 1 to account for the terminator itself
+ ;
+ srcLen *= BYTES_PER_CHAR; // convert the number of units to the number of bytes
+ }
+ else // we already have the length
+ {
+ // we can only convert an entire number of UTF-32 characters
+ if ( srcLen % BYTES_PER_CHAR )
+ return wxCONV_FAILED;
+ }
+
+ return srcLen;
+}
+
+// case when in-memory representation is UTF-16
#ifdef WC_UTF16
-// copy 32bit MB to 16bit String
-size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
+// ----------------------------------------------------------------------------
+// conversions without endianness change
+// ----------------------------------------------------------------------------
+
+size_t // returns the number of UTF-16 wchar_t units of the output (written only if dst is not NULL), or wxCONV_FAILED
+wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen, // dstLen is in wchar_t units
+ const char *src, size_t srcLen) const // srcLen is in bytes or wxNO_LEN for a NUL-terminated input
{
- size_t len = 0;
+ srcLen = GetLength(src, srcLen);
+ if ( srcLen == wxNO_LEN ) // NOTE(review): GetLength() reports errors as wxCONV_FAILED, this relies on it being equal to wxNO_LEN -- confirm
+ return wxCONV_FAILED;
- while (*(wxUint32*)psz && (!buf || len < n))
+ const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
+ const size_t inLen = srcLen / BYTES_PER_CHAR; // input length in 32-bit units
+ size_t outLen = 0;
+ for ( size_t n = 0; n < inLen; n++ )
{
wxUint16 cc[2];
+ const size_t numChars = encode_utf16(*inBuff++, cc); // 1 or 2 units, or wxCONV_FAILED for a value which can't be encoded
+ if ( numChars == wxCONV_FAILED )
+ return wxCONV_FAILED;
- size_t pa = encode_utf16(*(wxUint32*)psz, cc);
- if (pa == (size_t)-1)
- return pa;
-
- if (buf)
+ outLen += numChars;
+ if ( dst )
{
- *buf++ = cc[0];
- if (pa > 1)
- *buf++ = cc[1];
- }
+ if ( outLen > dstLen ) // checked before writing the unit(s) of this character
+ return wxCONV_FAILED;
- len += pa;
- psz += sizeof(wxUint32);
+ *dst++ = cc[0];
+ if ( numChars == 2 )
+ {
+ // second character of a surrogate
+ *dst++ = cc[1];
+ }
+ }
}
- if (buf && len < n)
- *buf = 0;
-
- return len;
+ return outLen;
}
-
-// copy 16bit String to 32bit MB
-size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
+// converts UTF-16 wchar_t input to UTF-32 in native byte order
+//
+// srcLen is in wchar_t units or wxNO_LEN for a NUL-terminated input, dstLen
+// is in bytes; returns the number of bytes written or, if dst is NULL, an
+// upper bound for it, and wxCONV_FAILED if the input is invalid or dst is too
+// small
+size_t
+wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
+ const wchar_t *src, size_t srcLen) const
{
- size_t len = 0;
+ if ( srcLen == wxNO_LEN )
+ srcLen = wxWcslen(src) + 1;
- while (*psz && (!buf || len < n))
+ if ( !dst )
{
- wxUint32 cc;
+ // optimization: return maximal space which could be needed for this
+ // string instead of the exact amount which could be less if there are
+ // any surrogates in the input
+ //
+ // we consider that surrogates are rare enough to make it worthwhile to
+ // avoid running the loop below at the cost of slightly extra memory
+ // consumption
+ return srcLen * BYTES_PER_CHAR;
+ }
- // cast is ok for WC_UTF16
- size_t pa = decode_utf16((const wxUint16 *)psz, cc);
- if (pa == (size_t)-1)
- return pa;
+ wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
+ size_t outLen = 0;
+ for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
+ {
+ // the decoder examines the following unit whenever the current one is
+ // in the surrogate range, so such a unit can't be the last one of the
+ // input: fail instead of reading beyond the end of the buffer
+ if ( src + 1 == srcEnd )
+ {
+ const wxUint16 last = (wxUint16) *src;
+ if ( last >= 0xd800 && last <= 0xdfff )
+ return wxCONV_FAILED;
+ }
+
+ const wxUint32 ch = wxDecodeSurrogate(&src);
+ if ( !src )
+ return wxCONV_FAILED;
- if (buf)
- {
- *(wxUint32*)buf = cc;
- buf += sizeof(wxUint32);
- }
+ outLen += BYTES_PER_CHAR;
- len += sizeof(wxUint32);
- psz += pa;
- }
+ if ( outLen > dstLen )
+ return wxCONV_FAILED;
- if (buf && len <= n - sizeof(wxUint32))
- *(wxUint32*)buf = 0;
+ *outBuff++ = ch;
+ }
- return len;
+ return outLen;
}
+// ----------------------------------------------------------------------------
+// endian-reversing conversions
+// ----------------------------------------------------------------------------
-// swap 32bit MB to 16bit String
-size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
+size_t // returns the number of UTF-16 wchar_t units of the output (written only if dst is not NULL), or wxCONV_FAILED
+wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen, // dstLen is in wchar_t units
+ const char *src, size_t srcLen) const // srcLen is in bytes or wxNO_LEN for a NUL-terminated input
{
- size_t len = 0;
+ srcLen = GetLength(src, srcLen);
+ if ( srcLen == wxNO_LEN ) // NOTE(review): GetLength() reports errors as wxCONV_FAILED, this relies on it being equal to wxNO_LEN -- confirm
+ return wxCONV_FAILED;
- while (*(wxUint32*)psz && (!buf || len < n))
+ const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
+ const size_t inLen = srcLen / BYTES_PER_CHAR; // input length in 32-bit units
+ size_t outLen = 0;
+ for ( size_t n = 0; n < inLen; n++, inBuff++ )
{
- char tmp[4];
- tmp[0] = psz[3];
- tmp[1] = psz[2];
- tmp[2] = psz[1];
- tmp[3] = psz[0];
-
wxUint16 cc[2];
+ const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc); // the input unit is byte-swapped before being encoded
+ if ( numChars == wxCONV_FAILED )
+ return wxCONV_FAILED;
- size_t pa = encode_utf16(*(wxUint32*)tmp, cc);
- if (pa == (size_t)-1)
- return pa;
-
- if (buf)
+ outLen += numChars;
+ if ( dst )
{
- *buf++ = cc[0];
- if (pa > 1)
- *buf++ = cc[1];
- }
+ if ( outLen > dstLen ) // checked before writing the unit(s) of this character
+ return wxCONV_FAILED;
- len += pa;
- psz += sizeof(wxUint32);
+ *dst++ = cc[0];
+ if ( numChars == 2 )
+ {
+ // second character of a surrogate
+ *dst++ = cc[1];
+ }
+ }
}
- if (buf && len < n)
- *buf = 0;
-
- return len;
+ return outLen;
}
-
-// swap 16bit String to 32bit MB
-size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
+// converts UTF-16 wchar_t input to UTF-32 in the byte order opposite to the
+// native one
+//
+// srcLen is in wchar_t units or wxNO_LEN for a NUL-terminated input, dstLen
+// is in bytes; returns the number of bytes written or, if dst is NULL, an
+// upper bound for it, and wxCONV_FAILED if the input is invalid or dst is too
+// small
+size_t
+wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
+ const wchar_t *src, size_t srcLen) const
{
- size_t len = 0;
+ if ( srcLen == wxNO_LEN )
+ srcLen = wxWcslen(src) + 1;
- while (*psz && (!buf || len < n))
+ if ( !dst )
{
- char cc[4];
+ // optimization: return maximal space which could be needed for this
+ // string instead of the exact amount which could be less if there are
+ // any surrogates in the input
+ //
+ // we consider that surrogates are rare enough to make it worthwhile to
+ // avoid running the loop below at the cost of slightly extra memory
+ // consumption
+ return srcLen*BYTES_PER_CHAR;
+ }
- // cast is ok for WC_UTF16
- size_t pa = decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
- if (pa == (size_t)-1)
- return pa;
+ wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
+ size_t outLen = 0;
+ for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
+ {
+ // the decoder examines the following unit whenever the current one is
+ // in the surrogate range, so such a unit can't be the last one of the
+ // input: fail instead of reading beyond the end of the buffer
+ if ( src + 1 == srcEnd )
+ {
+ const wxUint16 last = (wxUint16) *src;
+ if ( last >= 0xd800 && last <= 0xdfff )
+ return wxCONV_FAILED;
+ }
+
+ const wxUint32 ch = wxDecodeSurrogate(&src);
+ if ( !src )
+ return wxCONV_FAILED;
- if (buf)
- {
- *buf++ = cc[3];
- *buf++ = cc[2];
- *buf++ = cc[1];
- *buf++ = cc[0];
- }
+ outLen += BYTES_PER_CHAR;
- len += sizeof(wxUint32);
- psz += pa;
- }
+ if ( outLen > dstLen )
+ return wxCONV_FAILED;
- if (buf && len <= n - sizeof(wxUint32))
- *(wxUint32*)buf = 0;
+ *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
+ }
- return len;
+ return outLen;
}
-#else // WC_UTF16
+#else // !WC_UTF16: wchar_t is UTF-32
+// ----------------------------------------------------------------------------
+// conversions without endianness change
+// ----------------------------------------------------------------------------
-// copy 32bit MB to 32bit String
-size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
+size_t // returns the number of wchar_t needed for (and, if dst is not NULL, copied from) the input, or wxCONV_FAILED
+wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen, // dstLen is in wchar_t units
+ const char *src, size_t srcLen) const // srcLen is in bytes or wxNO_LEN for a NUL-terminated input
{
- size_t len=0;
+ // use memcpy() as it should be much faster than hand-written loop
+ srcLen = GetLength(src, srcLen);
+ if ( srcLen == wxNO_LEN ) // NOTE(review): GetLength() reports errors as wxCONV_FAILED, this relies on it being equal to wxNO_LEN -- confirm
+ return wxCONV_FAILED;
- while (*(wxUint32*)psz && (!buf || len < n))
+ const size_t inLen = srcLen/BYTES_PER_CHAR; // input length in 32-bit units
+ if ( dst )
{
- if (buf)
- *buf++ = (wchar_t)(*(wxUint32*)psz);
- len++;
- psz += sizeof(wxUint32);
- }
+ if ( dstLen < inLen ) // the output buffer is too small
+ return wxCONV_FAILED;
- if (buf && len < n)
- *buf = 0;
+ memcpy(dst, src, srcLen); // srcLen is in bytes here, as memcpy() expects
+ }
- return len;
+ return inLen;
}
-
-// copy 32bit String to 32bit MB
-size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
+size_t // returns the number of bytes needed for (and, if dst is not NULL, copied from) the input, or wxCONV_FAILED
+wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen, // dstLen is in bytes
+ const wchar_t *src, size_t srcLen) const // srcLen is in wchar_t units or wxNO_LEN for a NUL-terminated input
{
- size_t len = 0;
+ if ( srcLen == wxNO_LEN )
+ srcLen = wxWcslen(src) + 1; // + 1 to convert the terminating NUL as well
+
+ srcLen *= BYTES_PER_CHAR; // from now on srcLen is in bytes
- while (*psz && (!buf || len < n))
+ if ( dst )
{
- if (buf)
- {
- *(wxUint32*)buf = *psz;
- buf += sizeof(wxUint32);
- }
+ if ( dstLen < srcLen ) // the output buffer is too small
+ return wxCONV_FAILED;
- len += sizeof(wxUint32);
- psz++;
+ memcpy(dst, src, srcLen);
}
- if (buf && len <= n - sizeof(wxUint32))
- *(wxUint32*)buf = 0;
-
- return len;
+ return srcLen;
}
+// ----------------------------------------------------------------------------
+// endian-reversing conversions
+// ----------------------------------------------------------------------------
-// swap 32bit MB to 32bit String
-size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
+size_t // returns the number of wchar_t needed for (and, if dst is not NULL, written from) the input, or wxCONV_FAILED
+wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen, // dstLen is in wchar_t units
+ const char *src, size_t srcLen) const // srcLen is in bytes or wxNO_LEN for a NUL-terminated input
{
- size_t len = 0;
+ srcLen = GetLength(src, srcLen);
+ if ( srcLen == wxNO_LEN ) // NOTE(review): GetLength() reports errors as wxCONV_FAILED, this relies on it being equal to wxNO_LEN -- confirm
+ return wxCONV_FAILED;
+
+ srcLen /= BYTES_PER_CHAR; // from now on srcLen is in 32-bit units
- while (*(wxUint32*)psz && (!buf || len < n))
+ if ( dst )
{
- if (buf)
+ if ( dstLen < srcLen ) // the output buffer is too small
+ return wxCONV_FAILED;
+
+ const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
+ for ( size_t n = 0; n < srcLen; n++, inBuff++ )
{
- ((char *)buf)[0] = psz[3];
- ((char *)buf)[1] = psz[2];
- ((char *)buf)[2] = psz[1];
- ((char *)buf)[3] = psz[0];
- buf++;
+ *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff); // reverse the byte order of each unit
}
-
- len++;
- psz += sizeof(wxUint32);
}
- if (buf && len < n)
- *buf = 0;
-
- return len;
+ return srcLen;
}
-
-// swap 32bit String to 32bit MB
-size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
+size_t // returns the number of bytes needed for (and, if dst is not NULL, written from) the input, or wxCONV_FAILED
+wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen, // dstLen is in bytes
+ const wchar_t *src, size_t srcLen) const // srcLen is in wchar_t units or wxNO_LEN for a NUL-terminated input
{
- size_t len = 0;
+ if ( srcLen == wxNO_LEN )
+ srcLen = wxWcslen(src) + 1; // + 1 to convert the terminating NUL as well
- while (*psz && (!buf || len < n))
+ srcLen *= BYTES_PER_CHAR; // from now on srcLen is in bytes
+
+ if ( dst )
{
- if (buf)
+ if ( dstLen < srcLen ) // the output buffer is too small
+ return wxCONV_FAILED;
+
+ wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
+ for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ ) // n counts bytes while src advances by one unit
{
- *buf++ = ((char *)psz)[3];
- *buf++ = ((char *)psz)[2];
- *buf++ = ((char *)psz)[1];
- *buf++ = ((char *)psz)[0];
+ *outBuff++ = wxUINT32_SWAP_ALWAYS(*src); // reverse the byte order of each unit
}
-
- len += sizeof(wxUint32);
- psz++;
}
- if (buf && len <= n - sizeof(wxUint32))
- *(wxUint32*)buf = 0;
-
- return len;
+ return srcLen;
}
-
-#endif // WC_UTF16
+#endif // WC_UTF16/!WC_UTF16
// ============================================================================
{ return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
protected:
- // the iconv handlers used to translate from multibyte to wide char and in
- // the other direction
+ // the iconv handlers used to translate from multibyte
+ // to wide char and in the other direction
iconv_t m2w,
w2m;
#if wxUSE_FONTMAP
const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
#else // !wxUSE_FONTMAP
- static const wxChar *names[] =
+ static const wxChar *names_static[] =
{
#if SIZEOF_WCHAR_T == 4
_T("UCS-4"),
#endif
NULL
};
+ const wxChar **names = names_static;
#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
for ( ; *names && ms_wcCharsetName.empty(); ++names )
// first try charset with explicit bytesex info (e.g. "UCS-4LE"):
wxString nameXE(nameCS);
- #ifdef WORDS_BIGENDIAN
+
+#ifdef WORDS_BIGENDIAN
nameXE += _T("BE");
- #else // little endian
+#else // little endian
nameXE += _T("LE");
- #endif
+#endif
wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
nameXE.c_str());
wbufPtr = wbuf;
bufPtr = buf;
- res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
- (char**)&wbufPtr, &outsz);
+ res = iconv(
+ m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
+ (char**)&wbufPtr, &outsz);
if (ICONV_FAILED(res, insz))
{
switch ( nulLen )
{
default:
- return (size_t)-1;
+ return wxCONV_FAILED;
case 1:
inbuf = strlen(psz); // arguably more optimized than our version
}
#if wxUSE_THREADS
- // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
- // Unfortunately there is a couple of global wxCSConv objects such as
+ // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
+ // Unfortunately there are a couple of global wxCSConv objects such as
// wxConvLocal that are used all over wx code, so we have to make sure
// the handle is used by at most one thread at the time. Otherwise
// only a few wx classes would be safe to use from non-main threads
{
//VS: it is ok if iconv fails, hence trace only
wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
- return (size_t)-1;
+ return wxCONV_FAILED;
}
return res;
}
else
{
- // no destination buffer... convert using temp buffer
+ // no destination buffer: convert using temp buffer
// to calculate destination buffer requirement
char tbuf[16];
res = 0;
if (ICONV_FAILED(cres, inbuf))
{
wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
- return (size_t)-1;
+ return wxCONV_FAILED;
}
return res;
}
wxMBConv_win32(const wxMBConv_win32& conv)
+ : wxMBConv()
{
m_CodePage = conv.m_CodePage;
m_minMBCharWidth = conv.m_minMBCharWidth;
if ( !len )
{
// function totally failed
- return (size_t)-1;
+ return wxCONV_FAILED;
}
// if we were really converting and didn't use MB_ERR_INVALID_CHARS,
{
// we didn't obtain the same thing we started from, hence
// the conversion was lossy and we consider that it failed
- return (size_t)-1;
+ return wxCONV_FAILED;
}
}
if ( !len )
{
// function totally failed
- return (size_t)-1;
+ return wxCONV_FAILED;
}
// if we were really converting, check if we succeeded
// check if the conversion failed, i.e. if any replacements
// were done
if ( usedDef )
- return (size_t)-1;
+ return wxCONV_FAILED;
}
else // we must resort to double tripping...
{
wxWCharBuffer wcBuf(n);
- if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
+ if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
wcscmp(wcBuf, pwz) != 0 )
{
// we didn't obtain the same thing we started from, hence
// the conversion was lossy and we consider that it failed
- return (size_t)-1;
+ return wxCONV_FAILED;
}
}
}
int verMaj, verMin;
switch ( wxGetOsVersion(&verMaj, &verMin) )
{
- case wxWIN95:
+ case wxOS_WINDOWS_9X:
s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
break;
- case wxWINDOWS_NT:
+ case wxOS_WINDOWS_NT:
s_isWin98Or2k = verMaj >= 5;
break;
default:
- // unknown, be conservative by default
+ // unknown: be conservative by default
s_isWin98Or2k = 0;
break;
}
// RN: There is no UTF-32 support in either Core Foundation or Cocoa.
// Strangely enough, internally Core Foundation uses
-// UTF 32 internally quite a bit - its just not public (yet).
+// UTF-32 internally quite a bit - it's just not public (yet).
#include <CoreFoundation/CFString.h>
#include <CoreFoundation/CFStringEncodingExt.h>
Init( wxCFStringEncFromFontEnc(encoding) );
}
- ~wxMBConv_cocoa()
+ virtual ~wxMBConv_cocoa()
{
}
CFRelease(theString);
- szUniCharBuffer[nOutLength] = '\0' ;
+ szUniCharBuffer[nOutLength] = '\0';
#if SIZEOF_WCHAR_T == 4
- wxMBConvUTF16 converter ;
- converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
- delete[] szUniCharBuffer;
+ wxMBConvUTF16 converter;
+ converter.MB2WC( szOut, (const char*)szUniCharBuffer, nOutSize );
+ delete [] szUniCharBuffer;
#endif
return nOutLength;
#if SIZEOF_WCHAR_T == 4
wxMBConvUTF16 converter ;
- nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
- szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
- converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
+ nBufSize = converter.WC2MB( NULL, szUnConv, 0 );
+ szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1];
+ converter.WC2MB( (char*) szUniBuffer, szUnConv, nBufSize + sizeof(UniChar));
nBufSize /= sizeof(UniChar);
#endif
#if wxUSE_FONTMAP
wxMBConv_mac(const wxChar* name)
{
- Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
+ Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) );
}
#endif
Init( wxMacGetSystemEncFromFontEnc(encoding) );
}
- ~wxMBConv_mac()
+ virtual ~wxMBConv_mac()
{
OSStatus status = noErr ;
- status = TECDisposeConverter(m_MB2WC_converter);
- status = TECDisposeConverter(m_WC2MB_converter);
+ if (m_MB2WC_converter) // still NULL if CreateIfNeeded() never created it
+ status = TECDisposeConverter(m_MB2WC_converter);
+ if (m_WC2MB_converter) // same for the reverse converter
+ status = TECDisposeConverter(m_WC2MB_converter);
}
-
- void Init( TextEncodingBase encoding)
+ void Init( TextEncodingBase encoding,TextEncodingVariant encodingVariant = kTextEncodingDefaultVariant ,
+ TextEncodingFormat encodingFormat = kTextEncodingDefaultFormat)
{
- OSStatus status = noErr ;
- m_char_encoding = encoding ;
+ m_MB2WC_converter = NULL ; // no converters yet: CreateIfNeeded() makes them on first use
+ m_WC2MB_converter = NULL ;
+ m_char_encoding = CreateTextEncoding(encoding, encodingVariant, encodingFormat) ; // encoding of the multibyte side
m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 0, kUnicode16BitFormat) ;
+ }
- status = TECCreateConverter(&m_MB2WC_converter,
+ virtual void CreateIfNeeded() const
+ {
+ if ( m_MB2WC_converter == NULL && m_WC2MB_converter == NULL ) // only when neither converter exists yet
+ {
+ OSStatus status = noErr ;
+ status = TECCreateConverter(&m_MB2WC_converter, // m_char_encoding -> m_unicode_encoding
m_char_encoding,
m_unicode_encoding);
- status = TECCreateConverter(&m_WC2MB_converter,
+ wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
+ status = TECCreateConverter(&m_WC2MB_converter, // m_unicode_encoding -> m_char_encoding
m_unicode_encoding,
m_char_encoding);
+ wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
+ }
}
size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
+ CreateIfNeeded() ;
OSStatus status = noErr ;
ByteCount byteOutLen ;
- ByteCount byteInLen = strlen(psz) ;
+ ByteCount byteInLen = strlen(psz) + 1;
wchar_t *tbuf = NULL ;
UniChar* ubuf = NULL ;
size_t res = 0 ;
if (buf == NULL)
{
- //apple specs say at least 32
- n = wxMax( 32 , byteInLen ) ;
- tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
+ // Apple specs say at least 32
+ n = wxMax( 32, byteInLen ) ;
+ tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
}
ByteCount byteBufferLen = n * sizeof( UniChar ) ;
#else
ubuf = (UniChar*) (buf ? buf : tbuf) ;
#endif
- status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
- (TextPtr) ubuf , byteBufferLen, &byteOutLen);
+
+ status = TECConvertText(
+ m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
+ (TextPtr) ubuf, byteBufferLen, &byteOutLen);
+
#if SIZEOF_WCHAR_T == 4
// we have to terminate here, because n might be larger for the trailing zero, and if UniChar
// is not properly terminated we get random characters at the end
ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
wxMBConvUTF16 converter ;
- res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
+ res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
free( ubuf ) ;
#else
res = byteOutLen / sizeof( UniChar ) ;
size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
+ CreateIfNeeded() ;
OSStatus status = noErr ;
ByteCount byteOutLen ;
ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
if (buf == NULL)
{
- //apple specs say at least 32
- n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
+ // Apple specs say at least 32
+ n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
tbuf = (char*) malloc( n ) ;
}
#if SIZEOF_WCHAR_T == 4
wxMBConvUTF16 converter ;
- size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
+ size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
byteInLen = unicharlen ;
ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
- converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
+ converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
#else
ubuf = (UniChar*) psz ;
#endif
//of bogus characters
wxWCharBuffer wcBuf(n);
size_t pszlen = wxWcslen(psz);
- if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
+ if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
wxWcslen(wcBuf) != pszlen ||
memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
{
// we didn't obtain the same thing we started from, hence
// the conversion was lossy and we consider that it failed
- return (size_t)-1;
+ return wxCONV_FAILED;
}
}
virtual wxMBConv *Clone() const { return new wxMBConv_mac(*this); }
bool IsOk() const
- { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL; }
+ {
+ CreateIfNeeded() ; // the converters are created on demand, so create them before testing
+ return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL;
+ }
-private:
- TECObjectRef m_MB2WC_converter;
- TECObjectRef m_WC2MB_converter;
+protected :
+ mutable TECObjectRef m_MB2WC_converter;
+ mutable TECObjectRef m_WC2MB_converter;
TextEncodingBase m_char_encoding;
TextEncodingBase m_unicode_encoding;
};
+// wxMBConv_macUTF8D: the multibyte side is canonically decomposed (D) UTF-8,
+// the wide char side is composed Unicode.
+//
+// Each direction is done in two steps: the TEC converters inherited from
+// wxMBConv_mac translate between UTF-8 and 16 bit Unicode, while m_uni
+// (decomposition) and m_uniBack (composition) renormalize the 16 bit text.
+//
+// NB: the status codes of the individual Carbon calls are still not checked,
+//     all length outputs are zero-initialized so that a failing call yields
+//     an empty result instead of using uninitialized values.
+
+class wxMBConv_macUTF8D : public wxMBConv_mac
+{
+public :
+    wxMBConv_macUTF8D()
+    {
+        Init( kTextEncodingUnicodeDefault , kUnicodeNoSubset , kUnicodeUTF8Format ) ;
+        m_uni = NULL;
+        m_uniBack = NULL ;
+    }
+
+    virtual ~wxMBConv_macUTF8D()
+    {
+        if (m_uni!=NULL)
+            DisposeUnicodeToTextInfo(&m_uni);
+        if (m_uniBack!=NULL)
+            DisposeUnicodeToTextInfo(&m_uniBack);
+    }
+
+    // The inherited Clone() would return a plain (non decomposing)
+    // wxMBConv_mac sharing our converter handles. As this class has no
+    // parameters and creates its converters on demand, a new default
+    // constructed object is an equivalent and independent copy.
+    virtual wxMBConv *Clone() const { return new wxMBConv_macUTF8D; }
+
+    // Convert the wide string psz to decomposed UTF-8.
+    //
+    // buf is the destination buffer of n bytes or NULL, in which case a
+    // temporary buffer is used and only the length is computed. Returns the
+    // number of bytes produced or wxCONV_FAILED if the converters could not
+    // be created.
+    size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
+    {
+        CreateIfNeeded() ;
+        if ( m_uni == NULL || m_WC2MB_converter == NULL )
+            return wxCONV_FAILED ;
+
+        OSStatus status = noErr ;
+        ByteCount byteOutLen = 0 ;
+        ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
+
+        char *tbuf = NULL ;
+
+        if (buf == NULL)
+        {
+            // Apple specs say at least 32
+            n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
+            tbuf = (char*) malloc( n ) ;
+        }
+
+        ByteCount byteBufferLen = n ;
+        UniChar* ubuf = NULL ;
+
+#if SIZEOF_WCHAR_T == 4
+        // UniChar is 16 bit, so 32 bit wchar_t must go through UTF-16 first
+        wxMBConvUTF16 converter ;
+        size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
+        byteInLen = unicharlen ;
+        ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
+        converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
+#else
+        ubuf = (UniChar*) psz ;
+#endif
+
+        // ubuf is a non-decomposed UniChar buffer, decompose it into dcubuf
+
+        ByteCount dcubuflen = byteInLen * 2 + 2 ;
+        ByteCount dcubufread = 0 , dcubufwritten = 0 ;
+        UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
+
+        ConvertFromUnicodeToText( m_uni , byteInLen , ubuf ,
+            kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , dcubuf ) ;
+
+        // we now convert that decomposed buffer into UTF8
+
+        status = TECConvertText(
+            m_WC2MB_converter, (ConstTextPtr) dcubuf, dcubufwritten, &dcubufread,
+            (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
+
+        free( dcubuf );
+
+#if SIZEOF_WCHAR_T == 4
+        free( ubuf ) ;
+#endif
+
+        if ( buf == NULL )
+            free(tbuf) ;
+
+        size_t res = byteOutLen ;
+        if ( buf && res < n)
+        {
+            buf[res] = 0;
+            // don't test for round-trip fidelity yet, we cannot guarantee it yet
+        }
+
+        return res ;
+    }
+
+    // Convert the decomposed UTF-8 string psz to a composed wide string.
+    //
+    // buf is the destination buffer of n wide characters or NULL, in which
+    // case a temporary buffer is used and only the length is computed.
+    // Returns the number of wide characters produced or wxCONV_FAILED if the
+    // converters could not be created.
+    size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
+    {
+        CreateIfNeeded() ;
+        if ( m_uniBack == NULL || m_MB2WC_converter == NULL )
+            return wxCONV_FAILED ;
+
+        OSStatus status = noErr ;
+        ByteCount byteOutLen = 0 ;
+        ByteCount byteInLen = strlen(psz) + 1;
+        wchar_t *tbuf = NULL ;
+        UniChar* ubuf = NULL ;
+        size_t res = 0 ;
+
+        if (buf == NULL)
+        {
+            // Apple specs say at least 32
+            n = wxMax( 32, byteInLen ) ;
+            tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
+        }
+
+        ByteCount byteBufferLen = n * sizeof( UniChar ) ;
+
+#if SIZEOF_WCHAR_T == 4
+        ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
+#else
+        ubuf = (UniChar*) (buf ? buf : tbuf) ;
+#endif
+
+        ByteCount dcubuflen = byteBufferLen * 2 + 2 ;
+        ByteCount dcubufread = 0 , dcubufwritten = 0 ;
+        // one spare UniChar so that the terminator written below always fits,
+        // even if the conversion fills all dcubuflen bytes
+        UniChar *dcubuf = (UniChar*) malloc( dcubuflen + sizeof( UniChar ) ) ;
+
+        status = TECConvertText(
+            m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
+            (TextPtr) dcubuf, dcubuflen, &byteOutLen);
+        // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
+        // is not properly terminated we get random characters at the end
+        dcubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
+
+        // now from the decomposed UniChar to properly composed uniChar
+        //
+        // NB: ubuf can only hold byteBufferLen bytes of text (not dcubuflen,
+        //     which is the size of the bigger intermediate buffer), passing
+        //     anything larger would allow writing past its end
+        ConvertFromUnicodeToText( m_uniBack , byteOutLen , dcubuf ,
+            kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, byteBufferLen , &dcubufread , &dcubufwritten , ubuf ) ;
+
+        free( dcubuf );
+        byteOutLen = dcubufwritten ;
+
+#if SIZEOF_WCHAR_T == 4
+        // the 2 spare bytes allocated above make room for this terminator
+        // which the UTF-16 conversion relies on
+        ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
+
+        wxMBConvUTF16 converter ;
+        res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
+        free( ubuf ) ;
+#else
+        // ubuf is the destination buffer itself here: it has no spare
+        // element, so it is only terminated below if there is room for it
+        res = byteOutLen / sizeof( UniChar ) ;
+#endif
+
+        if ( buf == NULL )
+            free(tbuf) ;
+
+        if ( buf && res < n)
+            buf[res] = 0;
+
+        return res ;
+    }
+
+    // Create the TEC converters of the base class and the two Unicode
+    // (de)composition converters, each of them only if it doesn't exist yet.
+    virtual void CreateIfNeeded() const
+    {
+        wxMBConv_mac::CreateIfNeeded() ;
+
+        // composed -> canonically decomposed
+        if ( m_uni == NULL )
+        {
+            m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
+                kUnicodeNoSubset, kTextEncodingDefaultFormat);
+            m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
+                kUnicodeCanonicalDecompVariant, kTextEncodingDefaultFormat);
+            m_map.mappingVersion = kUnicodeUseLatestMapping;
+
+            OSStatus err = CreateUnicodeToTextInfo(&m_map, &m_uni);
+            wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
+        }
+
+        // decomposed -> canonically composed; tested separately so that a
+        // retry after a partial failure doesn't create (and leak) it twice
+        if ( m_uniBack == NULL )
+        {
+            m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
+                kUnicodeNoSubset, kTextEncodingDefaultFormat);
+            m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
+                kUnicodeCanonicalCompVariant, kTextEncodingDefaultFormat);
+            m_map.mappingVersion = kUnicodeUseLatestMapping;
+
+            OSStatus err = CreateUnicodeToTextInfo(&m_map, &m_uniBack);
+            wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
+        }
+    }
+
+protected :
+    mutable UnicodeToTextInfo m_uni;        // composed -> decomposed
+    mutable UnicodeToTextInfo m_uniBack;    // decomposed -> composed
+    mutable UnicodeMapping m_map;           // scratch mapping used to create both
+};
#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
// ============================================================================
if (buf)
{
if (!m2w.Convert(psz, buf))
- return (size_t)-1;
+ return wxCONV_FAILED;
}
return inbuf;
}
if (buf)
{
if (!w2m.Convert(psz, buf))
- return (size_t)-1;
+ return wxCONV_FAILED;
}
return inbuf;
}
#if wxUSE_FONTMAP
-#include "wx/hashmap.h"
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
wxEncodingNameCache );
#endif // !wxUSE_FONTMAP
{
wxString name(m_name);
+#if wxUSE_FONTMAP
wxFontEncoding encoding(m_encoding);
+#endif
if ( !name.empty() )
{
}
const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
-
- for ( ; *names; ++names )
+ // CS : if this returns no valid names (e.g. for MacRoman), the encoding
+ // used to get a 'failure' entry in the cache all the same, although it just
+ // has to be created using a different method; so only cache failed iconv
+ // creation attempts (or perhaps we shouldn't do this at all?)
+ if ( names[0] != NULL )
{
- wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
- if ( conv->IsOk() )
+ for ( ; *names; ++names )
{
- gs_nameCache[encoding] = *names;
- return conv;
+ wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
+ if ( conv->IsOk() )
+ {
+ gs_nameCache[encoding] = *names;
+ return conv;
+ }
+
+ delete conv;
}
- delete conv;
+ gs_nameCache[encoding] = _T(""); // cache the failure
}
-
- gs_nameCache[encoding] = _T(""); // cache the failure
}
#endif // wxUSE_FONTMAP
}
// NB: This is a hack to prevent deadlock. What could otherwise happen
// in Unicode build: wxConvLocal creation ends up being here
// because of some failure and logs the error. But wxLog will try to
- // attach timestamp, for which it will need wxConvLocal (to convert
- // time to char* and then wchar_t*), but that fails, tries to log
- // error, but wxLog has a (already locked) critical section that
- // guards static buffer.
+ // attach a timestamp, for which it will need wxConvLocal (to convert
+ // time to char* and then wchar_t*), but that fails, tries to log the
+ // error, but wxLog has an (already locked) critical section that
+ // guards the static buffer.
static bool alreadyLoggingError = false;
if (!alreadyLoggingError)
{
#if wxUSE_FONTMAP
wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
#else // !wxUSE_FONTMAP
- wxString::Format(_("encoding %s"), m_encoding).c_str()
+ wxString::Format(_("encoding %i"), m_encoding).c_str()
#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
);
{
wxCSConv *self = (wxCSConv *)this; // const_cast
-#if wxUSE_INTL
// if we don't have neither the name nor the encoding, use the default
// encoding for this system
if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
{
+#if wxUSE_INTL
self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
- }
+#else
+ // fall back to some reasonable default:
+ self->m_encoding = wxFONTENCODING_ISO8859_1;
#endif // wxUSE_INTL
+ }
self->m_convReal = DoCreate();
self->m_deferred = false;
for (size_t c = 0; c <= len; c++)
{
if (psz[c] > 0xFF)
- return (size_t)-1;
+ return wxCONV_FAILED;
buf[c] = (char)psz[c];
}
for (size_t c = 0; c <= len; c++)
{
if (psz[c] > 0xFF)
- return (size_t)-1;
+ return wxCONV_FAILED;
}
}
static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
static wxMBConvUTF7 wxConvUTF7Obj;
static wxMBConvUTF8 wxConvUTF8Obj;
-
+#if defined(__WXMAC__) && defined(TARGET_CARBON)
+static wxMBConv_macUTF8D wxConvMacUTF8DObj;
+#endif
WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
+WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = &wxConvLocal;
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
#ifdef __WXOSX__
+#if defined(__WXMAC__) && defined(TARGET_CARBON)
+ wxConvMacUTF8DObj;
+#else
wxConvUTF8Obj;
+#endif
#else
wxConvLibcObj;
#endif
-
#else // !wxUSE_WCHAR_T
// stand-ins in absence of wchar_t