#include "wx/mac/corefoundation/private/strconv_cf.h"
#endif //def __DARWIN__
-#ifdef __WXMAC__
-#ifndef __DARWIN__
-#include <ATSUnicode.h>
-#include <TextCommon.h>
-#include <TextEncodingConverter.h>
-#endif
-
-// includes Mac headers
-#include "wx/mac/private.h"
-#endif
-
#define TRACE_STRCONV _T("strconv")
if ( psz )
{
// calculate the length of the buffer needed first
- const size_t nLen = MB2WC(NULL, psz, 0);
+ const size_t nLen = ToWChar(NULL, 0, psz);
if ( nLen != wxCONV_FAILED )
{
// now do the actual conversion
- wxWCharBuffer buf(nLen /* +1 added implicitly */);
+ wxWCharBuffer buf(nLen - 1 /* +1 added implicitly */);
// +1 for the trailing NULL
- if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
+ if ( ToWChar(buf.data(), nLen, psz) != wxCONV_FAILED )
return buf;
}
}
{
if ( pwz )
{
- const size_t nLen = WC2MB(NULL, pwz, 0);
+ const size_t nLen = FromWChar(NULL, 0, pwz);
if ( nLen != wxCONV_FAILED )
{
- // extra space for trailing NUL(s)
- static const size_t extraLen = GetMaxMBNulLen();
-
- wxCharBuffer buf(nLen + extraLen - 1);
- if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
+ wxCharBuffer buf(nLen - 1);
+ if ( FromWChar(buf.data(), nLen, pwz) != wxCONV_FAILED )
return buf;
}
}
const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
if ( dstLen != wxCONV_FAILED )
{
- wxWCharBuffer wbuf(dstLen - 1);
+ // notice that we allocate space for dstLen+1 wide characters here
+ // because we want the buffer to always be NUL-terminated, even if the
+ // input isn't (as otherwise the caller has no way to know its length)
+ wxWCharBuffer wbuf(dstLen);
+ wbuf.data()[dstLen - 1] = L'\0';
if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
{
if ( outLen )
size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
if ( dstLen != wxCONV_FAILED )
{
- // special case of empty input: can't allocate 0 size buffer below as
- // wxCharBuffer insists on NUL-terminating it
- wxCharBuffer buf(dstLen ? dstLen - 1 : 1);
+ const size_t nulLen = GetMBNulLen();
+
+ // as above, ensure that the buffer is always NUL-terminated, even if
+ // the input is not
+ wxCharBuffer buf(dstLen + nulLen - 1);
+ memset(buf.data() + dstLen, 0, nulLen);
if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
{
if ( outLen )
{
*outLen = dstLen;
- const size_t nulLen = GetMBNulLen();
if ( dstLen >= nulLen &&
!NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
{
// UTF-8
// ----------------------------------------------------------------------------
-static wxUint32 utf8_max[]=
+static const wxUint32 utf8_max[]=
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
// boundaries of the private use area we use to (temporarily) remap invalid
const wxUint32 wxUnicodePUA = 0x100000;
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
-size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
+// this table gives the length of the UTF-8 encoding from its first character:
+const unsigned char tableUtf8Lengths[256] = {
+ // single-byte sequences (ASCII):
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00..0F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10..1F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20..2F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30..3F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40..4F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50..5F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60..6F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70..7F
+
+ // these are invalid:
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80..8F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 90..9F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // A0..AF
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // B0..BF
+ 0, 0, // C0,C1
+
+ // two-byte sequences:
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C2..CF
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // D0..DF
+
+ // three-byte sequences:
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // E0..EF
+
+ // four-byte sequences:
+ 4, 4, 4, 4, 4, // F0..F4
+
+ // these are invalid again (5- or 6-byte
+ // sequences and sequences for code points
+ // above U+10FFFF, as restricted by RFC 3629):
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // F5..FF
+};
+
+size_t
+wxMBConvStrictUTF8::ToWChar(wchar_t *dst, size_t dstLen,
+ const char *src, size_t srcLen) const
{
+ wchar_t *out = dstLen ? dst : NULL;
+ size_t written = 0;
+
+ if ( srcLen == wxNO_LEN )
+ srcLen = strlen(src) + 1;
+
+ for ( const char *p = src; ; p++ )
+ {
+ if ( !(srcLen == wxNO_LEN ? *p : srcLen) )
+ {
+ // all done successfully, just add the trailing NULL if we are not
+ // using explicit length
+ if ( srcLen == wxNO_LEN )
+ {
+ if ( out )
+ {
+ if ( !dstLen )
+ break;
+
+ *out = L'\0';
+ }
+
+ written++;
+ }
+
+ return written;
+ }
+
+ if ( out && !dstLen-- )
+ break;
+
+ wxUint32 code;
+ unsigned char c = *p;
+
+ if ( c < 0x80 )
+ {
+ if ( srcLen == 0 ) // the test works for wxNO_LEN too
+ break;
+
+ if ( srcLen != wxNO_LEN )
+ srcLen--;
+
+ code = c;
+ }
+ else
+ {
+ unsigned len = tableUtf8Lengths[c];
+ if ( !len )
+ break;
+
+ if ( srcLen < len ) // the test works for wxNO_LEN too
+ break;
+
+ if ( srcLen != wxNO_LEN )
+ srcLen -= len;
+
+ // Char. number range | UTF-8 octet sequence
+ // (hexadecimal) | (binary)
+ // ----------------------+----------------------------------------
+ // 0000 0000 - 0000 007F | 0xxxxxxx
+ // 0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
+ // 0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
+ // 0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ //
+ // Code point value is stored in bits marked with 'x',
+ // lowest-order bit of the value on the right side in the diagram
+ // above. (from RFC 3629)
+
+ // mask to extract lead byte's value ('x' bits above), by sequence
+ // length:
+ static const unsigned char leadValueMask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
+
+ // mask and value of lead byte's most significant bits, by length:
+ static const unsigned char leadMarkerMask[] = { 0x80, 0xE0, 0xF0, 0xF8 };
+ static const unsigned char leadMarkerVal[] = { 0x00, 0xC0, 0xE0, 0xF0 };
+
+ len--; // it's more convenient to work with 0-based length here
+
+ // extract the lead byte's value bits:
+ if ( (c & leadMarkerMask[len]) != leadMarkerVal[len] )
+ break;
+
+ code = c & leadValueMask[len];
+
+ // all remaining bytes, if any, are handled in the same way
+ // regardless of sequence's length:
+ for ( ; len; --len )
+ {
+ c = *++p;
+ if ( (c & 0xC0) != 0x80 )
+ return wxCONV_FAILED;
+
+ code <<= 6;
+ code |= c & 0x3F;
+ }
+ }
+
+#ifdef WC_UTF16
+ // cast is ok because wchar_t == wxUint16 if WC_UTF16
+ if ( encode_utf16(code, (wxUint16 *)out) == 2 )
+ {
+ if ( out )
+ out++;
+ written++;
+ }
+#else // !WC_UTF16
+ if ( out )
+ *out = code;
+#endif // WC_UTF16/!WC_UTF16
+
+ if ( out )
+ out++;
+
+ written++;
+ }
+
+ return wxCONV_FAILED;
+}
+
+size_t
+wxMBConvStrictUTF8::FromWChar(char *dst, size_t dstLen,
+ const wchar_t *src, size_t srcLen) const
+{
+ char *out = dstLen ? dst : NULL;
+ size_t written = 0;
+
+ for ( const wchar_t *wp = src; ; wp++ )
+ {
+ if ( !(srcLen == wxNO_LEN ? *wp : srcLen--) )
+ {
+ // all done successfully, just add the trailing NULL if we are not
+ // using explicit length
+ if ( srcLen == wxNO_LEN )
+ {
+ if ( out )
+ {
+ if ( !dstLen )
+ break;
+
+ *out = '\0';
+ }
+
+ written++;
+ }
+
+ return written;
+ }
+
+
+ wxUint32 code;
+#ifdef WC_UTF16
+ // cast is ok for WC_UTF16
+ if ( decode_utf16((const wxUint16 *)wp, code) == 2 )
+ {
+ // skip the next char too as we decoded a surrogate
+ wp++;
+ }
+#else // wchar_t is UTF-32
+ code = *wp & 0x7fffffff;
+#endif
+
+ unsigned len;
+ if ( code <= 0x7F )
+ {
+ len = 1;
+ if ( out )
+ {
+ if ( dstLen < len )
+ break;
+
+ out[0] = (char)code;
+ }
+ }
+ else if ( code <= 0x07FF )
+ {
+ len = 2;
+ if ( out )
+ {
+ if ( dstLen < len )
+ break;
+
+ // NB: this line takes 6 least significant bits, encodes them as
+ // 10xxxxxx and discards them so that the next byte can be encoded:
+ out[1] = 0x80 | (code & 0x3F); code >>= 6;
+ out[0] = 0xC0 | code;
+ }
+ }
+ else if ( code < 0xFFFF )
+ {
+ len = 3;
+ if ( out )
+ {
+ if ( dstLen < len )
+ break;
+
+ out[2] = 0x80 | (code & 0x3F); code >>= 6;
+ out[1] = 0x80 | (code & 0x3F); code >>= 6;
+ out[0] = 0xE0 | code;
+ }
+ }
+ else if ( code <= 0x10FFFF )
+ {
+ len = 4;
+ if ( out )
+ {
+ if ( dstLen < len )
+ break;
+
+ out[3] = 0x80 | (code & 0x3F); code >>= 6;
+ out[2] = 0x80 | (code & 0x3F); code >>= 6;
+ out[1] = 0x80 | (code & 0x3F); code >>= 6;
+ out[0] = 0xF0 | code;
+ }
+ }
+ else
+ {
+ wxFAIL_MSG( _T("trying to encode undefined Unicode character") );
+ break;
+ }
+
+ if ( out )
+ {
+ out += len;
+ dstLen -= len;
+ }
+
+ written += len;
+ }
+
+ // we only get here if an error occurs during decoding
+ return wxCONV_FAILED;
+}
+
+size_t wxMBConvUTF8::ToWChar(wchar_t *buf, size_t n,
+ const char *psz, size_t srcLen) const
+{
+ if ( m_options == MAP_INVALID_UTF8_NOT )
+ return wxMBConvStrictUTF8::ToWChar(buf, n, psz, srcLen);
+
size_t len = 0;
- while (*psz && ((!buf) || (len < n)))
+ while ((srcLen == wxNO_LEN ? *psz : srcLen--) && ((!buf) || (len < n)))
{
const char *opsz = psz;
bool invalid = false;
else
{
#ifdef WC_UTF16
- // cast is ok because wchar_t == wxUuint16 if WC_UTF16
+ // cast is ok because wchar_t == wxUint16 if WC_UTF16
size_t pa = encode_utf16(res, (wxUint16 *)buf);
if (pa == wxCONV_FAILED)
{
}
}
- if (buf && (len < n))
+ if (srcLen == wxNO_LEN && buf && (len < n))
*buf = 0;
- return len;
+ return len + 1;
}
static inline bool isoctal(wchar_t wch)
return L'0' <= wch && wch <= L'7';
}
-size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
+size_t wxMBConvUTF8::FromWChar(char *buf, size_t n,
+ const wchar_t *psz, size_t srcLen) const
{
+ if ( m_options == MAP_INVALID_UTF8_NOT )
+ return wxMBConvStrictUTF8::FromWChar(buf, n, psz, srcLen);
+
size_t len = 0;
- while (*psz && ((!buf) || (len < n)))
+ while ((srcLen == wxNO_LEN ? *psz : srcLen--) && ((!buf) || (len < n)))
{
wxUint32 cc;
}
}
- if (buf && (len < n))
+ if (srcLen == wxNO_LEN && buf && (len < n))
*buf = 0;
- return len;
+ return len + 1;
}
// ============================================================================
return wxCONV_FAILED;
}
- // if we were really converting, check if we succeeded
- if ( buf )
+ // we did something, check if we really succeeded
+ if ( flags )
+ {
+ // check if the conversion failed, i.e. if any replacements
+ // were done
+ if ( usedDef )
+ return wxCONV_FAILED;
+ }
+ else // we must resort to double tripping...
{
- if ( flags )
+ // first we need to ensure that we really have the MB data: this is
+ // not the case if we're called with NULL buffer, in which case we
+ // need to do the conversion yet again
+ wxCharBuffer bufDef;
+ if ( !buf )
{
- // check if the conversion failed, i.e. if any replacements
- // were done
- if ( usedDef )
+ bufDef = wxCharBuffer(len);
+ buf = bufDef.data();
+ if ( !::WideCharToMultiByte(m_CodePage, flags, pwz, -1,
+ buf, len, NULL, NULL) )
return wxCONV_FAILED;
}
- else // we must resort to double tripping...
+
+ if ( !n )
+ n = wcslen(pwz);
+ wxWCharBuffer wcBuf(n);
+ if ( MB2WC(wcBuf.data(), buf, n + 1) == wxCONV_FAILED ||
+ wcscmp(wcBuf, pwz) != 0 )
{
- wxWCharBuffer wcBuf(n);
- if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
- wcscmp(wcBuf, pwz) != 0 )
- {
- // we didn't obtain the same thing we started from, hence
- // the conversion was lossy and we consider that it failed
- return wxCONV_FAILED;
- }
+ // we didn't obtain the same thing we started from, hence
+ // the conversion was lossy and we consider that it failed
+ return wxCONV_FAILED;
}
}
#endif // wxHAVE_WIN32_MB2WC
-// ============================================================================
-// Mac conversion classes
-// ============================================================================
-
-/* Although we are in the base library we currently have this wxMac
- * conditional. This is not generally good but fortunately does not affect
- * the ABI of the base library, only what encodings might work.
- * It does mean that a wxBase built as part of wxMac has slightly more support
- * than one built for wxCocoa or even wxGtk.
- */
-#if defined(__WXMAC__) && defined(TARGET_CARBON)
-
-class wxMBConv_mac : public wxMBConv
-{
-public:
- wxMBConv_mac()
- {
- Init(CFStringGetSystemEncoding()) ;
- }
-
- wxMBConv_mac(const wxMBConv_mac& conv)
- {
- Init(conv.m_char_encoding);
- }
-
-#if wxUSE_FONTMAP
- wxMBConv_mac(const char* name)
- {
- Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) );
- }
-#endif
-
- wxMBConv_mac(wxFontEncoding encoding)
- {
- Init( wxMacGetSystemEncFromFontEnc(encoding) );
- }
-
- virtual ~wxMBConv_mac()
- {
- OSStatus status = noErr ;
- if (m_MB2WC_converter)
- status = TECDisposeConverter(m_MB2WC_converter);
- if (m_WC2MB_converter)
- status = TECDisposeConverter(m_WC2MB_converter);
- }
-
- void Init( TextEncodingBase encoding,TextEncodingVariant encodingVariant = kTextEncodingDefaultVariant ,
- TextEncodingFormat encodingFormat = kTextEncodingDefaultFormat)
- {
- m_MB2WC_converter = NULL ;
- m_WC2MB_converter = NULL ;
- m_char_encoding = CreateTextEncoding(encoding, encodingVariant, encodingFormat) ;
- m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 0, kUnicode16BitFormat) ;
- }
-
- virtual void CreateIfNeeded() const
- {
- if ( m_MB2WC_converter == NULL && m_WC2MB_converter == NULL )
- {
- OSStatus status = noErr ;
- status = TECCreateConverter(&m_MB2WC_converter,
- m_char_encoding,
- m_unicode_encoding);
- wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
- status = TECCreateConverter(&m_WC2MB_converter,
- m_unicode_encoding,
- m_char_encoding);
- wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
- }
- }
-
- size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
- {
- CreateIfNeeded() ;
- OSStatus status = noErr ;
- ByteCount byteOutLen ;
- ByteCount byteInLen = strlen(psz) + 1;
- wchar_t *tbuf = NULL ;
- UniChar* ubuf = NULL ;
- size_t res = 0 ;
-
- if (buf == NULL)
- {
- // Apple specs say at least 32
- n = wxMax( 32, byteInLen ) ;
- tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
- }
-
- ByteCount byteBufferLen = n * sizeof( UniChar ) ;
-
-#if SIZEOF_WCHAR_T == 4
- ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
-#else
- ubuf = (UniChar*) (buf ? buf : tbuf) ;
-#endif
-
- status = TECConvertText(
- m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
- (TextPtr) ubuf, byteBufferLen, &byteOutLen);
-
-#if SIZEOF_WCHAR_T == 4
- // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
- // is not properly terminated we get random characters at the end
- ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
- wxMBConvUTF16 converter ;
- res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
- free( ubuf ) ;
-#else
- res = byteOutLen / sizeof( UniChar ) ;
-#endif
-
- if ( buf == NULL )
- free(tbuf) ;
-
- if ( buf && res < n)
- buf[res] = 0;
-
- return res ;
- }
-
- size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
- {
- CreateIfNeeded() ;
- OSStatus status = noErr ;
- ByteCount byteOutLen ;
- ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
-
- char *tbuf = NULL ;
-
- if (buf == NULL)
- {
- // Apple specs say at least 32
- n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
- tbuf = (char*) malloc( n ) ;
- }
-
- ByteCount byteBufferLen = n ;
- UniChar* ubuf = NULL ;
-
-#if SIZEOF_WCHAR_T == 4
- wxMBConvUTF16 converter ;
- size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
- byteInLen = unicharlen ;
- ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
- converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
-#else
- ubuf = (UniChar*) psz ;
-#endif
-
- status = TECConvertText(
- m_WC2MB_converter, (ConstTextPtr) ubuf, byteInLen, &byteInLen,
- (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
-
-#if SIZEOF_WCHAR_T == 4
- free( ubuf ) ;
-#endif
-
- if ( buf == NULL )
- free(tbuf) ;
-
- size_t res = byteOutLen ;
- if ( buf && res < n)
- {
- buf[res] = 0;
-
- //we need to double-trip to verify it didn't insert any ? in place
- //of bogus characters
- wxWCharBuffer wcBuf(n);
- size_t pszlen = wxWcslen(psz);
- if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
- wxWcslen(wcBuf) != pszlen ||
- memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
- {
- // we didn't obtain the same thing we started from, hence
- // the conversion was lossy and we consider that it failed
- return wxCONV_FAILED;
- }
- }
-
- return res ;
- }
-
- virtual wxMBConv *Clone() const { return new wxMBConv_mac(*this); }
-
- bool IsOk() const
- {
- CreateIfNeeded() ;
- return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL;
- }
-
-protected :
- mutable TECObjectRef m_MB2WC_converter;
- mutable TECObjectRef m_WC2MB_converter;
-
- TextEncodingBase m_char_encoding;
- TextEncodingBase m_unicode_encoding;
-};
-
-// MB is decomposed (D) normalized UTF8
-
-class wxMBConv_macUTF8D : public wxMBConv_mac
-{
-public :
- wxMBConv_macUTF8D()
- {
- Init( kTextEncodingUnicodeDefault , kUnicodeNoSubset , kUnicodeUTF8Format ) ;
- m_uni = NULL;
- m_uniBack = NULL ;
- }
-
- virtual ~wxMBConv_macUTF8D()
- {
- if (m_uni!=NULL)
- DisposeUnicodeToTextInfo(&m_uni);
- if (m_uniBack!=NULL)
- DisposeUnicodeToTextInfo(&m_uniBack);
- }
-
- size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
- {
- CreateIfNeeded() ;
- OSStatus status = noErr ;
- ByteCount byteOutLen ;
- ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
-
- char *tbuf = NULL ;
-
- if (buf == NULL)
- {
- // Apple specs say at least 32
- n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
- tbuf = (char*) malloc( n ) ;
- }
-
- ByteCount byteBufferLen = n ;
- UniChar* ubuf = NULL ;
-
-#if SIZEOF_WCHAR_T == 4
- wxMBConvUTF16 converter ;
- size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
- byteInLen = unicharlen ;
- ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
- converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
-#else
- ubuf = (UniChar*) psz ;
-#endif
-
- // ubuf is a non-decomposed UniChar buffer
-
- ByteCount dcubuflen = byteInLen * 2 + 2 ;
- ByteCount dcubufread , dcubufwritten ;
- UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
-
- ConvertFromUnicodeToText( m_uni , byteInLen , ubuf ,
- kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , dcubuf ) ;
-
- // we now convert that decomposed buffer into UTF8
-
- status = TECConvertText(
- m_WC2MB_converter, (ConstTextPtr) dcubuf, dcubufwritten, &dcubufread,
- (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
-
- free( dcubuf );
-
-#if SIZEOF_WCHAR_T == 4
- free( ubuf ) ;
-#endif
-
- if ( buf == NULL )
- free(tbuf) ;
-
- size_t res = byteOutLen ;
- if ( buf && res < n)
- {
- buf[res] = 0;
- // don't test for round-trip fidelity yet, we cannot guarantee it yet
- }
-
- return res ;
- }
-
- size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
- {
- CreateIfNeeded() ;
- OSStatus status = noErr ;
- ByteCount byteOutLen ;
- ByteCount byteInLen = strlen(psz) + 1;
- wchar_t *tbuf = NULL ;
- UniChar* ubuf = NULL ;
- size_t res = 0 ;
-
- if (buf == NULL)
- {
- // Apple specs say at least 32
- n = wxMax( 32, byteInLen ) ;
- tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
- }
-
- ByteCount byteBufferLen = n * sizeof( UniChar ) ;
-
-#if SIZEOF_WCHAR_T == 4
- ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
-#else
- ubuf = (UniChar*) (buf ? buf : tbuf) ;
-#endif
-
- ByteCount dcubuflen = byteBufferLen * 2 + 2 ;
- ByteCount dcubufread , dcubufwritten ;
- UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
-
- status = TECConvertText(
- m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
- (TextPtr) dcubuf, dcubuflen, &byteOutLen);
- // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
- // is not properly terminated we get random characters at the end
- dcubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
-
- // now from the decomposed UniChar to properly composed uniChar
- ConvertFromUnicodeToText( m_uniBack , byteOutLen , dcubuf ,
- kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , ubuf ) ;
-
- free( dcubuf );
- byteOutLen = dcubufwritten ;
- ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
-
-
-#if SIZEOF_WCHAR_T == 4
- wxMBConvUTF16 converter ;
- res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
- free( ubuf ) ;
-#else
- res = byteOutLen / sizeof( UniChar ) ;
-#endif
-
- if ( buf == NULL )
- free(tbuf) ;
-
- if ( buf && res < n)
- buf[res] = 0;
-
- return res ;
- }
-
- virtual void CreateIfNeeded() const
- {
- wxMBConv_mac::CreateIfNeeded() ;
- if ( m_uni == NULL )
- {
- m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
- kUnicodeNoSubset, kTextEncodingDefaultFormat);
- m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
- kUnicodeCanonicalDecompVariant, kTextEncodingDefaultFormat);
- m_map.mappingVersion = kUnicodeUseLatestMapping;
-
- OSStatus err = CreateUnicodeToTextInfo(&m_map, &m_uni);
- wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
-
- m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
- kUnicodeNoSubset, kTextEncodingDefaultFormat);
- m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
- kUnicodeCanonicalCompVariant, kTextEncodingDefaultFormat);
- m_map.mappingVersion = kUnicodeUseLatestMapping;
- err = CreateUnicodeToTextInfo(&m_map, &m_uniBack);
- wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
- }
- }
-protected :
- mutable UnicodeToTextInfo m_uni;
- mutable UnicodeToTextInfo m_uniBack;
- mutable UnicodeMapping m_map;
-};
-#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
-
// ============================================================================
// wxEncodingConverter based conversion classes
// ============================================================================
private:
void Init()
{
- m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
+ // Refuse to use broken wxEncodingConverter code for Mac-specific encodings.
+ // The wxMBConv_cf class does a better job.
+ m_ok = (m_enc < wxFONTENCODING_MACMIN || m_enc > wxFONTENCODING_MACMAX) &&
+ m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
w2m.Init(wxFONTENCODING_UNICODE, m_enc);
}
{
if (charset)
{
- m_name = strdup(charset);
+ m_name = wxStrdup(charset);
m_deferred = true;
}
}
}
#endif // wxHAVE_WIN32_MB2WC
-#if defined(__WXMAC__)
- {
- // leave UTF16 and UTF32 to the built-ins of wx
- if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
- ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
- {
-#if wxUSE_FONTMAP
- wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
- : new wxMBConv_mac(m_encoding);
-#else
- wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
-#endif
- if ( conv->IsOk() )
- return conv;
-
- delete conv;
- }
- }
-#endif
-
#ifdef __DARWIN__
{
// leave UTF16 and UTF32 to the built-ins of wx
#ifdef __WINDOWS__
WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_win32, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
-#elif defined(__WXMAC__) && !defined(__MACH__)
- WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_mac, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
#else
WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConvLibc, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
#endif
-WX_DEFINE_GLOBAL_CONV(wxMBConvUTF8, wxConvUTF8, wxEMPTY_PARAMETER_VALUE);
-WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, wxEMPTY_PARAMETER_VALUE);
+// NB: we can't use wxEMPTY_PARAMETER_VALUE as final argument here because it's
+// passed to WX_DEFINE_GLOBAL_CONV2 after a macro expansion and so still
+// provokes an error message about "not enough macro parameters"; and we
+// can't use "()" here as the name##Obj declaration would be parsed as a
+// function declaration then, so use a semicolon and live with an extra
+// empty statement (and hope that no compilers warns about this)
+WX_DEFINE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8, ;);
+WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, ;);
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM));
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1));
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = wxGet_wxConvLibcPtr();
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = wxGet_wxConvLocalPtr();
-#if defined(__WXMAC__) && defined(TARGET_CARBON)
-static wxMBConv_macUTF8D wxConvMacUTF8DObj;
+#ifdef __DARWIN__
+// The xnu kernel always communicates file paths in decomposed UTF-8.
+// WARNING: Are we sure that CFString's conversion will cause decomposition?
+static wxMBConv_cf wxConvMacUTF8DObj(wxFONTENCODING_UTF8);
#endif
+
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName =
-#ifdef __WXOSX__
-#if defined(__WXMAC__) && defined(TARGET_CARBON)
+#ifdef __DARWIN__
&wxConvMacUTF8DObj;
-#else
- wxGet_wxConvUTF8Ptr();
-#endif
-#else // !__WXOSX__
+#else // !__DARWIN__
wxGet_wxConvLibcPtr();
-#endif // __WXOSX__/!__WXOSX__
+#endif // __DARWIN__/!__DARWIN__
#else // !wxUSE_WCHAR_T