/////////////////////////////////////////////////////////////////////////////
// Name: strconv.cpp
// Purpose: Unicode conversion classes
-// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
+// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
+// Ryan Norton, Fredrik Roubert (UTF7)
// Modified by:
// Created: 29/01/98
// RCS-ID: $Id$
// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
// (c) 2000-2003 Vadim Zeitlin
+// (c) 2004 Ryan Norton, Fredrik Roubert
// Licence: wxWindows licence
/////////////////////////////////////////////////////////////////////////////
#ifdef HAVE_ICONV
#include <iconv.h>
+ #include "wx/thread.h"
#endif
#include "wx/encconv.h"
return buf;
}
+const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
+{
+ wxASSERT(pOutSize != NULL);
+ /* Converts the bytes at szString to wide characters by calling MB2WC() once
+  * per null-terminated (sub)string, so that null characters embedded in the
+  * input do not stop the conversion. The walk ends at
+  * szString + nStringLen + 1 (presumably szString[nStringLen] is the final
+  * null character - confirm against callers).
+  * On success *pOutSize is the sum of (converted length + 1) over all
+  * (sub)strings; on a failed conversion *pOutSize is 0 and the first element
+  * of the returned buffer is set to the null character.
+  */
+ const char* szEnd = szString + nStringLen + 1;
+ const char* szPos = szString;
+ const char* szStart = szPos;
+
+ size_t nActualLength = 0;
+ size_t nCurrentSize = nStringLen; //initial size given to the buffer: one element per input byte
+
+ wxWCharBuffer theBuffer(nCurrentSize);
+
+ //Keep converting until szPos lands exactly on szEnd, starting a new
+ //pass every time the null character ending a (sub)string is reached
+ while(szPos != szEnd)
+ {
+ wxASSERT(szPos < szEnd); //fails if the walk has stepped past szEnd
+
+ //Length query (NULL output) for the current (sub)string
+ size_t nLen = MB2WC(NULL, szPos, 0);
+
+ //Invalid conversion? Report a zero length and an empty buffer
+ if( nLen == (size_t)-1 )
+ {
+ *pOutSize = 0;
+ theBuffer.data()[0u] = wxT('\0');
+ return theBuffer;
+ }
+
+
+ //Increase the actual length (+1 for current null character)
+ nActualLength += nLen + 1;
+
+ //if the buffer is too small, copy its contents into one twice the size
+ if (nActualLength > (nCurrentSize+1))
+ {
+ wxWCharBuffer theNewBuffer(nCurrentSize << 1);
+ memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
+ theBuffer = theNewBuffer;
+ nCurrentSize <<= 1;
+ }
+ //Convert the current (sub)string together with its null character. NOTE(review):
+ //it is written at the input byte offset, not after the wide chars stored so far - confirm
+ if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
+ {
+ *pOutSize = 0;
+ theBuffer.data()[0u] = wxT('\0');
+ return theBuffer;
+ }
+
+ //Advance to the next (sub)string (+1 skips the null character).
+ //Note that we have to use strlen here instead of nLen
+ //because XX2XX gives us the size of the output buffer,
+ //not necessarily the length of the input string
+ szPos += strlen(szPos) + 1;
+ }
+
+ //success - hand back the accumulated length and the buffer
+ *pOutSize = nActualLength;
+ return theBuffer;
+}
+
+const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
+{
+ wxASSERT(pOutSize != NULL);
+ /* Converts the wide characters at szString to multibyte form by calling
+  * WC2MB() once per null-terminated (sub)string, so that null characters
+  * embedded in the input do not stop the conversion. The walk ends at
+  * szString + nStringLen + 1 (presumably szString[nStringLen] is the final
+  * null character - confirm against callers).
+  * On success *pOutSize is the sum of (converted length + 1) over all
+  * (sub)strings; on a failed conversion *pOutSize is 0 and the first element
+  * of the returned buffer is set to the null character.
+  */
+ const wchar_t* szEnd = szString + nStringLen + 1;
+ const wchar_t* szPos = szString;
+ const wchar_t* szStart = szPos;
+
+ size_t nActualLength = 0;
+ size_t nCurrentSize = nStringLen << 2; //initial size given to the buffer: 4 per wide character
+
+ wxCharBuffer theBuffer(nCurrentSize);
+
+ //Keep converting until szPos lands exactly on szEnd, starting a new
+ //pass every time the null character ending a (sub)string is reached
+ while(szPos != szEnd)
+ {
+ wxASSERT(szPos < szEnd); //fails if the walk has stepped past szEnd
+
+ //Length query (NULL output) for the current (sub)string
+ size_t nLen = WC2MB(NULL, szPos, 0);
+
+ //Invalid conversion? Report a zero length and an empty buffer
+ if( nLen == (size_t)-1 )
+ {
+ *pOutSize = 0;
+ theBuffer.data()[0u] = wxT('\0');
+ return theBuffer;
+ }
+
+ //Increase the actual length (+1 for current null character)
+ nActualLength += nLen + 1;
+
+ //if the buffer is too small, copy its contents into one twice the size
+ if (nActualLength > (nCurrentSize+1))
+ {
+ wxCharBuffer theNewBuffer(nCurrentSize << 1);
+ memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
+ theBuffer = theNewBuffer;
+ nCurrentSize <<= 1;
+ }
+ //Convert the current (sub)string together with its null character. NOTE(review):
+ //it is written at the input wchar_t offset, not after the bytes stored so far - confirm
+ if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
+ {
+ *pOutSize = 0;
+ theBuffer.data()[0u] = wxT('\0');
+ return theBuffer;
+ }
+
+ //Advance to the next (sub)string (+1 skips the null character).
+ //Note that we have to use wxWcslen here instead of nLen
+ //because XX2XX gives us the size of the output buffer,
+ //not necessarily the length of the input string
+ szPos += wxWcslen(szPos) + 1;
+ }
+
+ //success - hand back the accumulated length and the buffer
+ *pOutSize = nActualLength;
+ return theBuffer;
+}
+
// ----------------------------------------------------------------------------
// wxMBConvLibc
// ----------------------------------------------------------------------------
{
return wxWC2MB(buf, psz, n);
}
-
// ----------------------------------------------------------------------------
-// UTF-7
+// UTF-7
// ----------------------------------------------------------------------------
-#if 0
-static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "abcdefghijklmnopqrstuvwxyz"
- "0123456789'(),-./:?";
-static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
-static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "abcdefghijklmnopqrstuvwxyz"
- "0123456789+/";
-#endif
+// Implementation (C) 2004 Fredrik Roubert
+
+// BASE64 decoding table, indexed by byte value (256 entries):
+// gives the 6-bit value of a BASE64 digit, or 0xff for any byte that
+// is not a BASE64 digit (wxMBConvUTF7::MB2WC ends a run on 0xff)
+static const unsigned char utf7unb64[] =
+{
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f, // '+' and '/'
+ 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, // '0'-'7'
+ 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // '8', '9'
+ 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, // 'A'-'G'
+ 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, // 'H'-'O'
+ 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, // 'P'-'W'
+ 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff, // 'X'-'Z'
+ 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, // 'a'-'g'
+ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, // 'h'-'o'
+ 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, // 'p'-'w'
+ 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff, // 'x'-'z'
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x80 and above: never a digit
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
-// TODO: write actual implementations of UTF-7 here
-size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
- const char * WXUNUSED(psz),
- size_t WXUNUSED(n)) const
+size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
- return (size_t)-1;
+ /* Decodes the null-terminated UTF-7 text at psz into wide characters.
+  * With buf == NULL nothing is stored and only the length is computed;
+  * otherwise the outer loop stops once n characters have been produced.
+  * Returns the number of wide characters produced. A terminating 0 is
+  * stored only when buf is non-NULL and fewer than n were produced.
+  * This implementation has no error path: it never returns (size_t)-1.
+  */
+ size_t len = 0;
+
+ while (*psz && ((!buf) || (len < n)))
+ {
+ unsigned char cc = *psz++;
+ if (cc != '+')
+ {
+ // any byte other than '+' is stored as-is
+ if (buf)
+ *buf++ = cc;
+ len++;
+ }
+ else if (*psz == '-')
+ {
+ // the pair "+-" stands for a literal plus sign
+ if (buf)
+ *buf++ = cc;
+ len++;
+ psz++;
+ }
+ else
+ {
+ /* BASE64 run: every digit adds 6 bits to d and the bytes taken out of
+  * d are paired, high byte first, into 16-bit characters.
+  * NOTE(review): this inner loop does not test len against n - confirm
+  * that a long run cannot write past the end of buf.
+  */
+ bool lsb; // true when the next byte is the low half of a character
+ unsigned char c; // byte most recently taken out of d
+ unsigned int d, l; // d: bit accumulator, l: bits in d not yet taken out
+ for (lsb = false, d = 0, l = 0;
+ (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
+ {
+ d <<= 6;
+ d += cc;
+ for (l += 6; l >= 8; lsb = !lsb) // take a byte whenever 8 or more bits are pending
+ {
+ c = (unsigned char)((d >> (l -= 8)) % 256);
+ if (lsb)
+ {
+ if (buf)
+ *buf++ |= c; // low byte completes the character
+ len ++;
+ }
+ else
+ if (buf)
+ *buf = (wchar_t)(c << 8); // high byte: stored, buf not advanced yet
+ }
+ }
+ if (*psz == '-') // a '-' directly after the run is consumed
+ psz++;
+ }
+ }
+ if (buf && (len < n))
+ *buf = 0;
+ return len;
}
-size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
- const wchar_t * WXUNUSED(psz),
- size_t WXUNUSED(n)) const
+//
+// BASE64 encoding table: maps a 6-bit value (0-63) to its BASE64 digit
+//
+static const unsigned char utf7enb64[] =
+{
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
+ 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
+ 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
+ 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
+ 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
+ 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
+ 'w', 'x', 'y', 'z', '0', '1', '2', '3',
+ '4', '5', '6', '7', '8', '9', '+', '/'
+};
+
+//
+// UTF-7 encoding table: class of each character below 0x80 (index = character code)
+//
+// 0 - Set D (directly encoded characters)
+// 1 - Set O (optional direct characters)
+// 2 - whitespace characters (optional)
+// 3 - special characters
+// wxMBConvUTF7::WC2MB writes only class 0 directly; classes 1-3 get the '+'...'-' form
+static const unsigned char utf7encode[128] =
+{
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
+};
+
+size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t
+*psz, size_t n) const
{
- return (size_t)-1;
+ /* Encodes the null-terminated wide string at psz as UTF-7.
+  * Characters below 0x80 whose utf7encode class is 0 are written directly,
+  * a plus sign becomes "+-", and any other character opens a '+'...'-'
+  * BASE64 run that lasts until the end of input or the next directly
+  * written character. With buf == NULL only the length is computed.
+  * Returns the number of chars produced, or (size_t)-1 when the outer loop
+  * meets a character above 0xffff and WC_UTF16 is not defined. A
+  * terminating 0 is stored only when buf is non-NULL and fewer than n
+  * chars were produced.
+  */
+
+ size_t len = 0;
+
+ while (*psz && ((!buf) || (len < n)))
+ {
+ wchar_t cc = *psz++;
+ if (cc < 0x80 && utf7encode[cc] < 1)
+ {
+ // class 0 character: written unchanged
+ if (buf)
+ *buf++ = (char)cc;
+ len++;
+ }
+#ifndef WC_UTF16
+ else if (((wxUint32)cc) > 0xffff)
+ {
+ // no surrogate pair generation (yet?)
+ return (size_t)-1;
+ }
+#endif
+ else
+ {
+ if (buf)
+ *buf++ = '+';
+ len++;
+ if (cc != '+')
+ {
+ // BASE64 run. NOTE(review): neither n nor the 0xffff limit is tested inside it - confirm
+ unsigned int lsb, d, l; // d: bit accumulator, l: bits in d not yet written out
+ for (d = 0, l = 0;; psz++)
+ {
+ for (lsb = 0; lsb < 2; lsb ++)
+ {
+ d <<= 8;
+ d += lsb ? cc & 0xff : (cc & 0xff00) >> 8; // high byte first, then low byte
+
+ for (l += 8; l >= 6; ) // write a digit for every 6 pending bits
+ {
+ l -= 6;
+ if (buf)
+ *buf++ = utf7enb64[(d >> l) % 64];
+ len++;
+ }
+ }
+ cc = *psz; // peek at the next character; psz moves on only if the run continues
+ if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
+ break;
+ }
+ if (l != 0)
+ {
+ if (buf)
+ *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64]; // leftover bits, left-aligned in a last digit
+ len++;
+ }
+ }
+ if (buf)
+ *buf++ = '-';
+ len++;
+ }
+ }
+ if (buf && (len < n))
+ *buf = 0;
+ return len;
}
// ----------------------------------------------------------------------------
#ifdef HAVE_ICONV
-// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
-// if output buffer is _exactly_ as big as needed. Such case is (unless there's
-// yet another bug in glibc) the only case when iconv() returns with (size_t)-1
-// (which means error) and says there are 0 bytes left in the input buffer --
-// when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
-// this alternative test for iconv() failure.
+// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
+// E2BIG if output buffer is _exactly_ as big as needed. Such case is
+// (unless there's yet another bug in glibc) the only case when iconv()
+// returns with (size_t)-1 (which means error) and says there are 0 bytes
+// left in the input buffer -- when _real_ error occurs,
+// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
+// iconv() failure.
// [This bug does not appear in glibc 2.2.]
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
// the other direction
iconv_t m2w,
w2m;
+#if wxUSE_THREADS
+ // guards access to m2w and w2m objects
+ wxMutex m_iconvMutex;
+#endif
private:
// the name (for iconv_open()) of a wide char charset -- if none is
size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
+#if wxUSE_THREADS
+ // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
+ // Unfortunately there are a couple of global wxCSConv objects such as
+ // wxConvLocal that are used all over wx code, so we have to make sure
+ // the handle is used by at most one thread at a time. Otherwise
+ // only a few wx classes would be safe to use from non-main threads
+ // as MB<->WC conversion would fail "randomly".
+ wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
+#endif
+
size_t inbuf = strlen(psz);
size_t outbuf = n * SIZEOF_WCHAR_T;
size_t res, cres;
size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
+#if wxUSE_THREADS
+ // NB: explained in MB2WC
+ wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
+#endif
+
size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
size_t outbuf = n;
size_t res, cres;
CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
{
- CFStringEncoding enc = 0 ;
+ CFStringEncoding enc = kCFStringEncodingInvalidId ;
if ( encoding == wxFONTENCODING_DEFAULT )
{
-#if wxUSE_GUI
- encoding = wxFont::GetDefaultEncoding() ;
-#else
- encoding = wxLocale::GetSystemEncoding() ;
-#endif
+ enc = CFStringGetSystemEncoding();
}
else switch( encoding)
{
case wxFONTENCODING_CP950 :
enc = kCFStringEncodingDOSChineseTrad;
break ;
-
case wxFONTENCODING_CP1250 :
enc = kCFStringEncodingWindowsLatin2;
break ;
case wxFONTENCODING_CP1257 :
enc = kCFStringEncodingWindowsBalticRim;
break ;
- case wxFONTENCODING_UTF7 :
- enc = kCFStringEncodingNonLossyASCII ;
- break ;
+// This only really encodes to UTF7 (if that) evidently
+// case wxFONTENCODING_UTF7 :
+// enc = kCFStringEncodingNonLossyASCII ;
+// break ;
case wxFONTENCODING_UTF8 :
enc = kCFStringEncodingUTF8 ;
break ;
return enc ;
}
-wxFontEncoding wxFontEncFromCFStringEnc(CFStringEncoding encoding)
-{
- wxFontEncoding enc = wxFONTENCODING_DEFAULT ;
-
- switch( encoding)
- {
- case kCFStringEncodingISOLatin1 :
- enc = wxFONTENCODING_ISO8859_1 ;
- break ;
- case kCFStringEncodingISOLatin2 :
- enc = wxFONTENCODING_ISO8859_2;
- break ;
- case kCFStringEncodingISOLatin3 :
- enc = wxFONTENCODING_ISO8859_3 ;
- break ;
- case kCFStringEncodingISOLatin4 :
- enc = wxFONTENCODING_ISO8859_4;
- break ;
- case kCFStringEncodingISOLatinCyrillic :
- enc = wxFONTENCODING_ISO8859_5;
- break ;
- case kCFStringEncodingISOLatinArabic :
- enc = wxFONTENCODING_ISO8859_6;
- break ;
- case kCFStringEncodingISOLatinGreek :
- enc = wxFONTENCODING_ISO8859_7;
- break ;
- case kCFStringEncodingISOLatinHebrew :
- enc = wxFONTENCODING_ISO8859_8;
- break ;
- case kCFStringEncodingISOLatin5 :
- enc = wxFONTENCODING_ISO8859_9;
- break ;
- case kCFStringEncodingISOLatin6 :
- enc = wxFONTENCODING_ISO8859_10;
- break ;
- case kCFStringEncodingISOLatin7 :
- enc = wxFONTENCODING_ISO8859_13;
- break ;
- case kCFStringEncodingISOLatin8 :
- enc = wxFONTENCODING_ISO8859_14;
- break ;
- case kCFStringEncodingISOLatin9 :
- enc =wxFONTENCODING_ISO8859_15 ;
- break ;
-
- case kCFStringEncodingKOI8_R :
- enc = wxFONTENCODING_KOI8;
- break ;
-
-// case :
-// enc = wxFONTENCODING_BULGARIAN;
-// break ;
-
- case kCFStringEncodingDOSLatinUS :
- enc = wxFONTENCODING_CP437;
- break ;
- case kCFStringEncodingDOSLatin1 :
- enc = wxFONTENCODING_CP850;
- break ;
- case kCFStringEncodingDOSLatin2 :
- enc =wxFONTENCODING_CP852 ;
- break ;
- case kCFStringEncodingDOSCyrillic :
- enc = wxFONTENCODING_CP855;
- break ;
- case kCFStringEncodingDOSRussian :
- enc = wxFONTENCODING_CP866;
- break ;
- case kCFStringEncodingDOSThai :
- enc =wxFONTENCODING_CP874 ;
- break ;
- case kCFStringEncodingDOSJapanese :
- enc = wxFONTENCODING_CP932;
- break ;
- case kCFStringEncodingDOSChineseSimplif :
- enc = wxFONTENCODING_CP936;
- break ;
- case kCFStringEncodingDOSKorean :
- enc = wxFONTENCODING_CP949;
- break ;
- case kCFStringEncodingDOSChineseTrad :
- enc = wxFONTENCODING_CP950;
- break ;
-
- case kCFStringEncodingWindowsLatin2 :
- enc = wxFONTENCODING_CP1250;
- break ;
- case kCFStringEncodingWindowsCyrillic :
- enc = wxFONTENCODING_CP1251;
- break ;
- case kCFStringEncodingWindowsLatin1 :
- enc = wxFONTENCODING_CP1252;
- break ;
- case kCFStringEncodingWindowsGreek :
- enc = wxFONTENCODING_CP1253;
- break ;
- case kCFStringEncodingWindowsLatin5 :
- enc = wxFONTENCODING_CP1254;
- break ;
- case kCFStringEncodingWindowsHebrew :
- enc = wxFONTENCODING_CP1255;
- break ;
- case kCFStringEncodingWindowsArabic :
- enc = wxFONTENCODING_CP1256;
- break ;
- case kCFStringEncodingWindowsBalticRim :
- enc =wxFONTENCODING_CP1257 ;
- break ;
- case kCFStringEncodingEUC_JP :
- enc = wxFONTENCODING_EUC_JP;
- break ;
- case kCFStringEncodingUnicode :
- enc = wxFONTENCODING_UTF16;
- break;
- case kCFStringEncodingMacRoman :
- enc = wxFONTENCODING_MACROMAN ;
- break ;
- case kCFStringEncodingMacJapanese :
- enc = wxFONTENCODING_MACJAPANESE ;
- break ;
- case kCFStringEncodingMacChineseTrad :
- enc = wxFONTENCODING_MACCHINESETRAD ;
- break ;
- case kCFStringEncodingMacKorean :
- enc = wxFONTENCODING_MACKOREAN ;
- break ;
- case kCFStringEncodingMacArabic :
- enc =wxFONTENCODING_MACARABIC ;
- break ;
- case kCFStringEncodingMacHebrew :
- enc = wxFONTENCODING_MACHEBREW ;
- break ;
- case kCFStringEncodingMacGreek :
- enc = wxFONTENCODING_MACGREEK ;
- break ;
- case kCFStringEncodingMacCyrillic :
- enc = wxFONTENCODING_MACCYRILLIC ;
- break ;
- case kCFStringEncodingMacDevanagari :
- enc = wxFONTENCODING_MACDEVANAGARI ;
- break ;
- case kCFStringEncodingMacGurmukhi :
- enc = wxFONTENCODING_MACGURMUKHI ;
- break ;
- case kCFStringEncodingMacGujarati :
- enc = wxFONTENCODING_MACGUJARATI ;
- break ;
- case kCFStringEncodingMacOriya :
- enc =wxFONTENCODING_MACORIYA ;
- break ;
- case kCFStringEncodingMacBengali :
- enc =wxFONTENCODING_MACBENGALI ;
- break ;
- case kCFStringEncodingMacTamil :
- enc = wxFONTENCODING_MACTAMIL ;
- break ;
- case kCFStringEncodingMacTelugu :
- enc = wxFONTENCODING_MACTELUGU ;
- break ;
- case kCFStringEncodingMacKannada :
- enc = wxFONTENCODING_MACKANNADA ;
- break ;
- case kCFStringEncodingMacMalayalam :
- enc = wxFONTENCODING_MACMALAJALAM ;
- break ;
- case kCFStringEncodingMacSinhalese :
- enc = wxFONTENCODING_MACSINHALESE ;
- break ;
- case kCFStringEncodingMacBurmese :
- enc = wxFONTENCODING_MACBURMESE ;
- break ;
- case kCFStringEncodingMacKhmer :
- enc = wxFONTENCODING_MACKHMER ;
- break ;
- case kCFStringEncodingMacThai :
- enc = wxFONTENCODING_MACTHAI ;
- break ;
- case kCFStringEncodingMacLaotian :
- enc = wxFONTENCODING_MACLAOTIAN ;
- break ;
- case kCFStringEncodingMacGeorgian :
- enc = wxFONTENCODING_MACGEORGIAN ;
- break ;
- case kCFStringEncodingMacArmenian :
- enc = wxFONTENCODING_MACARMENIAN ;
- break ;
- case kCFStringEncodingMacChineseSimp :
- enc = wxFONTENCODING_MACCHINESESIMP ;
- break ;
- case kCFStringEncodingMacTibetan :
- enc = wxFONTENCODING_MACTIBETAN ;
- break ;
- case kCFStringEncodingMacMongolian :
- enc = wxFONTENCODING_MACMONGOLIAN ;
- break ;
- case kCFStringEncodingMacEthiopic :
- enc = wxFONTENCODING_MACETHIOPIC ;
- break ;
- case kCFStringEncodingMacCentralEurRoman:
- enc = wxFONTENCODING_MACCENTRALEUR ;
- break ;
- case kCFStringEncodingMacVietnamese:
- enc = wxFONTENCODING_MACVIATNAMESE ;
- break ;
- case kCFStringEncodingMacExtArabic :
- enc = wxFONTENCODING_MACARABICEXT ;
- break ;
- case kCFStringEncodingMacSymbol :
- enc = wxFONTENCODING_MACSYMBOL ;
- break ;
- case kCFStringEncodingMacDingbats :
- enc = wxFONTENCODING_MACDINGBATS ;
- break ;
- case kCFStringEncodingMacTurkish :
- enc = wxFONTENCODING_MACTURKISH ;
- break ;
- case kCFStringEncodingMacCroatian :
- enc = wxFONTENCODING_MACCROATIAN ;
- break ;
- case kCFStringEncodingMacIcelandic :
- enc = wxFONTENCODING_MACICELANDIC ;
- break ;
- case kCFStringEncodingMacRomanian :
- enc = wxFONTENCODING_MACROMANIAN ;
- break ;
- case kCFStringEncodingMacCeltic :
- enc = wxFONTENCODING_MACCELTIC ;
- break ;
- case kCFStringEncodingMacGaelic :
- enc = wxFONTENCODING_MACGAELIC ;
- break ;
-// case kCFStringEncodingMacKeyboardGlyphs :
-// enc = wxFONTENCODING_MACKEYBOARD ;
-// break ;
- } ;
- return enc ;
-}
-
class wxMBConv_cocoa : public wxMBConv
{
public:
void Init( CFStringEncoding encoding)
{
- m_char_encoding = encoding ;
- m_unicode_encoding = kCFStringEncodingUnicode;
+ m_encoding = encoding ;
}
size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
{
wxASSERT(szUnConv);
- size_t nBufSize = strlen(szUnConv) + 1;
- size_t nRealOutSize;
-
- UniChar* szUniCharBuffer = (UniChar*) szOut;
- wchar_t* szConvBuffer = szOut;
-
- if (szConvBuffer == NULL && nOutSize != 0)
- {
- szConvBuffer = new wchar_t[nOutSize] ;
- }
-
-#if SIZEOF_WCHAR_T == 4
- szUniCharBuffer = new UniChar[nOutSize];
-#endif
-
- CFDataRef theData = CFDataCreateWithBytesNoCopy (
- NULL, //allocator
- (const UInt8*)szUnConv,
- nBufSize - 1,
- NULL //deallocator
- );
-
- wxASSERT(theData);
-
- CFStringRef theString = CFStringCreateFromExternalRepresentation (
- NULL,
- theData,
- m_char_encoding
+ CFStringRef theString = CFStringCreateWithBytes (
+ NULL, //the allocator
+ (const UInt8*)szUnConv,
+ strlen(szUnConv),
+ m_encoding,
+ false //no BOM/external representation
);
wxASSERT(theString);
- if (nOutSize == 0)
+ size_t nOutLength = CFStringGetLength(theString);
+
+ if (szOut == NULL)
{
- nRealOutSize = CFStringGetLength(theString) + 1;
CFRelease(theString);
- return nRealOutSize - 1;
+ return nOutLength;
}
- CFRange theRange = { 0, CFStringGetLength(theString) };
+ CFRange theRange = { 0, nOutSize };
+#if SIZEOF_WCHAR_T == 4
+ UniChar* szUniCharBuffer = new UniChar[nOutSize];
+#endif
+
CFStringGetCharacters(theString, theRange, szUniCharBuffer);
-
-
- nRealOutSize = (CFStringGetLength(theString) + 1);
-
+
CFRelease(theString);
- szUniCharBuffer[nRealOutSize-1] = '\0' ;
+ szUniCharBuffer[nOutLength] = '\0' ;
#if SIZEOF_WCHAR_T == 4
wxMBConvUTF16 converter ;
- converter.MB2WC(szConvBuffer , (const char*)szUniCharBuffer , nRealOutSize ) ;
+ converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
delete[] szUniCharBuffer;
#endif
- if ( szOut == NULL )
- delete [] szConvBuffer;
-
- return nRealOutSize ;
+
+ return nOutLength;
}
size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
{
- size_t nBufSize = wxWcslen(szUnConv) + 1;
+ wxASSERT(szUnConv);
+
size_t nRealOutSize;
- char* szBuffer = szOut;
+ size_t nBufSize = wxWcslen(szUnConv);
UniChar* szUniBuffer = (UniChar*) szUnConv;
- if (szOut == NULL)
- {
- // worst case
- nRealOutSize = wxString::WorstEncodingCase(nBufSize - 1, *this)+1 ;
- szBuffer = new char[ nRealOutSize ] ;
- }
- else
- nRealOutSize = nOutSize;
-
#if SIZEOF_WCHAR_T == 4
wxMBConvUTF16BE converter ;
nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
nBufSize /= sizeof(UniChar);
- ++nBufSize;
#endif
CFStringRef theString = CFStringCreateWithCharactersNoCopy(
NULL, //allocator
szUniBuffer,
nBufSize,
- NULL //deallocator
+ kCFAllocatorNull //deallocator - we want to deallocate it ourselves
);
wxASSERT(theString);
//Note that CER puts a BOM when converting to unicode
- //so we may want to check and use getchars instead in that case
- CFDataRef theData = CFStringCreateExternalRepresentation(
- NULL, //allocator
- theString,
- m_char_encoding,
- 0 //what to put in characters that can't be converted -
- //0 tells CFString to return NULL if it meets such a character
- );
-
- if(!theData)
- return (size_t)-1;
-
- CFRelease(theString);
-
- nRealOutSize = CFDataGetLength(theData);
-
- if ( szOut == NULL )
- delete[] szBuffer;
-
- if(nOutSize == 0)
+ //so we check and use getchars instead in that case
+ if (m_encoding == kCFStringEncodingUnicode)
{
-//TODO: This gets flagged as a non-malloced address by the debugger...
-//#if SIZEOF_WCHAR_T == 4
-// delete[] szUniBuffer;
-//#endif
- CFRelease(theData);
- return nRealOutSize - 1;
+ if (szOut != NULL)
+ CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
+
+ nRealOutSize = CFStringGetLength(theString) + 1;
+ }
+ else
+ {
+ CFStringGetBytes(
+ theString,
+ CFRangeMake(0, CFStringGetLength(theString)),
+ m_encoding,
+ 0, //what to put in characters that can't be converted -
+ //0 tells CFString to return NULL if it meets such a character
+ false, //not an external representation
+ (UInt8*) szOut,
+ nOutSize,
+ (CFIndex*) &nRealOutSize
+ );
}
- CFRange theRange = {0, CFDataGetLength(theData) };
- CFDataGetBytes(theData, theRange, (UInt8*) szBuffer);
+ CFRelease(theString);
- CFRelease(theData);
+#if SIZEOF_WCHAR_T == 4
+ delete[] szUniBuffer;
+#endif
-//TODO: This gets flagged as a non-malloced address by the debugger...
-//#if SIZEOF_WCHAR_T == 4
-// delete[] szUniBuffer;
-//#endif
return nRealOutSize - 1;
}
bool IsOk() const
{
- //TODO: check for invalid en/de/coding
- return true;
+ return m_encoding != kCFStringEncodingInvalidId &&
+ CFStringIsEncodingAvailable(m_encoding);
}
private:
- CFStringEncoding m_char_encoding ;
- CFStringEncoding m_unicode_encoding ;
+ CFStringEncoding m_encoding ;
};
#endif // defined(__WXCOCOA__)
if (buf == NULL)
{
- n = byteInLen ;
+ //apple specs say at least 32
+ n = wxMax( 32 , byteInLen ) ;
tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
}
ByteCount byteBufferLen = n * sizeof( UniChar ) ;
if (buf == NULL)
{
- // worst case
- n = wxString::WorstEncodingCase(byteInLen / SIZEOF_WCHAR_T, *this) + SIZEOF_WCHAR_T;
+ //apple specs say at least 32
+ n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
tbuf = (char*) malloc( n ) ;
}
size_t res = byteOutLen ;
if ( buf && res < n)
+ {
buf[res] = 0;
+
+ //we need to double-trip to verify it didn't insert any ? in place
+ //of bogus characters
+ wxWCharBuffer wcBuf(n);
+ size_t pszlen = wxWcslen(psz);
+ if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
+ wxWcslen(wcBuf) != pszlen ||
+ memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
+ {
+ // we didn't obtain the same thing we started from, hence
+ // the conversion was lossy and we consider that it failed
+ return (size_t)-1;
+ }
+ }
return res ;
}
{
size_t inbuf = strlen(psz);
if (buf)
- m2w.Convert(psz,buf);
+ {
+ if (!m2w.Convert(psz,buf))
+ return (size_t)-1;
+ }
return inbuf;
}
{
const size_t inbuf = wxWcslen(psz);
if (buf)
- w2m.Convert(psz,buf);
+ {
+ if (!w2m.Convert(psz,buf))
+ return (size_t)-1;
+ }
return inbuf;
}