-// ============================================================================
-// Cocoa conversion classes
-// ============================================================================
-
-#if defined(__WXCOCOA__)
-
-// RN: There is no UTF-32 support in either Core Foundation or Cocoa.
-// Strangely enough, Core Foundation uses UTF-32 internally quite a bit -
-// it's just not public (yet).
-
-#include <CoreFoundation/CFString.h>
-#include <CoreFoundation/CFStringEncodingExt.h>
-
-// Translates a wxFontEncoding value into the corresponding Core Foundation
-// CFStringEncoding constant.
-//
-// wxFONTENCODING_DEFAULT yields whatever CFStringGetSystemEncoding() reports;
-// every encoding without an entry below yields kCFStringEncodingInvalidId.
-// wxFONTENCODING_BULGARIAN, wxFONTENCODING_UTF7 and wxFONTENCODING_MACKEYBOARD
-// have no mapping here (candidate constants for the latter two would be
-// kCFStringEncodingNonLossyASCII, which evidently does not really produce
-// UTF-7, and kCFStringEncodingMacKeyboardGlyphs).
-CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
-{
-    switch ( encoding )
-    {
-        // current system encoding
-        case wxFONTENCODING_DEFAULT:        return CFStringGetSystemEncoding();
-
-        // ISO 8859 family
-        case wxFONTENCODING_ISO8859_1:      return kCFStringEncodingISOLatin1;
-        case wxFONTENCODING_ISO8859_2:      return kCFStringEncodingISOLatin2;
-        case wxFONTENCODING_ISO8859_3:      return kCFStringEncodingISOLatin3;
-        case wxFONTENCODING_ISO8859_4:      return kCFStringEncodingISOLatin4;
-        case wxFONTENCODING_ISO8859_5:      return kCFStringEncodingISOLatinCyrillic;
-        case wxFONTENCODING_ISO8859_6:      return kCFStringEncodingISOLatinArabic;
-        case wxFONTENCODING_ISO8859_7:      return kCFStringEncodingISOLatinGreek;
-        case wxFONTENCODING_ISO8859_8:      return kCFStringEncodingISOLatinHebrew;
-        case wxFONTENCODING_ISO8859_9:      return kCFStringEncodingISOLatin5;
-        case wxFONTENCODING_ISO8859_10:     return kCFStringEncodingISOLatin6;
-        case wxFONTENCODING_ISO8859_11:     return kCFStringEncodingISOLatinThai;
-        case wxFONTENCODING_ISO8859_13:     return kCFStringEncodingISOLatin7;
-        case wxFONTENCODING_ISO8859_14:     return kCFStringEncodingISOLatin8;
-        case wxFONTENCODING_ISO8859_15:     return kCFStringEncodingISOLatin9;
-
-        // Cyrillic
-        case wxFONTENCODING_KOI8:           return kCFStringEncodingKOI8_R;
-        case wxFONTENCODING_ALTERNATIVE:    return kCFStringEncodingDOSRussian; // MS-DOS CP866
-
-        // DOS code pages
-        case wxFONTENCODING_CP437:          return kCFStringEncodingDOSLatinUS;
-        case wxFONTENCODING_CP850:          return kCFStringEncodingDOSLatin1;
-        case wxFONTENCODING_CP852:          return kCFStringEncodingDOSLatin2;
-        case wxFONTENCODING_CP855:          return kCFStringEncodingDOSCyrillic;
-        case wxFONTENCODING_CP866:          return kCFStringEncodingDOSRussian;
-        case wxFONTENCODING_CP874:          return kCFStringEncodingDOSThai;
-        case wxFONTENCODING_CP932:          return kCFStringEncodingDOSJapanese;
-        case wxFONTENCODING_CP936:          return kCFStringEncodingDOSChineseSimplif;
-        case wxFONTENCODING_CP949:          return kCFStringEncodingDOSKorean;
-        case wxFONTENCODING_CP950:          return kCFStringEncodingDOSChineseTrad;
-
-        // Windows code pages
-        case wxFONTENCODING_CP1250:         return kCFStringEncodingWindowsLatin2;
-        case wxFONTENCODING_CP1251:         return kCFStringEncodingWindowsCyrillic;
-        case wxFONTENCODING_CP1252:         return kCFStringEncodingWindowsLatin1;
-        case wxFONTENCODING_CP1253:         return kCFStringEncodingWindowsGreek;
-        case wxFONTENCODING_CP1254:         return kCFStringEncodingWindowsLatin5;
-        case wxFONTENCODING_CP1255:         return kCFStringEncodingWindowsHebrew;
-        case wxFONTENCODING_CP1256:         return kCFStringEncodingWindowsArabic;
-        case wxFONTENCODING_CP1257:         return kCFStringEncodingWindowsBalticRim;
-
-        // Unicode and EUC
-        case wxFONTENCODING_UTF8:           return kCFStringEncodingUTF8;
-        case wxFONTENCODING_EUC_JP:         return kCFStringEncodingEUC_JP;
-        case wxFONTENCODING_UTF16:          return kCFStringEncodingUnicode;
-
-        // classic Mac OS script encodings
-        case wxFONTENCODING_MACROMAN:       return kCFStringEncodingMacRoman;
-        case wxFONTENCODING_MACJAPANESE:    return kCFStringEncodingMacJapanese;
-        case wxFONTENCODING_MACCHINESETRAD: return kCFStringEncodingMacChineseTrad;
-        case wxFONTENCODING_MACKOREAN:      return kCFStringEncodingMacKorean;
-        case wxFONTENCODING_MACARABIC:      return kCFStringEncodingMacArabic;
-        case wxFONTENCODING_MACHEBREW:      return kCFStringEncodingMacHebrew;
-        case wxFONTENCODING_MACGREEK:       return kCFStringEncodingMacGreek;
-        case wxFONTENCODING_MACCYRILLIC:    return kCFStringEncodingMacCyrillic;
-        case wxFONTENCODING_MACDEVANAGARI:  return kCFStringEncodingMacDevanagari;
-        case wxFONTENCODING_MACGURMUKHI:    return kCFStringEncodingMacGurmukhi;
-        case wxFONTENCODING_MACGUJARATI:    return kCFStringEncodingMacGujarati;
-        case wxFONTENCODING_MACORIYA:       return kCFStringEncodingMacOriya;
-        case wxFONTENCODING_MACBENGALI:     return kCFStringEncodingMacBengali;
-        case wxFONTENCODING_MACTAMIL:       return kCFStringEncodingMacTamil;
-        case wxFONTENCODING_MACTELUGU:      return kCFStringEncodingMacTelugu;
-        case wxFONTENCODING_MACKANNADA:     return kCFStringEncodingMacKannada;
-        case wxFONTENCODING_MACMALAJALAM:   return kCFStringEncodingMacMalayalam;
-        case wxFONTENCODING_MACSINHALESE:   return kCFStringEncodingMacSinhalese;
-        case wxFONTENCODING_MACBURMESE:     return kCFStringEncodingMacBurmese;
-        case wxFONTENCODING_MACKHMER:       return kCFStringEncodingMacKhmer;
-        case wxFONTENCODING_MACTHAI:        return kCFStringEncodingMacThai;
-        case wxFONTENCODING_MACLAOTIAN:     return kCFStringEncodingMacLaotian;
-        case wxFONTENCODING_MACGEORGIAN:    return kCFStringEncodingMacGeorgian;
-        case wxFONTENCODING_MACARMENIAN:    return kCFStringEncodingMacArmenian;
-        case wxFONTENCODING_MACCHINESESIMP: return kCFStringEncodingMacChineseSimp;
-        case wxFONTENCODING_MACTIBETAN:     return kCFStringEncodingMacTibetan;
-        case wxFONTENCODING_MACMONGOLIAN:   return kCFStringEncodingMacMongolian;
-        case wxFONTENCODING_MACETHIOPIC:    return kCFStringEncodingMacEthiopic;
-        case wxFONTENCODING_MACCENTRALEUR:  return kCFStringEncodingMacCentralEurRoman;
-        case wxFONTENCODING_MACVIATNAMESE:  return kCFStringEncodingMacVietnamese;
-        case wxFONTENCODING_MACARABICEXT:   return kCFStringEncodingMacExtArabic;
-        case wxFONTENCODING_MACSYMBOL:      return kCFStringEncodingMacSymbol;
-        case wxFONTENCODING_MACDINGBATS:    return kCFStringEncodingMacDingbats;
-        case wxFONTENCODING_MACTURKISH:     return kCFStringEncodingMacTurkish;
-        case wxFONTENCODING_MACCROATIAN:    return kCFStringEncodingMacCroatian;
-        case wxFONTENCODING_MACICELANDIC:   return kCFStringEncodingMacIcelandic;
-        case wxFONTENCODING_MACROMANIAN:    return kCFStringEncodingMacRomanian;
-        case wxFONTENCODING_MACCELTIC:      return kCFStringEncodingMacCeltic;
-        case wxFONTENCODING_MACGAELIC:      return kCFStringEncodingMacGaelic;
-
-        default:
-            // explicit default keeps gcc quiet about unhandled enumerators
-            break;
-    }
-
-    // nothing matched: report "no such encoding"
-    return kCFStringEncodingInvalidId;
-}
-
-// wxMBConv implementation that converts between multibyte text in a given
-// CFStringEncoding and wchar_t strings by going through a temporary CFString.
-//
-// When wchar_t is 4 bytes wide the UTF-16 data exchanged with Core Foundation
-// is additionally converted to/from wchar_t with wxMBConvUTF16.
-class wxMBConv_cocoa : public wxMBConv
-{
-public:
-    // Uses the current system encoding.
-    wxMBConv_cocoa()
-    {
-        Init(CFStringGetSystemEncoding());
-    }
-
-    wxMBConv_cocoa(const wxMBConv_cocoa& conv)
-    {
-        m_encoding = conv.m_encoding;
-    }
-
-#if wxUSE_FONTMAP
-    // Looks the charset name up through the font mapper (non-interactively).
-    wxMBConv_cocoa(const wxChar* name)
-    {
-        Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) );
-    }
-#endif
-
-    wxMBConv_cocoa(wxFontEncoding encoding)
-    {
-        Init( wxCFStringEncFromFontEnc(encoding) );
-    }
-
-    ~wxMBConv_cocoa()
-    {
-    }
-
-    // Records the Core Foundation encoding used for the multibyte side.
-    void Init( CFStringEncoding encoding)
-    {
-        m_encoding = encoding;
-    }
-
-    // Converts the NUL-terminated multibyte string szUnConv to wide characters.
-    //
-    // szOut      destination buffer, or NULL to only query the length
-    // szUnConv   NUL-terminated input in m_encoding
-    // nOutSize   capacity of szOut in wchar_t elements
-    //
-    // Returns wxCONV_FAILED if no CFString could be built from the input.
-    // With szOut == NULL returns the length of the string in UTF-16 code
-    // units (an upper bound for the number of wchar_t needed); otherwise
-    // returns the number of wide characters stored, not counting the
-    // terminating NUL, which is only written when there is room for it.
-    size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
-    {
-        wxASSERT(szUnConv);
-
-        CFStringRef theString = CFStringCreateWithBytes (
-                                    NULL, //the allocator
-                                    (const UInt8*)szUnConv,
-                                    strlen(szUnConv),
-                                    m_encoding,
-                                    false //no BOM/external representation
-                                );
-
-        // NULL means the string could not be created (for instance because
-        // the bytes are not valid in m_encoding); don't touch it in that case
-        if ( theString == NULL )
-            return wxCONV_FAILED;
-
-        const size_t nOutLength = CFStringGetLength(theString);
-
-        if ( szOut == NULL )
-        {
-            CFRelease(theString);
-            return nOutLength;
-        }
-
-        // never request more characters than the string holds nor more than
-        // the caller's buffer can take
-        const size_t nCopy = nOutLength < nOutSize ? nOutLength : nOutSize;
-
-#if SIZEOF_WCHAR_T == 4
-        // fetch UTF-16 into a scratch buffer (+1 for the terminator that
-        // wxMBConvUTF16 relies on), then widen it into szOut
-        UniChar* szUniCharBuffer = new UniChar[nCopy + 1];
-
-        CFStringGetCharacters(theString, CFRangeMake(0, nCopy), szUniCharBuffer);
-        CFRelease(theString);
-
-        szUniCharBuffer[nCopy] = 0;
-
-        wxMBConvUTF16 converter;
-        const size_t nResult =
-            converter.MB2WC( szOut, (const char*)szUniCharBuffer, nOutSize );
-
-        delete [] szUniCharBuffer;
-
-        return nResult;
-#else
-        // wchar_t and UniChar have the same width: write straight into szOut
-        CFStringGetCharacters(theString, CFRangeMake(0, nCopy), (UniChar*)szOut);
-        CFRelease(theString);
-
-        if ( nCopy < nOutSize )
-            szOut[nCopy] = 0;
-
-        return nCopy;
-#endif
-    }
-
-    // Converts the NUL-terminated wide string szUnConv to m_encoding.
-    //
-    // szOut      destination buffer, or NULL to only query the length
-    // szUnConv   NUL-terminated wide character input
-    // nOutSize   capacity of szOut in bytes
-    //
-    // Returns the number of bytes produced (needed, if szOut is NULL), not
-    // counting the terminator, or wxCONV_FAILED if the text cannot be
-    // represented in m_encoding. The output is NUL-terminated only when the
-    // buffer has room left for the terminator.
-    size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
-    {
-        wxASSERT(szUnConv);
-
-#if SIZEOF_WCHAR_T == 4
-        // Core Foundation wants UTF-16, so re-encode the input first
-        wxMBConvUTF16 converter;
-        const size_t nUTF16Bytes = converter.WC2MB( NULL, szUnConv, 0 );
-        if ( nUTF16Bytes == wxCONV_FAILED )
-            return wxCONV_FAILED;
-
-        const size_t nBufSize = nUTF16Bytes / sizeof(UniChar);
-        UniChar* szUniBuffer = new UniChar[nBufSize + 1];
-        converter.WC2MB( (char*) szUniBuffer, szUnConv, nUTF16Bytes + sizeof(UniChar));
-#else
-        const size_t nBufSize = wxWcslen(szUnConv);
-        UniChar* szUniBuffer = (UniChar*) szUnConv;
-#endif
-
-        size_t nResult = wxCONV_FAILED;
-
-        CFStringRef theString = CFStringCreateWithCharactersNoCopy(
-                                    NULL, //allocator
-                                    szUniBuffer,
-                                    nBufSize,
-                                    kCFAllocatorNull //deallocator - we want to deallocate it ourselves
-                                );
-
-        if ( theString != NULL )
-        {
-            const CFIndex nLength = CFStringGetLength(theString);
-
-            //Note that CFStringGetBytes puts a BOM when converting to unicode
-            //so we check and use getchars instead in that case
-            if ( m_encoding == kCFStringEncodingUnicode )
-            {
-                if ( szOut != NULL )
-                {
-                    // nOutSize is in bytes: work out how many whole UniChars fit
-                    size_t nChars = nOutSize / sizeof(UniChar);
-                    if ( nChars > (size_t)nLength )
-                        nChars = (size_t)nLength;
-
-                    CFStringGetCharacters(theString, CFRangeMake(0, nChars), (UniChar*) szOut);
-
-                    if ( (nChars + 1) * sizeof(UniChar) <= nOutSize )
-                        ((UniChar*) szOut)[nChars] = 0;
-
-                    nResult = nChars * sizeof(UniChar);
-                }
-                else
-                {
-                    nResult = (size_t)nLength * sizeof(UniChar);
-                }
-            }
-            else
-            {
-                // Dry run without a buffer: tells us both how many bytes are
-                // needed and, because the loss byte is 0, whether every
-                // character is representable (conversion stops at the first
-                // one that is not).
-                CFIndex nNeeded = 0;
-                const CFIndex nConverted = CFStringGetBytes(
-                                               theString,
-                                               CFRangeMake(0, nLength),
-                                               m_encoding,
-                                               0,     //no replacement for unconvertible characters
-                                               false, //not an external representation
-                                               NULL,
-                                               0,
-                                               &nNeeded
-                                           );
-
-                if ( nConverted == nLength )
-                {
-                    if ( szOut != NULL )
-                    {
-                        CFIndex nUsed = 0;
-                        CFStringGetBytes(
-                            theString,
-                            CFRangeMake(0, nLength),
-                            m_encoding,
-                            0,
-                            false,
-                            (UInt8*) szOut,
-                            nOutSize,
-                            &nUsed
-                        );
-
-                        if ( (size_t)nUsed < nOutSize )
-                            szOut[nUsed] = 0;
-
-                        nResult = (size_t)nUsed;
-                    }
-                    else
-                    {
-                        nResult = (size_t)nNeeded;
-                    }
-                }
-            }
-
-            CFRelease(theString);
-        }
-
-#if SIZEOF_WCHAR_T == 4
-        delete [] szUniBuffer;
-#endif
-
-        return nResult;
-    }
-
-    virtual wxMBConv *Clone() const { return new wxMBConv_cocoa(*this); }
-
-    // True if the encoding is valid and Core Foundation can convert it.
-    bool IsOk() const
-    {
-        return m_encoding != kCFStringEncodingInvalidId &&
-              CFStringIsEncodingAvailable(m_encoding);
-    }
-
-private:
-    // encoding of the multibyte side of the conversion
-    CFStringEncoding m_encoding;
-};
-
-#endif // defined(__WXCOCOA__)
-
-// ============================================================================
-// Mac conversion classes
-// ============================================================================
-
-#if defined(__WXMAC__) && defined(TARGET_CARBON)
-
-// wxMBConv implementation for Carbon built on the Text Encoding Converter
-// (TEC): one converter object per direction between m_char_encoding and
-// 16 bit Unicode.
-//
-// When wchar_t is 4 bytes wide the UTF-16 data exchanged with TEC is
-// additionally converted to/from wchar_t with wxMBConvUTF16.
-class wxMBConv_mac : public wxMBConv
-{
-public:
-    // Uses the current system encoding.
-    wxMBConv_mac()
-    {
-        Init(CFStringGetSystemEncoding());
-    }
-
-    // Creates fresh converter objects for the same encoding rather than
-    // sharing the ones owned by conv.
-    wxMBConv_mac(const wxMBConv_mac& conv)
-    {
-        Init(conv.m_char_encoding);
-    }
-
-#if wxUSE_FONTMAP
-    // Looks the charset name up through the font mapper (non-interactively).
-    wxMBConv_mac(const wxChar* name)
-    {
-        Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) );
-    }
-#endif
-
-    wxMBConv_mac(wxFontEncoding encoding)
-    {
-        Init( wxMacGetSystemEncFromFontEnc(encoding) );
-    }
-
-    ~wxMBConv_mac()
-    {
-        // only dispose of converters that were actually created
-        if ( m_MB2WC_converter != NULL )
-            TECDisposeConverter(m_MB2WC_converter);
-        if ( m_WC2MB_converter != NULL )
-            TECDisposeConverter(m_WC2MB_converter);
-    }
-
-
-    // Stores the encodings and creates both converter objects. A converter
-    // that could not be created is left as NULL so that IsOk() and the
-    // destructor see a well-defined value.
-    void Init( TextEncodingBase encoding)
-    {
-        m_MB2WC_converter = NULL;
-        m_WC2MB_converter = NULL;
-
-        m_char_encoding = encoding;
-        m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 0, kUnicode16BitFormat);
-
-        if ( TECCreateConverter(&m_MB2WC_converter,
-                                m_char_encoding,
-                                m_unicode_encoding) != noErr )
-        {
-            m_MB2WC_converter = NULL;
-        }
-
-        if ( TECCreateConverter(&m_WC2MB_converter,
-                                m_unicode_encoding,
-                                m_char_encoding) != noErr )
-        {
-            m_WC2MB_converter = NULL;
-        }
-    }
-
-    // Converts the NUL-terminated multibyte string psz to wide characters.
-    //
-    // buf   destination buffer, or NULL to only compute the length (the
-    //       conversion is then done into a temporary buffer)
-    // psz   NUL-terminated input in m_char_encoding
-    // n     capacity of buf in wchar_t elements (ignored when buf is NULL)
-    //
-    // Returns the number of wide characters produced, or wxCONV_FAILED if the
-    // converter is unavailable or memory could not be allocated. The output
-    // is NUL-terminated only when there is room for the terminator.
-    //
-    // NOTE(review): the status returned by TECConvertText is still not
-    // interpreted; a failed call simply contributes zero output bytes.
-    size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
-    {
-        if ( m_MB2WC_converter == NULL )
-            return wxCONV_FAILED;
-
-        // zero-initialised so that a failing TECConvertText cannot leave us
-        // with an indeterminate length
-        ByteCount byteOutLen = 0;
-        ByteCount byteInLen = strlen(psz);
-        wchar_t *tbuf = NULL;
-        UniChar* ubuf = NULL;
-        size_t res = 0;
-
-        if (buf == NULL)
-        {
-            // Apple specs say at least 32
-            n = wxMax( 32, byteInLen );
-            tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T );
-            if ( tbuf == NULL )
-                return wxCONV_FAILED;
-        }
-
-        ByteCount byteBufferLen = n * sizeof( UniChar );
-
-#if SIZEOF_WCHAR_T == 4
-        // +2: room for the UTF-16 terminator added below
-        ubuf = (UniChar*) malloc( byteBufferLen + 2 );
-        if ( ubuf == NULL )
-        {
-            free(tbuf);
-            return wxCONV_FAILED;
-        }
-#else
-        ubuf = (UniChar*) (buf ? buf : tbuf);
-#endif
-
-        TECConvertText(
-            m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
-            (TextPtr) ubuf, byteBufferLen, &byteOutLen);
-
-#if SIZEOF_WCHAR_T == 4
-        // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
-        // is not properly terminated we get random characters at the end
-        ubuf[byteOutLen / sizeof( UniChar ) ] = 0;
-        wxMBConvUTF16 converter;
-        res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n );
-        free( ubuf );
-#else
-        res = byteOutLen / sizeof( UniChar );
-#endif
-
-        if ( buf == NULL )
-            free(tbuf);
-
-        if ( buf && res < n)
-            buf[res] = 0;
-
-        return res;
-    }
-
-    // Converts the NUL-terminated wide string psz to m_char_encoding.
-    //
-    // buf   destination buffer, or NULL to only compute the length (the
-    //       conversion is then done into a temporary buffer)
-    // psz   NUL-terminated wide character input
-    // n     capacity of buf in bytes (ignored when buf is NULL)
-    //
-    // Returns the number of bytes produced, or wxCONV_FAILED if the converter
-    // is unavailable, memory could not be allocated, the input could not be
-    // turned into UTF-16, or (when writing into buf with room to spare)
-    // converting the result back does not reproduce psz, i.e. the conversion
-    // was lossy.
-    size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
-    {
-        if ( m_WC2MB_converter == NULL )
-            return wxCONV_FAILED;
-
-        // zero-initialised so that a failing TECConvertText cannot leave us
-        // with an indeterminate length
-        ByteCount byteOutLen = 0;
-        ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T;
-
-        char *tbuf = NULL;
-
-        if (buf == NULL)
-        {
-            // Apple specs say at least 32
-            n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
-            tbuf = (char*) malloc( n );
-            if ( tbuf == NULL )
-                return wxCONV_FAILED;
-        }
-
-        ByteCount byteBufferLen = n;
-        UniChar* ubuf = NULL;
-
-#if SIZEOF_WCHAR_T == 4
-        // TEC wants UTF-16, so re-encode the input first
-        wxMBConvUTF16 converter;
-        size_t unicharlen = converter.WC2MB( NULL, psz, 0 );
-        if ( unicharlen == wxCONV_FAILED )
-        {
-            free(tbuf);
-            return wxCONV_FAILED;
-        }
-
-        byteInLen = unicharlen;
-        ubuf = (UniChar*) malloc( byteInLen + 2 );
-        if ( ubuf == NULL )
-        {
-            free(tbuf);
-            return wxCONV_FAILED;
-        }
-        converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 );
-#else
-        ubuf = (UniChar*) psz;
-#endif
-
-        TECConvertText(
-            m_WC2MB_converter, (ConstTextPtr) ubuf, byteInLen, &byteInLen,
-            (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
-
-#if SIZEOF_WCHAR_T == 4
-        free( ubuf );
-#endif
-
-        if ( buf == NULL )
-            free(tbuf);
-
-        size_t res = byteOutLen;
-        if ( buf && res < n)
-        {
-            buf[res] = 0;
-
-            //we need to double-trip to verify it didn't insert any ? in place
-            //of bogus characters
-            wxWCharBuffer wcBuf(n);
-            size_t pszlen = wxWcslen(psz);
-            if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
-                    wxWcslen(wcBuf) != pszlen ||
-                    memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
-            {
-                // we didn't obtain the same thing we started from, hence
-                // the conversion was lossy and we consider that it failed
-                return wxCONV_FAILED;
-            }
-        }
-
-        return res;
-    }
-
-    virtual wxMBConv *Clone() const { return new wxMBConv_mac(*this); }
-
-    // True if both converter objects exist.
-    bool IsOk() const
-        { return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL; }
-
-private:
-    // TEC converter objects, NULL if creation failed
-    TECObjectRef m_MB2WC_converter;
-    TECObjectRef m_WC2MB_converter;
-
-    // encoding of the multibyte side and the 16 bit Unicode encoding
-    TextEncodingBase m_char_encoding;
-    TextEncodingBase m_unicode_encoding;
-};
-
-#endif // defined(__WXMAC__) && defined(TARGET_CARBON)