From: David Elliott <dfe@tgwbd.org>
Date: Fri, 6 Jul 2007 22:38:27 +0000 (+0000)
Subject: Fix wxMBConv_cf to implement FromWChar/ToWChar in lieu of now deprecated WC2MB/MB2WC.
X-Git-Url: https://git.saurik.com/wxWidgets.git/commitdiff_plain/6ff49cbcd4d66b958014176594b9be1796bd243a

Fix wxMBConv_cf to implement FromWChar/ToWChar in lieu of now deprecated WC2MB/MB2WC.
This is a complete rewrite trying built-in UTF-32 conversion first, then falling back to conversion through UTF-16.


git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@47206 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
---

diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp
index db7b34c1ba..fb6b7b6f9c 100644
--- a/src/common/strconv.cpp
+++ b/src/common/strconv.cpp
@@ -59,6 +59,8 @@
 #ifdef __DARWIN__
 #include <CoreFoundation/CFString.h>
 #include <CoreFoundation/CFStringEncodingExt.h>
+
+#include "wx/mac/corefoundation/cfref.h"
 #endif //def __DARWIN__
 
 #ifdef __WXMAC__
@@ -2307,10 +2309,6 @@ private:
 
 #ifdef __DARWIN__
 
-// RN: There is no UTF-32 support in either Core Foundation or Cocoa.
-// Strangely enough, internally Core Foundation uses
-// UTF-32 internally quite a bit - its just not public (yet).
-
 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
 {
     CFStringEncoding enc = kCFStringEncodingInvalidId ;
@@ -2439,9 +2437,12 @@ CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
         case wxFONTENCODING_EUC_JP :
             enc = kCFStringEncodingEUC_JP;
             break ;
+/* Don't support conversion to/from UTF16 as wxWidgets can do this better.
+ * In particular, ToWChar would fail miserably using strlen on an input UTF16.
         case wxFONTENCODING_UTF16 :
             enc = kCFStringEncodingUnicode ;
             break ;
+*/
         case wxFONTENCODING_MACROMAN :
             enc = kCFStringEncodingMacRoman ;
             break ;
@@ -2571,6 +2572,16 @@ CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
     return enc ;
 }
 
+#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_4
+// Provide a constant for the wchat_t encoding used by the host platform.
+#ifdef WORDS_BIGENDIAN
+    static const CFStringEncoding wxCFStringEncodingWcharT = kCFStringEncodingUTF32BE;
+#else
+    static const CFStringEncoding wxCFStringEncodingWcharT = kCFStringEncodingUTF32LE;
+#endif
+
+#endif /* MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_4 */
+
 class wxMBConv_cf : public wxMBConv
 {
 public:
@@ -2605,105 +2616,167 @@ public:
         m_encoding = encoding ;
     }
 
-    size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
+    virtual size_t ToWChar(wchar_t * dst, size_t dstSize, const char * src, size_t srcSize = wxNO_LEN) const
     {
-        wxASSERT(szUnConv);
+        wxCHECK(src, wxCONV_FAILED);
 
-        CFStringRef theString = CFStringCreateWithBytes (
+        /* NOTE: This is wrong if the source encoding has an element size
+         * other than char (e.g. it's kCFStringEncodingUnicode)
+         * If the user specifies it, it's presumably right though.
+         * Right now we don't support UTF-16 in anyway since wx can do a better job.
+         */
+        if(srcSize == wxNO_LEN)
+            srcSize = strlen(src) + 1;
+
+        // First create the temporary CFString
+        wxCFRef<CFStringRef> theString( CFStringCreateWithBytes (
                                                 NULL, //the allocator
-                                                (const UInt8*)szUnConv,
-                                                strlen(szUnConv),
+                                                (const UInt8*)src,
+                                                srcSize,
                                                 m_encoding,
                                                 false //no BOM/external representation
-                                                );
-
-        wxASSERT(theString);
+                                                ));
 
-        size_t nOutLength = CFStringGetLength(theString);
-
-        if (szOut == NULL)
-        {
-            CFRelease(theString);
-            return nOutLength;
-        }
+        wxCHECK(theString != NULL, wxCONV_FAILED);
 
-        CFRange theRange = { 0, nOutSize };
-
-#if SIZEOF_WCHAR_T == 4
-        UniChar* szUniCharBuffer = new UniChar[nOutSize];
-#endif
+        /* NOTE: The string content includes the NULL element if the source string did
+         * That means we have to do nothing special because the destination will have
+         * the NULL element iff the source did and the NULL element will be included
+         * in the count iff it was included in the source count.
+         */
 
-        CFStringGetCharacters(theString, theRange, szUniCharBuffer);
 
-        CFRelease(theString);
+/* If we're compiling against Tiger headers we can support direct conversion
+ * to UTF32.  If we are then run against a pre-Tiger system, the encoding
+ * won't be available so we'll defer to the string->UTF-16->UTF-32 conversion.
+ */
+#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_4
+        if(CFStringIsEncodingAvailable(wxCFStringEncodingWcharT))
+        {
+            CFRange fullStringRange = CFRangeMake(0, CFStringGetLength(theString));
+            CFIndex usedBufLen;
+
+            CFIndex charsConverted = CFStringGetBytes(
+                    theString,
+                    fullStringRange,
+                    wxCFStringEncodingWcharT,
+                    0,
+                    false,
+                    // if dstSize is 0 then pass NULL to get required length in usedBufLen
+                    dstSize != 0?(UInt8*)dst:NULL,
+                    dstSize * sizeof(wchar_t),
+                    &usedBufLen);
+
+            // charsConverted is > 0 iff conversion succeeded
+            if(charsConverted <= 0)
+                return wxCONV_FAILED;
 
-        szUniCharBuffer[nOutLength] = '\0';
+            /* usedBufLen is the number of bytes written, so we divide by
+             * sizeof(wchar_t) to get the number of elements written.
+             */
+            wxASSERT( (usedBufLen % sizeof(wchar_t)) == 0 );
 
-#if SIZEOF_WCHAR_T == 4
-        wxMBConvUTF16 converter;
-        converter.MB2WC( szOut, (const char*)szUniCharBuffer, nOutSize );
-        delete [] szUniCharBuffer;
-#endif
+            // CFStringGetBytes does exactly the right thing when buffer
+            // pointer is NULL and returns the number of bytes required
+            return usedBufLen / sizeof(wchar_t);
+        }
+        else
+#endif /* MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_4 */
+        {
+            // NOTE: Includes NULL iff source did
+            /* NOTE: This is an approximation.  The eventual UTF-32 will    
+             * possibly have less elements but certainly not more.
+             */
+            size_t returnSize = CFStringGetLength(theString);
+    
+            if (dstSize == 0 || dst == NULL)
+            {
+                return returnSize;
+            }
 
-        return nOutLength;
+            // Convert the entire string.. too hard to figure out how many UTF-16 we'd need
+            // for an undersized UTF-32 destination buffer.
+            CFRange fullStringRange = CFRangeMake(0, CFStringGetLength(theString));
+            UniChar *szUniCharBuffer = new UniChar[fullStringRange.length];
+    
+            CFStringGetCharacters(theString, fullStringRange, szUniCharBuffer);
+    
+            wxMBConvUTF16 converter;
+            returnSize = converter.ToWChar( dst, dstSize, (const char*)szUniCharBuffer, fullStringRange.length );
+            delete [] szUniCharBuffer;
+    
+            return returnSize;
+        }
+        // NOTREACHED
     }
 
-    size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
+    virtual size_t FromWChar(char *dst, size_t dstSize, const wchar_t *src, size_t srcSize) const
     {
-        wxASSERT(szUnConv);
-
-        size_t nRealOutSize;
-        size_t nBufSize = wxWcslen(szUnConv);
-        UniChar* szUniBuffer = (UniChar*) szUnConv;
-
-#if SIZEOF_WCHAR_T == 4
-        wxMBConvUTF16 converter ;
-        nBufSize = converter.WC2MB( NULL, szUnConv, 0 );
-        szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1];
-        converter.WC2MB( (char*) szUniBuffer, szUnConv, nBufSize + sizeof(UniChar));
-        nBufSize /= sizeof(UniChar);
-#endif
+        wxCHECK(src, wxCONV_FAILED);
 
-        CFStringRef theString = CFStringCreateWithCharactersNoCopy(
-                                NULL, //allocator
-                                szUniBuffer,
-                                nBufSize,
-                                kCFAllocatorNull //deallocator - we want to deallocate it ourselves
-                            );
+        if(srcSize == wxNO_LEN)
+            srcSize = wxStrlen(src) + 1;
 
-        wxASSERT(theString);
+        // Temporary CFString
+        wxCFRef<CFStringRef> theString;
 
-        //Note that CER puts a BOM when converting to unicode
-        //so we  check and use getchars instead in that case
-        if (m_encoding == kCFStringEncodingUnicode)
+/* If we're compiling against Tiger headers we can support direct conversion
+ * from UTF32.  If we are then run against a pre-Tiger system, the encoding
+ * won't be available so we'll defer to the UTF-32->UTF-16->string conversion.
+ */
+#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_4
+        if(CFStringIsEncodingAvailable(wxCFStringEncodingWcharT))
         {
-            if (szOut != NULL)
-                CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
-
-            nRealOutSize = CFStringGetLength(theString) + 1;
+            theString = wxCFRef<CFStringRef>(CFStringCreateWithBytes(
+                    kCFAllocatorDefault,
+                    (UInt8*)src,
+                    srcSize * sizeof(wchar_t),
+                    wxCFStringEncodingWcharT,
+                    false));
         }
         else
+#endif /* MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_4 */
         {
-            CFStringGetBytes(
-                theString,
-                CFRangeMake(0, CFStringGetLength(theString)),
-                m_encoding,
-                0, //what to put in characters that can't be converted -
-                    //0 tells CFString to return NULL if it meets such a character
-                false, //not an external representation
-                (UInt8*) szOut,
-                nOutSize,
-                (CFIndex*) &nRealOutSize
-                        );
+            wxMBConvUTF16 converter;
+            size_t cbUniBuffer = converter.FromWChar( NULL, 0, src, srcSize );
+            wxASSERT(cbUniBuffer % sizeof(UniChar));
+
+            // Will be free'd by kCFAllocatorMalloc when CFString is released
+            UniChar *tmpUniBuffer = (UniChar*)malloc(cbUniBuffer);
+
+            cbUniBuffer = converter.FromWChar( (char*) tmpUniBuffer, cbUniBuffer, src, srcSize );
+            wxASSERT(cbUniBuffer % sizeof(UniChar));
+
+            theString = wxCFRef<CFStringRef>(CFStringCreateWithCharactersNoCopy(
+                        kCFAllocatorDefault,
+                        tmpUniBuffer,
+                        cbUniBuffer / sizeof(UniChar),
+                        kCFAllocatorMalloc
+                    ));
+
         }
 
-        CFRelease(theString);
+        wxCHECK(theString != NULL, wxCONV_FAILED);
 
-#if SIZEOF_WCHAR_T == 4
-        delete[] szUniBuffer;
-#endif
+        CFIndex usedBufLen;
+
+        CFIndex charsConverted = CFStringGetBytes(
+                theString, 
+                CFRangeMake(0, CFStringGetLength(theString)),
+                m_encoding,
+                0, // FAIL on unconvertible characters
+                false, // not an external representation
+                // if dstSize is 0 then pass NULL to get required length in usedBufLen
+                (dstSize != 0)?(UInt8*)dst:NULL,
+                dstSize,
+                &usedBufLen
+            );
+
+        // charsConverted is > 0 iff conversion succeeded
+        if(charsConverted <= 0)
+            return wxCONV_FAILED;
 
-        return  nRealOutSize - 1;
+        return usedBufLen;
     }
 
     virtual wxMBConv *Clone() const { return new wxMBConv_cf(*this); }
@@ -3421,7 +3494,9 @@ wxMBConv *wxCSConv::DoCreate() const
 
 #ifdef __DARWIN__
     {
-        if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
+        // leave UTF16 and UTF32 to the built-ins of wx
+        if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
+            ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
         {
 #if wxUSE_FONTMAP
             wxMBConv_cf *conv = m_name ? new wxMBConv_cf(m_name)