X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/ea8ce907e19d774fab4661eef478e32a0b2fead3..088ddc4e370d808a2f7e923b671856c303dfa45c:/src/common/strconv.cpp

diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp
index 2603ebec34..88e49338d7 100644
--- a/src/common/strconv.cpp
+++ b/src/common/strconv.cpp
@@ -55,9 +55,6 @@
 #include <ctype.h>
 #include <string.h>
 #include <stdlib.h>
-#ifdef HAVE_LANGINFO_H
-  #include <langinfo.h>
-#endif
 
 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
     #define wxHAVE_WIN32_MB2WC
@@ -155,7 +152,7 @@ static size_t decode_utf16(const wxUint16* input, wxUint32& output)
         output = *input;
         return 1;
     }
-    else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
+    else if ((input[1]<0xdc00) || (input[1]>0xdfff))
     {
         output = *input;
         return (size_t)-1;
@@ -281,7 +278,7 @@ const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, si
 
     //success - return actual length and the buffer
     *pOutSize = nActualLength;
-    return theBuffer;  
+    return theBuffer;
 }
 
 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
@@ -316,7 +313,7 @@ const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen,
 
         //Increase the actual length (+1 for current null character)
         nActualLength += nLen + 1;
-        
+
         //if buffer too big, realloc the buffer
         if (nActualLength > (nCurrentSize+1))
         {
@@ -343,7 +340,7 @@ const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen,
 
     //success - return actual length and the buffer
     *pOutSize = nActualLength;
-    return theBuffer;  
+    return theBuffer;
 }
 
 // ----------------------------------------------------------------------------
@@ -360,52 +357,41 @@ size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
     return wxWC2MB(buf, psz, n);
 }
 
+#ifdef __UNIX__
+
 // ----------------------------------------------------------------------------
-// wxConvBrokenFileNames is made for GTK2 in Unicode mode when
-// files are accidentally written in an encoding which is not
-// the system encoding. Typically, the system encoding will be
-// UTF8 but there might be files stored in ISO8859-1 on disk. 
+// wxConvBrokenFileNames
 // ----------------------------------------------------------------------------
 
-class wxConvBrokenFileNames: public wxMBConvLibc
-{
-public:
-    wxConvBrokenFileNames() : m_utf8conv(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL) { }
-    virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
-    virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const;
-    inline bool UseUTF8() const;
-private:
-    wxMBConvUTF8 m_utf8conv;
-};
-
-bool wxConvBrokenFileNames::UseUTF8() const
+wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
 {
-#if defined HAVE_LANGINFO_H && defined CODESET
-    char *codeset = nl_langinfo(CODESET);
-    return strcmp(codeset, "UTF-8") == 0;
-#else
-    return false;
-#endif
+    if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
+                  || wxStricmp(charset, _T("UTF8")) == 0  )
+        m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
+    else
+        m_conv = new wxCSConv(charset);
 }
 
-size_t wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const
+size_t
+wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
+                             const char *psz,
+                             size_t outputSize) const
 {
-    if (UseUTF8())
-        return m_utf8conv.MB2WC( outputBuf, psz, outputSize );
-    else
-        return wxMBConvLibc::MB2WC( outputBuf, psz, outputSize );
+    return m_conv->MB2WC( outputBuf, psz, outputSize );
 }
 
-size_t wxConvBrokenFileNames::WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const
+size_t
+wxConvBrokenFileNames::WC2MB(char *outputBuf,
+                             const wchar_t *psz,
+                             size_t outputSize) const
 {
-    if (UseUTF8())
-        return m_utf8conv.WC2MB( outputBuf, psz, outputSize );
-    else
-        return wxMBConvLibc::WC2MB( outputBuf, psz, outputSize );
+    return m_conv->WC2MB( outputBuf, psz, outputSize );
 }
 
+#endif
+
 // ----------------------------------------------------------------------------
-// UTF-7 
+// UTF-7
 // ----------------------------------------------------------------------------
 
 // Implementation (C) 2004 Fredrik Roubert
@@ -558,7 +544,7 @@ size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
         }
 #ifndef WC_UTF16
         else if (((wxUint32)cc) > 0xffff)
-	    {
+        {
             // no surrogate pair generation (yet?)
             return (size_t)-1;
         }
@@ -615,6 +601,8 @@ size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 static wxUint32 utf8_max[]=
     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 
+// boundaries of the private use area we use to (temporarily) remap bytes
+// that are invalid in a UTF-8 encoded string
 const wxUint32 wxUnicodePUA = 0x100000;
 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 
@@ -636,6 +624,15 @@ size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
             if (buf)
                 *buf++ = cc;
             len++;
+
+            // escape the escape character for octal escapes
+            if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
+                    && cc == '\\' && (!buf || len < n))
+            {
+                if (buf)
+                    *buf++ = cc;
+                len++;
+            }
         }
         else
         {
@@ -718,26 +715,23 @@ size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 #endif
                     }
                 }
-                else
-                if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
+                else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
                 {
                     while (opsz < psz && (!buf || len < n))
                     {
-                        wchar_t str[6];
-                        wxSnprintf( str, 5, L"\\%o", (int) (unsigned char) *opsz );
-                        if (buf)
-                            *buf++ = str[0];
-                        if (buf)
-                            *buf++ = str[1];
-                        if (buf)
-                            *buf++ = str[2];
-                        if (buf)
-                            *buf++ = str[3];
+                        if ( buf && len + 3 < n )
+                        {
+                            unsigned char n = *opsz;
+                            *buf++ = L'\\';
+                            *buf++ = (wchar_t)( L'0' + n / 0100 );
+                            *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
+                            *buf++ = (wchar_t)( L'0' + n % 010 );
+                        }
                         opsz++;
                         len += 4;
                     }
                 }
-                else
+                else // MAP_INVALID_UTF8_NOT
                 {
                     return (size_t)-1;
                 }
@@ -749,6 +743,11 @@ size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
     return len;
 }
 
+static inline bool isoctal(wchar_t wch) // true iff wch is an octal digit, L'0'..L'7'
+{
+    return L'0' <= wch && wch <= L'7';
+}
+
 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 {
     size_t len = 0;
@@ -763,26 +762,34 @@ size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 #else
         cc=(*psz++) & 0x7fffffff;
 #endif
-        if ((m_options & MAP_INVALID_UTF8_TO_PUA)
-            && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd)
+
+        if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
+                && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
         {
             if (buf)
                 *buf++ = (char)(cc - wxUnicodePUA);
             len++;
-        } 
-        else
-        if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
-            && cc == L'\\')
+        }
+        else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
+                    && cc == L'\\' && psz[0] == L'\\' )
+        {
+            if (buf)
+                *buf++ = (char)cc;
+            psz++;
+            len++;
+        }
+        else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
+                    cc == L'\\' &&
+                        isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
         {
-            wchar_t str[4];
-            str[0] = *psz; psz++;
-            str[1] = *psz; psz++;
-            str[2] = *psz; psz++;
-            str[3] = 0;
-            int octal;
-            wxSscanf( str, L"%o", &octal );
             if (buf)
-                *buf++ = (char) octal;
+            {
+                *buf++ = (char) ((psz[0] - L'0')*0100 +
+                                 (psz[1] - L'0')*010 +
+                                 (psz[2] - L'0'));
+            }
+
+            psz += 3;
             len++;
         }
         else
@@ -810,7 +817,8 @@ size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
         }
     }
 
-    if (buf && (len<n)) *buf = 0;
+    if (buf && (len<n))
+        *buf = 0;
 
     return len;
 }
@@ -1448,7 +1456,7 @@ size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
     //     as MB<->WC conversion would fail "randomly".
     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
 #endif
- 
+
     size_t inbuf = strlen(psz);
     size_t outbuf = n * SIZEOF_WCHAR_T;
     size_t res, cres;
@@ -1510,7 +1518,7 @@ size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
     // NB: explained in MB2WC
     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
 #endif
-    
+
     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
     size_t outbuf = n;
     size_t res, cres;
@@ -2083,9 +2091,9 @@ public:
 #if SIZEOF_WCHAR_T == 4
         UniChar* szUniCharBuffer = new UniChar[nOutSize];
 #endif
- 
+
         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
-        
+
         CFRelease(theString);
 
         szUniCharBuffer[nOutLength] = '\0' ;
@@ -2095,14 +2103,14 @@ public:
         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
         delete[] szUniCharBuffer;
 #endif
-    
+
         return nOutLength;
     }
 
     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
     {
         wxASSERT(szUnConv);
-        
+
         size_t nRealOutSize;
         size_t nBufSize = wxWcslen(szUnConv);
         UniChar* szUniBuffer = (UniChar*) szUnConv;
@@ -2130,7 +2138,7 @@ public:
         {
             if (szOut != NULL)
                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
-            
+
             nRealOutSize = CFStringGetLength(theString) + 1;
         }
         else
@@ -2143,7 +2151,7 @@ public:
                     //0 tells CFString to return NULL if it meets such a character
                 false, //not an external representation
                 (UInt8*) szOut,
-                nOutSize, 
+                nOutSize,
                 (CFIndex*) &nRealOutSize
                         );
         }
@@ -2159,7 +2167,7 @@ public:
 
     bool IsOk() const
     {
-        return m_encoding != kCFStringEncodingInvalidId && 
+        return m_encoding != kCFStringEncodingInvalidId &&
               CFStringIsEncodingAvailable(m_encoding);
     }
 
@@ -2297,7 +2305,7 @@ public:
         if ( buf  && res < n)
         {
             buf[res] = 0;
-            
+
             //we need to double-trip to verify it didn't insert any ? in place
             //of bogus characters
             wxWCharBuffer wcBuf(n);
@@ -2536,7 +2544,7 @@ wxMBConv *wxCSConv::DoCreate() const
 #if defined(__WXMAC__)
     {
         // leave UTF16 and UTF32 to the built-ins of wx
-        if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE || 
+        if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
         {
 
@@ -2731,7 +2739,6 @@ static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
 static wxMBConvUTF7 wxConvUTF7Obj;
 static wxMBConvUTF8 wxConvUTF8Obj;
-static wxConvBrokenFileNames wxConvBrokenFileNamesObj;
 
 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
@@ -2742,8 +2749,6 @@ WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
 #ifdef __WXOSX__
                                     wxConvUTF8Obj;
-#elif __WXGTK20__
-                                    wxConvBrokenFileNamesObj;
 #else
                                     wxConvLibcObj;
 #endif