+ cc = *psz;
+ if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
+ break;
+ }
+
+ if (l != 0)
+ {
+ if (buf)
+ *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
+
+ len++;
+ }
+ }
+
+ if (buf)
+ *buf++ = '-';
+ len++;
+ }
+ }
+
+ if (buf && (len < n))
+ *buf = 0;
+
+ return len;
+}
+
+// ----------------------------------------------------------------------------
+// UTF-8
+// ----------------------------------------------------------------------------
+
+// utf8_max[i] is the largest value which can be encoded by a UTF-8 sequence
+// of i + 1 bytes (7, 11, 16, 21, 26 and 31 payload bits respectively); the
+// last 0xffffffff entry is a catch-all which guarantees that a search of the
+// form "value > utf8_max[i]" terminates for any 32 bit value
+static wxUint32 utf8_max[]=
+ { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
+
+// boundaries of the private use area we use to (temporarily) remap bytes
+// which are invalid in a UTF-8 encoded string: such a byte b is represented
+// by the character wxUnicodePUA + b, so the half-open range
+// [wxUnicodePUA, wxUnicodePUAEnd) has room for all 256 possible byte values
+const wxUint32 wxUnicodePUA = 0x100000;
+const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
+
+// Convert the NUL-terminated UTF-8 string psz to wide characters.
+//
+// If buf is NULL nothing is stored and only the number of wide characters
+// is counted, otherwise the loop stops as soon as len reaches n.
+//
+// Invalid input is handled according to m_options: with
+// MAP_INVALID_UTF8_TO_PUA each offending byte b becomes wxUnicodePUA + b, with
+// MAP_INVALID_UTF8_TO_OCTAL it becomes a 4 character "\ooo" escape (and a
+// literal backslash is doubled), otherwise wxCONV_FAILED is returned.
+//
+// Returns the number of wide characters counted; the terminating NUL is only
+// stored if there is room for it and is never counted.
+size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
+{
+ size_t len = 0;
+
+ while (*psz && ((!buf) || (len < n)))
+ {
+ // remember where this sequence starts so that its bytes can be
+ // re-emitted one by one if it turns out to be invalid
+ const char *opsz = psz;
+ bool invalid = false;
+ unsigned char cc = *psz++, fc = cc;
+ // count the leading 1 bits of the first byte: 0 for ASCII, 1 for a
+ // stray continuation byte, k for the lead byte of a k byte sequence
+ unsigned cnt;
+ for (cnt = 0; fc & 0x80; cnt++)
+ fc <<= 1;
+
+ if (!cnt)
+ {
+ // plain ASCII char
+ if (buf)
+ *buf++ = cc;
+ len++;
+
+ // escape the escape character for octal escapes
+ if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
+ && cc == '\\' && (!buf || len < n))
+ {
+ if (buf)
+ *buf++ = cc;
+ len++;
+ }
+ }
+ else
+ {
+ // cnt is now the number of continuation bytes expected
+ cnt--;
+ if (!cnt)
+ {
+ // invalid UTF-8 sequence
+ invalid = true;
+ }
+ else
+ {
+ // ocnt indexes the largest value a sequence one byte shorter
+ // could encode, used below to detect overlong encodings
+ unsigned ocnt = cnt - 1;
+ // payload bits carried by the lead byte
+ wxUint32 res = cc & (0x3f >> cnt);
+ while (cnt--)
+ {
+ cc = *psz;
+ if ((cc & 0xC0) != 0x80)
+ {
+ // invalid UTF-8 sequence
+ invalid = true;
+ break;
+ }
+
+ psz++;
+ res = (res << 6) | (cc & 0x3f);
+ }
+
+ // a value which would have fitted in a shorter sequence means
+ // that the encoding is overlong
+ if (invalid || res <= utf8_max[ocnt])
+ {
+ // illegal UTF-8 encoding
+ invalid = true;
+ }
+ else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
+ res >= wxUnicodePUA && res < wxUnicodePUAEnd)
+ {
+ // if one of our PUA characters turns up externally
+ // it must also be treated as an illegal sequence
+ // (a bit like you have to escape an escape character)
+ invalid = true;
+ }
+ else
+ {
+#ifdef WC_UTF16
+ // cast is ok because wchar_t == wxUuint16 if WC_UTF16
+ // NOTE(review): only "len < n" is known to hold here, nothing
+ // checks that a second unit (pa == 2) still fits in buf --
+ // confirm that callers size the buffer accordingly
+ size_t pa = encode_utf16(res, (wxUint16 *)buf);
+ if (pa == wxCONV_FAILED)
+ {
+ invalid = true;
+ }
+ else
+ {
+ if (buf)
+ buf += pa;
+ len += pa;
+ }
+#else // !WC_UTF16
+ if (buf)
+ *buf++ = (wchar_t)res;
+ len++;
+#endif // WC_UTF16/!WC_UTF16
+ }
+ }
+
+ if (invalid)
+ {
+ if (m_options & MAP_INVALID_UTF8_TO_PUA)
+ {
+ // map every byte consumed for this sequence to PUA + byte
+ while (opsz < psz && (!buf || len < n))
+ {
+#ifdef WC_UTF16
+ // cast is ok because wchar_t == wxUuint16 if WC_UTF16
+ size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
+ wxASSERT(pa != wxCONV_FAILED);
+ if (buf)
+ buf += pa;
+ opsz++;
+ len += pa;
+#else
+ if (buf)
+ *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
+ opsz++;
+ len++;
+#endif
+ }
+ }
+ else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
+ {
+ // map every byte consumed for this sequence to "\ooo"
+ while (opsz < psz && (!buf || len < n))
+ {
+ // the escape is only stored if all 4 characters fit but
+ // len is advanced by 4 in any case
+ if ( buf && len + 3 < n )
+ {
+ unsigned char on = *opsz;
+ *buf++ = L'\\';
+ *buf++ = (wchar_t)( L'0' + on / 0100 );
+ *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
+ *buf++ = (wchar_t)( L'0' + on % 010 );
+ }
+
+ opsz++;
+ len += 4;
+ }
+ }
+ else // MAP_INVALID_UTF8_NOT
+ {
+ return wxCONV_FAILED;
+ }
+ }
+ }
+ }
+
+ // NUL-terminate the output only if there is still room for it
+ if (buf && (len < n))
+ *buf = 0;
+
+ return len;
+}
+
+// Return true if wch is one of the octal digits L'0' .. L'7'.
+static inline bool isoctal(wchar_t wch)
+{
+    if ( wch < L'0' )
+        return false;
+
+    return wch <= L'7';
+}
+
+// Convert the NUL-terminated wide string psz to UTF-8.
+//
+// If buf is NULL only the number of bytes needed is computed, otherwise at
+// most n bytes are stored in buf: the conversion stops at a character
+// boundary if the next multi-byte sequence would not fit entirely.
+//
+// Depending on m_options the representations of invalid bytes are undone:
+// with MAP_INVALID_UTF8_TO_PUA characters in [wxUnicodePUA, wxUnicodePUAEnd)
+// become the single byte they stand for, with MAP_INVALID_UTF8_TO_OCTAL a
+// doubled backslash becomes one backslash and "\ooo" becomes the byte with
+// this octal value.
+//
+// Returns the number of bytes counted, not including the terminating NUL
+// which is only stored if there is room left for it.
+size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
+{
+    size_t len = 0;
+
+    while (*psz && ((!buf) || (len < n)))
+    {
+        wxUint32 cc;
+
+#ifdef WC_UTF16
+        // cast is ok for WC_UTF16
+        size_t pa = decode_utf16((const wxUint16 *)psz, cc);
+        psz += (pa == wxCONV_FAILED) ? 1 : pa;
+#else
+        cc = (*psz++) & 0x7fffffff;
+#endif
+
+        if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
+                && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
+        {
+            // one of our PUA characters: restore the original byte
+            if (buf)
+                *buf++ = (char)(cc - wxUnicodePUA);
+            len++;
+        }
+        else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
+                    && cc == L'\\' && psz[0] == L'\\' )
+        {
+            // escaped backslash: output a single one and skip the second
+            if (buf)
+                *buf++ = (char)cc;
+            psz++;
+            len++;
+        }
+        else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
+                    cc == L'\\' &&
+                        isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
+        {
+            // octal escape: the three digits following the backslash give
+            // the value of the byte to output
+            if (buf)
+            {
+                *buf++ = (char) ((psz[0] - L'0') * 0100 +
+                                 (psz[1] - L'0') * 010 +
+                                 (psz[2] - L'0'));
+            }
+
+            psz += 3;
+            len++;
+        }
+        else
+        {
+            // find the number of continuation bytes needed for cc
+            unsigned cnt;
+            for (cnt = 0; cc > utf8_max[cnt]; cnt++)
+            {
+            }
+
+            if (!cnt)
+            {
+                // plain ASCII char
+                if (buf)
+                    *buf++ = (char) cc;
+                len++;
+            }
+            else
+            {
+                // the sequence takes cnt + 1 bytes: never write past the end
+                // of the output buffer (len < n is guaranteed here by the
+                // loop condition, so the subtraction can't wrap around)
+                if ( buf && cnt + 1 > n - len )
+                    break;
+
+                len += cnt + 1;
+                if (buf)
+                {
+                    // lead byte: cnt + 1 high bits set followed by the top
+                    // payload bits, then cnt continuation bytes of 6 bits
+                    *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
+                    while (cnt--)
+                        *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
+                }
+            }
+        }
+    }
+
+    // NUL-terminate the output only if there is still room for it
+    if (buf && (len < n))
+        *buf = 0;
+
+    return len;
+}
+
+// ============================================================================
+// UTF-16
+// ============================================================================
+
+// the "straight" functions below copy UTF-16 units unchanged while the "swap"
+// ones byte-swap each unit; these aliases attach each set to the BE or LE
+// converter class according to whether WORDS_BIGENDIAN is defined
+#ifdef WORDS_BIGENDIAN
+ #define wxMBConvUTF16straight wxMBConvUTF16BE
+ #define wxMBConvUTF16swap wxMBConvUTF16LE
+#else
+ #define wxMBConvUTF16swap wxMBConvUTF16BE
+ #define wxMBConvUTF16straight wxMBConvUTF16LE
+#endif
+
+// Return the size in bytes of the UTF-16 input: either the explicitly given
+// srcLen, which must be a multiple of BYTES_PER_CHAR (wxCONV_FAILED is
+// returned otherwise), or, for srcLen == wxNO_LEN, the size found by scanning
+// for the terminating NUL unit, which is included in the result.
+/* static */
+size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
+{
+    if ( srcLen != wxNO_LEN )
+    {
+        // explicit length: only an entire number of UTF-16 units is accepted
+        if ( srcLen % BYTES_PER_CHAR )
+            return wxCONV_FAILED;
+
+        return srcLen;
+    }
+
+    // no length given: count the units up to and including the trailing NUL
+    const wxUint16 *unit = wx_reinterpret_cast(const wxUint16 *, src);
+    size_t numUnits = 1;
+    while ( *unit )
+    {
+        ++unit;
+        ++numUnits;
+    }
+
+    return numUnits * BYTES_PER_CHAR;
+}
+
+// case when in-memory representation is UTF-16 too
+#ifdef WC_UTF16
+
+// ----------------------------------------------------------------------------
+// conversions without endianness change
+// ----------------------------------------------------------------------------
+
+// UTF-16 in native byte order to UTF-16 wchar_t: a plain copy. Returns the
+// number of wchar_t needed (dst == NULL) or written, or wxCONV_FAILED if the
+// input length is invalid or dst is too small.
+size_t
+wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
+                               const char *src, size_t srcLen) const
+{
+    const size_t numBytes = GetLength(src, srcLen);
+    if ( numBytes == wxNO_LEN )
+        return wxCONV_FAILED;
+
+    const size_t numUnits = numBytes / BYTES_PER_CHAR;
+
+    // without an output buffer only the required size is reported
+    if ( !dst )
+        return numUnits;
+
+    if ( dstLen < numUnits )
+        return wxCONV_FAILED;
+
+    // input and output have the same representation, so copy in one go
+    memcpy(dst, src, numBytes);
+
+    return numUnits;
+}
+
+// UTF-16 wchar_t to UTF-16 in native byte order: a plain copy. Returns the
+// number of bytes needed (dst == NULL) or written, or wxCONV_FAILED if dst is
+// too small. With srcLen == wxNO_LEN the terminating NUL is converted too.
+size_t
+wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
+                                 const wchar_t *src, size_t srcLen) const
+{
+    size_t numUnits = srcLen;
+    if ( numUnits == wxNO_LEN )
+        numUnits = wxWcslen(src) + 1;
+
+    const size_t numBytes = numUnits * BYTES_PER_CHAR;
+
+    // without an output buffer only the required size is reported
+    if ( !dst )
+        return numBytes;
+
+    if ( dstLen < numBytes )
+        return wxCONV_FAILED;
+
+    memcpy(dst, src, numBytes);
+
+    return numBytes;
+}
+
+// ----------------------------------------------------------------------------
+// endian-reversing conversions
+// ----------------------------------------------------------------------------
+
+// UTF-16 in non-native byte order to UTF-16 wchar_t: every unit is
+// byte-swapped. Returns the number of wchar_t needed (dst == NULL) or written,
+// or wxCONV_FAILED if the input length is invalid or dst is too small.
+size_t
+wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
+                           const char *src, size_t srcLen) const
+{
+    const size_t numBytes = GetLength(src, srcLen);
+    if ( numBytes == wxNO_LEN )
+        return wxCONV_FAILED;
+
+    const size_t numUnits = numBytes / BYTES_PER_CHAR;
+
+    // without an output buffer only the required size is reported
+    if ( !dst )
+        return numUnits;
+
+    if ( dstLen < numUnits )
+        return wxCONV_FAILED;
+
+    const wxUint16 *in = wx_reinterpret_cast(const wxUint16 *, src);
+    const wxUint16 * const inEnd = in + numUnits;
+    while ( in != inEnd )
+    {
+        *dst++ = wxUINT16_SWAP_ALWAYS(*in);
+        ++in;
+    }
+
+    return numUnits;
+}
+
+// UTF-16 wchar_t to UTF-16 in non-native byte order: every unit is
+// byte-swapped. Returns the number of bytes needed (dst == NULL) or written,
+// or wxCONV_FAILED if dst is too small. With srcLen == wxNO_LEN the
+// terminating NUL is converted too.
+size_t
+wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
+                             const wchar_t *src, size_t srcLen) const
+{
+    size_t numUnits = srcLen;
+    if ( numUnits == wxNO_LEN )
+        numUnits = wxWcslen(src) + 1;
+
+    const size_t numBytes = numUnits * BYTES_PER_CHAR;
+
+    // without an output buffer only the required size is reported
+    if ( !dst )
+        return numBytes;
+
+    if ( dstLen < numBytes )
+        return wxCONV_FAILED;
+
+    wxUint16 *out = wx_reinterpret_cast(wxUint16 *, dst);
+    for ( const wchar_t * const srcEnd = src + numUnits; src != srcEnd; ++src )
+    {
+        *out++ = wxUINT16_SWAP_ALWAYS(*src);
+    }
+
+    return numBytes;
+}
+
+#else // !WC_UTF16: wchar_t is UTF-32
+
+// ----------------------------------------------------------------------------
+// conversions without endianness change
+// ----------------------------------------------------------------------------
+
+// UTF-16 in native byte order to UTF-32 wchar_t. With dst == NULL the number
+// of input units is returned as an upper bound of the space needed; otherwise
+// returns the number of wchar_t written, or wxCONV_FAILED if the input is
+// invalid or dst is too small.
+size_t
+wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
+                               const char *src, size_t srcLen) const
+{
+    const size_t numBytes = GetLength(src, srcLen);
+    if ( numBytes == wxNO_LEN )
+        return wxCONV_FAILED;
+
+    const size_t numUnits = numBytes / BYTES_PER_CHAR;
+
+    // optimization: don't decode just to compute the size, the number of
+    // units is the maximum which could be needed (it is only smaller if the
+    // input contains surrogates)
+    if ( !dst )
+        return numUnits;
+
+    const wxUint16 *in = wx_reinterpret_cast(const wxUint16 *, src);
+    const wxUint16 * const inEnd = in + numUnits;
+
+    size_t written = 0;
+    while ( in < inEnd )
+    {
+        // advances in past the unit(s) decoded or resets it to NULL on error
+        const wxUint32 ch = wxDecodeSurrogate(&in);
+        if ( !in )
+            return wxCONV_FAILED;
+
+        if ( written >= dstLen )
+            return wxCONV_FAILED;
+
+        *dst++ = ch;
+        ++written;
+    }
+
+    return written;
+}
+
+// UTF-32 wchar_t to UTF-16 in native byte order. Returns the number of bytes
+// needed (dst == NULL) or written, or wxCONV_FAILED if a character can't be
+// encoded or dst is too small. With srcLen == wxNO_LEN the terminating NUL is
+// converted too.
+size_t
+wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
+                                 const wchar_t *src, size_t srcLen) const
+{
+    size_t numChars = srcLen;
+    if ( numChars == wxNO_LEN )
+        numChars = wxWcslen(src) + 1;
+
+    wxUint16 *out = wx_reinterpret_cast(wxUint16 *, dst);
+    size_t numBytes = 0;
+    for ( const wchar_t * const srcEnd = src + numChars; src != srcEnd; ++src )
+    {
+        wxUint16 units[2];
+        const size_t count = encode_utf16(*src, units);
+        if ( count == wxCONV_FAILED )
+            return wxCONV_FAILED;
+
+        numBytes += count * BYTES_PER_CHAR;
+
+        // only counting: nothing to store
+        if ( !out )
+            continue;
+
+        if ( numBytes > dstLen )
+            return wxCONV_FAILED;
+
+        *out++ = units[0];
+        if ( count == 2 )
+        {
+            // second half of a surrogate pair
+            *out++ = units[1];
+        }
+    }
+
+    return numBytes;
+}
+
+// ----------------------------------------------------------------------------
+// endian-reversing conversions
+// ----------------------------------------------------------------------------
+
+// UTF-16 in non-native byte order to UTF-32 wchar_t. With dst == NULL the
+// number of input units is returned as an upper bound of the space needed;
+// otherwise returns the number of wchar_t written, or wxCONV_FAILED if the
+// input is invalid or dst is too small.
+size_t
+wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
+                           const char *src, size_t srcLen) const
+{
+    srcLen = GetLength(src, srcLen);
+    if ( srcLen == wxNO_LEN )
+        return wxCONV_FAILED;
+
+    const size_t inLen = srcLen / BYTES_PER_CHAR;
+    if ( !dst )
+    {
+        // optimization: return maximal space which could be needed for this
+        // string even if the real size could be smaller if the buffer contains
+        // any surrogates
+        return inLen;
+    }
+
+    size_t outLen = 0;
+    const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
+    for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
+    {
+        wxUint32 ch;
+        wxUint16 tmp[2];
+
+        tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
+        if ( ++inBuff < inEnd )
+        {
+            // normal case: there is a following unit which may be the second
+            // half of a surrogate pair
+            tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
+        }
+        else
+        {
+            // end of input: don't read past the end of the buffer; 0 can't
+            // be the second half of a surrogate pair, so decode_utf16() is
+            // expected to fail if tmp[0] is an unpaired first half
+            tmp[1] = 0;
+        }
+
+        const size_t numChars = decode_utf16(tmp, ch);
+        if ( numChars == wxCONV_FAILED )
+            return wxCONV_FAILED;
+
+        // the second unit was consumed as well
+        if ( numChars == 2 )
+            inBuff++;
+
+        if ( ++outLen > dstLen )
+            return wxCONV_FAILED;
+
+        *dst++ = ch;
+    }
+
+
+    return outLen;
+}
+
+// UTF-32 wchar_t to UTF-16 in non-native byte order. Returns the number of
+// bytes needed (dst == NULL) or written, or wxCONV_FAILED if a character
+// can't be encoded or dst is too small. With srcLen == wxNO_LEN the
+// terminating NUL is converted too.
+size_t
+wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
+                             const wchar_t *src, size_t srcLen) const
+{
+    size_t numChars = srcLen;
+    if ( numChars == wxNO_LEN )
+        numChars = wxWcslen(src) + 1;
+
+    wxUint16 *out = wx_reinterpret_cast(wxUint16 *, dst);
+    size_t numBytes = 0;
+    const wchar_t * const srcEnd = src + numChars;
+    while ( src < srcEnd )
+    {
+        wxUint16 units[2];
+        const size_t count = encode_utf16(*src, units);
+        if ( count == wxCONV_FAILED )
+            return wxCONV_FAILED;
+
+        ++src;
+        numBytes += count * BYTES_PER_CHAR;
+
+        // only counting: nothing to store
+        if ( !out )
+            continue;
+
+        if ( numBytes > dstLen )
+            return wxCONV_FAILED;
+
+        *out++ = wxUINT16_SWAP_ALWAYS(units[0]);
+        if ( count == 2 )
+        {
+            // second half of a surrogate pair
+            *out++ = wxUINT16_SWAP_ALWAYS(units[1]);
+        }
+    }
+
+    return numBytes;
+}
+
+#endif // WC_UTF16/!WC_UTF16
+
+
+// ============================================================================
+// UTF-32
+// ============================================================================
+
+// the "straight" functions below handle UTF-32 values unchanged while the
+// "swap" ones byte-swap each value; these aliases attach each set to the BE
+// or LE converter class according to whether WORDS_BIGENDIAN is defined
+#ifdef WORDS_BIGENDIAN
+ #define wxMBConvUTF32straight wxMBConvUTF32BE
+ #define wxMBConvUTF32swap wxMBConvUTF32LE
+#else
+ #define wxMBConvUTF32swap wxMBConvUTF32BE
+ #define wxMBConvUTF32straight wxMBConvUTF32LE
+#endif
+
+
+// global little and big endian UTF-32 converter objects
+// NOTE(review): WXDLLIMPEXP_DATA_BASE presumably only adds the DLL
+// import/export decoration to the type -- confirm against its definition
+WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
+WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
+
+// Return the size in bytes of the UTF-32 input: either the explicitly given
+// srcLen, which must be a multiple of BYTES_PER_CHAR (wxCONV_FAILED is
+// returned otherwise), or, for srcLen == wxNO_LEN, the size found by scanning
+// for the terminating NUL value, which is included in the result.
+/* static */
+size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
+{
+    if ( srcLen != wxNO_LEN )
+    {
+        // explicit length: only an entire number of UTF-32 characters is
+        // accepted
+        if ( srcLen % BYTES_PER_CHAR )
+            return wxCONV_FAILED;
+
+        return srcLen;
+    }
+
+    // no length given: count the characters up to and including the trailing
+    // NUL
+    const wxUint32 *ch = wx_reinterpret_cast(const wxUint32 *, src);
+    size_t numChars = 1;
+    while ( *ch )
+    {
+        ++ch;
+        ++numChars;
+    }
+
+    return numChars * BYTES_PER_CHAR;
+}
+
+// case when in-memory representation is UTF-16
+#ifdef WC_UTF16
+
+// ----------------------------------------------------------------------------
+// conversions without endianness change
+// ----------------------------------------------------------------------------
+
+// UTF-32 in native byte order to UTF-16 wchar_t. Returns the number of
+// wchar_t needed (dst == NULL) or written, or wxCONV_FAILED if the input
+// length is invalid, a character can't be encoded or dst is too small.
+size_t
+wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
+                               const char *src, size_t srcLen) const
+{
+    const size_t numBytes = GetLength(src, srcLen);
+    if ( numBytes == wxNO_LEN )
+        return wxCONV_FAILED;
+
+    const wxUint32 *in = wx_reinterpret_cast(const wxUint32 *, src);
+    const wxUint32 * const inEnd = in + numBytes / BYTES_PER_CHAR;
+
+    size_t numUnits = 0;
+    while ( in != inEnd )
+    {
+        wxUint16 units[2];
+        const size_t count = encode_utf16(*in++, units);
+        if ( count == wxCONV_FAILED )
+            return wxCONV_FAILED;
+
+        numUnits += count;
+
+        // only counting: nothing to store
+        if ( !dst )
+            continue;
+
+        if ( numUnits > dstLen )
+            return wxCONV_FAILED;
+
+        *dst++ = units[0];
+        if ( count == 2 )
+        {
+            // second half of a surrogate pair
+            *dst++ = units[1];
+        }
+    }
+
+    return numUnits;
+}
+
+// UTF-16 wchar_t to UTF-32 in native byte order. With dst == NULL an upper
+// bound of the number of bytes needed is returned; otherwise returns the
+// number of bytes written, or wxCONV_FAILED if the input is invalid or dst is
+// too small. With srcLen == wxNO_LEN the terminating NUL is converted too.
+size_t
+wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
+                                 const wchar_t *src, size_t srcLen) const
+{
+    size_t numUnits = srcLen;
+    if ( numUnits == wxNO_LEN )
+        numUnits = wxWcslen(src) + 1;
+
+    // optimization: when only the size is asked for, return the maximum which
+    // could be needed instead of decoding the input; the exact amount is only
+    // smaller if there are surrogates, which are considered rare enough for
+    // the slight over-allocation to be worth it
+    if ( !dst )
+        return numUnits * BYTES_PER_CHAR;
+
+    wxUint32 *out = wx_reinterpret_cast(wxUint32 *, dst);
+    const wchar_t * const srcEnd = src + numUnits;
+
+    size_t written = 0;
+    while ( src < srcEnd )
+    {
+        // advances src past the unit(s) decoded or resets it to NULL on error
+        const wxUint32 ch = wxDecodeSurrogate(&src);
+        if ( !src )
+            return wxCONV_FAILED;
+
+        const size_t needed = written + BYTES_PER_CHAR;
+        if ( needed > dstLen )
+            return wxCONV_FAILED;
+
+        *out++ = ch;
+        written = needed;
+    }
+
+    return written;
+}
+
+// ----------------------------------------------------------------------------
+// endian-reversing conversions
+// ----------------------------------------------------------------------------
+
+// UTF-32 in non-native byte order to UTF-16 wchar_t. Returns the number of
+// wchar_t needed (dst == NULL) or written, or wxCONV_FAILED if the input
+// length is invalid, a character can't be encoded or dst is too small.
+size_t
+wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
+                           const char *src, size_t srcLen) const
+{
+    const size_t numBytes = GetLength(src, srcLen);
+    if ( numBytes == wxNO_LEN )
+        return wxCONV_FAILED;
+
+    const wxUint32 *in = wx_reinterpret_cast(const wxUint32 *, src);
+    const wxUint32 * const inEnd = in + numBytes / BYTES_PER_CHAR;
+
+    size_t numUnits = 0;
+    for ( ; in != inEnd; ++in )
+    {
+        wxUint16 units[2];
+        const size_t count = encode_utf16(wxUINT32_SWAP_ALWAYS(*in), units);
+        if ( count == wxCONV_FAILED )
+            return wxCONV_FAILED;
+
+        numUnits += count;
+
+        // only counting: nothing to store
+        if ( !dst )
+            continue;
+
+        if ( numUnits > dstLen )
+            return wxCONV_FAILED;
+
+        *dst++ = units[0];
+        if ( count == 2 )
+        {
+            // second half of a surrogate pair
+            *dst++ = units[1];
+        }
+    }
+
+    return numUnits;
+}
+
+// UTF-16 wchar_t to UTF-32 in non-native byte order. With dst == NULL an
+// upper bound of the number of bytes needed is returned; otherwise returns
+// the number of bytes written, or wxCONV_FAILED if the input is invalid or
+// dst is too small. With srcLen == wxNO_LEN the terminating NUL is converted
+// too.
+size_t
+wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
+                             const wchar_t *src, size_t srcLen) const
+{
+    size_t numUnits = srcLen;
+    if ( numUnits == wxNO_LEN )
+        numUnits = wxWcslen(src) + 1;
+
+    // optimization: when only the size is asked for, return the maximum which
+    // could be needed instead of decoding the input; the exact amount is only
+    // smaller if there are surrogates, which are considered rare enough for
+    // the slight over-allocation to be worth it
+    if ( !dst )
+        return numUnits*BYTES_PER_CHAR;
+
+    wxUint32 *out = wx_reinterpret_cast(wxUint32 *, dst);
+    const wchar_t * const srcEnd = src + numUnits;
+
+    size_t written = 0;
+    while ( src < srcEnd )
+    {
+        // advances src past the unit(s) decoded or resets it to NULL on error
+        const wxUint32 ch = wxDecodeSurrogate(&src);
+        if ( !src )
+            return wxCONV_FAILED;
+
+        const size_t needed = written + BYTES_PER_CHAR;
+        if ( needed > dstLen )
+            return wxCONV_FAILED;
+
+        *out++ = wxUINT32_SWAP_ALWAYS(ch);
+        written = needed;
+    }
+
+    return written;
+}
+
+#else // !WC_UTF16: wchar_t is UTF-32
+
+// ----------------------------------------------------------------------------
+// conversions without endianness change
+// ----------------------------------------------------------------------------
+
+size_t
+wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
+ const char *src, size_t srcLen) const
+{
+ // use memcpy() as it should be much faster than hand-written loop
+ srcLen = GetLength(src, srcLen);
+ if ( srcLen == wxNO_LEN )
+ return wxCONV_FAILED;
+
+ const size_t inLen = srcLen/BYTES_PER_CHAR;
+ if ( dst )
+ {
+ if ( dstLen < inLen )
+ return wxCONV_FAILED;
+
+ memcpy(dst, src, srcLen);
+ }