From 9a6d14383a57771a334f6d0b6376dd47bf24049d Mon Sep 17 00:00:00 2001 From: Robert Roebling <robert@roebling.de> Date: Tue, 29 Jul 2008 11:01:16 +0000 Subject: [PATCH] Added 32-bit (UCS-4) wxUString class git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@54802 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- build/bakefiles/files.bkl | 2 + include/wx/chartype.h | 2 +- include/wx/defs.h | 18 + include/wx/ustring.h | 724 ++++++++++++++++++++++++++++++++++++++ interface/wx/string.h | 5 +- interface/wx/ustring.h | 289 +++++++++++++++ src/common/ustring.cpp | 548 +++++++++++++++++++++++++++++ src/common/wxcrt.cpp | 36 ++ 8 files changed, 1622 insertions(+), 2 deletions(-) create mode 100644 include/wx/ustring.h create mode 100644 interface/wx/ustring.h create mode 100644 src/common/ustring.cpp diff --git a/build/bakefiles/files.bkl b/build/bakefiles/files.bkl index a964fda965..0d6e867642 100644 --- a/build/bakefiles/files.bkl +++ b/build/bakefiles/files.bkl @@ -408,6 +408,7 @@ IMPORTANT: please read docs/tech/tn0016.txt before modifying this file! src/common/txtstrm.cpp src/common/unichar.cpp src/common/uri.cpp + src/common/ustring.cpp src/common/variant.cpp src/common/wfstream.cpp src/common/wxcrt.cpp @@ -534,6 +535,7 @@ IMPORTANT: please read docs/tech/tn0016.txt before modifying this file! 
wx/types.h wx/unichar.h wx/uri.h + wx/ustring.h wx/utils.h wx/variant.h wx/vector.h diff --git a/include/wx/chartype.h b/include/wx/chartype.h index 4bd55d63bb..73606a61e0 100644 --- a/include/wx/chartype.h +++ b/include/wx/chartype.h @@ -145,7 +145,7 @@ typedef char wxChar; typedef signed char wxSChar; typedef unsigned char wxUChar; -#else /* Unicode */ +#else /* VZ: note that VC++ defines _T[SU]CHAR simply as wchar_t and not as */ /* signed/unsigned version of it which (a) makes sense to me (unlike */ /* char wchar_t is always unsigned) and (b) was how the previous */ diff --git a/include/wx/defs.h b/include/wx/defs.h index 254401f612..5440cd9b3c 100644 --- a/include/wx/defs.h +++ b/include/wx/defs.h @@ -1273,6 +1273,24 @@ typedef double wxDouble; */ #define wxNullPtr ((void *)NULL) + +/* Define wxChar16 and wxChar32 */ + +#if wxUSE_WCHAR_T && (!defined(SIZEOF_WCHAR_T) || (SIZEOF_WCHAR_T == 2)) + #define wxWCHAR_T_IS_WXCHAR16 + typedef wchar_t wxChar16; +#else + typedef wxUint16 wxChar16; +#endif + +#if wxUSE_WCHAR_T && defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4) + #define wxWCHAR_T_IS_WXCHAR32 + typedef wchar_t wxChar32; +#else + typedef wxUint32 wxChar32; +#endif + + /* ---------------------------------------------------------------------------- */ /* byte ordering related definition and macros */ /* ---------------------------------------------------------------------------- */ diff --git a/include/wx/ustring.h b/include/wx/ustring.h new file mode 100644 index 0000000000..1158af5a98 --- /dev/null +++ b/include/wx/ustring.h @@ -0,0 +1,724 @@ +///////////////////////////////////////////////////////////////////////////// +// Name: wx/ustring.h +// Purpose: 32-bit string (UCS-4) +// Author: Robert Roebling +// Copyright: (c) Robert Roebling +// RCS-ID: $Id: tab.h 37400 2006-02-09 00:28:34Z VZ $ +// Licence: wxWindows licence +///////////////////////////////////////////////////////////////////////////// + +#ifndef _WX_USTRING_H_BASE_ +#define 
_WX_USTRING_H_BASE_ + +#include "wx/defs.h" +#include "wx/string.h" + +WXDLLIMPEXP_TEMPLATE_INSTANCE_BASE( wxCharTypeBuffer<wxChar32> ) + +#if SIZEOF_WCHAR_T == 2 + +typedef wxWCharBuffer wxU16CharBuffer; + +#else + +class WXDLLIMPEXP_BASE wxU16CharBuffer : public wxCharTypeBuffer<wxChar16> +{ +public: + typedef wxCharTypeBuffer<wxChar16> wxCharTypeBufferBase; + + wxU16CharBuffer(const wxCharTypeBufferBase& buf) + : wxCharTypeBufferBase(buf) {} + + wxU16CharBuffer(const CharType *str = NULL) : wxCharTypeBufferBase(str) {} + wxU16CharBuffer(size_t len) : wxCharTypeBufferBase(len) {} +}; + +#endif + + + +#if SIZEOF_WCHAR_T == 2 + +class WXDLLIMPEXP_BASE wxU32CharBuffer : public wxCharTypeBuffer<wxChar32> +{ +public: + typedef wxCharTypeBuffer<wxChar32> wxCharTypeBufferBase; + + wxU32CharBuffer(const wxCharTypeBufferBase& buf) + : wxCharTypeBufferBase(buf) {} + + wxU32CharBuffer(const CharType *str = NULL) : wxCharTypeBufferBase(str) {} + wxU32CharBuffer(size_t len) : wxCharTypeBufferBase(len) {} +}; + +#else + +typedef wxWCharBuffer wxU32CharBuffer; + +#endif + +class WXDLLIMPEXP_BASE wxUString: public std::basic_string<wxChar32> +{ +public: + wxUString() { } + + wxUString( const wxChar32 *str ) { assign(str); } + wxUString( const wxUString &str ) { assign(str); } + wxUString( const wxU32CharBuffer &buf ) { assign(buf); } + + wxUString( const char *str ) { assign(str); } + wxUString( const wxCharBuffer &buf ) { assign(buf); } + wxUString( const char *str, const wxMBConv &conv ) { assign(str,conv); } + wxUString( const wxCharBuffer &buf, const wxMBConv &conv ) { assign(buf,conv); } + + wxUString( const wxChar16 *str ) { assign(str); } + wxUString( const wxU16CharBuffer &buf ) { assign(buf); } + + wxUString( const wxCStrData *cstr ) { assign(cstr); } + wxUString( const wxString &str ) { assign(str); } + + wxUString( char ch ) { assign(ch); } + wxUString( wxChar16 ch ) { assign(ch); } + wxUString( wxChar32 ch ) { assign(ch); } + wxUString( wxUniChar ch ) { assign(ch); } 
+ wxUString( wxUniCharRef ch ) { assign(ch); } + wxUString( size_type n, char ch ) { assign(n,ch); } + wxUString( size_type n, wxChar16 ch ) { assign(n,ch); } + wxUString( size_type n, wxChar32 ch ) { assign(n,ch); } + wxUString( size_type n, wxUniChar ch ) { assign(n,ch); } + wxUString( size_type n, wxUniCharRef ch ) { assign(n,ch); } + + // static construction + + static wxUString FromAscii( const char *str, size_type n ) + { + wxUString ret; + ret.assignFromAscii( str, n ); + return ret; + } + + static wxUString FromAscii( const char *str ) + { + wxUString ret; + ret.assignFromAscii( str ); + return ret; + } + + static wxUString FromUTF8( const char *str, size_type n ) + { + wxUString ret; + ret.assignFromUTF8( str, n ); + return ret; + } + + static wxUString FromUTF8( const char *str ) + { + wxUString ret; + ret.assignFromUTF8( str ); + return ret; + } + + static wxUString FromUTF16( const wxChar16 *str, size_type n ) + { + wxUString ret; + ret.assignFromUTF16( str, n ); + return ret; + } + + static wxUString FromUTF16( const wxChar16 *str ) + { + wxUString ret; + ret.assignFromUTF16( str ); + return ret; + } + + // assign from encoding + + wxUString &assignFromAscii( const char *str ); + wxUString &assignFromAscii( const char *str, size_type n ); + wxUString &assignFromUTF8( const char *str ); + wxUString &assignFromUTF8( const char *str, size_type n ); + wxUString &assignFromUTF16( const wxChar16* str ); + wxUString &assignFromUTF16( const wxChar16* str, size_type n ); + wxUString &assignFromCString( const char* str ); + wxUString &assignFromCString( const char* str, const wxMBConv &conv ); + + // conversions + + wxCharBuffer utf8_str() const; + wxU16CharBuffer utf16_str() const; + +#if SIZEOF_WCHAR_T == 2 + wxWCharBuffer wc_str() const + { + return utf16_str(); + } +#else + wchar_t *wc_str() const + { + return (wchar_t*) c_str(); + } +#endif + + operator wxString() const + { +#if wxUSE_UNICODE_UTF8 + return wxString::FromUTF8( utf8_str() ); +#else +#if 
SIZEOF_WCHAR_T == 2
+        return wxString( utf16_str() );
+#else
+        return wxString( c_str() );
+#endif
+#endif
+    }
+
+#if wxUSE_UNICODE_UTF8
+    wxCharBuffer wx_str()
+    {
+        return utf8_str();
+    }
+#else
+#if SIZEOF_WCHAR_T == 2
+    wxWCharBuffer wx_str()
+    {
+        return utf16_str();
+    }
+#else
+    wchar_t* wx_str()
+    {
+        return (wchar_t*) c_str();  // c_str() is const-qualified; cast exactly as wc_str() does
+    }
+#endif
+#endif
+
+    // assign
+
+    inline wxUString &assign( const wxChar32* str )
+    {
+        std::basic_string<wxChar32> *base = this;
+        return (wxUString &) base->assign( str );
+    }
+
+    inline wxUString &assign( const wxChar32* str, size_type n )
+    {
+        std::basic_string<wxChar32> *base = this;
+        return (wxUString &) base->assign( str, n );
+    }
+
+    inline wxUString &assign( const wxUString &str )
+    {
+        std::basic_string<wxChar32> *base = this;
+        return (wxUString &) base->assign( str );
+    }
+
+    inline wxUString &assign( const wxUString &str, size_type pos, size_type n )
+    {
+        std::basic_string<wxChar32> *base = this;
+        return (wxUString &) base->assign( str, pos, n );
+    }
+
+    inline wxUString &assign( wxChar32 ch )
+    {
+        std::basic_string<wxChar32> *base = this;
+        return (wxUString &) base->assign( (size_type) 1, ch );
+    }
+
+    inline wxUString &assign( size_type n, wxChar32 ch )
+    {
+        std::basic_string<wxChar32> *base = this;
+        return (wxUString &) base->assign( n, ch );
+    }
+
+    wxUString &assign( const wxU32CharBuffer &buf )
+    {
+        return assign( buf.data() );
+    }
+
+    wxUString &assign( const char *str )
+    {
+        return assignFromCString( str );
+    }
+
+    wxUString &assign( const wxCharBuffer &buf )
+    {
+        return assignFromCString( buf.data() );
+    }
+
+    wxUString &assign( const char *str, const wxMBConv &conv )
+    {
+        return assignFromCString( str, conv );
+    }
+
+    wxUString &assign( const wxCharBuffer &buf, const wxMBConv &conv )
+    {
+        return assignFromCString( buf.data(), conv );
+    }
+
+    wxUString &assign( const wxChar16 *str )
+    {
+        return assignFromUTF16( str );
+    }
+
+    wxUString &assign( const wxU16CharBuffer &buf )
+    {
+        return assignFromUTF16(
buf.data() );
+    }
+
+    wxUString &assign( const wxCStrData *cstr )
+    {
+#if SIZEOF_WCHAR_T == 2
+        return assignFromUTF16( cstr->AsWChar() );
+#else
+        return assign( cstr->AsWChar() );
+#endif
+    }
+
+    wxUString &assign( const wxString &str )
+    {
+#if wxUSE_UNICODE_UTF8
+        return assignFromUTF8( str.wx_str() );
+#else
+    #if SIZEOF_WCHAR_T == 2
+        return assignFromUTF16( str.wc_str() );
+    #else
+        return assign( str.wc_str() );  // convert the argument; a bare wc_str() would re-read *this
+    #endif
+#endif
+    }
+
+    wxUString &assign( char ch )
+    {
+        char buf[2];
+        buf[0] = ch;
+        buf[1] = 0;
+        return assignFromCString( buf );
+    }
+
+    wxUString &assign( size_type n, char ch )
+    {
+        wxCharBuffer buffer(n);
+        char *p = buffer.data();
+        size_type i;
+        for (i = 0; i < n; i++)
+        {
+            *p = ch;
+            p++;
+        }
+        return assignFromCString( buffer.data() );
+    }
+
+    wxUString &assign( wxChar16 ch )
+    {
+        wxChar16 buf[2];
+        buf[0] = ch;
+        buf[1] = 0;
+        return assignFromUTF16( buf );
+    }
+
+    wxUString &assign( size_type n, wxChar16 ch )
+    {
+        wxU16CharBuffer buffer(n);
+        wxChar16 *p = buffer.data();
+        size_type i;
+        for (i = 0; i < n; i++)
+        {
+            *p = ch;
+            p++;
+        }
+        return assignFromUTF16( buffer.data() );
+    }
+
+    wxUString &assign( wxUniChar ch )
+    {
+        return assign( (const wxChar32) ch.GetValue() );
+    }
+
+    wxUString &assign( size_type n, wxUniChar ch )
+    {
+        return assign( n, (const wxChar32) ch.GetValue() );
+    }
+
+    wxUString &assign( wxUniCharRef ch )
+    {
+        return assign( (const wxChar32) ch.GetValue() );
+    }
+
+    wxUString &assign( size_type n, wxUniCharRef ch )
+    {
+        return assign( n, (const wxChar32) ch.GetValue() );
+    }
+
+    // append [STL overload]
+
+    inline wxUString &append( const wxUString &s )
+    {
+        std::basic_string<wxChar32> *base = this;
+        return (wxUString &) base->append( s );
+    }
+
+    inline wxUString &append( const wxUString &s, size_type pos, size_type n )
+    {
+        std::basic_string<wxChar32> *base = this;
+        return (wxUString &) base->append( s, pos, n );
+    }
+
+    inline wxUString &append( const wxChar32* s )
+    {
+        std::basic_string<wxChar32>
*base = this; + return (wxUString &) base->append( s ); + } + + inline wxUString &append( const wxChar32* s, size_type n ) + { + std::basic_string<wxChar32> *base = this; + return (wxUString &) base->append( s, n ); + } + + inline wxUString &append( size_type n, wxChar32 c ) + { + std::basic_string<wxChar32> *base = this; + return (wxUString &) base->append( n, c ); + } + + inline wxUString &append( wxChar32 c ) + { + std::basic_string<wxChar32> *base = this; + return (wxUString &) base->append( 1, c ); + } + + // append [wx overload] + + wxUString &append( const wxU16CharBuffer &buf ) + { + return append( buf.data() ); + } + + wxUString &append( const wxU32CharBuffer &buf ) + { + return append( buf.data() ); + } + + wxUString &append( const char *str ) + { + return append( wxUString( str ) ); + } + + wxUString &append( const wxCharBuffer &buf ) + { + return append( wxUString( buf ) ); + } + + wxUString &append( const wxChar16 *str ) + { + return append( wxUString( str ) ); + } + + wxUString &append( const wxString &str ) + { + return append( wxUString( str ) ); + } + + wxUString &append( const wxCStrData *cstr ) + { + return append( wxUString( cstr ) ); + } + + wxUString &append( char ch ) + { + char buf[2]; + buf[0] = ch; + buf[1] = 0; + return append( buf ); + } + + wxUString &append( wxChar16 ch ) + { + wxChar16 buf[2]; + buf[0] = ch; + buf[1] = 0; + return append( buf ); + } + + wxUString &append( wxUniChar ch ) + { + return append( (size_type) 1, (wxChar32) ch.GetValue() ); + } + + wxUString &append( wxUniCharRef ch ) + { + return append( (size_type) 1, (wxChar32) ch.GetValue() ); + } + + + // insert [STL overloads] + + inline wxUString &insert( size_type pos, const wxUString &s ) + { + std::basic_string<wxChar32> *base = this; + return (wxUString &) base->insert( pos, s ); + } + + inline wxUString &insert( size_type pos, const wxUString &s, size_type pos1, size_type n ) + { + std::basic_string<wxChar32> *base = this; + return (wxUString &) base->insert( pos, 
s, pos1, n ); + } + + inline wxUString &insert( size_type pos, const wxChar32 *s ) + { + std::basic_string<wxChar32> *base = this; + return (wxUString &) base->insert( pos, s ); + } + + inline wxUString &insert( size_type pos, const wxChar32 *s, size_type n ) + { + std::basic_string<wxChar32> *base = this; + return (wxUString &) base->insert( pos, s, n ); + } + + inline wxUString &insert( size_type pos, size_type n, wxChar32 c ) + { + std::basic_string<wxChar32> *base = this; + return (wxUString &) base->insert( pos, n, c ); + } + + + // insert [STL overloads] + + wxUString &insert( size_type n, const char *s ) + { + return insert( n, wxUString( s ) ); + } + + wxUString &insert( size_type n, const wxChar16 *s ) + { + return insert( n, wxUString( s ) ); + } + + wxUString &insert( size_type n, const wxCharBuffer &buf ) + { + return insert( n, wxUString( buf ) ); + } + + wxUString &insert( size_type n, const wxU16CharBuffer &buf ) + { + return insert( n, wxUString( buf ) ); + } + + wxUString &insert( size_type n, const wxU32CharBuffer &buf ) + { + return insert( n, buf.data() ); + } + + wxUString &insert( size_type n, const wxString &s ) + { + return insert( n, wxUString( s ) ); + } + + wxUString &insert( size_type n, const wxCStrData *cstr ) + { + return insert( n, wxUString( cstr ) ); + } + + wxUString &insert( size_type n, char ch ) + { + char buf[2]; + buf[0] = ch; + buf[1] = 0; + return insert( n, buf ); + } + + wxUString &insert( size_type n, wchar_t ch ) + { + wchar_t buf[2]; + buf[0] = ch; + buf[1] = 0; + return insert( n, buf ); + } + + // insert iterator + + iterator insert( iterator it, wxChar32 ch ) + { + std::basic_string<wxChar32> *base = this; + return base->insert( it, ch ); + } + + void insert(iterator it, const_iterator first, const_iterator last) + { + std::basic_string<wxChar32> *base = this; + base->insert( it, first, last ); + } + + + // operator = + inline wxUString& operator=(const wxUString& s) + { return assign( s ); } + inline wxUString& 
operator=(const wxString& s) + { return assign( s ); } + inline wxUString& operator=(const wxCStrData* s) + { return assign( s ); } + inline wxUString& operator=(const char *s) + { return assign( s ); } + inline wxUString& operator=(const wxChar16 *s) + { return assign( s ); } + inline wxUString& operator=(const wxChar32 *s) + { return assign( s ); } + inline wxUString& operator=(const wxCharBuffer &s) + { return assign( s ); } + inline wxUString& operator=(const wxU16CharBuffer &s) + { return assign( s ); } + inline wxUString& operator=(const wxU32CharBuffer &s) + { return assign( s ); } + inline wxUString& operator=(const char ch) + { return assign( ch ); } + inline wxUString& operator=(const wxChar16 ch) + { return assign( ch ); } + inline wxUString& operator=(const wxChar32 ch) + { return assign( ch ); } + inline wxUString& operator=(const wxUniChar ch) + { return assign( ch ); } + inline wxUString& operator=(const wxUniCharRef ch) + { return assign( ch ); } + + // operator += + inline wxUString& operator+=(const wxUString& s) + { return append( s ); } + inline wxUString& operator+=(const wxString& s) + { return append( s ); } + inline wxUString& operator+=(const wxCStrData* s) + { return append( s ); } + inline wxUString& operator+=(const char *s) + { return append( s ); } + inline wxUString& operator+=(const wxChar16 *s) + { return append( s ); } + inline wxUString& operator+=(const wxChar32 *s) + { return append( s ); } + inline wxUString& operator+=(const wxCharBuffer &s) + { return append( s ); } + inline wxUString& operator+=(const wxU16CharBuffer &s) + { return append( s ); } + inline wxUString& operator+=(const wxU32CharBuffer &s) + { return append( s ); } + inline wxUString& operator+=(const char ch) + { return append( ch ); } + inline wxUString& operator+=(const wxChar16 ch) + { return append( ch ); } + inline wxUString& operator+=(const wxChar32 ch) + { return append( ch ); } + inline wxUString& operator+=(const wxUniChar ch) + { return append( ch ); 
} + inline wxUString& operator+=(const wxUniCharRef ch) + { return append( ch ); } + +}; + +inline wxUString operator+(const wxUString &s1, const wxUString &s2) + { wxUString ret( s1 ); ret.append( s2 ); return ret; } +inline wxUString operator+(const wxUString &s1, const char *s2) + { return s1 + wxUString(s2); } +inline wxUString operator+(const wxUString &s1, const wxString &s2) + { return s1 + wxUString(s2); } +inline wxUString operator+(const wxUString &s1, const wxCStrData *s2) + { return s1 + wxUString(s2); } +inline wxUString operator+(const wxUString &s1, const wxChar16* s2) + { return s1 + wxUString(s2); } +inline wxUString operator+(const wxUString &s1, const wxChar32 *s2) + { return s1 + wxUString(s2); } +inline wxUString operator+(const wxUString &s1, const wxCharBuffer &s2) + { return s1 + wxUString(s2); } +inline wxUString operator+(const wxUString &s1, const wxU16CharBuffer &s2) + { return s1 + wxUString(s2); } +inline wxUString operator+(const wxUString &s1, const wxU32CharBuffer &s2) + { return s1 + wxUString(s2); } +inline wxUString operator+(const wxUString &s1, char s2) + { return s1 + wxUString(s2); } +inline wxUString operator+(const wxUString &s1, wxChar32 s2) + { wxUString ret( s1 ); ret.append( s2 ); return ret; } +inline wxUString operator+(const wxUString &s1, wxChar16 s2) + { wxUString ret( s1 ); ret.append( (wxChar32) s2 ); return ret; } +inline wxUString operator+(const wxUString &s1, wxUniChar s2) + { wxUString ret( s1 ); ret.append( (wxChar32) s2.GetValue() ); return ret; } +inline wxUString operator+(const wxUString &s1, wxUniCharRef s2) + { wxUString ret( s1 ); ret.append( (wxChar32) s2.GetValue() ); return ret; } + +inline wxUString operator+(const char *s1, const wxUString &s2) + { return wxUString(s1) + s2; } +inline wxUString operator+(const wxString &s1, const wxUString &s2) + { return wxUString(s1) + s2; } +inline wxUString operator+(const wxCStrData *s1, const wxUString &s2) + { return wxUString(s1) + s2; } +inline 
wxUString operator+(const wxChar16* s1, const wxUString &s2)
+    { return wxUString(s1) + s2; }
+inline wxUString operator+(const wxChar32 *s1, const wxUString &s2)
+    { return wxUString(s1) + s2; }
+inline wxUString operator+(const wxCharBuffer &s1, const wxUString &s2)
+    { return wxUString(s1) + s2; }
+inline wxUString operator+(const wxU16CharBuffer &s1, const wxUString &s2)
+    { return wxUString(s1) + s2; }
+inline wxUString operator+(const wxU32CharBuffer &s1, const wxUString &s2)
+    { return wxUString(s1) + s2; }
+inline wxUString operator+(char s1, const wxUString &s2)
+    { return wxUString(s1) + s2; }
+inline wxUString operator+(wxChar32 s1, const wxUString &s2 )
+    { return wxUString(s1) + s2; }
+inline wxUString operator+(wxChar16 s1, const wxUString &s2)
+    { return wxUString(s1) + s2; }
+inline wxUString operator+(wxUniChar s1, const wxUString &s2)
+    { return wxUString(s1) + s2; }
+inline wxUString operator+(wxUniCharRef s1, const wxUString &s2)
+    { return wxUString(s1) + s2; }
+
+
+inline bool operator==(const wxUString& s1, const wxUString& s2)
+    { return s1.compare( s2 ) == 0; }
+inline bool operator!=(const wxUString& s1, const wxUString& s2)
+    { return s1.compare( s2 ) != 0; }
+inline bool operator< (const wxUString& s1, const wxUString& s2)
+    { return s1.compare( s2 ) < 0; }  // pure comparison: no output, like the sibling operators
+inline bool operator> (const wxUString& s1, const wxUString& s2)
+    { return s1.compare( s2 ) > 0; }
+inline bool operator<=(const wxUString& s1, const wxUString& s2)
+    { return s1.compare( s2 ) <= 0; }
+inline bool operator>=(const wxUString& s1, const wxUString& s2)
+    { return s1.compare( s2 ) >= 0; }
+
+#define wxUSTRING_COMP_OPERATORS( T ) \
+inline bool operator==(const wxUString& s1, T s2) \
+    { return s1.compare( wxUString(s2) ) == 0; } \
+inline bool operator!=(const wxUString& s1, T s2) \
+    { return s1.compare( wxUString(s2) ) != 0; } \
+inline bool operator< (const wxUString& s1, T s2) \
+    { return s1.compare( wxUString(s2) ) < 0; } \
+inline bool
operator> (const wxUString& s1, T s2) \ + { return s1.compare( wxUString(s2) ) > 0; } \ +inline bool operator<=(const wxUString& s1, T s2) \ + { return s1.compare( wxUString(s2) ) <= 0; } \ +inline bool operator>=(const wxUString& s1, T s2) \ + { return s1.compare( wxUString(s2) ) >= 0; } \ +\ +inline bool operator==(T s2, const wxUString& s1) \ + { return s1.compare( wxUString(s2) ) == 0; } \ +inline bool operator!=(T s2, const wxUString& s1) \ + { return s1.compare( wxUString(s2) ) != 0; } \ +inline bool operator< (T s2, const wxUString& s1) \ + { return s1.compare( wxUString(s2) ) > 0; } \ +inline bool operator> (T s2, const wxUString& s1) \ + { return s1.compare( wxUString(s2) ) < 0; } \ +inline bool operator<=(T s2, const wxUString& s1) \ + { return s1.compare( wxUString(s2) ) >= 0; } \ +inline bool operator>=(T s2, const wxUString& s1) \ + { return s1.compare( wxUString(s2) ) <= 0; } + +wxUSTRING_COMP_OPERATORS( const wxString & ) +wxUSTRING_COMP_OPERATORS( const char * ) +wxUSTRING_COMP_OPERATORS( const wxChar16 * ) +wxUSTRING_COMP_OPERATORS( const wxChar32 * ) +wxUSTRING_COMP_OPERATORS( const wxCharBuffer & ) +wxUSTRING_COMP_OPERATORS( const wxU16CharBuffer & ) +wxUSTRING_COMP_OPERATORS( const wxU32CharBuffer & ) +wxUSTRING_COMP_OPERATORS( const wxCStrData * ) + +#endif + // _WX_USTRING_H_BASE_ diff --git a/interface/wx/string.h b/interface/wx/string.h index b6cf403dc8..2072e34e06 100644 --- a/interface/wx/string.h +++ b/interface/wx/string.h @@ -82,6 +82,9 @@ public: and Linux, too, you can specify this on the command line with the @c configure @c --disable-utf8 switch. + If you need a Unicode string class with O(1) access on all platforms + you should consider using wxUString. 
+
     Since iterating over a wxString by index can become inefficient in UTF-8
     mode and iterators should be used instead of index based access:
 
@@ -268,7 +271,7 @@ public:
     ::Objects, ::wxEmptyString,
 
     @see @ref overview_string "wxString overview", @ref overview_unicode
-    "Unicode overview"
+    "Unicode overview", wxUString
 */
 class wxString
 {
diff --git a/interface/wx/ustring.h b/interface/wx/ustring.h
new file mode 100644
index 0000000000..3a0ab2bc8c
--- /dev/null
+++ b/interface/wx/ustring.h
@@ -0,0 +1,289 @@
+/////////////////////////////////////////////////////////////////////////////
+// Name: wx/ustring.h
+// Purpose: interface of wxUString
+// Author: Robert Roebling
+// Copyright: (c) Robert Roebling
+// RCS-ID: $Id: tab.h 37400 2006-02-09 00:28:34Z VZ $
+// Licence: wxWindows licence
+/////////////////////////////////////////////////////////////////////////////
+
+/**
+    @class wxUString
+
+    wxUString is a class representing a Unicode character string where
+    each character is stored using a 32-bit value. This is different from
+    wxString which may store a character either as a UTF-8 or as a UTF-16
+    sequence and different from @c std::string which stores a string
+    as a sequence of simple 8-bit characters and also different from
+    @c std::wstring which stores the string differently depending on
+    the definition of wchar_t.
+
+    The main purpose of wxUString is to give users a Unicode string
+    class that has O(1) access to its content, to be identical on all
+    platforms and to be easily convertible to wxString as well as other
+    ways to store strings (C string literals, wide character
+    string literals, character buffers, etc.) by providing many overloads
+    and built-in conversions to and from the various formats.
+
+    wxUString derives from @c std::basic_string<wxChar32> and therefore
+    offers the complete API of @c std::string.
+
+    @library{wxbase}
+    @category{data}
+
+    @see wxString, @ref overview_string "wxString overview", @ref overview_unicode
+    "Unicode overview"
+*/
+
+
+class WXDLLIMPEXP_BASE wxUString: public std::basic_string<wxChar32>
+{
+public:
+    /**
+        Default constructor.
+    */
+    wxUString();
+    /**
+        Copy constructor.
+    */
+    wxUString( const wxUString &str );
+    /**
+        Constructs a string from a 32-bit string literal.
+    */
+    wxUString( const wxChar32 *str );
+    /**
+        Constructs a string from 32-bit string buffer.
+    */
+    wxUString( const wxU32CharBuffer &buf );
+    /**
+        Constructs a string from C string literal using wxConvLibc to convert it to Unicode.
+    */
+    wxUString( const char *str );
+    /**
+        Constructs a string from C string buffer using wxConvLibc to convert it to Unicode.
+    */
+    wxUString( const wxCharBuffer &buf );
+    /**
+        Constructs a string from C string literal using @a conv to convert it to Unicode.
+    */
+    wxUString( const char *str, const wxMBConv &conv );
+    /**
+        Constructs a string from C string buffer using @a conv to convert it to Unicode.
+    */
+    wxUString( const wxCharBuffer &buf, const wxMBConv &conv );
+    /**
+        Constructs a string from UTF-16 string literal
+    */
+    wxUString( const wxChar16 *str );
+    /**
+        Constructs a string from UTF-16 string buffer
+    */
+    wxUString( const wxU16CharBuffer &buf );
+    /**
+        Constructs a string from wxString.
+    */
+    wxUString( const wxString &str );
+    /**
+        Constructs a string from a single character using wxConvLibc to convert it to Unicode.
+    */
+    wxUString( char ch );
+    /**
+        Constructs a string from a UTF-16 character.
+    */
+    wxUString( wxChar16 ch );
+    /**
+        Constructs a string from 32-bit Unicode character.
+    */
+    wxUString( wxChar32 ch );
+    /**
+        Constructs a string from wxUniChar (returned by wxString's access operator)
+    */
+    wxUString( wxUniChar ch );
+    /**
+        Constructs a string from wxUniCharRef (returned by wxString's access operator)
+    */
+    wxUString( wxUniCharRef ch );
+    /**
+        Constructs a string from @a n characters @a ch.
+    */
+    wxUString( size_type n, char ch );
+    /**
+        Constructs a string from @a n characters @a ch.
+    */
+    wxUString( size_type n, wxChar16 ch );
+    /**
+        Constructs a string from @a n characters @a ch.
+    */
+    wxUString( size_type n, wxChar32 ch );
+    /**
+        Constructs a string from @a n characters @a ch.
+    */
+    wxUString( size_type n, wxUniChar ch );
+    /**
+        Constructs a string from @a n characters @a ch.
+    */
+    wxUString( size_type n, wxUniCharRef ch );
+
+    /**
+        Static construction of a wxUString from a 7-bit ASCII string
+    */
+    static wxUString FromAscii( const char *str, size_type n );
+    /**
+        Static construction of a wxUString from a 7-bit ASCII string
+    */
+    static wxUString FromAscii( const char *str );
+    /**
+        Static construction of a wxUString from a UTF-8 encoded string
+    */
+    static wxUString FromUTF8( const char *str, size_type n );
+    /**
+        Static construction of a wxUString from a UTF-8 encoded string
+    */
+    static wxUString FromUTF8( const char *str );
+    /**
+        Static construction of a wxUString from a UTF-16 encoded string
+    */
+    static wxUString FromUTF16( const wxChar16 *str, size_type n );
+    /**
+        Static construction of a wxUString from a UTF-16 encoded string
+    */
+    static wxUString FromUTF16( const wxChar16 *str );
+
+
+    /**
+        Assignment from a 7-bit ASCII string literal
+    */
+    wxUString &assignFromAscii( const char *str );
+    /**
+        Assignment from a 7-bit ASCII string literal
+    */
+    wxUString &assignFromAscii( const char *str, size_type n );
+    /**
+        Assignment from a UTF-8 string literal
+    */
+    wxUString &assignFromUTF8( const char *str );
+    /**
+        Assignment from a UTF-8 string literal
+    */
+    wxUString &assignFromUTF8( const char *str, size_type n );
+    /**
+        Assignment from a UTF-16 string literal
+    */
+    wxUString &assignFromUTF16( const wxChar16* str );
+    /**
+        Assignment from a UTF-16 string literal
+    */
+    wxUString &assignFromUTF16( const wxChar16* str, size_type n );
+    /**
+        Assignment from a C string literal using wxConvLibc
+    */
+    wxUString &assignFromCString( const char* str );
+    /**
+        Assignment from a C string literal using @a conv
+    */
+    wxUString &assignFromCString( const char* str, const wxMBConv &conv );
+
+    /**
+        Conversion to a UTF-8 string
+    */
+    wxCharBuffer utf8_str() const;
+    /**
+        Conversion to a UTF-16 string
+    */
+    wxU16CharBuffer utf16_str() const;
+
+    /**
+        Conversion to a wide character string (either UTF-16
+        or UCS-4, depending on the size of wchar_t).
+    */
+    wxWCharBuffer wc_str() const;
+
+    /**
+        Implicit conversion to wxString.
+    */
+    operator wxString() const;
+
+    /**
+        wxUString assignment. wxUString additionally provides overloads for
+        wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
+        single and repeated characters etc.
+    */
+    wxUString &assign( const wxUString &str );
+
+    /**
+        Appending. wxUString additionally provides overloads for
+        wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
+        single and repeated characters etc.
+    */
+    wxUString &append( const wxUString &s );
+
+    /**
+        Insertion. wxUString additionally provides overloads for
+        wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
+        single characters etc.
+    */
+    wxUString &insert( size_type pos, const wxUString &s );
+
+    /**
+        Assignment operator. wxUString additionally provides overloads for
+        wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
+        single characters etc.
+    */
+    inline wxUString& operator=(const wxUString& s);
+
+    /**
+        Concatenation operator. wxUString additionally provides overloads for
+        wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
+        single characters etc.
+    */
+    inline wxUString& operator+=(const wxUString& s);
+
+};
+
+    /**
+        Concatenation operator. wxUString additionally provides overloads for
+        wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
+        single characters etc.
+    */
+inline wxUString operator+(const wxUString &s1, const wxUString &s2);
+
+    /**
+        Equality operator. wxUString additionally provides overloads for
+        wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
+        single characters etc.
+    */
+inline bool operator==(const wxUString& s1, const wxUString& s2);
+    /**
+        Inequality operator. wxUString additionally provides overloads for
+        wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
+        single characters etc.
+    */
+inline bool operator!=(const wxUString& s1, const wxUString& s2);
+    /**
+        Comparison operator. wxUString additionally provides overloads for
+        wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
+        single characters etc.
+    */
+inline bool operator< (const wxUString& s1, const wxUString& s2);
+    /**
+        Comparison operator. wxUString additionally provides overloads for
+        wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
+        single characters etc.
+    */
+inline bool operator> (const wxUString& s1, const wxUString& s2);
+    /**
+        Comparison operator. wxUString additionally provides overloads for
+        wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
+        single characters etc.
+    */
+inline bool operator<=(const wxUString& s1, const wxUString& s2);
+    /**
+        Comparison operator. wxUString additionally provides overloads for
+        wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
+        single characters etc.
+    */
+inline bool operator>=(const wxUString& s1, const wxUString& s2);
+
+
+#endif
+    // _WX_USTRING_H_BASE_
diff --git a/src/common/ustring.cpp b/src/common/ustring.cpp
new file mode 100644
index 0000000000..cb7eb084c4
--- /dev/null
+++ b/src/common/ustring.cpp
@@ -0,0 +1,561 @@
+/////////////////////////////////////////////////////////////////////////////
+// Name:        src/common/ustring.cpp
+// Purpose:     wxUString class
+// Author:      Robert Roebling
+// Created:     2008-07-25
+// RCS-ID:      $Id:$
+// Copyright:   (c) 2008 Robert Roebling
+// Licence:     wxWindows licence
+///////////////////////////////////////////////////////////////////////////////
+
+// For compilers that support precompilation, includes "wx.h".
+#include "wx/wxprec.h"
+
+#ifdef __BORLANDC__
+    #pragma hdrstop
+#endif
+
+#ifndef WX_PRECOMP
+    #include "wx/strconv.h"  // wxConvLibc
+    #include "wx/log.h"
+#endif
+
+#include "wx/ustring.h"
+#include "wx/unichar.h"
+#include "wx/string.h"
+
+
+// Assigns the contents of the C string str, widening every char to one
+// 32-bit character without any conversion (the input is expected to be
+// plain ASCII). A NULL str yields an empty string.
+wxUString &wxUString::assignFromAscii( const char *str )
+{
+    if (!str)
+        return assign( wxUString() );
+
+    size_type len = wxStrlen( str );
+
+    wxU32CharBuffer buffer( len );
+    wxChar32 *ptr = buffer.data();
+
+    size_type i;
+    for (i = 0; i < len; i++)
+    {
+        *ptr = *str;
+        ptr++;
+        str++;
+    }
+
+    return assign( buffer );
+}
+
+// As above, but uses at most the first n chars of str (fewer if a NUL
+// terminator is found earlier).
+wxUString &wxUString::assignFromAscii( const char *str, size_type n )
+{
+    if (!str)
+        return assign( wxUString() );
+
+    size_type len = 0;
+    const char *s = str;
+    while (len < n && *s)
+    {
+        len++;
+        s++;
+    }
+
+    wxU32CharBuffer buffer( len );
+    wxChar32 *ptr = buffer.data();
+
+    size_type i;
+    for (i = 0; i < len; i++)
+    {
+        *ptr = *str;
+        ptr++;
+        str++;
+    }
+
+    // store the converted text; returning *this here without calling
+    // assign() would discard the buffer and leave the string unchanged
+    return assign( buffer );
+}
+
+// ----------------------------------------------------------------------------
+// UTF-8
+// ----------------------------------------------------------------------------
+
+static const wxUint32 utf8_max[]=
+    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
+
+// this table gives the length of the UTF-8 encoding from its first character:
+const unsigned char tableUtf8Lengths[256] = {
+    // 
single-byte sequences (ASCII):
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 00..0F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 10..1F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 20..2F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 30..3F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 40..4F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 50..5F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 60..6F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 70..7F
+
+    // these are invalid:
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 80..8F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 90..9F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // A0..AF
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // B0..BF
+    0, 0,                                            // C0,C1
+
+    // two-byte sequences:
+          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // C2..CF
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // D0..DF
+
+    // three-byte sequences:
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  // E0..EF
+
+    // four-byte sequences:
+    4, 4, 4, 4, 4,                                   // F0..F4
+
+    // these are invalid again (5- or 6-byte
+    // sequences and sequences for code points
+    // above U+10FFFF, as restricted by RFC 3629):
+                   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0   // F5..FF
+};
+
+// Assigns the NUL-terminated UTF-8 string str, decoding it to 32-bit
+// characters. A NULL str, an invalid lead byte or an invalid continuation
+// byte results in an empty string.
+//
+// The input is walked twice: first to validate it and count the resulting
+// characters, then to decode them into a buffer of exactly that size.
+wxUString &wxUString::assignFromUTF8( const char *str )
+{
+    if (!str)
+        return assign( wxUString() );
+
+    size_type ucs4_len = 0;
+    const char *p = str;
+    while (*p)
+    {
+        unsigned char c = *p;
+        size_type len = tableUtf8Lengths[c];
+        if (!len)
+            return assign( wxUString() ); // don't try to convert invalid UTF-8
+
+        // every continuation byte must have the form 10xxxxxx; checking
+        // this before advancing also keeps a truncated sequence (e.g. a
+        // lead byte directly followed by NUL) from moving p beyond the
+        // terminator and reading past the end of the input
+        size_type i;
+        for (i = 1; i < len; i++)
+        {
+            if ( (p[i] & 0xC0) != 0x80 )
+                return assign( wxUString() ); // don't try to convert invalid UTF-8
+        }
+
+        ucs4_len++;
+        p += len;
+    }
+
+    wxU32CharBuffer buffer( ucs4_len );
+    wxChar32 *out = buffer.data();
+
+    p = str;
+    while (*p)
+    {
+        unsigned char c = *p;
+        if (c < 0x80)
+        {
+            *out = c;
+            p++;
+        }
+        else
+        {
+            size_type len = tableUtf8Lengths[c]; // len == 0 is caught above
+
+            // Char. 
number range  |        UTF-8 octet sequence
+            //    (hexadecimal)    |              (binary)
+            // ----------------------+----------------------------------------
+            // 0000 0000 - 0000 007F | 0xxxxxxx
+            // 0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
+            // 0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
+            // 0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+            //
+            // Code point value is stored in bits marked with 'x',
+            // lowest-order bit of the value on the right side in the diagram
+            // above. (from RFC 3629)
+
+            // mask to extract lead byte's value ('x' bits above), by sequence
+            // length:
+            static const unsigned char leadValueMask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
+
+            // mask and value of lead byte's most significant bits, by length:
+            static const unsigned char leadMarkerMask[] = { 0x80, 0xE0, 0xF0, 0xF8 };
+            static const unsigned char leadMarkerVal[] = { 0x00, 0xC0, 0xE0, 0xF0 };
+
+            len--; // it's more convenient to work with 0-based length here
+
+            // check the lead byte's marker bits; this cannot fail for the
+            // lengths taken from tableUtf8Lengths, but if it ever does, fail
+            // like every other invalid input instead of assigning a buffer
+            // that has only been filled in partially
+            if ( (c & leadMarkerMask[len]) != leadMarkerVal[len] )
+                return assign( wxUString() ); // don't try to convert invalid UTF-8
+
+            // extract the lead byte's value bits:
+            wxChar32 code = c & leadValueMask[len];
+
+            // all remaining bytes, if any, are handled in the same way
+            // regardless of sequence's length:
+            for ( ; len; --len )
+            {
+                c = *++p;
+                if ( (c & 0xC0) != 0x80 )
+                    return assign( wxUString() ); // don't try to convert invalid UTF-8
+
+                code <<= 6;
+                code |= c & 0x3F;
+            }
+
+            *out = code;
+            p++;
+        }
+        out++;
+    }
+
+    return assign( buffer.data() );
+}
+
+// As the overload above, but decodes at most the first n bytes of str; a
+// multi-byte sequence that would extend beyond n bytes is not decoded.
+wxUString &wxUString::assignFromUTF8( const char *str, size_type n )
+{
+    if (!str)
+        return assign( wxUString() );
+
+    // first pass: validate the input and count the resulting characters
+    size_type ucs4_len = 0;
+    size_type utf8_pos = 0;
+    const char *p = str;
+    while (*p)
+    {
+        unsigned char c = *p;
+        size_type len = tableUtf8Lengths[c];
+        if (!len)
+            return assign( wxUString() ); // don't try to convert invalid UTF-8
+        if (utf8_pos + len > n)
+            break;
+
+        // every continuation byte must have the form 10xxxxxx; checking
+        // this before advancing also keeps a truncated sequence (e.g. a
+        // lead byte directly followed by NUL) from moving p beyond the
+        // terminator and reading past the end of the input
+        size_type i;
+        for (i = 1; i < len; i++)
+        {
+            if ( (p[i] & 0xC0) != 0x80 )
+                return assign( wxUString() ); // don't try to convert invalid UTF-8
+        }
+
+        utf8_pos += len;
+        ucs4_len ++;
+        p += len;
+    }
+
+    wxU32CharBuffer buffer( ucs4_len );
+    wxChar32 *out = buffer.data();
+
+    utf8_pos = 0;
+    p = 
str;
+    // second pass: decode into the buffer, stopping once n bytes are used
+    while (*p)
+    {
+        unsigned char c = *p;
+        if (c < 0x80)
+        {
+            if (utf8_pos + 1 > n)
+                break;
+            utf8_pos++;
+
+            *out = c;
+            p++;
+        }
+        else
+        {
+            size_type len = tableUtf8Lengths[c]; // len == 0 is caught above
+            if (utf8_pos + len > n)
+                break;
+            utf8_pos += len;
+
+            // Char. number range  |        UTF-8 octet sequence
+            //    (hexadecimal)    |              (binary)
+            // ----------------------+----------------------------------------
+            // 0000 0000 - 0000 007F | 0xxxxxxx
+            // 0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
+            // 0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
+            // 0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+            //
+            // Code point value is stored in bits marked with 'x',
+            // lowest-order bit of the value on the right side in the diagram
+            // above. (from RFC 3629)
+
+            // mask to extract lead byte's value ('x' bits above), by sequence
+            // length:
+            static const unsigned char leadValueMask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
+
+            // mask and value of lead byte's most significant bits, by length:
+            static const unsigned char leadMarkerMask[] = { 0x80, 0xE0, 0xF0, 0xF8 };
+            static const unsigned char leadMarkerVal[] = { 0x00, 0xC0, 0xE0, 0xF0 };
+
+            len--; // it's more convenient to work with 0-based length here
+
+            // check the lead byte's marker bits; this cannot fail for the
+            // lengths taken from tableUtf8Lengths, but if it ever does, fail
+            // like every other invalid input in this function instead of
+            // silently assigning a truncated string
+            if ( (c & leadMarkerMask[len]) != leadMarkerVal[len] )
+                return assign( wxUString() ); // don't try to convert invalid UTF-8
+
+            // extract the lead byte's value bits:
+            wxChar32 code = c & leadValueMask[len];
+
+            // all remaining bytes, if any, are handled in the same way
+            // regardless of sequence's length:
+            for ( ; len; --len )
+            {
+                c = *++p;
+                if ( (c & 0xC0) != 0x80 )
+                    return assign( wxUString() ); // don't try to convert invalid UTF-8
+
+                code <<= 6;
+                code |= c & 0x3F;
+            }
+
+            *out = code;
+            p++;
+        }
+        out++;
+    }
+
+    // terminate the decoded text explicitly before handing it to assign()
+    *out = 0;
+
+    return assign( buffer.data() );
+}
+
+// Assigns the UTF-16 string str, using at most n 16-bit units of it (or
+// everything up to a terminating 0 unit if that comes first). A NULL str
+// yields an empty string.
+wxUString &wxUString::assignFromUTF16( const wxChar16* str, size_type n )
+{
+    if (!str)
+        return assign( wxUString() );
+
+    // first pass: validate the input and count the resulting characters
+    size_type ucs4_len = 0;
+    size_type utf16_pos = 0;
+    const wxChar16 *p = str;
+    while (*p)
+    {
+        size_type len;
+        
if ((*p < 0xd800) || (*p > 0xdfff))
+        {
+            len = 1;
+        }
+        // a surrogate pair must start with a high surrogate (0xd800..0xdbff)
+        // and continue with a low one (0xdc00..0xdfff); a low surrogate in
+        // the leading position would decode to a value above 0x10FFFF
+        else if ((*p > 0xdbff) || (p[1] < 0xdc00) || (p[1] > 0xdfff))
+        {
+            return assign( wxUString() ); // don't try to convert invalid UTF-16
+        }
+        else
+        {
+            len = 2;
+        }
+
+        if (utf16_pos + len > n)
+            break;
+
+        ucs4_len++;
+        p += len;
+        utf16_pos += len;
+    }
+
+    wxU32CharBuffer buffer( ucs4_len );
+    wxChar32 *out = buffer.data();
+
+    utf16_pos = 0;
+
+    // second pass: decode into the buffer, stopping once n units are used
+    p = str;
+    while (*p)
+    {
+        if ((*p < 0xd800) || (*p > 0xdfff))
+        {
+            if (utf16_pos + 1 > n)
+                break;
+
+            *out = *p;
+            p++;
+            utf16_pos++;
+        }
+        else
+        {
+            if (utf16_pos + 2 > n)
+                break;
+
+            // (hi - 0xd7c0) << 10 equals ((hi - 0xd800) << 10) + 0x10000
+            *out = ((p[0] - 0xd7c0) << 10) + (p[1] - 0xdc00);
+            p += 2;
+            utf16_pos += 2;
+        }
+        out++;
+    }
+
+    return assign( buffer.data() );
+}
+
+// Assigns the 0-terminated UTF-16 string str, combining surrogate pairs
+// into single 32-bit characters. A NULL str or a malformed surrogate pair
+// results in an empty string.
+wxUString &wxUString::assignFromUTF16( const wxChar16* str )
+{
+    if (!str)
+        return assign( wxUString() );
+
+    // first pass: validate the input and count the resulting characters
+    size_type ucs4_len = 0;
+    const wxChar16 *p = str;
+    while (*p)
+    {
+        size_type len;
+        if ((*p < 0xd800) || (*p > 0xdfff))
+        {
+            len = 1;
+        }
+        // a surrogate pair must start with a high surrogate (0xd800..0xdbff)
+        // and continue with a low one (0xdc00..0xdfff); a low surrogate in
+        // the leading position would decode to a value above 0x10FFFF
+        else if ((*p > 0xdbff) || (p[1] < 0xdc00) || (p[1] > 0xdfff))
+        {
+            return assign( wxUString() ); // don't try to convert invalid UTF-16
+        }
+        else
+        {
+            len = 2;
+        }
+
+        ucs4_len++;
+        p += len;
+    }
+
+    wxU32CharBuffer buffer( ucs4_len );
+    wxChar32 *out = buffer.data();
+
+    // second pass: decode into the buffer
+    p = str;
+    while (*p)
+    {
+        if ((*p < 0xd800) || (*p > 0xdfff))
+        {
+            *out = *p;
+            p++;
+        }
+        else
+        {
+            // (hi - 0xd7c0) << 10 equals ((hi - 0xd800) << 10) + 0x10000
+            *out = ((p[0] - 0xd7c0) << 10) + (p[1] - 0xdc00);
+            p += 2;
+        }
+        out++;
+    }
+
+    return assign( buffer.data() );
+}
+
+// Assigns the C string str after converting it to wide characters with
+// wxConvLibc. A NULL str yields an empty string.
+wxUString &wxUString::assignFromCString( const char* str )
+{
+    if (!str)
+        return assign( wxUString() );
+
+    wxWCharBuffer buffer = wxConvLibc.cMB2WC( str );
+
+    return assign( buffer );
+}
+
+// Assigns the C string str after converting it to wide characters with
+// the given conv. A NULL str yields an empty string.
+wxUString &wxUString::assignFromCString( const char* str, const wxMBConv &conv )
+{
+    if (!str)
+        return assign( wxUString() );
+
+    wxWCharBuffer buffer = conv.cMB2WC( str );
+
+    return assign( buffer );
+}
+
+// Returns the contents of this string encoded as UTF-8. The string is
+// walked twice: once to compute the encoded length, once to write the bytes.
+wxCharBuffer wxUString::utf8_str() const
+{
+    size_type utf8_length = 0;
+    const wxChar32 *ptr = data();
+
+    while (*ptr)
+    {
+        
wxChar32 code = *ptr;
+        ptr++;
+
+        if ( code <= 0x7F )
+        {
+            utf8_length++;
+        }
+        else if ( code <= 0x07FF )
+        {
+            utf8_length += 2;
+        }
+        // 0xFFFF itself still fits into three bytes; sending it to the
+        // four-byte branch would produce an overlong (invalid) sequence
+        else if ( code <= 0xFFFF )
+        {
+            utf8_length += 3;
+        }
+        else if ( code <= 0x10FFFF )
+        {
+            utf8_length += 4;
+        }
+        else
+        {
+            // invalid range, skip
+        }
+    }
+
+    wxCharBuffer result( utf8_length );
+
+    char *out = result.data();
+
+    // second pass: write the bytes, filling each sequence from its last
+    // byte backwards while shifting the code point down by 6 bits
+    ptr = data();
+    while (*ptr)
+    {
+        wxChar32 code = *ptr;
+        ptr++;
+
+        if ( code <= 0x7F )
+        {
+            out[0] = (char)code;
+            out++;
+        }
+        else if ( code <= 0x07FF )
+        {
+            out[1] = 0x80 | (code & 0x3F);  code >>= 6;
+            out[0] = 0xC0 | code;
+            out += 2;
+        }
+        // must use the same boundary as the length computation above
+        else if ( code <= 0xFFFF )
+        {
+            out[2] = 0x80 | (code & 0x3F);  code >>= 6;
+            out[1] = 0x80 | (code & 0x3F);  code >>= 6;
+            out[0] = 0xE0 | code;
+            out += 3;
+        }
+        else if ( code <= 0x10FFFF )
+        {
+            out[3] = 0x80 | (code & 0x3F);  code >>= 6;
+            out[2] = 0x80 | (code & 0x3F);  code >>= 6;
+            out[1] = 0x80 | (code & 0x3F);  code >>= 6;
+            out[0] = 0xF0 | code;
+            out += 4;
+        }
+        else
+        {
+            // invalid range, skip
+        }
+    }
+
+    // (the debugging wxPrintf() calls that used to be here were removed: a
+    // conversion function must not write to the standard output)
+
+    return result;
+}
+
+// Returns the contents of this string encoded as UTF-16: characters below
+// 0x10000 are stored as one 16-bit unit, all others as a surrogate pair.
+// The values are not validated (see the TODOs below).
+wxU16CharBuffer wxUString::utf16_str() const
+{
+    size_type utf16_length = 0;
+    const wxChar32 *ptr = data();
+
+    while (*ptr)
+    {
+        wxChar32 code = *ptr;
+        ptr++;
+
+        // TODO: error range checks
+
+        if (code < 0x10000)
+            utf16_length++;
+        else
+            utf16_length += 2;
+    }
+
+    wxU16CharBuffer result( utf16_length );
+    wxChar16 *out = result.data();
+
+    ptr = data();
+
+    while (*ptr)
+    {
+        wxChar32 code = *ptr;
+        ptr++;
+
+        // TODO: error range checks
+
+        if (code < 0x10000)
+        {
+            out[0] = code;
+            out++;
+        }
+        else
+        {
+            // high surrogate carries the upper 10 bits of (code - 0x10000),
+            // low surrogate the lower 10 bits
+            out[0] = (code - 0x10000) / 0x400 + 0xd800;
+            out[1] = (code - 0x10000) % 0x400 + 0xdc00;
+            out += 2;
+        }
+    }
+
+    return result;
+}
+
diff --git a/src/common/wxcrt.cpp b/src/common/wxcrt.cpp
index 5d42579e94..5a114a368d 100644
--- a/src/common/wxcrt.cpp
+++ 
b/src/common/wxcrt.cpp
@@ -796,6 +796,52 @@ WXDLLIMPEXP_BASE wchar_t * wxCRT_StrdupW(const wchar_t *pwz)
 }
 #endif // wxCRT_StrdupW
 
+#ifndef wxWCHAR_T_IS_WXCHAR16
+// Returns the number of 16-bit units preceding the terminating 0 (0 for NULL).
+WXDLLIMPEXP_BASE size_t wxStrlen(const wxChar16 *s )
+{
+    if (!s) return 0;
+    size_t i=0;
+    while (*s!=0) { ++i; ++s; };
+    return i;
+}
+
+// Returns a malloc()ed copy of s, including its terminating 0. Returns
+// NULL if s is NULL or if the allocation fails.
+WXDLLIMPEXP_BASE wxChar16* wxStrdup(const wxChar16* s)
+{
+    if (!s) return NULL;
+    size_t size = (wxStrlen(s) + 1) * sizeof(wxChar16);
+    wxChar16 *ret = (wxChar16*) malloc(size);
+    if (ret)
+        memcpy(ret, s, size);
+    return ret;
+}
+#endif
+
+#ifndef wxWCHAR_T_IS_WXCHAR32
+// Returns the number of 32-bit units preceding the terminating 0 (0 for NULL).
+WXDLLIMPEXP_BASE size_t wxStrlen(const wxChar32 *s )
+{
+    if (!s) return 0;
+    size_t i=0;
+    while (*s!=0) { ++i; ++s; };
+    return i;
+}
+
+// Returns a malloc()ed copy of s, including its terminating 0. Returns
+// NULL if s is NULL or if the allocation fails.
+WXDLLIMPEXP_BASE wxChar32* wxStrdup(const wxChar32* s)
+{
+    if (!s) return NULL;
+    size_t size = (wxStrlen(s) + 1) * sizeof(wxChar32);
+    wxChar32 *ret = (wxChar32*) malloc(size);
+    if (ret)
+        memcpy(ret, s, size);
+    return ret;
+}
+#endif
+
 #ifndef wxCRT_StricmpA
 WXDLLIMPEXP_BASE int wxCRT_StricmpA(const char *psz1, const char *psz2)
 {
-- 
2.47.2