From 817270659e986de1b243586d8eb6ad3a76c87480 Mon Sep 17 00:00:00 2001 From: =?utf8?q?V=C3=A1clav=20Slav=C3=ADk?= Date: Thu, 12 Apr 2007 21:15:07 +0000 Subject: [PATCH] initial version of UTF-8 strings representation (still converting to wchar_t* a lot); it has to be explicitly enabled with --enable-utf8 for now git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@45433 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- configure | 300 ++++++++++++++++++---- configure.in | 8 + include/wx/buffer.h | 9 +- include/wx/chartype.h | 12 +- include/wx/list.h | 18 +- include/wx/log.h | 14 +- include/wx/string.h | 253 ++++++++++++++++--- include/wx/stringimpl.h | 29 ++- include/wx/strvararg.h | 33 ++- include/wx/unichar.h | 51 +++- setup.h.in | 2 + src/common/list.cpp | 8 +- src/common/log.cpp | 34 +-- src/common/string.cpp | 509 ++++++++++++++++++++++++++++++++++++-- src/common/stringimpl.cpp | 85 ++++--- src/common/strvararg.cpp | 61 ++++- src/common/unichar.cpp | 39 +++ src/common/uri.cpp | 6 +- 18 files changed, 1250 insertions(+), 221 deletions(-) diff --git a/configure b/configure index 1b7ed84fe8..2c82bd1d57 100755 --- a/configure +++ b/configure @@ -1639,6 +1639,7 @@ Optional Features: --enable-optimise create optimised code --enable-debug same as debug_flag and debug_info --enable-stl use STL for containers + --enable-extended_rtti use extended RTTI (XTI) --enable-omf use OMF object format --enable-debug_flag set __WXDEBUG__ flag (recommended for developers!) 
--enable-debug_info create code with debugging information @@ -1688,6 +1689,7 @@ Optional Features: --enable-longlong use wxLongLong class --enable-mimetype use wxMimeTypesManager --enable-mslu use MS Layer for Unicode on Windows 9x (Win32 only) + --enable-utf8 use UTF-8 representation for strings (Unix only) --enable-snglinst use wxSingleInstanceChecker class --enable-std_iostreams use standard C++ stream classes --enable-std_string use standard C++ string classes @@ -2900,6 +2902,7 @@ DEBUG_CONFIGURE=0 if test $DEBUG_CONFIGURE = 1; then DEFAULT_wxUSE_UNIVERSAL=no DEFAULT_wxUSE_STL=no + DEFAULT_wxUSE_EXTENDED_RTTI=no DEFAULT_wxUSE_NANOX=no @@ -3106,6 +3109,7 @@ if test $DEBUG_CONFIGURE = 1; then DEFAULT_wxUSE_UNICODE=no DEFAULT_wxUSE_UNICODE_MSLU=no + DEFAULT_wxUSE_UNICODE_UTF8=no DEFAULT_wxUSE_WCSRTOMBS=no DEFAULT_wxUSE_PALETTE=no @@ -3125,6 +3129,7 @@ if test $DEBUG_CONFIGURE = 1; then else DEFAULT_wxUSE_UNIVERSAL=no DEFAULT_wxUSE_STL=no + DEFAULT_wxUSE_EXTENDED_RTTI=no DEFAULT_wxUSE_NANOX=no @@ -3330,6 +3335,7 @@ else DEFAULT_wxUSE_UNICODE=no DEFAULT_wxUSE_UNICODE_MSLU=yes + DEFAULT_wxUSE_UNICODE_UTF8=no DEFAULT_wxUSE_WCSRTOMBS=no DEFAULT_wxUSE_PALETTE=yes @@ -4675,6 +4681,47 @@ echo "${ECHO_T}yes" >&6; } echo "${ECHO_T}no" >&6; } fi + + enablestring= + { echo "$as_me:$LINENO: checking for --${enablestring:-enable}-extended_rtti" >&5 +echo $ECHO_N "checking for --${enablestring:-enable}-extended_rtti... $ECHO_C" >&6; } + no_cache=0 + # Check whether --enable-extended_rtti was given. 
+if test "${enable_extended_rtti+set}" = set; then + enableval=$enable_extended_rtti; + if test "$enableval" = yes; then + ac_cv_use_extended_rtti='wxUSE_EXTENDED_RTTI=yes' + else + ac_cv_use_extended_rtti='wxUSE_EXTENDED_RTTI=no' + fi + +else + + LINE=`grep "wxUSE_EXTENDED_RTTI" ${wx_arg_cache_file}` + if test "x$LINE" != x ; then + eval "DEFAULT_$LINE" + else + no_cache=1 + fi + + ac_cv_use_extended_rtti='wxUSE_EXTENDED_RTTI='$DEFAULT_wxUSE_EXTENDED_RTTI + +fi + + + eval "$ac_cv_use_extended_rtti" + if test "$no_cache" != 1; then + echo $ac_cv_use_extended_rtti >> ${wx_arg_cache_file}.tmp + fi + + if test "$wxUSE_EXTENDED_RTTI" = yes; then + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + fi + if test "$USE_OS2" = "1"; then DEFAULT_wxUSE_OMF=no @@ -6698,6 +6745,47 @@ echo "${ECHO_T}no" >&6; } fi + enablestring= + { echo "$as_me:$LINENO: checking for --${enablestring:-enable}-utf8" >&5 +echo $ECHO_N "checking for --${enablestring:-enable}-utf8... $ECHO_C" >&6; } + no_cache=0 + # Check whether --enable-utf8 was given. 
+if test "${enable_utf8+set}" = set; then + enableval=$enable_utf8; + if test "$enableval" = yes; then + ac_cv_use_utf8='wxUSE_UNICODE_UTF8=yes' + else + ac_cv_use_utf8='wxUSE_UNICODE_UTF8=no' + fi + +else + + LINE=`grep "wxUSE_UNICODE_UTF8" ${wx_arg_cache_file}` + if test "x$LINE" != x ; then + eval "DEFAULT_$LINE" + else + no_cache=1 + fi + + ac_cv_use_utf8='wxUSE_UNICODE_UTF8='$DEFAULT_wxUSE_UNICODE_UTF8 + +fi + + + eval "$ac_cv_use_utf8" + if test "$no_cache" != 1; then + echo $ac_cv_use_utf8 >> ${wx_arg_cache_file}.tmp + fi + + if test "$wxUSE_UNICODE_UTF8" = yes; then + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + fi + + enablestring= { echo "$as_me:$LINENO: checking for --${enablestring:-enable}-snglinst" >&5 echo $ECHO_N "checking for --${enablestring:-enable}-snglinst... $ECHO_C" >&6; } @@ -22380,13 +22468,11 @@ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include /* for off_t */ - #include +#include int main () { -int (*fp) (FILE *, off_t, int) = fseeko; - return fseeko (stdin, 0, 0) && fp (stdin, 0, 0); +return fseeko (stdin, 0, 0) && (fseeko) (stdin, 0, 0); ; return 0; } @@ -22426,13 +22512,11 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #define _LARGEFILE_SOURCE 1 -#include /* for off_t */ - #include +#include int main () { -int (*fp) (FILE *, off_t, int) = fseeko; - return fseeko (stdin, 0, 0) && fp (stdin, 0, 0); +return fseeko (stdin, 0, 0) && (fseeko) (stdin, 0, 0); ; return 0; } @@ -39268,7 +39352,6 @@ echo $ECHO_N "checking how many arguments gethostbyname_r() takes... 
$ECHO_C" >& else -################################################################ ac_cv_func_which_gethostbyname_r=unknown @@ -39498,7 +39581,6 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi -################################################################ fi @@ -39647,19 +39729,94 @@ _ACEOF fi fi - -{ echo "$as_me:$LINENO: checking how many arguments getservbyname_r() takes" >&5 -echo $ECHO_N "checking how many arguments getservbyname_r() takes... $ECHO_C" >&6; } + { echo "$as_me:$LINENO: checking for getservbyname_r" >&5 +echo $ECHO_N "checking for getservbyname_r... $ECHO_C" >&6; } if test "${ac_cv_func_which_getservbyname_r+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - ac_ext=cpp -ac_cpp='$CXXCPP $CPPFLAGS' -ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_cxx_compiler_gnu +{ echo "$as_me:$LINENO: checking for getservbyname_r" >&5 +echo $ECHO_N "checking for getservbyname_r... $ECHO_C" >&6; } +if test "${ac_cv_func_getservbyname_r+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define getservbyname_r to an innocuous variant, in case declares getservbyname_r. + For example, HP-UX 11i declares gettimeofday. */ +#define getservbyname_r innocuous_getservbyname_r + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char getservbyname_r (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef getservbyname_r + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char getservbyname_r (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_getservbyname_r || defined __stub___getservbyname_r +choke me +#endif +int +main () +{ +return getservbyname_r (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + ac_cv_func_getservbyname_r=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_func_getservbyname_r=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_func_getservbyname_r" >&5 +echo "${ECHO_T}$ac_cv_func_getservbyname_r" >&6; } +if test $ac_cv_func_getservbyname_r = yes; then cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ @@ -39667,17 +39824,20 @@ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ -#include + +# include + int main () { - char *name; - char *proto; - struct servent *se, *res; - char buffer[2048]; - int buflen = 2048; - (void) getservbyname_r(name, proto, se, buffer, buflen, &res) + + char *name; + char *proto; + struct servent *se; + struct servent_data data; + (void) getservbyname_r(name, proto, se, &data); + ; return 0; @@ -39697,32 +39857,34 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || + test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then - ac_cv_func_which_getservbyname_r=six + ac_cv_func_which_getservbyname_r=four else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - cat >conftest.$ac_ext <<_ACEOF + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -#include + +# include + int main () { - char *name; - char *proto; - struct servent *se; - char buffer[2048]; - int buflen = 2048; - (void) getservbyname_r(name, proto, se, buffer, buflen) + char *name; + char *proto; + struct servent *se, *res; + char buffer[2048]; + int buflen = 2048; + (void) getservbyname_r(name, proto, se, buffer, buflen, &res) ; return 0; @@ -39742,31 +39904,34 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || + test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then - ac_cv_func_which_getservbyname_r=five + ac_cv_func_which_getservbyname_r=six else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - cat >conftest.$ac_ext <<_ACEOF + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ -#include + +# include + int main () { - char *name; - char *proto; - struct servent *se; - struct servent_data data; - (void) getservbyname_r(name, proto, se, &data); + char *name; + char *proto; + struct servent *se; + char buffer[2048]; + int buflen = 2048; + (void) getservbyname_r(name, proto, se, buffer, buflen) ; return 0; @@ -39786,21 +39951,22 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || + test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then - ac_cv_func_which_getservbyname_r=four + ac_cv_func_which_getservbyname_r=five else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_func_which_getservbyname_r=no - fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext @@ -39809,13 +39975,10 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - +else + ac_cv_func_which_getservbyname_r=no +fi fi { echo "$as_me:$LINENO: result: $ac_cv_func_which_getservbyname_r" >&5 @@ -39836,6 +39999,7 @@ elif test $ac_cv_func_which_getservbyname_r = four; then #define HAVE_FUNC_GETSERVBYNAME_R_4 1 _ACEOF + fi @@ -40715,6 +40879,13 @@ _ACEOF fi +if test "$wxUSE_EXTENDED_RTTI" = "yes"; then + cat >>confdefs.h <<\_ACEOF +#define wxUSE_EXTENDED_RTTI 1 +_ACEOF + +fi + if test "$wxUSE_APPLE_IEEE" = "yes"; then cat >>confdefs.h <<\_ACEOF #define wxUSE_APPLE_IEEE 1 @@ -43476,6 +43647,13 @@ fi fi fi +if test "$wxUSE_UNICODE" = "yes" -a "$wxUSE_UNICODE_UTF8" = "yes"; then + cat >>confdefs.h <<\_ACEOF +#define 
wxUSE_UNICODE_UTF8 1 +_ACEOF + +fi + if test "$wxUSE_wxUSE_EXPERIMENTAL_PRINTF" = "yes"; then cat >>confdefs.h <<\_ACEOF #define wxUSE_EXPERIMENTAL_PRINTF 1 @@ -46113,7 +46291,10 @@ echo "${ECHO_T}$bakefile_cv_prog_makeisgnu" >&6; } PLATFORM_BEOS=1 ;; * ) - ;; + { { echo "$as_me:$LINENO: error: Unknown platform: $BAKEFILE_FORCE_PLATFORM" >&5 +echo "$as_me: error: Unknown platform: $BAKEFILE_FORCE_PLATFORM" >&2;} + { (exit 1); exit 1; }; } + ;; esac fi @@ -48857,10 +49038,21 @@ echo "${ECHO_T}no" >&6; } cppunit_major_min=`echo $cppunit_version_min | \ sed 's/\([0-9]*\).\([0-9]*\).\([0-9]*\)/\1/'` + if test "x${cppunit_major_min}" = "x" ; then + cppunit_major_min=0 + fi + cppunit_minor_min=`echo $cppunit_version_min | \ sed 's/\([0-9]*\).\([0-9]*\).\([0-9]*\)/\2/'` + if test "x${cppunit_minor_min}" = "x" ; then + cppunit_minor_min=0 + fi + cppunit_micro_min=`echo $cppunit_version_min | \ sed 's/\([0-9]*\).\([0-9]*\).\([0-9]*\)/\3/'` + if test "x${cppunit_micro_min}" = "x" ; then + cppunit_micro_min=0 + fi cppunit_version_proper=`expr \ $cppunit_major_version \> $cppunit_major_min \| \ diff --git a/configure.in b/configure.in index 06a9b8a144..23393e8243 100644 --- a/configure.in +++ b/configure.in @@ -576,6 +576,7 @@ if test $DEBUG_CONFIGURE = 1; then DEFAULT_wxUSE_UNICODE=no DEFAULT_wxUSE_UNICODE_MSLU=no + DEFAULT_wxUSE_UNICODE_UTF8=no DEFAULT_wxUSE_WCSRTOMBS=no DEFAULT_wxUSE_PALETTE=no @@ -801,6 +802,7 @@ else DEFAULT_wxUSE_UNICODE=no DEFAULT_wxUSE_UNICODE_MSLU=yes + DEFAULT_wxUSE_UNICODE_UTF8=no DEFAULT_wxUSE_WCSRTOMBS=no DEFAULT_wxUSE_PALETTE=yes @@ -987,6 +989,8 @@ WX_ARG_ENABLE(log, [ --enable-log use logging system], wxU WX_ARG_ENABLE(longlong, [ --enable-longlong use wxLongLong class], wxUSE_LONGLONG) WX_ARG_ENABLE(mimetype, [ --enable-mimetype use wxMimeTypesManager], wxUSE_MIMETYPE) WX_ARG_ENABLE(mslu, [ --enable-mslu use MS Layer for Unicode on Windows 9x (Win32 only)], wxUSE_UNICODE_MSLU) +dnl FIXME-UTF8: make UTF8 automatic +WX_ARG_ENABLE(utf8, [ 
--enable-utf8 use UTF-8 representation for strings (Unix only)], wxUSE_UNICODE_UTF8) WX_ARG_ENABLE(snglinst, [ --enable-snglinst use wxSingleInstanceChecker class], wxUSE_SNGLINST_CHECKER) WX_ARG_ENABLE(std_iostreams, [ --enable-std_iostreams use standard C++ stream classes], wxUSE_STD_IOSTREAM) WX_ARG_ENABLE(std_string, [ --enable-std_string use standard C++ string classes], wxUSE_STD_STRING) @@ -6483,6 +6487,10 @@ if test "$wxUSE_UNICODE" = "yes" ; then fi fi +if test "$wxUSE_UNICODE" = "yes" -a "$wxUSE_UNICODE_UTF8" = "yes"; then + AC_DEFINE(wxUSE_UNICODE_UTF8) +fi + if test "$wxUSE_wxUSE_EXPERIMENTAL_PRINTF" = "yes"; then AC_DEFINE(wxUSE_EXPERIMENTAL_PRINTF) fi diff --git a/include/wx/buffer.h b/include/wx/buffer.h index 59a7ad6c35..81184c7705 100644 --- a/include/wx/buffer.h +++ b/include/wx/buffer.h @@ -168,8 +168,13 @@ typedef wxWritableCharTypeBuffer wxWritableWCharBuffer; #define wxMB2WXbuf wxWCharBuffer #define wxWX2MBbuf wxCharBuffer - #define wxWC2WXbuf wxChar* - #define wxWX2WCbuf wxChar* + #if wxUSE_UNICODE_WCHAR + #define wxWC2WXbuf wxChar* + #define wxWX2WCbuf wxChar* + #elif wxUSE_UNICODE_UTF8 + #define wxWC2WXbuf wxWCharBuffer + #define wxWX2WCbuf wxWCharBuffer + #endif #else // ANSI #define wxWxCharBuffer wxCharBuffer diff --git a/include/wx/chartype.h b/include/wx/chartype.h index 934e13d1bc..e5ddbf0c9a 100644 --- a/include/wx/chartype.h +++ b/include/wx/chartype.h @@ -190,9 +190,15 @@ /* depending on the platform, Unicode build can either store wxStrings as wchar_t* or UTF-8 encoded char*: */ #if wxUSE_UNICODE - /* for now, all Unicode builds are wchar_t* based: */ - #define wxUSE_UNICODE_WCHAR 1 - #define wxUSE_UNICODE_UTF8 0 + #ifndef wxUSE_UNICODE_UTF8 + #define wxUSE_UNICODE_UTF8 0 + #endif + + #if wxUSE_UNICODE_UTF8 + #define wxUSE_UNICODE_WCHAR 0 + #else + #define wxUSE_UNICODE_WCHAR 1 + #endif #else #define wxUSE_UNICODE_WCHAR 0 #define wxUSE_UNICODE_UTF8 0 diff --git a/include/wx/list.h b/include/wx/list.h index a2907d0693..ad0e88f93d 
100644 --- a/include/wx/list.h +++ b/include/wx/list.h @@ -380,7 +380,7 @@ private: union wxListKeyValue { long integer; - wxChar *string; + wxString *string; }; // a struct which may contain both types of keys @@ -397,15 +397,13 @@ public: { } wxListKey(long i) : m_keyType(wxKEY_INTEGER) { m_key.integer = i; } - wxListKey(const wxChar *s) : m_keyType(wxKEY_STRING) - { m_key.string = wxStrdup(s); } wxListKey(const wxString& s) : m_keyType(wxKEY_STRING) - { m_key.string = wxStrdup(s.c_str()); } + { m_key.string = new wxString(s); } // accessors wxKeyType GetKeyType() const { return m_keyType; } - const wxChar *GetString() const - { wxASSERT( m_keyType == wxKEY_STRING ); return m_key.string; } + const wxString GetString() const + { wxASSERT( m_keyType == wxKEY_STRING ); return *m_key.string; } long GetNumber() const { wxASSERT( m_keyType == wxKEY_INTEGER ); return m_key.integer; } @@ -418,7 +416,7 @@ public: ~wxListKey() { if ( m_keyType == wxKEY_STRING ) - free(m_key.string); + delete m_key.string; } private: @@ -448,11 +446,11 @@ public: virtual ~wxNodeBase(); // FIXME no check is done that the list is really keyed on strings - const wxChar *GetKeyString() const { return m_key.string; } + wxString GetKeyString() const { return *m_key.string; } long GetKeyInteger() const { return m_key.integer; } // Necessary for some existing code - void SetKeyString(wxChar* s) { m_key.string = s; } + void SetKeyString(const wxString& s) { m_key.string = new wxString(s); } void SetKeyInteger(long i) { m_key.integer = i; } #ifdef wxLIST_COMPATIBILITY @@ -602,7 +600,7 @@ protected: // keyed append wxNodeBase *Append(long key, void *object); - wxNodeBase *Append(const wxChar *key, void *object); + wxNodeBase *Append(const wxString& key, void *object); // removes node from the list but doesn't delete it (returns pointer // to the node or NULL if it wasn't found in the list) diff --git a/include/wx/log.h b/include/wx/log.h index 229380c94c..b280d7d309 100644 --- a/include/wx/log.h +++ 
b/include/wx/log.h @@ -476,14 +476,14 @@ WXDLLIMPEXP_BASE const wxChar* wxSysErrorMsg(unsigned long nErrCode = 0); WX_DEFINE_VARARG_FUNC_VOID(wxLog##level, wxDoLog##level) #define DECLARE_LOG_FUNCTION_IMPL(level) \ - extern void WXDLLIMPEXP_BASE wxVLog##level(const wxChar *szFormat, \ + extern void WXDLLIMPEXP_BASE wxVLog##level(const wxString& format, \ va_list argptr); \ - extern void WXDLLIMPEXP_BASE wxDoLog##level(const wxChar *szFormat, \ - ...) ATTRIBUTE_PRINTF_1 + extern void WXDLLIMPEXP_BASE \ + wxDoLog##level(const wxChar *szFormat, ...) ATTRIBUTE_PRINTF_1 #define DECLARE_LOG_FUNCTION2_EXP_IMPL(level, argclass, arg, expdecl) \ extern void expdecl wxVLog##level(argclass arg, \ - const wxChar *szFormat, \ + const wxString& format, \ va_list argptr); \ extern void expdecl wxDoLog##level(argclass arg, \ const wxChar *szFormat, \ @@ -497,12 +497,12 @@ WXDLLIMPEXP_BASE const wxChar* wxSysErrorMsg(unsigned long nErrCode = 0); WX_DEFINE_VARARG_FUNC_NOP(wxLog##level) #define DECLARE_LOG_FUNCTION_IMPL(level) \ - inline void wxVLog##level(const wxChar *WXUNUSED(szFormat), \ + inline void wxVLog##level(const wxString& WXUNUSED(format), \ va_list WXUNUSED(argptr)) { } \ #define DECLARE_LOG_FUNCTION2_EXP_IMPL(level, argclass, arg, expdecl) \ inline void wxVLog##level(argclass WXUNUSED(arg), \ - const wxChar *WXUNUSED(szFormat), \ + const wxString& WXUNUSED(format), \ va_list WXUNUSED(argptr)) {} // Empty Class to fake wxLogNull @@ -590,7 +590,7 @@ DECLARE_LOG_FUNCTION_PUBLIC(SysError) // this version only logs the message if the mask had been added to the // list of masks with AddTraceMask() - DECLARE_LOG_FUNCTION2_IMPL(Trace, const wxChar*, mask); + DECLARE_LOG_FUNCTION2_IMPL(Trace, const wxString&, mask); // and this one does nothing if all of level bits are not set in // wxLog::GetActive()->GetTraceMask() -- it's deprecated in favour of // string identifiers diff --git a/include/wx/string.h b/include/wx/string.h index 5f3801ef51..3bc5588f9a 100644 --- 
a/include/wx/string.h +++ b/include/wx/string.h @@ -371,13 +371,14 @@ private: T data; size_t len; - SubstrBufFromType() {} SubstrBufFromType(const T& data_, size_t len_) : data(data_), len(len_) {} }; #if wxUSE_UNICODE_UTF8 - // FIXME-UTF8: this will have to use slightly different type + // even char* -> char* needs conversion, from locale charset to UTF-8 + typedef SubstrBufFromType SubstrBufFromWC; + typedef SubstrBufFromType SubstrBufFromMB; #elif wxUSE_UNICODE_WCHAR typedef SubstrBufFromType SubstrBufFromWC; typedef SubstrBufFromType SubstrBufFromMB; @@ -392,8 +393,12 @@ private: // between UTF-8 and wchar_t* representations of the string are mostly // contained here. -#if wxUSE_UNICODE - // FIXME-UTF8: This will need changes when UTF8 build is introduced +#if wxUSE_UNICODE_UTF8 + static SubstrBufFromMB ConvertStr(const char *psz, size_t nLength, + const wxMBConv& conv); + static SubstrBufFromWC ConvertStr(const wchar_t *pwz, size_t nLength, + const wxMBConv& conv); +#elif wxUSE_UNICODE_WCHAR static SubstrBufFromMB ConvertStr(const char *psz, size_t nLength, const wxMBConv& conv); #else @@ -447,6 +452,7 @@ private: // encodes the character to a form used to represent it in internal // representation (returns a string in UTF8 version) static wxChar EncodeChar(wxUniChar ch) { return (wxChar)ch; } + static wxUniChar DecodeChar(wxStringImpl::const_iterator i) { return *i; } // translates position index in wxString to/from index in underlying // wxStringImpl: @@ -459,11 +465,56 @@ private: #else // wxUSE_UNICODE_UTF8 - typedef char Utf8CharBuffer[5]; + // checks correctness of UTF-8 sequence + static bool IsValidUtf8String(const char *c); +#ifdef __WXDEBUG__ + static bool IsValidUtf8LeadByte(unsigned char c); +#endif + + // table of offsets to skip forward when iterating + static unsigned char ms_utf8IterTable[256]; + + static void IncIter(wxStringImpl::iterator& i) + { + wxASSERT( IsValidUtf8LeadByte(*i) ); + i += ms_utf8IterTable[(unsigned char)*i]; + } + static 
void IncIter(wxStringImpl::const_iterator& i) + { + wxASSERT( IsValidUtf8LeadByte(*i) ); + i += ms_utf8IterTable[(unsigned char)*i]; + } + + static void DecIter(wxStringImpl::iterator& i); + static void DecIter(wxStringImpl::const_iterator& i); + static wxStringImpl::iterator AddToIter(wxStringImpl::iterator i, int n); + static wxStringImpl::const_iterator AddToIter(wxStringImpl::const_iterator i, int n); + static int DiffIters(wxStringImpl::const_iterator i1, wxStringImpl::const_iterator i2); + static int DiffIters(wxStringImpl::iterator i1, wxStringImpl::iterator i2); + + struct Utf8CharBuffer + { + char data[5]; + operator const char*() const { return data; } + }; static Utf8CharBuffer EncodeChar(wxUniChar ch); // returns n copies of ch encoded in UTF-8 string static wxCharBuffer EncodeNChars(size_t n, wxUniChar ch); + // returns the length of UTF-8 encoding of the character with lead byte 'c' + static size_t GetUtf8CharLength(char c) + { + wxASSERT( IsValidUtf8LeadByte(c) ); + return ms_utf8IterTable[(unsigned char)c]; + } + + // decodes single UTF-8 character from UTF-8 string + // FIXME-UTF8: move EncodeChar/DecodeChar and other operations to separate + // class + static wxUniChar DecodeChar(wxStringImpl::const_iterator i) + { return wxUniCharRef::DecodeChar(i); } + friend class WXDLLIMPEXP_BASE wxUniCharRef; + size_t PosToImpl(size_t pos) const { if ( pos == 0 || pos == npos ) @@ -472,6 +523,15 @@ private: return wxStringImpl::const_iterator(begin() + pos) - m_impl.begin(); } + void PosLenToImpl(size_t pos, size_t len, size_t *implPos, size_t *implLen) const; + + size_t LenToImpl(size_t len) const + { + size_t pos, len2; + PosLenToImpl(0, len, &pos, &len2); + return len2; + } + size_t PosFromImpl(size_t pos) const { if ( pos == 0 || pos == npos ) @@ -480,13 +540,23 @@ private: return const_iterator(m_impl.begin() + pos) - begin(); } - // FIXME: return as-is without copying under UTF8 locale, return - // converted string under other locales - needs 
wxCharBuffer - // changes - static wxCharBuffer ImplStr(const char* str); + size_t IterToImplPos(wxStringImpl::iterator i) const + { return wxStringImpl::const_iterator(i) - m_impl.begin(); } + + // FIXME-UTF8: return as-is without copying under UTF8 locale, return + // converted string under other locales - needs wxCharBuffer + // changes + static wxCharBuffer ImplStr(const char* str, + const wxMBConv& conv = wxConvLibc) + { return ConvertStr(str, npos, conv).data; } + static SubstrBufFromMB ImplStr(const char* str, size_t n, + const wxMBConv& conv = wxConvLibc) + { return ConvertStr(str, n, conv); } static wxCharBuffer ImplStr(const wchar_t* str) - { return wxConvUTF8.cWC2MB(str); } + { return ConvertStr(str, npos, wxConvUTF8).data; } + static SubstrBufFromWC ImplStr(const wchar_t* str, size_t n) + { return ConvertStr(str, n, wxConvUTF8); } #endif // !wxUSE_UNICODE_UTF8/wxUSE_UNICODE_UTF8 @@ -496,7 +566,9 @@ public: wxString() {} // copy ctor + // FIXME-UTF8: this one needs to do UTF-8 conversion in UTF-8 build! wxString(const wxStringImpl& stringSrc) : m_impl(stringSrc) { } + wxString(const wxString& stringSrc) : m_impl(stringSrc.m_impl) { } // string containing nRepeat copies of ch @@ -571,6 +643,18 @@ public: wxString(const wxString& str, size_t nLength) : m_impl(str.Mid(0, nLength).m_impl) {} + // even if we're not built with wxUSE_STL == 1 it is very convenient to allow + // implicit conversions from std::string to wxString as this allows to use + // the same strings in non-GUI and GUI code, however we don't want to + // unconditionally add this ctor as it would make wx lib dependent on + // libstdc++ on some Linux versions which is bad, so instead we ask the + // client code to define this wxUSE_STD_STRING symbol if they need it +#if wxUSE_STD_STRING && !wxUSE_STL_BASED_WXSTRING + wxString(const wxStdString& s) + // FIXME-UTF8: this one needs to do UTF-8 conversion in UTF-8 build! 
+ : m_impl(s.c_str()) { } // FIXME-UTF8: this is broken for embedded 0s +#endif // wxUSE_STD_STRING && !wxUSE_STL_BASED_WXSTRING + public: // standard types typedef wxUniChar value_type; @@ -583,7 +667,12 @@ public: typedef wxUniChar const_reference; #if wxUSE_STL - #define WX_STR_ITERATOR_TAG std::random_access_iterator_tag + #if wxUSE_UNICODE_UTF8 + // random access is not O(1), as required by Random Access Iterator + #define WX_STR_ITERATOR_TAG std::bidirectional_iterator_tag + #else + #define WX_STR_ITERATOR_TAG std::random_access_iterator_tag + #endif #else #define WX_STR_ITERATOR_TAG void /* dummy type */ #endif @@ -599,8 +688,6 @@ public: typedef reference_type reference; \ typedef pointer_type pointer; \ \ - iterator_name(const iterator_name& i) : m_cur(i.m_cur) {} \ - \ reference operator*() const { return reference_ctor; } \ reference operator[](size_t n) const { return *(*this + n); } \ \ @@ -621,14 +708,6 @@ public: return tmp; \ } \ \ - iterator_name operator+(int n) const \ - { return iterator_name(wxString::AddToIter(m_cur, n)); } \ - iterator_name operator+(size_t n) const \ - { return iterator_name(wxString::AddToIter(m_cur, (int)n)); } \ - iterator_name operator-(int n) const \ - { return iterator_name(wxString::AddToIter(m_cur, -n)); } \ - iterator_name operator-(size_t n) const \ - { return iterator_name(wxString::AddToIter(m_cur, -(int)n)); } \ iterator_name& operator+=(int n) \ { m_cur = wxString::AddToIter(m_cur, n); return *this; } \ iterator_name& operator+=(size_t n) \ @@ -657,7 +736,6 @@ public: \ private: \ /* for internal wxString use only: */ \ - iterator_name(underlying_iterator ptr) : m_cur(ptr) {} \ operator underlying_iterator() const { return m_cur; } \ \ friend class WXDLLIMPEXP_BASE wxString; \ @@ -668,23 +746,90 @@ public: class const_iterator; +#if wxUSE_UNICODE_UTF8 + class iterator + { + // NB: In UTF-8 build, (non-const) iterator needs to keep reference + // to the underlying wxStringImpl, because UTF-8 is variable-length + 
// encoding and changing the value pointed to by an iterator using + // its operator* requires calling wxStringImpl::replace() if the old + // and new values differ in their encoding's length. + + WX_STR_ITERATOR_IMPL(iterator, wxChar*, wxUniCharRef, + wxUniCharRef::CreateForString(m_str, m_cur)); + + public: + iterator(const iterator& i) : m_cur(i.m_cur), m_str(i.m_str) {} + + iterator operator+(int n) const + { return iterator(m_str, wxString::AddToIter(m_cur, n)); } + iterator operator+(size_t n) const + { return iterator(m_str, wxString::AddToIter(m_cur, (int)n)); } + iterator operator-(int n) const + { return iterator(m_str, wxString::AddToIter(m_cur, -n)); } + iterator operator-(size_t n) const + { return iterator(m_str, wxString::AddToIter(m_cur, -(int)n)); } + + private: + iterator(wxString *str, underlying_iterator ptr) + : m_cur(ptr), m_str(str->m_impl) {} + iterator(wxStringImpl& str, underlying_iterator ptr) + : m_cur(ptr), m_str(str) {} + + wxStringImpl& m_str; + + friend class const_iterator; + }; +#else // !wxUSE_UNICODE_UTF8 class iterator { WX_STR_ITERATOR_IMPL(iterator, wxChar*, wxUniCharRef, wxUniCharRef::CreateForString(m_cur)); + public: + iterator(const iterator& i) : m_cur(i.m_cur) {} + + iterator operator+(int n) const + { return iterator(wxString::AddToIter(m_cur, n)); } + iterator operator+(size_t n) const + { return iterator(wxString::AddToIter(m_cur, (int)n)); } + iterator operator-(int n) const + { return iterator(wxString::AddToIter(m_cur, -n)); } + iterator operator-(size_t n) const + { return iterator(wxString::AddToIter(m_cur, -(int)n)); } + + private: + // for internal wxString use only: + iterator(underlying_iterator ptr) : m_cur(ptr) {} + iterator(wxString *WXUNUSED(str), underlying_iterator ptr) : m_cur(ptr) {} + friend class const_iterator; }; +#endif // wxUSE_UNICODE_UTF8/!wxUSE_UNICODE_UTF8 class const_iterator { // NB: reference_type is intentionally value, not reference, the character // may be encoded differently in
wxString data: WX_STR_ITERATOR_IMPL(const_iterator, const wxChar*, wxUniChar, - wxUniChar(*m_cur)); + wxString::DecodeChar(m_cur)); public: + const_iterator(const const_iterator& i) : m_cur(i.m_cur) {} const_iterator(const iterator& i) : m_cur(i.m_cur) {} + + const_iterator operator+(int n) const + { return const_iterator(wxString::AddToIter(m_cur, n)); } + const_iterator operator+(size_t n) const + { return const_iterator(wxString::AddToIter(m_cur, (int)n)); } + const_iterator operator-(int n) const + { return const_iterator(wxString::AddToIter(m_cur, -n)); } + const_iterator operator-(size_t n) const + { return const_iterator(wxString::AddToIter(m_cur, -(int)n)); } + + private: + // for internal wxString use only: + const_iterator(underlying_iterator ptr) : m_cur(ptr) {} }; #undef WX_STR_ITERATOR_TAG @@ -767,10 +912,10 @@ public: // first valid index position const_iterator begin() const { return const_iterator(m_impl.begin()); } - iterator begin() { return iterator(m_impl.begin()); } + iterator begin() { return iterator(this, m_impl.begin()); } // position one after the last valid one const_iterator end() const { return const_iterator(m_impl.end()); } - iterator end() { return iterator(m_impl.end()); } + iterator end() { return iterator(this, m_impl.end()); } // first element of the reversed string const_reverse_iterator rbegin() const @@ -925,7 +1070,7 @@ public: // explicit conversion to C string in internal representation (char*, // wchar_t*, UTF-8-encoded char*, depending on the build): - const_pointer wx_str() const { return m_impl.c_str(); } + const wxStringCharType *wx_str() const { return m_impl.c_str(); } // conversion to *non-const* multibyte or widestring buffer; modifying // returned buffer won't affect the string, these methods are only useful @@ -963,21 +1108,26 @@ public: const wxWX2MBbuf mbc_str() const { return mb_str(*wxConvCurrent); } - const wxChar* wc_str() const { return c_str(); } - +#if wxUSE_UNICODE_WCHAR + const wxChar* wc_str() const { 
return wx_str(); } +#elif wxUSE_UNICODE_UTF8 + const wxWCharBuffer wc_str() const; +#endif // for compatibility with !wxUSE_UNICODE version - const wxChar* wc_str(const wxMBConv& WXUNUSED(conv)) const { return c_str(); } + const wxWX2WCbuf wc_str(const wxMBConv& WXUNUSED(conv)) const + { return wc_str(); } #if wxMBFILES const wxCharBuffer fn_str() const { return mb_str(wxConvFile); } #else // !wxMBFILES - const wxChar* fn_str() const { return c_str(); } + const wxWX2WCbuf fn_str() const { return wc_str(); } #endif // wxMBFILES/!wxMBFILES + #else // ANSI - const wxChar* mb_str() const { return c_str(); } + const wxChar* mb_str() const { return wx_str(); } // for compatibility with wxUSE_UNICODE version - const wxChar* mb_str(const wxMBConv& WXUNUSED(conv)) const { return c_str(); } + const wxChar* mb_str(const wxMBConv& WXUNUSED(conv)) const { return wx_str(); } const wxWX2MBbuf mbc_str() const { return mb_str(); } @@ -1588,14 +1738,25 @@ public: { #if wxUSE_UNICODE_UTF8 if ( !ch.IsAscii() ) - m_impl.insert(begin() + nPos, EncodeNChars(n, ch)); + m_impl.insert(PosToImpl(nPos), EncodeNChars(n, ch)); else #endif - m_impl.insert(begin() + nPos, n, (wxStringCharType)ch); + m_impl.insert(PosToImpl(nPos), n, (wxStringCharType)ch); return *this; } iterator insert(iterator it, wxUniChar ch) - { return iterator(m_impl.insert(it, EncodeChar(ch))); } + { +#if wxUSE_UNICODE_UTF8 + if ( !ch.IsAscii() ) + { + size_t pos = IterToImplPos(it); + m_impl.insert(pos, EncodeChar(ch)); + return iterator(this, m_impl.begin() + pos); + } + else +#endif + return iterator(this, m_impl.insert(it, (wxStringCharType)ch)); + } void insert(iterator it, const_iterator first, const_iterator last) { m_impl.insert(it, first, last); } void insert(iterator it, const char *first, const char *last) @@ -1606,7 +1767,7 @@ public: { #if wxUSE_UNICODE_UTF8 if ( !ch.IsAscii() ) - m_impl.insert(it, EncodeNChars(n, ch)); + m_impl.insert(IterToImplPos(it), EncodeNChars(n, ch)); else #endif m_impl.insert(it, n, 
(wxStringCharType)ch); @@ -1622,9 +1783,9 @@ public: } // delete characters from first up to last iterator erase(iterator first, iterator last) - { return iterator(m_impl.erase(first, last)); } + { return iterator(this, m_impl.erase(first, last)); } iterator erase(iterator first) - { return iterator(m_impl.erase(first)); } + { return iterator(this, m_impl.erase(first)); } #ifdef wxSTRING_BASE_HASNT_CLEAR void clear() { erase(); } @@ -1874,7 +2035,11 @@ public: // as strpbrk() but starts at nStart, returns npos if not found size_t find_first_of(const wxString& str, size_t nStart = 0) const +#if wxUSE_UNICODE // FIXME-UTF8: temporary + { return find_first_of(str.mb_str().data(), nStart); } +#else { return find_first_of((const wxChar*)str.c_str(), nStart); } +#endif // same as above size_t find_first_of(const char* sz, size_t nStart = 0) const; size_t find_first_of(const wchar_t* sz, size_t nStart = 0) const; @@ -1885,7 +2050,11 @@ public: { return find(c, nStart); } // find the last (starting from nStart) char from str in this string size_t find_last_of (const wxString& str, size_t nStart = npos) const +#if wxUSE_UNICODE // FIXME-UTF8: temporary + { return find_last_of(str.mb_str().data(), nStart); } +#else { return find_last_of((const wxChar*)str.c_str(), nStart); } +#endif // same as above size_t find_last_of (const char* sz, size_t nStart = npos) const; size_t find_last_of (const wchar_t* sz, size_t nStart = npos) const; @@ -1899,7 +2068,11 @@ public: // as strspn() (starting from nStart), returns npos on failure size_t find_first_not_of(const wxString& str, size_t nStart = 0) const +#if wxUSE_UNICODE // FIXME-UTF8: temporary + { return find_first_not_of(str.mb_str().data(), nStart); } +#else { return find_first_not_of((const wxChar*)str.c_str(), nStart); } +#endif // same as above size_t find_first_not_of(const char* sz, size_t nStart = 0) const; size_t find_first_not_of(const wchar_t* sz, size_t nStart = 0) const; @@ -1909,7 +2082,11 @@ public: size_t 
find_first_not_of(wxUniChar ch, size_t nStart = 0) const; // as strcspn() size_t find_last_not_of(const wxString& str, size_t nStart = npos) const +#if wxUSE_UNICODE // FIXME-UTF8: temporary + { return find_last_not_of(str.mb_str().data(), nStart); } +#else { return find_last_not_of((const wxChar*)str.c_str(), nStart); } +#endif // same as above size_t find_last_not_of(const char* sz, size_t nStart = npos) const; size_t find_last_not_of(const wchar_t* sz, size_t nStart = npos) const; diff --git a/include/wx/stringimpl.h b/include/wx/stringimpl.h index b680b52cf1..0a3f45bb52 100644 --- a/include/wx/stringimpl.h +++ b/include/wx/stringimpl.h @@ -42,6 +42,10 @@ // global pointer to empty string extern WXDLLIMPEXP_DATA_BASE(const wxChar*) wxEmptyString; +#if wxUSE_UNICODE_UTF8 +// FIXME-UTF8: we should have only one wxEmptyString +extern WXDLLIMPEXP_DATA_BASE(const wxStringCharType*) wxEmptyStringImpl; +#endif // ---------------------------------------------------------------------------- @@ -61,7 +65,7 @@ extern WXDLLIMPEXP_DATA_BASE(const wxChar*) wxEmptyString; #ifdef HAVE_STD_WSTRING typedef std::wstring wxStdString; #else - typedef std::basic_string wxStdString; + typedef std::basic_string wxStdString; #endif #else typedef std::string wxStdString; @@ -97,8 +101,8 @@ struct WXDLLIMPEXP_BASE wxStringData size_t nDataLength, // actual string length nAllocLength; // allocated memory size - // mimics declaration 'wxChar data[nAllocLength]' - wxChar* data() const { return (wxChar*)(this + 1); } + // mimics declaration 'wxStringCharType data[nAllocLength]' + wxStringCharType* data() const { return (wxStringCharType*)(this + 1); } // empty string has a special ref count so it's never deleted bool IsEmpty() const { return (nRefs == -1); } @@ -143,7 +147,11 @@ protected: // string (re)initialization functions // initializes the string to the empty value (must be called only from // ctors, use Reinit() otherwise) +#if wxUSE_UNICODE_UTF8 + void Init() { m_pchData = 
(wxStringCharType *)wxEmptyStringImpl; } // FIXME-UTF8 +#else void Init() { m_pchData = (wxStringCharType *)wxEmptyString; } +#endif // initializes the string with (a part of) C-string void InitWith(const wxStringCharType *psz, size_t nPos = 0, size_t nLen = npos); // as Init, but also frees old data @@ -378,7 +386,7 @@ public: { ConcatSelf(str.length(), str.c_str()); return *this; } // append first n (or all if n == npos) characters of sz wxStringImpl& append(const wxStringCharType *sz) - { ConcatSelf(wxStrlen(sz), sz); return *this; } + { ConcatSelf(Strsize(sz), sz); return *this; } wxStringImpl& append(const wxStringCharType *sz, size_t n) { ConcatSelf(n, sz); return *this; } // append n copies of ch @@ -395,7 +403,7 @@ public: { clear(); return append(str, pos, n); } // same as `= first n (or all if n == npos) characters of sz' wxStringImpl& assign(const wxStringCharType *sz) - { clear(); return append(sz, wxStrlen(sz)); } + { clear(); return append(sz, Strsize(sz)); } wxStringImpl& assign(const wxStringCharType *sz, size_t n) { clear(); return append(sz, n); } // same as `= n copies of ch' @@ -430,9 +438,9 @@ public: // insert first n (or all if n == npos) characters of sz wxStringImpl& insert(size_t nPos, const wxStringCharType *sz, size_t n = npos); // insert n copies of ch - wxStringImpl& insert(size_t nPos, size_t n, wxStringCharType ch)// FIXME-UTF8: tricky + wxStringImpl& insert(size_t nPos, size_t n, wxStringCharType ch) { return insert(nPos, wxStringImpl(n, ch)); } - iterator insert(iterator it, wxStringCharType ch) // FIXME-UTF8: tricky + iterator insert(iterator it, wxStringCharType ch) { size_t idx = it - begin(); insert(idx, 1, ch); return begin() + idx; } void insert(iterator it, const_iterator first, const_iterator last) { insert(it - begin(), first, last - first); } @@ -525,6 +533,13 @@ public: void DoUngetWriteBuf(size_t nLen); #endif +private: +#if wxUSE_UNICODE_UTF8 + static size_t Strsize(const wxStringCharType *s) { return strlen(s); } 
+#else + static size_t Strsize(const wxStringCharType *s) { return wxStrlen(s); } +#endif + friend class WXDLLIMPEXP_BASE wxString; }; diff --git a/include/wx/strvararg.h b/include/wx/strvararg.h index 6ad650be85..55eb9c01f2 100644 --- a/include/wx/strvararg.h +++ b/include/wx/strvararg.h @@ -93,7 +93,7 @@ template<> struct WXDLLIMPEXP_BASE wxArgNormalizer { wxArgNormalizer(const wxCStrData& value) : m_value(value) {} - const wxStringCharType *get() const; + const wxChar *get() const; // FIXME-UTF8: should be wxStringCharType const wxCStrData& m_value; }; @@ -109,7 +109,7 @@ template<> struct WXDLLIMPEXP_BASE wxArgNormalizer { wxArgNormalizer(const wxString& value) : m_value(value) {} - const wxStringCharType *get() const; + const wxChar *get() const; // FIXME-UTF8: should be wxStringCharType const wxString& m_value; }; @@ -121,8 +121,7 @@ struct wxArgNormalizer : public wxArgNormalizer : wxArgNormalizer(value) {} }; -#if wxUSE_UNICODE_WCHAR - +#if wxUSE_UNICODE // FIXME-UTF8: should be wxUSE_UNICODE_WCHAR template<> struct WXDLLIMPEXP_BASE wxArgNormalizer { @@ -139,9 +138,30 @@ struct wxArgNormalizer : public wxArgNormalizer wxArgNormalizer(char *value) : wxArgNormalizer(value) {} }; +#endif // wxUSE_UNICODE_WCHAR + +// FIXME-UTF8 +#if 0 // wxUSE_UNICODE_UTF8 +// for conversion from local charset to UTF-8 +template<> +struct WXDLLIMPEXP_BASE wxArgNormalizer +{ + wxArgNormalizer(const char *value); + ~wxArgNormalizer(); + const char *get() const; + + wxCharBuffer *m_value; +}; -#elif wxUSE_WCHAR_T // !wxUSE_UNICODE_WCHAR && wxUSE_WCHAR_T +template<> +struct wxArgNormalizer : public wxArgNormalizer +{ + wxArgNormalizer(char *value) + : wxArgNormalizer(value) {} +}; +#endif // wxUSE_UNICODE_UTF8 +#if /*wxUSE_UNICODE_UTF8 || */ !wxUSE_UNICODE // FIXME-UTF8 template<> struct WXDLLIMPEXP_BASE wxArgNormalizer { @@ -158,8 +178,7 @@ struct wxArgNormalizer : public wxArgNormalizer wxArgNormalizer(wchar_t *value) : wxArgNormalizer(value) {} }; - -#endif // 
wxUSE_UNICODE_WCHAR / !wxUSE_UNICODE_WCHAR && wxUSE_WCHAR_T +#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE // versions for passing wx[W]CharBuffer: template<> diff --git a/include/wx/unichar.h b/include/wx/unichar.h index b7fe54d9ca..fc664c031f 100644 --- a/include/wx/unichar.h +++ b/include/wx/unichar.h @@ -69,6 +69,9 @@ public: // Returns Unicode code point value of the character value_type GetValue() const { return m_value; } + // Returns true if the character is an ASCII character: + bool IsAscii() const { return m_value < 0x80; } + // Conversions to char and wchar_t types: all of those are needed to be // able to pass wxUniChars to verious standard narrow and wide character // functions @@ -152,7 +155,11 @@ private: typedef wxStringImpl::iterator iterator; // create the reference +#if wxUSE_UNICODE_UTF8 + wxUniCharRef(wxStringImpl& str, iterator pos) : m_str(str), m_pos(pos) {} +#else wxUniCharRef(iterator pos) : m_pos(pos) {} +#endif public: // NB: we have to make this public, because we don't have wxString @@ -160,23 +167,26 @@ public: // as friend; so at least don't use a ctor but a static function // that must be used explicitly (this is more than using 'explicit' // keyword on ctor!): +#if wxUSE_UNICODE_UTF8 + static wxUniCharRef CreateForString(wxStringImpl& str, iterator pos) + { return wxUniCharRef(str, pos); } +#else static wxUniCharRef CreateForString(iterator pos) { return wxUniCharRef(pos); } +#endif wxUniChar::value_type GetValue() const { return UniChar().GetValue(); } + bool IsAscii() const { return UniChar().IsAscii(); } // Assignment operators: - wxUniCharRef& operator=(const wxUniCharRef& c) - { - *m_pos = *c.m_pos; - return *this; - }; +#if wxUSE_UNICODE_UTF8 + wxUniCharRef& operator=(const wxUniChar& c); +#else + wxUniCharRef& operator=(const wxUniChar& c) { *m_pos = c; return *this; } +#endif - wxUniCharRef& operator=(const wxUniChar& c) - { - *m_pos = c; - return *this; - }; + wxUniCharRef& operator=(const wxUniCharRef& c) + { return
*this = c.UniChar(); } wxUniCharRef& operator=(char c) { return *this = wxUniChar(c); } wxUniCharRef& operator=(wchar_t c) { return *this = wxUniChar(c); } @@ -227,11 +237,28 @@ public: #endif private: - wxUniChar UniChar() const { return *m_pos; } + wxUniChar UniChar() const + { +#if wxUSE_UNICODE_UTF8 + return DecodeChar(m_pos); +#else + return *m_pos; +#endif + } + +#if wxUSE_UNICODE_UTF8 + // FIXME-UTF8: move this to a separate 'string operations' class + static wxUniChar DecodeChar(wxStringImpl::const_iterator i); + friend class WXDLLIMPEXP_BASE wxString; +#endif + friend class WXDLLIMPEXP_BASE wxUniChar; private: - // pointer to the character in string + // reference to the string and pointer to the character in string +#if wxUSE_UNICODE_UTF8 + wxStringImpl& m_str; +#endif iterator m_pos; }; diff --git a/setup.h.in b/setup.h.in index d9b83ee4bb..d4e09cd1e8 100644 --- a/setup.h.in +++ b/setup.h.in @@ -557,6 +557,8 @@ #define wxUSE_UNICODE_MSLU 0 +#define wxUSE_UNICODE_UTF8 0 + #define wxUSE_DC_CACHEING 0 #define wxUSE_GADGETS 0 diff --git a/src/common/list.cpp b/src/common/list.cpp index 5a63e0c524..7d069da0ff 100644 --- a/src/common/list.cpp +++ b/src/common/list.cpp @@ -53,7 +53,7 @@ bool wxListKey::operator==(wxListKeyValue value) const // by not putting return here... 
case wxKEY_STRING: - return wxStrcmp(m_key.string, value.string) == 0; + return *m_key.string == *value.string; case wxKEY_INTEGER: return m_key.integer == value.integer; @@ -84,7 +84,7 @@ wxNodeBase::wxNodeBase(wxListBase *list, case wxKEY_STRING: // to be free()d later - m_key.string = wxStrdup(key.GetString()); + m_key.string = new wxString(key.GetString()); break; default: @@ -107,7 +107,7 @@ wxNodeBase::~wxNodeBase() { if ( m_list->m_keyType == wxKEY_STRING ) { - free(m_key.string); + delete m_key.string; } m_list->DetachNode(this); @@ -257,7 +257,7 @@ wxNodeBase *wxListBase::Append(long key, void *object) return AppendCommon(node); } -wxNodeBase *wxListBase::Append (const wxChar *key, void *object) +wxNodeBase *wxListBase::Append (const wxString& key, void *object) { wxCHECK_MSG( (m_keyType == wxKEY_STRING) || (m_keyType == wxKEY_NONE && m_count == 0), diff --git a/src/common/log.cpp b/src/common/log.cpp index 69abd5e0ba..e6690b652a 100644 --- a/src/common/log.cpp +++ b/src/common/log.cpp @@ -84,10 +84,10 @@ // ---------------------------------------------------------------------------- // generic log function -void wxVLogGeneric(wxLogLevel level, const wxChar *szFormat, va_list argptr) +void wxVLogGeneric(wxLogLevel level, const wxString& format, va_list argptr) { if ( wxLog::IsEnabled() ) { - wxLog::OnLog(level, wxString::FormatV(szFormat, argptr), time(NULL)); + wxLog::OnLog(level, wxString::FormatV(format, argptr), time(NULL)); } } @@ -100,11 +100,11 @@ void wxDoLogGeneric(wxLogLevel level, const wxChar *szFormat, ...) 
} #define IMPLEMENT_LOG_FUNCTION(level) \ - void wxVLog##level(const wxChar *szFormat, va_list argptr) \ + void wxVLog##level(const wxString& format, va_list argptr) \ { \ if ( wxLog::IsEnabled() ) { \ wxLog::OnLog(wxLOG_##level, \ - wxString::FormatV(szFormat, argptr), time(NULL));\ + wxString::FormatV(format, argptr), time(NULL)); \ } \ } \ \ @@ -134,9 +134,9 @@ void wxSafeShowMessage(const wxString& title, const wxString& text) // fatal errors can't be suppressed nor handled by the custom log target and // always terminate the program -void wxVLogFatalError(const wxChar *szFormat, va_list argptr) +void wxVLogFatalError(const wxString& format, va_list argptr) { - wxSafeShowMessage(_T("Fatal Error"), wxString::FormatV(szFormat, argptr)); + wxSafeShowMessage(_T("Fatal Error"), wxString::FormatV(format, argptr)); #ifdef __WXWINCE__ ExitThread(3); @@ -157,12 +157,12 @@ void wxDoLogFatalError(const wxChar *szFormat, ...) } // same as info, but only if 'verbose' mode is on -void wxVLogVerbose(const wxChar *szFormat, va_list argptr) +void wxVLogVerbose(const wxString& format, va_list argptr) { if ( wxLog::IsEnabled() ) { if ( wxLog::GetActiveTarget() != NULL && wxLog::GetVerbose() ) { wxLog::OnLog(wxLOG_Info, - wxString::FormatV(szFormat, argptr), time(NULL)); + wxString::FormatV(format, argptr), time(NULL)); } } } @@ -194,17 +194,17 @@ void wxDoLogVerbose(const wxChar *szFormat, ...) va_end(argptr); \ } - void wxVLogTrace(const wxChar *mask, const wxChar *szFormat, va_list argptr) + void wxVLogTrace(const wxString& mask, const wxString& format, va_list argptr) { if ( wxLog::IsEnabled() && wxLog::IsAllowedTraceMask(mask) ) { wxString msg; - msg << _T("(") << mask << _T(") ") << wxString::FormatV(szFormat, argptr); + msg << _T("(") << mask << _T(") ") << wxString::FormatV(format, argptr); wxLog::OnLog(wxLOG_Trace, msg, time(NULL)); } } - void wxDoLogTrace(const wxChar *mask, const wxChar *szFormat, ...) 
+ void wxDoLogTrace(const wxString& mask, const wxChar *szFormat, ...) { va_list argptr; va_start(argptr, szFormat); @@ -212,13 +212,13 @@ void wxDoLogVerbose(const wxChar *szFormat, ...) va_end(argptr); } - void wxVLogTrace(wxTraceMask mask, const wxChar *szFormat, va_list argptr) + void wxVLogTrace(wxTraceMask mask, const wxString& format, va_list argptr) { // we check that all of mask bits are set in the current mask, so // that wxLogTrace(wxTraceRefCount | wxTraceOle) will only do something // if both bits are set. if ( wxLog::IsEnabled() && ((wxLog::GetTraceMask() & mask) == mask) ) { - wxLog::OnLog(wxLOG_Trace, wxString::FormatV(szFormat, argptr), time(NULL)); + wxLog::OnLog(wxLOG_Trace, wxString::FormatV(format, argptr), time(NULL)); } } @@ -246,9 +246,9 @@ static inline wxString wxLogSysErrorHelper(long err) return wxString::Format(_(" (error %ld: %s)"), err, wxSysErrorMsg(err)); } -void WXDLLEXPORT wxVLogSysError(const wxChar *szFormat, va_list argptr) +void WXDLLEXPORT wxVLogSysError(const wxString& format, va_list argptr) { - wxVLogSysError(wxSysErrorCode(), szFormat, argptr); + wxVLogSysError(wxSysErrorCode(), format, argptr); } void WXDLLEXPORT wxDoLogSysError(const wxChar *szFormat, ...) @@ -259,11 +259,11 @@ void WXDLLEXPORT wxDoLogSysError(const wxChar *szFormat, ...) 
va_end(argptr); } -void WXDLLEXPORT wxVLogSysError(long err, const wxChar *fmt, va_list argptr) +void WXDLLEXPORT wxVLogSysError(long err, const wxString& format, va_list argptr) { if ( wxLog::IsEnabled() ) { wxLog::OnLog(wxLOG_Error, - wxString::FormatV(fmt, argptr) + wxLogSysErrorHelper(err), + wxString::FormatV(format, argptr) + wxLogSysErrorHelper(err), time(NULL)); } } diff --git a/src/common/string.cpp b/src/common/string.cpp index 9513b69074..003980e556 100644 --- a/src/common/string.cpp +++ b/src/common/string.cpp @@ -105,6 +105,427 @@ wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str) #endif // wxUSE_STD_IOSTREAM +// =========================================================================== +// wxString class core +// =========================================================================== + +#if wxUSE_UNICODE_UTF8 + +// --------------------------------------------------------------------------- +// UTF-8 operations +// --------------------------------------------------------------------------- + +// +// Table 3.1B from Unicode spec: Legal UTF-8 Byte Sequences +// +// Code Points | 1st Byte | 2nd Byte | 3rd Byte | 4th Byte | +// -------------------+----------+----------+----------+----------+ +// U+0000..U+007F | 00..7F | | | | +// U+0080..U+07FF | C2..DF | 80..BF | | | +// U+0800..U+0FFF | E0 | A0..BF | 80..BF | | +// U+1000..U+FFFF | E1..EF | 80..BF | 80..BF | | +// U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF | +// U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF | +// U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF | +// -------------------+----------+----------+----------+----------+ + +bool wxString::IsValidUtf8String(const char *str) +{ + if ( !str ) + return true; // empty string is UTF8 string + + const unsigned char *c = (const unsigned char*)str; + + for ( ; *c; ++c ) + { + unsigned char b = *c; + + if ( b <= 0x7F ) // 00..7F + continue; + + else if ( b < 0xC2 ) // invalid lead bytes: 80..C1 + return false; + + 
// two-byte sequences: + else if ( b <= 0xDF ) // C2..DF + { + b = *(++c); + if ( !(b >= 0x80 && b <= 0xBF ) ) + return false; + } + + // three-byte sequences: + else if ( b == 0xE0 ) + { + b = *(++c); + if ( !(b >= 0xA0 && b <= 0xBF ) ) + return false; + b = *(++c); + if ( !(b >= 0x80 && b <= 0xBF ) ) + return false; + } + else if ( b <= 0xEF ) // E1..EF + { + for ( int i = 0; i < 2; ++i ) + { + b = *(++c); + if ( !(b >= 0x80 && b <= 0xBF ) ) + return false; + } + } + + // four-byte sequences: + else if ( b == 0xF0 ) + { + b = *(++c); + if ( !(b >= 0x90 && b <= 0xBF ) ) + return false; + for ( int i = 0; i < 2; ++i ) + { + b = *(++c); + if ( !(b >= 0x80 && b <= 0xBF ) ) + return false; + } + } + else if ( b <= 0xF3 ) // F1..F3 + { + for ( int i = 0; i < 3; ++i ) + { + b = *(++c); + if ( !(b >= 0x80 && b <= 0xBF ) ) + return false; + } + } + else if ( b == 0xF4 ) + { + b = *(++c); + if ( !(b >= 0x80 && b <= 0x8F ) ) + return false; + for ( int i = 0; i < 2; ++i ) + { + b = *(++c); + if ( !(b >= 0x80 && b <= 0xBF ) ) + return false; + } + } + else // otherwise, it's invalid lead byte + return false; + } + + return true; +} + +#ifdef __WXDEBUG__ +/* static */ +bool wxString::IsValidUtf8LeadByte(unsigned char c) +{ + return (c <= 0x7F) || (c >= 0xC2 && c <= 0xF4); +} +#endif + +unsigned char wxString::ms_utf8IterTable[256] = { + // single-byte sequences (ASCII): + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00..0F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10..1F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20..2F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30..3F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40..4F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50..5F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60..6F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70..7F + + // these are invalid, we use step 1 to skip + // over them (should never happen): + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
// 80..8F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 90..9F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A0..AF + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B0..BF + 1, 1, // C0,C1 + + // two-byte sequences: + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C2..CF + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // D0..DF + + // three-byte sequences: + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // E0..EF + + // four-byte sequences: + 4, 4, 4, 4, 4, // F0..F4 + + // these are invalid again (5- or 6-byte + // sequences and sequences for code points + // above U+10FFFF, as restricted by RFC 3629): + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F5..FF +}; + +/* static */ +void wxString::DecIter(wxStringImpl::const_iterator& i) +{ + wxASSERT( IsValidUtf8LeadByte(*i) ); + + // Non-lead bytes are all in the 0x80..0xBF range (i.e. 10xxxxxx in + // binary), so we just have to go back until we hit a byte that is either + // < 0x80 (i.e. 0xxxxxxx in binary) or 0xC0..0xFF (11xxxxxx in binary; this + // includes some invalid values, but we can ignore it here, because we + // assume valid UTF-8 input for the purpose of efficient implementation). 
+ --i; + while ( ((*i) & 0xC0) == 0x80 /* 2 highest bits are '10' */ ) + --i; +} + +/* static */ +void wxString::DecIter(wxStringImpl::iterator& i) +{ + // FIXME-UTF8: use template instead + wxASSERT( IsValidUtf8LeadByte(*i) ); + --i; + while ( ((*i) & 0xC0) == 0x80 /* 2 highest bits are '10' */ ) + --i; +} + +/* static */ +wxStringImpl::const_iterator +wxString::AddToIter(wxStringImpl::const_iterator i, int n) +{ + wxStringImpl::const_iterator out(i); + + if ( n > 0 ) + { + for ( int j = 0; j < n; ++j ) + IncIter(out); + } + else if ( n < 0 ) + { + for ( int j = 0; j > n; --j ) + DecIter(out); + } + + return out; +} + +wxStringImpl::iterator +wxString::AddToIter(wxStringImpl::iterator i, int n) +{ + // FIXME-UTF8: use template instead + wxStringImpl::iterator out(i); + + if ( n > 0 ) + { + for ( int j = 0; j < n; ++j ) + IncIter(out); + } + else if ( n < 0 ) + { + for ( int j = 0; j > n; --j ) + DecIter(out); + } + + return out; +} + + +/* static */ +int wxString::DiffIters(wxStringImpl::const_iterator i1, + wxStringImpl::const_iterator i2) +{ + int dist = 0; + + if ( i1 < i2 ) + { + while ( i1 != i2 ) + { + IncIter(i1); + dist--; + } + } + else if ( i2 < i1 ) + { + while ( i2 != i1 ) + { + IncIter(i2); + dist++; + } + } + + return dist; +} + +int wxString::DiffIters(wxStringImpl::iterator i1, wxStringImpl::iterator i2) +{ + // FIXME-UTF8: use template instead + int dist = 0; + + if ( i1 < i2 ) + { + while ( i1 != i2 ) + { + IncIter(i1); + dist--; + } + } + else if ( i2 < i1 ) + { + while ( i2 != i1 ) + { + IncIter(i2); + dist++; + } + } + + return dist; +} + +/* static */ +wxString::Utf8CharBuffer wxString::EncodeChar(wxUniChar ch) +{ + Utf8CharBuffer buf; + char *out = buf.data; + + wxUniChar::value_type code = ch.GetValue(); + + // Char. 
number range | UTF-8 octet sequence + // (hexadecimal) | (binary) + // ----------------------+--------------------------------------------- + // 0000 0000 - 0000 007F | 0xxxxxxx + // 0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx + // 0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx + // 0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + // + // Code point value is stored in bits marked with 'x', lowest-order bit + // of the value on the right side in the diagram above. + // (from RFC 3629) + + if ( code <= 0x7F ) + { + out[1] = 0; + out[0] = (char)code; + } + else if ( code <= 0x07FF ) + { + out[2] = 0; + // NB: this line takes 6 least significant bits, encodes them as + // 10xxxxxx and discards them so that the next byte can be encoded: + out[1] = 0x80 | (code & 0x3F); code >>= 6; + out[0] = 0xC0 | code; + } + else if ( code <= 0xFFFF ) + { + out[3] = 0; + out[2] = 0x80 | (code & 0x3F); code >>= 6; + out[1] = 0x80 | (code & 0x3F); code >>= 6; + out[0] = 0xE0 | code; + } + else if ( code <= 0x10FFFF ) + { + out[4] = 0; + out[3] = 0x80 | (code & 0x3F); code >>= 6; + out[2] = 0x80 | (code & 0x3F); code >>= 6; + out[1] = 0x80 | (code & 0x3F); code >>= 6; + out[0] = 0xF0 | code; + } + else + { + wxFAIL_MSG( _T("trying to encode undefined Unicode character") ); + out[0] = 0; + } + + return buf; +} + +/* static */ +wxUniChar wxUniCharRef::DecodeChar(wxStringImpl::const_iterator i) +{ + wxASSERT( wxString::IsValidUtf8LeadByte(*i) ); // FIXME-UTF8: no "wxString::" + + wxUniChar::value_type code = 0; + size_t len = wxString::GetUtf8CharLength(*i); + wxASSERT_MSG( len <= 4, _T("invalid UTF-8 sequence length") ); + + // Char.
number range | UTF-8 octet sequence + // (hexadecimal) | (binary) + // ----------------------+--------------------------------------------- + // 0000 0000 - 0000 007F | 0xxxxxxx + // 0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx + // 0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx + // 0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + // + // Code point value is stored in bits marked with 'x', lowest-order bit + // of the value on the right side in the diagram above. + // (from RFC 3629) + + // mask to extract lead byte's value ('x' bits above), by sequence's length: + static const unsigned char s_leadValueMask[4] = { 0x7F, 0x1F, 0x0F, 0x07 }; +#ifdef __WXDEBUG__ + // mask and value of lead byte's most significant bits, by length: + static const unsigned char s_leadMarkerMask[4] = { 0x80, 0xE0, 0xF0, 0xF8 }; + static const unsigned char s_leadMarkerVal[4] = { 0x00, 0xC0, 0xE0, 0xF0 }; +#endif + + // extract the lead byte's value bits: + wxASSERT_MSG( ((unsigned char)*i & s_leadMarkerMask[len-1]) == + s_leadMarkerVal[len-1], + _T("invalid UTF-8 lead byte") ); + code = (unsigned char)*i & s_leadValueMask[len-1]; + + // all remaining bytes, if any, are handled in the same way regardless of + // sequence's length: + for ( ++i ; len > 1; --len, ++i ) + { + wxASSERT_MSG( ((unsigned char)*i & 0xC0) == 0x80, + _T("invalid UTF-8 byte") ); + + code <<= 6; + code |= (unsigned char)*i & 0x3F; + } + + return wxUniChar(code); +} + +/* static */ +wxCharBuffer wxString::EncodeNChars(size_t n, wxUniChar ch) +{ + Utf8CharBuffer once(EncodeChar(ch)); + // the IncIter() table can be used to determine the length of ch's encoding: + size_t len = ms_utf8IterTable[(unsigned char)once.data[0]]; + + wxCharBuffer buf(n * len); + char *ptr = buf.data(); + for ( size_t i = 0; i < n; i++, ptr += len ) + { + memcpy(ptr, once.data, len); + } + + return buf; +} + + +void wxString::PosLenToImpl(size_t pos, size_t len, + size_t *implPos, size_t *implLen) const +{ + if ( pos == npos ) + 
*implPos = npos; + else + { + const_iterator i = begin() + pos; + *implPos = wxStringImpl::const_iterator(i) - m_impl.begin(); + if ( len == npos ) + *implLen = npos; + else + { + // too large length is interpreted as "to the end of the string" + // FIXME-UTF8: verify this is the case in std::string, assert + // otherwise + if ( pos + len > length() ) + len = length() - pos; + + *implLen = wxStringImpl::const_iterator(i + len) - + wxStringImpl::const_iterator(i); + } + } +} + +#endif // wxUSE_UNICODE_UTF8 + // ---------------------------------------------------------------------------- // wxCStrData converted strings caching // ---------------------------------------------------------------------------- @@ -254,14 +675,14 @@ const wchar_t* wxCStrData::AsWChar() const // construction and conversion // --------------------------------------------------------------------------- -#if wxUSE_UNICODE +#if wxUSE_UNICODE_WCHAR /* static */ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, const wxMBConv& conv) { // anything to do? if ( !psz || nLength == 0 ) - return SubstrBufFromMB(); + return SubstrBufFromMB(L"", 0); if ( nLength == npos ) nLength = wxNO_LEN; @@ -269,18 +690,51 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, size_t wcLen; wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen)); if ( !wcLen ) - return SubstrBufFromMB(); + return SubstrBufFromMB(_T(""), 0); else return SubstrBufFromMB(wcBuf, wcLen); } -#else +#endif // wxUSE_UNICODE_WCHAR + +#if wxUSE_UNICODE_UTF8 +/* static */ +wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, + const wxMBConv& conv) +{ + // FIXME-UTF8: return as-is without copying under UTF8 locale, return + // converted string under other locales - needs wxCharBuffer + // changes + + // anything to do? 
+ if ( !psz || nLength == 0 ) + return SubstrBufFromMB("", 0); + + if ( nLength == npos ) + nLength = wxNO_LEN; + + // first convert to wide string: + size_t wcLen; + wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen)); + if ( !wcLen ) + return SubstrBufFromMB("", 0); + + // and then to UTF-8: + SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxConvUTF8)); + // widechar -> UTF-8 conversion isn't supposed to ever fail: + wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") ); + + return buf; +} +#endif // wxUSE_UNICODE_UTF8 + +#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE /* static */ wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength, const wxMBConv& conv) { // anything to do? if ( !pwz || nLength == 0 ) - return SubstrBufFromWC(); + return SubstrBufFromWC("", 0); if ( nLength == npos ) nLength = wxNO_LEN; @@ -288,34 +742,56 @@ wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLengt size_t mbLen; wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen)); if ( !mbLen ) - return SubstrBufFromWC(); + return SubstrBufFromWC("", 0); else return SubstrBufFromWC(mbBuf, mbLen); } -#endif +#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE -#if wxUSE_UNICODE +#if wxUSE_UNICODE_WCHAR //Convert wxString in Unicode mode to a multi-byte string const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const { - return conv.cWC2MB(c_str(), length() + 1 /* size, not length */, NULL); + return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL); } -#else // ANSI +#elif wxUSE_UNICODE_UTF8 -#if wxUSE_WCHAR_T +const wxWCharBuffer wxString::wc_str() const +{ + return wxConvUTF8.cMB2WC(m_impl.c_str(), + m_impl.length() + 1 /* size, not length */, + NULL); +} + +const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const +{ + // FIXME-UTF8: optimize the case when conv==wxConvUTF8 or wxConvLibc + // under UTF8 locale + // FIXME-UTF8: use wc_str() here once we have buffers with length + + size_t wcLen; + wxWCharBuffer wcBuf( + 
wxConvUTF8.cMB2WC(m_impl.c_str(), + m_impl.length() + 1 /* size, not length */, + &wcLen)); + if ( !wcLen ) + return wxCharBuffer(""); + + return conv.cWC2MB(wcBuf, wcLen, NULL); +} + +#else // ANSI //Converts this string to a wide character string if unicode //mode is not enabled and wxUSE_WCHAR_T is enabled const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const { - return conv.cMB2WC(c_str(), length() + 1 /* size, not length */, NULL); + return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL); } -#endif // wxUSE_WCHAR_T - #endif // Unicode/ANSI // shrink to minimal size (releasing extra memory) @@ -996,7 +1472,8 @@ bool wxString::EndsWith(const wxChar *suffix, wxString *rest) const wxASSERT_MSG( suffix, _T("invalid parameter in wxString::EndssWith") ); int start = length() - wxStrlen(suffix); - if ( start < 0 || wxStrcmp(wx_str() + start, suffix) != 0 ) + + if ( start < 0 || compare(start, npos, suffix) != 0 ) return false; if ( rest ) @@ -1420,7 +1897,7 @@ int wxString::PrintfV(const wxString& format, va_list argptr) // only a copy va_list argptrcopy; wxVaCopy(argptrcopy, argptr); - int len = wxVsnprintf(buf, size, format, argptrcopy); + int len = wxVsnprintf(buf, size, (const wxChar*)/*FIXME-UTF8*/format, argptrcopy); va_end(argptrcopy); // some implementations of vsnprintf() don't NUL terminate diff --git a/src/common/stringimpl.cpp b/src/common/stringimpl.cpp index e8750dc4f0..0f1380e708 100644 --- a/src/common/stringimpl.cpp +++ b/src/common/stringimpl.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Name: src/common/string.cpp +// Name: src/common/stringimpl.cpp // Purpose: wxString class // Author: Vadim Zeitlin, Ryan Norton // Modified by: @@ -56,12 +56,10 @@ #define wxStringMemcpy memcpy #define wxStringMemcmp memcmp #define wxStringMemchr memchr - #define wxStringStrlen strlen #else #define wxStringMemcpy wxTmemcpy #define wxStringMemcmp wxTmemcmp #define wxStringMemchr 
wxTmemchr - #define wxStringStrlen wxStrlen #endif @@ -80,6 +78,10 @@ const size_t wxStringImpl::npos = (size_t) -1; #if wxUSE_STL_BASED_WXSTRING +// FIXME-UTF8: get rid of this, have only one wxEmptyString +#if wxUSE_UNICODE_UTF8 +extern const wxStringCharType WXDLLIMPEXP_BASE *wxEmptyStringImpl = ""; +#endif extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = _T(""); #else @@ -90,11 +92,17 @@ extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = _T(""); static const struct { wxStringData data; - wxChar dummy; + wxStringCharType dummy; } g_strEmpty = { {-1, 0, 0}, wxT('\0') }; // empty C style string: points to 'string data' byte of g_strEmpty -extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = &g_strEmpty.dummy; +#if wxUSE_UNICODE_UTF8 +// FIXME-UTF8: get rid of this, have only one wxEmptyString +extern const wxStringCharType WXDLLIMPEXP_BASE *wxEmptyStringImpl = &g_strEmpty.dummy; +extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = _T(""); +#else +extern const wxStringCharType WXDLLIMPEXP_BASE *wxEmptyString = &g_strEmpty.dummy; +#endif #endif @@ -111,7 +119,7 @@ extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = &g_strEmpty.dummy; class Averager { public: - Averager(const wxChar *sz) { m_sz = sz; m_nTotal = m_nCount = 0; } + Averager(const wxStringCharType *sz) { m_sz = sz; m_nTotal = m_nCount = 0; } ~Averager() { wxPrintf("wxString: average %s = %f\n", m_sz, ((float)m_nTotal)/m_nCount); } @@ -119,7 +127,7 @@ extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = &g_strEmpty.dummy; private: size_t m_nCount, m_nTotal; - const wxChar *m_sz; + const wxStringCharType *m_sz; } g_averageLength("allocation size"), g_averageSummandLength("summand length"), g_averageConcatHit("hit probability in concat"), @@ -147,15 +155,16 @@ void wxStringData::Free() // =========================================================================== // takes nLength elements of psz starting at nPos -void wxStringImpl::InitWith(const wxChar *psz, size_t nPos, size_t nLength) +void 
wxStringImpl::InitWith(const wxStringCharType *psz, + size_t nPos, size_t nLength) { Init(); // if the length is not given, assume the string to be NUL terminated if ( nLength == npos ) { - wxASSERT_MSG( nPos <= wxStrlen(psz), _T("index out of bounds") ); + wxASSERT_MSG( nPos <= Strsize(psz), _T("index out of bounds") ); - nLength = wxStrlen(psz + nPos); + nLength = Strsize(psz + nPos); } STATISTICS_ADD(InitialLength, nLength); @@ -201,7 +210,7 @@ bool wxStringImpl::AllocBuffer(size_t nLen) wxASSERT( nLen > 0 ); // make sure that we don't overflow - wxASSERT( nLen < (INT_MAX / sizeof(wxChar)) - + wxASSERT( nLen < (INT_MAX / sizeof(wxStringCharType)) - (sizeof(wxStringData) + EXTRA_ALLOC + 1) ); STATISTICS_ADD(Length, nLen); @@ -210,7 +219,7 @@ bool wxStringImpl::AllocBuffer(size_t nLen) // 1) one extra character for '\0' termination // 2) sizeof(wxStringData) for housekeeping info wxStringData* pData = (wxStringData*) - malloc(sizeof(wxStringData) + (nLen + EXTRA_ALLOC + 1)*sizeof(wxChar)); + malloc(sizeof(wxStringData) + (nLen + EXTRA_ALLOC + 1)*sizeof(wxStringCharType)); if ( pData == NULL ) { // allocation failures are handled by the caller @@ -269,7 +278,8 @@ bool wxStringImpl::AllocBeforeWrite(size_t nLen) nLen += EXTRA_ALLOC; pData = (wxStringData*) - realloc(pData, sizeof(wxStringData) + (nLen + 1)*sizeof(wxChar)); + realloc(pData, + sizeof(wxStringData) + (nLen + 1)*sizeof(wxStringCharType)); if ( pData == NULL ) { // allocation failures are handled by the caller @@ -331,7 +341,7 @@ bool wxStringImpl::Alloc(size_t nLen) nLen += EXTRA_ALLOC; pData = (wxStringData *) - malloc(sizeof(wxStringData) + (nLen + 1)*sizeof(wxChar)); + malloc(sizeof(wxStringData) + (nLen + 1)*sizeof(wxStringCharType)); if ( pData == NULL ) { // allocation failure handled by caller @@ -352,14 +362,14 @@ bool wxStringImpl::Alloc(size_t nLen) return false; } // +1 to copy the terminator, too - memcpy(m_pchData, pData->data(), (nOldLen+1)*sizeof(wxChar)); + memcpy(m_pchData, 
pData->data(), (nOldLen+1)*sizeof(wxStringCharType)); GetStringData()->nDataLength = nOldLen; } else { nLen += EXTRA_ALLOC; pData = (wxStringData *) - realloc(pData, sizeof(wxStringData) + (nLen + 1)*sizeof(wxChar)); + realloc(pData, sizeof(wxStringData) + (nLen + 1)*sizeof(wxStringCharType)); if ( pData == NULL ) { // allocation failure handled by caller @@ -411,11 +421,12 @@ wxStringImpl& wxStringImpl::erase(size_t nStart, size_t nLen) return *this; } -wxStringImpl& wxStringImpl::insert(size_t nPos, const wxChar *sz, size_t n) +wxStringImpl& wxStringImpl::insert(size_t nPos, + const wxStringCharType *sz, size_t n) { wxASSERT( nPos <= length() ); - if ( n == npos ) n = wxStrlen(sz); + if ( n == npos ) n = Strsize(sz); if ( n == 0 ) return *this; if ( !Alloc(length() + n) || !CopyBeforeWrite() ) { @@ -424,8 +435,8 @@ wxStringImpl& wxStringImpl::insert(size_t nPos, const wxChar *sz, size_t n) } memmove(m_pchData + nPos + n, m_pchData + nPos, - (length() - nPos) * sizeof(wxChar)); - memcpy(m_pchData + nPos, sz, n * sizeof(wxChar)); + (length() - nPos) * sizeof(wxStringCharType)); + memcpy(m_pchData + nPos, sz, n * sizeof(wxStringCharType)); GetStringData()->nDataLength = length() + n; m_pchData[length()] = '\0'; @@ -487,7 +498,8 @@ size_t wxStringImpl::find(const wxStringImpl& str, size_t nStart) const return p - c_str() + nLenOther <= nLen ? 
p - c_str() : npos; } -size_t wxStringImpl::find(const wxChar* sz, size_t nStart, size_t n) const +size_t wxStringImpl::find(const wxStringCharType* sz, + size_t nStart, size_t n) const { return find(wxStringImpl(sz, n), nStart); } @@ -534,7 +546,8 @@ size_t wxStringImpl::rfind(const wxStringImpl& str, size_t nStart) const return npos; } -size_t wxStringImpl::rfind(const wxChar* sz, size_t nStart, size_t n) const +size_t wxStringImpl::rfind(const wxStringCharType* sz, + size_t nStart, size_t n) const { return rfind(wxStringImpl(sz, n), nStart); } @@ -562,7 +575,7 @@ size_t wxStringImpl::rfind(wxStringCharType ch, size_t nStart) const } wxStringImpl& wxStringImpl::replace(size_t nStart, size_t nLen, - const wxChar *sz) + const wxStringCharType *sz) { wxASSERT_MSG( nStart <= length(), _T("index out of bounds in wxStringImpl::replace") ); @@ -607,7 +620,7 @@ wxStringImpl& wxStringImpl::replace(size_t nStart, size_t nLen, } wxStringImpl& wxStringImpl::replace(size_t nStart, size_t nLen, - const wxChar* sz, size_t nCount) + const wxStringCharType* sz, size_t nCount) { return replace(nStart, nLen, wxStringImpl(sz, nCount).c_str()); } @@ -643,24 +656,25 @@ wxStringImpl& wxStringImpl::operator=(const wxStringImpl& stringSrc) // assigns a single character wxStringImpl& wxStringImpl::operator=(wxStringCharType ch) { - wxChar c(ch); + wxStringCharType c(ch); if ( !AssignCopy(1, &c) ) { - wxFAIL_MSG( _T("out of memory in wxStringImpl::operator=(wxChar)") ); + wxFAIL_MSG( _T("out of memory in wxStringImpl::operator=(wxStringCharType)") ); } return *this; } // assigns C string -wxStringImpl& wxStringImpl::operator=(const wxChar *psz) +wxStringImpl& wxStringImpl::operator=(const wxStringCharType *psz) { - if ( !AssignCopy(wxStrlen(psz), psz) ) { - wxFAIL_MSG( _T("out of memory in wxStringImpl::operator=(const wxChar *)") ); + if ( !AssignCopy(Strsize(psz), psz) ) { + wxFAIL_MSG( _T("out of memory in wxStringImpl::operator=(const wxStringCharType *)") ); } return *this; } // 
helper function: does real copy -bool wxStringImpl::AssignCopy(size_t nSrcLen, const wxChar *pszSrcData) +bool wxStringImpl::AssignCopy(size_t nSrcLen, + const wxStringCharType *pszSrcData) { if ( nSrcLen == 0 ) { Reinit(); @@ -670,7 +684,7 @@ bool wxStringImpl::AssignCopy(size_t nSrcLen, const wxChar *pszSrcData) // allocation failure handled by caller return false; } - memcpy(m_pchData, pszSrcData, nSrcLen*sizeof(wxChar)); + memcpy(m_pchData, pszSrcData, nSrcLen*sizeof(wxStringCharType)); GetStringData()->nDataLength = nSrcLen; m_pchData[nSrcLen] = wxT('\0'); } @@ -682,7 +696,8 @@ bool wxStringImpl::AssignCopy(size_t nSrcLen, const wxChar *pszSrcData) // --------------------------------------------------------------------------- // add something to this string -bool wxStringImpl::ConcatSelf(size_t nSrcLen, const wxChar *pszSrcData, +bool wxStringImpl::ConcatSelf(size_t nSrcLen, + const wxStringCharType *pszSrcData, size_t nMaxLen) { STATISTICS_ADD(SummandLength, nSrcLen); @@ -705,7 +720,7 @@ bool wxStringImpl::ConcatSelf(size_t nSrcLen, const wxChar *pszSrcData, // allocation failure handled by caller return false; } - memcpy(m_pchData, pOldData->data(), nLen*sizeof(wxChar)); + memcpy(m_pchData, pOldData->data(), nLen*sizeof(wxStringCharType)); pOldData->Unlock(); } else if ( nNewLen > pData->nAllocLength ) { @@ -728,7 +743,7 @@ bool wxStringImpl::ConcatSelf(size_t nSrcLen, const wxChar *pszSrcData, wxASSERT( nNewLen <= GetStringData()->nAllocLength ); // fast concatenation - all is done in our buffer - memcpy(m_pchData + nLen, pszSrcData, nSrcLen*sizeof(wxChar)); + memcpy(m_pchData + nLen, pszSrcData, nSrcLen*sizeof(wxStringCharType)); m_pchData[nNewLen] = wxT('\0'); // put terminating '\0' GetStringData()->nDataLength = nNewLen; // and fix the length @@ -755,7 +770,7 @@ wxChar *wxStringImpl::DoGetWriteBuf(size_t nLen) // put string back in a reasonable state after GetWriteBuf void wxStringImpl::DoUngetWriteBuf() { - DoUngetWriteBuf(wxStrlen(m_pchData)); + 
DoUngetWriteBuf(Strsize(m_pchData)); } void wxStringImpl::DoUngetWriteBuf(size_t nLen) diff --git a/src/common/strvararg.cpp b/src/common/strvararg.cpp index 5e7955dd1e..81288c6f89 100644 --- a/src/common/strvararg.cpp +++ b/src/common/strvararg.cpp @@ -32,20 +32,30 @@ // implementation // ============================================================================ -const wxStringCharType *wxArgNormalizer::get() const +const wxChar *wxArgNormalizer::get() const { + // FIXME-UTF8: use some way that doesn't involve implicit conversion, + // so that we deallocate any converted buffer immediately; + // can't use AsString() because it returns wxString and not + // const wxString&, unfortunately; use As[W]CharBuf() when + // available. return m_value; } -const wxStringCharType *wxArgNormalizer::get() const +const wxChar *wxArgNormalizer::get() const { +#if wxUSE_UNICODE_UTF8 // FIXME-UTF8 + return (const wxChar*)m_value; +#else return m_value.wx_str(); +#endif } -#if wxUSE_UNICODE_WCHAR - +#if wxUSE_UNICODE // FIXME-UTF8: should be wxUSE_UNICODE_WCHAR wxArgNormalizer::wxArgNormalizer(const char *value) { + // FIXME-UTF8: move this to the header so that m_value doesn't have + // to be dynamically allocated m_value = new wxWCharBuffer(wxConvLibc.cMB2WC(value)); } @@ -58,12 +68,17 @@ const wchar_t *wxArgNormalizer::get() const { return m_value->data(); } +#endif // wxUSE_UNICODE_WCHAR -#elif wxUSE_WCHAR_T // !wxUSE_UNICODE_WCHAR && wxUSE_WCHAR_T +#if /*wxUSE_UNICODE_UTF8 ||*/ !wxUSE_UNICODE // FIXME-UTF8 wxArgNormalizer::wxArgNormalizer(const wchar_t *value) { +#if wxUSE_UNICODE_UTF8 // FIXME-UTF8: this will be the only case + m_value = new wxCharBuffer(wxConvUTF8.cWC2MB(value)); +#else m_value = new wxCharBuffer(wxConvLibc.cWC2MB(value)); +#endif } wxArgNormalizer::~wxArgNormalizer() @@ -75,12 +90,44 @@ const char *wxArgNormalizer::get() const { return m_value->data(); } +#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE + +#if 0 // wxUSE_UNICODE_UTF8 - FIXME-UTF8 
+wxArgNormalizer::wxArgNormalizer(const char *value) +{ + // FIXME-UTF8: move this to the header so that m_value doesn't have + // to be dynamically allocated + // FIXME-UTF8: optimize this if current locale is UTF-8 one + + // convert to widechar string first: + wxWCharBuffer buf(wxConvLibc.cMB2WC(value)); + + if ( buf ) + { + // then to UTF-8: + m_value = new wxCharBuffer(wxConvUTF8.cWC2MB(value)); + } + else + { + m_value = new wxCharBuffer(); + } +} + +wxArgNormalizer::~wxArgNormalizer() +{ + delete m_value; +} + +const char *wxArgNormalizer::get() const +{ + return m_value->data(); +} +#endif // wxUSE_UNICODE_UTF8 + -#endif // wxUSE_UNICODE_WCHAR / !wxUSE_UNICODE_WCHAR && wxUSE_WCHAR_T // FIXME-UTF8: move this to the header once it's possible to include buffer.h // without including wxcrt.h - wxArgNormalizer::wxArgNormalizer(const wxCharBuffer& buf) : wxArgNormalizer(buf.data()) { diff --git a/src/common/unichar.cpp b/src/common/unichar.cpp index 9e8ef8df4f..0fec3a7792 100644 --- a/src/common/unichar.cpp +++ b/src/common/unichar.cpp @@ -25,10 +25,17 @@ #include "wx/unichar.h" +// FIXME-UTF8: remove once UTF-8 functions moved outside +#include "wx/string.h" + // =========================================================================== // implementation // =========================================================================== +// --------------------------------------------------------------------------- +// wxUniChar +// --------------------------------------------------------------------------- + /* static */ wxUniChar::value_type wxUniChar::From8bit(char c) { @@ -55,3 +62,35 @@ char wxUniChar::To8bit(wxUniChar::value_type c) return '?'; // FIXME-UTF8: what to use as failure character? 
return buf[0]; } + + +// --------------------------------------------------------------------------- +// wxUniCharRef +// --------------------------------------------------------------------------- + +#if wxUSE_UNICODE_UTF8 +wxUniCharRef& wxUniCharRef::operator=(const wxUniChar& c) +{ + wxString::Utf8CharBuffer utf(wxString::EncodeChar(c)); + size_t lenOld = wxString::GetUtf8CharLength(*m_pos); + size_t lenNew = wxString::GetUtf8CharLength(utf[0]); + + if ( lenNew == lenOld ) + { + iterator pos(m_pos); + for ( size_t i = 0; i < lenNew; ++i, ++pos ) + *pos = utf[i]; + } + else + { + size_t idx = m_pos - m_str.begin(); + + m_str.replace(m_pos, m_pos + lenOld, utf, lenNew); + + // this is needed to keep m_pos valid: + m_pos = m_str.begin() + idx; + } + + return *this; +} +#endif // wxUSE_UNICODE_UTF8 diff --git a/src/common/uri.cpp b/src/common/uri.cpp index 022b66b590..a5151947ef 100644 --- a/src/common/uri.cpp +++ b/src/common/uri.cpp @@ -641,7 +641,8 @@ const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormali if (bNormalize) { wxStringBufferLength theBuffer(m_path, m_path.length() + 1); -#if wxUSE_STL +#if wxUSE_STL || wxUSE_UNICODE_UTF8 + // FIXME-UTF8: have some wxReadWriteStringBuffer instead? wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1); #endif Normalize(theBuffer, true); @@ -693,7 +694,8 @@ const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormali if (bNormalize) { wxStringBufferLength theBuffer(m_path, m_path.length() + 1); -#if wxUSE_STL +#if wxUSE_STL || wxUSE_UNICODE_UTF8 + // FIXME-UTF8: have some wxReadWriteStringBuffer instead? wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1); #endif Normalize(theBuffer); -- 2.45.2