initial version of UTF-8 strings representation (still converting to wchar_t* a lot...)

author Václav Slavík <vslavik@fastmail.fm>

Thu, 12 Apr 2007 21:15:07 +0000 (21:15 +0000)

committer Václav Slavík <vslavik@fastmail.fm>

Thu, 12 Apr 2007 21:15:07 +0000 (21:15 +0000)
author Václav Slavík <vslavik@fastmail.fm>
Thu, 12 Apr 2007 21:15:07 +0000 (21:15 +0000)
committer Václav Slavík <vslavik@fastmail.fm>
Thu, 12 Apr 2007 21:15:07 +0000 (21:15 +0000)
diff --git a/configure b/configure

index 1b7ed84fe8571beccc40b488c4141305e49ec19f..2c82bd1d577bc95a67b56656131bf45299c67be4 100755 (executable)
--- a/configure
+++ b/configure
@@ -1639,6 +1639,7 @@ Optional Features:
    --enable-optimise       create optimised code
    --enable-debug          same as debug_flag and debug_info
    --enable-stl            use STL for containers
+  --enable-extended_rtti  use extended RTTI (XTI)
    --enable-omf            use OMF object format
    --enable-debug_flag     set __WXDEBUG__ flag (recommended for developers!)
    --enable-debug_info     create code with debugging information
@@ -1688,6 +1689,7 @@ Optional Features:
    --enable-longlong       use wxLongLong class
    --enable-mimetype       use wxMimeTypesManager
    --enable-mslu           use MS Layer for Unicode on Windows 9x (Win32 only)
+  --enable-utf8           use UTF-8 representation for strings (Unix only)
    --enable-snglinst       use wxSingleInstanceChecker class
    --enable-std_iostreams  use standard C++ stream classes
    --enable-std_string     use standard C++ string classes
@@ -2900,6 +2902,7 @@ DEBUG_CONFIGURE=0
  if test $DEBUG_CONFIGURE = 1; then
    DEFAULT_wxUSE_UNIVERSAL=no
    DEFAULT_wxUSE_STL=no
+  DEFAULT_wxUSE_EXTENDED_RTTI=no
  
    DEFAULT_wxUSE_NANOX=no
  
@@ -3106,6 +3109,7 @@ if test $DEBUG_CONFIGURE = 1; then
  
    DEFAULT_wxUSE_UNICODE=no
    DEFAULT_wxUSE_UNICODE_MSLU=no
+  DEFAULT_wxUSE_UNICODE_UTF8=no
    DEFAULT_wxUSE_WCSRTOMBS=no
  
    DEFAULT_wxUSE_PALETTE=no
@@ -3125,6 +3129,7 @@ if test $DEBUG_CONFIGURE = 1; then
  else
    DEFAULT_wxUSE_UNIVERSAL=no
    DEFAULT_wxUSE_STL=no
+  DEFAULT_wxUSE_EXTENDED_RTTI=no
  
    DEFAULT_wxUSE_NANOX=no
  
@@ -3330,6 +3335,7 @@ else
  
    DEFAULT_wxUSE_UNICODE=no
    DEFAULT_wxUSE_UNICODE_MSLU=yes
+  DEFAULT_wxUSE_UNICODE_UTF8=no
    DEFAULT_wxUSE_WCSRTOMBS=no
  
    DEFAULT_wxUSE_PALETTE=yes
@@ -4675,6 +4681,47 @@ echo "${ECHO_T}yes" >&6; }
  echo "${ECHO_T}no" >&6; }
            fi
  
+
+          enablestring=
+          { echo "$as_me:$LINENO: checking for --${enablestring:-enable}-extended_rtti" >&5
+echo $ECHO_N "checking for --${enablestring:-enable}-extended_rtti... $ECHO_C" >&6; }
+          no_cache=0
+          # Check whether --enable-extended_rtti was given.
+if test "${enable_extended_rtti+set}" = set; then
+  enableval=$enable_extended_rtti;
+                          if test "$enableval" = yes; then
+                            ac_cv_use_extended_rtti='wxUSE_EXTENDED_RTTI=yes'
+                          else
+                            ac_cv_use_extended_rtti='wxUSE_EXTENDED_RTTI=no'
+                          fi
+
+else
+
+                          LINE=`grep "wxUSE_EXTENDED_RTTI" ${wx_arg_cache_file}`
+                          if test "x$LINE" != x ; then
+                            eval "DEFAULT_$LINE"
+                          else
+                            no_cache=1
+                          fi
+
+                          ac_cv_use_extended_rtti='wxUSE_EXTENDED_RTTI='$DEFAULT_wxUSE_EXTENDED_RTTI
+
+fi
+
+
+          eval "$ac_cv_use_extended_rtti"
+          if test "$no_cache" != 1; then
+            echo $ac_cv_use_extended_rtti >> ${wx_arg_cache_file}.tmp
+          fi
+
+          if test "$wxUSE_EXTENDED_RTTI" = yes; then
+            { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+          else
+            { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+          fi
+
  if test "$USE_OS2" = "1"; then
      DEFAULT_wxUSE_OMF=no
  
@@ -6698,6 +6745,47 @@ echo "${ECHO_T}no" >&6; }
            fi
  
  
+          enablestring=
+          { echo "$as_me:$LINENO: checking for --${enablestring:-enable}-utf8" >&5
+echo $ECHO_N "checking for --${enablestring:-enable}-utf8... $ECHO_C" >&6; }
+          no_cache=0
+          # Check whether --enable-utf8 was given.
+if test "${enable_utf8+set}" = set; then
+  enableval=$enable_utf8;
+                          if test "$enableval" = yes; then
+                            ac_cv_use_utf8='wxUSE_UNICODE_UTF8=yes'
+                          else
+                            ac_cv_use_utf8='wxUSE_UNICODE_UTF8=no'
+                          fi
+
+else
+
+                          LINE=`grep "wxUSE_UNICODE_UTF8" ${wx_arg_cache_file}`
+                          if test "x$LINE" != x ; then
+                            eval "DEFAULT_$LINE"
+                          else
+                            no_cache=1
+                          fi
+
+                          ac_cv_use_utf8='wxUSE_UNICODE_UTF8='$DEFAULT_wxUSE_UNICODE_UTF8
+
+fi
+
+
+          eval "$ac_cv_use_utf8"
+          if test "$no_cache" != 1; then
+            echo $ac_cv_use_utf8 >> ${wx_arg_cache_file}.tmp
+          fi
+
+          if test "$wxUSE_UNICODE_UTF8" = yes; then
+            { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+          else
+            { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+          fi
+
+
            enablestring=
            { echo "$as_me:$LINENO: checking for --${enablestring:-enable}-snglinst" >&5
  echo $ECHO_N "checking for --${enablestring:-enable}-snglinst... $ECHO_C" >&6; }
@@ -22380,13 +22468,11 @@ _ACEOF
  cat confdefs.h >>conftest.$ac_ext
  cat >>conftest.$ac_ext <<_ACEOF
  /* end confdefs.h.  */
-#include <sys/types.h> /* for off_t */
-     #include <stdio.h>
+#include <stdio.h>
  int
  main ()
  {
-int (*fp) (FILE *, off_t, int) = fseeko;
-     return fseeko (stdin, 0, 0) && fp (stdin, 0, 0);
+return fseeko (stdin, 0, 0) && (fseeko) (stdin, 0, 0);
    ;
    return 0;
  }
@@ -22426,13 +22512,11 @@ cat confdefs.h >>conftest.$ac_ext
  cat >>conftest.$ac_ext <<_ACEOF
  /* end confdefs.h.  */
  #define _LARGEFILE_SOURCE 1
-#include <sys/types.h> /* for off_t */
-     #include <stdio.h>
+#include <stdio.h>
  int
  main ()
  {
-int (*fp) (FILE *, off_t, int) = fseeko;
-     return fseeko (stdin, 0, 0) && fp (stdin, 0, 0);
+return fseeko (stdin, 0, 0) && (fseeko) (stdin, 0, 0);
    ;
    return 0;
  }
@@ -39268,7 +39352,6 @@ echo $ECHO_N "checking how many arguments gethostbyname_r() takes... $ECHO_C" >&
  else
  
  
-################################################################
  
  ac_cv_func_which_gethostbyname_r=unknown
  
@@ -39498,7 +39581,6 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
  
  fi
  
-################################################################
  
  
  fi
@@ -39647,19 +39729,94 @@ _ACEOF
  fi
  
        fi
-
-{ echo "$as_me:$LINENO: checking how many arguments getservbyname_r() takes" >&5
-echo $ECHO_N "checking how many arguments getservbyname_r() takes... $ECHO_C" >&6; }
+                              { echo "$as_me:$LINENO: checking for getservbyname_r" >&5
+echo $ECHO_N "checking for getservbyname_r... $ECHO_C" >&6; }
  if test "${ac_cv_func_which_getservbyname_r+set}" = set; then
    echo $ECHO_N "(cached) $ECHO_C" >&6
  else
  
-                ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+{ echo "$as_me:$LINENO: checking for getservbyname_r" >&5
+echo $ECHO_N "checking for getservbyname_r... $ECHO_C" >&6; }
+if test "${ac_cv_func_getservbyname_r+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+/* Define getservbyname_r to an innocuous variant, in case <limits.h> declares getservbyname_r.
+   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
+#define getservbyname_r innocuous_getservbyname_r
+
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char getservbyname_r (); below.
+    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+    <limits.h> exists even on freestanding compilers.  */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef getservbyname_r
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char getservbyname_r ();
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined __stub_getservbyname_r || defined __stub___getservbyname_r
+choke me
+#endif
  
+int
+main ()
+{
+return getservbyname_r ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+        test -z "$ac_c_werror_flag" ||
+        test ! -s conftest.err
+       } && test -s conftest$ac_exeext &&
+       $as_test_x conftest$ac_exeext; then
+  ac_cv_func_getservbyname_r=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+       ac_cv_func_getservbyname_r=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+      conftest$ac_exeext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_func_getservbyname_r" >&5
+echo "${ECHO_T}$ac_cv_func_getservbyname_r" >&6; }
+if test $ac_cv_func_getservbyname_r = yes; then
  
          cat >conftest.$ac_ext <<_ACEOF
  /* confdefs.h.  */
@@ -39667,17 +39824,20 @@ _ACEOF
  cat confdefs.h >>conftest.$ac_ext
  cat >>conftest.$ac_ext <<_ACEOF
  /* end confdefs.h.  */
-#include <netdb.h>
+
+#               include <netdb.h>
+
  int
  main ()
  {
  
-                char *name;
-                char *proto;
-                struct servent *se, *res;
-                char buffer[2048];
-                int buflen = 2048;
-                (void) getservbyname_r(name, proto, se, buffer, buflen, &res)
+
+        char *name;
+        char *proto;
+        struct servent *se;
+        struct servent_data data;
+        (void) getservbyname_r(name, proto, se, &data);
+
  
    ;
    return 0;
@@ -39697,32 +39857,34 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
    cat conftest.err >&5
    echo "$as_me:$LINENO: \$? = $ac_status" >&5
    (exit $ac_status); } && {
-        test -z "$ac_cxx_werror_flag" ||
+        test -z "$ac_c_werror_flag" ||
          test ! -s conftest.err
         } && test -s conftest.$ac_objext; then
-  ac_cv_func_which_getservbyname_r=six
+  ac_cv_func_which_getservbyname_r=four
  else
    echo "$as_me: failed program was:" >&5
  sed 's/^/| /' conftest.$ac_ext >&5
  
  
-                cat >conftest.$ac_ext <<_ACEOF
+  cat >conftest.$ac_ext <<_ACEOF
  /* confdefs.h.  */
  _ACEOF
  cat confdefs.h >>conftest.$ac_ext
  cat >>conftest.$ac_ext <<_ACEOF
  /* end confdefs.h.  */
-#include <netdb.h>
+
+#   include <netdb.h>
+
  int
  main ()
  {
  
-                        char *name;
-                        char *proto;
-                        struct servent *se;
-                        char buffer[2048];
-                        int buflen = 2048;
-                        (void) getservbyname_r(name, proto, se, buffer, buflen)
+        char *name;
+        char *proto;
+        struct servent *se, *res;
+        char buffer[2048];
+        int buflen = 2048;
+        (void) getservbyname_r(name, proto, se, buffer, buflen, &res)
  
    ;
    return 0;
@@ -39742,31 +39904,34 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
    cat conftest.err >&5
    echo "$as_me:$LINENO: \$? = $ac_status" >&5
    (exit $ac_status); } && {
-        test -z "$ac_cxx_werror_flag" ||
+        test -z "$ac_c_werror_flag" ||
          test ! -s conftest.err
         } && test -s conftest.$ac_objext; then
-  ac_cv_func_which_getservbyname_r=five
+  ac_cv_func_which_getservbyname_r=six
  else
    echo "$as_me: failed program was:" >&5
  sed 's/^/| /' conftest.$ac_ext >&5
  
  
-                        cat >conftest.$ac_ext <<_ACEOF
+  cat >conftest.$ac_ext <<_ACEOF
  /* confdefs.h.  */
  _ACEOF
  cat confdefs.h >>conftest.$ac_ext
  cat >>conftest.$ac_ext <<_ACEOF
  /* end confdefs.h.  */
-#include <netdb.h>
+
+#   include <netdb.h>
+
  int
  main ()
  {
  
-                                char *name;
-                                char *proto;
-                                struct servent *se;
-                                struct servent_data data;
-                                (void) getservbyname_r(name, proto, se, &data);
+        char *name;
+        char *proto;
+        struct servent *se;
+        char buffer[2048];
+        int buflen = 2048;
+        (void) getservbyname_r(name, proto, se, buffer, buflen)
  
    ;
    return 0;
@@ -39786,21 +39951,22 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
    cat conftest.err >&5
    echo "$as_me:$LINENO: \$? = $ac_status" >&5
    (exit $ac_status); } && {
-        test -z "$ac_cxx_werror_flag" ||
+        test -z "$ac_c_werror_flag" ||
          test ! -s conftest.err
         } && test -s conftest.$ac_objext; then
-  ac_cv_func_which_getservbyname_r=four
+  ac_cv_func_which_getservbyname_r=five
  else
    echo "$as_me: failed program was:" >&5
  sed 's/^/| /' conftest.$ac_ext >&5
  
         ac_cv_func_which_getservbyname_r=no
-
  fi
  
  rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
  
  
+
+
  fi
  
  rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
@@ -39809,13 +39975,10 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
  fi
  
  rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-        ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-
  
+else
+  ac_cv_func_which_getservbyname_r=no
+fi
  
  fi
  { echo "$as_me:$LINENO: result: $ac_cv_func_which_getservbyname_r" >&5
@@ -39836,6 +39999,7 @@ elif test $ac_cv_func_which_getservbyname_r = four; then
  #define HAVE_FUNC_GETSERVBYNAME_R_4 1
  _ACEOF
  
+
  fi
  
  
@@ -40715,6 +40879,13 @@ _ACEOF
  
  fi
  
+if test "$wxUSE_EXTENDED_RTTI" = "yes"; then
+  cat >>confdefs.h <<\_ACEOF
+#define wxUSE_EXTENDED_RTTI 1
+_ACEOF
+
+fi
+
  if test "$wxUSE_APPLE_IEEE" = "yes"; then
    cat >>confdefs.h <<\_ACEOF
  #define wxUSE_APPLE_IEEE 1
@@ -43476,6 +43647,13 @@ fi
      fi
  fi
  
+if test "$wxUSE_UNICODE" = "yes" -a "$wxUSE_UNICODE_UTF8" = "yes"; then
+    cat >>confdefs.h <<\_ACEOF
+#define wxUSE_UNICODE_UTF8 1
+_ACEOF
+
+fi
+
  if test "$wxUSE_wxUSE_EXPERIMENTAL_PRINTF" = "yes"; then
    cat >>confdefs.h <<\_ACEOF
  #define wxUSE_EXPERIMENTAL_PRINTF 1
@@ -46113,7 +46291,10 @@ echo "${ECHO_T}$bakefile_cv_prog_makeisgnu" >&6; }
                  PLATFORM_BEOS=1
              ;;
              * )
-                                            ;;
+                { { echo "$as_me:$LINENO: error: Unknown platform: $BAKEFILE_FORCE_PLATFORM" >&5
+echo "$as_me: error: Unknown platform: $BAKEFILE_FORCE_PLATFORM" >&2;}
+   { (exit 1); exit 1; }; }
+            ;;
          esac
      fi
  
@@ -48857,10 +49038,21 @@ echo "${ECHO_T}no" >&6; }
  
      cppunit_major_min=`echo $cppunit_version_min | \
             sed 's/\([0-9]*\).\([0-9]*\).\([0-9]*\)/\1/'`
+    if test "x${cppunit_major_min}" = "x" ; then
+       cppunit_major_min=0
+    fi
+
      cppunit_minor_min=`echo $cppunit_version_min | \
             sed 's/\([0-9]*\).\([0-9]*\).\([0-9]*\)/\2/'`
+    if test "x${cppunit_minor_min}" = "x" ; then
+       cppunit_minor_min=0
+    fi
+
      cppunit_micro_min=`echo $cppunit_version_min | \
             sed 's/\([0-9]*\).\([0-9]*\).\([0-9]*\)/\3/'`
+    if test "x${cppunit_micro_min}" = "x" ; then
+       cppunit_micro_min=0
+    fi
  
      cppunit_version_proper=`expr \
          $cppunit_major_version \> $cppunit_major_min \| \
diff --git a/configure.in b/configure.in

index 06a9b8a14421f7581a660b1559a2f7154034ed2b..23393e824396618127fb82159b129c151b164b6e 100644 (file)
--- a/configure.in
+++ b/configure.in
@@ -576,6 +576,7 @@ if test $DEBUG_CONFIGURE = 1; then
  
    DEFAULT_wxUSE_UNICODE=no
    DEFAULT_wxUSE_UNICODE_MSLU=no
+  DEFAULT_wxUSE_UNICODE_UTF8=no
    DEFAULT_wxUSE_WCSRTOMBS=no
  
    DEFAULT_wxUSE_PALETTE=no
@@ -801,6 +802,7 @@ else
  
    DEFAULT_wxUSE_UNICODE=no
    DEFAULT_wxUSE_UNICODE_MSLU=yes
+  DEFAULT_wxUSE_UNICODE_UTF8=no
    DEFAULT_wxUSE_WCSRTOMBS=no
  
    DEFAULT_wxUSE_PALETTE=yes
@@ -987,6 +989,8 @@ WX_ARG_ENABLE(log,           [  --enable-log            use logging system], wxU
  WX_ARG_ENABLE(longlong,      [  --enable-longlong       use wxLongLong class], wxUSE_LONGLONG)
  WX_ARG_ENABLE(mimetype,      [  --enable-mimetype       use wxMimeTypesManager], wxUSE_MIMETYPE)
  WX_ARG_ENABLE(mslu,          [  --enable-mslu           use MS Layer for Unicode on Windows 9x (Win32 only)], wxUSE_UNICODE_MSLU)
+dnl FIXME-UTF8: make UTF8 automatic
+WX_ARG_ENABLE(utf8,          [  --enable-utf8           use UTF-8 representation for strings (Unix only)], wxUSE_UNICODE_UTF8)
  WX_ARG_ENABLE(snglinst,      [  --enable-snglinst       use wxSingleInstanceChecker class], wxUSE_SNGLINST_CHECKER)
  WX_ARG_ENABLE(std_iostreams, [  --enable-std_iostreams  use standard C++ stream classes], wxUSE_STD_IOSTREAM)
  WX_ARG_ENABLE(std_string,    [  --enable-std_string     use standard C++ string classes], wxUSE_STD_STRING)
@@ -6483,6 +6487,10 @@ if test "$wxUSE_UNICODE" = "yes" ; then
      fi
  fi
  
+if test "$wxUSE_UNICODE" = "yes" -a "$wxUSE_UNICODE_UTF8" = "yes"; then
+    AC_DEFINE(wxUSE_UNICODE_UTF8)
+fi
+
  if test "$wxUSE_wxUSE_EXPERIMENTAL_PRINTF" = "yes"; then
    AC_DEFINE(wxUSE_EXPERIMENTAL_PRINTF)
  fi
diff --git a/include/wx/buffer.h b/include/wx/buffer.h

index 59a7ad6c35de5e5dd713f2848bfe9a22ede9de02..81184c7705c3fbcd7e7d86b6295a72c7e7853235 100644 (file)
--- a/include/wx/buffer.h
+++ b/include/wx/buffer.h
@@ -168,8 +168,13 @@ typedef wxWritableCharTypeBuffer<wchar_t> wxWritableWCharBuffer;
  
      #define wxMB2WXbuf wxWCharBuffer
      #define wxWX2MBbuf wxCharBuffer
-    #define wxWC2WXbuf wxChar*
-    #define wxWX2WCbuf wxChar*
+    #if wxUSE_UNICODE_WCHAR
+        #define wxWC2WXbuf wxChar*
+        #define wxWX2WCbuf wxChar*
+    #elif wxUSE_UNICODE_UTF8
+        #define wxWC2WXbuf wxWCharBuffer
+        #define wxWX2WCbuf wxWCharBuffer
+    #endif
  #else // ANSI
      #define wxWxCharBuffer wxCharBuffer
  
diff --git a/include/wx/chartype.h b/include/wx/chartype.h

index 934e13d1bc2978a76a20dd7df6f09fa64cfea96f..e5ddbf0c9a19590edb1bd33b1ae2b9d93cd35d69 100644 (file)
--- a/include/wx/chartype.h
+++ b/include/wx/chartype.h
@@ -190,9 +190,15 @@
  /* depending on the platform, Unicode build can either store wxStrings as
     wchar_t* or UTF-8 encoded char*: */
  #if wxUSE_UNICODE
-    /* for now, all Unicode builds are wchar_t* based: */
-    #define wxUSE_UNICODE_WCHAR 1
-    #define wxUSE_UNICODE_UTF8  0
+    #ifndef wxUSE_UNICODE_UTF8
+        #define wxUSE_UNICODE_UTF8 0
+    #endif
+
+    #if wxUSE_UNICODE_UTF8
+        #define wxUSE_UNICODE_WCHAR 0
+    #else
+        #define wxUSE_UNICODE_WCHAR 1
+    #endif
  #else
      #define wxUSE_UNICODE_WCHAR 0
      #define wxUSE_UNICODE_UTF8  0
diff --git a/include/wx/list.h b/include/wx/list.h

index a2907d0693c191213d2fbfeca765b9d3051dd635..ad0e88f93d8f2d1c365e487685da3db23fd74add 100644 (file)
--- a/include/wx/list.h
+++ b/include/wx/list.h
@@ -380,7 +380,7 @@ private:
  union wxListKeyValue
  {
      long integer;
-    wxChar *string;
+    wxString *string;
  };
  
  // a struct which may contain both types of keys
@@ -397,15 +397,13 @@ public:
          { }
      wxListKey(long i) : m_keyType(wxKEY_INTEGER)
          { m_key.integer = i; }
-    wxListKey(const wxChar *s) : m_keyType(wxKEY_STRING)
-        { m_key.string = wxStrdup(s); }
      wxListKey(const wxString& s) : m_keyType(wxKEY_STRING)
-        { m_key.string = wxStrdup(s.c_str()); }
+        { m_key.string = new wxString(s); }
  
      // accessors
      wxKeyType GetKeyType() const { return m_keyType; }
-    const wxChar *GetString() const
-        { wxASSERT( m_keyType == wxKEY_STRING ); return m_key.string; }
+    const wxString GetString() const
+        { wxASSERT( m_keyType == wxKEY_STRING ); return *m_key.string; }
      long GetNumber() const
          { wxASSERT( m_keyType == wxKEY_INTEGER ); return m_key.integer; }
  
@@ -418,7 +416,7 @@ public:
      ~wxListKey()
      {
          if ( m_keyType == wxKEY_STRING )
-            free(m_key.string);
+            delete m_key.string;
      }
  
  private:
@@ -448,11 +446,11 @@ public:
      virtual ~wxNodeBase();
  
      // FIXME no check is done that the list is really keyed on strings
-    const wxChar *GetKeyString() const { return m_key.string; }
+    wxString GetKeyString() const { return *m_key.string; }
      long GetKeyInteger() const { return m_key.integer; }
  
      // Necessary for some existing code
-    void SetKeyString(wxChar* s) { m_key.string = s; }
+    void SetKeyString(const wxString& s) { m_key.string = new wxString(s); }
      void SetKeyInteger(long i) { m_key.integer = i; }
  
  #ifdef wxLIST_COMPATIBILITY
@@ -602,7 +600,7 @@ protected:
  
          // keyed append
      wxNodeBase *Append(long key, void *object);
-    wxNodeBase *Append(const wxChar *key, void *object);
+    wxNodeBase *Append(const wxString& key, void *object);
  
          // removes node from the list but doesn't delete it (returns pointer
          // to the node or NULL if it wasn't found in the list)
diff --git a/include/wx/log.h b/include/wx/log.h

index 229380c94c5d441c8f64b7bd757c7f17c52d5eb7..b280d7d309ab7aaaad324eb49901f9bd204a3841 100644 (file)
--- a/include/wx/log.h
+++ b/include/wx/log.h
@@ -476,14 +476,14 @@ WXDLLIMPEXP_BASE const wxChar* wxSysErrorMsg(unsigned long nErrCode = 0);
      WX_DEFINE_VARARG_FUNC_VOID(wxLog##level, wxDoLog##level)
  
  #define DECLARE_LOG_FUNCTION_IMPL(level)                                    \
-    extern void WXDLLIMPEXP_BASE wxVLog##level(const wxChar *szFormat,      \
+    extern void WXDLLIMPEXP_BASE wxVLog##level(const wxString& format,      \
                                                 va_list argptr);             \
-    extern void WXDLLIMPEXP_BASE wxDoLog##level(const wxChar *szFormat,     \
-                                                ...) ATTRIBUTE_PRINTF_1
+    extern void WXDLLIMPEXP_BASE                                            \
+    wxDoLog##level(const wxChar *szFormat, ...) ATTRIBUTE_PRINTF_1
  
  #define DECLARE_LOG_FUNCTION2_EXP_IMPL(level, argclass, arg, expdecl)       \
      extern void expdecl wxVLog##level(argclass arg,                         \
-                                      const wxChar *szFormat,               \
+                                      const wxString& format,               \
                                        va_list argptr);                      \
      extern void expdecl wxDoLog##level(argclass arg,                        \
                                         const wxChar *szFormat,              \
@@ -497,12 +497,12 @@ WXDLLIMPEXP_BASE const wxChar* wxSysErrorMsg(unsigned long nErrCode = 0);
      WX_DEFINE_VARARG_FUNC_NOP(wxLog##level)
  
  #define DECLARE_LOG_FUNCTION_IMPL(level)                                    \
-    inline void wxVLog##level(const wxChar *WXUNUSED(szFormat),             \
+    inline void wxVLog##level(const wxString& WXUNUSED(format),             \
                                va_list WXUNUSED(argptr)) { }                 \
  
  #define DECLARE_LOG_FUNCTION2_EXP_IMPL(level, argclass, arg, expdecl)       \
      inline void wxVLog##level(argclass WXUNUSED(arg),                       \
-                              const wxChar *WXUNUSED(szFormat),             \
+                              const wxString& WXUNUSED(format),             \
                                va_list WXUNUSED(argptr)) {}
  
  // Empty Class to fake wxLogNull
@@ -590,7 +590,7 @@ DECLARE_LOG_FUNCTION_PUBLIC(SysError)
  
      // this version only logs the message if the mask had been added to the
      // list of masks with AddTraceMask()
-    DECLARE_LOG_FUNCTION2_IMPL(Trace, const wxChar*, mask);
+    DECLARE_LOG_FUNCTION2_IMPL(Trace, const wxString&, mask);
      // and this one does nothing if all of level bits are not set in
      // wxLog::GetActive()->GetTraceMask() -- it's deprecated in favour of
      // string identifiers
diff --git a/include/wx/string.h b/include/wx/string.h

index 5f3801ef5126193e7eac677cd47f2614b8b128ce..3bc5588f9ad9bbe42f293634812e00d6981861c9 100644 (file)
--- a/include/wx/string.h
+++ b/include/wx/string.h
@@ -371,13 +371,14 @@ private:
        T data;
        size_t len;
  
-      SubstrBufFromType() {}
        SubstrBufFromType(const T& data_, size_t len_)
            : data(data_), len(len_) {}
    };
  
  #if wxUSE_UNICODE_UTF8
-  // FIXME-UTF8: this will have to use slightly different type
+  // even char* -> char* needs conversion, from locale charset to UTF-8
+  typedef SubstrBufFromType<wxCharBuffer>    SubstrBufFromWC;
+  typedef SubstrBufFromType<wxCharBuffer>    SubstrBufFromMB;
  #elif wxUSE_UNICODE_WCHAR
    typedef SubstrBufFromType<const wchar_t*>  SubstrBufFromWC;
    typedef SubstrBufFromType<wxWCharBuffer>   SubstrBufFromMB;
@@ -392,8 +393,12 @@ private:
    // between UTF-8 and wchar_t* representations of the string are mostly
    // contained here.
  
-#if wxUSE_UNICODE
-  // FIXME-UTF8: This will need changes when UTF8 build is introduced
+#if wxUSE_UNICODE_UTF8
+  static SubstrBufFromMB ConvertStr(const char *psz, size_t nLength,
+                                    const wxMBConv& conv);
+  static SubstrBufFromWC ConvertStr(const wchar_t *pwz, size_t nLength,
+                                    const wxMBConv& conv);
+#elif wxUSE_UNICODE_WCHAR
    static SubstrBufFromMB ConvertStr(const char *psz, size_t nLength,
                                      const wxMBConv& conv);
  #else
@@ -447,6 +452,7 @@ private:
    // encodes the character to a form used to represent it in internal
    // representation (returns a string in UTF8 version)
    static wxChar EncodeChar(wxUniChar ch) { return (wxChar)ch; }
+  static wxUniChar DecodeChar(wxStringImpl::const_iterator i) { return *i; }
  
    // translates position index in wxString to/from index in underlying
    // wxStringImpl:
@@ -459,11 +465,56 @@ private:
  
  #else // wxUSE_UNICODE_UTF8
  
-  typedef char Utf8CharBuffer[5];
+  // checks correctness of UTF-8 sequence
+  static bool IsValidUtf8String(const char *c);
+#ifdef __WXDEBUG__
+  static bool IsValidUtf8LeadByte(unsigned char c);
+#endif
+
+  // table of offsets to skip forward when iterating
+  static unsigned char ms_utf8IterTable[256];
+
+  static void IncIter(wxStringImpl::iterator& i)
+  {
+      wxASSERT( IsValidUtf8LeadByte(*i) );
+      i += ms_utf8IterTable[(unsigned char)*i];
+  }
+  static void IncIter(wxStringImpl::const_iterator& i)
+  {
+      wxASSERT( IsValidUtf8LeadByte(*i) );
+      i += ms_utf8IterTable[(unsigned char)*i];
+  }
+
+  static void DecIter(wxStringImpl::iterator& i);
+  static void DecIter(wxStringImpl::const_iterator& i);
+  static wxStringImpl::iterator AddToIter(wxStringImpl::iterator i, int n);
+  static wxStringImpl::const_iterator AddToIter(wxStringImpl::const_iterator i, int n);
+  static int DiffIters(wxStringImpl::const_iterator i1, wxStringImpl::const_iterator i2);
+  static int DiffIters(wxStringImpl::iterator i1, wxStringImpl::iterator i2);
+
+  struct Utf8CharBuffer
+  {
+      char data[5];
+      operator const char*() const { return data; }
+  };
    static Utf8CharBuffer EncodeChar(wxUniChar ch);
    // returns n copies of ch encoded in UTF-8 string
    static wxCharBuffer EncodeNChars(size_t n, wxUniChar ch);
  
+  // returns the length of UTF-8 encoding of the character with lead byte 'c'
+  static size_t GetUtf8CharLength(char c)
+  {
+      wxASSERT( IsValidUtf8LeadByte(c) );
+      return ms_utf8IterTable[(unsigned char)c];
+  }
+
+  // decodes single UTF-8 character from UTF-8 string
+  // FIXME-UTF8: move EncodeChar/DecodeChar and other operations to separate
+  //             class
+  static wxUniChar DecodeChar(wxStringImpl::const_iterator i)
+    { return wxUniCharRef::DecodeChar(i); }
+  friend class WXDLLIMPEXP_BASE wxUniCharRef;
+
    size_t PosToImpl(size_t pos) const
    {
        if ( pos == 0 || pos == npos )
@@ -472,6 +523,15 @@ private:
            return wxStringImpl::const_iterator(begin() + pos) - m_impl.begin();
    }
  
+  void PosLenToImpl(size_t pos, size_t len, size_t *implPos, size_t *implLen) const;
+
+  size_t LenToImpl(size_t len) const
+  {
+      size_t pos, len2;
+      PosLenToImpl(0, len, &pos, &len2);
+      return len2;
+  }
+
    size_t PosFromImpl(size_t pos) const
    {
        if ( pos == 0 || pos == npos )
@@ -480,13 +540,23 @@ private:
            return const_iterator(m_impl.begin() + pos) - begin();
    }
  
-  // FIXME: return as-is without copying under UTF8 locale, return
-  //        converted string under other locales - needs wxCharBuffer
-  //        changes
-  static wxCharBuffer ImplStr(const char* str);
+  size_t IterToImplPos(wxStringImpl::iterator i) const
+    { return wxStringImpl::const_iterator(i) - m_impl.begin(); }
+
+  // FIXME-UTF8: return as-is without copying under UTF8 locale, return
+  //             converted string under other locales - needs wxCharBuffer
+  //             changes
+  static wxCharBuffer ImplStr(const char* str,
+                              const wxMBConv& conv = wxConvLibc)
+    { return ConvertStr(str, npos, conv).data; }
+  static SubstrBufFromMB ImplStr(const char* str, size_t n,
+                                 const wxMBConv& conv = wxConvLibc)
+    { return ConvertStr(str, n, conv); }
  
    static wxCharBuffer ImplStr(const wchar_t* str)
-      { return wxConvUTF8.cWC2MB(str); }
+    { return ConvertStr(str, npos, wxConvUTF8).data; }
+  static SubstrBufFromWC ImplStr(const wchar_t* str, size_t n)
+    { return ConvertStr(str, n, wxConvUTF8); }
  #endif // !wxUSE_UNICODE_UTF8/wxUSE_UNICODE_UTF8
  
  
@@ -496,7 +566,9 @@ public:
    wxString() {}
  
      // copy ctor
+  // FIXME-UTF8: this one needs to do UTF-8 conversion in UTF-8 build!
    wxString(const wxStringImpl& stringSrc) : m_impl(stringSrc) { }
+
    wxString(const wxString& stringSrc) : m_impl(stringSrc.m_impl) { }
  
      // string containing nRepeat copies of ch
@@ -571,6 +643,18 @@ public:
    wxString(const wxString& str, size_t nLength)
        : m_impl(str.Mid(0, nLength).m_impl) {}
  
+  // even if we're not built with wxUSE_STL == 1 it is very convenient to allow
+  // implicit conversions from std::string to wxString as this allows using
+  // the same strings in non-GUI and GUI code, however we don't want to
+  // unconditionally add this ctor as it would make wx lib dependent on
+  // libstdc++ on some Linux versions which is bad, so instead we ask the
+  // client code to define this wxUSE_STD_STRING symbol if they need it
+#if wxUSE_STD_STRING && !wxUSE_STL_BASED_WXSTRING
+  wxString(const wxStdString& s)
+      // FIXME-UTF8: this one needs to do UTF-8 conversion in UTF-8 build!
+      : m_impl(s.c_str()) { } // FIXME-UTF8: this is broken for embedded 0s
+#endif // wxUSE_STD_STRING && !wxUSE_STL_BASED_WXSTRING
+
  public:
    // standard types
    typedef wxUniChar value_type;
@@ -583,7 +667,12 @@ public:
    typedef wxUniChar const_reference;
  
  #if wxUSE_STL
-  #define WX_STR_ITERATOR_TAG std::random_access_iterator_tag
+  #if wxUSE_UNICODE_UTF8
+    // random access is not O(1), as required by Random Access Iterator
+    #define WX_STR_ITERATOR_TAG std::bidirectional_iterator_tag
+  #else
+    #define WX_STR_ITERATOR_TAG std::random_access_iterator_tag
+  #endif
  #else
    #define WX_STR_ITERATOR_TAG void /* dummy type */
  #endif
@@ -599,8 +688,6 @@ public:
            typedef reference_type reference;                                 \
            typedef pointer_type pointer;                                     \
                                                                              \
-          iterator_name(const iterator_name& i) : m_cur(i.m_cur) {}         \
-                                                                            \
            reference operator*() const { return reference_ctor; }            \
            reference operator[](size_t n) const { return *(*this + n); }     \
                                                                              \
@@ -621,14 +708,6 @@ public:
                return tmp;                                                   \
            }                                                                 \
                                                                              \
-          iterator_name operator+(int n) const                              \
-            { return iterator_name(wxString::AddToIter(m_cur, n)); }        \
-          iterator_name operator+(size_t n) const                           \
-            { return iterator_name(wxString::AddToIter(m_cur, (int)n)); }   \
-          iterator_name operator-(int n) const                              \
-            { return iterator_name(wxString::AddToIter(m_cur, -n)); }       \
-          iterator_name operator-(size_t n) const                           \
-            { return iterator_name(wxString::AddToIter(m_cur, -(int)n)); }  \
            iterator_name& operator+=(int n)                                  \
              { m_cur = wxString::AddToIter(m_cur, n); return *this; }        \
            iterator_name& operator+=(size_t n)                               \
@@ -657,7 +736,6 @@ public:
                                                                              \
        private:                                                              \
            /* for internal wxString use only: */                             \
-          iterator_name(underlying_iterator ptr) : m_cur(ptr) {}            \
            operator underlying_iterator() const { return m_cur; }            \
                                                                              \
            friend class WXDLLIMPEXP_BASE wxString;                           \
@@ -668,23 +746,90 @@ public:
  
    class const_iterator;
  
+#if wxUSE_UNICODE_UTF8
+  class iterator
+  {
+      // NB: In UTF-8 build, (non-const) iterator needs to keep a reference
+      //     to the underlying wxStringImpl, because UTF-8 is a variable-length
+      //     encoding and changing the value pointed to by an iterator using
+      //     its operator* requires calling wxStringImpl::replace() if the old
+      //     and new values differ in their encoding's length.
+
+      WX_STR_ITERATOR_IMPL(iterator, wxChar*, wxUniCharRef,
+                           wxUniCharRef::CreateForString(m_str, m_cur));
+
+  public:
+      iterator(const iterator& i) : m_cur(i.m_cur), m_str(i.m_str) {}
+
+      iterator operator+(int n) const
+        { return iterator(m_str, wxString::AddToIter(m_cur, n)); }
+      iterator operator+(size_t n) const
+        { return iterator(m_str, wxString::AddToIter(m_cur, (int)n)); }
+      iterator operator-(int n) const
+        { return iterator(m_str, wxString::AddToIter(m_cur, -n)); }
+      iterator operator-(size_t n) const
+        { return iterator(m_str, wxString::AddToIter(m_cur, -(int)n)); }
+
+  private:
+      iterator(wxString *str, underlying_iterator ptr)
+          : m_cur(ptr), m_str(str->m_impl) {}
+      iterator(wxStringImpl& str, underlying_iterator ptr)
+          : m_cur(ptr), m_str(str) {}
+
+      wxStringImpl& m_str;
+
+      friend class const_iterator;
+  };
+#else // !wxUSE_UNICODE_UTF8
    class iterator
    {
        WX_STR_ITERATOR_IMPL(iterator, wxChar*, wxUniCharRef,
                             wxUniCharRef::CreateForString(m_cur));
  
+  public:
+      iterator(const iterator& i) : m_cur(i.m_cur) {}
+
+      iterator operator+(int n) const
+        { return iterator(wxString::AddToIter(m_cur, n)); }
+      iterator operator+(size_t n) const
+        { return iterator(wxString::AddToIter(m_cur, (int)n)); }
+      iterator operator-(int n) const
+        { return iterator(wxString::AddToIter(m_cur, -n)); }
+      iterator operator-(size_t n) const
+        { return iterator(wxString::AddToIter(m_cur, -(int)n)); }
+
+  private:
+      // for internal wxString use only:
+      iterator(underlying_iterator ptr) : m_cur(ptr) {}
+      iterator(wxString *WXUNUSED(str), underlying_iterator ptr) : m_cur(ptr) {}
+
        friend class const_iterator;
    };
+#endif // wxUSE_UNICODE_UTF8/!wxUSE_UNICODE_UTF8
  
    class const_iterator
    {
        // NB: reference_type is intentionally value, not reference, the character
        //     may be encoded differently in wxString data:
        WX_STR_ITERATOR_IMPL(const_iterator, const wxChar*, wxUniChar,
-                           wxUniChar(*m_cur));
+                           wxString::DecodeChar(m_cur));
  
    public:
+      const_iterator(const const_iterator& i) : m_cur(i.m_cur) {}
        const_iterator(const iterator& i) : m_cur(i.m_cur) {}
+
+      const_iterator operator+(int n) const
+        { return const_iterator(wxString::AddToIter(m_cur, n)); }
+      const_iterator operator+(size_t n) const
+        { return const_iterator(wxString::AddToIter(m_cur, (int)n)); }
+      const_iterator operator-(int n) const
+        { return const_iterator(wxString::AddToIter(m_cur, -n)); }
+      const_iterator operator-(size_t n) const
+        { return const_iterator(wxString::AddToIter(m_cur, -(int)n)); }
+
+  private:
+      // for internal wxString use only:
+      const_iterator(underlying_iterator ptr) : m_cur(ptr) {}
    };
  
    #undef WX_STR_ITERATOR_TAG
@@ -767,10 +912,10 @@ public:
  
    // first valid index position
    const_iterator begin() const { return const_iterator(m_impl.begin()); }
-  iterator begin() { return iterator(m_impl.begin()); }
+  iterator begin() { return iterator(this, m_impl.begin()); }
    // position one after the last valid one
    const_iterator end() const { return const_iterator(m_impl.end()); }
-  iterator end() { return iterator(m_impl.end()); }
+  iterator end() { return iterator(this, m_impl.end()); }
  
    // first element of the reversed string
    const_reverse_iterator rbegin() const
@@ -925,7 +1070,7 @@ public:
  
      // explicit conversion to C string in internal representation (char*,
      // wchar_t*, UTF-8-encoded char*, depending on the build):
-    const_pointer wx_str() const { return m_impl.c_str(); }
+    const wxStringCharType *wx_str() const { return m_impl.c_str(); }
  
      // conversion to *non-const* multibyte or widestring buffer; modifying
      // returned buffer won't affect the string, these methods are only useful
@@ -963,21 +1108,26 @@ public:
  
      const wxWX2MBbuf mbc_str() const { return mb_str(*wxConvCurrent); }
  
-    const wxChar* wc_str() const { return c_str(); }
-
+#if wxUSE_UNICODE_WCHAR
+    const wxChar* wc_str() const { return wx_str(); }
+#elif wxUSE_UNICODE_UTF8
+    const wxWCharBuffer wc_str() const;
+#endif
      // for compatibility with !wxUSE_UNICODE version
-    const wxChar* wc_str(const wxMBConv& WXUNUSED(conv)) const { return c_str(); }
+    const wxWX2WCbuf wc_str(const wxMBConv& WXUNUSED(conv)) const
+      { return wc_str(); }
  
  #if wxMBFILES
      const wxCharBuffer fn_str() const { return mb_str(wxConvFile); }
  #else // !wxMBFILES
-    const wxChar* fn_str() const { return c_str(); }
+    const wxWX2WCbuf fn_str() const { return wc_str(); }
  #endif // wxMBFILES/!wxMBFILES
+
  #else // ANSI
-    const wxChar* mb_str() const { return c_str(); }
+    const wxChar* mb_str() const { return wx_str(); }
  
      // for compatibility with wxUSE_UNICODE version
-    const wxChar* mb_str(const wxMBConv& WXUNUSED(conv)) const { return c_str(); }
+    const wxChar* mb_str(const wxMBConv& WXUNUSED(conv)) const { return wx_str(); }
  
      const wxWX2MBbuf mbc_str() const { return mb_str(); }
  
@@ -1588,14 +1738,25 @@ public:
    {
  #if wxUSE_UNICODE_UTF8
      if ( !ch.IsAscii() )
-        m_impl.insert(begin() + nPos, EncodeNChars(n, ch));
+        m_impl.insert(PosToImpl(nPos), EncodeNChars(n, ch));
      else
  #endif
-        m_impl.insert(begin() + nPos, n, (wxStringCharType)ch);
+        m_impl.insert(PosToImpl(nPos), n, (wxStringCharType)ch);
      return *this;
    }
    iterator insert(iterator it, wxUniChar ch)
-    { return iterator(m_impl.insert(it, EncodeChar(ch))); }
+  {
+#if wxUSE_UNICODE_UTF8
+    if ( !ch.IsAscii() )
+    {
+        size_t pos = IterToImplPos(it);
+        m_impl.insert(pos, EncodeChar(ch));
+        return iterator(this, m_impl.begin() + pos);
+    }
+    else
+#endif
+        return iterator(this, m_impl.insert(it, (wxStringCharType)ch));
+  }
    void insert(iterator it, const_iterator first, const_iterator last)
      { m_impl.insert(it, first, last); }
    void insert(iterator it, const char *first, const char *last)
@@ -1606,7 +1767,7 @@ public:
    {
  #if wxUSE_UNICODE_UTF8
      if ( !ch.IsAscii() )
-        m_impl.insert(it, EncodeNChars(n, ch));
+        m_impl.insert(IterToImplPos(it), EncodeNChars(n, ch));
      else
  #endif
          m_impl.insert(it, n, (wxStringCharType)ch);
@@ -1622,9 +1783,9 @@ public:
    }
      // delete characters from first up to last
    iterator erase(iterator first, iterator last)
-    { return iterator(m_impl.erase(first, last)); }
+    { return iterator(this, m_impl.erase(first, last)); }
    iterator erase(iterator first)
-    { return iterator(m_impl.erase(first)); }
+    { return iterator(this, m_impl.erase(first)); }
  
  #ifdef wxSTRING_BASE_HASNT_CLEAR
    void clear() { erase(); }
@@ -1874,7 +2035,11 @@ public:
  
      // as strpbrk() but starts at nStart, returns npos if not found
    size_t find_first_of(const wxString& str, size_t nStart = 0) const
+#if wxUSE_UNICODE // FIXME-UTF8: temporary
+    { return find_first_of(str.mb_str().data(), nStart); }
+#else
      { return find_first_of((const wxChar*)str.c_str(), nStart); }
+#endif
      // same as above
    size_t find_first_of(const char* sz, size_t nStart = 0) const;
    size_t find_first_of(const wchar_t* sz, size_t nStart = 0) const;
@@ -1885,7 +2050,11 @@ public:
      { return find(c, nStart); }
      // find the last (starting from nStart) char from str in this string
    size_t find_last_of (const wxString& str, size_t nStart = npos) const
+#if wxUSE_UNICODE // FIXME-UTF8: temporary
+    { return find_last_of(str.mb_str().data(), nStart); }
+#else
      { return find_last_of((const wxChar*)str.c_str(), nStart); }
+#endif
      // same as above
    size_t find_last_of (const char* sz, size_t nStart = npos) const;
    size_t find_last_of (const wchar_t* sz, size_t nStart = npos) const;
@@ -1899,7 +2068,11 @@ public:
  
      // as strspn() (starting from nStart), returns npos on failure
    size_t find_first_not_of(const wxString& str, size_t nStart = 0) const
+#if wxUSE_UNICODE // FIXME-UTF8: temporary
+    { return find_first_not_of(str.mb_str().data(), nStart); }
+#else
      { return find_first_not_of((const wxChar*)str.c_str(), nStart); }
+#endif
      // same as above
    size_t find_first_not_of(const char* sz, size_t nStart = 0) const;
    size_t find_first_not_of(const wchar_t* sz, size_t nStart = 0) const;
@@ -1909,7 +2082,11 @@ public:
    size_t find_first_not_of(wxUniChar ch, size_t nStart = 0) const;
      //  as strcspn()
    size_t find_last_not_of(const wxString& str, size_t nStart = npos) const
+#if wxUSE_UNICODE // FIXME-UTF8: temporary
+    { return find_last_not_of(str.mb_str().data(), nStart); }
+#else
      { return find_last_not_of((const wxChar*)str.c_str(), nStart); }
+#endif
      // same as above
    size_t find_last_not_of(const char* sz, size_t nStart = npos) const;
    size_t find_last_not_of(const wchar_t* sz, size_t nStart = npos) const;
diff --git a/include/wx/stringimpl.h b/include/wx/stringimpl.h

index b680b52cf1a03ff9c317939107eb4c95b04d9fe0..0a3f45bb528450322c47b4cfb418c89a307e40ed 100644 (file)
--- a/include/wx/stringimpl.h
+++ b/include/wx/stringimpl.h
@@ -42,6 +42,10 @@
  
  // global pointer to empty string
  extern WXDLLIMPEXP_DATA_BASE(const wxChar*) wxEmptyString;
+#if wxUSE_UNICODE_UTF8
+// FIXME-UTF8: we should have only one wxEmptyString
+extern WXDLLIMPEXP_DATA_BASE(const wxStringCharType*) wxEmptyStringImpl;
+#endif
  
  
  // ----------------------------------------------------------------------------
@@ -61,7 +65,7 @@ extern WXDLLIMPEXP_DATA_BASE(const wxChar*) wxEmptyString;
      #ifdef HAVE_STD_WSTRING
          typedef std::wstring wxStdString;
      #else
-        typedef std::basic_string<wxChar> wxStdString;
+        typedef std::basic_string<wxStringCharType> wxStdString;
      #endif
  #else
      typedef std::string wxStdString;
@@ -97,8 +101,8 @@ struct WXDLLIMPEXP_BASE wxStringData
    size_t  nDataLength,  // actual string length
            nAllocLength; // allocated memory size
  
-  // mimics declaration 'wxChar data[nAllocLength]'
-  wxChar* data() const { return (wxChar*)(this + 1); }
+  // mimics declaration 'wxStringCharType data[nAllocLength]'
+  wxStringCharType* data() const { return (wxStringCharType*)(this + 1); }
  
    // empty string has a special ref count so it's never deleted
    bool  IsEmpty()   const { return (nRefs == -1); }
@@ -143,7 +147,11 @@ protected:
    // string (re)initialization functions
      // initializes the string to the empty value (must be called only from
      // ctors, use Reinit() otherwise)
+#if wxUSE_UNICODE_UTF8
+  void Init() { m_pchData = (wxStringCharType *)wxEmptyStringImpl; } // FIXME-UTF8
+#else
    void Init() { m_pchData = (wxStringCharType *)wxEmptyString; }
+#endif
      // initializes the string with (a part of) C-string
    void InitWith(const wxStringCharType *psz, size_t nPos = 0, size_t nLen = npos);
      // as Init, but also frees old data
@@ -378,7 +386,7 @@ public:
      { ConcatSelf(str.length(), str.c_str()); return *this; }
      // append first n (or all if n == npos) characters of sz
    wxStringImpl& append(const wxStringCharType *sz)
-    { ConcatSelf(wxStrlen(sz), sz); return *this; }
+    { ConcatSelf(Strsize(sz), sz); return *this; }
    wxStringImpl& append(const wxStringCharType *sz, size_t n)
      { ConcatSelf(n, sz); return *this; }
      // append n copies of ch
@@ -395,7 +403,7 @@ public:
      { clear(); return append(str, pos, n); }
      // same as `= first n (or all if n == npos) characters of sz'
    wxStringImpl& assign(const wxStringCharType *sz)
-    { clear(); return append(sz, wxStrlen(sz)); }
+    { clear(); return append(sz, Strsize(sz)); }
    wxStringImpl& assign(const wxStringCharType *sz, size_t n)
      { clear(); return append(sz, n); }
      // same as `= n copies of ch'
@@ -430,9 +438,9 @@ public:
      // insert first n (or all if n == npos) characters of sz
    wxStringImpl& insert(size_t nPos, const wxStringCharType *sz, size_t n = npos);
      // insert n copies of ch
-  wxStringImpl& insert(size_t nPos, size_t n, wxStringCharType ch)// FIXME-UTF8: tricky
+  wxStringImpl& insert(size_t nPos, size_t n, wxStringCharType ch)
      { return insert(nPos, wxStringImpl(n, ch)); }
-  iterator insert(iterator it, wxStringCharType ch) // FIXME-UTF8: tricky
+  iterator insert(iterator it, wxStringCharType ch)
      { size_t idx = it - begin(); insert(idx, 1, ch); return begin() + idx; }
    void insert(iterator it, const_iterator first, const_iterator last)
      { insert(it - begin(), first, last - first); }
@@ -525,6 +533,13 @@ public:
    void DoUngetWriteBuf(size_t nLen);
  #endif
  
+private:
+#if wxUSE_UNICODE_UTF8
+  static size_t Strsize(const wxStringCharType *s) { return strlen(s); }
+#else
+  static size_t Strsize(const wxStringCharType *s) { return wxStrlen(s); }
+#endif
+
    friend class WXDLLIMPEXP_BASE wxString;
  };
  
diff --git a/include/wx/strvararg.h b/include/wx/strvararg.h

index 6ad650be857c846eb3c702746ea5267a324fc8a8..55eb9c01f23e9a18215b6be268df30124c98093e 100644 (file)
--- a/include/wx/strvararg.h
+++ b/include/wx/strvararg.h
@@ -93,7 +93,7 @@ template<>
  struct WXDLLIMPEXP_BASE wxArgNormalizer<const wxCStrData&>
  {
      wxArgNormalizer(const wxCStrData& value) : m_value(value) {}
-    const wxStringCharType *get() const;
+    const wxChar *get() const; // FIXME-UTF8: should be wxStringCharType
  
      const wxCStrData& m_value;
  };
@@ -109,7 +109,7 @@ template<>
  struct WXDLLIMPEXP_BASE wxArgNormalizer<const wxString&>
  {
      wxArgNormalizer(const wxString& value) : m_value(value) {}
-    const wxStringCharType *get() const;
+    const wxChar *get() const; // FIXME-UTF8: should be wxStringCharType
  
      const wxString& m_value;
  };
@@ -121,8 +121,7 @@ struct wxArgNormalizer<wxString> : public wxArgNormalizer<const wxString&>
          : wxArgNormalizer<const wxString&>(value) {}
  };
  
-#if wxUSE_UNICODE_WCHAR
-
+#if wxUSE_UNICODE // FIXME-UTF8: should be wxUSE_UNICODE_WCHAR
  template<>
  struct WXDLLIMPEXP_BASE wxArgNormalizer<const char*>
  {
@@ -139,9 +138,30 @@ struct wxArgNormalizer<char*> : public wxArgNormalizer<const char*>
      wxArgNormalizer(char *value)
          : wxArgNormalizer<const char*>(value) {}
  };
+#endif // wxUSE_UNICODE_WCHAR
+
+// FIXME-UTF8
+#if 0 // wxUSE_UNICODE_UTF8
+// for conversion from local charset to UTF-8
+template<>
+struct WXDLLIMPEXP_BASE wxArgNormalizer<const char*>
+{
+    wxArgNormalizer(const char *value);
+    ~wxArgNormalizer();
+    const char *get() const;
+
+    wxCharBuffer *m_value;
+};
  
-#elif wxUSE_WCHAR_T // !wxUSE_UNICODE_WCHAR && wxUSE_WCHAR_T
+template<>
+struct wxArgNormalizer<char*> : public wxArgNormalizer<const char*>
+{
+    wxArgNormalizer(char *value)
+        : wxArgNormalizer<const char*>(value) {}
+};
+#endif // wxUSE_UNICODE_UTF8
  
+#if /*wxUSE_UNICODE_UTF8 || */ !wxUSE_UNICODE // FIXME-UTF8
  template<>
  struct WXDLLIMPEXP_BASE wxArgNormalizer<const wchar_t*>
  {
@@ -158,8 +178,7 @@ struct wxArgNormalizer<wchar_t*> : public wxArgNormalizer<const wchar_t*>
      wxArgNormalizer(wchar_t *value)
          : wxArgNormalizer<const wchar_t*>(value) {}
  };
-
-#endif // wxUSE_UNICODE_WCHAR / !wxUSE_UNICODE_WCHAR && wxUSE_WCHAR_T
+#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
  
  // versions for passing wx[W]CharBuffer:
  template<>
diff --git a/include/wx/unichar.h b/include/wx/unichar.h

index b7fe54d9ca041f80a53c3a55d620a6ffecd78f33..fc664c031f01eaafe53883598d1db6c375b707f9 100644 (file)
--- a/include/wx/unichar.h
+++ b/include/wx/unichar.h
@@ -69,6 +69,9 @@ public:
      // Returns Unicode code point value of the character
      value_type GetValue() const { return m_value; }
  
+    // Returns true if the character is an ASCII character:
+    bool IsAscii() const { return m_value < 0x80; }
+
      // Conversions to char and wchar_t types: all of those are needed to be
      // able to pass wxUniChars to various standard narrow and wide character
      // functions
@@ -152,7 +155,11 @@ private:
      typedef wxStringImpl::iterator iterator;
  
      // create the reference
+#if wxUSE_UNICODE_UTF8
+    wxUniCharRef(wxStringImpl& str, iterator pos) : m_str(str), m_pos(pos) {}
+#else
      wxUniCharRef(iterator pos) : m_pos(pos) {}
+#endif
  
  public:
      // NB: we have to make this public, because we don't have wxString
@@ -160,23 +167,26 @@ public:
      //     as friend; so at least don't use a ctor but a static function
      //     that must be used explicitly (this is more than using 'explicit'
      //     keyword on ctor!):
+#if wxUSE_UNICODE_UTF8
+    static wxUniCharRef CreateForString(wxStringImpl& str, iterator pos)
+        { return wxUniCharRef(str, pos); }
+#else
      static wxUniCharRef CreateForString(iterator pos)
          { return wxUniCharRef(pos); }
+#endif
  
      wxUniChar::value_type GetValue() const { return UniChar().GetValue(); }
+    // Returns true if the referenced character is an ASCII character:
+    bool IsAscii() const { return UniChar().IsAscii(); }
  
      // Assignment operators:
-    wxUniCharRef& operator=(const wxUniCharRef& c)
-    {
-        *m_pos = *c.m_pos;
-        return *this;
-    };
+#if wxUSE_UNICODE_UTF8
+    wxUniCharRef& operator=(const wxUniChar& c);
+#else
+    wxUniCharRef& operator=(const wxUniChar& c) { *m_pos = c; return *this; }
+#endif
  
-    wxUniCharRef& operator=(const wxUniChar& c)
-    {
-        *m_pos = c;
-        return *this;
-    };
+    wxUniCharRef& operator=(const wxUniCharRef& c)
+        { return *this = c.UniChar(); }
  
      wxUniCharRef& operator=(char c) { return *this = wxUniChar(c); }
      wxUniCharRef& operator=(wchar_t c) { return *this = wxUniChar(c); }
@@ -227,11 +237,28 @@ public:
  #endif
  
  private:
-    wxUniChar UniChar() const { return *m_pos; }
+    wxUniChar UniChar() const
+    {
+#if wxUSE_UNICODE_UTF8
+        return DecodeChar(m_pos);
+#else
+        return *m_pos;
+#endif
+    }
+
+#if wxUSE_UNICODE_UTF8
+    // FIXME-UTF8: move this to a separate 'string operations' class
+    static wxUniChar DecodeChar(wxStringImpl::const_iterator i);
+    friend class WXDLLIMPEXP_BASE wxString;
+#endif
+
      friend class WXDLLIMPEXP_BASE wxUniChar;
  
  private:
-    // pointer to the character in string
+    // reference to the string and pointer to the character in string
+#if wxUSE_UNICODE_UTF8
+    wxStringImpl& m_str;
+#endif
      iterator m_pos;
  };
  
diff --git a/setup.h.in b/setup.h.in

index d9b83ee4bb3cb50da52dc8e30492e6b7d645f7ee..d4e09cd1e873505cda9d13e89bbf41fd5efb9d26 100644 (file)
--- a/setup.h.in
+++ b/setup.h.in
@@ -557,6 +557,8 @@
  
  #define wxUSE_UNICODE_MSLU 0
  
+#define wxUSE_UNICODE_UTF8 0
+
  #define wxUSE_DC_CACHEING 0
  
  #define wxUSE_GADGETS 0
diff --git a/src/common/list.cpp b/src/common/list.cpp

index 5a63e0c524612694d6bb72a4d9361db1fe3c82dd..7d069da0fffa311b0caeca22b4b4bc9103cf6d56 100644 (file)
--- a/src/common/list.cpp
+++ b/src/common/list.cpp
@@ -53,7 +53,7 @@ bool wxListKey::operator==(wxListKeyValue value) const
              // by not putting return here...
  
          case wxKEY_STRING:
-            return wxStrcmp(m_key.string, value.string) == 0;
+            return *m_key.string == *value.string;
  
          case wxKEY_INTEGER:
              return m_key.integer == value.integer;
@@ -84,7 +84,7 @@ wxNodeBase::wxNodeBase(wxListBase *list,
  
          case wxKEY_STRING:
              // to be deleted later (see ~wxNodeBase)
-            m_key.string = wxStrdup(key.GetString());
+            m_key.string = new wxString(key.GetString());
              break;
  
          default:
@@ -107,7 +107,7 @@ wxNodeBase::~wxNodeBase()
      {
          if ( m_list->m_keyType == wxKEY_STRING )
          {
-            free(m_key.string);
+            delete m_key.string;
          }
  
          m_list->DetachNode(this);
@@ -257,7 +257,7 @@ wxNodeBase *wxListBase::Append(long key, void *object)
      return AppendCommon(node);
  }
  
-wxNodeBase *wxListBase::Append (const wxChar *key, void *object)
+wxNodeBase *wxListBase::Append (const wxString& key, void *object)
  {
      wxCHECK_MSG( (m_keyType == wxKEY_STRING) ||
                   (m_keyType == wxKEY_NONE && m_count == 0),
diff --git a/src/common/log.cpp b/src/common/log.cpp

index 69abd5e0ba3722dc73bfc5c7443de5006721bec2..e6690b652a6be8f489b95e50266c80982c36bdb2 100644 (file)
--- a/src/common/log.cpp
+++ b/src/common/log.cpp
@@ -84,10 +84,10 @@
  // ----------------------------------------------------------------------------
  
  // generic log function
-void wxVLogGeneric(wxLogLevel level, const wxChar *szFormat, va_list argptr)
+void wxVLogGeneric(wxLogLevel level, const wxString& format, va_list argptr)
  {
      if ( wxLog::IsEnabled() ) {
-        wxLog::OnLog(level, wxString::FormatV(szFormat, argptr), time(NULL));
+        wxLog::OnLog(level, wxString::FormatV(format, argptr), time(NULL));
      }
  }
  
@@ -100,11 +100,11 @@ void wxDoLogGeneric(wxLogLevel level, const wxChar *szFormat, ...)
  }
  
  #define IMPLEMENT_LOG_FUNCTION(level)                               \
-  void wxVLog##level(const wxChar *szFormat, va_list argptr)        \
+  void wxVLog##level(const wxString& format, va_list argptr)        \
    {                                                                 \
      if ( wxLog::IsEnabled() ) {                                     \
        wxLog::OnLog(wxLOG_##level,                                   \
-                   wxString::FormatV(szFormat, argptr), time(NULL));\
+                   wxString::FormatV(format, argptr), time(NULL));  \
      }                                                               \
    }                                                                 \
                                                                      \
@@ -134,9 +134,9 @@ void wxSafeShowMessage(const wxString& title, const wxString& text)
  
  // fatal errors can't be suppressed nor handled by the custom log target and
  // always terminate the program
-void wxVLogFatalError(const wxChar *szFormat, va_list argptr)
+void wxVLogFatalError(const wxString& format, va_list argptr)
  {
-    wxSafeShowMessage(_T("Fatal Error"), wxString::FormatV(szFormat, argptr));
+    wxSafeShowMessage(_T("Fatal Error"), wxString::FormatV(format, argptr));
  
  #ifdef __WXWINCE__
      ExitThread(3);
@@ -157,12 +157,12 @@ void wxDoLogFatalError(const wxChar *szFormat, ...)
  }
  
  // same as info, but only if 'verbose' mode is on
-void wxVLogVerbose(const wxChar *szFormat, va_list argptr)
+void wxVLogVerbose(const wxString& format, va_list argptr)
  {
      if ( wxLog::IsEnabled() ) {
          if ( wxLog::GetActiveTarget() != NULL && wxLog::GetVerbose() ) {
              wxLog::OnLog(wxLOG_Info,
-                         wxString::FormatV(szFormat, argptr), time(NULL));
+                         wxString::FormatV(format, argptr), time(NULL));
          }
      }
  }
@@ -194,17 +194,17 @@ void wxDoLogVerbose(const wxChar *szFormat, ...)
      va_end(argptr);                                                 \
    }
  
-  void wxVLogTrace(const wxChar *mask, const wxChar *szFormat, va_list argptr)
+  void wxVLogTrace(const wxString& mask, const wxString& format, va_list argptr)
    {
      if ( wxLog::IsEnabled() && wxLog::IsAllowedTraceMask(mask) ) {
        wxString msg;
-      msg << _T("(") << mask << _T(") ") << wxString::FormatV(szFormat, argptr);
+      msg << _T("(") << mask << _T(") ") << wxString::FormatV(format, argptr);
  
        wxLog::OnLog(wxLOG_Trace, msg, time(NULL));
      }
    }
  
-  void wxDoLogTrace(const wxChar *mask, const wxChar *szFormat, ...)
+  void wxDoLogTrace(const wxString& mask, const wxChar *szFormat, ...)
    {
      va_list argptr;
      va_start(argptr, szFormat);
@@ -212,13 +212,13 @@ void wxDoLogVerbose(const wxChar *szFormat, ...)
      va_end(argptr);
    }
  
-  void wxVLogTrace(wxTraceMask mask, const wxChar *szFormat, va_list argptr)
+  void wxVLogTrace(wxTraceMask mask, const wxString& format, va_list argptr)
    {
      // we check that all of mask bits are set in the current mask, so
      // that wxLogTrace(wxTraceRefCount | wxTraceOle) will only do something
      // if both bits are set.
      if ( wxLog::IsEnabled() && ((wxLog::GetTraceMask() & mask) == mask) ) {
-      wxLog::OnLog(wxLOG_Trace, wxString::FormatV(szFormat, argptr), time(NULL));
+      wxLog::OnLog(wxLOG_Trace, wxString::FormatV(format, argptr), time(NULL));
      }
    }
  
@@ -246,9 +246,9 @@ static inline wxString wxLogSysErrorHelper(long err)
      return wxString::Format(_(" (error %ld: %s)"), err, wxSysErrorMsg(err));
  }
  
-void WXDLLEXPORT wxVLogSysError(const wxChar *szFormat, va_list argptr)
+void WXDLLEXPORT wxVLogSysError(const wxString& format, va_list argptr)
  {
-    wxVLogSysError(wxSysErrorCode(), szFormat, argptr);
+    wxVLogSysError(wxSysErrorCode(), format, argptr);
  }
  
  void WXDLLEXPORT wxDoLogSysError(const wxChar *szFormat, ...)
@@ -259,11 +259,11 @@ void WXDLLEXPORT wxDoLogSysError(const wxChar *szFormat, ...)
      va_end(argptr);
  }
  
-void WXDLLEXPORT wxVLogSysError(long err, const wxChar *fmt, va_list argptr)
+void WXDLLEXPORT wxVLogSysError(long err, const wxString& format, va_list argptr)
  {
      if ( wxLog::IsEnabled() ) {
          wxLog::OnLog(wxLOG_Error,
-                     wxString::FormatV(fmt, argptr) + wxLogSysErrorHelper(err),
+                     wxString::FormatV(format, argptr) + wxLogSysErrorHelper(err),
                       time(NULL));
      }
  }
diff --git a/src/common/string.cpp b/src/common/string.cpp

index 9513b690746de466300545f5448fa50c3aff4088..003980e556e245cdb48d753158daac1bdcf55d7c 100644 (file)
--- a/src/common/string.cpp
+++ b/src/common/string.cpp
@@ -105,6 +105,427 @@ wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
  
  #endif // wxUSE_STD_IOSTREAM
  
+// ===========================================================================
+// wxString class core
+// ===========================================================================
+
+#if wxUSE_UNICODE_UTF8
+
+// ---------------------------------------------------------------------------
+// UTF-8 operations
+// ---------------------------------------------------------------------------
+
+//
+// Well-formed UTF-8 byte sequences (Unicode Standard, "Well-Formed UTF-8
+// Byte Sequences" table; the same restrictions are mandated by RFC 3629):
+//
+//     Code Points    | 1st Byte | 2nd Byte | 3rd Byte | 4th Byte |
+// -------------------+----------+----------+----------+----------+
+//   U+0000..U+007F   |  00..7F  |          |          |          |
+//   U+0080..U+07FF   |  C2..DF  |  80..BF  |          |          |
+//   U+0800..U+0FFF   |  E0      |  A0..BF  |  80..BF  |          |
+//   U+1000..U+CFFF   |  E1..EC  |  80..BF  |  80..BF  |          |
+//   U+D000..U+D7FF   |  ED      |  80..9F  |  80..BF  |          |
+//   U+E000..U+FFFF   |  EE..EF  |  80..BF  |  80..BF  |          |
+//  U+10000..U+3FFFF  |  F0      |  90..BF  |  80..BF  |  80..BF  |
+//  U+40000..U+FFFFF  |  F1..F3  |  80..BF  |  80..BF  |  80..BF  |
+// U+100000..U+10FFFF |  F4      |  80..8F  |  80..BF  |  80..BF  |
+// -------------------+----------+----------+----------+----------+
+//
+// NB: the ED row excludes U+D800..U+DFFF (UTF-16 surrogates), which must
+//     never appear in UTF-8.
+
+// Returns true if the NUL-terminated string 'str' consists only of
+// well-formed UTF-8 sequences as listed in the table above. A NULL pointer
+// is treated as an empty string and is considered valid.
+bool wxString::IsValidUtf8String(const char *str)
+{
+    if ( !str )
+        return true; // empty string is UTF8 string
+
+    const unsigned char *c = (const unsigned char*)str;
+
+    for ( ; *c; ++c )
+    {
+        unsigned char b = *c;
+
+        if ( b <= 0x7F ) // 00..7F: single-byte sequence
+            continue;
+
+        // Only the 2nd byte of a sequence has a range that depends on the
+        // lead byte; all the following bytes are always in 80..BF.
+        unsigned char secondMin = 0x80,
+                      secondMax = 0xBF;
+        int trailing; // number of bytes following the lead byte
+
+        if ( b < 0xC2 ) // invalid lead bytes: 80..C1
+        {
+            return false;
+        }
+        else if ( b <= 0xDF ) // C2..DF: two-byte sequences
+        {
+            trailing = 1;
+        }
+        else if ( b <= 0xEF ) // E0..EF: three-byte sequences
+        {
+            trailing = 2;
+            if ( b == 0xE0 )
+                secondMin = 0xA0; // reject overlong encodings
+            else if ( b == 0xED )
+                secondMax = 0x9F; // reject surrogates U+D800..U+DFFF
+        }
+        else if ( b <= 0xF4 ) // F0..F4: four-byte sequences
+        {
+            trailing = 3;
+            if ( b == 0xF0 )
+                secondMin = 0x90; // reject overlong encodings
+            else if ( b == 0xF4 )
+                secondMax = 0x8F; // reject code points above U+10FFFF
+        }
+        else // otherwise, it's invalid lead byte
+        {
+            return false;
+        }
+
+        // NB: a premature end of string is caught by these checks too,
+        //     because NUL is outside of all the accepted ranges, so we
+        //     never read past the terminator.
+        b = *(++c);
+        if ( b < secondMin || b > secondMax )
+            return false;
+
+        for ( int i = 1; i < trailing; ++i )
+        {
+            b = *(++c);
+            if ( !(b >= 0x80 && b <= 0xBF ) )
+                return false;
+        }
+    }
+
+    return true;
+}
+
+#ifdef __WXDEBUG__
+// Debug-only helper used in assertions: tells whether 'c' may start an
+// UTF-8 sequence.
+/* static */
+bool wxString::IsValidUtf8LeadByte(unsigned char c)
+{
+    // 00..7F are single-byte sequences on their own...
+    if ( c <= 0x7F )
+        return true;
+
+    // ...and C2..F4 are the only bytes accepted as the start of a longer one
+    return c >= 0xC2 && c <= 0xF4;
+}
+#endif
+
+unsigned char wxString::ms_utf8IterTable[256] = { // sequence length in bytes, indexed by lead byte
+    // single-byte sequences (ASCII), step 1:
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 00..0F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 10..1F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 20..2F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 30..3F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 40..4F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 50..5F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 60..6F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 70..7F
+
+    // 80..C1 are not valid lead bytes; we use step 1 to skip
+    // over them one byte at a time (should never happen):
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 80..8F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 90..9F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A0..AF
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B0..BF
+    1, 1,                                            // C0,C1
+
+    // two-byte sequences, step 2:
+          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // C2..CF
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // D0..DF
+
+    // three-byte sequences, step 3:
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  // E0..EF
+
+    // four-byte sequences, step 4:
+    4, 4, 4, 4, 4,                                   // F0..F4
+
+    // these are invalid again (lead bytes of 5- or 6-byte
+    // sequences and of sequences for code points
+    // above U+10FFFF, as restricted by RFC 3629), step 1 as above:
+                   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   // F5..FF
+};
+
+/* static */
+void wxString::DecIter(wxStringImpl::const_iterator& i)
+{
+    wxASSERT( IsValidUtf8LeadByte(*i) );
+
+    // Step back at least one byte and keep going for as long as we are on
+    // a non-lead byte. Non-lead bytes are all in the 0x80..0xBF range (i.e.
+    // 10xxxxxx in binary), so we stop at the first byte that is either
+    // < 0x80 (0xxxxxxx) or 0xC0..0xFF (11xxxxxx). The latter includes some
+    // invalid values, but we can ignore that here, because valid UTF-8
+    // input is assumed for the purpose of efficient implementation.
+    do
+    {
+        --i;
+    }
+    while ( (*i & 0xC0) == 0x80 /* 2 highest bits are '10' */ );
+}
+
+/* static */
+void wxString::DecIter(wxStringImpl::iterator& i)
+{
+    // FIXME-UTF8: use template instead
+    wxASSERT( IsValidUtf8LeadByte(*i) );
+
+    // move back by one byte, then skip any further non-lead (10xxxxxx)
+    // bytes until the lead byte of the previous character is reached
+    do
+    {
+        --i;
+    }
+    while ( (*i & 0xC0) == 0x80 /* 2 highest bits are '10' */ );
+}
+
+/* static */
+wxStringImpl::const_iterator
+wxString::AddToIter(wxStringImpl::const_iterator i, int n)
+{
+    // Returns a copy of 'i' moved by 'n' characters: forward if n is
+    // positive, backward if it is negative, not at all if it is zero.
+    wxStringImpl::const_iterator out(i);
+
+    // at most one of these two loops does anything:
+    for ( ; n > 0; --n )
+        IncIter(out);
+
+    for ( ; n < 0; ++n )
+        DecIter(out);
+
+    return out;
+}
+
+wxStringImpl::iterator
+wxString::AddToIter(wxStringImpl::iterator i, int n)
+{
+    // FIXME-UTF8: use template instead
+
+    // Returns a copy of 'i' moved by 'n' characters: forward if n is
+    // positive, backward if it is negative, not at all if it is zero.
+    wxStringImpl::iterator out(i);
+
+    // at most one of these two loops does anything:
+    for ( ; n > 0; --n )
+        IncIter(out);
+
+    for ( ; n < 0; ++n )
+        DecIter(out);
+
+    return out;
+}
+
+
+/* static */
+int wxString::DiffIters(wxStringImpl::const_iterator i1,
+                        wxStringImpl::const_iterator i2)
+{
+    // Computes i1 - i2 in characters: the iterator that comes first is
+    // advanced character by character until it meets the other one, and
+    // the sign of the result depends on which of them that was.
+    int dist = 0;
+
+    if ( i1 < i2 )
+    {
+        // i1 precedes i2, the difference is negative
+        for ( ; i1 != i2; IncIter(i1) )
+            --dist;
+    }
+    else if ( i2 < i1 )
+    {
+        // i2 precedes i1, the difference is positive
+        for ( ; i2 != i1; IncIter(i2) )
+            ++dist;
+    }
+    //else: the iterators are equal, the difference is 0
+
+    return dist;
+}
+
+int wxString::DiffIters(wxStringImpl::iterator i1, wxStringImpl::iterator i2)
+{
+    // FIXME-UTF8: use template instead
+
+    // Computes i1 - i2 in characters: the iterator that comes first is
+    // advanced character by character until it meets the other one, and
+    // the sign of the result depends on which of them that was.
+    int dist = 0;
+
+    if ( i1 < i2 )
+    {
+        // i1 precedes i2, the difference is negative
+        for ( ; i1 != i2; IncIter(i1) )
+            --dist;
+    }
+    else if ( i2 < i1 )
+    {
+        // i2 precedes i1, the difference is positive
+        for ( ; i2 != i1; IncIter(i2) )
+            ++dist;
+    }
+    //else: the iterators are equal, the difference is 0
+
+    return dist;
+}
+
+/* static */
+wxString::Utf8CharBuffer wxString::EncodeChar(wxUniChar ch)
+{
+    // Encodes the single character 'ch' as a NUL-terminated UTF-8 sequence
+    // of 1 to 4 bytes. If 'ch' is above U+10FFFF, an assertion fails and an
+    // empty string is returned.
+    Utf8CharBuffer buf;
+    char *out = buf.data;
+
+    wxUniChar::value_type code = ch.GetValue();
+
+    //    Char. number range   |        UTF-8 octet sequence
+    //       (hexadecimal)     |              (binary)
+    //   ----------------------+---------------------------------------------
+    //   0000 0000 - 0000 007F | 0xxxxxxx
+    //   0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
+    //   0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
+    //   0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+    //
+    //   Code point value is stored in bits marked with 'x', lowest-order bit
+    //   of the value on the right side in the diagram above.
+    //                                                        (from RFC 3629)
+
+    if ( code <= 0x7F )
+    {
+        out[1] = 0;
+        out[0] = (char)code;
+    }
+    else if ( code <= 0x07FF )
+    {
+        out[2] = 0;
+        // NB: this line takes 6 least significant bits, encodes them as
+        // 10xxxxxx and discards them so that the next byte can be encoded:
+        out[1] = (char)(0x80 | (code & 0x3F));  code >>= 6;
+        out[0] = (char)(0xC0 | code);
+    }
+    else if ( code <= 0xFFFF )
+    {
+        // NB: the upper bound is inclusive: U+FFFF must use the 3-byte form
+        //     (EF BF BF), encoding it with 4 bytes would be an overlong and
+        //     hence invalid sequence
+        out[3] = 0;
+        out[2] = (char)(0x80 | (code & 0x3F));  code >>= 6;
+        out[1] = (char)(0x80 | (code & 0x3F));  code >>= 6;
+        out[0] = (char)(0xE0 | code);
+    }
+    else if ( code <= 0x10FFFF )
+    {
+        out[4] = 0;
+        out[3] = (char)(0x80 | (code & 0x3F));  code >>= 6;
+        out[2] = (char)(0x80 | (code & 0x3F));  code >>= 6;
+        out[1] = (char)(0x80 | (code & 0x3F));  code >>= 6;
+        out[0] = (char)(0xF0 | code);
+    }
+    else
+    {
+        wxFAIL_MSG( _T("trying to encode undefined Unicode character") );
+        out[0] = 0;
+    }
+
+    return buf;
+}
+
+/* static */
+wxUniChar wxUniCharRef::DecodeChar(wxStringImpl::const_iterator i)
+{
+    // Decodes the UTF-8 sequence starting at 'i' (which must point to a
+    // lead byte) and returns the character it represents.
+    wxASSERT( wxString::IsValidUtf8LeadByte(*i) ); // FIXME-UTF8: no "wxString::"
+
+    const size_t seqLen = wxString::GetUtf8CharLength(*i);
+    wxASSERT_MSG( seqLen <= 4, _T("invalid UTF-8 sequence length") );
+
+    //    Char. number range   |        UTF-8 octet sequence
+    //       (hexadecimal)     |              (binary)
+    //   ----------------------+---------------------------------------------
+    //   0000 0000 - 0000 007F | 0xxxxxxx
+    //   0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
+    //   0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
+    //   0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+    //
+    //   Code point value is stored in bits marked with 'x', lowest-order bit
+    //   of the value on the right side in the diagram above.
+    //                                                        (from RFC 3629)
+
+    // mask to extract lead byte's value ('x' bits above), by sequence's length:
+    static const unsigned char s_leadValueMask[4] =  { 0x7F, 0x1F, 0x0F, 0x07 };
+#ifdef __WXDEBUG__
+    // mask and value of lead byte's most significant bits, by length:
+    static const unsigned char s_leadMarkerMask[4] = { 0x80, 0xE0, 0xF0, 0xF8 };
+    static const unsigned char s_leadMarkerVal[4] =  { 0x00, 0xC0, 0xE0, 0xF0 };
+#endif
+
+    // start with the value bits of the lead byte:
+    const unsigned char lead = (unsigned char)*i;
+    wxASSERT_MSG( (lead & s_leadMarkerMask[seqLen-1]) ==
+                  s_leadMarkerVal[seqLen-1],
+                  _T("invalid UTF-8 lead byte") );
+    wxUniChar::value_type code = lead & s_leadValueMask[seqLen-1];
+
+    // every following byte contributes its 6 low bits, whatever the length
+    // of the sequence is:
+    for ( size_t k = 1; k < seqLen; ++k )
+    {
+        ++i;
+        const unsigned char cont = (unsigned char)*i;
+
+        wxASSERT_MSG( (cont & 0xC0) == 0x80,
+                      _T("invalid UTF-8 byte") );
+
+        code = (code << 6) | (cont & 0x3F);
+    }
+
+    return wxUniChar(code);
+}
+
+/* static */
+wxCharBuffer wxString::EncodeNChars(size_t n, wxUniChar ch)
+{
+    // Builds a buffer holding the UTF-8 encoding of 'ch' repeated 'n' times.
+
+    // encode the character just once and then replicate the bytes; the
+    // length of the encoding is looked up in the IncIter() table using its
+    // lead byte:
+    Utf8CharBuffer encoded(EncodeChar(ch));
+    const size_t charLen = ms_utf8IterTable[(unsigned char)encoded.data[0]];
+
+    wxCharBuffer result(n * charLen);
+    char *dst = result.data();
+    for ( size_t remaining = n; remaining > 0; --remaining )
+    {
+        memcpy(dst, encoded.data, charLen);
+        dst += charLen;
+    }
+
+    return result;
+}
+
+
+// Translates a (position, length) pair expressed in characters into the
+// corresponding pair expressed in units of the underlying wxStringImpl.
+//
+// npos is passed through unchanged for both values. NB: if 'pos' is npos,
+// only *implPos is set and *implLen is left untouched.
+void wxString::PosLenToImpl(size_t pos, size_t len,
+                            size_t *implPos, size_t *implLen) const
+{
+    if ( pos == npos )
+    {
+        *implPos = npos;
+        return;
+    }
+
+    const_iterator i = begin() + pos;
+    *implPos = wxStringImpl::const_iterator(i) - m_impl.begin();
+
+    if ( len == npos )
+    {
+        *implLen = npos;
+        return;
+    }
+
+    // too large length is interpreted as "to the end of the string"
+    // FIXME-UTF8: verify this is the case in std::string, assert
+    // otherwise
+    //
+    // NB: compare against the remaining length instead of computing
+    //     "pos + len", which could wrap around for huge 'len' values and
+    //     so wrongly skip the clamping; also compute length() only once
+    const size_t total = length();
+    if ( pos >= total )
+        len = 0;
+    else if ( len > total - pos )
+        len = total - pos;
+
+    *implLen = wxStringImpl::const_iterator(i + len) -
+               wxStringImpl::const_iterator(i);
+}
+
+#endif // wxUSE_UNICODE_UTF8
+
  // ----------------------------------------------------------------------------
  // wxCStrData converted strings caching
  // ----------------------------------------------------------------------------
@@ -254,14 +675,14 @@ const wchar_t* wxCStrData::AsWChar() const
  // construction and conversion
  // ---------------------------------------------------------------------------
  
-#if wxUSE_UNICODE
+#if wxUSE_UNICODE_WCHAR
  /* static */
  wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
                                                 const wxMBConv& conv)
  {
      // anything to do?
      if ( !psz || nLength == 0 )
-        return SubstrBufFromMB();
+        return SubstrBufFromMB(L"", 0);
  
      if ( nLength == npos )
          nLength = wxNO_LEN;
@@ -269,18 +690,51 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
      size_t wcLen;
      wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
      if ( !wcLen )
-        return SubstrBufFromMB();
+        return SubstrBufFromMB(_T(""), 0);
      else
          return SubstrBufFromMB(wcBuf, wcLen);
  }
-#else
+#endif // wxUSE_UNICODE_WCHAR
+
+#if wxUSE_UNICODE_UTF8
+/* static */
+wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
+                                               const wxMBConv& conv)
+{
+    // FIXME-UTF8: return as-is without copying under UTF8 locale, return
+    //             converted string under other locales - needs wxCharBuffer
+    //             changes
+
+    // NULL or zero-length input yields an empty buffer:
+    if ( psz == NULL || !nLength )
+        return SubstrBufFromMB("", 0);
+
+    // the converter is given wxNO_LEN where we were given npos:
+    const size_t srcLen = nLength == npos ? wxNO_LEN : nLength;
+
+    // step 1: from the encoding of 'conv' to a wide string
+    size_t wideLen;
+    wxWCharBuffer wide(conv.cMB2WC(psz, srcLen, &wideLen));
+    if ( wideLen == 0 )
+        return SubstrBufFromMB("", 0);
+
+    // step 2: from the wide string to UTF-8
+    SubstrBufFromMB utf8(ConvertStr(wide, wideLen, wxConvUTF8));
+
+    // widechar -> UTF-8 conversion isn't supposed to ever fail:
+    wxASSERT_MSG( utf8.data, _T("conversion to UTF-8 failed") );
+
+    return utf8;
+}
+#endif // wxUSE_UNICODE_UTF8
+
+#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
  /* static */
  wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
                                                 const wxMBConv& conv)
  {
      // anything to do?
      if ( !pwz || nLength == 0 )
-        return SubstrBufFromWC();
+        return SubstrBufFromWC("", 0);
  
      if ( nLength == npos )
          nLength = wxNO_LEN;
@@ -288,34 +742,56 @@ wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLengt
      size_t mbLen;
      wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
      if ( !mbLen )
-        return SubstrBufFromWC();
+        return SubstrBufFromWC("", 0);
      else
          return SubstrBufFromWC(mbBuf, mbLen);
  }
-#endif
+#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
  
  
-#if wxUSE_UNICODE
+#if wxUSE_UNICODE_WCHAR
  
  //Convert wxString in Unicode mode to a multi-byte string
  const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
  {
-    return conv.cWC2MB(c_str(), length() + 1 /* size, not length */, NULL);
+    return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
  }
  
-#else // ANSI
+#elif wxUSE_UNICODE_UTF8
  
-#if wxUSE_WCHAR_T
+// Returns the contents of this string converted from its internal UTF-8
+// representation to a wide character buffer.
+const wxWCharBuffer wxString::wc_str() const
+{
+    const size_t utf8Size = m_impl.length() + 1; // size, not length
+
+    return wxConvUTF8.cMB2WC(m_impl.c_str(), utf8Size, NULL);
+}
+
+// Returns the contents of this string converted to the multibyte encoding
+// of 'conv'. The internal UTF-8 data is first converted to a wide string,
+// which is then converted using 'conv'. An empty buffer is returned if the
+// first step yields no characters.
+const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
+{
+    // FIXME-UTF8: optimize the case when conv==wxConvUTF8 or wxConvLibc
+    //             under UTF8 locale
+    // FIXME-UTF8: use wc_str() here once we have buffers with length
+
+    size_t wcLen;
+    wxWCharBuffer wcBuf(
+            wxConvUTF8.cMB2WC(m_impl.c_str(),
+                              m_impl.length() + 1 /* size, not length */,
+                              &wcLen));
+    if ( !wcLen )
+        return wxCharBuffer("");
+
+    // NB: pass the size including the terminating NUL, as all the other
+    //     cWC2MB()/cMB2WC() calls converting a whole string do, so that
+    //     the terminator is converted too; this assumes that wcLen doesn't
+    //     count the terminator, as the empty string check above implies
+    //     (TODO: confirm against wxMBConv::cMB2WC())
+    return conv.cWC2MB(wcBuf, wcLen + 1 /* size, not length */, NULL);
+}
+
+#else // ANSI
  
  //Converts this string to a wide character string if unicode
  //mode is not enabled and wxUSE_WCHAR_T is enabled
  const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
  {
-    return conv.cMB2WC(c_str(), length() + 1 /* size, not length */, NULL);
+    return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
  }
  
-#endif // wxUSE_WCHAR_T
-
  #endif // Unicode/ANSI
  
  // shrink to minimal size (releasing extra memory)
@@ -996,7 +1472,8 @@ bool wxString::EndsWith(const wxChar *suffix, wxString *rest) const
      wxASSERT_MSG( suffix, _T("invalid parameter in wxString::EndssWith") );
  
      int start = length() - wxStrlen(suffix);
-    if ( start < 0 || wxStrcmp(wx_str() + start, suffix) != 0 )
+
+    if ( start < 0 || compare(start, npos, suffix) != 0 )
          return false;
  
      if ( rest )
@@ -1420,7 +1897,7 @@ int wxString::PrintfV(const wxString& format, va_list argptr)
          // only a copy
          va_list argptrcopy;
          wxVaCopy(argptrcopy, argptr);
-        int len = wxVsnprintf(buf, size, format, argptrcopy);
+        int len = wxVsnprintf(buf, size, (const wxChar*)/*FIXME-UTF8*/format, argptrcopy);
          va_end(argptrcopy);
  
          // some implementations of vsnprintf() don't NUL terminate
diff --git a/src/common/stringimpl.cpp b/src/common/stringimpl.cpp

index e8750dc4f0e2c9b923169a0c4d9a826b2cdf8cf4..0f1380e708e43414f43b837cd3d854cdba183d3f 100644 (file)
--- a/src/common/stringimpl.cpp
+++ b/src/common/stringimpl.cpp
@@ -1,5 +1,5 @@
  /////////////////////////////////////////////////////////////////////////////
-// Name:        src/common/string.cpp
+// Name:        src/common/stringimpl.cpp
  // Purpose:     wxString class
  // Author:      Vadim Zeitlin, Ryan Norton
  // Modified by:
@@ -56,12 +56,10 @@
      #define wxStringMemcpy   memcpy
      #define wxStringMemcmp   memcmp
      #define wxStringMemchr   memchr
-    #define wxStringStrlen   strlen
  #else
      #define wxStringMemcpy   wxTmemcpy
      #define wxStringMemcmp   wxTmemcmp
      #define wxStringMemchr   wxTmemchr
-    #define wxStringStrlen   wxStrlen
  #endif
  
  
@@ -80,6 +78,10 @@ const size_t wxStringImpl::npos = (size_t) -1;
  
  #if wxUSE_STL_BASED_WXSTRING
  
+// FIXME-UTF8: get rid of this, have only one wxEmptyString
+#if wxUSE_UNICODE_UTF8
+extern const wxStringCharType WXDLLIMPEXP_BASE *wxEmptyStringImpl = "";
+#endif
  extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = _T("");
  
  #else
@@ -90,11 +92,17 @@ extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = _T("");
  static const struct
  {
    wxStringData data;
-  wxChar dummy;
+  wxStringCharType dummy;
  } g_strEmpty = { {-1, 0, 0}, wxT('\0') };
  
  // empty C style string: points to 'string data' byte of g_strEmpty
-extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = &g_strEmpty.dummy;
+#if wxUSE_UNICODE_UTF8
+// FIXME-UTF8: get rid of this, have only one wxEmptyString
+extern const wxStringCharType WXDLLIMPEXP_BASE *wxEmptyStringImpl = &g_strEmpty.dummy;
+extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = _T("");
+#else
+extern const wxStringCharType WXDLLIMPEXP_BASE *wxEmptyString = &g_strEmpty.dummy;
+#endif
  
  #endif
  
@@ -111,7 +119,7 @@ extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = &g_strEmpty.dummy;
    class Averager
    {
    public:
-    Averager(const wxChar *sz) { m_sz = sz; m_nTotal = m_nCount = 0; }
+    Averager(const wxStringCharType *sz) { m_sz = sz; m_nTotal = m_nCount = 0; }
     ~Averager()
     { wxPrintf("wxString: average %s = %f\n", m_sz, ((float)m_nTotal)/m_nCount); }
  
@@ -119,7 +127,7 @@ extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = &g_strEmpty.dummy;
  
    private:
      size_t m_nCount, m_nTotal;
-    const wxChar *m_sz;
+    const wxStringCharType *m_sz;
    } g_averageLength("allocation size"),
      g_averageSummandLength("summand length"),
      g_averageConcatHit("hit probability in concat"),
@@ -147,15 +155,16 @@ void wxStringData::Free()
  // ===========================================================================
  
  // takes nLength elements of psz starting at nPos
-void wxStringImpl::InitWith(const wxChar *psz, size_t nPos, size_t nLength)
+void wxStringImpl::InitWith(const wxStringCharType *psz,
+                            size_t nPos, size_t nLength)
  {
    Init();
  
    // if the length is not given, assume the string to be NUL terminated
    if ( nLength == npos ) {
-    wxASSERT_MSG( nPos <= wxStrlen(psz), _T("index out of bounds") );
+    wxASSERT_MSG( nPos <= Strsize(psz), _T("index out of bounds") );
  
-    nLength = wxStrlen(psz + nPos);
+    nLength = Strsize(psz + nPos);
    }
  
    STATISTICS_ADD(InitialLength, nLength);
@@ -201,7 +210,7 @@ bool wxStringImpl::AllocBuffer(size_t nLen)
    wxASSERT( nLen >  0 );
  
    // make sure that we don't overflow
-  wxASSERT( nLen < (INT_MAX / sizeof(wxChar)) -
+  wxASSERT( nLen < (INT_MAX / sizeof(wxStringCharType)) -
                     (sizeof(wxStringData) + EXTRA_ALLOC + 1) );
  
    STATISTICS_ADD(Length, nLen);
@@ -210,7 +219,7 @@ bool wxStringImpl::AllocBuffer(size_t nLen)
    // 1) one extra character for '\0' termination
    // 2) sizeof(wxStringData) for housekeeping info
    wxStringData* pData = (wxStringData*)
-    malloc(sizeof(wxStringData) + (nLen + EXTRA_ALLOC + 1)*sizeof(wxChar));
+    malloc(sizeof(wxStringData) + (nLen + EXTRA_ALLOC + 1)*sizeof(wxStringCharType));
  
    if ( pData == NULL ) {
      // allocation failures are handled by the caller
@@ -269,7 +278,8 @@ bool wxStringImpl::AllocBeforeWrite(size_t nLen)
        nLen += EXTRA_ALLOC;
  
        pData = (wxStringData*)
-          realloc(pData, sizeof(wxStringData) + (nLen + 1)*sizeof(wxChar));
+          realloc(pData,
+                  sizeof(wxStringData) + (nLen + 1)*sizeof(wxStringCharType));
  
        if ( pData == NULL ) {
          // allocation failures are handled by the caller
@@ -331,7 +341,7 @@ bool wxStringImpl::Alloc(size_t nLen)
        nLen += EXTRA_ALLOC;
  
        pData = (wxStringData *)
-                malloc(sizeof(wxStringData) + (nLen + 1)*sizeof(wxChar));
+             malloc(sizeof(wxStringData) + (nLen + 1)*sizeof(wxStringCharType));
  
        if ( pData == NULL ) {
          // allocation failure handled by caller
@@ -352,14 +362,14 @@ bool wxStringImpl::Alloc(size_t nLen)
          return false;
        }
        // +1 to copy the terminator, too
-      memcpy(m_pchData, pData->data(), (nOldLen+1)*sizeof(wxChar));
+      memcpy(m_pchData, pData->data(), (nOldLen+1)*sizeof(wxStringCharType));
        GetStringData()->nDataLength = nOldLen;
      }
      else {
        nLen += EXTRA_ALLOC;
  
        pData = (wxStringData *)
-        realloc(pData, sizeof(wxStringData) + (nLen + 1)*sizeof(wxChar));
+        realloc(pData, sizeof(wxStringData) + (nLen + 1)*sizeof(wxStringCharType));
  
        if ( pData == NULL ) {
          // allocation failure handled by caller
@@ -411,11 +421,12 @@ wxStringImpl& wxStringImpl::erase(size_t nStart, size_t nLen)
      return *this;
  }
  
-wxStringImpl& wxStringImpl::insert(size_t nPos, const wxChar *sz, size_t n)
+wxStringImpl& wxStringImpl::insert(size_t nPos,
+                                   const wxStringCharType *sz, size_t n)
  {
      wxASSERT( nPos <= length() );
  
-    if ( n == npos ) n = wxStrlen(sz);
+    if ( n == npos ) n = Strsize(sz);
      if ( n == 0 ) return *this;
  
      if ( !Alloc(length() + n) || !CopyBeforeWrite() ) {
@@ -424,8 +435,8 @@ wxStringImpl& wxStringImpl::insert(size_t nPos, const wxChar *sz, size_t n)
      }
  
      memmove(m_pchData + nPos + n, m_pchData + nPos,
-            (length() - nPos) * sizeof(wxChar));
-    memcpy(m_pchData + nPos, sz, n * sizeof(wxChar));
+            (length() - nPos) * sizeof(wxStringCharType));
+    memcpy(m_pchData + nPos, sz, n * sizeof(wxStringCharType));
      GetStringData()->nDataLength = length() + n;
      m_pchData[length()] = '\0';
  
@@ -487,7 +498,8 @@ size_t wxStringImpl::find(const wxStringImpl& str, size_t nStart) const
      return p - c_str() + nLenOther <= nLen ? p - c_str() : npos;
  }
  
-size_t wxStringImpl::find(const wxChar* sz, size_t nStart, size_t n) const
+size_t wxStringImpl::find(const wxStringCharType* sz,
+                          size_t nStart, size_t n) const
  {
      return find(wxStringImpl(sz, n), nStart);
  }
@@ -534,7 +546,8 @@ size_t wxStringImpl::rfind(const wxStringImpl& str, size_t nStart) const
      return npos;
  }
  
-size_t wxStringImpl::rfind(const wxChar* sz, size_t nStart, size_t n) const
+size_t wxStringImpl::rfind(const wxStringCharType* sz,
+                           size_t nStart, size_t n) const
  {
      return rfind(wxStringImpl(sz, n), nStart);
  }
@@ -562,7 +575,7 @@ size_t wxStringImpl::rfind(wxStringCharType ch, size_t nStart) const
  }
  
  wxStringImpl& wxStringImpl::replace(size_t nStart, size_t nLen,
-                                    const wxChar *sz)
+                                    const wxStringCharType *sz)
  {
    wxASSERT_MSG( nStart <= length(),
                  _T("index out of bounds in wxStringImpl::replace") );
@@ -607,7 +620,7 @@ wxStringImpl& wxStringImpl::replace(size_t nStart, size_t nLen,
  }
  
  wxStringImpl& wxStringImpl::replace(size_t nStart, size_t nLen,
-                                    const wxChar* sz, size_t nCount)
+                                    const wxStringCharType* sz, size_t nCount)
  {
    return replace(nStart, nLen, wxStringImpl(sz, nCount).c_str());
  }
@@ -643,24 +656,25 @@ wxStringImpl& wxStringImpl::operator=(const wxStringImpl& stringSrc)
  // assigns a single character
  wxStringImpl& wxStringImpl::operator=(wxStringCharType ch)
  {
-  wxChar c(ch);
+  wxStringCharType c(ch);
    if ( !AssignCopy(1, &c) ) {
-    wxFAIL_MSG( _T("out of memory in wxStringImpl::operator=(wxChar)") );
+    wxFAIL_MSG( _T("out of memory in wxStringImpl::operator=(wxStringCharType)") );
    }
    return *this;
  }
  
  // assigns C string
-wxStringImpl& wxStringImpl::operator=(const wxChar *psz)
+wxStringImpl& wxStringImpl::operator=(const wxStringCharType *psz)
  {
-  if ( !AssignCopy(wxStrlen(psz), psz) ) {
-    wxFAIL_MSG( _T("out of memory in wxStringImpl::operator=(const wxChar *)") );
+  if ( !AssignCopy(Strsize(psz), psz) ) {
+    wxFAIL_MSG( _T("out of memory in wxStringImpl::operator=(const wxStringCharType *)") );
    }
    return *this;
  }
  
  // helper function: does real copy
-bool wxStringImpl::AssignCopy(size_t nSrcLen, const wxChar *pszSrcData)
+bool wxStringImpl::AssignCopy(size_t nSrcLen,
+                              const wxStringCharType *pszSrcData)
  {
    if ( nSrcLen == 0 ) {
      Reinit();
@@ -670,7 +684,7 @@ bool wxStringImpl::AssignCopy(size_t nSrcLen, const wxChar *pszSrcData)
        // allocation failure handled by caller
        return false;
      }
-    memcpy(m_pchData, pszSrcData, nSrcLen*sizeof(wxChar));
+    memcpy(m_pchData, pszSrcData, nSrcLen*sizeof(wxStringCharType));
      GetStringData()->nDataLength = nSrcLen;
      m_pchData[nSrcLen] = wxT('\0');
    }
@@ -682,7 +696,8 @@ bool wxStringImpl::AssignCopy(size_t nSrcLen, const wxChar *pszSrcData)
  // ---------------------------------------------------------------------------
  
  // add something to this string
-bool wxStringImpl::ConcatSelf(size_t nSrcLen, const wxChar *pszSrcData,
+bool wxStringImpl::ConcatSelf(size_t nSrcLen,
+                              const wxStringCharType *pszSrcData,
                                size_t nMaxLen)
  {
    STATISTICS_ADD(SummandLength, nSrcLen);
@@ -705,7 +720,7 @@ bool wxStringImpl::ConcatSelf(size_t nSrcLen, const wxChar *pszSrcData,
            // allocation failure handled by caller
            return false;
        }
-      memcpy(m_pchData, pOldData->data(), nLen*sizeof(wxChar));
+      memcpy(m_pchData, pOldData->data(), nLen*sizeof(wxStringCharType));
        pOldData->Unlock();
      }
      else if ( nNewLen > pData->nAllocLength ) {
@@ -728,7 +743,7 @@ bool wxStringImpl::ConcatSelf(size_t nSrcLen, const wxChar *pszSrcData,
      wxASSERT( nNewLen <= GetStringData()->nAllocLength );
  
      // fast concatenation - all is done in our buffer
-    memcpy(m_pchData + nLen, pszSrcData, nSrcLen*sizeof(wxChar));
+    memcpy(m_pchData + nLen, pszSrcData, nSrcLen*sizeof(wxStringCharType));
  
      m_pchData[nNewLen] = wxT('\0');          // put terminating '\0'
      GetStringData()->nDataLength = nNewLen; // and fix the length
@@ -755,7 +770,7 @@ wxChar *wxStringImpl::DoGetWriteBuf(size_t nLen)
  // put string back in a reasonable state after GetWriteBuf
  void wxStringImpl::DoUngetWriteBuf()
  {
-  DoUngetWriteBuf(wxStrlen(m_pchData));
+  DoUngetWriteBuf(Strsize(m_pchData));
  }
  
  void wxStringImpl::DoUngetWriteBuf(size_t nLen)
diff --git a/src/common/strvararg.cpp b/src/common/strvararg.cpp

index 5e7955dd1ece151471fa6614bb6dadb744031a04..81288c6f8943f918ebfd4ff2362691bf39909535 100644 (file)
--- a/src/common/strvararg.cpp
+++ b/src/common/strvararg.cpp
@@ -32,20 +32,30 @@
  // implementation
  // ============================================================================
  
-const wxStringCharType *wxArgNormalizer<const wxCStrData&>::get() const
+const wxChar *wxArgNormalizer<const wxCStrData&>::get() const
  {
+    // FIXME-UTF8: use some way that doesn't involve implicit conversion,
+    //             so that we deallocate any converted buffer immediately;
+    //             can't use AsString() because it returns wxString and not
+    //             const wxString&, unfortunately; use As[W]CharBuf() when
+    //             available.
      return m_value;
  }
  
-const wxStringCharType *wxArgNormalizer<const wxString&>::get() const
+const wxChar *wxArgNormalizer<const wxString&>::get() const
  {
+#if wxUSE_UNICODE_UTF8 // FIXME-UTF8
+    return (const wxChar*)m_value;
+#else
      return m_value.wx_str();
+#endif
  }
  
-#if wxUSE_UNICODE_WCHAR
-
+#if wxUSE_UNICODE // FIXME-UTF8: should be wxUSE_UNICODE_WCHAR
  wxArgNormalizer<const char*>::wxArgNormalizer(const char *value)
  {
+    // FIXME-UTF8: move this to the header so that m_value doesn't have
+    //             to be dynamically allocated
      m_value = new wxWCharBuffer(wxConvLibc.cMB2WC(value));
  }
  
@@ -58,12 +68,17 @@ const wchar_t *wxArgNormalizer<const char*>::get() const
  {
      return m_value->data();
  }
+#endif // wxUSE_UNICODE_WCHAR
  
-#elif wxUSE_WCHAR_T // !wxUSE_UNICODE_WCHAR && wxUSE_WCHAR_T
  
+#if /*wxUSE_UNICODE_UTF8 ||*/ !wxUSE_UNICODE // FIXME-UTF8
  wxArgNormalizer<const wchar_t*>::wxArgNormalizer(const wchar_t *value)
  {
+#if wxUSE_UNICODE_UTF8 // FIXME-UTF8: this will be the only case
+    m_value = new wxCharBuffer(wxConvUTF8.cWC2MB(value));
+#else
      m_value = new wxCharBuffer(wxConvLibc.cWC2MB(value));
+#endif
  }
  
  wxArgNormalizer<const wchar_t*>::~wxArgNormalizer()
@@ -75,12 +90,44 @@ const char *wxArgNormalizer<const wchar_t*>::get() const
  {
      return m_value->data();
  }
+#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
+
+#if 0 // wxUSE_UNICODE_UTF8 - FIXME-UTF8
+wxArgNormalizer<const char*>::wxArgNormalizer(const char *value)
+{
+    // FIXME-UTF8: move this to the header so that m_value doesn't have
+    //             to be dynamically allocated
+    // FIXME-UTF8: optimize this if current locale is UTF-8 one
+
+    // convert to widechar string first:
+    wxWCharBuffer buf(wxConvLibc.cMB2WC(value));
+
+    if ( buf )
+    {
+        // then to UTF-8:
+        m_value = new wxCharBuffer(wxConvUTF8.cWC2MB(value));
+    }
+    else
+    {
+        m_value = new wxCharBuffer();
+    }
+}
+
+wxArgNormalizer<const char*>::~wxArgNormalizer()
+{
+    delete m_value;
+}
+
+const char *wxArgNormalizer<const char*>::get() const
+{
+    return m_value->data();
+}
+#endif // wxUSE_UNICODE_UTF8
+
  
-#endif // wxUSE_UNICODE_WCHAR / !wxUSE_UNICODE_WCHAR && wxUSE_WCHAR_T
  
  // FIXME-UTF8: move this to the header once it's possible to include buffer.h
  //             without including wxcrt.h
-
  wxArgNormalizer<wxCharBuffer>::wxArgNormalizer(const wxCharBuffer& buf)
      : wxArgNormalizer<const char*>(buf.data())
  {
diff --git a/src/common/unichar.cpp b/src/common/unichar.cpp

index 9e8ef8df4f39cf4ff0cabeeaf92b6a1a1ec53c9f..0fec3a779286a12d459cf732de38ec13bf33546c 100644 (file)
--- a/src/common/unichar.cpp
+++ b/src/common/unichar.cpp
@@ -25,10 +25,17 @@
  
  #include "wx/unichar.h"
  
+// FIXME-UTF8: remove once UTF-8 functions moved outside
+#include "wx/string.h"
+
  // ===========================================================================
  // implementation
  // ===========================================================================
  
+// ---------------------------------------------------------------------------
+// wxUniChar
+// ---------------------------------------------------------------------------
+
  /* static */
  wxUniChar::value_type wxUniChar::From8bit(char c)
  {
@@ -55,3 +62,35 @@ char wxUniChar::To8bit(wxUniChar::value_type c)
          return '?'; // FIXME-UTF8: what to use as failure character?
      return buf[0];
  }
+
+
+// ---------------------------------------------------------------------------
+// wxUniCharRef
+// ---------------------------------------------------------------------------
+
+#if wxUSE_UNICODE_UTF8
+wxUniCharRef& wxUniCharRef::operator=(const wxUniChar& c)
+{
+    wxString::Utf8CharBuffer utf(wxString::EncodeChar(c));
+    size_t lenOld = wxString::GetUtf8CharLength(*m_pos);
+    size_t lenNew = wxString::GetUtf8CharLength(utf[0]);
+
+    if ( lenNew == lenOld )
+    {
+        iterator pos(m_pos);
+        for ( size_t i = 0; i < lenNew; ++i, ++pos )
+            *pos = utf[i];
+    }
+    else
+    {
+        size_t idx = m_pos - m_str.begin();
+
+        m_str.replace(m_pos, m_pos + lenOld, utf, lenNew);
+
+        // this is needed to keep m_pos valid:
+        m_pos = m_str.begin() + idx;
+    }
+
+    return *this;
+}
+#endif // wxUSE_UNICODE_UTF8
diff --git a/src/common/uri.cpp b/src/common/uri.cpp

index 022b66b590a347e99e114a1c3993789beeb1b66d..a5151947ef9a17b85440b69ee79eda4234348657 100644 (file)
--- a/src/common/uri.cpp
+++ b/src/common/uri.cpp
@@ -641,7 +641,8 @@ const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormali
          if (bNormalize)
          {
              wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
-#if wxUSE_STL
+#if wxUSE_STL || wxUSE_UNICODE_UTF8
+            // FIXME-UTF8: have some wxReadWriteStringBuffer instead?
              wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
  #endif
              Normalize(theBuffer, true);
@@ -693,7 +694,8 @@ const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormali
              if (bNormalize)
              {
                  wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
-#if wxUSE_STL
+#if wxUSE_STL || wxUSE_UNICODE_UTF8
+                // FIXME-UTF8: have some wxReadWriteStringBuffer instead?
                  wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
  #endif
                  Normalize(theBuffer);
author	Václav Slavík <vslavik@fastmail.fm>
	Thu, 12 Apr 2007 21:15:07 +0000 (21:15 +0000)
committer	Václav Slavík <vslavik@fastmail.fm>
	Thu, 12 Apr 2007 21:15:07 +0000 (21:15 +0000)
configure		patch \| blob \| blame \| history
configure.in		patch \| blob \| blame \| history
include/wx/buffer.h		patch \| blob \| blame \| history
include/wx/chartype.h		patch \| blob \| blame \| history
include/wx/list.h		patch \| blob \| blame \| history
include/wx/log.h		patch \| blob \| blame \| history
include/wx/string.h		patch \| blob \| blame \| history
include/wx/stringimpl.h		patch \| blob \| blame \| history
include/wx/strvararg.h		patch \| blob \| blame \| history
include/wx/unichar.h		patch \| blob \| blame \| history
setup.h.in		patch \| blob \| blame \| history
src/common/list.cpp		patch \| blob \| blame \| history
src/common/log.cpp		patch \| blob \| blame \| history
src/common/string.cpp		patch \| blob \| blame \| history
src/common/stringimpl.cpp		patch \| blob \| blame \| history
src/common/strvararg.cpp		patch \| blob \| blame \| history
src/common/unichar.cpp		patch \| blob \| blame \| history
src/common/uri.cpp		patch \| blob \| blame \| history