No changes, synchronised source names that appear commented at the top of files with...

[wxWidgets.git] / src / common / unichar.cpp
diff --git a/src/common/unichar.cpp b/src/common/unichar.cpp

index 0fec3a779286a12d459cf732de38ec13bf33546c..645435641ff6d9b719da16696ecc15a78ea77cd9 100644 (file)
--- a/src/common/unichar.cpp
+++ b/src/common/unichar.cpp
@@ -21,11 +21,10 @@
  
  #ifndef WX_PRECOMP
      #include "wx/strconv.h"  // wxConvLibc
  
  #ifndef WX_PRECOMP
      #include "wx/strconv.h"  // wxConvLibc
+    #include "wx/log.h"
  #endif
  
  #include "wx/unichar.h"
  #endif
  
  #include "wx/unichar.h"
-
-// FIXME-UTF8: remove once UTF-8 functions moved outside
  #include "wx/string.h"
  
  // ===========================================================================
  #include "wx/string.h"
  
  // ===========================================================================
@@ -37,58 +36,147 @@
  // ---------------------------------------------------------------------------
  
  /* static */
  // ---------------------------------------------------------------------------
  
  /* static */
-wxUniChar::value_type wxUniChar::From8bit(char c)
+wxUniChar::value_type wxUniChar::FromHi8bit(char c)
  {
  {
-    // all supported charsets have the first 128 characters same as ASCII:
-    if ( (unsigned char)c < 0x80 )
-        return c;
-
-    wchar_t buf[2];
-    if ( wxConvLibc.ToWChar(buf, 2, &c, 1) != 2 )
+#if wxUSE_UTF8_LOCALE_ONLY
+    wxFAIL_MSG( "invalid UTF-8 character" );
+    wxUnusedVar(c);
+
+    return wxT('?'); // FIXME-UTF8: what to use as failure character?
+#else
+    char cbuf[2];
+    cbuf[0] = c;
+    cbuf[1] = '\0';
+    wchar_t wbuf[2];
+    if ( wxConvLibc.ToWChar(wbuf, 2, cbuf, 2) != 2 )
+    {
+        wxFAIL_MSG( "invalid multibyte character" );
          return wxT('?'); // FIXME-UTF8: what to use as failure character?
          return wxT('?'); // FIXME-UTF8: what to use as failure character?
-    return buf[0];
+    }
+    return wbuf[0];
+#endif
  }
  
  /* static */
  }
  
  /* static */
-char wxUniChar::To8bit(wxUniChar::value_type c)
+char wxUniChar::ToHi8bit(wxUniChar::value_type v)
  {
  {
-    // all supported charsets have the first 128 characters same as ASCII:
-    if ( c < 0x80 )
-        return c;
-
-    wchar_t in = c;
-    char buf[2];
-    if ( wxConvLibc.FromWChar(buf, 2, &in, 1) != 2 )
-        return '?'; // FIXME-UTF8: what to use as failure character?
-    return buf[0];
+    char c;
+    if ( !GetAsHi8bit(v, &c) )
+    {
+        wxFAIL_MSG( "character cannot be converted to single byte" );
+        c = '?'; // FIXME-UTF8: what to use as failure character?
+    }
+
+    return c;
  }
  
  }
  
+/* static */
+bool wxUniChar::GetAsHi8bit(value_type v, char *c)
+{
+    wchar_t wbuf[2];
+    wbuf[0] = v;
+    wbuf[1] = L'\0';
+    char cbuf[2];
+    if ( wxConvLibc.FromWChar(cbuf, 2, wbuf, 2) != 2 )
+        return false;
+
+    *c = cbuf[0];
+    return true;
+}
  
  // ---------------------------------------------------------------------------
  // wxUniCharRef
  // ---------------------------------------------------------------------------
  
  #if wxUSE_UNICODE_UTF8
  
  // ---------------------------------------------------------------------------
  // wxUniCharRef
  // ---------------------------------------------------------------------------
  
  #if wxUSE_UNICODE_UTF8
+wxUniChar wxUniCharRef::UniChar() const
+{
+    return wxStringOperations::DecodeChar(m_pos);
+}
+
  wxUniCharRef& wxUniCharRef::operator=(const wxUniChar& c)
  {
  wxUniCharRef& wxUniCharRef::operator=(const wxUniChar& c)
  {
-    wxString::Utf8CharBuffer utf(wxString::EncodeChar(c));
-    size_t lenOld = wxString::GetUtf8CharLength(*m_pos);
-    size_t lenNew = wxString::GetUtf8CharLength(utf[0]);
+    wxStringOperations::Utf8CharBuffer utf(wxStringOperations::EncodeChar(c));
+    size_t lenOld = wxStringOperations::GetUtf8CharLength(*m_pos);
+    size_t lenNew = wxStringOperations::GetUtf8CharLength(utf[0]);
  
      if ( lenNew == lenOld )
      {
  
      if ( lenNew == lenOld )
      {
+        // this is the simpler case: if the new value's UTF-8 code has the
+        // same length, we can just replace it:
+
          iterator pos(m_pos);
          for ( size_t i = 0; i < lenNew; ++i, ++pos )
              *pos = utf[i];
      }
          iterator pos(m_pos);
          for ( size_t i = 0; i < lenNew; ++i, ++pos )
              *pos = utf[i];
      }
-    else
+    else // length of character encoding in UTF-8 changed
      {
      {
-        size_t idx = m_pos - m_str.begin();
-
-        m_str.replace(m_pos, m_pos + lenOld, utf, lenNew);
-
-        // this is needed to keep m_pos valid:
-        m_pos = m_str.begin() + idx;
+        // the worse case is when the new value has either longer or shorter
+        // code -- in that case, we have to use wxStringImpl::replace() and
+        // this invalidates all iterators, so we have to update them too:
+
+        wxStringImpl& strimpl = m_str.m_impl;
+
+        int iterDiff = lenNew - lenOld;
+        size_t posIdx = m_pos - strimpl.begin();
+
+        // compute positions of outstanding iterators for this string after the
+        // replacement is done (there is only a small number of iterators at
+        // any time, so we use an array on the stack to avoid unneeded
+        // allocation):
+        static const size_t STATIC_SIZE = 32;
+        size_t indexes_a[STATIC_SIZE];
+        size_t *indexes = indexes_a;
+        size_t iterNum = 0;
+        wxStringIteratorNode *it;
+        for ( it = m_str.m_iterators.ptr; it; it = it->m_next, ++iterNum )
+        {
+            wxASSERT( it->m_iter || it->m_citer );
+
+            if ( iterNum == STATIC_SIZE )
+            {
+                wxLogTrace( wxT("utf8"), wxT("unexpectedly many iterators") );
+
+                size_t total = iterNum + 1;
+                for ( wxStringIteratorNode *it2 = it; it2; it2 = it2->m_next )
+                    total++;
+                indexes = new size_t[total];
+                memcpy(indexes, indexes_a, sizeof(size_t) * STATIC_SIZE);
+            }
+
+            size_t idx = it->m_iter
+                         ? (*it->m_iter - strimpl.begin())
+                         : (*it->m_citer - strimpl.begin());
+
+            if ( idx > posIdx )
+                idx += iterDiff;
+
+            indexes[iterNum] = idx;
+        }
+
+        // update the string:
+        strimpl.replace(m_pos, m_pos + lenOld, utf, lenNew);
+
+#if wxUSE_STRING_POS_CACHE
+        m_str.InvalidateCache();
+#endif // wxUSE_STRING_POS_CACHE
+
+        // finally, set the iterators to valid values again (note that this
+        // updates m_pos as well):
+        size_t i;
+        for ( i = 0, it = m_str.m_iterators.ptr; it; it = it->m_next, ++i )
+        {
+            wxASSERT( i < iterNum );
+            wxASSERT( it->m_iter || it->m_citer );
+
+            if ( it->m_iter )
+                *it->m_iter = strimpl.begin() + indexes[i];
+            else // it->m_citer
+                *it->m_citer = strimpl.begin() + indexes[i];
+        }
+
+        if ( indexes != indexes_a )
+            delete[] indexes;
      }
  
      return *this;
      }
  
      return *this;