src/common/unichar.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/unichar.cpp
   3 // Purpose:     wxUniChar and wxUniCharRef classes
   4 // Author:      Vaclav Slavik
   5 // Created:     2007-03-19
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2007 REA Elektronik GmbH
   8 // Licence:     wxWindows licence
   9 ///////////////////////////////////////////////////////////////////////////////
  10
  11 // ===========================================================================
  12 // headers
  13 // ===========================================================================
  14
  15 // For compilers that support precompilation, includes "wx.h".
  16 #include "wx/wxprec.h"
  17
  18 #ifdef __BORLANDC__
  19     #pragma hdrstop
  20 #endif
  21
  22 #ifndef WX_PRECOMP
  23     #include "wx/strconv.h"  // wxConvLibc
  24     #include "wx/log.h"
  25 #endif
  26
  27 #include "wx/unichar.h"
  28 #include "wx/string.h"
  29
  30 // ===========================================================================
  31 // implementation
  32 // ===========================================================================
  33
  34 // ---------------------------------------------------------------------------
  35 // wxUniChar
  36 // ---------------------------------------------------------------------------
  37
  38 /* static */
  39 wxUniChar::value_type wxUniChar::From8bit(char c)
  40 {
  41     // all supported charsets have the first 128 characters same as ASCII:
  42     if ( (unsigned char)c < 0x80 )
  43         return c;
  44
  45 #if wxUSE_UTF8_LOCALE_ONLY
  46     wxFAIL_MSG( "invalid UTF-8 character" );
  47     return wxT('?'); // FIXME-UTF8: what to use as failure character?
  48 #else
  49     wchar_t buf[2];
  50     if ( wxConvLibc.ToWChar(buf, 2, &c, 1) != 2 )
  51     {
  52         wxFAIL_MSG( "invalid multibyte character" );
  53         return wxT('?'); // FIXME-UTF8: what to use as failure character?
  54     }
  55     return buf[0];
  56 #endif
  57 }
  58
  59 /* static */
  60 char wxUniChar::To8bit(wxUniChar::value_type c)
  61 {
  62     // all supported charsets have the first 128 characters same as ASCII:
  63     if ( c < 0x80 )
  64         return c;
  65
  66 #if wxUSE_UTF8_LOCALE_ONLY
  67     wxFAIL_MSG( "character cannot be converted to single UTF-8 byte" );
  68     return '?'; // FIXME-UTF8: what to use as failure character?
  69 #else
  70     wchar_t in = c;
  71     char buf[2];
  72     if ( wxConvLibc.FromWChar(buf, 2, &in, 1) != 2 )
  73     {
  74         wxFAIL_MSG( "character cannot be converted to single byte" );
  75         return '?'; // FIXME-UTF8: what to use as failure character?
  76     }
  77     return buf[0];
  78 #endif
  79 }
  80
  81
  82 // ---------------------------------------------------------------------------
  83 // wxUniCharRef
  84 // ---------------------------------------------------------------------------
  85
  86 #if wxUSE_UNICODE_UTF8
  87 wxUniChar wxUniCharRef::UniChar() const
  88 {
  89     return wxStringOperations::DecodeChar(m_pos);
  90 }
  91
  92 wxUniCharRef& wxUniCharRef::operator=(const wxUniChar& c)
  93 {
  94     wxStringOperations::Utf8CharBuffer utf(wxStringOperations::EncodeChar(c));
  95     size_t lenOld = wxStringOperations::GetUtf8CharLength(*m_pos);
  96     size_t lenNew = wxStringOperations::GetUtf8CharLength(utf[0]);
  97
  98     if ( lenNew == lenOld )
  99     {
 100         // this is the simpler case: if the new value's UTF-8 code has the
 101         // same length, we can just replace it:
 102
 103         iterator pos(m_pos);
 104         for ( size_t i = 0; i < lenNew; ++i, ++pos )
 105             *pos = utf[i];
 106     }
 107     else
 108     {
 109         // the worse case is when the new value has either longer or shorter
 110         // code -- in that case, we have to use wxStringImpl::replace() and
 111         // this invalidates all iterators, so we have to update them too:
 112
 113         wxString& str = *wx_const_cast(wxString*, m_node.m_str);
 114         wxStringImpl& strimpl = str.m_impl;
 115
 116         int iterDiff = lenNew - lenOld;
 117         size_t posIdx = m_pos - strimpl.begin();
 118
 119         // compute positions of outstanding iterators for this string after the
 120         // replacement is done (there is only a small number of iterators at
 121         // any time, so we use an array on the stack to avoid unneeded
 122         // allocation):
 123         static const size_t STATIC_SIZE = 32;
 124         size_t indexes_a[STATIC_SIZE];
 125         size_t *indexes = indexes_a;
 126         size_t iterNum = 0;
 127         wxStringIteratorNode *it;
 128         for ( it = str.m_iterators.ptr; it; it = it->m_next, ++iterNum )
 129         {
 130             wxASSERT( it->m_iter || it->m_citer );
 131
 132             if ( iterNum == STATIC_SIZE )
 133             {
 134                 wxLogTrace( _T("utf8"), _T("unexpectedly many iterators") );
 135
 136                 size_t total = iterNum + 1;
 137                 for ( wxStringIteratorNode *it2 = it; it2; it2 = it2->m_next )
 138                     total++;
 139                 indexes = new size_t[total];
 140                 memcpy(indexes, indexes_a, sizeof(size_t) * STATIC_SIZE);
 141             }
 142
 143             size_t idx = it->m_iter
 144                          ? (*it->m_iter - strimpl.begin())
 145                          : (*it->m_citer - strimpl.begin());
 146
 147             if ( idx > posIdx )
 148                 idx += iterDiff;
 149
 150             indexes[iterNum] = idx;
 151         }
 152
 153         // update the string:
 154         strimpl.replace(m_pos, m_pos + lenOld, utf, lenNew);
 155
 156         // finally, set the iterators to valid values again (note that this
 157         // updates m_pos as well):
 158         size_t i;
 159         for ( i = 0, it = str.m_iterators.ptr; it; it = it->m_next, ++i )
 160         {
 161             wxASSERT( i < iterNum );
 162             wxASSERT( it->m_iter || it->m_citer );
 163
 164             if ( it->m_iter )
 165                 *it->m_iter = strimpl.begin() + indexes[i];
 166             else // it->m_citer
 167                 *it->m_citer = strimpl.begin() + indexes[i];
 168         }
 169
 170         if ( indexes != indexes_a )
 171             delete[] indexes;
 172     }
 173
 174     return *this;
 175 }
 176 #endif // wxUSE_UNICODE_UTF8