src/common/unichar.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/unichar.cpp
   3 // Purpose:     wxUniChar and wxUniCharRef classes
   4 // Author:      Vaclav Slavik
   5 // Created:     2007-03-19
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2007 REA Elektronik GmbH
   8 // Licence:     wxWindows licence
   9 ///////////////////////////////////////////////////////////////////////////////
  10
  11 // ===========================================================================
  12 // headers
  13 // ===========================================================================
  14
  15 // For compilers that support precompilation, includes "wx.h".
  16 #include "wx/wxprec.h"
  17
  18 #ifdef __BORLANDC__
  19     #pragma hdrstop
  20 #endif
  21
  22 #ifndef WX_PRECOMP
  23     #include "wx/strconv.h"  // wxConvLibc
  24     #include "wx/log.h"
  25 #endif
  26
  27 #include "wx/unichar.h"
  28 #include "wx/string.h"
  29
  30 // ===========================================================================
  31 // implementation
  32 // ===========================================================================
  33
  34 // ---------------------------------------------------------------------------
  35 // wxUniChar
  36 // ---------------------------------------------------------------------------
  37
  38 /* static */
  39 wxUniChar::value_type wxUniChar::From8bit(char c)
  40 {
  41     // all supported charsets have the first 128 characters same as ASCII:
  42     if ( (unsigned char)c < 0x80 )
  43         return c;
  44
  45 #if wxUSE_UTF8_LOCALE_ONLY
  46     wxFAIL_MSG( _T("invalid UTF-8 character") );
  47     return wxT('?'); // FIXME-UTF8: what to use as failure character?
  48 #else
  49     wchar_t buf[2];
  50     if ( wxConvLibc.ToWChar(buf, 2, &c, 1) != 2 )
  51         return wxT('?'); // FIXME-UTF8: what to use as failure character?
  52     return buf[0];
  53 #endif
  54 }
  55
  56 /* static */
  57 char wxUniChar::To8bit(wxUniChar::value_type c)
  58 {
  59     // all supported charsets have the first 128 characters same as ASCII:
  60     if ( c < 0x80 )
  61         return c;
  62
  63 #if wxUSE_UTF8_LOCALE_ONLY
  64     wxFAIL_MSG( _T("character cannot be converted to single UTF-8 byte") );
  65     return '?'; // FIXME-UTF8: what to use as failure character?
  66 #else
  67     wchar_t in = c;
  68     char buf[2];
  69     if ( wxConvLibc.FromWChar(buf, 2, &in, 1) != 2 )
  70         return '?'; // FIXME-UTF8: what to use as failure character?
  71     return buf[0];
  72 #endif
  73 }
  74
  75
  76 // ---------------------------------------------------------------------------
  77 // wxUniCharRef
  78 // ---------------------------------------------------------------------------
  79
  80 #if wxUSE_UNICODE_UTF8
  81 wxUniChar wxUniCharRef::UniChar() const
  82 {
  83     return wxStringOperations::DecodeChar(m_pos);
  84 }
  85
  86 wxUniCharRef& wxUniCharRef::operator=(const wxUniChar& c)
  87 {
  88     wxStringOperations::Utf8CharBuffer utf(wxStringOperations::EncodeChar(c));
  89     size_t lenOld = wxStringOperations::GetUtf8CharLength(*m_pos);
  90     size_t lenNew = wxStringOperations::GetUtf8CharLength(utf[0]);
  91
  92     if ( lenNew == lenOld )
  93     {
  94         // this is the simpler case: if the new value's UTF-8 code has the
  95         // same length, we can just replace it:
  96
  97         iterator pos(m_pos);
  98         for ( size_t i = 0; i < lenNew; ++i, ++pos )
  99             *pos = utf[i];
 100     }
 101     else
 102     {
 103         // the worse case is when the new value has either longer or shorter
 104         // code -- in that case, we have to use wxStringImpl::replace() and
 105         // this invalidates all iterators, so we have to update them too:
 106
 107         wxString& str = *wx_const_cast(wxString*, m_node.m_str);
 108         wxStringImpl& strimpl = str.m_impl;
 109
 110         int iterDiff = lenNew - lenOld;
 111         size_t posIdx = m_pos - strimpl.begin();
 112
 113         // compute positions of outstanding iterators for this string after the
 114         // replacement is done (there is only a small number of iterators at
 115         // any time, so we use an array on the stack to avoid unneeded
 116         // allocation):
 117         static const size_t STATIC_SIZE = 32;
 118         size_t indexes_a[STATIC_SIZE];
 119         size_t *indexes = indexes_a;
 120         size_t iterNum = 0;
 121         wxStringIteratorNode *it;
 122         for ( it = str.m_iterators.ptr; it; it = it->m_next, ++iterNum )
 123         {
 124             wxASSERT( it->m_iter || it->m_citer );
 125
 126             if ( iterNum == STATIC_SIZE )
 127             {
 128                 wxLogTrace( _T("utf8"), _T("unexpectedly many iterators") );
 129
 130                 size_t total = iterNum + 1;
 131                 for ( wxStringIteratorNode *it2 = it; it2; it2 = it2->m_next )
 132                     total++;
 133                 indexes = new size_t[total];
 134                 memcpy(indexes, indexes_a, sizeof(size_t) * STATIC_SIZE);
 135             }
 136
 137             size_t idx = it->m_iter
 138                          ? (*it->m_iter - strimpl.begin())
 139                          : (*it->m_citer - strimpl.begin());
 140
 141             if ( idx > posIdx )
 142                 idx += iterDiff;
 143
 144             indexes[iterNum] = idx;
 145         }
 146
 147         // update the string:
 148         strimpl.replace(m_pos, m_pos + lenOld, utf, lenNew);
 149
 150         // finally, set the iterators to valid values again (note that this
 151         // updates m_pos as well):
 152         size_t i;
 153         for ( i = 0, it = str.m_iterators.ptr; it; it = it->m_next, ++i )
 154         {
 155             wxASSERT( i < iterNum );
 156             wxASSERT( it->m_iter || it->m_citer );
 157
 158             if ( it->m_iter )
 159                 *it->m_iter = strimpl.begin() + indexes[i];
 160             else // it->m_citer
 161                 *it->m_citer = strimpl.begin() + indexes[i];
 162         }
 163
 164         if ( indexes != indexes_a )
 165             delete[] indexes;
 166     }
 167
 168     return *this;
 169 }
 170 #endif // wxUSE_UNICODE_UTF8