| 1 | ///////////////////////////////////////////////////////////////////////////// |
| 2 | // Name: src/common/unichar.cpp |
| 3 | // Purpose: wxUniChar and wxUniCharRef classes |
| 4 | // Author: Vaclav Slavik |
| 5 | // Created: 2007-03-19 |
| 6 | // RCS-ID: $Id$ |
| 7 | // Copyright: (c) 2007 REA Elektronik GmbH |
| 8 | // Licence: wxWindows licence |
| 9 | /////////////////////////////////////////////////////////////////////////////// |
| 10 | |
| 11 | // =========================================================================== |
| 12 | // headers |
| 13 | // =========================================================================== |
| 14 | |
| 15 | // For compilers that support precompilation, includes "wx.h". |
| 16 | #include "wx/wxprec.h" |
| 17 | |
| 18 | #ifdef __BORLANDC__ |
| 19 | #pragma hdrstop |
| 20 | #endif |
| 21 | |
| 22 | #ifndef WX_PRECOMP |
| 23 | #include "wx/strconv.h" // wxConvLibc |
| 24 | #include "wx/log.h" |
| 25 | #endif |
| 26 | |
| 27 | #include "wx/unichar.h" |
| 28 | #include "wx/string.h" |
| 29 | |
| 30 | // =========================================================================== |
| 31 | // implementation |
| 32 | // =========================================================================== |
| 33 | |
| 34 | // --------------------------------------------------------------------------- |
| 35 | // wxUniChar |
| 36 | // --------------------------------------------------------------------------- |
| 37 | |
| 38 | /* static */ |
| 39 | wxUniChar::value_type wxUniChar::FromHi8bit(char c) |
| 40 | { |
| 41 | #if wxUSE_UTF8_LOCALE_ONLY |
| 42 | wxFAIL_MSG( "invalid UTF-8 character" ); |
| 43 | wxUnusedVar(c); |
| 44 | |
| 45 | return wxT('?'); // FIXME-UTF8: what to use as failure character? |
| 46 | #else |
| 47 | wchar_t buf[2]; |
| 48 | if ( wxConvLibc.ToWChar(buf, 2, &c, 1) != 2 ) |
| 49 | { |
| 50 | wxFAIL_MSG( "invalid multibyte character" ); |
| 51 | return wxT('?'); // FIXME-UTF8: what to use as failure character? |
| 52 | } |
| 53 | return buf[0]; |
| 54 | #endif |
| 55 | } |
| 56 | |
| 57 | /* static */ |
| 58 | char wxUniChar::ToHi8bit(wxUniChar::value_type c) |
| 59 | { |
| 60 | #if wxUSE_UTF8_LOCALE_ONLY |
| 61 | wxFAIL_MSG( "character cannot be converted to single UTF-8 byte" ); |
| 62 | wxUnusedVar(c); |
| 63 | |
| 64 | return '?'; // FIXME-UTF8: what to use as failure character? |
| 65 | #else |
| 66 | wchar_t in = c; |
| 67 | char buf[2]; |
| 68 | if ( wxConvLibc.FromWChar(buf, 2, &in, 1) != 2 ) |
| 69 | { |
| 70 | wxFAIL_MSG( "character cannot be converted to single byte" ); |
| 71 | return '?'; // FIXME-UTF8: what to use as failure character? |
| 72 | } |
| 73 | return buf[0]; |
| 74 | #endif |
| 75 | } |
| 76 | |
| 77 | |
| 78 | // --------------------------------------------------------------------------- |
| 79 | // wxUniCharRef |
| 80 | // --------------------------------------------------------------------------- |
| 81 | |
| 82 | #if wxUSE_UNICODE_UTF8 |
| 83 | wxUniChar wxUniCharRef::UniChar() const |
| 84 | { |
| 85 | return wxStringOperations::DecodeChar(m_pos); |
| 86 | } |
| 87 | |
| 88 | wxUniCharRef& wxUniCharRef::operator=(const wxUniChar& c) |
| 89 | { |
| 90 | wxStringOperations::Utf8CharBuffer utf(wxStringOperations::EncodeChar(c)); |
| 91 | size_t lenOld = wxStringOperations::GetUtf8CharLength(*m_pos); |
| 92 | size_t lenNew = wxStringOperations::GetUtf8CharLength(utf[0]); |
| 93 | |
| 94 | if ( lenNew == lenOld ) |
| 95 | { |
| 96 | // this is the simpler case: if the new value's UTF-8 code has the |
| 97 | // same length, we can just replace it: |
| 98 | |
| 99 | iterator pos(m_pos); |
| 100 | for ( size_t i = 0; i < lenNew; ++i, ++pos ) |
| 101 | *pos = utf[i]; |
| 102 | } |
| 103 | else // length of character encoding in UTF-8 changed |
| 104 | { |
| 105 | // the worse case is when the new value has either longer or shorter |
| 106 | // code -- in that case, we have to use wxStringImpl::replace() and |
| 107 | // this invalidates all iterators, so we have to update them too: |
| 108 | |
| 109 | wxStringImpl& strimpl = m_str.m_impl; |
| 110 | |
| 111 | int iterDiff = lenNew - lenOld; |
| 112 | size_t posIdx = m_pos - strimpl.begin(); |
| 113 | |
| 114 | // compute positions of outstanding iterators for this string after the |
| 115 | // replacement is done (there is only a small number of iterators at |
| 116 | // any time, so we use an array on the stack to avoid unneeded |
| 117 | // allocation): |
| 118 | static const size_t STATIC_SIZE = 32; |
| 119 | size_t indexes_a[STATIC_SIZE]; |
| 120 | size_t *indexes = indexes_a; |
| 121 | size_t iterNum = 0; |
| 122 | wxStringIteratorNode *it; |
| 123 | for ( it = m_str.m_iterators.ptr; it; it = it->m_next, ++iterNum ) |
| 124 | { |
| 125 | wxASSERT( it->m_iter || it->m_citer ); |
| 126 | |
| 127 | if ( iterNum == STATIC_SIZE ) |
| 128 | { |
| 129 | wxLogTrace( _T("utf8"), _T("unexpectedly many iterators") ); |
| 130 | |
| 131 | size_t total = iterNum + 1; |
| 132 | for ( wxStringIteratorNode *it2 = it; it2; it2 = it2->m_next ) |
| 133 | total++; |
| 134 | indexes = new size_t[total]; |
| 135 | memcpy(indexes, indexes_a, sizeof(size_t) * STATIC_SIZE); |
| 136 | } |
| 137 | |
| 138 | size_t idx = it->m_iter |
| 139 | ? (*it->m_iter - strimpl.begin()) |
| 140 | : (*it->m_citer - strimpl.begin()); |
| 141 | |
| 142 | if ( idx > posIdx ) |
| 143 | idx += iterDiff; |
| 144 | |
| 145 | indexes[iterNum] = idx; |
| 146 | } |
| 147 | |
| 148 | // update the string: |
| 149 | strimpl.replace(m_pos, m_pos + lenOld, utf, lenNew); |
| 150 | |
| 151 | #if wxUSE_STRING_POS_CACHE |
| 152 | m_str.InvalidateCache(); |
| 153 | #endif // wxUSE_STRING_POS_CACHE |
| 154 | |
| 155 | // finally, set the iterators to valid values again (note that this |
| 156 | // updates m_pos as well): |
| 157 | size_t i; |
| 158 | for ( i = 0, it = m_str.m_iterators.ptr; it; it = it->m_next, ++i ) |
| 159 | { |
| 160 | wxASSERT( i < iterNum ); |
| 161 | wxASSERT( it->m_iter || it->m_citer ); |
| 162 | |
| 163 | if ( it->m_iter ) |
| 164 | *it->m_iter = strimpl.begin() + indexes[i]; |
| 165 | else // it->m_citer |
| 166 | *it->m_citer = strimpl.begin() + indexes[i]; |
| 167 | } |
| 168 | |
| 169 | if ( indexes != indexes_a ) |
| 170 | delete[] indexes; |
| 171 | } |
| 172 | |
| 173 | return *this; |
| 174 | } |
| 175 | #endif // wxUSE_UNICODE_UTF8 |