]> git.saurik.com Git - wxWidgets.git/blob - src/common/unichar.cpp
Do not follow HTML links if selecting text (patch #1719577)
[wxWidgets.git] / src / common / unichar.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/unichar.cpp
3 // Purpose: wxUniChar and wxUniCharRef classes
4 // Author: Vaclav Slavik
5 // Created: 2007-03-19
6 // RCS-ID: $Id$
7 // Copyright: (c) 2007 REA Elektronik GmbH
8 // Licence: wxWindows licence
9 ///////////////////////////////////////////////////////////////////////////////
10
11 // ===========================================================================
12 // headers
13 // ===========================================================================
14
15 // For compilers that support precompilation, includes "wx.h".
16 #include "wx/wxprec.h"
17
18 #ifdef __BORLANDC__
19 #pragma hdrstop
20 #endif
21
22 #ifndef WX_PRECOMP
23 #include "wx/strconv.h" // wxConvLibc
24 #endif
25
26 #include "wx/unichar.h"
27 #include "wx/string.h"
28
29 // ===========================================================================
30 // implementation
31 // ===========================================================================
32
33 // ---------------------------------------------------------------------------
34 // wxUniChar
35 // ---------------------------------------------------------------------------
36
37 /* static */
38 wxUniChar::value_type wxUniChar::From8bit(char c)
39 {
40 // all supported charsets have the first 128 characters same as ASCII:
41 if ( (unsigned char)c < 0x80 )
42 return c;
43
44 #if wxUSE_UTF8_LOCALE_ONLY
45 wxFAIL_MSG( _T("invalid UTF-8 character") );
46 return wxT('?'); // FIXME-UTF8: what to use as failure character?
47 #else
48 wchar_t buf[2];
49 if ( wxConvLibc.ToWChar(buf, 2, &c, 1) != 2 )
50 return wxT('?'); // FIXME-UTF8: what to use as failure character?
51 return buf[0];
52 #endif
53 }
54
55 /* static */
56 char wxUniChar::To8bit(wxUniChar::value_type c)
57 {
58 // all supported charsets have the first 128 characters same as ASCII:
59 if ( c < 0x80 )
60 return c;
61
62 #if wxUSE_UTF8_LOCALE_ONLY
63 wxFAIL_MSG( _T("character cannot be converted to single UTF-8 byte") );
64 return '?'; // FIXME-UTF8: what to use as failure character?
65 #else
66 wchar_t in = c;
67 char buf[2];
68 if ( wxConvLibc.FromWChar(buf, 2, &in, 1) != 2 )
69 return '?'; // FIXME-UTF8: what to use as failure character?
70 return buf[0];
71 #endif
72 }
73
74
75 // ---------------------------------------------------------------------------
76 // wxUniCharRef
77 // ---------------------------------------------------------------------------
78
79 #if wxUSE_UNICODE_UTF8
80 wxUniChar wxUniCharRef::UniChar() const
81 {
82 return wxStringOperations::DecodeChar(m_pos);
83 }
84
85 wxUniCharRef& wxUniCharRef::operator=(const wxUniChar& c)
86 {
87 wxStringOperations::Utf8CharBuffer utf(wxStringOperations::EncodeChar(c));
88 size_t lenOld = wxStringOperations::GetUtf8CharLength(*m_pos);
89 size_t lenNew = wxStringOperations::GetUtf8CharLength(utf[0]);
90
91 if ( lenNew == lenOld )
92 {
93 // this is the simpler case: if the new value's UTF-8 code has the
94 // same length, we can just replace it:
95
96 iterator pos(m_pos);
97 for ( size_t i = 0; i < lenNew; ++i, ++pos )
98 *pos = utf[i];
99 }
100 else
101 {
102 // the worse case is when the new value has either longer or shorter
103 // code -- in that case, we have to use wxStringImpl::replace() and
104 // this invalidates all iterators, so we have to update them too:
105
106 wxString& str = *wx_const_cast(wxString*, m_node.m_str);
107 wxStringImpl& strimpl = str.m_impl;
108
109 int iterDiff = lenNew - lenOld;
110 size_t posIdx = m_pos - strimpl.begin();
111
112 // compute positions of outstanding iterators for this string after the
113 // replacement is done (there is only a small number of iterators at
114 // any time, so we use an array on the stack to avoid unneeded
115 // allocation):
116 static const size_t STATIC_SIZE = 32;
117 size_t indexes_a[STATIC_SIZE];
118 size_t *indexes = indexes_a;
119 size_t iterNum = 0;
120 wxStringIteratorNode *it;
121 for ( it = str.m_iterators.ptr; it; it = it->m_next, ++iterNum )
122 {
123 wxASSERT( it->m_iter || it->m_citer );
124
125 if ( iterNum == STATIC_SIZE )
126 {
127 wxLogTrace( _T("utf8"), _T("unexpectedly many iterators") );
128
129 size_t total = iterNum + 1;
130 for ( wxStringIteratorNode *it2 = it; it2; it2 = it2->m_next )
131 total++;
132 indexes = new size_t[total];
133 memcpy(indexes, indexes_a, sizeof(size_t) * STATIC_SIZE);
134 }
135
136 size_t idx = it->m_iter
137 ? (*it->m_iter - strimpl.begin())
138 : (*it->m_citer - strimpl.begin());
139
140 if ( idx > posIdx )
141 idx += iterDiff;
142
143 indexes[iterNum] = idx;
144 }
145
146 // update the string:
147 strimpl.replace(m_pos, m_pos + lenOld, utf, lenNew);
148
149 // finally, set the iterators to valid values again (note that this
150 // updates m_pos as well):
151 size_t i;
152 for ( i = 0, it = str.m_iterators.ptr; it; it = it->m_next, ++i )
153 {
154 wxASSERT( i < iterNum );
155 wxASSERT( it->m_iter || it->m_citer );
156
157 if ( it->m_iter )
158 *it->m_iter = strimpl.begin() + indexes[i];
159 else // it->m_citer
160 *it->m_citer = strimpl.begin() + indexes[i];
161 }
162
163 if ( indexes != indexes_a )
164 delete[] indexes;
165 }
166
167 return *this;
168 }
169 #endif // wxUSE_UNICODE_UTF8