]> git.saurik.com Git - wxWidgets.git/blame - src/common/unichar.cpp
Fix wxFileSystem::FileNameToURL() for Unicode file names.
[wxWidgets.git] / src / common / unichar.cpp
CommitLineData
dd0ef332
VS
1/////////////////////////////////////////////////////////////////////////////
2// Name: src/common/unichar.cpp
3// Purpose: wxUniChar and wxUniCharRef classes
4// Author: Vaclav Slavik
5// Created: 2007-03-19
6// RCS-ID: $Id$
7// Copyright: (c) 2007 REA Elektronik GmbH
8// Licence: wxWindows licence
9///////////////////////////////////////////////////////////////////////////////
10
11// ===========================================================================
12// headers
13// ===========================================================================
14
15// For compilers that support precompilation, includes "wx.h".
16#include "wx/wxprec.h"
17
18#ifdef __BORLANDC__
19 #pragma hdrstop
20#endif
21
0cb6a6e4
VZ
22#ifndef WX_PRECOMP
23 #include "wx/strconv.h" // wxConvLibc
98d2df74 24 #include "wx/log.h"
0cb6a6e4
VZ
25#endif
26
dd0ef332 27#include "wx/unichar.h"
155c2f6c 28#include "wx/string.h"
81727065 29
dd0ef332
VS
30// ===========================================================================
31// implementation
32// ===========================================================================
33
81727065
VS
34// ---------------------------------------------------------------------------
35// wxUniChar
36// ---------------------------------------------------------------------------
37
dd0ef332 38/* static */
ce65118e 39wxUniChar::value_type wxUniChar::FromHi8bit(char c)
dd0ef332 40{
111d9948 41#if wxUSE_UTF8_LOCALE_ONLY
68fb51cd 42 wxFAIL_MSG( "invalid UTF-8 character" );
eea9eca5
VZ
43 wxUnusedVar(c);
44
111d9948
VS
45 return wxT('?'); // FIXME-UTF8: what to use as failure character?
46#else
7a8e90dd
VZ
47 char cbuf[2];
48 cbuf[0] = c;
49 cbuf[1] = '\0';
50 wchar_t wbuf[2];
51 if ( wxConvLibc.ToWChar(wbuf, 2, cbuf, 2) != 2 )
68fb51cd
VS
52 {
53 wxFAIL_MSG( "invalid multibyte character" );
dd0ef332 54 return wxT('?'); // FIXME-UTF8: what to use as failure character?
68fb51cd 55 }
7a8e90dd 56 return wbuf[0];
111d9948 57#endif
dd0ef332
VS
58}
59
60/* static */
874dbd3a 61char wxUniChar::ToHi8bit(wxUniChar::value_type v)
dd0ef332 62{
874dbd3a
VZ
63 char c;
64 if ( !GetAsHi8bit(v, &c) )
65 {
66 wxFAIL_MSG( "character cannot be converted to single byte" );
67 c = '?'; // FIXME-UTF8: what to use as failure character?
68 }
eea9eca5 69
874dbd3a
VZ
70 return c;
71}
72
73/* static */
74bool wxUniChar::GetAsHi8bit(value_type v, char *c)
75{
7a8e90dd 76 wchar_t wbuf[2];
874dbd3a 77 wbuf[0] = v;
7a8e90dd
VZ
78 wbuf[1] = L'\0';
79 char cbuf[2];
80 if ( wxConvLibc.FromWChar(cbuf, 2, wbuf, 2) != 2 )
874dbd3a 81 return false;
81727065 82
874dbd3a
VZ
83 *c = cbuf[0];
84 return true;
85}
81727065
VS
86
87// ---------------------------------------------------------------------------
88// wxUniCharRef
89// ---------------------------------------------------------------------------
90
91#if wxUSE_UNICODE_UTF8
467175ab
VS
92wxUniChar wxUniCharRef::UniChar() const
93{
94 return wxStringOperations::DecodeChar(m_pos);
95}
96
81727065
VS
97wxUniCharRef& wxUniCharRef::operator=(const wxUniChar& c)
98{
467175ab
VS
99 wxStringOperations::Utf8CharBuffer utf(wxStringOperations::EncodeChar(c));
100 size_t lenOld = wxStringOperations::GetUtf8CharLength(*m_pos);
101 size_t lenNew = wxStringOperations::GetUtf8CharLength(utf[0]);
81727065
VS
102
103 if ( lenNew == lenOld )
104 {
b0c4d5d7
VS
105 // this is the simpler case: if the new value's UTF-8 code has the
106 // same length, we can just replace it:
107
81727065
VS
108 iterator pos(m_pos);
109 for ( size_t i = 0; i < lenNew; ++i, ++pos )
110 *pos = utf[i];
111 }
68482dc5 112 else // length of character encoding in UTF-8 changed
81727065 113 {
b0c4d5d7
VS
114 // the worse case is when the new value has either longer or shorter
115 // code -- in that case, we have to use wxStringImpl::replace() and
116 // this invalidates all iterators, so we have to update them too:
117
6bd4f281 118 wxStringImpl& strimpl = m_str.m_impl;
b0c4d5d7
VS
119
120 int iterDiff = lenNew - lenOld;
121 size_t posIdx = m_pos - strimpl.begin();
122
123 // compute positions of outstanding iterators for this string after the
124 // replacement is done (there is only a small number of iterators at
125 // any time, so we use an array on the stack to avoid unneeded
126 // allocation):
127 static const size_t STATIC_SIZE = 32;
128 size_t indexes_a[STATIC_SIZE];
129 size_t *indexes = indexes_a;
130 size_t iterNum = 0;
131 wxStringIteratorNode *it;
6bd4f281 132 for ( it = m_str.m_iterators.ptr; it; it = it->m_next, ++iterNum )
b0c4d5d7
VS
133 {
134 wxASSERT( it->m_iter || it->m_citer );
135
136 if ( iterNum == STATIC_SIZE )
137 {
9a83f860 138 wxLogTrace( wxT("utf8"), wxT("unexpectedly many iterators") );
b0c4d5d7
VS
139
140 size_t total = iterNum + 1;
141 for ( wxStringIteratorNode *it2 = it; it2; it2 = it2->m_next )
142 total++;
143 indexes = new size_t[total];
144 memcpy(indexes, indexes_a, sizeof(size_t) * STATIC_SIZE);
145 }
146
147 size_t idx = it->m_iter
148 ? (*it->m_iter - strimpl.begin())
149 : (*it->m_citer - strimpl.begin());
150
151 if ( idx > posIdx )
152 idx += iterDiff;
153
154 indexes[iterNum] = idx;
155 }
156
157 // update the string:
158 strimpl.replace(m_pos, m_pos + lenOld, utf, lenNew);
159
68482dc5
VZ
160#if wxUSE_STRING_POS_CACHE
161 m_str.InvalidateCache();
162#endif // wxUSE_STRING_POS_CACHE
163
b0c4d5d7
VS
164 // finally, set the iterators to valid values again (note that this
165 // updates m_pos as well):
166 size_t i;
6bd4f281 167 for ( i = 0, it = m_str.m_iterators.ptr; it; it = it->m_next, ++i )
b0c4d5d7
VS
168 {
169 wxASSERT( i < iterNum );
170 wxASSERT( it->m_iter || it->m_citer );
171
172 if ( it->m_iter )
173 *it->m_iter = strimpl.begin() + indexes[i];
174 else // it->m_citer
175 *it->m_citer = strimpl.begin() + indexes[i];
176 }
177
178 if ( indexes != indexes_a )
179 delete[] indexes;
81727065
VS
180 }
181
182 return *this;
183}
184#endif // wxUSE_UNICODE_UTF8