]> git.saurik.com Git - wxWidgets.git/blob - src/common/convauto.cpp
removing additional offset - why did this ever work ?
[wxWidgets.git] / src / common / convauto.cpp
1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/convauto.cpp
3 // Purpose: implementation of wxConvAuto
4 // Author: Vadim Zeitlin
5 // Created: 2006-04-04
6 // RCS-ID: $Id$
7 // Copyright: (c) 2006 Vadim Zeitlin <vadim@wxwindows.org>
8 // Licence: wxWindows licence
9 ///////////////////////////////////////////////////////////////////////////////
10
11 // ============================================================================
12 // declarations
13 // ============================================================================
14
15 // ----------------------------------------------------------------------------
16 // headers
17 // ----------------------------------------------------------------------------
18
19 // for compilers that support precompilation, includes "wx.h".
20 #include "wx/wxprec.h"
21
22 #ifdef __BORLANDC__
23 #pragma hdrstop
24 #endif
25
26 #if wxUSE_WCHAR_T
27
28 #ifndef WX_PRECOMP
29 #endif //WX_PRECOMP
30
31 #include "wx/convauto.h"
32
33 // ============================================================================
34 // implementation
35 // ============================================================================
36
37 /* static */
38 wxConvAuto::BOMType wxConvAuto::DetectBOM(const char *src, size_t srcLen)
39 {
40 if ( srcLen < 2 )
41 {
42 // minimal BOM is 2 bytes so bail out immediately and simplify the code
43 // below which wouldn't need to check for length for UTF-16 cases
44 return BOM_None;
45 }
46
47 // examine the buffer for BOM presence
48 //
49 // see http://www.unicode.org/faq/utf_bom.html#BOM
50 switch ( *src++ )
51 {
52 case '\0':
53 // could only be big endian UTF-32 (00 00 FE FF)
54 if ( srcLen >= 4 &&
55 src[0] == '\0' &&
56 src[1] == '\xfe' &&
57 src[2] == '\xff' )
58 {
59 return BOM_UTF32BE;
60 }
61 break;
62
63 case '\xfe':
64 // could only be big endian UTF-16 (FE FF)
65 if ( *src++ == '\xff' )
66 {
67 return BOM_UTF16BE;
68 }
69 break;
70
71 case '\xff':
72 // could be either little endian UTF-16 or UTF-32, both start
73 // with FF FE
74 if ( *src++ == '\xfe' )
75 {
76 return srcLen >= 4 && src[0] == '\0' && src[1] == '\0'
77 ? BOM_UTF32LE
78 : BOM_UTF16LE;
79 }
80 break;
81
82 case '\xef':
83 // is this UTF-8 BOM (EF BB BF)?
84 if ( srcLen >= 3 && src[0] == '\xbb' && src[1] == '\xbf' )
85 {
86 return BOM_UTF8;
87 }
88 break;
89 }
90
91 return BOM_None;
92 }
93
94 void wxConvAuto::InitFromBOM(BOMType bomType)
95 {
96 m_consumedBOM = false;
97
98 switch ( bomType )
99 {
100 case BOM_UTF32BE:
101 m_conv = new wxMBConvUTF32BE;
102 m_ownsConv = true;
103 break;
104
105 case BOM_UTF32LE:
106 m_conv = new wxMBConvUTF32LE;
107 m_ownsConv = true;
108 break;
109
110 case BOM_UTF16BE:
111 m_conv = new wxMBConvUTF16BE;
112 m_ownsConv = true;
113 break;
114
115 case BOM_UTF16LE:
116 m_conv = new wxMBConvUTF16LE;
117 m_ownsConv = true;
118 break;
119
120 case BOM_UTF8:
121 m_conv = &wxConvUTF8;
122 m_ownsConv = false;
123 break;
124
125 default:
126 wxFAIL_MSG( _T("unexpected BOM type") );
127 // fall through: still need to create something
128
129 case BOM_None:
130 InitWithDefault();
131 m_consumedBOM = true; // as there is nothing to consume
132 }
133 }
134
135 void wxConvAuto::SkipBOM(const char **src, size_t *len) const
136 {
137 int ofs;
138 switch ( m_bomType )
139 {
140 case BOM_UTF32BE:
141 case BOM_UTF32LE:
142 ofs = 4;
143 break;
144
145 case BOM_UTF16BE:
146 case BOM_UTF16LE:
147 ofs = 2;
148 break;
149
150 case BOM_UTF8:
151 ofs = 3;
152 break;
153
154 default:
155 wxFAIL_MSG( _T("unexpected BOM type") );
156 // fall through: still need to create something
157
158 case BOM_None:
159 ofs = 0;
160 }
161
162 *src += ofs;
163 if ( *len != (size_t)-1 )
164 *len -= ofs;
165 }
166
167 void wxConvAuto::InitFromInput(const char **src, size_t *len)
168 {
169 m_bomType = DetectBOM(*src, *len);
170 InitFromBOM(m_bomType);
171 SkipBOM(src, len);
172 }
173
174 size_t
175 wxConvAuto::ToWChar(wchar_t *dst, size_t dstLen,
176 const char *src, size_t srcLen) const
177 {
178 // we check BOM and create the appropriate conversion the first time we're
179 // called but we also need to ensure that the BOM is skipped not only
180 // during this initial call but also during the first call with non-NULL
181 // dst as typically we're first called with NULL dst to calculate the
182 // needed buffer size
183 wxConvAuto *self = wx_const_cast(wxConvAuto *, this);
184 if ( !m_conv )
185 {
186 self->InitFromInput(&src, &srcLen);
187 if ( dst )
188 self->m_consumedBOM = true;
189 }
190
191 if ( !m_consumedBOM && dst )
192 {
193 self->m_consumedBOM = true;
194 SkipBOM(&src, &srcLen);
195 }
196
197 return m_conv->ToWChar(dst, dstLen, src, srcLen);
198 }
199
200 size_t
201 wxConvAuto::FromWChar(char *dst, size_t dstLen,
202 const wchar_t *src, size_t srcLen) const
203 {
204 if ( !m_conv )
205 {
206 // default to UTF-8 for the multibyte output
207 wx_const_cast(wxConvAuto *, this)->InitWithDefault();
208 }
209
210 return m_conv->FromWChar(dst, dstLen, src, srcLen);
211 }
212
213 #endif // wxUSE_WCHAR_T
214