]> git.saurik.com Git - wxWidgets.git/blame - src/common/convauto.cpp
fix dereferencing end() iterator in ParseFormat() and constructing out of bound itera...
[wxWidgets.git] / src / common / convauto.cpp
CommitLineData
830f8f11
VZ
1///////////////////////////////////////////////////////////////////////////////
2// Name: src/common/convauto.cpp
3// Purpose: implementation of wxConvAuto
4// Author: Vadim Zeitlin
5// Created: 2006-04-04
6// RCS-ID: $Id$
7// Copyright: (c) 2006 Vadim Zeitlin <vadim@wxwindows.org>
8// Licence: wxWindows licence
9///////////////////////////////////////////////////////////////////////////////
10
11// ============================================================================
12// declarations
13// ============================================================================
14
15// ----------------------------------------------------------------------------
16// headers
17// ----------------------------------------------------------------------------
18
19// for compilers that support precompilation, includes "wx.h".
20#include "wx/wxprec.h"
21
22#ifdef __BORLANDC__
23 #pragma hdrstop
24#endif
25
26#if wxUSE_WCHAR_T
27
28#ifndef WX_PRECOMP
29#endif //WX_PRECOMP
30
31#include "wx/convauto.h"
32
01a9232b
VZ
33// we use latin1 by default as it seems the least bad choice: the files whose
34// encoding we need to detect don't always come from the user system (they are
35// often received from other machines) and so using wxFONTENCODING_SYSTEM
36// doesn't seem to be a good idea and there is no other reasonable alternative
37wxFontEncoding wxConvAuto::ms_defaultMBEncoding = wxFONTENCODING_ISO8859_1;
38
830f8f11
VZ
39// ============================================================================
40// implementation
41// ============================================================================
42
01a9232b
VZ
43/* static */
44void wxConvAuto::SetFallbackEncoding(wxFontEncoding enc)
45{
46 wxASSERT_MSG( enc != wxFONTENCODING_DEFAULT,
47 _T("wxFONTENCODING_DEFAULT doesn't make sense here") );
48
49 ms_defaultMBEncoding = enc;
50}
51
830f8f11
VZ
52/* static */
53wxConvAuto::BOMType wxConvAuto::DetectBOM(const char *src, size_t srcLen)
54{
55 if ( srcLen < 2 )
56 {
57 // minimal BOM is 2 bytes so bail out immediately and simplify the code
58 // below which wouldn't need to check for length for UTF-16 cases
59 return BOM_None;
60 }
61
62 // examine the buffer for BOM presence
63 //
64 // see http://www.unicode.org/faq/utf_bom.html#BOM
65 switch ( *src++ )
66 {
67 case '\0':
68 // could only be big endian UTF-32 (00 00 FE FF)
69 if ( srcLen >= 4 &&
70 src[0] == '\0' &&
71 src[1] == '\xfe' &&
72 src[2] == '\xff' )
73 {
74 return BOM_UTF32BE;
75 }
76 break;
77
78 case '\xfe':
79 // could only be big endian UTF-16 (FE FF)
80 if ( *src++ == '\xff' )
81 {
82 return BOM_UTF16BE;
83 }
84 break;
85
86 case '\xff':
87 // could be either little endian UTF-16 or UTF-32, both start
88 // with FF FE
89 if ( *src++ == '\xfe' )
90 {
91 return srcLen >= 4 && src[0] == '\0' && src[1] == '\0'
92 ? BOM_UTF32LE
93 : BOM_UTF16LE;
94 }
95 break;
96
97 case '\xef':
98 // is this UTF-8 BOM (EF BB BF)?
99 if ( srcLen >= 3 && src[0] == '\xbb' && src[1] == '\xbf' )
100 {
101 return BOM_UTF8;
102 }
103 break;
104 }
105
106 return BOM_None;
107}
108
109void wxConvAuto::InitFromBOM(BOMType bomType)
110{
111 m_consumedBOM = false;
112
113 switch ( bomType )
114 {
115 case BOM_UTF32BE:
116 m_conv = new wxMBConvUTF32BE;
117 m_ownsConv = true;
118 break;
119
120 case BOM_UTF32LE:
121 m_conv = new wxMBConvUTF32LE;
122 m_ownsConv = true;
123 break;
124
125 case BOM_UTF16BE:
126 m_conv = new wxMBConvUTF16BE;
127 m_ownsConv = true;
128 break;
129
130 case BOM_UTF16LE:
131 m_conv = new wxMBConvUTF16LE;
132 m_ownsConv = true;
133 break;
134
135 case BOM_UTF8:
01a9232b 136 InitWithUTF8();
830f8f11
VZ
137 break;
138
139 default:
140 wxFAIL_MSG( _T("unexpected BOM type") );
141 // fall through: still need to create something
142
143 case BOM_None:
01a9232b 144 InitWithUTF8();
830f8f11
VZ
145 m_consumedBOM = true; // as there is nothing to consume
146 }
147}
148
149void wxConvAuto::SkipBOM(const char **src, size_t *len) const
150{
151 int ofs;
152 switch ( m_bomType )
153 {
154 case BOM_UTF32BE:
155 case BOM_UTF32LE:
156 ofs = 4;
157 break;
158
159 case BOM_UTF16BE:
160 case BOM_UTF16LE:
161 ofs = 2;
162 break;
163
164 case BOM_UTF8:
165 ofs = 3;
166 break;
167
168 default:
169 wxFAIL_MSG( _T("unexpected BOM type") );
170 // fall through: still need to create something
171
172 case BOM_None:
173 ofs = 0;
174 }
175
176 *src += ofs;
177 if ( *len != (size_t)-1 )
178 *len -= ofs;
179}
180
181void wxConvAuto::InitFromInput(const char **src, size_t *len)
182{
183 m_bomType = DetectBOM(*src, *len);
184 InitFromBOM(m_bomType);
185 SkipBOM(src, len);
186}
187
188size_t
189wxConvAuto::ToWChar(wchar_t *dst, size_t dstLen,
190 const char *src, size_t srcLen) const
191{
192 // we check BOM and create the appropriate conversion the first time we're
193 // called but we also need to ensure that the BOM is skipped not only
194 // during this initial call but also during the first call with non-NULL
195 // dst as typically we're first called with NULL dst to calculate the
196 // needed buffer size
5c33522f 197 wxConvAuto *self = const_cast<wxConvAuto *>(this);
830f8f11
VZ
198 if ( !m_conv )
199 {
200 self->InitFromInput(&src, &srcLen);
201 if ( dst )
202 self->m_consumedBOM = true;
203 }
204
205 if ( !m_consumedBOM && dst )
206 {
207 self->m_consumedBOM = true;
208 SkipBOM(&src, &srcLen);
209 }
210
01a9232b
VZ
211 // try to convert using the auto-detected encoding
212 size_t rc = m_conv->ToWChar(dst, dstLen, src, srcLen);
213 if ( rc == wxCONV_FAILED && m_bomType == BOM_None )
214 {
215 // if the conversion failed but we didn't really detect anything and
216 // simply tried UTF-8 by default, retry it using the fall-back
217 if ( m_encDefault != wxFONTENCODING_MAX )
218 {
219 if ( m_ownsConv )
220 delete m_conv;
221
222 self->m_conv = new wxCSConv(m_encDefault == wxFONTENCODING_DEFAULT
223 ? GetFallbackEncoding()
224 : m_encDefault);
225 self->m_ownsConv = true;
226
227 rc = m_conv->ToWChar(dst, dstLen, src, srcLen);
228 }
229 }
230
231 return rc;
830f8f11
VZ
232}
233
234size_t
235wxConvAuto::FromWChar(char *dst, size_t dstLen,
236 const wchar_t *src, size_t srcLen) const
237{
238 if ( !m_conv )
239 {
240 // default to UTF-8 for the multibyte output
5c33522f 241 const_cast<wxConvAuto *>(this)->InitWithUTF8();
830f8f11
VZ
242 }
243
244 return m_conv->FromWChar(dst, dstLen, src, srcLen);
245}
246
247#endif // wxUSE_WCHAR_T
248