]>
git.saurik.com Git - wxWidgets.git/blob - src/common/convauto.cpp
1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/convauto.cpp
3 // Purpose: implementation of wxConvAuto
4 // Author: Vadim Zeitlin
7 // Copyright: (c) 2006 Vadim Zeitlin <vadim@wxwindows.org>
8 // Licence: wxWindows licence
9 ///////////////////////////////////////////////////////////////////////////////
11 // ============================================================================
13 // ============================================================================
15 // ----------------------------------------------------------------------------
17 // ----------------------------------------------------------------------------
19 // for compilers that support precompilation, includes "wx.h".
20 #include "wx/wxprec.h"
31 #include "wx/convauto.h"
33 // we use latin1 by default as it seems the least bad choice: the files whose
34 // encoding we need to detect don't always come from the user's system (they
35 // are often received from other machines), so using wxFONTENCODING_SYSTEM
36 // doesn't seem to be a good idea and there is no other reasonable alternative
37 wxFontEncoding
wxConvAuto::ms_defaultMBEncoding
= wxFONTENCODING_ISO8859_1
;
39 // ============================================================================
41 // ============================================================================
44 void wxConvAuto::SetFallbackEncoding(wxFontEncoding enc
)
46 wxASSERT_MSG( enc
!= wxFONTENCODING_DEFAULT
,
47 wxT("wxFONTENCODING_DEFAULT doesn't make sense here") );
49 ms_defaultMBEncoding
= enc
;
53 wxConvAuto::BOMType
wxConvAuto::DetectBOM(const char *src
, size_t srcLen
)
57 // the shortest BOM is 2 bytes long, so bail out immediately if we have less
58 // than that: this lets the code below skip length checks in the UTF-16 cases
62 // examine the buffer for BOM presence
64 // see http://www.unicode.org/faq/utf_bom.html#BOM
68 // could only be big endian UTF-32 (00 00 FE FF)
79 // could only be big endian UTF-16 (FE FF)
80 if ( *src
++ == '\xff' )
87 // could be either little endian UTF-16 or UTF-32, both start with FF FE
89 if ( *src
++ == '\xfe' )
91 return srcLen
>= 4 && src
[0] == '\0' && src
[1] == '\0'
98 // is this UTF-8 BOM (EF BB BF)?
99 if ( srcLen
>= 3 && src
[0] == '\xbb' && src
[1] == '\xbf' )
109 void wxConvAuto::InitFromBOM(BOMType bomType
)
111 m_consumedBOM
= false;
116 m_conv
= new wxMBConvUTF32BE
;
121 m_conv
= new wxMBConvUTF32LE
;
126 m_conv
= new wxMBConvUTF16BE
;
131 m_conv
= new wxMBConvUTF16LE
;
140 wxFAIL_MSG( wxT("unexpected BOM type") );
141 // fall through: still need to create something
145 m_consumedBOM
= true; // as there is nothing to consume
149 void wxConvAuto::SkipBOM(const char **src
, size_t *len
) const
169 wxFAIL_MSG( wxT("unexpected BOM type") );
170 // fall through: still need to create something
177 if ( *len
!= (size_t)-1 )
181 void wxConvAuto::InitFromInput(const char **src
, size_t *len
)
183 m_bomType
= DetectBOM(*src
, *len
);
184 InitFromBOM(m_bomType
);
189 wxConvAuto::ToWChar(wchar_t *dst
, size_t dstLen
,
190 const char *src
, size_t srcLen
) const
192 // we check for the BOM and create the appropriate conversion the first time
193 // we're called, but we also need to ensure that the BOM is skipped not only
194 // during this initial call but also during the first call with non-NULL
195 // dst, as typically we're first called with NULL dst to calculate the
196 // needed buffer size
197 wxConvAuto
*self
= const_cast<wxConvAuto
*>(this);
200 self
->InitFromInput(&src
, &srcLen
);
202 self
->m_consumedBOM
= true;
205 if ( !m_consumedBOM
&& dst
)
207 self
->m_consumedBOM
= true;
208 SkipBOM(&src
, &srcLen
);
211 // try to convert using the auto-detected encoding
212 size_t rc
= m_conv
->ToWChar(dst
, dstLen
, src
, srcLen
);
213 if ( rc
== wxCONV_FAILED
&& m_bomType
== BOM_None
)
215 // if the conversion failed but we didn't really detect anything and
216 // simply tried UTF-8 by default, retry it using the fall-back encoding
217 if ( m_encDefault
!= wxFONTENCODING_MAX
)
222 self
->m_conv
= new wxCSConv(m_encDefault
== wxFONTENCODING_DEFAULT
223 ? GetFallbackEncoding()
225 self
->m_ownsConv
= true;
227 rc
= m_conv
->ToWChar(dst
, dstLen
, src
, srcLen
);
235 wxConvAuto::FromWChar(char *dst
, size_t dstLen
,
236 const wchar_t *src
, size_t srcLen
) const
240 // default to UTF-8 for the multibyte output
241 const_cast<wxConvAuto
*>(this)->InitWithUTF8();
244 return m_conv
->FromWChar(dst
, dstLen
, src
, srcLen
);
247 #endif // wxUSE_WCHAR_T