Commit | Line | Data |
---|---|---|
830f8f11 VZ |
1 | /////////////////////////////////////////////////////////////////////////////// |
2 | // Name: src/common/convauto.cpp | |
3 | // Purpose: implementation of wxConvAuto | |
4 | // Author: Vadim Zeitlin | |
5 | // Created: 2006-04-04 | |
6 | // RCS-ID: $Id$ | |
7 | // Copyright: (c) 2006 Vadim Zeitlin <vadim@wxwindows.org> | |
8 | // Licence: wxWindows licence | |
9 | /////////////////////////////////////////////////////////////////////////////// | |
10 | ||
11 | // ============================================================================ | |
12 | // declarations | |
13 | // ============================================================================ | |
14 | ||
15 | // ---------------------------------------------------------------------------- | |
16 | // headers | |
17 | // ---------------------------------------------------------------------------- | |
18 | ||
19 | // for compilers that support precompilation, includes "wx.h". | |
20 | #include "wx/wxprec.h" | |
21 | ||
22 | #ifdef __BORLANDC__ | |
23 | #pragma hdrstop | |
24 | #endif | |
25 | ||
26 | #if wxUSE_WCHAR_T | |
27 | ||
28 | #ifndef WX_PRECOMP | |
29 | #endif //WX_PRECOMP | |
30 | ||
31 | #include "wx/convauto.h" | |
32 | ||
33 | // ============================================================================ | |
34 | // implementation | |
35 | // ============================================================================ | |
36 | ||
37 | /* static */ | |
38 | wxConvAuto::BOMType wxConvAuto::DetectBOM(const char *src, size_t srcLen) | |
39 | { | |
40 | if ( srcLen < 2 ) | |
41 | { | |
42 | // minimal BOM is 2 bytes so bail out immediately and simplify the code | |
43 | // below which wouldn't need to check for length for UTF-16 cases | |
44 | return BOM_None; | |
45 | } | |
46 | ||
47 | // examine the buffer for BOM presence | |
48 | // | |
49 | // see http://www.unicode.org/faq/utf_bom.html#BOM | |
50 | switch ( *src++ ) | |
51 | { | |
52 | case '\0': | |
53 | // could only be big endian UTF-32 (00 00 FE FF) | |
54 | if ( srcLen >= 4 && | |
55 | src[0] == '\0' && | |
56 | src[1] == '\xfe' && | |
57 | src[2] == '\xff' ) | |
58 | { | |
59 | return BOM_UTF32BE; | |
60 | } | |
61 | break; | |
62 | ||
63 | case '\xfe': | |
64 | // could only be big endian UTF-16 (FE FF) | |
65 | if ( *src++ == '\xff' ) | |
66 | { | |
67 | return BOM_UTF16BE; | |
68 | } | |
69 | break; | |
70 | ||
71 | case '\xff': | |
72 | // could be either little endian UTF-16 or UTF-32, both start | |
73 | // with FF FE | |
74 | if ( *src++ == '\xfe' ) | |
75 | { | |
76 | return srcLen >= 4 && src[0] == '\0' && src[1] == '\0' | |
77 | ? BOM_UTF32LE | |
78 | : BOM_UTF16LE; | |
79 | } | |
80 | break; | |
81 | ||
82 | case '\xef': | |
83 | // is this UTF-8 BOM (EF BB BF)? | |
84 | if ( srcLen >= 3 && src[0] == '\xbb' && src[1] == '\xbf' ) | |
85 | { | |
86 | return BOM_UTF8; | |
87 | } | |
88 | break; | |
89 | } | |
90 | ||
91 | return BOM_None; | |
92 | } | |
93 | ||
94 | void wxConvAuto::InitFromBOM(BOMType bomType) | |
95 | { | |
96 | m_consumedBOM = false; | |
97 | ||
98 | switch ( bomType ) | |
99 | { | |
100 | case BOM_UTF32BE: | |
101 | m_conv = new wxMBConvUTF32BE; | |
102 | m_ownsConv = true; | |
103 | break; | |
104 | ||
105 | case BOM_UTF32LE: | |
106 | m_conv = new wxMBConvUTF32LE; | |
107 | m_ownsConv = true; | |
108 | break; | |
109 | ||
110 | case BOM_UTF16BE: | |
111 | m_conv = new wxMBConvUTF16BE; | |
112 | m_ownsConv = true; | |
113 | break; | |
114 | ||
115 | case BOM_UTF16LE: | |
116 | m_conv = new wxMBConvUTF16LE; | |
117 | m_ownsConv = true; | |
118 | break; | |
119 | ||
120 | case BOM_UTF8: | |
121 | m_conv = &wxConvUTF8; | |
122 | m_ownsConv = false; | |
123 | break; | |
124 | ||
125 | default: | |
126 | wxFAIL_MSG( _T("unexpected BOM type") ); | |
127 | // fall through: still need to create something | |
128 | ||
129 | case BOM_None: | |
130 | InitWithDefault(); | |
131 | m_consumedBOM = true; // as there is nothing to consume | |
132 | } | |
133 | } | |
134 | ||
135 | void wxConvAuto::SkipBOM(const char **src, size_t *len) const | |
136 | { | |
137 | int ofs; | |
138 | switch ( m_bomType ) | |
139 | { | |
140 | case BOM_UTF32BE: | |
141 | case BOM_UTF32LE: | |
142 | ofs = 4; | |
143 | break; | |
144 | ||
145 | case BOM_UTF16BE: | |
146 | case BOM_UTF16LE: | |
147 | ofs = 2; | |
148 | break; | |
149 | ||
150 | case BOM_UTF8: | |
151 | ofs = 3; | |
152 | break; | |
153 | ||
154 | default: | |
155 | wxFAIL_MSG( _T("unexpected BOM type") ); | |
156 | // fall through: still need to create something | |
157 | ||
158 | case BOM_None: | |
159 | ofs = 0; | |
160 | } | |
161 | ||
162 | *src += ofs; | |
163 | if ( *len != (size_t)-1 ) | |
164 | *len -= ofs; | |
165 | } | |
166 | ||
167 | void wxConvAuto::InitFromInput(const char **src, size_t *len) | |
168 | { | |
169 | m_bomType = DetectBOM(*src, *len); | |
170 | InitFromBOM(m_bomType); | |
171 | SkipBOM(src, len); | |
172 | } | |
173 | ||
174 | size_t | |
175 | wxConvAuto::ToWChar(wchar_t *dst, size_t dstLen, | |
176 | const char *src, size_t srcLen) const | |
177 | { | |
178 | // we check BOM and create the appropriate conversion the first time we're | |
179 | // called but we also need to ensure that the BOM is skipped not only | |
180 | // during this initial call but also during the first call with non-NULL | |
181 | // dst as typically we're first called with NULL dst to calculate the | |
182 | // needed buffer size | |
183 | wxConvAuto *self = wx_const_cast(wxConvAuto *, this); | |
184 | if ( !m_conv ) | |
185 | { | |
186 | self->InitFromInput(&src, &srcLen); | |
187 | if ( dst ) | |
188 | self->m_consumedBOM = true; | |
189 | } | |
190 | ||
191 | if ( !m_consumedBOM && dst ) | |
192 | { | |
193 | self->m_consumedBOM = true; | |
194 | SkipBOM(&src, &srcLen); | |
195 | } | |
196 | ||
197 | return m_conv->ToWChar(dst, dstLen, src, srcLen); | |
198 | } | |
199 | ||
200 | size_t | |
201 | wxConvAuto::FromWChar(char *dst, size_t dstLen, | |
202 | const wchar_t *src, size_t srcLen) const | |
203 | { | |
204 | if ( !m_conv ) | |
205 | { | |
206 | // default to UTF-8 for the multibyte output | |
207 | wx_const_cast(wxConvAuto *, this)->InitWithDefault(); | |
208 | } | |
209 | ||
210 | return m_conv->FromWChar(dst, dstLen, src, srcLen); | |
211 | } | |
212 | ||
213 | #endif // wxUSE_WCHAR_T | |
214 |