]> git.saurik.com Git - wxWidgets.git/blob - src/html/htmlfilt.cpp
rewrote wxHtmlEntitiesParser::Parse() using iterators, optimized for the common case...
[wxWidgets.git] / src / html / htmlfilt.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/htmlfilt.cpp
3 // Purpose: wxHtmlFilter - input filter for translating into HTML format
4 // Author: Vaclav Slavik
5 // RCS-ID: $Id$
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
9
10 #include "wx/wxprec.h"
11
12 #ifdef __BORLANDC__
13 #pragma hdrstop
14 #endif
15
16 #if wxUSE_HTML && wxUSE_STREAMS
17
18 #ifndef WX_PRECOMP
19 #include "wx/log.h"
20 #include "wx/intl.h"
21 #endif
22
23 #include "wx/strconv.h"
24 #include "wx/html/htmlfilt.h"
25 #include "wx/html/htmlwin.h"
26
27 // utility function: read a wxString from a wxInputStream
28 static void ReadString(wxString& str, wxInputStream* s, wxMBConv& conv)
29 {
30 size_t streamSize = s->GetSize();
31
32 if (streamSize == ~(size_t)0)
33 {
34 const size_t bufSize = 4095;
35 char buffer[bufSize+1];
36 size_t lastRead;
37
38 do
39 {
40 s->Read(buffer, bufSize);
41 lastRead = s->LastRead();
42 buffer[lastRead] = 0;
43 str.Append(wxString(buffer, conv));
44 }
45 while (lastRead == bufSize);
46 }
47 else
48 {
49 char* src = new char[streamSize+1];
50 s->Read(src, streamSize);
51 src[streamSize] = 0;
52 str = wxString(src, conv);
53 delete[] src;
54 }
55 }
56
57 /*
58
59 There is code for several default filters:
60
61 */
62
63 IMPLEMENT_ABSTRACT_CLASS(wxHtmlFilter, wxObject)
64
65 //--------------------------------------------------------------------------------
66 // wxHtmlFilterPlainText
67 // filter for text/plain or uknown
68 //--------------------------------------------------------------------------------
69
70 IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterPlainText, wxHtmlFilter)
71
72 bool wxHtmlFilterPlainText::CanRead(const wxFSFile& WXUNUSED(file)) const
73 {
74 return true;
75 }
76
77
78
79 wxString wxHtmlFilterPlainText::ReadFile(const wxFSFile& file) const
80 {
81 wxInputStream *s = file.GetStream();
82 wxString doc, doc2;
83
84 if (s == NULL) return wxEmptyString;
85 ReadString(doc, s, wxConvISO8859_1);
86
87 doc.Replace(wxT("&"), wxT("&"), true);
88 doc.Replace(wxT("<"), wxT("&lt;"), true);
89 doc.Replace(wxT(">"), wxT("&gt;"), true);
90 doc2 = wxT("<HTML><BODY><PRE>\n") + doc + wxT("\n</PRE></BODY></HTML>");
91 return doc2;
92 }
93
94
95
96
97
98 //--------------------------------------------------------------------------------
99 // wxHtmlFilterImage
100 // filter for image/*
101 //--------------------------------------------------------------------------------
102
103 class wxHtmlFilterImage : public wxHtmlFilter
104 {
105 DECLARE_DYNAMIC_CLASS(wxHtmlFilterImage)
106
107 public:
108 virtual bool CanRead(const wxFSFile& file) const;
109 virtual wxString ReadFile(const wxFSFile& file) const;
110 };
111
112 IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterImage, wxHtmlFilter)
113
114
115
116 bool wxHtmlFilterImage::CanRead(const wxFSFile& file) const
117 {
118 return (file.GetMimeType().Left(6) == wxT("image/"));
119 }
120
121
122
123 wxString wxHtmlFilterImage::ReadFile(const wxFSFile& file) const
124 {
125 wxString res = wxT("<HTML><BODY><IMG SRC=\"") + file.GetLocation() + wxT("\"></BODY></HTML>");
126 return res;
127 }
128
129
130
131
132 //--------------------------------------------------------------------------------
133 // wxHtmlFilterHTML
134 // filter for text/html
135 //--------------------------------------------------------------------------------
136
137
138 IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterHTML, wxHtmlFilter)
139
140 bool wxHtmlFilterHTML::CanRead(const wxFSFile& file) const
141 {
142 // return (file.GetMimeType() == "text/html");
143 // This is true in most case but some page can return:
144 // "text/html; char-encoding=...."
145 // So we use Find instead
146 return (file.GetMimeType().Find(wxT("text/html")) == 0);
147 }
148
149
150
151 wxString wxHtmlFilterHTML::ReadFile(const wxFSFile& file) const
152 {
153 wxInputStream *s = file.GetStream();
154 wxString doc;
155
156 if (s == NULL)
157 {
158 wxLogError(_("Cannot open HTML document: %s"), file.GetLocation().c_str());
159 return wxEmptyString;
160 }
161
162 // NB: We convert input file to wchar_t here in Unicode mode, based on
163 // either Content-Type header or <meta> tags. In ANSI mode, we don't
164 // do it as it is done by wxHtmlParser (for this reason, we add <meta>
165 // tag if we used Content-Type header).
166 #if wxUSE_UNICODE
167 int charsetPos;
168 if ((charsetPos = file.GetMimeType().Find(_T("; charset="))) != wxNOT_FOUND)
169 {
170 wxString charset = file.GetMimeType().Mid(charsetPos + 10);
171 wxCSConv conv(charset);
172 ReadString(doc, s, conv);
173 }
174 else
175 {
176 wxString tmpdoc;
177 ReadString(tmpdoc, s, wxConvISO8859_1);
178 wxString charset = wxHtmlParser::ExtractCharsetInformation(tmpdoc);
179 if (charset.empty())
180 doc = tmpdoc;
181 else
182 {
183 wxCSConv conv(charset);
184 doc = wxString(tmpdoc.mb_str(wxConvISO8859_1), conv);
185 }
186 }
187 #else // !wxUSE_UNICODE
188 ReadString(doc, s, wxConvLibc);
189 // add meta tag if we obtained this through http:
190 if (!file.GetMimeType().empty())
191 {
192 wxString hdr;
193 wxString mime = file.GetMimeType();
194 hdr.Printf(_T("<meta http-equiv=\"Content-Type\" content=\"%s\">"), mime.c_str());
195 return hdr+doc;
196 }
197 #endif
198
199 return doc;
200 }
201
202
203
204
205 ///// Module:
206
207 class wxHtmlFilterModule : public wxModule
208 {
209 DECLARE_DYNAMIC_CLASS(wxHtmlFilterModule)
210
211 public:
212 virtual bool OnInit()
213 {
214 wxHtmlWindow::AddFilter(new wxHtmlFilterHTML);
215 wxHtmlWindow::AddFilter(new wxHtmlFilterImage);
216 return true;
217 }
218 virtual void OnExit() {}
219 };
220
221 IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterModule, wxModule)
222
223 #endif