/////////////////////////////////////////////////////////////////////////////
-#ifdef __GNUG__
-#pragma implementation
+#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
+#pragma implementation "htmlfilt.h"
#endif
#include "wx/wxprec.h"
-#if wxUSE_HTML
+#include "wx/defs.h"
+#if wxUSE_HTML && wxUSE_STREAMS
-#ifdef __BORDLANDC__
+#ifdef __BORLANDC__
#pragma hdrstop
#endif
#ifndef WXPRECOMP
-#include "wx/wx.h"
+ #include "wx/log.h"
+ #include "wx/intl.h"
#endif
+#include "wx/strconv.h"
#include "wx/html/htmlfilt.h"
#include "wx/html/htmlwin.h"
+// Utility function: read the contents of a wxInputStream into a wxString,
+// converting the raw bytes to text with the given converter.
+//
+// str  - receives the text; it is overwritten when the stream reports its
+//        size and appended to when the stream is read chunk by chunk
+// s    - stream to read from; dereferenced unconditionally, so must not be NULL
+// conv - multibyte converter used to decode the bytes
+static void ReadString(wxString& str, wxInputStream* s, wxMBConv& conv)
+{
+ size_t streamSize = s->GetSize();
+
+ if (streamSize == ~(size_t)0)
+ {
+ // GetSize() returned the all-ones value, which this code treats as
+ // "size not known": read fixed-size chunks until a short read.
+ // NOTE(review): each chunk is converted on its own, so a multi-byte
+ // sequence straddling a chunk boundary is presumably mis-decoded.
+ const size_t bufSize = 4095;
+ char buffer[bufSize+1];
+ size_t lastRead;
+
+ do
+ {
+ s->Read(buffer, bufSize);
+ lastRead = s->LastRead();
+ buffer[lastRead] = 0;
+ str.Append(wxString(buffer, conv));
+ }
+ while (lastRead == bufSize);
+ }
+ else
+ {
+ char* src = new char[streamSize+1];
+ s->Read(src, streamSize);
+ // Terminate at the number of bytes actually read, not at the reported
+ // size: after a short read the tail of the buffer is uninitialised.
+ src[s->LastRead()] = 0;
+ str = wxString(src, conv);
+ delete[] src;
+ }
+}
/*
+// Reads a plain-text file and wraps it in a minimal HTML page: the text is
+// decoded as ISO-8859-1, the HTML-special characters '&', '<' and '>' are
+// replaced by their entities, and the result is placed inside a <PRE> element.
+// Returns wxEmptyString if the file has no stream.
wxString wxHtmlFilterPlainText::ReadFile(const wxFSFile& file) const
{
wxInputStream *s = file.GetStream();
- char *src;
wxString doc, doc2;
if (s == NULL) return wxEmptyString;
- src = new char[s -> GetSize()+1];
- src[s -> GetSize()] = 0;
- s -> Read(src, s -> GetSize());
- doc = src;
- delete [] src;
-
- doc.Replace("<", "&lt;", TRUE);
- doc.Replace(">", "&gt;", TRUE);
- doc2 = "<HTML><BODY><PRE>\n" + doc + "\n</PRE></BODY></HTML>";
+ ReadString(doc, s, wxConvISO8859_1);
+
+ // Escape '&' first so that the ampersands introduced by the "&lt;" and
+ // "&gt;" replacements below are not escaped a second time.
+ doc.Replace(wxT("&"), wxT("&amp;"), TRUE);
+ doc.Replace(wxT("<"), wxT("&lt;"), TRUE);
+ doc.Replace(wxT(">"), wxT("&gt;"), TRUE);
+ doc2 = wxT("<HTML><BODY><PRE>\n") + doc + wxT("\n</PRE></BODY></HTML>");
return doc2;
}
bool wxHtmlFilterImage::CanRead(const wxFSFile& file) const
{
- return (file.GetMimeType().Left(6) == "image/");
+ // Accept the file when the first six characters of its MIME type
+ // are "image/".
+ const wxString mimePrefix = file.GetMimeType().Left(6);
+ return mimePrefix == wxT("image/");
}
wxString wxHtmlFilterImage::ReadFile(const wxFSFile& file) const
{
- return ("<HTML><BODY><IMG SRC=\"" + file.GetLocation() + "\"></BODY></HTML>");
+ // Build a minimal HTML page whose body is a single <IMG> tag pointing
+ // at the file's location.
+ wxString page(wxT("<HTML><BODY><IMG SRC=\""));
+ page += file.GetLocation();
+ page += wxT("\"></BODY></HTML>");
+ return page;
}
//--------------------------------------------------------------------------------
-// wxHtmlFilterPlainText
-// filter for text/plain or uknown
+// wxHtmlFilterHTML
+// filter for text/html
//--------------------------------------------------------------------------------
-class wxHtmlFilterHTML : public wxHtmlFilter
-{
- DECLARE_DYNAMIC_CLASS(wxHtmlFilterHTML)
-
- public:
- virtual bool CanRead(const wxFSFile& file) const;
- virtual wxString ReadFile(const wxFSFile& file) const;
-};
-
IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterHTML, wxHtmlFilter)
// This is true in most case but some page can return:
// "text/html; char-encoding=...."
// So we use Find instead
- return (file.GetMimeType().Find(_T("text/html")) == 0);
+ return (file.GetMimeType().Find(wxT("text/html")) == 0);
}
+// Reads an HTML document from the file's stream and returns it as a wxString.
+// Logs an error and returns wxEmptyString when the file has no stream.
+// In Unicode builds the text is decoded with the charset taken from the MIME
+// type or, failing that, from the document itself; in ANSI builds the text is
+// read with wxConvLibc and a Content-Type <meta> tag is prepended whenever the
+// MIME type is non-empty.
wxString wxHtmlFilterHTML::ReadFile(const wxFSFile& file) const
{
wxInputStream *s = file.GetStream();
- char *src;
wxString doc;
- if (s == NULL) return wxEmptyString;
- src = new char[s -> GetSize() + 1];
- src[s -> GetSize()] = 0;
- s -> Read(src, s -> GetSize());
- doc = src;
- delete[] src;
+ if (s == NULL)
+ {
+ wxLogError(_("Cannot open HTML document: %s"), file.GetLocation().c_str());
+ return wxEmptyString;
+ }
+
+ // NB: We convert input file to wchar_t here in Unicode mode, based on
+ // either Content-Type header or <meta> tags. In ANSI mode, we don't
+ // do it as it is done by wxHtmlParser (for this reason, we add <meta>
+ // tag if we used Content-Type header).
+#if wxUSE_UNICODE
+ int charsetPos;
+ // Case 1: the MIME type carries an explicit "; charset=" parameter.
+ if ((charsetPos = file.GetMimeType().Find(_T("; charset="))) != wxNOT_FOUND)
+ {
+ // 10 is the length of "; charset=", so Mid() yields everything after it.
+ wxString charset = file.GetMimeType().Mid(charsetPos + 10);
+ wxCSConv conv(charset);
+ ReadString(doc, s, conv);
+ }
+ else
+ {
+ // Case 2: no charset in the MIME type. Read the bytes as ISO-8859-1
+ // first, then ask the parser whether the document names a charset.
+ wxString tmpdoc;
+ ReadString(tmpdoc, s, wxConvISO8859_1);
+ wxString charset = wxHtmlParser::ExtractCharsetInformation(tmpdoc);
+ if (charset.empty())
+ doc = tmpdoc;
+ else
+ {
+ // Turn the provisional text back into bytes via ISO-8859-1 and
+ // decode those bytes again with the charset that was found.
+ wxCSConv conv(charset);
+ doc = wxString(tmpdoc.mb_str(wxConvISO8859_1), conv);
+ }
+ }
+#else // !wxUSE_UNICODE
+ ReadString(doc, s, wxConvLibc);
+ // add meta tag if we obtained this through http:
+ if (!file.GetMimeType().empty())
+ {
+ wxString hdr;
+ wxString mime = file.GetMimeType();
+ hdr.Printf(_T("<meta http-equiv=\"Content-Type\" content=\"%s\">"), mime.c_str());
+ return hdr+doc;
+ }
+#endif
return doc;
}